Fitting models with custom data • spdgt.sight

library(spdgt.sight)

Overview

sight_fit_model_id() accepts a data parameter that lets you pass your own observation data instead of fetching it from the database. This is useful when you need to:

- Run models on modified or filtered observations
- Test estimation with synthetic data
- Reproduce results with a known dataset

There are two estimation paths depending on the model type:

Abundance estimation uses a sightability model to correct for detection bias and extrapolate across the survey design
Ratio estimation computes demographic ratios (e.g., fawn:doe) from composition survey counts

Before building custom data you need to discover the right IDs for your species, survey type, analysis unit, strata, and model. IDs are not stable across projects, so always use the lookup functions rather than hardcoding values.

Discovering your parameters

Species

# Browse all species in the project
lkup_species()

# Get the ID for a specific species by name; later snippets pass this
# `species_id` to the other lookup functions
species_id <- lkup_species_id("Mule Deer")

Survey types

# Show the survey types defined for this species
lkup_survey_type(species_id = species_id)

# Resolve the sightability survey type to its ID
survey_type_id <- lkup_survey_type_id("Sightability", species_id = species_id)

Analysis units (DAUs)

# Show the analysis units (DAUs) defined for this species
lkup_dau(species_id = species_id)

# Resolve one DAU name to its ID
dau_id <- lkup_dau_id("North Converse 755", species_id = species_id)

Strata

# Show the strata defined for this species + survey type combination
lkup_strata(species_id = species_id, survey_type_name = "Sightability")
# Each row is an id/name pair (e.g., 17 = "High", 19 = "Low").
# A stratum_id of 0 or NA maps to the "Other" stratum (non-extrapolating).

Models

# List the models available for this species + survey type
models <- sight_read_model(species = "Mule Deer", survey_type = "Sightability")
print(models)

# Choose a model; this example simply takes the first one listed
model_id <- models$id[1]

# Read the chosen model's metadata to see which estimation method it uses
model <- sight_read_model(id = model_id)
model$metadata[[1]]
# $method names the estimation approach, for example:
#   "sightability"  - classical Wong estimator (requires covariates)
#   "cochran"       - Cochran ratio estimator
#   "quasibinomial" - quasi-binomial GLM variance

# Sightability models only: list the covariates and their coded levels
betas <- sight_read_betas_id(model_id = model_id, includes = "covar")
print(betas)
# Covariates the model uses: activity, vegetation_type,
# snow_percentage, group_size

Subunits

# List subunits within a DAU (uses the `dau_id` resolved in the
# analysis-unit lookup)
spdgt.core::lkup_read_subunit(dau_id = dau_id)
# Returns id, management_unit_id, name, sort_order

Data schema reference

The data parameter expects a tibble that matches the structure of prepared survey data. The columns fall into five groups:

Context columns

The API extracts these to populate response metadata. Each column should contain a single unique value across all rows.

Column	Type	Description
`species_id`	integer	Species identifier
`bio_year`	integer	Biological year (e.g., 2024)
`analysis_unit_id`	integer	Analysis unit (DAU) identifier
`survey_type_id`	integer	Survey type identifier

Observation columns

These drive the estimation calculations.

Column	Type	Description
`subunit_id`	integer	Subunit (survey unit) identifier
`stratum_id`	integer	Stratum identifier (0 = “Other”)
`is_selected`	logical	Whether the subunit was selected in the design
`is_surveyed`	logical	Whether the subunit was actually surveyed
`total`	integer	Total animal count
`males`	integer	Male count
`females`	integer	Female count
`youngs`	integer	Young-of-year count
`unclass`	integer	Unclassified count

The demographic count column names vary by survey type. Use sight_read_survey_cols_id() to discover the correct names for your survey type.

Covariate columns (sightability models only)

Required when the model method is "sightability". Values must match the coded levels in the model’s betas table (see sight_read_betas_id()).

Column	Type	Description
`activity`	integer	Activity code (e.g., 1 = Moving, 2 = Standing)
`vegetation_type`	integer	Vegetation code (e.g., 1 = Conifer, 2 = Grassland)
`snow_percentage`	integer	Snow cover percentage (0–100)

Spatial columns

Column	Type	Description
`management_unit_id`	integer	GMU identifier (from subunit lookup)

Optional columns

Column	Type	Description
`metadata`	list	Empty `list()` per row; only needed if entry-column-mapping transforms were applied

Example 1: Abundance estimation (sightability model)

This example builds a synthetic 20-row tibble and passes it to sight_fit_model_id() with a sightability model.

Discover IDs

# Resolve the species, survey type, and analysis unit to their IDs
species_id <- lkup_species_id("Mule Deer")
survey_type_id <- lkup_survey_type_id("Sightability", species_id = species_id)
dau_id <- lkup_dau_id("North Converse 755", species_id = species_id)

# List the models for this species + survey type and take the first one
models <- sight_read_model(species = "Mule Deer", survey_type = "Sightability")
model_id <- models$id[1]

# The betas table shows how each covariate is coded
betas <- sight_read_betas_id(model_id = model_id, includes = "covar")
print(betas)
# e.g., activity:        1 = Moving, 2 = Standing, 3 = Bedded, 4 = Running
#       vegetation_type: 1 = Conifer, 2 = Grassland/Open
#       snow_percentage: continuous (0-100)

# Strata available for this species + survey type
strata <- lkup_strata(
  species_id = species_id, survey_type_name = "Sightability"
)
print(strata)
# e.g., 17 = High, 19 = Low; 0 = Other

Build synthetic data

Use real subunit IDs from the DAU. The subunit lookup provides the management_unit_id (GMU) for each subunit, which the estimation pipeline requires.

# Look up subunits for the DAU
subunits <- spdgt.core::lkup_read_subunit(dau_id = dau_id)

# Pick 10 subunits, 2 observations each (one per stratum).
# Indexing with 1:10 would silently pad the result with all-NA rows when the
# DAU has fewer than 10 subunits, so fail early with a clear message instead.
n_subunits <- 10L
if (nrow(subunits) < n_subunits) {
  stop(
    "This example needs at least ", n_subunits, " subunits; the DAU has ",
    nrow(subunits), ".",
    call. = FALSE
  )
}
su <- subunits[seq_len(n_subunits), ]

# Stratum IDs are project-specific. 17 (High) and 19 (Low) are example
# values only; substitute the IDs that lkup_strata() printed for your project.
stratum_ids <- c(17L, 19L)
n_obs <- n_subunits * length(stratum_ids)

# Fix the RNG seed so the synthetic dataset is the same on every run
set.seed(42)

# tibble() evaluates its arguments in order, so the demographic columns can be
# derived from `total` directly instead of being filled in afterwards.
obs <- tibble::tibble(
  species_id         = species_id,
  bio_year           = 2024L,
  analysis_unit_id   = dau_id,
  survey_type_id     = survey_type_id,
  management_unit_id = rep(su$management_unit_id, each = length(stratum_ids)),
  subunit_id         = rep(su$id, each = length(stratum_ids)),
  stratum_id         = rep(stratum_ids, times = n_subunits),
  is_selected        = TRUE,
  is_surveyed        = TRUE,
  total              = sample(1:30, n_obs, replace = TRUE),
  # Demographic breakdown: roughly 35% / 40% / 20% of each count; whatever
  # is left after rounding is recorded as unclassified
  males              = as.integer(round(total * 0.35)),
  females            = as.integer(round(total * 0.40)),
  youngs             = as.integer(round(total * 0.20)),
  unclass            = total - males - females - youngs,
  # Covariate codes must match the levels shown in the model's betas table
  activity           = sample(1:4, n_obs, replace = TRUE),
  vegetation_type    = sample(1:2, n_obs, replace = TRUE),
  snow_percentage    = sample(0:80, n_obs, replace = TRUE),
  # One empty list per row
  metadata           = replicate(n_obs, list(), simplify = FALSE)
)

Fit the model

# Fit the sightability model to the synthetic observations
results <- sight_fit_model_id(
  spatial_focus = "DAU", model_id = model_id, data = obs
)

print(results)
# The result is a tibble with these columns:
#   species_id, bio_year, analysis_unit_id, spatial_focus,
#   survey_type_id, model_id, Demographic, RawCount, Estimate,
#   SightInflation, SampInflation, TotalVar, SE, LCL, UCL, CV

Example 2: Ratio estimation (composition model)

Composition surveys produce demographic ratios (e.g., fawn:doe) without spatial extrapolation. The data is simpler – no covariates are needed.

Discover IDs

# Resolve the species, a Composition survey type, and the analysis unit
species_id <- lkup_species_id("Mule Deer")
survey_type_id <- lkup_survey_type_id("Composition", species_id = species_id)
dau_id <- lkup_dau_id("North Converse 755", species_id = species_id)

# List the models for this species + survey type and take the first one
models <- sight_read_model(species = "Mule Deer", survey_type = "Composition")
model_id <- models$id[1]

# Check which estimation method the model uses
model <- sight_read_model(id = model_id)
model$metadata[[1]]
# Expect method = "cochran" or "quasibinomial"

Build synthetic data

# Look up the subunits for the DAU (this queries the lookup again)
subunits <- spdgt.core::lkup_read_subunit(dau_id = dau_id)

# One observation per subunit for the first 15 subunits.
# Indexing with 1:15 would silently pad the result with all-NA rows when the
# DAU has fewer than 15 subunits, so fail early with a clear message instead.
n_obs <- 15L
if (nrow(subunits) < n_obs) {
  stop(
    "This example needs at least ", n_obs, " subunits; the DAU has ",
    nrow(subunits), ".",
    call. = FALSE
  )
}
su <- subunits[seq_len(n_obs), ]

# Fix the RNG seed so the synthetic dataset is the same on every run
set.seed(42)

# tibble() evaluates its arguments in order, so the demographic columns can be
# derived from `total` directly instead of being filled in afterwards.
obs <- tibble::tibble(
  species_id         = species_id,
  bio_year           = 2024L,
  analysis_unit_id   = dau_id,
  survey_type_id     = survey_type_id,
  management_unit_id = su$management_unit_id,
  subunit_id         = su$id,
  stratum_id         = 0L, # 0 = "Other"
  is_selected        = TRUE,
  is_surveyed        = TRUE,
  total              = sample(10:50, n_obs, replace = TRUE),
  # Demographic breakdown: roughly 30% / 45% / 20% of each count; whatever
  # is left after rounding is recorded as unclassified
  males              = as.integer(round(total * 0.30)),
  females            = as.integer(round(total * 0.45)),
  youngs             = as.integer(round(total * 0.20)),
  unclass            = total - males - females - youngs,
  # One empty list per row
  metadata           = replicate(n_obs, list(), simplify = FALSE)
)

Fit the model

# Fit the composition model to the synthetic observations
results <- sight_fit_model_id(
  spatial_focus = "DAU", model_id = model_id, data = obs
)

print(results)
# The result is a tibble of ratio estimates:
#   Demographic = "MFRatio", "YFRatio", etc.
#   Estimate, TotalVar, SE, LCL, UCL

Tips

Models are linked to survey types. Always filter by species + survey type when looking up models.
Covariate codings come from the betas table. Check sight_read_betas_id(model_id, includes = "covar") to see the expected integer codes for activity and vegetation_type.
stratum_id = 0 or NA maps to “Other” – a non-extrapolating stratum used for opportunistic observations.
metadata can be list(). It is only needed when entry-column-mapping transforms were applied to the original data.
is_selected = NA rows are treated as unselected (design-only units not in the sampling frame).
To modify real data rather than building from scratch, start with sight_read_data_id() and edit the returned tibble. Columns that are all NA (e.g., metadata) may be dropped during JSON serialization to the API, so replace them with a list column before sending:

# Start from the stored observations for one species / survey type / DAU / year
data <- sight_read_data_id(
  species_id = species_id, survey_type_id = survey_type_id,
  analysis_unit_id = dau_id, bio_year = 2024
)

# Apply your edits (here: overwrite the first total)
data$total[1] <- 25L

# Replace metadata with a list column of empty lists so the column is not
# dropped when the data is serialized to JSON
data$metadata <- rep(list(list()), times = nrow(data))

# Fit the model on the edited data
results <- sight_fit_model_id(
  spatial_focus = "DAU", model_id = model_id, data = data
)