Retrieve all FishMORPH rows matching a taxon name (species, genus, family, or order). Automatically falls back to broader taxonomic levels when the exact match is not found, with an interactive or automatic fallback strategy.
| Argument | Default | Description |
|---|---|---|
| query | — | Taxon name: species binomial, genus, family, or order |
| fishmorph_path | — | Path to TraitFishImputed.txt |
| fishmorph_info | NULL | Pre-computed GBIF taxonomy table (from $fishmorph_info of a previous call) |
| level | "auto" | Taxonomic level to search: "species", "genus", "family", "order", or "auto" (detected from the query) |
| fallback | TRUE | If query not found at requested level, try progressively broader levels |
| interactive | FALSE | If TRUE and fallback matches found, ask user to confirm before returning broader results |
| min_n | 1 | Minimum number of rows required; warn if below |
| cache_path | NULL | Path to save/load the GBIF taxonomy as .rds (e.g. "cache/fm_taxonomy.rds"). Created automatically on first run, loaded instantly on subsequent calls. |
| conf_threshold | 80 | GBIF synonym confidence (used only if fishmorph_info is NULL) |
| verbose | TRUE | Print match summary |
query_fishmorph <- function(
query,
fishmorph_path,
fishmorph_info = NULL,
level = "auto",
fallback = TRUE,
interactive = FALSE,
min_n = 1,
conf_threshold = 80,
cache_path = NULL, # path to save/load fishmorph_info as .rds (e.g. "cache/fm_taxonomy.rds")
verbose = TRUE
) {
# ── Packages ──────────────────────────────────────────────────────────────
if (!requireNamespace("data.table", quietly=TRUE)) install.packages("data.table")
if (!requireNamespace("pbapply", quietly=TRUE)) install.packages("pbapply")
if (!requireNamespace("traitdataform",quietly=TRUE)) install.packages("traitdataform")
library(data.table); library(pbapply); library(traitdataform)
query <- trimws(query)
# ── 1. Load FishMORPH ────────────────────────────────────────────────────
fishTraits <- read.table(fishmorph_path)
traitCols <- setdiff(colnames(fishTraits), "IUCN")
fishMorph <- trimws(rownames(fishTraits))
# ── 2. Build or reuse taxonomy lookup (with optional disk cache) ────────
if (is.null(fishmorph_info)) {
# Try to load from disk cache first
if (!is.null(cache_path) && file.exists(cache_path)) {
if (verbose) message("Loading cached FishMORPH taxonomy from: ", cache_path)
fishmorph_info <- readRDS(cache_path)
} else {
if (verbose) {
message("Resolving FishMORPH taxonomy via GBIF (~5-10 min on first run)...")
if (!is.null(cache_path))
message("Will save to cache: ", cache_path, " (reused on all future calls)")
}
fishmorph_info <- pbapply::pblapply(fishMorph, function(sp) {
tryCatch(
traitdataform::get_gbif_taxonomy(sp, subspecies=TRUE, higherrank=TRUE,
conf_threshold=conf_threshold, resolve_synonyms=TRUE)[1,],
error=function(e) NULL)
}) |> data.table::rbindlist(fill=TRUE) |> data.table::as.data.table()
# Save to disk cache
if (!is.null(cache_path)) {
dir.create(dirname(cache_path), recursive=TRUE, showWarnings=FALSE)
saveRDS(fishmorph_info, cache_path)
if (verbose) message("Taxonomy cached to: ", cache_path)
}
}
} else {
fishmorph_info <- data.table::as.data.table(fishmorph_info)
}
# Attach species names
fishmorph_info[, species := fishMorph]
fishmorph_info[, genus := sub(" .*$", "", fishMorph)]
# ── 3. Auto-detect level ─────────────────────────────────────────────────
# Heuristic: 2 words → species, 1 word → could be genus/family/order
if (level == "auto") {
if (grepl(" ", query)) {
level <- "species"
} else {
# Check if it matches any known genus, family, or order
if (query %in% fishmorph_info$genus) {
level <- "genus"
} else if (query %in% fishmorph_info$family) {
level <- "family"
} else if (query %in% fishmorph_info$order) {
level <- "order"
} else {
level <- "species" # will fail gracefully below
}
}
if (verbose) message("Auto-detected level: ", level)
}
# ── 4. Fallback chain ────────────────────────────────────────────────────
fallback_chain <- c("species","genus","family","order")
start_idx <- match(level, fallback_chain)
if (is.na(start_idx)) stop("level must be one of: species, genus, family, order, auto")
levels_to_try <- if (fallback) fallback_chain[start_idx:length(fallback_chain)] else level
# ── 5. Search each level ─────────────────────────────────────────────────
for (lv in levels_to_try) {
# Build match index
idx <- switch(lv,
species = which(fishMorph == query |
fishmorph_info$scientificName == query |
fishmorph_info$verbatimScientificName == query),
genus = which(fishmorph_info$genus == query),
family = which(fishmorph_info$family == query),
order = which(fishmorph_info$order == query)
)
idx <- unique(idx[!is.na(idx)])
if (length(idx) == 0) {
if (verbose) message(sprintf(" '%s' not found at %s level.", query, lv))
if (!fallback || lv == utils::tail(levels_to_try, 1)) {
message(sprintf(
"\n No match found for '%s' at any level (%s).\n",
query, paste(levels_to_try, collapse=" → ")))
return(invisible(NULL))
}
next # try next level
}
# Found something
result <- fishTraits[idx, traitCols, drop=FALSE]
result$match_level <- lv
result$matched_name <- fishMorph[idx]
# Add taxonomy columns
result$genus <- fishmorph_info$genus[idx]
result$family <- fishmorph_info$family[idx]
result$order <- fishmorph_info$order[idx]
if (verbose) {
if (lv != level) {
message(sprintf(
" Not found at '%s' level — matched at '%s' level instead.", level, lv))
}
message(sprintf(" Found %d row(s) matching '%s' (%s level).",
nrow(result), query, lv))
}
# Warn if very few rows
if (nrow(result) < min_n) {
warning(sprintf(
"Only %d row(s) returned for '%s'. Consider using a broader taxonomic level.",
nrow(result), query), call.=FALSE)
}
# Interactive confirmation for fallback
if (interactive && lv != level) {
ans <- readline(sprintf(
"Found %d row(s) at %s level for '%s'. Use these? [y/n]: ",
nrow(result), lv, query))
if (!tolower(trimws(ans)) %in% c("y","yes","o","oui")) {
message("Cancelled by user.")
return(invisible(NULL))
}
}
return(result)
}
invisible(NULL)
}
# ── Examples ──────────────────────────────────────────────────────────────────
# Load FishMORPH
# fm <- read.table("data/TraitFishImputed.txt")
#
# ── 1. Single species
# query_fishmorph("Gobio gobio", fishmorph_path="data/TraitFishImputed.txt")
#
# ── 2. Species not found → auto-fallback to genus
# query_fishmorph("Gobio vulgaris", # old synonym, not in FishMORPH
# fishmorph_path="data/TraitFishImputed.txt",
# fallback=TRUE)
# # → "Not found at species level — matched at genus level instead."
# # → returns all Gobio species in FishMORPH
#
# ── 3. Query a genus directly
# query_fishmorph("Hoplias",
# fishmorph_path="data/TraitFishImputed.txt",
# level="genus")
#
# ── 4. Query a family
# query_fishmorph("Cichlidae",
# fishmorph_path="data/TraitFishImputed.txt",
# level="family")
#
# ── 5. Query an order
# query_fishmorph("Characiformes",
# fishmorph_path="data/TraitFishImputed.txt",
# level="order")
#
# ── 6. Disk cache — compute once, instant forever
# First call: slow (GBIF queries), saves taxonomy to disk
# r1 <- query_fishmorph("Gobio gobio",
# fishmorph_path = "data/TraitFishImputed.txt",
# cache_path = "cache/fm_taxonomy.rds")
# → "Resolving FishMORPH taxonomy via GBIF (~5-10 min)..."
# → "Taxonomy cached to: cache/fm_taxonomy.rds"
#
# All subsequent calls load from disk instantly (no GBIF):
# r2 <- query_fishmorph("Cichlidae",
# fishmorph_path = "data/TraitFishImputed.txt",
# cache_path = "cache/fm_taxonomy.rds",
# level = "family")
# → "Loading cached FishMORPH taxonomy from: cache/fm_taxonomy.rds"
# → Found 312 row(s) matching 'Cichlidae' (family level).
#
# r3 <- query_fishmorph("Hoplias",
# fishmorph_path = "data/TraitFishImputed.txt",
# cache_path = "cache/fm_taxonomy.rds")
#
# Tip: you can also pass the in-memory object directly (even faster):
# fm_tax <- readRDS("cache/fm_taxonomy.rds")
# r4 <- query_fishmorph("Characiformes", fishmorph_path="...",
# fishmorph_info = fm_tax, level="order")
#
# ── 7. Interactive confirmation when fallback is triggered
# query_fishmorph("Hoplias macroptera", # hypothetical absent species
# fishmorph_path="...",
# interactive=TRUE)
# # → prompts: "Found 8 row(s) at genus level for 'Hoplias macroptera'. Use these? [y/n]:"