Load AVONET BirdLife morphological traits, fetch IUCN Red List categories via API v4 (rredlist), merge into a single species × traits table, and resolve taxonomic duplicates. Results cached to data/processed/BirdTraitCombined.csv.
# ---- Reproducibility -------------------------------------------------------
GLOBAL_SEED <- 20251029
set.seed(GLOBAL_SEED)
source("scripts/00_GeneralScript.R")
########################################################################
### 1. Loading original trait and taxonomic standardization
########################################################################
if(!("BirdTraitCombined.csv" %in% list.files("data/raw"))){
### 1.a. AVONET morphological traits
avonet_raw <- readxl::read_excel(
path = "data/raw/AVONET.xlsx",
sheet = "AVONET1_BirdLife",
col_types = c(
"skip", "text", "text", "text", "skip", "skip", "skip", "skip", "skip", "skip",
"numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric",
rep("skip", 16)
)
)
avonet_raw <- as.data.frame(avonet_raw)
avonet_raw <- subset(avonet_raw, Family1 != "Apterygidae") # remove flightless
### 1.b. IUCN categories via API v4 (rredlist >= 1.0.0)
if (!requireNamespace("rredlist", quietly = TRUE)) install.packages("rredlist")
library(rredlist)
if (utils::packageVersion("rredlist") < "1.0.0")
stop("rredlist >= 1.0.0 required (API v4). install.packages('rredlist')")
`%||%` <- function(a, b) if (is.null(a) || length(a) == 0L) b else a
parse_binomial <- function(x, sep = "[_ ]+") {
parts <- strsplit(as.character(x), sep)
data.frame(
genus = sapply(parts, function(p) if (length(p) >= 1L) p[1] else NA_character_),
species = sapply(parts, function(p) if (length(p) >= 2L) p[2] else NA_character_),
stringsAsFactors = FALSE
)
}
query_iucn <- function(genus, species, key) {
if (is.na(genus) || is.na(species) || genus == "" || species == "")
return(NA_character_)
tryCatch({
res <- rredlist::rl_species(genus = genus, species = species, key = key)
if (is.null(res) || is.null(res$assessments)) return(NA_character_)
asm <- res$assessments
if (is.data.frame(asm)) {
if (nrow(asm) == 0L) return(NA_character_)
latest_idx <- which(asm$latest == TRUE)
if (length(latest_idx) == 0L) latest_idx <- 1L
return(asm$red_list_category_code[latest_idx[1]])
}
if (is.list(asm) && length(asm) > 0L) {
latest_idx <- which(vapply(asm, function(a) isTRUE(a$latest), logical(1L)))
if (length(latest_idx) == 0L) latest_idx <- 1L
return(asm[[latest_idx[1]]]$red_list_category_code %||% NA_character_)
}
NA_character_
}, error = function(e) {
if (!grepl("404|not found", conditionMessage(e), ignore.case = TRUE))
message(sprintf(" [ERROR] %s %s — %s", genus, species, conditionMessage(e)))
NA_character_
})
}
fetch_iucn_categories <- function(binomials, key,
cache_path = "data/processed/iucn_cache.rds",
rate_limit_s = 0.6,
retry_failed = FALSE) {
binomials <- unique(binomials[!is.na(binomials) & binomials != ""])
cache <- if (file.exists(cache_path)) readRDS(cache_path) else list()
to_query <- if (retry_failed) {
binomials[!binomials %in% names(cache) |
vapply(binomials, function(sp) is.null(cache[[sp]]) || is.na(cache[[sp]]), logical(1L))]
} else {
binomials[!binomials %in% names(cache)]
}
if (length(to_query) > 0L) {
parsed <- parse_binomial(to_query)
for (i in seq_along(to_query)) {
cache[[to_query[i]]] <- query_iucn(parsed$genus[i], parsed$species[i], key = key)
if (i %% 100L == 0L) {
dir.create(dirname(cache_path), showWarnings = FALSE, recursive = TRUE)
saveRDS(cache, cache_path)
}
if (i < length(to_query)) Sys.sleep(rate_limit_s)
}
dir.create(dirname(cache_path), showWarnings = FALSE, recursive = TRUE)
saveRDS(cache, cache_path)
}
out <- unlist(cache[binomials]); names(out) <- binomials; out
}
API_KEY <- Sys.getenv("IUCN_REDLIST_KEY")
if (API_KEY == "") stop("Set IUCN_REDLIST_KEY in .Renviron (https://api.iucnredlist.org)")
iucn_categories <- fetch_iucn_categories(
binomials = unique(avonet_raw$Species1), key = API_KEY,
cache_path = "data/processed/iucn_cache.rds", rate_limit_s = 0.6
)
avonet_raw$iucn_category <- iucn_categories[avonet_raw$Species1]
### 1.c. Taxonomic fix + merge AMNIOTE + EltonTraits
avonet_merged <- avonet_raw
avonet_merged$scientificNameStd <- avonet_merged$Species1
avonet_merged$GenusSpecies <- avonet_merged$Species1
avonet_traitdata_map <- traitdata::avonet
avonet_traitdata_map$GenusSpecies <- paste(avonet_traitdata_map$Genus, avonet_traitdata_map$Species)
avonet_traitdata_map <- unique(avonet_traitdata_map[, c("GenusSpecies", "scientificNameStd")])
colnames(avonet_traitdata_map) <- c("Species1", "scientificNameStd_traitdata")
avonet_merged <- merge(avonet_merged, avonet_traitdata_map, by = "Species1", all.x = TRUE)
avonet_merged <- avonet_merged[!is.na(avonet_merged$scientificNameStd), ]
avonet_merged <- avonet_merged[order(avonet_merged$scientificNameStd), ]
species_to_remove <- c(
'Atlantisia rogersi', 'Casuarius bennetti', 'Casuarius casuarius',
'Casuarius unappendiculatus', 'Dromaius novaehollandiae',
'Rhea americana', 'Rhea pennata', 'Struthio camelus'
)
avonet_filtered <- subset(avonet_merged, !(scientificNameStd %in% species_to_remove))
# Deduplicate: keep row matching GenusSpecies, else row with fewest NAs
duplicated_species <- names(which(table(avonet_filtered$scientificNameStd) > 1))
avonet_final <- subset(avonet_filtered, !(scientificNameStd %in% duplicated_species))
for (sp in duplicated_species) {
entries <- avonet_filtered[avonet_filtered$scientificNameStd == sp, ]
if (sp %in% entries$GenusSpecies) {
avonet_final <- rbind(avonet_final, entries[entries$GenusSpecies == sp, ])
} else {
na_counts <- apply(entries, 1, function(row) sum(is.na(row)))
avonet_final <- rbind(avonet_final, entries[which.min(na_counts), , drop = FALSE])
}
}
resolve_duplicates <- function(df_grouped) {
if (any(df_grouped$GenusSpecies == df_grouped$scientificNameStd[1])) {
return(df_grouped %>% filter(GenusSpecies == scientificNameStd[1]) %>% slice(1))
} else {
return(df_grouped %>% mutate(n_NA = rowSums(is.na(.))) %>% arrange(n_NA) %>% slice(1) %>% select(-n_NA))
}
}
clean_amniote_aves <- function(data = traitdata::amniota) {
aves_data <- data %>% filter(Class == "Aves") %>% distinct() %>%
filter(!is.na(scientificNameStd)) %>% mutate(GenusSpecies = paste(Genus, Species))
dup_names <- aves_data %>% count(scientificNameStd) %>% filter(n > 1) %>% pull(scientificNameStd)
aves_clean <- aves_data %>% filter(!scientificNameStd %in% dup_names)
aves_resolved <- aves_data %>% filter(scientificNameStd %in% dup_names) %>%
group_by(scientificNameStd) %>% group_modify(~ resolve_duplicates(.x)) %>% ungroup()
final_data <- bind_rows(aves_clean, aves_resolved)
stopifnot(!any(duplicated(final_data$scientificNameStd)))
return(final_data)
}
aveTraitsFinal <- clean_amniote_aves()
clean_elton_diet_aves <- function(elton_birds_data) {
elton_clean <- elton_birds_data %>%
mutate(Genus = trimws(Genus), Species = trimws(Species),
scientificNameStd = paste(Genus, Species), GenusSpecies = paste(Genus, Species)) %>%
select(-c(1:7, 10, 36:42)) %>% distinct() %>% filter(!is.na(scientificNameStd))
dup_names <- elton_clean %>% count(scientificNameStd) %>% filter(n > 1) %>% pull(scientificNameStd)
elton_ok <- elton_clean %>% filter(!scientificNameStd %in% dup_names)
dup_resolved <- elton_clean %>% filter(scientificNameStd %in% dup_names) %>%
group_by(scientificNameStd) %>%
group_modify(~ {
match_gs <- .x %>% filter(GenusSpecies == .x$scientificNameStd[1])
if (nrow(match_gs) > 0) return(slice(match_gs, 1))
.x %>% mutate(n_NA = rowSums(is.na(.))) %>% arrange(n_NA) %>% slice(1) %>% select(-n_NA)
}) %>% ungroup()
elton_final <- bind_rows(elton_ok, dup_resolved)
stopifnot(!any(duplicated(elton_final$scientificNameStd)))
return(elton_final)
}
aveDietFinal <- clean_elton_diet_aves(traitdata::elton_birds)
birdTraits <- avonet_final %>%
rename(scientificNameStd_birdlife = scientificNameStd,
scientificNameStd = scientificNameStd_traitdata) %>%
full_join(aveDietFinal, by = "scientificNameStd") %>%
full_join(aveTraitsFinal, by = "scientificNameStd") %>%
mutate(scientificNameStd_traitdata = scientificNameStd,
scientificNameStd = ifelse(!is.na(scientificNameStd_birdlife),
scientificNameStd_birdlife, scientificNameStd_traitdata))
cols_to_keep <- c("scientificNameStd",
colnames(birdTraits)[c(18,47:53,15)],
colnames(birdTraits)[4:14],
colnames(birdTraits)[21:30],
colnames(birdTraits)[35:42],
colnames(birdTraits)[54:82])
birdTraits <- birdTraits[, unique(cols_to_keep)]
if (any(duplicated(birdTraits$scientificNameStd))) {
birdTraits <- birdTraits %>% group_by(scientificNameStd) %>% slice(1) %>% ungroup()
}
write.csv(birdTraits, file = "data/processed/BirdTraitCombined.csv", row.names = FALSE)
} else {
birdTraits <- read.csv("data/processed/BirdTraitCombined.csv")
}