01_DATA_load_and_clean — Step 3

Assemble the final PhenoBird matrix: remove problematic species, drop species whose IUCN category is DD, EW, EX, RE or unresolved (NA), compute trait completeness statistics, and save the ready-to-analyse dataset.

Keywords: data wrangling, IUCN filter
Inputs: data/processed/phenoBirds.csv, data/processed/phenoBirdsImputedAll.csv
Outputs: data/processed/phenoBirdsImputedREADY.csv, data/processed/Shortnames_Birds.csv, data/processed/species_table.rds, data/processed/iucn_unresolved.csv (written only when some species have no IUCN category)
# Species excluded from both trait tables before any further processing.
SpToRem <- c(
  "Struthio_molybdophanes",
  "Neochmia_phaeton",
  "Rhea_americana",
  "Stagonopleura_guttata",
  "Carpococcyx_renauldi",
  "Laterallus_rogersi",
  "Menura_novaehollandiae",
  "Rallicula_leucospila",
  "Rhea_tarapacensis"
)

# Trait table: rows whose `X` value is one of the excluded species are dropped.
birdTraitsPhy <- read.csv(file = "data/processed/phenoBirds.csv")
birdTraitsPhy <- birdTraitsPhy[is.na(match(birdTraitsPhy$X, SpToRem)), ]

# Imputed table: the first column is renamed to `scientificNameStd` and also
# copied into the row names, so rows can later be selected by that value.
phenoBird <- read.csv(file = "data/processed/phenoBirdsImputedAll.csv")
names(phenoBird)[1L] <- "scientificNameStd"
row.names(phenoBird) <- phenoBird[[1L]]
phenoBird <- phenoBird[is.na(match(row.names(phenoBird), SpToRem)), ]

# Trait column names, one vector per trait group.

# Morphological traits.
morphoTrait <- c(
  "Tarsus.Length", "Wing.Length",
  "Kipps.Distance", "Secondary1",
  "Hand.Wing.Index", "Tail.Length",
  "Mass", "adult_svl_cm"
)

# Life-history traits.
LHTTrait <- c(
  "litter_or_clutch_size_n", "egg_mass_g",
  "incubation_d", "longevity_y",
  "fledging_age_d", "litters_or_clutches_per_y"
)

# Diet traits.
DietTrait <- c(
  "Diet.Inv", "Diet.Vend",
  "Diet.Vect", "Diet.Vfish",
  "Diet.Vunk", "Diet.Scav",
  "Diet.Fruit", "Diet.Nect",
  "Diet.Seed", "Diet.PlantO"
)

# Trait lookup table ----
# One row per trait, ordered morphological -> life-history -> diet.
# Columns: original column name, short label, colour of the trait group,
# completeness string ("<non-missing>/<total> (<percent>%)") and group label.
allTraits   <- c(morphoTrait, LHTTrait, DietTrait)
shortLabels <- c("trl", "wl", "kd", "s1", "hwl", "tll", "bmA", "svl",
                 "ls", "em", "inc", "lg", "fled", "ly",
                 "DI", "DVd", "DVt", "DVf", "DVk", "DSv", "DF", "DN", "DSd", "DP")

# Fail early if the labels get out of step with the trait vectors: cbind()
# would otherwise recycle the shorter column and silently misalign the rows.
stopifnot(length(shortLabels) == length(allTraits))

# Group sizes are derived from the trait vectors rather than hard-coded, so
# colours and group labels stay aligned when a trait is added or removed.
traitsPerGroup <- c(length(morphoTrait), length(LHTTrait), length(DietTrait))

# A missing trait column is reported by name instead of failing mid-way.
missingTraits <- setdiff(allTraits, colnames(birdTraitsPhy))
if (length(missingTraits) > 0L) {
  stop("Trait column(s) missing from birdTraitsPhy: ",
       paste(missingTraits, collapse = ", "), call. = FALSE)
}

# Completeness per trait, computed on the non-imputed table birdTraitsPhy.
traitCompleteness <- vapply(
  allTraits,
  function(trait) {
    values    <- birdTraitsPhy[, trait]
    nObserved <- sum(!is.na(values))
    paste0(nObserved, "/", length(values),
           " (", round(nObserved / length(values) * 100, 2), "%)")
  },
  character(1),
  USE.NAMES = FALSE  # keep the matrix free of row names, as before
)

shortNames <- cbind(
  original     = allTraits,
  short        = shortLabels,
  color        = rep(c("#2E7D32", "#1565C0", "#C62828"), times = traitsPerGroup),
  completeness = traitCompleteness,
  type         = rep(c("Morphological (M)", "Life-history (L)", "Diet (D)"),
                     times = traitsPerGroup)
)
write.csv(shortNames, file = "data/processed/Shortnames_Birds.csv")

# Diet block ----
# The diet columns are passed to prep.fuzzy() as a single block whose size is
# the number of diet columns.
# NOTE(review): prep.fuzzy() is not defined in this file; presumably it is
# ade4::prep.fuzzy loaded elsewhere - confirm. `label` is passed exactly as in
# the original call.
phenoDiet <- phenoBird[, DietTrait]
phenoDiet <- prep.fuzzy(phenoDiet,
                        col.blocks = ncol(phenoDiet),
                        label = "diet")

# Drop rows that still contain NA, then clamp negative values to zero.
phenoDiet <- na.omit(as.data.frame(phenoDiet))
phenoDiet[phenoDiet < 0] <- 0

# Keep only the species that survived the diet step (matched by row name).
phenoBird <- phenoBird[rownames(phenoDiet), ]

# Finalize IUCN column + filter ----
# Fail loudly when the source column is absent. With `$`, a missing column
# yields NULL, the filter mask below becomes logical(0), and an empty READY
# file would be written without any error. `[[` also avoids partial matching
# of the column name.
if (!"iucn_category" %in% names(phenoBird)) {
  stop("Column 'iucn_category' not found in phenoBird", call. = FALSE)
}
phenoBird$category <- phenoBird[["iucn_category"]]

# Species without a category are logged to a side file before being dropped.
unresolved <- unique(phenoBird$scientificNameStd[is.na(phenoBird$category)])
if (length(unresolved) > 0L) {
  write.csv(data.frame(species = unresolved), "data/processed/iucn_unresolved.csv", row.names = FALSE)
}

# Drop rows whose category is DD, EW, EX or RE, and rows with no category.
excludedCategories <- c("DD", "EW", "EX", "RE")
keepSpecies <- !is.na(phenoBird$category) &
  !(phenoBird$category %in% excludedCategories)
phenoBird <- phenoBird[keepSpecies, ]

# Final outputs: the filtered table and the vector of retained species names.
write.csv(phenoBird, "data/processed/phenoBirdsImputedREADY.csv", row.names = FALSE)
saveRDS(unique(phenoBird$scientificNameStd), "data/processed/species_table.rds")