Standardise species names across datasets by removing duplicates and mismatches.
#' Resolve the row names of a trait table against a taxonomic name source.
#'
#' The row names of `Traits` are sent to `taxize::gnr_resolve()` in batches of
#' 1000. The canonical matched name of each entry (spaces replaced by
#' underscores) is stored in an internal lookup; entries the resolver does not
#' return fall back to their original row name.
#'
#' NOTE(review): the resolved names are only used to decide which entries to
#' keep; they are not written back into the returned object. Confirm whether
#' callers expect renamed rows.
#'
#' @param Traits Object whose row names are the names to resolve. It is
#'   subset as `Traits[i]` when `dim == 1` and as `Traits[i, ]` otherwise.
#'   If it has no row names, the queries are "1", "2", ... (presumably not
#'   what callers want -- verify against callers passing plain vectors).
#' @param nrowTraits Number of entries (rows) in `Traits`.
#' @param source Passed to `taxize::gnr_resolve()` as
#'   `preferred_data_sources`; the default, 163, is IUCN.
#' @param dim Set to 1 when `Traits` is one-dimensional. Any other value,
#'   including the default `NA`, selects row-wise subsetting.
#' @return The entries of `Traits` whose lookup name is not `NA`.
standard_names <- function(Traits, nrowTraits, source = 163, dim = NA) {
  queryNames <- rownames(Traits)
  if (is.null(queryNames)) {
    # Mirrors the automatic row names a data frame would have been given.
    queryNames <- as.character(seq_len(nrowTraits))
  }
  if (length(queryNames) != nrowTraits) {
    stop(
      "`nrowTraits` must equal the number of row names of `Traits`.",
      call. = FALSE
    )
  }

  batchSize <- 1000
  nBatches <- ceiling(nrowTraits / batchSize)

  # Only demand the resolver package when there is something to resolve, and
  # fail loudly instead of continuing after a silent `require()` failure.
  if (nBatches > 0 && !requireNamespace("taxize", quietly = TRUE)) {
    stop("Package 'taxize' is required to resolve names.", call. = FALSE)
  }

  iucnNames <- rep(NA_character_, nrowTraits)

  # seq_len() keeps the loop from running when there are zero rows
  # (1:0 would iterate over c(1, 0)).
  for (i in seq_len(nBatches)) {
    cat(paste0("\nBatch ", i, " out of ", nBatches, "\n"))
    rowsSelect <- ((i - 1) * batchSize + 1):min(i * batchSize, nrowTraits)
    taxoAux <- as.data.frame(
      taxize::gnr_resolve(
        names = queryNames[rowsSelect],
        preferred_data_sources = source, # 163 is IUCN
        best_match_only = TRUE,
        canonical = TRUE
      )
    )
    if (nrow(taxoAux) == 0) {
      next # nothing in this batch was resolved
    }
    # Map results back by exact name; ignore any returned name that is not
    # one of the queries so the lookup never grows past nrowTraits.
    hit <- match(as.character(taxoAux$user_supplied_name), queryNames)
    known <- !is.na(hit)
    iucnNames[hit[known]] <- as.character(taxoAux$matched_name2)[known]
  }

  iucnNames <- gsub(pattern = " ", replacement = "_", x = iucnNames)

  # Unresolved entries fall back to the name they were supplied with.
  unresolved <- is.na(iucnNames)
  iucnNames[unresolved] <- queryNames[unresolved]

  # NOTE(review): after the fallback above this filter only drops entries
  # whose original row name is itself NA. The original comment here said
  # unresolved species should be removed -- confirm which is intended.
  yesName <- which(!is.na(iucnNames))

  # isTRUE() makes the default `dim = NA` take the row-wise branch instead of
  # raising "missing value where TRUE/FALSE needed".
  if (isTRUE(dim == 1)) {
    Traits[yesName]
  } else {
    Traits[yesName, ]
  }
}