standard_names(Traits, nrowTraits, source=163, dim=NA)

Standardise species names across datasets by removing duplicates and mismatches.

utilitydata
Args: Traits — trait data frame; nrowTraits — expected row count; source=163 — source ID; dim=NA — trait dimensions
#' Resolve species names against a taxonomic source and subset the traits
#'
#' Sends the names of `Traits` to `taxize::gnr_resolve()` in batches of 1000
#' and records the canonical matched name (spaces replaced by underscores)
#' for each entry. Entries the resolver could not match fall back to their
#' original name. The function then returns the entries of `Traits` whose
#' recorded name is not `NA`.
#'
#' Note that the resolved names are only used to decide which entries are
#' kept; they are not attached to the returned object. Because unresolved
#' entries fall back to their original name, entries are only dropped when
#' the original name itself is `NA`.
#'
#' @param Traits Object holding the traits. Species names are taken from
#'   `rownames(Traits)`; when there are none (e.g. a plain named vector),
#'   `names(Traits)` is used if its length equals `nrowTraits`, otherwise
#'   positional labels `"1"`, `"2"`, ... are used.
#' @param nrowTraits Number of entries (rows) in `Traits`. Must equal the
#'   number of row names when `Traits` has row names.
#' @param source Value passed to `preferred_data_sources` of
#'   `taxize::gnr_resolve()`. The default, 163, is IUCN.
#' @param dim Pass `1` when `Traits` is one-dimensional, so that it is
#'   subset as `Traits[i]`. Any other value (including the default `NA`)
#'   subsets rows as `Traits[i, ]`.
#' @return The retained entries of `Traits`: `Traits[i]` when `dim` is 1,
#'   `Traits[i, ]` otherwise.
standard_names <- function(Traits, nrowTraits, source = 163, dim = NA) {
  stopifnot(
    is.numeric(nrowTraits),
    length(nrowTraits) == 1L,
    !is.na(nrowTraits),
    nrowTraits >= 0
  )

  speciesNames <- rownames(Traits)
  if (is.null(speciesNames)) {
    # Plain vectors have no row names; use their element names when they line
    # up with nrowTraits, otherwise fall back to positional labels.
    elementNames <- names(Traits)
    if (nrowTraits > 0 && length(elementNames) == nrowTraits) {
      speciesNames <- elementNames
    } else {
      speciesNames <- as.character(seq_len(nrowTraits))
    }
  } else if (length(speciesNames) != nrowTraits) {
    stop(
      "`nrowTraits` (", nrowTraits, ") does not match the number of row ",
      "names in `Traits` (", length(speciesNames), ").",
      call. = FALSE
    )
  }

  # Empty input never reaches the resolver, so the dependency is only
  # required when there is something to resolve.
  if (nrowTraits > 0 && !requireNamespace("taxize", quietly = TRUE)) {
    stop("Package 'taxize' is required to resolve species names.",
      call. = FALSE
    )
  }

  batchSize <- 1000
  nBatches <- ceiling(nrowTraits / batchSize)
  iucnNames <- rep(NA_character_, nrowTraits)

  # seq_len() skips the loop entirely for empty input (1:0 would run twice).
  for (i in seq_len(nBatches)) {
    cat(paste0("\nBatch ", i, " out of ", nBatches, "\n"))
    rowsSelect <- ((i - 1) * batchSize + 1):min(i * batchSize, nrowTraits)
    batchNames <- speciesNames[rowsSelect]

    # NOTE(review): newer taxize releases may have deprecated gnr_resolve()
    # in favour of gna_verifier() -- confirm against the installed version.
    taxoAux <- as.data.frame(
      taxize::gnr_resolve(
        names = batchNames,
        preferred_data_sources = source, # 163 is IUCN
        best_match_only = TRUE,
        canonical = TRUE
      )
    )

    # Look each queried name up in the result rather than indexing by the
    # returned names: an empty result then assigns nothing, and an
    # unexpected returned name cannot append a row.
    hit <- match(batchNames, taxoAux$user_supplied_name)
    found <- !is.na(hit)
    if (any(found)) {
      iucnNames[rowsSelect[found]] <-
        as.character(taxoAux$matched_name2[hit[found]])
    }
  }

  iucnNames <- gsub(" ", "_", iucnNames, fixed = TRUE)

  # Entries the resolver could not match keep their original name.
  unresolved <- is.na(iucnNames)
  iucnNames[unresolved] <- speciesNames[unresolved]

  # Keep only entries that ended up with a usable (non-NA) name.
  yesName <- which(!is.na(iucnNames))

  # isTRUE() makes the default dim = NA select the row-wise branch instead
  # of raising "missing value where TRUE/FALSE needed".
  if (isTRUE(dim == 1)) {
    Traits[yesName]
  } else {
    Traits[yesName, ]
  }
}