01_DATA_load_and_clean — Step 5

Load the five BirdLife BOTW shapefiles, reconcile their taxonomy with the trait dataset via an ITIS synonymy lookup, rasterize the ranges at 0.5° resolution, then assign species to the DGGS hexagonal grid at resolution 7.

spatial taxonomy
Inputs: data/raw/BOTW_1.shp to data/raw/BOTW_5.shp, data/raw/geogInfo_dggs7.RDS
Outputs: data/raw/BOTW_<j>_New.shp (one file per 4000-row chunk), data/raw/uniquedggs7.RDS, data/raw/sitesdggs7.RDS
# Per-cell geographic lookup table read from disk.
geog7 <- readRDS("data/raw/geogInfo_dggs7.RDS")

# DGGS grid object: resolution 7, hexagonal topology.
dggs7 <- dggridR::dgconstruct(
  res = 7,
  metric = FALSE,
  resround = "down",
  topology = "HEXAGON"
)

# Distinct cell ids among the rows whose Realm is not NA.
sitesInRealms7 <- unique(geog7$cell[which(!is.na(geog7$Realm))])

# Paths of the five BOTW shapefile parts expected under data/raw.
botw_files <- file.path("data/raw", sprintf("BOTW_%d.shp", 1:5))

# Abort early, naming every part that is absent from disk.
missing_files <- botw_files[which(!file.exists(botw_files))]
if (length(missing_files) > 0L) {
  stop("Missing BOTW files: ", paste(basename(missing_files), collapse = ", "))
}

# Read each part quietly, then stack the parts into a single object.
sp_chunks <- lapply(botw_files, sf::st_read, quiet = TRUE)
sp <- do.call(rbind, sp_chunks)

# The per-file copies are redundant once combined; drop them and collect.
rm(sp_chunks)
gc(verbose = FALSE)

# Reconcile BOTW species names against the trait dataset.
# `sp_site_old` keeps the names exactly as they appear in SCINAME; `sp_site`
# starts as a copy and receives replacements in place, so both vectors stay
# position-aligned for the old -> new lookup built from them afterwards.
sp_site_old <- unique(sp$SCINAME)
sp_site     <- sp_site_old
spTrait     <- phenoBird$scientificNameStd

# Trait names are compared with "_" replaced by " "; only BOTW names without
# a match in the trait names need a synonymy lookup.
spToCheck <- sp_site[!sp_site %in% gsub("_", " ", spTrait)]

if (length(spToCheck) > 0L) {
  # `rows` is spelled out in full: the abbreviated `row` only reached this
  # parameter through partial argument matching.
  chk <- taxize::synonyms(sci_id = spToCheck, db = "itis", rows = 1)
  queried   <- names(chk)
  n_queried <- length(chk)

  progressr::with_progress({
    p <- progressr::progressor(steps = n_queried)
    for (i in seq_along(chk)) {
      p(sprintf("ITIS lookup %d/%d", i, n_queried))
      entry <- chk[[i]]

      # Skip results that carry no table (no dim) or a table with zero rows.
      if (is.null(dim(entry)) || dim(entry)[1] == 0L) {
        next
      }

      if ("acc_name" %in% colnames(entry)) {
        # An accepted name is reported: use its first value as-is.
        sp_site[sp_site == queried[i]] <- entry[["acc_name"]][1]
      } else if ("syn_name" %in% colnames(entry)) {
        # Otherwise take the first listed synonym that occurs in the trait
        # names (which use "_" as separator).
        # NOTE(review): this branch stores the underscore form, whereas the
        # branch above stores the acc_name value unchanged and the trinomial
        # check below only looks for whitespace - confirm that mixing the
        # two separator styles in `sp_site` is intended.
        syn_underscore <- gsub(" ", "_", entry[["syn_name"]])
        match_idx <- which(syn_underscore %in% spTrait)
        if (length(match_idx) > 0L) {
          sp_site[sp_site == queried[i]] <- syn_underscore[match_idx[1]]
        }
      }
    }
  })
}

# Where the reconciled name contains three whitespace-separated alphabetic
# words, fall back to the original BOTW name at the same position.
trinomial_idx <- grepl("[A-Za-z]+\\s+[A-Za-z]+\\s+[A-Za-z]", sp_site)
sp_site[trinomial_idx] <- sp_site_old[trinomial_idx]

# Write reconciled shapefiles (chunks of 4000)
# Lookup from the original SCINAME (`old`) to the reconciled name (`new`),
# joined onto every feature; all.x keeps features without a lookup row.
names_spatial <- data.frame(
  old = sp_site_old,
  new = sp_site,
  stringsAsFactors = FALSE
)
sp1 <- merge(sp, names_spatial, by.x = "SCINAME", by.y = "old", all.x = TRUE)

chunk_size <- 4000L
n_chunks <- ceiling(nrow(sp1) / chunk_size)

for (j in seq_len(n_chunks)) {
  # Row window of chunk j; the last chunk is clipped to the table length.
  rows <- seq.int(
    from = (j - 1L) * chunk_size + 1L,
    to = min(j * chunk_size, nrow(sp1))
  )
  # append = FALSE replaces any file left over from an earlier run.
  sf::st_write(
    sp1[rows, ],
    dsn = file.path("data/raw", sprintf("BOTW_%d_New.shp", j)),
    quiet = TRUE,
    append = FALSE
  )
}

# Rasterize and assign to hexagons
# Output directory for the rasters; created (with parents) if missing.
pathSave <- "data/raw/SpeciesData"
dir.create(pathSave, showWarnings = FALSE, recursive = TRUE)

# One reconciled shapefile per chunk, rasterized in turn at 0.5 resolution
# with the reconciled name column `new` as the id.
new_shp <- file.path("data/raw", sprintf("BOTW_%d_New.shp", seq_len(n_chunks)))
for (j in seq_along(new_shp)) {
  rasterSp::rasterizeRange(
    dsn = new_shp[j],
    id = "new",
    resolution = 0.5,
    origin = 1,
    presence = c(1, 2, 3),
    save = TRUE,
    path = pathSave
  )
}

# Raster files found in the species output directory. `pattern` is a regular
# expression, not a glob, so match a literal ".tif" at the end of the name;
# the former "*.tif" was unanchored and could also pick up names in which
# "tif" is not the extension.
fileList <- list.files(path = pathSave, pattern = "\\.tif$")

# NOTE(review): list.files() returns names relative to `pathSave`, yet
# "data/raw" is what is passed as the first argument here - confirm that
# est_coord_sp() resolves the files in the intended directory.
coordsSpecies <- est_coord_sp(
  "data/raw", fileList,
  pathSave = "data/raw/coordinates_Resol05.RDS"
)
coordsSpeciesOrdered <- assign_hexagons(
  coordsSpecies, dggs7, sitesInRealms7,
  savePath = "data/raw/coordinates_Resol05_dggs7.RDS"
)

# Keep only elements with exactly three columns. vapply() yields one flag per
# element even when ncol() is NULL; unlist(lapply(., ncol)) would drop those
# NULLs and leave a shorter index that gets recycled against wrong elements.
has_three_cols <- vapply(
  coordsSpeciesOrdered,
  function(x) isTRUE(ncol(x) == 3L),
  logical(1),
  USE.NAMES = FALSE
)
valid_sites <- coordsSpeciesOrdered[has_three_cols]

uniquedggs7 <- reorder_hexagon(valid_sites, savePath = "data/raw/uniquedggs7.RDS")
sitesdggs7 <- siteswithspecies(
  valid_sites, uniquedggs7,
  savePath = "data/raw/sitesdggs7.RDS"
)