# ============================================================================= # DAY 4 — Fish Morphological Traits & Functional Diversity # Course: Trait-Based Approach in Community Ecology — Belém, Brazil # Dataset: 12 Neotropical fish species (raw landmarks) + 12 sites × 30 species # ============================================================================= # OBJECTIVES # - Derive the 10 standardised morphological traits (Villéger et al. 2017) # from raw landmark/anatomical measurements # - Aggregate individual measurements to species means and inspect ITV # - Compute functional diversity indices (FRic, FEve, FDiv, FDis) on # Amazonian fish communities # - Relate FD to environmental degradation along the gradient # - Visualise the Amazonian fish functional space and trophic guilds # # DATA FILES USED # data/fish_landmarks_day4.csv — raw measurements per individual # (12 spp × 5 ind. = 60 fish) # data/fish_traits_sp_means.csv — pre-computed species means (30 spp) # data/fish_communities_amazon.csv — sites × species matrix # data/fish_site_env.csv — site environmental metadata # data/fish_species_metadata.csv — common name, family, trophic guild # # PACKAGES # tidyverse, FD, vegan, ggrepel, patchwork, corrplot, irr # # KEY REFERENCES # Villéger et al. (2017) Ecography 40: 1–7 — fish trait protocol # Brosse et al. (2021) Sci. Data 8: 254 — FishMorph # Toussaint et al. (2016) Sci. Reports 6: 22125 — Neotropical FD # ============================================================================= library(tidyverse) library(FD) # dbFD() library(vegan) # rda(), procrustes() library(ggrepel) library(patchwork) library(corrplot) theme_set( theme_bw(base_size = 12) + theme(panel.grid.minor = element_blank(), strip.background = element_rect(fill = "#E0F0F5"), strip.text = element_text(colour = "#0A4A5A", face = "bold")) ) if (!dir.exists("outputs")) dir.create("outputs") # ============================================================================= # 1. LOAD RAW LANDMARK MEASUREMENTS # ============================================================================= # Output of the ImageJ practical: one row per individual, columns are # anatomical distances in millimetres. raw <- read_csv("data/fish_landmarks_day4.csv", show_col_types = FALSE) glimpse(raw) # 60 specimens × measurements (TL, BD, BW, ED, MH, MW, HMd, PH, PA, CFL, CFA, # CPL, PFP, BD_CP) # Quick QA — visualise the distribution of body lengths per species p_qa <- ggplot(raw, aes(species, TL_mm)) + geom_boxplot(fill = "#A8D5BA", outlier.colour = "#B23A48") + geom_jitter(width = 0.15, alpha = 0.6, size = 1.6) + coord_flip() + labs(title = "Body size (TL) variation across measured Amazonian species", y = "Total length (mm)", x = NULL) ggsave("outputs/day4_qa_body_size.png", p_qa, width = 8, height = 5, dpi = 300) # ============================================================================= # 2. COMPUTE THE 10 MORPHOLOGICAL TRAITS (VILLÉGER ET AL. 2017) # ============================================================================= # All traits are dimensionless ratios — comparable across species of very # different body sizes (a 3 cm Astyanax to a 3 m Arapaima). fish_traits_ind <- raw |> mutate( # ── Locomotion (6 traits) ───────────────────────────────────────── Body_size = log10(TL_mm), # log body size Body_elong = TL_mm / BD_mm, # elongation Body_comp = BD_mm / BW_mm, # lateral compression Pec_AR = PH_mm^2 / PA_mm2, # pectoral fin aspect ratio Pec_pos = PFP_mm / BD_mm, # pectoral fin position CP_throttle = (BD_CP_mm)^2 / CPL_mm^2, # caudal peduncle throttling Caud_AR = CFL_mm^2 / CFA_mm2, # caudal fin aspect ratio # ── Trophic / foraging (4 traits) ───────────────────────────────── Mouth_pos = HMd_mm / BD_mm, # mouth position (0=ventral,1=dorsal) Oral_gape = pi * (MH_mm * MW_mm) / 4 / BD_mm^2, # oral gape surface Eye_size = ED_mm^2 / BD_mm^2 # relative eye size ) # ============================================================================= # 3. INTRA-SPECIFIC VARIATION OF TRAITS (PRELUDE TO DAY 5) # ============================================================================= # CV per species per trait — quick look at within-species variability. trait_cols <- c("Body_size","Body_elong","Body_comp","Pec_AR","Pec_pos", "CP_throttle","Caud_AR","Mouth_pos","Oral_gape","Eye_size") cv_by_sp <- fish_traits_ind |> pivot_longer(all_of(trait_cols), names_to = "trait", values_to = "value") |> group_by(species, trait) |> summarise(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE), # Guard against division by zero / Inf when mean is 0 or near 0 cv = ifelse(abs(mean) < 1e-10, NA_real_, sd / abs(mean)), .groups = "drop") p_cv <- ggplot(cv_by_sp, aes(trait, cv, fill = trait)) + geom_boxplot(alpha = 0.85, show.legend = FALSE) + scale_fill_viridis_d(option = "viridis") + labs(title = "Within-species coefficient of variation across the 10 traits", x = NULL, y = "CV") + theme(axis.text.x = element_text(angle = 30, hjust = 1)) ggsave("outputs/day4_within_species_CV.png", p_cv, width = 8, height = 4.5, dpi = 300) # Aggregate to species means (for FD computation downstream) fish_sp_means_practical <- fish_traits_ind |> group_by(species) |> summarise(across(all_of(trait_cols), mean, na.rm = TRUE), .groups = "drop") write_csv(fish_sp_means_practical, "outputs/day4_practical_fish_sp_means.csv") # ============================================================================= # 4. WORK WITH THE FULL 30-SPECIES TRAIT MATRIX # ============================================================================= # For functional diversity at the community scale we need ALL species in the # Amazonian dataset. Use the pre-compiled species means (see Day 1/3). fish_traits_sp <- read_csv("data/fish_traits_sp_means.csv", show_col_types = FALSE) |> column_to_rownames("species") fish_comm <- read_csv("data/fish_communities_amazon.csv", show_col_types = FALSE) |> column_to_rownames("site_id") site_env <- read_csv("data/fish_site_env.csv", show_col_types = FALSE) sp_meta <- read_csv("data/fish_species_metadata.csv", show_col_types = FALSE) # Align species columns of fish_comm with rows of trait matrix stopifnot(setequal(colnames(fish_comm), rownames(fish_traits_sp))) fish_traits_sp <- fish_traits_sp[colnames(fish_comm), , drop = FALSE] # Trait correlation matrix — useful before FD computation # corrplot draws on the current graphics device. To save the figure, open # a png() / pdf() device first then call corrplot. png("outputs/day4_trait_correlation.png", width = 1500, height = 1500, res = 200) corrplot::corrplot( cor(as.matrix(fish_traits_sp), use = "pairwise.complete.obs"), method = "color", type = "upper", addCoef.col = "black", number.cex = 0.7, tl.col = "#0A4A5A", tl.srt = 35, col = colorRampPalette(c("#2C7BB6","white","#D7191C"))(200)) dev.off() # Scale traits (different units). # Guard: drop / warn if any trait has zero variance (would produce NaN columns). trait_mat_raw <- as.matrix(fish_traits_sp) trait_sd <- apply(trait_mat_raw, 2, sd, na.rm = TRUE) if (any(trait_sd == 0 | is.na(trait_sd))) { warning("Traits with zero or NA variance dropped before scaling: ", paste(names(trait_sd)[trait_sd == 0 | is.na(trait_sd)], collapse = ", ")) trait_mat_raw <- trait_mat_raw[, trait_sd > 0 & !is.na(trait_sd), drop = FALSE] } trait_mat_sc <- scale(trait_mat_raw) # ============================================================================= # 5. COMPUTE FUNCTIONAL DIVERSITY INDICES — dbFD() # ============================================================================= # Adaptive m: number of PCoA axes can never exceed (n_species - 1). # Wrapped in tryCatch in case any community has too few species for FRic. m_use <- min(4, ncol(trait_mat_sc), nrow(trait_mat_sc) - 1) fd_fish <- tryCatch( dbFD(x = trait_mat_sc, a = as.matrix(fish_comm), calc.FRic = TRUE, calc.FDiv = TRUE, calc.CWM = TRUE, CWM.type = "dom", stand.x = FALSE, m = m_use, print.pco = FALSE, messages = FALSE), error = function(e) { message("dbFD() failed with m = ", m_use, " — retrying without FRic.") dbFD(x = trait_mat_sc, a = as.matrix(fish_comm), calc.FRic = FALSE, calc.FDiv = FALSE, calc.CWM = TRUE, CWM.type = "dom", stand.x = FALSE, print.pco = FALSE, messages = FALSE) } ) # Provide NA columns if calc.FRic / calc.FDiv didn't run if (is.null(fd_fish$FRic)) fd_fish$FRic <- rep(NA_real_, nrow(fish_comm)) if (is.null(fd_fish$FDiv)) fd_fish$FDiv <- rep(NA_real_, nrow(fish_comm)) fd_df <- tibble(site_id = rownames(fish_comm), SR = fd_fish$nbsp, FRic = fd_fish$FRic, FEve = fd_fish$FEve, FDiv = fd_fish$FDiv, FDis = fd_fish$FDis, RaoQ = fd_fish$RaoQ) |> left_join(site_env, by = "site_id") write_csv(fd_df, "outputs/day4_fd_per_site.csv") # CWM matrix (sites × traits) — left_join may drop duplicate names if a trait # accidentally shares a name with an env variable; we use suffix() to be safe. cwm_df <- as.data.frame(fd_fish$CWM) |> rownames_to_column("site_id") |> left_join(site_env, by = "site_id", suffix = c("_cwm", "_env")) write_csv(cwm_df, "outputs/day4_cwm_per_site.csv") # ============================================================================= # 6. FD vs. ENVIRONMENTAL DEGRADATION # ============================================================================= p_fric <- ggplot(fd_df, aes(degradation, FRic)) + geom_smooth(method = "lm", colour = "#0A4A5A", fill = "#A8D5BA") + geom_point(aes(colour = river), size = 4) + geom_text_repel(aes(label = site_id), size = 3) + scale_colour_brewer(palette = "Set2") + labs(title = "Functional richness along the Amazonian degradation gradient", x = "Degradation index (composite 0–10)", y = "FRic") p_fdis <- ggplot(fd_df, aes(degradation, FDis)) + geom_smooth(method = "lm", colour = "#0A4A5A", fill = "#A8D5BA") + geom_point(aes(colour = river), size = 4) + geom_text_repel(aes(label = site_id), size = 3) + scale_colour_brewer(palette = "Set2") + labs(title = "Functional dispersion along the Amazonian degradation gradient", x = "Degradation index (composite 0–10)", y = "FDis") p_fd <- p_fric + p_fdis + plot_layout(guides = "collect") ggsave("outputs/day4_FD_vs_degradation.png", p_fd, width = 12, height = 5, dpi = 300) # ============================================================================= # 7. FUNCTIONAL SPACE — PCoA + species labels coloured by trophic guild # ============================================================================= sp_dist <- dist(trait_mat_sc) pcoa <- ape::pcoa(sp_dist) # Guard: pcoa() may return < 2 valid axes if data is degenerate n_axes_avail <- ncol(pcoa$vectors) n_axes_use <- min(2, n_axes_avail) pcoa_df <- as.data.frame(pcoa$vectors[, seq_len(n_axes_use), drop = FALSE]) |> rownames_to_column("species") # Always rename to PC1 (and PC2 if available) pc_names <- paste0("PC", seq_len(n_axes_use)) colnames(pcoa_df)[2:(1 + n_axes_use)] <- pc_names if (n_axes_use < 2) pcoa_df$PC2 <- 0 # so plot still works pcoa_df <- left_join(pcoa_df, sp_meta, by = "species") p_space <- ggplot(pcoa_df, aes(PC1, PC2, colour = trophic_guild)) + geom_hline(yintercept = 0, lty = 2, colour = "grey60") + geom_vline(xintercept = 0, lty = 2, colour = "grey60") + geom_point(size = 3.5, alpha = 0.9) + geom_text_repel(aes(label = species), size = 2.6, max.overlaps = 30) + scale_colour_brewer(palette = "Dark2") + labs(title = "Amazonian fish functional space (PCoA on 10 morphological traits)", subtitle = "Coloured by trophic guild — see Villéger 2017 trait set", colour = "Trophic guild") ggsave("outputs/day4_functional_space.png", p_space, width = 10, height = 6.5, dpi = 300) # ============================================================================= # 8. CWM ALONG ENVIRONMENTAL GRADIENTS # ============================================================================= cwm_long <- cwm_df |> pivot_longer(any_of(colnames(fish_traits_sp)), names_to = "trait", values_to = "CWM_value") p_cwm <- ggplot(cwm_long, aes(degradation, CWM_value)) + geom_smooth(method = "lm", colour = "#0A4A5A", fill = "#A8D5BA") + geom_point(colour = "#B23A48", alpha = 0.7) + facet_wrap(~ trait, scales = "free_y", ncol = 5) + labs(title = "CWM of morphological traits along the degradation gradient", x = "Degradation index", y = "CWM") ggsave("outputs/day4_CWM_gradient.png", p_cwm, width = 14, height = 6, dpi = 300) # ============================================================================= # 9. SAVE OBJECTS FOR THE EXERCISES # ============================================================================= saveRDS(list(fish_traits_ind = fish_traits_ind, fish_traits_sp = fish_traits_sp, fish_comm = fish_comm, trait_mat_sc = trait_mat_sc, fd_df = fd_df, cwm_df = cwm_df, pcoa = pcoa, sp_meta = sp_meta), "outputs/day4_objects.rds") cat("\n=== DAY 4 — outputs written to outputs/ ===\n") cat("Number of sites:", nrow(fish_comm), " Number of species:", ncol(fish_comm), "\n") cat("Mean species richness per site:", round(mean(fd_df$SR), 1), " Mean FRic:", round(mean(fd_df$FRic, na.rm=TRUE), 3), "\n") # ============================================================================= # EXERCISES — see exercises_day4.R # Q1 Measurement quality control — TEM, ICC, landmark error # Q2 Trait–ecology relationships — Mouth_pos × Body_elong, correlation matrix, PCoA # Q3 FD across sites — degradation gradient, FRic vs FDis sensitivity # Q4 From morphology to function — CWM(Oral_gape) vs invertebrates, fish FUSE # =============================================================================