# ============================================================================= # DAY 2 — Community-Weighted Means and Functional Diversity Indices # Course: Trait-Based Ecology — Theory and Applications in R # ============================================================================= # OBJECTIVE # - Compute CWM from community composition and trait matrices # - Calculate FRic, FEve, FDiv, FDis using {FD} # - Visualise functional space and interpret FD indices # - Assess sensitivity of FD indices to trait selection and sample size # # DATA FILES USED # outputs/day1_sp_traits_clean.rds (from Day 1) # data/03_community_composition.csv # data/02_environment.csv # # PACKAGES REQUIRED # tidyverse, FD, vegan, ggplot2, patchwork, ggforce # ============================================================================= library(tidyverse) library(FD) # dbFD(), functcomp() library(vegan) # diversity(), decostand() library(patchwork) library(ggforce) # geom_mark_ellipse() theme_set( theme_bw(base_size = 12) + theme(panel.grid.minor = element_blank(), strip.background = element_rect(fill = "#E1F5EE"), strip.text = element_text(colour = "#085041", face = "bold")) ) # ── 1. Load data ─────────────────────────────────────────────────────────────── sp_traits <- readRDS("outputs/day1_sp_traits_clean.rds") comm_raw <- read_csv("data/03_community_composition.csv", show_col_types = FALSE) env <- read_csv("data/02_environment.csv", show_col_types = FALSE) # ── 2. Prepare matrices for {FD} ────────────────────────────────────────────── # dbFD() expects: # x = trait matrix (species × traits, row names = species names) # a = abundance matrix (sites × species, row names = site names) # IMPORTANT: species names must match EXACTLY between x and a # 2a. Trait matrix (log-transform skewed traits for Euclidean distance quality) trait_mat <- sp_traits |> select(species, SLA, LDMC, LeafN, Height, SeedMass) |> mutate( SLA = log(SLA), LDMC = log(LDMC), Height = log(Height), SeedMass = log(SeedMass) ) |> column_to_rownames("species") |> as.matrix() # Scale traits to unit variance — mandatory when traits have different units # Without scaling, Height (cm, large values) would dominate the functional space trait_mat_sc <- scale(trait_mat) cat("Trait matrix dimensions:", dim(trait_mat_sc), "\n") # 2b. Community matrix (sites × species) comm_mat <- comm_raw |> column_to_rownames("plot_id") |> as.matrix() # Ensure species order is identical between trait and community matrices shared_sp <- intersect(rownames(trait_mat_sc), colnames(comm_mat)) cat("Species in trait matrix:", nrow(trait_mat_sc), "\n") cat("Species in community matrix:", ncol(comm_mat), "\n") cat("Shared species:", length(shared_sp), "\n") # Subset to shared species trait_mat_sc <- trait_mat_sc[shared_sp, ] comm_mat <- comm_mat[, shared_sp] # Remove plots with zero total cover (can happen after species filtering) comm_mat <- comm_mat[rowSums(comm_mat) > 0, ] # ── 3. Community-Weighted Mean (CWM) ────────────────────────────────────────── # CWM_trait = Σ (relative abundance_i × trait_i) for all species i in the community # Equivalent to the abundance-weighted average trait value # Interpretation: reflects the dominant strategy in the community (mass-ratio hypothesis) # Manual computation (pedagogical) # Note: comm_mat rows already sum to 1 (relative cover), so no need to divide cwm_manual <- comm_mat %*% trait_mat_sc cwm_manual <- as.data.frame(cwm_manual) cwm_manual$plot_id <- rownames(cwm_manual) # Using functcomp() from {FD} — same result, more convenient cwm_fd <- functcomp( x = trait_mat_sc, a = comm_mat, CWM.type = "all" # returns mean for continuous traits ) # Verify manual == package results cat("Max difference between manual and FD CWM (SLA):", max(abs(cwm_manual$SLA - cwm_fd$SLA)), "\n") # should be ≈ 0 # Join CWM with environmental data cwm <- cwm_fd |> rownames_to_column("plot_id") |> left_join(env, by = "plot_id") # ── 4. CWM along the elevational gradient ──────────────────────────────────── # Prediction: CWM_SLA should decrease with elevation (conservative strategy prevails) # Prediction: CWM_LeafN should decrease with elevation (nutrient limitation) p_cwm_sla <- ggplot(cwm, aes(x = elevation, y = SLA)) + geom_point(aes(colour = disturbance), size = 2.5) + geom_smooth(method = "lm", se = TRUE, colour = "#085041") + scale_colour_gradient(low = "#E1F5EE", high = "#D85A30", name = "Disturbance") + labs(x = "Elevation (m)", y = "CWM log(SLA) — scaled", title = "A — CWM(SLA) along elevational gradient") + annotate("text", x = 2600, y = max(cwm$SLA)*0.9, label = paste0("r = ", round(cor(cwm$elevation, cwm$SLA), 2)), size = 4, colour = "#085041") p_cwm_leafn <- ggplot(cwm, aes(x = elevation, y = LeafN)) + geom_point(aes(colour = disturbance), size = 2.5) + geom_smooth(method = "lm", se = TRUE, colour = "#085041") + scale_colour_gradient(low = "#E1F5EE", high = "#D85A30", name = "Disturbance") + labs(x = "Elevation (m)", y = "CWM log(LeafN) — scaled", title = "B — CWM(Leaf N) along elevational gradient") p_cwm_sla + p_cwm_leafn + plot_layout(guides = "collect") & theme(legend.position = "bottom") # ── 5. Computing FD indices with dbFD() ─────────────────────────────────────── # dbFD() computes all standard FD indices from a trait matrix and abundance matrix # Key arguments: # calc.FRic — compute functional richness (convex hull volume)? TRUE/FALSE # calc.FDiv — compute FDiv and FEve? TRUE/FALSE # m — number of PCoA axes to retain (default: all non-negative) # stand.x — standardise trait matrix? (we already did it manually) # w — trait weights (default: equal weights) cat("Computing FD indices — this may take 1–2 minutes...\n") fd_results <- dbFD( x = trait_mat_sc, a = comm_mat, calc.FRic = TRUE, calc.FDiv = TRUE, stand.x = FALSE, # already scaled m = 3 # retain 3 PCoA axes (sufficient for 5 traits) ) # fd_results is a list — extract key indices fd_df <- data.frame( plot_id = rownames(comm_mat), SR = fd_results$nbsp, # species richness FRic = fd_results$FRic, # functional richness (convex hull volume) FEve = fd_results$FEve, # functional evenness FDiv = fd_results$FDiv, # functional divergence FDis = fd_results$FDis, # functional dispersion (=weighted mean dist to centroid) RaoQ = fd_results$RaoQ # Rao's quadratic entropy ) |> left_join(env, by = "plot_id") cat("FD indices computed for", nrow(fd_df), "plots.\n") # Quick summary statistics fd_df |> select(SR, FRic, FEve, FDiv, FDis) |> summary() # ── 6. Relationships between FD indices and elevation ───────────────────────── # Expected: FRic peaks at intermediate elevations (hump-shaped pattern)? # FDis decreases with elevation (filtering increases trait convergence)? p_fric <- ggplot(fd_df, aes(x = elevation, y = FRic)) + geom_point(aes(colour = SR), size = 2.5) + geom_smooth(method = "loess", se = TRUE, colour = "#185FA5") + scale_colour_viridis_c(name = "Species richness") + labs(x = "Elevation (m)", y = "FRic", title = "A — Functional richness along elevation") p_fdis <- ggplot(fd_df, aes(x = elevation, y = FDis)) + geom_point(aes(colour = disturbance), size = 2.5) + geom_smooth(method = "loess", se = TRUE, colour = "#185FA5") + scale_colour_gradient(low = "#E1F5EE", high = "#D85A30", name = "Disturbance") + labs(x = "Elevation (m)", y = "FDis", title = "B — Functional dispersion along elevation") p_feve <- ggplot(fd_df, aes(x = FEve, y = FDiv)) + geom_point(aes(colour = elevation), size = 2.5) + scale_colour_viridis_c(name = "Elevation (m)", option = "C") + labs(x = "Functional evenness (FEve)", y = "Functional divergence (FDiv)", title = "C — FEve vs. FDiv relationship") + annotate("text", x = min(fd_df$FEve, na.rm=TRUE) + 0.05, y = max(fd_df$FDiv, na.rm=TRUE), label = paste0("r = ", round(cor(fd_df$FEve, fd_df$FDiv, use="complete.obs"), 2)), size = 4) (p_fric + p_fdis) / p_feve # ── 7. Visualising functional space ─────────────────────────────────────────── # We use the first 2 PCoA axes from the trait distance matrix # to visualise the functional space occupied by communities # PCoA of trait matrix (Euclidean distance after scaling) trait_dist <- dist(trait_mat_sc, method = "euclidean") pcoa_res <- cmdscale(trait_dist, k = 2, eig = TRUE) # % variance explained by each axis eig_pct <- round(100 * pcoa_res$eig[1:2] / sum(pcoa_res$eig[pcoa_res$eig > 0]), 1) cat(sprintf("PCoA Axis 1: %.1f%% | Axis 2: %.1f%%\n", eig_pct[1], eig_pct[2])) # Species scores in functional space sp_pcoa <- as.data.frame(pcoa_res$points) colnames(sp_pcoa) <- c("PC1", "PC2") sp_pcoa <- sp_pcoa |> rownames_to_column("species") |> left_join(sp_traits |> select(species, PFT_label), by = "species") # Plot: species in functional space, coloured by PFT ggplot(sp_pcoa, aes(x = PC1, y = PC2, colour = PFT_label, label = species)) + ggforce::geom_mark_ellipse( aes(fill = PFT_label, colour = PFT_label), alpha = 0.12, linewidth = 0.6 ) + geom_point(size = 3.5) + ggrepel::geom_text_repel(size = 2.6, max.overlaps = 12, segment.colour = "grey70") + scale_colour_brewer(palette = "Set2", name = "PFT", aesthetics = c("colour", "fill")) + labs( x = paste0("PCoA 1 (", eig_pct[1], "% variance)"), y = paste0("PCoA 2 (", eig_pct[2], "% variance)"), title = "Functional space — 30 alpine grassland species", subtitle = "Ellipses = 95% confidence regions per PFT" ) # ── 8. Sensitivity analysis — effect of trait number ───────────────────────── # Key concern: does FRic change substantially when we add/remove traits? # This is a major source of methodological uncertainty (Mouillot et al. 2021) trait_combos <- list( "2 traits (SLA + LDMC)" = c("SLA", "LDMC"), "3 traits (+ LeafN)" = c("SLA", "LDMC", "LeafN"), "4 traits (+ Height)" = c("SLA", "LDMC", "LeafN", "Height"), "5 traits (+ SeedMass)" = c("SLA", "LDMC", "LeafN", "Height", "SeedMass") ) fdric_sensitivity <- map_dfr(names(trait_combos), function(combo_name) { selected <- trait_combos[[combo_name]] mat_sub <- trait_mat_sc[, selected, drop = FALSE] fd_sub <- suppressMessages( dbFD(x = mat_sub, a = comm_mat, calc.FRic = TRUE, calc.FDiv = FALSE, stand.x = FALSE, m = min(2, ncol(mat_sub))) ) data.frame( combo = combo_name, plot_id = names(fd_sub$FRic), FRic = fd_sub$FRic ) }) # Visualise variance in FRic across trait combinations ggplot(fdric_sensitivity, aes(x = combo, y = FRic, fill = combo)) + geom_violin(alpha = 0.6, colour = NA) + geom_boxplot(width = 0.15, outlier.shape = 21, colour = "grey30") + scale_fill_brewer(palette = "Set1", guide = "none") + labs( x = NULL, y = "Functional richness (FRic)", title = "Sensitivity of FRic to number of traits included", caption = "Each violin = distribution of FRic across 40 plots." ) + theme(axis.text.x = element_text(angle = 20, hjust = 1)) # CRITICAL INTERPRETATION: FRic values are not comparable across different # numbers of traits. Always use the same trait set across all communities. # ── 9. Species richness vs. functional diversity ────────────────────────────── # A key question: is FD simply a proxy for SR, or does it carry independent info? ggplot(fd_df, aes(x = SR, y = FDis)) + geom_point(aes(colour = elevation), size = 2.5, alpha = 0.8) + geom_smooth(method = "lm", se = TRUE, colour = "#185FA5", linewidth = 1) + scale_colour_viridis_c(name = "Elevation (m)") + labs( x = "Species richness (SR)", y = "Functional dispersion (FDis)", title = "SR vs. FDis — are they independent?", caption = paste0("r = ", round(cor(fd_df$SR, fd_df$FDis, use = "complete.obs"), 2)) ) # If r is high (> 0.7), consider using the FD residuals from SR # (i.e., functional diversity unexplained by species richness) # ── 10. Save outputs ───────────────────────────────────────────────────────── saveRDS(fd_df, "outputs/day2_fd_indices.rds") saveRDS(cwm, "outputs/day2_cwm.rds") saveRDS(sp_pcoa, "outputs/day2_pcoa_scores.rds") cat("Saved: outputs/day2_*.rds\n") # ============================================================================= # EXERCISE — Students complete the following # ============================================================================= # Q1. Compute CWM for plant height. Plot CWM(Height) as a function of # disturbance index. Interpret: which PFT drives the pattern? # # Q2. Compute FRic using ONLY the two LES axes (SLA and LDMC). # Compare communities with high vs. low FEve. What does low FEve # indicate about species packing in functional space? # # Q3. FRic is sensitive to the number of species in the community. # Compute the Pearson correlation between SR and FRic across plots. # Propose a method to separate the effect of SR from FRic. # (Hint: look at ?dbFD argument 'calc.FRic' and Villeger et al. 2008) # # Q4. Identify the three plots with the highest FDis and the three with the # lowest FDis. What are their elevations and disturbance levels? # Formulate a biological hypothesis to explain this pattern. # =============================================================================