# Simple mean imputation: replace NA values in numeric columns with the column
# mean (optionally the per-group mean, e.g. per taxonomic group). Intended as a
# fast fallback before random-forest imputation.
#' Impute missing numeric values with (optionally per-group) means
#'
#' Every numeric column of `df` has its missing entries (`NA`/`NaN`) replaced
#' by the mean of the observed values. When `by_group` is given, the mean of
#' the row's group is used first; entries whose group has no observed value
#' (or whose group is itself `NA`) fall back to the column-wide mean, computed
#' after the group fill. Columns with no observed value at all are left as is.
#'
#' @param df A data frame.
#' @param by_group `NULL` (default) for global means, or the name (or
#'   position) of a single column of `df` that defines the groups. The
#'   grouping column itself is never imputed.
#' @return `df` with missing values in numeric columns filled in. Integer
#'   columns that receive an imputed value become double.
mean_impute <- function(df, by_group = NULL) {
  # vapply keeps the result logical even for a zero-column data frame.
  is_num <- vapply(df, is.numeric, logical(1))
  num_cols <- names(df)[is_num]

  grouped <- !is.null(by_group)
  if (grouped) {
    # Accept a column position for backward compatibility with `df[[i]]`.
    if (is.numeric(by_group)) {
      by_group <- names(df)[by_group]
    }
    if (length(by_group) != 1L || is.na(by_group) ||
          !by_group %in% names(df)) {
      stop("`by_group` must identify exactly one column of `df`.",
           call. = FALSE)
    }
    # A numeric grouping column must not be treated as data to impute.
    num_cols <- setdiff(num_cols, by_group)
    # Character keys guarantee lookup by name: indexing the tapply() result
    # with raw numeric group ids would select by position instead.
    group_keys <- as.character(df[[by_group]])
  }

  for (col in num_cols) {
    values <- df[[col]]
    missing <- is.na(values)
    if (!any(missing)) {
      next
    }

    if (grouped) {
      group_means <- tapply(values, group_keys, mean, na.rm = TRUE)
      # Groups without any observed value yield NaN; treat them as unknown.
      group_means[is.nan(group_means)] <- NA
      values[missing] <- as.vector(group_means[group_keys[missing]])
      missing <- is.na(values)
    }

    # Global fallback; skipped when nothing is observed, so that an all-NA
    # column stays NA instead of turning into NaN.
    if (any(missing) && !all(missing)) {
      values[missing] <- mean(values, na.rm = TRUE)
    }
    df[[col]] <- values
  }
  df
}