# Report trait completeness for each column: number of values available,
# number missing, and percentage complete. Returns a tidy data frame with one
# row per column, sorted from least to most complete.
#' Summarise how complete each column ("trait") of a data frame is
#'
#' @param df A data frame (or list of columns) to inspect.
#' @param cols Columns to report on. `NULL` (the default) selects every
#'   numeric column of `df`. Character names are validated against
#'   `names(df)`; other index types are passed to `[[` unchanged.
#' @return A data frame with one row per requested column and the columns
#'   `trait`, `n_total`, `n_available`, `n_missing` and `pct_complete`
#'   (rounded to one decimal), ordered by ascending `pct_complete` so the
#'   least complete traits come first. When no columns are selected the
#'   result is a zero-row data frame with the same five columns.
check_completeness <- function(df, cols = NULL) {
  if (is.null(cols)) {
    # vapply() always yields a logical vector, even for a zero-column `df`,
    # where sapply() would return list() and break the subsetting below.
    cols <- names(df)[vapply(df, is.numeric, logical(1))]
  }

  if (is.character(cols)) {
    # df[[<missing name>]] is NULL, which would otherwise be reported as a
    # column with zero values and NaN completeness instead of failing.
    unknown <- setdiff(cols, names(df))
    if (length(unknown) > 0) {
      stop(
        "Column(s) not found in `df`: ", paste(unknown, collapse = ", "),
        call. = FALSE
      )
    }
  }

  n_total <- vapply(
    cols, function(col) length(df[[col]]), integer(1),
    USE.NAMES = FALSE
  )
  n_available <- vapply(
    cols, function(col) sum(!is.na(df[[col]])), integer(1),
    USE.NAMES = FALSE
  )

  # Building the frame in one call (rather than rbind-ing one-row frames)
  # keeps the column schema intact when `cols` is empty.
  out <- data.frame(
    trait = cols,
    n_total = n_total,
    n_available = n_available,
    n_missing = n_total - n_available,
    # 0 / 0 gives NaN for zero-length columns; order() places those last.
    pct_complete = round(100 * n_available / n_total, 1),
    stringsAsFactors = FALSE
  )
  out[order(out$pct_complete), ]
}
# Example: with `cols` left at its default, only the numeric columns of the
# data frame are summarised.
check_completeness(df = iris)