How to merge tables and how to split tables.
Be very careful when merging data tables by combining them by columns: the order of the rows must be the same in both tables! It is a good practice to try to make some graphs to visualise and check manually some values to see that everything matches.
When reading files, be sure that these files are present in the working directory
# Cleaning the memory before starting
rm(list=ls())
## Reading files we saved last time:
xy = read.table("coordinates.txt") # default settings
soil.data = read.table("soil.data.csv", sep = ";", header = T) # defining delimiting character (here ";") and that we have column names (header=T)
## As we did last time, let's split soil data into two using the function subset
high.ph.soils = subset(soil.data, pH.KCl > median(pH.KCl))
low.ph.soils = subset(soil.data, pH.KCl <= median(pH.KCl))
combine1 = cbind(xy, soil.data) # putting together columns
head(combine1)
## lon lat Proovi.nimi pH.KCl N.. P.mg.kg K.mg.kg Proovi.nimi
## 1 26.45699 58.24387 Vapramae 3.72 0.4246261 31.57133 79.04454 Vapramae
## 2 26.43730 58.19846 Illi 3.65 0.1364021 22.26459 42.73691 Illi
## 3 26.41474 58.15658 Vitipalu 4.25 0.2500752 58.04788 60.71264 Vitipalu
## 4 26.27875 58.21060 Konguta 4.40 2.3192906 63.00445 131.18302 Konguta
## 5 26.15339 58.22432 Vehendi 2.62 0.3698321 25.62339 131.56542 Vehendi
## 6 26.32119 58.28426 Erumae 3.28 0.2426881 16.21594 66.16427 Erumae
combine2 = rbind(high.ph.soils, low.ph.soils) # putting together rows
dim(combine2)
## [1] 30 5
combine3 = cbind(xy, combine2) # trying again, but ....
plot(combine1$lon, combine1$pH.KCl)
points(combine3$lon, combine3$pH.KCl, pch = "x", col = "green") # adding points, symbol "x"
head(combine3) # note that site names from xy and new soil data do not match!!
## lon lat Proovi.nimi pH.KCl N.. P.mg.kg K.mg.kg
## 1 26.45699 58.24387 Vapramae 3.72 0.4246261 31.571334 79.04454
## 2 26.43730 58.19846 Illi 3.65 0.1364021 22.264589 42.73691
## 3 26.41474 58.15658 Vitipalu 4.25 0.2500752 58.047875 60.71264
## 4 26.27875 58.21060 Konguta 4.40 2.3192906 63.004453 131.18302
## 5 26.15339 58.22432 Vehendi 5.77 0.2557405 8.214731 99.41711
## 6 26.32119 58.28426 Erumae 4.01 0.2749614 13.283927 64.78593
## Proovi.nimi
## 1 Vapramae
## 2 Illi
## 3 Vitipalu
## 4 Konguta
## 5 Porgumae
## 6 Aardla
# The 'merge' function can help in merging two datasets that share a common column with unique identifiers, like the "Proovi.nimi" one in our example:
combine3 = merge(xy, combine2, by = "Proovi.nimi") # merging by common column
head(combine3)
## Proovi.nimi lon lat pH.KCl N.. P.mg.kg K.mg.kg
## 1 Aardla 26.74540 58.32910 4.01 0.2749614 13.283927 64.78593
## 2 Aravu 27.42453 58.25486 2.75 0.4787677 24.440573 107.38674
## 3 Erumae 26.32119 58.28426 3.28 0.2426881 16.215942 66.16427
## 4 Haavametsa 27.36984 58.26345 2.69 0.1794443 8.860196 65.97427
## 5 Ignase 26.81712 58.26367 3.40 0.2200201 37.057869 79.17404
## 6 Illi 26.43730 58.19846 3.65 0.1364021 22.264589 42.73691
points(combine3$lon, combine3$pH.KCl, pch = "x", col = "red") # now OK!
# an alternative with the function 'order':
combine4 = cbind(xy[order(xy$Proovi.nimi), ], combine2[order(combine2$Proovi.nimi), ])
points(combine4$lon, combine4$pH.KCl, pch = "+", col = "blue")
Community data is usually a matrix of taxa x sites. In R it is tradition to have rows as sites (samples) and columns as species (taxa). In the field often the opposite direction is used in books, so be careful with this when transcribing your data into R. Here we read a table of forest floor vascular plants from 10x10 m plots.
vas.plants = read.table("vascular.plants.txt")
dim(vas.plants)
## [1] 79 30
vas.plants[1:5, 1:5] # taxa as rows, sites as columns
## X001Vapramae X002Illi X003Vitipalu X004Konguta X005Vehendi
## ACTAspic 0 0 0 0 0
## AEGOpoda 0 0 1 0 0
## ANEMnemo 0 0 1 0 0
## ASAReuro 1 0 0 0 0
## ATHYfil.fe 0 0 0 0 0
vas.plants = t(vas.plants) # transposing the matrix (replacing rows and columns)
vas.plants = as.data.frame(vas.plants) # transposing loose data.frame sturcture, putting back
vas.plants[1:5, 1:5]
## ACTAspic AEGOpoda ANEMnemo ASAReuro ATHYfil.fe
## X001Vapramae 0 0 0 1 0
## X002Illi 0 0 0 0 0
## X003Vitipalu 0 1 1 0 0
## X004Konguta 0 0 0 0 0
## X005Vehendi 0 0 0 0 0
image(t(vas.plants),
axes = F,
ylab = "Sites",
xlab = "Taxa") ## Quick visualisation of a table
apply(vas.plants, 1, FUN = max) # a function can be applied to each row (1) or column (2). Here we find max of each row.
## X001Vapramae X002Illi X003Vitipalu X004Konguta X005Vehendi
## 3 3 3 5 5
## X006.Erumae X007Porgumae X008Aardla X009Kurepalu X010Sarakuste
## 3 2 3 3 3
## X011Kannu X012Vonnu X013Rookse X014Kammeri X015Kambja
## 3 3 4 3 3
## X016Reola X017Ignase X018Unikula X019Haavametsa X020Kruusahaua
## 3 5 5 3 3
## X021Aravu X026Pedajamae X027Miti X028Puhaste X031Pimmelaan
## 5 4 3 4 4
## X032Logina X033Voorepalu X036Taevaskoja X044Savimae X073Pausakunnu
## 2 4 3 2 2
Use function
applyand calculate max values for each taxon in the objectvas.plants.
apply(vas.plants, 2, FUN = max)
## ACTAspic AEGOpoda ANEMnemo ASAReuro ATHYfil.fe BRACpinn CALAarun
## 1.0 3.0 2.0 1.0 1.0 1.0 3.0
## CALLvulg CAREdigi CIRCalpi CONVmaja DESCflex DRYOcart DRYOexpa
## 3.0 2.0 2.0 3.0 1.0 3.0 1.0
## EPILangu FESTovin FRAGvesc GALElute GERAsylv GYMNdryo HEPAnobi
## 1.0 1.0 2.0 3.0 1.0 1.0 3.0
## IMPAnoli LATHvern LUZUpilo LYCOanno MAIAbifo MELAprat MELInuta
## 0.5 1.0 2.0 5.0 3.0 4.0 1.0
## MILIeffu MOLIcaer MYCEmura ORTHsecu OXALacet PARIquad PTERaqui
## 3.0 1.0 1.0 2.0 5.0 1.0 2.0
## RANUcass RUBUsaxa STELholo STELnemo TRIEeuro VACCmyrt VACCvit.id
## 1.0 3.0 2.0 1.0 2.0 5.0 3.0
## VEROcham VIOLmira ANGEsylv ANTHsylv CAMPpers CAREcane CAREglob
## 1.0 1.0 1.0 1.0 0.5 1.0 1.0
## CHIMumbe CREPpalu DESCcaes DRYOfili EQUIprat EQUIsylv GALEOPsp
## 1.0 2.0 1.0 3.0 1.0 2.0 1.0
## GALIalbu. GALIpalu GOODrepe HIERvulg IMPAparv LYSIvulg MELAnemo
## 1.0 1.0 1.0 2.0 3.0 1.0 1.0
## MELAsylv MOERtrin PHEGconn POAnemo POLYodor POTEerec PULMobsc
## 1.0 1.0 1.0 1.0 1.0 1.0 1.0
## PYROmino RANUauri. SCORhumi SOLIvirg URTIdioi VEROoffi VIOLcani
## 1.0 1.0 1.0 2.0 1.0 1.0 1.0
## VIOLepip VIOLrivi.
## 1.0 1.0
Data matrices might have too many zeros since most taxa are rare. Because of this, a “long” format, with columns indicating site, taxon and some abundance measure is often used. Here we read an example of long format from trees in forest sites from an excel file.
library(readxl) # package to read excel files
data = read_excel("trees.xlsx")
data = as.data.frame(data) # to make the file data.frame
head(data)
## ala nr rinne puuliik D1 D2 H
## 1 008Aardla 1 1 PN 43 41.5 30.1
## 2 008Aardla 2 1 TA 30.5 31.5 21.5
## 3 008Aardla 3 1 HB 19 19.5 21.3
## 4 008Aardla 4 2 TM 7 8.0 10.4
## 5 008Aardla 5 2 TM 7.5 7.0 11.0
## 6 008Aardla 6 2 TM 8 7.0 12.4
# ala = site, puuliik = species (codes of tree species)
tree.counts = table(data[, c(1, 4)]) # counts of occurrences (number of trees)
tree.height = xtabs(H ~ ala + puuliik, data = data) # sum of H (height)
str(tree.counts) # from function table we obtain a bit special format, we better make it a simple data.frame by function as.data.frame
## 'table' int [1:30, 1:12] 0 0 0 0 0 0 0 3 0 0 ...
## - attr(*, "dimnames")=List of 2
## ..$ ala : chr [1:30] "001Vapramäe" "002Illi" "003Vitipalu" "004Konguta" ...
## ..$ puuliik: chr [1:12] "HB" "KS" "KU" "LH" ...
tree.counts = as.data.frame.matrix(tree.counts)
str(tree.counts)
## 'data.frame': 30 obs. of 12 variables:
## $ HB: int 0 0 0 0 0 0 0 3 0 0 ...
## $ KS: int 0 1 3 0 0 0 0 1 0 2 ...
## $ KU: int 3 1 8 8 0 8 5 0 3 6 ...
## $ LH: int 0 0 0 0 0 0 0 0 0 0 ...
## $ MA: int 3 4 1 2 5 2 0 0 3 1 ...
## $ MN: int 0 0 0 0 0 0 0 0 0 0 ...
## $ PI: int 0 0 0 0 0 0 1 0 0 0 ...
## $ PN: int 0 0 0 0 0 0 0 1 0 0 ...
## $ RE: int 0 0 0 0 0 0 0 0 0 0 ...
## $ TA: int 0 0 0 0 0 0 1 1 0 0 ...
## $ TM: int 0 0 0 0 0 0 0 4 0 0 ...
## $ VA: int 0 0 0 0 0 0 0 0 0 0 ...
For missing values in a table, NA is automatically
used.
summary(data) # Note D2
## ala nr rinne puuliik
## Length:239 Min. : 1.000 Min. :1.000 Length:239
## Class :character 1st Qu.: 3.000 1st Qu.:1.000 Class :character
## Mode :character Median : 5.000 Median :1.000 Mode :character
## Mean : 5.188 Mean :1.439
## 3rd Qu.: 7.000 3rd Qu.:2.000
## Max. :16.000 Max. :2.000
##
## D1 D2 H
## Length:239 Min. : 5.20 Min. : 6.00
## Class :character 1st Qu.:12.00 1st Qu.:13.10
## Mode :character Median :24.50 Median :22.00
## Mean :24.96 Mean :22.40
## 3rd Qu.:37.75 3rd Qu.:30.85
## Max. :54.00 Max. :44.30
## NA's :9
complete.cases(data) # do we have real values for a row
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE FALSE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
data[!complete.cases(data), ] # not complete cases (note "!")
## ala nr rinne puuliik D1 D2 H
## 11 073Sulaoja 1 1 MA ü177 NA 24.9
## 20 073Sulaoja 11 1 MA ü189 NA 23.9
## 21 015Kambja 1 1 KU ü166.5 NA 35.0
## 24 015Kambja 5 1 KS ü229 NA 36.4
## 36 007Põrgumäe 1 1 TA ü174 NA 33.0
## 44 016Reola 4 1 KU ü248 NA 34.6
## 77 003Vitipalu 9 1 MA ü82 NA 36.8
## 113 028Puhaste 2 1 MA ü203 NA 34.9
## 129 031Pimmelaan 2 1 MA ü199 NA 36.4
is.na(data$D2) # where we have NA values?
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE TRUE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Before we analyse community and site data together, it is suggested to check if we have the same order in both tables. We can list rownames and site names side by side
# Checking that the order is the same
cbind(row.names(vas.plants), as.character(xy$Proovi.nimi))
## [,1] [,2]
## [1,] "X001Vapramae" "Vapramae"
## [2,] "X002Illi" "Illi"
## [3,] "X003Vitipalu" "Vitipalu"
## [4,] "X004Konguta" "Konguta"
## [5,] "X005Vehendi" "Vehendi"
## [6,] "X006.Erumae" "Erumae"
## [7,] "X007Porgumae" "Porgumae"
## [8,] "X008Aardla" "Aardla"
## [9,] "X009Kurepalu" "Kurepalu"
## [10,] "X010Sarakuste" "Sarakuste"
## [11,] "X011Kannu" "Kannu"
## [12,] "X012Vonnu" "Vonnu"
## [13,] "X013Rookse" "Rookse"
## [14,] "X014Kammeri" "Kammeri"
## [15,] "X015Kambja" "Kambja"
## [16,] "X016Reola" "Reola"
## [17,] "X017Ignase" "Ignase"
## [18,] "X018Unikula" "Unikula"
## [19,] "X019Haavametsa" "Haavametsa"
## [20,] "X020Kruusahaua" "Kruusahaua"
## [21,] "X021Aravu" "Aravu"
## [22,] "X026Pedajamae" "Pedajamae"
## [23,] "X027Miti" "Miti"
## [24,] "X028Puhaste" "Puhaste"
## [25,] "X031Pimmelaan" "Pimmelaan"
## [26,] "X032Logina" "Logina"
## [27,] "X033Voorepalu" "Voorepalu"
## [28,] "X036Taevaskoja" "Taevaskoja"
## [29,] "X044Savimae" "Savimae"
## [30,] "X073Pausakunnu" "Sulaoja"
# Species distribution maps
i=3 # id of a species
plot(xy[,1:2],cex=0.3) ## Coordinates of sites and small points
points(xy[,1:2], cex=vas.plants[,i]*2,col="green") # cex changes point size relative to 1
title(main=colnames(vas.plants)[i]) # adding title taking the i-th colname from the community data
If we have names of sites and species, we might also need some text manipulations to cut, paste, search and replace parts of texts.
sites1 = row.names(vas.plants)
sites1
## [1] "X001Vapramae" "X002Illi" "X003Vitipalu" "X004Konguta"
## [5] "X005Vehendi" "X006.Erumae" "X007Porgumae" "X008Aardla"
## [9] "X009Kurepalu" "X010Sarakuste" "X011Kannu" "X012Vonnu"
## [13] "X013Rookse" "X014Kammeri" "X015Kambja" "X016Reola"
## [17] "X017Ignase" "X018Unikula" "X019Haavametsa" "X020Kruusahaua"
## [21] "X021Aravu" "X026Pedajamae" "X027Miti" "X028Puhaste"
## [25] "X031Pimmelaan" "X032Logina" "X033Voorepalu" "X036Taevaskoja"
## [29] "X044Savimae" "X073Pausakunnu"
nchar(sites1) # how many characters (length of the text)
## [1] 12 8 12 11 11 11 12 10 12 13 9 9 10 11 10 9 10 11 14 14 9 13 8 11 13
## [26] 10 13 14 11 14
sites2 = as.character(soil.data$Proovi.nimi) # Making character from a factor data type
sites2
## [1] "Vapramae" "Illi" "Vitipalu" "Konguta" "Vehendi"
## [6] "Erumae" "Porgumae" "Aardla" "Kurepalu" "Sarakuste"
## [11] "Kannu" "Vonnu" "Rookse" "Kammeri" "Kambja"
## [16] "Reola" "Ignase" "Unikula" "Haavametsa" "Kruusahaua"
## [21] "Aravu" "Pedajamae" "Miti" "Puhaste" "Pimmelaan"
## [26] "Logina" "Voorepalu" "Taevaskoja" "Savimae" "Sulaoja"
substring(sites1, 2, 4) # substring, from 2nd to 4th character
## [1] "001" "002" "003" "004" "005" "006" "007" "008" "009" "010" "011" "012"
## [13] "013" "014" "015" "016" "017" "018" "019" "020" "021" "026" "027" "028"
## [25] "031" "032" "033" "036" "044" "073"
paste(1:30, sites2, sep = "-") # pasting different texts (non-text will be converted)
## [1] "1-Vapramae" "2-Illi" "3-Vitipalu" "4-Konguta"
## [5] "5-Vehendi" "6-Erumae" "7-Porgumae" "8-Aardla"
## [9] "9-Kurepalu" "10-Sarakuste" "11-Kannu" "12-Vonnu"
## [13] "13-Rookse" "14-Kammeri" "15-Kambja" "16-Reola"
## [17] "17-Ignase" "18-Unikula" "19-Haavametsa" "20-Kruusahaua"
## [21] "21-Aravu" "22-Pedajamae" "23-Miti" "24-Puhaste"
## [25] "25-Pimmelaan" "26-Logina" "27-Voorepalu" "28-Taevaskoja"
## [29] "29-Savimae" "30-Sulaoja"
grep("mae", sites1) # numbers of cases with a search string
## [1] 1 6 7 22 29
sites1[grep("mae", sites1)]
## [1] "X001Vapramae" "X006.Erumae" "X007Porgumae" "X026Pedajamae"
## [5] "X044Savimae"
sites1[grep("\\.", sites1)] # some characters as "." "?" have a special meaning but we can search them by leading "\\"
## [1] "X006.Erumae"
gsub("mae", "oru", sites1) # replacing a search string
## [1] "X001Vapraoru" "X002Illi" "X003Vitipalu" "X004Konguta"
## [5] "X005Vehendi" "X006.Eruoru" "X007Porguoru" "X008Aardla"
## [9] "X009Kurepalu" "X010Sarakuste" "X011Kannu" "X012Vonnu"
## [13] "X013Rookse" "X014Kammeri" "X015Kambja" "X016Reola"
## [17] "X017Ignase" "X018Unikula" "X019Haavametsa" "X020Kruusahaua"
## [21] "X021Aravu" "X026Pedajaoru" "X027Miti" "X028Puhaste"
## [25] "X031Pimmelaan" "X032Logina" "X033Voorepalu" "X036Taevaskoja"
## [29] "X044Savioru" "X073Pausakunnu"
Make a character vector from
sites1without leading “X”
substring(sites1, 2, 9999)
## [1] "001Vapramae" "002Illi" "003Vitipalu" "004Konguta"
## [5] "005Vehendi" "006.Erumae" "007Porgumae" "008Aardla"
## [9] "009Kurepalu" "010Sarakuste" "011Kannu" "012Vonnu"
## [13] "013Rookse" "014Kammeri" "015Kambja" "016Reola"
## [17] "017Ignase" "018Unikula" "019Haavametsa" "020Kruusahaua"
## [21] "021Aravu" "026Pedajamae" "027Miti" "028Puhaste"
## [25] "031Pimmelaan" "032Logina" "033Voorepalu" "036Taevaskoja"
## [29] "044Savimae" "073Pausakunnu"
We make a set of forest type codes. In the database we have 4 digits but we use a broader classification of three digits. Then we can find how two sets differ.
forest.types = read.table("envir.txt")$forest.types
forest.types
## [1] 1152 1131 1153 1512 1132 1132 1161 1162 1132 1161 1161 1142 1161 1141 1161
## [16] 1142 1142 1153 1132 1161 1511 1141 1132 1132 1161 1511 1132 1132 1511 1142
forest.types = as.character(forest.types) # numbers do not have meaning, just codes
forest.types
## [1] "1152" "1131" "1153" "1512" "1132" "1132" "1161" "1162" "1132" "1161"
## [11] "1161" "1142" "1161" "1141" "1161" "1142" "1142" "1153" "1132" "1161"
## [21] "1511" "1141" "1132" "1132" "1161" "1511" "1132" "1132" "1511" "1142"
forest.types = substring(forest.types, 1, 3)
forest.types
## [1] "115" "113" "115" "151" "113" "113" "116" "116" "113" "116" "116" "114"
## [13] "116" "114" "116" "114" "114" "115" "113" "116" "151" "114" "113" "113"
## [25] "116" "151" "113" "113" "151" "114"
unique(forest.types) # set of unique values
## [1] "115" "113" "151" "116" "114"
table(forest.types) # frequency table
## forest.types
## 113 114 115 116 151
## 9 6 3 8 4
# separating 2 groups according to soil pH
forest.types.1 = forest.types[soil.data$pH.KCl < 3]
forest.types.2 = forest.types[soil.data$pH.KCl >= 3]
forest.types.1
## [1] "113" "113" "114" "114" "113" "151" "113" "113" "151"
forest.types.2
## [1] "115" "113" "115" "151" "113" "116" "116" "116" "116" "114" "116" "116"
## [13] "114" "115" "116" "114" "113" "116" "151" "113" "114"
union(forest.types.1, forest.types.2) # from two sets (here same as unique of all)
## [1] "113" "114" "151" "115" "116"
intersect(forest.types.1, forest.types.2) # in both
## [1] "113" "114" "151"
setdiff(forest.types.2, forest.types.1) # in set 2 but not in set 1
## [1] "115" "116"
setdiff(forest.types.1, forest.types.2) # None!
## character(0)
is.element(forest.types.2, forest.types.1) # asking which in set 2 are present in set 1
## [1] FALSE TRUE FALSE TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [13] TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE TRUE
Sometimes we want to combine different files which include different sites but species composition do not match. I have not found a good function in R for that. But we can make our own functions!
# We start with some artificial examples, just two species in both sets
t1 = data.frame(a = c(1, 0, 1), b = c(2, 2, 0))
t2 = data.frame(b = c(3, 0), c = c(0, 4))
t1
## a b
## 1 1 2
## 2 0 2
## 3 1 0
t2
## b c
## 1 3 0
## 2 0 4
t2[, setdiff(names(t1), names(t2))] = 0 # adding now species only found in table1 to table2 as zeros
t1[, setdiff(names(t2), names(t1))] = 0 # same for table1
t1
## a b c
## 1 1 2 0
## 2 0 2 0
## 3 1 0 0
t2
## b c a
## 1 3 0 0
## 2 0 4 0
rbind(t1, t2) # now mergind two tables (rbind looks names!)
## a b c
## 1 1 2 0
## 2 0 2 0
## 3 1 0 0
## 4 0 3 0
## 5 0 0 4
# Let's make a function from script above!
tables.join = function(t1, t2) {
# asking two tables which got names t1 and t2
t2[, setdiff(names(t1), names(t2))] = 0
t1[, setdiff(names(t2), names(t1))] = 0
t12 = rbind(t1, t2)
return(t12) # returns value from function
}
tables.join(t1, t2)
## a b c
## 1 1 2 0
## 2 0 2 0
## 3 1 0 0
## 4 0 3 0
## 5 0 0 4
t12 # objects within a functions are not kept in memory!
## Error in eval(expr, envir, enclos): object 't12' not found
When using this function you can specify your input objects (let’s
imagine that these are called a.table and
b.table) either by defining function parameters:
tables.join(t1=a.table,t2=b.table), or just by position
tables.join(a.table,b.table)
save(vas.plants,
tree.counts,
forest.types,
xy,
soil.data,
tables.join,
file = "community.rda")