4 Scripts and data in R

4.1 Working directory

In which folder (working directory) you have your files?

wd=getwd() # get working directory
wd
## [1] "C:/Users/Carlos Perez Carmona/Dropbox/Courses/DataAnalysisCommunityEcology/2023-2024/Data"
setwd("C:/Users/Carlos Perez Carmona/Dropbox/Courses/DataAnalysisCommunityEcology/2023-2024/Data") # change to your favorite folder!
dir() # lists files
##  [1] "basal_area.R"      "bry.tab.txt"       "climate.txt"      
##  [4] "clusters.rda"      "community.rda"     "coordinates.txt"  
##  [7] "Data.Rproj"        "droughtnet.txt"    "eesti reljeef.DAT"
## [10] "eesti reljeef.ID" 
##  [ reached getOption("max.print") -- omitted 26 entries ]

Sometimes it is needed to give double backslashes: "c:\\mp\\doc\\"

4.2 Keeping track of objects in the memory, saving R objects

You can check what objects you currently have and remove unnecessary.

# Making some objects
a=3
diversity=c(18,27,10,22,25,8,12)
soil.ph=c(4.5,6.4,3.9,5.3,5.9,3.2,4.2)
soil.type=c("mineral","mineral","peat","mineral","mineral","peat","peat")

ls()
## [1] "a"         "diversity" "rootDir"   "soil.ph"   "soil.type" "wd"
ls.str() # more information
## a :  num 3
## diversity :  num [1:7] 18 27 10 22 25 8 12
## rootDir :  chr "C:/Users/Carlos Perez Carmona/Dropbox/Courses/DataAnalysisCommunityEcology/2023-2024/Data"
## soil.ph :  num [1:7] 4.5 6.4 3.9 5.3 5.9 3.2 4.2
## soil.type :  chr [1:7] "mineral" "mineral" "peat" "mineral" "mineral" "peat" "peat"
## wd :  chr "C:/Users/Carlos Perez Carmona/Dropbox/Courses/DataAnalysisCommunityEcology/2023-2024/Data"

save(diversity,soil.ph,soil.type,file="my.data.Rda") # saving so tahat R can use later
## NB! Other software cannot read these files!

rm(diversity,soil.ph,soil.type) # removes objects not needed
ls()
## [1] "a"       "rootDir" "wd"
load("my.data.Rda") # loads what has been saved in R
ls()
## [1] "a"         "diversity" "rootDir"   "soil.ph"   "soil.type" "wd"

Explore with ?rm how to remove all objects in the memory. WARNING, use carefully!

Answer
# currently we can still use this. Sometimes it is good to start your session with that expression
rm(list=ls())

# We now need to load our saved data again:
load("my.data.Rda")

4.3 Creating own scripts

Scripts are just text files with codes. In RStudio you can add new R Script file. When saving, use extension .R, for example my_first_script.R

Add comments for your own, and for others with # mark

Run your script by highlighting a part and then Cntr+Enter (=run!)

## My script, autumn 2019

# a is a variable ....
a = 5

print("Hello! This is my first script")
## [1] "Hello! This is my first script"

## end of my script

Save the text above to a text file with name “my_first_script.R”.

Now we can call the same script.

a=NA
a
## [1] NA

source("my_first_script.R") # runs your script from the file
## Warning in file(filename, "r", encoding = encoding): cannot open file
## 'my_first_script.R': No such file or directory
## Error in file(filename, "r", encoding = encoding): cannot open the connection

a # a has been defined in the script!
## [1] NA

4.4 Conditions in scripts

Does something only if conditions are fulfilled. The proportion of script for which the condition applies can be given between special parentheses { and }

soil="peat" # try to change to "mineral"

if (soil=="peat") {      # commands for conditions are between {}
  print("Has been wet")
}
## [1] "Has been wet"

answer=ifelse(soil.type=="peat","wet","dry") # conditions, if true, if not
print(answer)
## [1] "dry" "dry" "wet" "dry" "dry" "wet" "wet"

Use function ifelse with plot to have scatterplot between soil.ph and diversity and select symbol colors depending on soil.type

Answer
plot(soil.ph,diversity,col=ifelse(soil.type=="peat","brown","black"))

4.5 Loops in scripts

Loops will make a similar thing n times, for example with each element of a vector. The part of script which is repeated should be marked with { and }. Actually in R often you can avoid loops since you can have logical questions to whole vectors.

for (i in 1:7) { # i is index the loop is between {}
  print(soil.type[i]) # within loop you need to add print to get output
}
## [1] "mineral"
## [1] "mineral"
## [1] "peat"
## [1] "mineral"
## [1] "mineral"
## [1] "peat"
## [1] "peat"

for (i in soil.type) { # now i is an element of the vector
  print(i)
} 
## [1] "mineral"
## [1] "mineral"
## [1] "peat"
## [1] "mineral"
## [1] "mineral"
## [1] "peat"
## [1] "peat"

soils.high.div=NULL   # NULL is empty object, but we can define it!
for (i in 1:length(diversity)) { 
  if (diversity[i]>20) soils.high.div=c(soils.high.div,soil.type[i])
} 
soils.high.div
## [1] "mineral" "mineral" "mineral"

Please make soils.high.div without loop.

Answer
soils.high.div=soil.type[diversity>20] # if possible, avoid loops!

4.6 Data frames

We can have a single table which can include different types of data (e.g. numeric and character). We can also specify a grouping variable factor. When createing a data.frame, most non-numerical components are defined as factors. You can access individual columns of data.frame by name or by number or defining with data_frame_name$column_name




my.data=data.frame(soil.ph,soil.type,diversity) # creating data frame
my.data
##   soil.ph soil.type diversity
## 1     4.5   mineral        18
## 2     6.4   mineral        27
## 3     3.9      peat        10
## 4     5.3   mineral        22
## 5     5.9   mineral        25
## 6     3.2      peat         8
## 7     4.2      peat        12
str(my.data) # Note that in data-frame texts are transformed to factors with numeric levels!
## 'data.frame':    7 obs. of  3 variables:
##  $ soil.ph  : num  4.5 6.4 3.9 5.3 5.9 3.2 4.2
##  $ soil.type: chr  "mineral" "mineral" "peat" "mineral" ...
##  $ diversity: num  18 27 10 22 25 8 12

my.data[,2] # accessing by rows and columns
## [1] "mineral" "mineral" "peat"    "mineral" "mineral" "peat"    "peat"

names(my.data)
## [1] "soil.ph"   "soil.type" "diversity"
colnames(my.data) # same!
## [1] "soil.ph"   "soil.type" "diversity"
my.data[,"soil.type"]  # accessing a column by name
## [1] "mineral" "mineral" "peat"    "mineral" "mineral" "peat"    "peat"
my.data$soil.type # accessing by $ symbol
## [1] "mineral" "mineral" "peat"    "mineral" "mineral" "peat"    "peat"

as.character(my.data$soil.type) # transforming factors to characters
## [1] "mineral" "mineral" "peat"    "mineral" "mineral" "peat"    "peat"
as.numeric(my.data$soil.type) # to text: factor levels!
## Warning: NAs introduced by coercion
## [1] NA NA NA NA NA NA NA

my.data$high.diversity=my.data$diversity>20 # adds a new logical column
my.data
##   soil.ph soil.type diversity high.diversity
## 1     4.5   mineral        18          FALSE
## 2     6.4   mineral        27           TRUE
## 3     3.9      peat        10          FALSE
## 4     5.3   mineral        22           TRUE
## 5     5.9   mineral        25           TRUE
## 6     3.2      peat         8          FALSE
## 7     4.2      peat        12          FALSE

4.7 Reading and exploring data from text file

We generally read data from files. We can read text files by setting column limits (space, comma, semicolon etc.), defining headers (column titles), column for row names etc. etc.

Save file ´envir.txt´to your R working directory!

envir=read.table("envir.txt",header=T, sep=" ",row.names = 1)

# Let's explore the file!

ncol(envir) # number of columns
## [1] 12
nrow(envir) # ... rows
## [1] 30
dim(envir)  # dimensions
## [1] 30 12
names(envir) # names of columns (colnames works as well)
##  [1] "lon"          "lat"          "forest.types" "Jrk..nr."     "Proovi.nimi" 
##  [6] "pH.KCl"       "N.."          "P.mg.kg"      "K.mg.kg"      "Ca.mg.kg"    
## [11] "Mg.mg.kg"     "OA.."
row.names(envir) # if there are row names
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "26" "27" "28" "31" "32" "33" "36" "44" "73"
str(envir) 
## 'data.frame':    30 obs. of  12 variables:
##  $ lon         : num  26.5 26.4 26.4 26.3 26.2 ...
##  $ lat         : num  58.2 58.2 58.2 58.2 58.2 ...
##  $ forest.types: int  1152 1131 1153 1512 1132 1132 1161 1162 1132 1161 ...
##  $ Jrk..nr.    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Proovi.nimi : chr  "Vapramae" "Illi" "Vitipalu" "Konguta" ...
##  $ pH.KCl      : num  3.72 3.65 4.25 4.4 2.62 3.28 5.77 4.01 2.51 4.04 ...
##  $ N..         : num  0.425 0.136 0.25 2.319 0.37 ...
##  $ P.mg.kg     : num  31.6 22.3 58 63 25.6 ...
##  $ K.mg.kg     : num  79 42.7 60.7 131.2 131.6 ...
##  $ Ca.mg.kg    : num  370 154 649 11208 282 ...
##  $ Mg.mg.kg    : num  43.9 21.6 83.3 601.5 74.6 ...
##  $ OA..        : num  11.4 5.24 7.78 70.85 24.83 ...
head(envir) # just upper rows
##        lon      lat forest.types Jrk..nr. Proovi.nimi pH.KCl       N..  P.mg.kg
## 1 26.45699 58.24387         1152        1    Vapramae   3.72 0.4246261 31.57133
## 2 26.43730 58.19846         1131        2        Illi   3.65 0.1364021 22.26459
## 3 26.41474 58.15658         1153        3    Vitipalu   4.25 0.2500752 58.04788
## 4 26.27875 58.21060         1512        4     Konguta   4.40 2.3192906 63.00445
## 5 26.15339 58.22432         1132        5     Vehendi   2.62 0.3698321 25.62339
## 6 26.32119 58.28426         1132        6      Erumae   3.28 0.2426881 16.21594
##     K.mg.kg   Ca.mg.kg  Mg.mg.kg      OA..
## 1  79.04454   370.4051  43.87278 11.397929
## 2  42.73691   154.3332  21.64893  5.237742
## 3  60.71264   648.9929  83.34118  7.777811
## 4 131.18302 11208.0226 601.45503 70.845348
## 5 131.56542   281.8415  74.61169 24.833289
## 6  66.16427   260.2084  25.95761  7.988277
summary(envir) # short summary of each variable
##       lon             lat         forest.types     Jrk..nr.    
##  Min.   :26.11   Min.   :58.09   Min.   :1131   Min.   : 1.00  
##  1st Qu.:26.44   1st Qu.:58.17   1st Qu.:1132   1st Qu.: 8.25  
##  Median :26.78   Median :58.24   Median :1147   Median :15.50  
##  Mean   :26.77   Mean   :58.22   Mean   :1194   Mean   :18.70  
##  3rd Qu.:27.02   3rd Qu.:58.27   3rd Qu.:1161   3rd Qu.:26.75  
##  Max.   :27.42   Max.   :58.36   Max.   :1512   Max.   :73.00  
##  Proovi.nimi            pH.KCl           N..            P.mg.kg      
##  Length:30          Min.   :2.510   Min.   :0.1189   Min.   : 8.215  
##  Class :character   1st Qu.:2.965   1st Qu.:0.2207   1st Qu.:12.889  
##  Mode  :character   Median :3.525   Median :0.2575   Median :17.712  
##                     Mean   :3.593   Mean   :0.3812   Mean   :25.488  
##                     3rd Qu.:4.010   3rd Qu.:0.3751   3rd Qu.:31.160  
##                     Max.   :5.770   Max.   :2.3193   Max.   :65.774  
##     K.mg.kg          Ca.mg.kg           Mg.mg.kg           OA..       
##  Min.   : 32.93   Min.   :   80.24   Min.   : 18.71   Min.   : 4.871  
##  1st Qu.: 66.69   1st Qu.:  225.05   1st Qu.: 43.89   1st Qu.: 7.045  
##  Median : 80.91   Median :  430.02   Median : 76.38   Median : 8.408  
##  Mean   : 87.97   Mean   :  817.52   Mean   :101.06   Mean   :12.627  
##  3rd Qu.: 98.67   3rd Qu.:  617.13   3rd Qu.:105.18   3rd Qu.:11.693  
##  Max.   :167.19   Max.   :11208.02   Max.   :601.46   Max.   :70.845

envir[1:4,6:12] # short subset
##   pH.KCl       N..  P.mg.kg   K.mg.kg   Ca.mg.kg  Mg.mg.kg      OA..
## 1   3.72 0.4246261 31.57133  79.04454   370.4051  43.87278 11.397929
## 2   3.65 0.1364021 22.26459  42.73691   154.3332  21.64893  5.237742
## 3   4.25 0.2500752 58.04788  60.71264   648.9929  83.34118  7.777811
## 4   4.40 2.3192906 63.00445 131.18302 11208.0226 601.45503 70.845348

my.cols=c(6:9,5)  # pH, N, P, K and site name
soil.data=envir[,my.cols] # subset by selected columns to new data.frame
head(envir[,-c(1:2,4,6:12)]) ## leaving out some columns (not "-" before "c")
##   forest.types Proovi.nimi
## 1         1152    Vapramae
## 2         1131        Illi
## 3         1153    Vitipalu
## 4         1512     Konguta
## 5         1132     Vehendi
## 6         1132      Erumae

my.rows=envir$pH.KCl>median(envir$pH.KCl) # logical vector
my.rows
##  [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
## [13]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE
## [25]  TRUE FALSE  TRUE FALSE FALSE  TRUE
high.ph.soils=soil.data[my.rows,] 
low.ph.soils=soil.data[!my.rows,] # note !

high.ph.soils.1=subset(soil.data, pH.KCl>median(pH.KCl)) # subset function

high.ph.soils==high.ph.soils.1 # comparing all values individually
##    pH.KCl  N.. P.mg.kg K.mg.kg Proovi.nimi
## 1    TRUE TRUE    TRUE    TRUE        TRUE
## 2    TRUE TRUE    TRUE    TRUE        TRUE
## 3    TRUE TRUE    TRUE    TRUE        TRUE
## 4    TRUE TRUE    TRUE    TRUE        TRUE
## 7    TRUE TRUE    TRUE    TRUE        TRUE
## 8    TRUE TRUE    TRUE    TRUE        TRUE
## 10   TRUE TRUE    TRUE    TRUE        TRUE
## 13   TRUE TRUE    TRUE    TRUE        TRUE
## 15   TRUE TRUE    TRUE    TRUE        TRUE
## 18   TRUE TRUE    TRUE    TRUE        TRUE
## 20   TRUE TRUE    TRUE    TRUE        TRUE
## 26   TRUE TRUE    TRUE    TRUE        TRUE
## 31   TRUE TRUE    TRUE    TRUE        TRUE
## 33   TRUE TRUE    TRUE    TRUE        TRUE
## 73   TRUE TRUE    TRUE    TRUE        TRUE
all.equal(high.ph.soils,high.ph.soils.1) # comparing two objects
## [1] TRUE

sample(nrow(soil.data)) # random order 
##  [1] 25 12 26 22 11  3 21 23 18 29  6 30  1 28 14  9 20 13  5  7 19 17 10 24 15
## [26] 16 27  4  2  8

rand.soils=soil.data[sample(nrow(soil.data)),]

make a separate data.frame xy for coordinates lon and lat and site name.

Answer
xy=envir[,c(1,2,5)]

4.8 Writing data tables to text files


write.table(xy,file="coordinates.txt") # default settings

write.table(soil.data,file="soil.data.csv", sep=";", row.names = F) # own settings

4.9 R packages

R has some basic packages but you need to install packages for special tasks. Let’s install package “readxl” which allows to read excel files.

    
# install.packages("readxl") # if you do not have this, please omit # and install

library(readxl) # even if installed, you should call the package library each R session with the function library

test=read_excel("excel.xlsx")
test # a bit strange format
## # A tibble: 7 × 3
##   soil.ph soil.type diversity
##     <dbl> <chr>         <dbl>
## 1     4.5 mineral          18
## 2     6.4 mineral          27
## 3     3.9 peat             10
## 4     5.3 mineral          22
## 5     5.9 mineral          25
## 6     3.2 peat              8
## 7     4.2 peat             12
test=as.data.frame(test) # making a siple data.frame
test
##   soil.ph soil.type diversity
## 1     4.5   mineral        18
## 2     6.4   mineral        27
## 3     3.9      peat        10
## 4     5.3   mineral        22
## 5     5.9   mineral        25
## 6     3.2      peat         8
## 7     4.2      peat        12