This is a step-by-step walk-through of the analysis of the German BIODEPTH site in the paper to illustrate the different analyses we conducted to evaluate the relationship between biodiversity and ecosystem multifunctionality.
We begin by loading the multifunc package. If it is not currently installed, it can be found on github, and installed using the devtools library.
library(devtools)
install_github("multifunc", username="jebyrnes")
library(multifunc)
#for plotting
library(ggplot2)
library(patchwork)
#for data
library(tidyr)
library(dplyr)
library(purrr)
library(forcats)
#for analysus
library(car)
Next, we load in the BIODEPTH data. We use some of the helper functions in the package to identify the column numbers of the relevant functions for later use.
#Read in data to run sample analyses on the biodepth data
data(all_biodepth)
<-c("biomassY3", "root3", "N.g.m2", "light3", "N.Soil", "wood3", "cotton3")
allVars
<-which(names(all_biodepth) %in% allVars) varIdx
As we want to work with just the German data, we next subset down the data to just Germany. We then look at the whether species were seeded or not, and determine which species were indeed seeded in Germany. If a column of species is full of zeroes, then it was not seeded in these plots. We will need the column indices of relevant species for later use in the overlap analyses.
#######
# Now, specify what data we're working with - Germany
# and the relevant variables we'll be working with
#######
<- all_biodepth %>%
germanyfilter(location=="Germany")
# which variables have > 2/3 of the values not NA?
<- whichVars(germany, allVars)
german_vars
# What are the names of species in this dataset
# that have at least some values > 0?
<- relevantSp(germany,26:ncol(germany))
species
<- which(names(germany) %in% species) #in case we need these spIDX
First, we will demonstrate the qualitative single function approach. As we’re using ggplot2 for plotting, we’ll use tidyr to pivot the data long for a nice faceted plot of each relationship.
<- germany %>%
germanyForPlotting select(Diversity, german_vars) %>%
pivot_longer(cols = german_vars,
names_to = "variable",
values_to = "value") %>%
mutate(variable = factor(variable))
#make the levels of the functions into something nice for plots
levels(germanyForPlotting$variable) <- c('Aboveground Biomass', 'Root Biomass', 'Cotton Decomposition', 'Soil Nitrogen', 'Plant Nitrogen')
Nest, as we want to display additional information about the fit of diversity to each function, we’ll need to iterate over each function, derive a fit, and then make some labels to be used in the eventual plot.
<- germanyForPlotting %>%
germanyLabels group_by(variable) %>%
nest() %>%
mutate(fits = map(data, ~lm(value ~ Diversity, data=.x)),
stats = map(fits, broom::glance)) %>%
unnest(stats) %>%
ungroup() %>%
mutate(labels = paste0("p = ", round(p.value, 3), ", ",
expression(R^2), " = ", round(r.squared, 2)),
labels = gsub("p = 0 ", "p < 0.001 ", labels),
Diversity = 7,
value=c(2000, 1200, 1.5, 6, 20))
Finally, we will put it all together into a single plot.
ggplot(aes(x=Diversity, y=value),data=germanyForPlotting) +
geom_point(size=3)+
facet_wrap(~variable, scales="free") +
theme_bw(base_size=15)+
stat_smooth(method="lm", colour="black", size=2) +
xlab("Species Richness") +
ylab("Value of Function") +
geom_text(data=germanyLabels,
aes(label=labels)) +
theme(panel.grid = element_blank())
Next, we ask whether the best performing species differs between functions. To do that, we have to pull out all monoculture plots, then reshape the data and run a quick qualitative analysis on it to determine the best performer.
#pull out only those plots planted in monoculture
<- germany %>%
monoGermanyfilter(rowSums(select(., ACHMIL1:VICTET1))==1) %>%
#get mono names
pivot_longer(ACHMIL1:VICTET1,
names_to = "mono") %>%
filter(value !=0) %>%
select(-value) %>%
#pivot by function
pivot_longer(german_vars,
names_to = "variable") %>%
#get the mean for each monoculture
group_by(variable, mono) %>%
summarize(value = mean(value))
#now, who is the best performer?
%>%
monoGermany group_by(variable) %>%
filter(value == max(value))
# # A tibble: 5 × 3
# # Groups: variable [5]
# variable mono value
# <chr> <chr> <dbl>
# 1 N.Soil TRIREP1 4.81
# 2 N.g.m2 TRIPRA1 16.9
# 3 biomassY3 TRIPRA1 818.
# 4 cotton3 TRIREP1 0.854
# 5 root3 FESRUB1 1072.
One of the functions we are looking at, soil N, first needs to be reflected before we can apply the overlap approach.
<- germany %>%
germany mutate(N.Soil = -1*N.Soil +max(N.Soil, na.rm=T))
We can examine any single species and generate coefficient values and ‘effects’ - i.e., whether a species has a positive (1), negative (-1), or neutral (0) effect on a function. Let’s examine this for biomass production in year 3.
<-sAICfun("biomassY3", species, germany)
spList spList
# $pos.sp
# [1] "DACGLO1" "FESPRA1" "LATPRA1" "LOTCOR1" "TRIPRA1" "TRIREP1"
#
# $neg.sp
# [1] "CAMPAT1" "PHLPRA1" "RANACR1"
#
# $neu.sp
# [1] "ACHMIL1" "ALOPRA1" "ANTODO1" "ARRELA1" "BROHOR1" "CENJAC1" "CHRLEU1"
# [8] "CREBIE1" "CYNCRI1" "FESRUB1" "GERPRA1" "HOLLAN1" "KNAARV1" "LEOAUT1"
# [15] "LOLPER1" "LYCFLO1" "PIMMAJ1" "PLALAN1" "RUMACE1" "TAROFF1" "VICCRA1"
# [22] "VICSEP1"
#
# $functions
# [1] "biomassY3"
#
# $coefs
# ACHMIL1 ALOPRA1 ANTODO1 ARRELA1 BROHOR1 CAMPAT1
# 0.0000 0.0000 0.0000 0.0000 0.0000 -391.5405
# CENJAC1 CHRLEU1 CREBIE1 CYNCRI1 DACGLO1 FESPRA1
# 0.0000 0.0000 0.0000 0.0000 104.6404 135.6735
# FESRUB1 GERPRA1 HOLLAN1 KNAARV1 LATPRA1 LEOAUT1
# 0.0000 0.0000 0.0000 0.0000 248.5566 0.0000
# LOLPER1 LOTCOR1 LYCFLO1 PHLPRA1 PIMMAJ1 PLALAN1
# 0.0000 375.6694 0.0000 -297.7452 0.0000 0.0000
# RANACR1 RUMACE1 TAROFF1 TRIPRA1 TRIREP1 VICCRA1
# -140.2996 0.0000 0.0000 492.6311 325.0173 0.0000
# VICSEP1 (Intercept)
# 0.0000 335.8382
#
# $effects
# ACHMIL1 ALOPRA1 ANTODO1 ARRELA1 BROHOR1 CAMPAT1 CENJAC1 CHRLEU1 CREBIE1 CYNCRI1
# 0 0 0 0 0 -1 0 0 0 0
# DACGLO1 FESPRA1 FESRUB1 GERPRA1 HOLLAN1 KNAARV1 LATPRA1 LEOAUT1 LOLPER1 LOTCOR1
# 1 1 0 0 0 0 1 0 0 1
# LYCFLO1 PHLPRA1 PIMMAJ1 PLALAN1 RANACR1 RUMACE1 TAROFF1 TRIPRA1 TRIREP1 VICCRA1
# 0 -1 0 0 -1 0 0 1 1 0
# VICSEP1
# 0
Using this as our starting point, we can use the getRedundancy function to generate a) an effect matrix for all functions, b) a coefficient matrix for all functions, and c) a standardized coefficient matrix for all functions.
<-getRedundancy(german_vars, species, germany)
redund<-getRedundancy(german_vars, species, germany, output="coef")
coefs<-stdEffects(coefs, germany, german_vars, species)
stdCoefs
#for example
redund
# ACHMIL1 ALOPRA1 ANTODO1 ARRELA1 BROHOR1 CAMPAT1 CENJAC1 CHRLEU1
# biomassY3 0 0 0 0 0 -1 0 0
# root3 1 0 -1 0 1 0 0 0
# N.g.m2 1 0 0 -1 0 0 0 0
# N.Soil -1 0 -1 0 1 -1 0 0
# cotton3 0 1 1 0 -1 1 0 0
# CREBIE1 CYNCRI1 DACGLO1 FESPRA1 FESRUB1 GERPRA1 HOLLAN1 KNAARV1
# biomassY3 0 0 1 1 0 0 0 0
# root3 0 0 0 0 1 0 -1 0
# N.g.m2 -1 0 0 1 0 0 -1 0
# N.Soil 1 0 1 -1 0 0 0 0
# cotton3 0 0 0 0 0 0 0 0
# LATPRA1 LEOAUT1 LOLPER1 LOTCOR1 LYCFLO1 PHLPRA1 PIMMAJ1 PLALAN1
# biomassY3 1 0 0 1 0 -1 0 0
# root3 1 0 1 0 0 0 0 -1
# N.g.m2 0 0 1 1 0 -1 0 -1
# N.Soil 1 0 0 1 0 1 0 0
# cotton3 -1 0 -1 0 0 0 0 1
# RANACR1 RUMACE1 TAROFF1 TRIPRA1 TRIREP1 VICCRA1 VICSEP1
# biomassY3 -1 0 0 1 1 0 0
# root3 -1 0 0 -1 0 0 0
# N.g.m2 -1 0 0 1 1 0 0
# N.Soil 1 0 0 -1 -1 0 0
# cotton3 0 0 0 1 1 0 0
Using the effect matrix, we then estimate the average number of species affecting all possible combinations of functions - both positive and negative - and then plot the results.
#plot the num. functions by fraction of the species pool needed
<-divNeeded(redund, type="positive")
posCurve
$div<-posCurve$div/ncol(redund)
posCurve
<-qplot(nfunc, div, data=posCurve, group=nfunc, geom=c("boxplot"))+
pcgeom_jitter(size=4, position = position_jitter(height=0.001, width = .04))+
ylab("Fraction of Species Pool\nwith Positive Effect\n")+
xlab("\nNumber of Functions")+theme_bw(base_size=24)+ylim(c(0,1))
<-divNeeded(redund, type="negative")
negCurve
$div<-negCurve$div/ncol(redund)
negCurve
<-qplot(nfunc, div, data=negCurve, group=nfunc, geom=c("boxplot"))+
ncgeom_jitter(size=4, position = position_jitter(height=0.001, width = .04))+
ylab("Fraction of Species Pool\nwith Negative Effect\n")+
xlab("\nNumber of Functions")+theme_bw(base_size=24)+ylim(c(0,1))
#combine these into one plot
+ nc +
pc plot_annotation(tag_levels = 'A')
We could have also looked at overlap indices, such as the Sorenson index.
<- map_df(2:nrow(redund),
allOverlapPos ~getOverlapSummary(redund, m = .x, denom = "set"),
.id = "m")
allOverlapPos
# # A tibble: 4 × 4
# m meanOverlap sdOverlap n
# <chr> <dbl> <dbl> <dbl>
# 1 1 0.283 0.206 10
# 2 2 0.0658 0.116 10
# 3 3 0 0 5
# 4 4 0 NA 1
One we’ve examined the consequences of functional overlap, we can then move on to compare the number and size of these effects. First, what is the relative balance of positive to negative contributions for each species?
<-data.frame(Species = colnames(redund),
posNegPos = colSums(filterOverData(redund)),
Neg = colSums(filterOverData(redund, type="negative")))
#plot it
ggplot(aes(x=Pos,y= Neg), data=posNeg) +
geom_jitter(position = position_jitter(width = 0.05, height = 0.05), size=5, shape=1) +
theme_bw(base_size=18) +
xlab("\n# of Positive Contributions per species\n") +
ylab("# of Negative Contributions per species\n") +
annotate("text", x=0, y=3, label="a)") +
stat_smooth(method="glm", colour="black", size=2,
method.args = list(family=poisson(link="identity"))) +
geom_abline(slope = 1, intercept = 0, lty = 2, color = "red")
So, a generally positive relationship. We can see from the figure that the slope is less than 1:1, so positive effects accumulate faster. Still, more positive effects = more negative effects. What about the size of those positive versus negative effects?
#now get the standardized effect sizes
<-within(posNeg, {
posNeg
<- colSums(filterCoefData(stdCoefs))/Pos
stdPosMean which(is.nan(stdPosMean))] <-0
stdPosMean[
<- colSums(filterCoefData(stdCoefs, type="negative"))/Neg
stdNegMean which(is.nan(stdNegMean))] <-0
stdNegMean[
})
ggplot(aes(x=stdPosMean,y= stdNegMean), data=posNeg) +
# geom_point(size=3) +
geom_jitter(position = position_jitter(width = .02, height = .02), size=5, shape=1) +
theme_bw(base_size=18) +
xlab("\nAverage Standardized Size of\nPositive Contributions") +
ylab("Average Standardized Size of\nNegative Contributions\n") +
stat_smooth(method="lm", colour="black", size=2)+
annotate("text", x=0, y=0.25, label="b)") +
geom_abline(slope=-1, intercept=0, size=1, colour="black", lty=3)
Now we’ll try the averaging approach. The first step is to create a set of columns where we have standardized all of the functions of interest, and then create an average of those standardized functions.
#add on the new functions along with the averaged multifunctional index
<-cbind(germany, getStdAndMeanFunctions(germany, german_vars))
germany#germany<-cbind(germany, getStdAndMeanFunctions(germany, vars, standardizeZScore))
We can then plot the averaged multifunctionality quite simply
#plot it
ggplot(aes(x=Diversity, y=meanFunction),data=germany)+geom_point(size=3)+
theme_bw(base_size=15)+
stat_smooth(method="lm", colour="black", size=2) +
xlab("\nSpecies Richness") +
ylab("Average Value of Standardized Functions\n")
We may want to look at all of the functions on a standardized scale, and add in the averaged line for comparison as well. We can do this by reshaping the data, and plotting it.
#reshape for plotting everything with ggplot2
<- germany %>%
germanyMeanForPlotting select(Diversity, biomassY3.std:meanFunction) %>%
pivot_longer(cols = c(biomassY3.std:meanFunction),
names_to = "variable") %>%
mutate(variable = fct_inorder(variable))
#nice names for plotting
levels(germanyMeanForPlotting$variable) <- c('Aboveground Biomass', 'Root Biomass', 'Cotton Decomposition', 'Soil Nitrogen', 'Plant Nitrogen', 'Mean Multifuncion Index')
#plot it
ggplot(aes(x=Diversity, y=value),data=germanyMeanForPlotting)+geom_point(size=3)+
facet_grid(~variable) +
theme_bw(base_size=15)+
stat_smooth(method="lm", colour="black", size=2) +
xlab("\nSpecies Richness") +
ylab("Standardized Value of Function\n")
Last, let’s test the statistical fit of the effect of diversity on the averaged multifunctionality index.
#statistical fit
<-lm(meanFunction ~ Diversity, data=germany)
aveFitAnova(aveFit)
# Anova Table (Type II tests)
#
# Response: meanFunction
# Sum Sq Df F value Pr(>F)
# Diversity 0.29502 1 32.63 4.042e-07 ***
# Residuals 0.52440 58
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aveFit)
#
# Call:
# lm(formula = meanFunction ~ Diversity, data = germany)
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.200533 -0.069321 0.006506 0.062717 0.288942
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.385422 0.017049 22.606 < 2e-16 ***
# Diversity 0.015360 0.002689 5.712 4.04e-07 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 0.09509 on 58 degrees of freedom
# Multiple R-squared: 0.36, Adjusted R-squared: 0.349
# F-statistic: 32.63 on 1 and 58 DF, p-value: 4.042e-07
First, we need to create a new data set that looks at the number of functions greater than or equal to a threshold across a wide range of thresholds.
<-getFuncsMaxed(germany, german_vars, threshmin=0.05, threshmax=0.99, prepend=c("plot","Diversity"), maxN=7) germanyThresh
Next, let’s perform an analysis on just the 0.8 threshold data.
<-glm(funcMaxed ~ Diversity, data=subset(germanyThresh, germanyThresh$thresholds=="0.8"), family=quasipoisson(link="identity"))
mfuncGermanyLinear08
Anova(mfuncGermanyLinear08, test.statistic="F")
# Analysis of Deviance Table (Type II tests)
#
# Response: funcMaxed
# Error estimate based on Pearson residuals
#
# Sum Sq Df F value Pr(>F)
# Diversity 6.375 1 6.4928 0.0135 *
# Residuals 56.944 58
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mfuncGermanyLinear08)
#
# Call:
# glm(formula = funcMaxed ~ Diversity, family = quasipoisson(link = "identity"),
# data = subset(germanyThresh, germanyThresh$thresholds ==
# "0.8"))
#
# Deviance Residuals:
# Min 1Q Median 3Q Max
# -1.4741 -0.9796 -0.8101 0.8364 2.0621
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.27756 0.11302 2.456 0.0171 *
# Diversity 0.05056 0.02476 2.042 0.0457 *
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# (Dispersion parameter for quasipoisson family taken to be 0.9817929)
#
# Null deviance: 66.909 on 59 degrees of freedom
# Residual deviance: 60.534 on 58 degrees of freedom
# AIC: NA
#
# Number of Fisher Scoring iterations: 6
To get a better sense of how robust these results are to threshold choice, let’s plot the diversity versus number of functions greater than or equal to a threshold for four different thresholds.
<-subset(germanyThresh, germanyThresh$thresholds %in% qw(0.2, 0.4, 0.6, 0.8)) #note, using qw as %in% is a string comparison operator
gcPlot
$percent<-paste(100*gcPlot$thresholds, "%", sep="")
gcPlot
qplot(Diversity, funcMaxed, data=gcPlot, facets=~percent) +
stat_smooth(method="glm",
method.args = list(family=quasipoisson(link="identity")),
colour="red", lwd=1.2) +
ylab(expression("Number of Functions" >= Threshold)) +
xlab("Species Richness") +
theme_bw(base_size=14) +
geom_text(data=data.frame(percent = unique(gcPlot$percent),
lab = paste(letters[1:4], ")", sep=""),
Diversity=2,
funcMaxed=6
mapping=aes(x=Diversity, y=funcMaxed, label=lab)) ),
Given that variation, let’s look at the entire spread of thresholds.
$percent <- 100*germanyThresh$thresholds
germanyThreshggplot(data=germanyThresh, aes(x=Diversity, y=funcMaxed, group=percent)) +
ylab(expression("Number of Functions" >= Threshold)) +
xlab("Species Richness") +
stat_smooth(method="glm",
method.args = list(family=quasipoisson(link="identity")),
lwd=0.8, fill=NA, aes(color=percent)) +
theme_bw(base_size=14) +
scale_color_gradient(name="Percent of \nMaximum", low="blue", high="red")
To systematically explore the variation in the relationship based on threshold choice, let’s look at the slope of the relationship and its confidence interval over all thresholds. We will then plot this relationship.
<-getCoefTab(funcMaxed ~ Diversity,
germanyLinearSlopesdata = germanyThresh,
coefVar = "Diversity",
family = quasipoisson(link="identity"))
######
# Plot the values of the diversity slope at
# different levels of the threshold
######
<- ggplot(germanyLinearSlopes, aes(x=thresholds*100,
germanSlopes y = estimate,
ymax = estimate + 1.96 * std.error,
ymin = estimate - 1.96 * std.error)) +
geom_ribbon(fill="grey50") +
geom_point() +
ylab("Change in Number of Functions per Addition of 1 Species\n") +
xlab("\nThreshold (%)") +
geom_abline(intercept=0, slope=0, lwd=1, linetype=2) +
theme_bw(base_size=14)
germanSlopes
We can easily see a number of indices mentioned in the text - Tmin, Tmax, Tmde, etc. Let’s pull them out with our indices function.
<- getIndices(germanyLinearSlopes, germanyThresh, funcMaxed ~ Diversity)
germanIDX germanIDX
# Tmin Tmax Tmde Rmde.linear Pmde.linear Mmin Mmax Mmde nFunc
# 1 0.11 0.81 0.41 0.1734241 0.554957 5.050544 1.06222 4.53135 5
We can now add annotations to our earlier plot to make it more rich, and show us those crucial threshold values.
$estimate[which(germanyLinearSlopes$thresholds==germanIDX$Tmde)] germanyLinearSlopes
# [1] 0.1734241
$IDX <- 0
germanyThresh$IDX [which(germanyThresh$thresholds %in%
germanyThreshc(germanIDX$Tmin, germanIDX$Tmax, germanIDX$Tmde))] <- 1
ggplot(data=germanyThresh, aes(x=Diversity, y=funcMaxed, group=percent)) +
ylab(expression("Number of Functions" >= Threshold)) +
xlab("Species Richness") +
geom_smooth(method="glm",
method.args = list(family=quasipoisson(link="identity")),
fill=NA, aes(color=percent, lwd=IDX)) +
theme_bw(base_size=14) +
scale_color_gradient(name="Percent of \nMaximum", low="blue", high="red") +
scale_size(range=c(0.3,5), guide="none") +
annotate(geom="text", x=0, y=c(0.2,2,4.6), label=c("Tmax", "Tmde", "Tmin")) +
annotate(geom="text", x=16.7, y=c(germanIDX$Mmin, germanIDX$Mmax, germanIDX$Mmde), label=c("Mmin", "Mmax", "Mmde"))