-
Notifications
You must be signed in to change notification settings - Fork 0
/
tgr_constrained.R
328 lines (320 loc) · 16.5 KB
/
tgr_constrained.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#' ---
#' title: "Constrained and summary analysis"
#' author: "Beau Larkin\n"
#' date: "Last updated: `r format(Sys.time(), '%d %B, %Y')`"
#' output:
#' github_document:
#' toc: true
#' toc_depth: 3
#' df_print: paged
#' fig_width: 8
#' fig_height: 7
#' ---
#'
#' # Description
#' This script presents a constrained analysis on restored sites in the Blue Mounds area.
#' Lack of replication prevents constraints from being significants across regions, although
#' this was tried previously (not shown).
#'
#' # Packages and libraries
packages_needed = c("tidyverse", "vegan", "conflicted", "knitr", "GGally", "ape", "ade4", "GGally")
packages_installed = packages_needed %in% rownames(installed.packages())
#+ packages,message=FALSE
if (any(!packages_installed)) {
install.packages(packages_needed[!packages_installed])
}
#+ libraries,message=FALSE
for (i in 1:length(packages_needed)) {
library(packages_needed[i], character.only = T)
}
#+ conflicts,message=FALSE
conflict_prefer("filter", "dplyr")
conflict_prefer("select", "dplyr")
#'
#' # Functions
#' Reference the sidecar file `tgr_constrained_functions.R`.
source("supporting_files/tgr_constrained_functions.R")
#'
#' # Data
#' ## Site metadata and experimental design
#+ sites
sites <-
read_csv(paste0(getwd(), "/clean_data/sites.csv"), show_col_types = FALSE) %>%
mutate(
field_type = factor(
field_type,
ordered = TRUE,
levels = c("corn", "restored", "remnant"))) %>%
select(-lat, -long, -yr_restore, -yr_rank) %>%
arrange(field_key)
#'
#' ## Plant data
#' ### Traits
#' Plant abundance data was only available at 16 sites (none at Fermi). These were translated into
#' percent cover of traits in `plant.R`. Site metadata are joined to allow custom filtering of sites
#' later.
ptr <- read_csv(paste0(getwd(), "/clean_data/plant_trait_abund.csv"), show_col_types = FALSE) %>%
left_join(sites %>% select(starts_with("field"), region), by = join_by("field_name")) %>%
left_join(read_csv(paste0(getwd(), "/clean_data/spe_plant_abund.csv"), show_col_types = FALSE) %>%
rename(field_name = SITE) %>% select(field_name, BARESOIL, LITTER), by = join_by(field_name)) %>%
select(field_name, field_type, region, everything(), -field_key)
#' Plant releve data was available from four Fermi sites, but survey data aren't correctable
#' between Fermi and other sites. Also, translating counts of species to counts of traits isn't appropriate.
#' Trait data isn't included for the plant presence dataset.
#'
#' ### Plant communities
#' Both abundance and presence data are provided.
#' Plant site-species matrices use field names for rows, site metadata will be joined.
pspe <- list(
ab = read_csv(paste0(getwd(), "/clean_data/spe_plant_abund.csv"), show_col_types = FALSE),
pr = read_csv(paste0(getwd(), "/clean_data/spe_plant_presence.csv"), show_col_types = FALSE)
) %>% map(function(x) x %>%
rename(field_name = SITE) %>%
left_join(sites %>% select(starts_with("field"), region), by = join_by("field_name")) %>%
select(field_name, field_type, region, everything(), -field_key, -BARESOIL, -LITTER))
#'
#' ## Environmental data
#' Use precipitation as proxy for soil moisture
rain = read_csv(paste0(getwd(), "/clean_data/site_precip_normal.csv"), show_col_types = FALSE)
#' Create subsets of soil environmental data to align with plant abundance or presence sites
soil <- read_csv(paste0(getwd(), "/clean_data/soil.csv"), show_col_types = FALSE) %>%
left_join(rain, by = join_by("field_key")) %>%
filter(field_key %in% sites$field_key) %>%
left_join(sites %>% select(starts_with("field"), region), by = join_by("field_name", "field_key")) %>%
select(field_name, field_type, region, everything(), -field_key)
#'
#' ## Microbial data
#' ### Fungal communities
#' Fungal species matrices must have field names instead of field keys to align all datasets.
#' Create subsets of fungal species matrices to align with plant abundance or presence sites
#' Sites-species tables contain rarefied sequence abundances. This list includes
#' composition summarized in fields.
#'
#' - Fermi switchgrass prairies are removed because they have no plant data associated.
#' - CSV files were produced in [process_data.R](process_data.md)
fspe <- list(
its = read_csv(paste0(getwd(), "/clean_data/spe_ITS_rfy.csv"), show_col_types = FALSE),
amf = read_csv(paste0(getwd(), "/clean_data/spe_18S_rfy.csv"), show_col_types = FALSE)
) %>% map(function(x) x %>%
left_join(sites %>% select(starts_with("field"), region), by = join_by("field_key")) %>%
filter(!(field_name %in% c("FLRSP1", "FLRSP2", "FLRSP3"))) %>%
select(field_name, field_type, region, everything(), -field_key))
#'
#' ### Species metadata
#' The OTUs and sequence abundances in these files matches the rarefied data in `spe$` above.
#' CSV files were produced in the [microbial diversity script](microbial_diversity.md).
fspe_meta <- list(
its = read_csv(paste0(getwd(), "/clean_data/speTaxa_ITS_rfy.csv"), show_col_types = FALSE),
amf = read_csv(paste0(getwd(), "/clean_data/speTaxa_18S_rfy.csv"), show_col_types = FALSE)
)
#'
#' ## Response data
#' ### Fungal biomass
fb <- read_csv(paste0(getwd(), "/clean_data/plfa.csv"), show_col_types = FALSE) %>%
left_join(sites %>% select(starts_with("field"), region), by = join_by("field_key", "field_name")) %>%
select(field_name, field_type, region, everything(), -field_key, -starts_with("fa_"))
#'
#' ### Water stable aggregates
# Remove rows from old field sites (26 and 27)
wsa <- read_csv(paste0(getwd(), "/clean_data/wsa.csv"), show_col_types = FALSE)[-c(26:27), ] %>%
left_join(sites, by = "field_key")
#'
#' # Analysis and Results
#' A great number of symmetric and asymmetric comparative and constrained analyses have been attempted
#' with these data. The best and simplest is a db-RDA, executed with the custom function
#' `dbrda_fun()`. The strategy is to forward select variables
#' of experimental interest. These are the variables that we expect to change
#' as a result of the restoration, or affect the microbial community directly.
#' Agricultural nutrients, soil organic matter, and plant community/traits data are in this group.
#'
#' ## Plant community axes
#' In this analysis, both plant community and traits data can be used. Let's use `pcoa_fun()` to produce
#' community axes for the abundance-based plant data.
#+ plant_pcoa_ab
(pspe_pcoa_ab <- pcoa_fun(pspe$ab))
#'
#' ## Microbial communities with constraints
#' db-RDA in function `dbrda-fun()`
#'
#' ### Fungal community (ITS sequence abundance)
#' #### Blue Mounds with plant traits data
#+ dbrda_bm_tr_its,message=FALSE,warning=FALSE
(dbrda_bm_tr_its <- dbrda_fun(s = fspe$its, pspe_pcoa = "none"))[c(3, 4, 5, 2)]
#' The number of sites limits the permutation design, but forb was selected. We know that this variable
#' is inversely related to C4 grass and years since restoration.
#+ plot_bm_tr_its,fig.align='center'
plot_dbrda(site_sc = dbrda_bm_tr_its$plot_data$sites,
site_bp = dbrda_bm_tr_its$plot_data$biplot)
write_csv(as_tibble(dbrda_bm_tr_its$plot_data$sites, rownames = "field_name"), "tgr_constrained_files/bm_tr_its_sitelocs.csv")
write_csv(as_tibble(dbrda_bm_tr_its$plot_data$biplot, rownames = "envvar"), "tgr_constrained_files/bm_tr_its_bp.csv")
#'
#' #### Blue Mounds with plant community data
#+ dbrda_bm_ab_its,message=FALSE,warning=FALSE
(dbrda_bm_ab_its <- dbrda_fun(s = fspe$its, pspe_pcoa = pspe_pcoa_ab$site_vectors))[c(3, 4, 5, 2)]
#' The number of sites limits the permutation design, but years since was selected.
#+ plot_bm_ab_its,fig.align='center'
plot_dbrda(site_sc = dbrda_bm_ab_its$plot_data$sites,
site_bp = dbrda_bm_ab_its$plot_data$biplot)
write_csv(as_tibble(dbrda_bm_ab_its$plot_data$sites, rownames = "field_name"), "tgr_constrained_files/bm_ab_its_sitelocs.csv")
write_csv(as_tibble(dbrda_bm_ab_its$plot_data$biplot, rownames = "envvar"), "tgr_constrained_files/bm_ab_its_bp.csv")
#' #### Blue Mounds and Fermi with plant community data
#+ dbrda_bmf_pr_its,message=FALSE,warning=FALSE
(dbrda_bmf_pr_its <- dbrda_fun(s = fspe$its, pspe_pcoa = pcoa_fun(pspe$pr, rg = c("BM", "FL"), method = "jaccard")$site_vectors, rg = c("BM", "FL")))[c(3, 4, 5, 2)]
#' Years since still holds, but I'm not convinced that this is correct. With only three sites in Fermi?
#+ plot_bmf_pr_its,fig.align='center'
plot_dbrda(site_sc = dbrda_bmf_pr_its$plot_data$sites,
site_bp = dbrda_bmf_pr_its$plot_data$biplot)
write_csv(as_tibble(dbrda_bmf_pr_its$plot_data$sites, rownames = "field_name"), "tgr_constrained_files/bmf_pr_its_sitelocs.csv")
write_csv(as_tibble(dbrda_bmf_pr_its$plot_data$biplot, rownames = "envvar"), "tgr_constrained_files/bmf_pr_its_bp.csv")
#'
#' ### AMF community (18S sequence abundance)
#' #### Blue Mounds with plant traits data
#+ dbrda_bm_tr_amf,message=FALSE,warning=FALSE
(dbrda_bm_tr_amf <- dbrda_fun(s = fspe$amf, pspe_pcoa = "none"))[c(3, 4, 5, 2)]
#' The number of sites limits the permutation design, but C4 grass was selected.
#+ plot_bm_tr_amf,fig.align='center'
plot_dbrda(site_sc = dbrda_bm_tr_amf$plot_data$sites,
site_bp = dbrda_bm_tr_amf$plot_data$biplot)
write_csv(as_tibble(dbrda_bm_tr_amf$plot_data$sites, rownames = "field_name"), "tgr_constrained_files/bm_tr_amf_sitelocs.csv")
write_csv(as_tibble(dbrda_bm_tr_amf$plot_data$biplot, rownames = "envvar"), "tgr_constrained_files/bm_tr_amf_bp.csv")
#' #### Blue Mounds with plant community data
#+ dbrda_bm_ab_amf,message=FALSE,warning=FALSE
(dbrda_bm_ab_amf <- dbrda_fun(s = fspe$amf, pspe_pcoa = pspe_pcoa_ab$site_vectors))[c(3, 4, 5, 2)]
#' The number of sites limits the permutation design, but years since was selected.
#+ plot_bm_ab_amf,fig.align='center'
plot_dbrda(site_sc = dbrda_bm_ab_amf$plot_data$sites,
site_bp = dbrda_bm_ab_amf$plot_data$biplot)
write_csv(as_tibble(dbrda_bm_ab_amf$plot_data$sites, rownames = "field_name"), "tgr_constrained_files/bm_ab_amf_sitelocs.csv")
write_csv(as_tibble(dbrda_bm_ab_amf$plot_data$biplot, rownames = "envvar"), "tgr_constrained_files/bm_ab_amf_bp.csv")
#' #### Blue Mounds and Fermi with plant community data
#+ dbrda_bmf_pr_amf,message=FALSE,warning=FALSE
(dbrda_bmf_pr_amf <- dbrda_fun(s = fspe$amf, pspe_pcoa = pcoa_fun(pspe$pr, rg = c("BM", "FL"), method = "jaccard")$site_vectors, rg = c("BM", "FL")))[c(3, 4, 5, 2)]
#' Years since still holds, but I'm not convinced that this is correct. With only three sites in Fermi?
#+ plot_bmf_pr_amf,fig.align='center'
plot_dbrda(site_sc = dbrda_bmf_pr_amf$plot_data$sites,
site_bp = dbrda_bmf_pr_amf$plot_data$biplot)
write_csv(as_tibble(dbrda_bmf_pr_amf$plot_data$sites, rownames = "field_name"), "tgr_constrained_files/bmf_pr_amf_sitelocs.csv")
write_csv(as_tibble(dbrda_bmf_pr_amf$plot_data$biplot, rownames = "envvar"), "tgr_constrained_files/bmf_pr_amf_bp.csv")
#'
#' Microbial communities align with years since restoration across regions and types (general fungi and amf).
#'
#' ## Effects of soil and plants
#' We know that restored plant communities differ among fields, and that those differences change
#' in a systematic way over time. Within our chronosequence sites at Blue Mounds, what is the relative
#' effect of soil and plant data on soil microbes?
#'
#' Variation partitioning will be used. Since forward selection failed to find a parsimonious number
#' of important explanatory variables, we'll just use the entire plant and soil datasets here to look for
#' the relative contribution of these weak effects.
#'
#' With so many explanatory axes, the analysis failed with raw data, so explanatory data
#' will first be transformed into PCoA axes.
#'
#' ### Soil fungi variation partitioning
soil_pcoa <- pcoa_fun(soil, method = "euclidean", corr = "lingoes")
vpdat_its <- list(
Y = fspe$its %>% filter(region == "BM", field_type == "restored") %>%
select(-field_type, -region) %>%
data.frame(., row.names = 1),
X1 = pspe_pcoa_ab$site_vectors,
X2 = soil_pcoa$site_vectors
)
vpdat_its_zcols <- vpdat_its %>% map(\(df) which(apply(df, 2, sum) == 0))
#+ varpart_its
(vp_its <- varpart(vpdat_its$Y %>% select(-vpdat_its_zcols$Y), vpdat_its$X1, vpdat_its$X2))
#+ varpart_its_plot,fig.align='center'
plot(vp_its, digits = 2, bg = c("tan", "palegreen"))
#' Testing significance of A and B fractions with `rda()`
anova(rda(vpdat_its$Y %>% select(-vpdat_its_zcols$Y) ~ Axis.1 + Axis.2 + Condition(as.matrix(vpdat_its$X2)), data = vpdat_its$X1))
#' Plant data not significant
anova(rda(vpdat_its$Y %>% select(-vpdat_its_zcols$Y) ~ Axis.1 + Axis.2 + Condition(as.matrix(vpdat_its$X1)), data = vpdat_its$X2))
#' Soil data not significant
#'
#' With other axes accounted for, plant or soil axes explain ~23% of the ITS fungal community variation in Blue Mounds.
#' Neither is significant.
#'
#' ### AMF variation partitioning
vpdat_amf <- list(
Y = fspe$amf %>% filter(region == "BM", field_type == "restored") %>%
select(-field_type, -region) %>%
data.frame(., row.names = 1),
X1 = pspe_pcoa_ab$site_vectors,
X2 = soil_pcoa$site_vectors
)
vpdat_amf_zcols <- vpdat_amf %>% map(\(df) which(apply(df, 2, sum) == 0))
#+ varpart_amf
(vp_amf <- varpart(vpdat_amf$Y %>% select(-vpdat_amf_zcols$Y), vpdat_amf$X1, vpdat_amf$X2))
#+ varpart_amf_plot,fig.align='center'
plot(vp_amf, digits = 2, bg = c("tan", "palegreen"))
#' Testing significance of A and B fractions with `rda()`
anova(rda(vpdat_amf$Y %>% select(-vpdat_amf_zcols$Y) ~ Axis.1 + Axis.2 + Condition(as.matrix(vpdat_amf$X2)), data = vpdat_amf$X1))
#' Plant data not significant
anova(rda(vpdat_amf$Y %>% select(-vpdat_amf_zcols$Y) ~ Axis.1 + Axis.2 + Condition(as.matrix(vpdat_amf$X1)), data = vpdat_amf$X2))
#' Soil data not significant
#'
#' With soil axes accounted for, plant axes explain 31% of the ITS fungal community variation in Blue Mounds.
#' The unique explanation made by soil variables is about half as much. Neither is significant.
#'
#' ## Response correlations
#' Fungal communities varied with years since restoration, C4 grass, and forbs. How do these predictors affect
#' microbial biomass and function?
#+ func_vars_df
func_vars <-
sites %>%
left_join(ptr %>% select(field_name, C4_grass, forb), by = join_by(field_name)) %>%
left_join(fb %>% select(field_name, fungi, amf), by = join_by(field_name)) %>%
left_join(wsa %>% select(field_name, wsa), by = join_by(field_name)) %>%
rename(
C4_grass_pct = C4_grass,
forb_pct = forb,
mass_fungi = fungi,
mass_amf = amf
) %>%
select(-field_key)
#' Plant traits data are only available in Wisconsin; try these first.
#+ pairs_wi,fig.align='center'
ggpairs(
data = func_vars %>% filter(field_type == "restored", region != "FL"),
columns = 4:9
) +
theme_bw()
#' In all restored sites, response correlations are still possible without plant traits.
#+ pairs_all,fig.align='center'
ggpairs(
data = func_vars %>% filter(field_type == "restored") %>%
select(-C4_grass_pct, -forb_pct),
columns = 4:7
) +
theme_bw()
#' Intercorrelations among responses are obvious and not very useful. Years since restoration correlates
#' positively with fungal biomass, and with sites from Fermi included, this relationship is fairly strong.
#' Fermi sites, with abundant SOM, are probably influencing this result more than years are, though.
#'
#' **What if we correlate the constrained axes with responses?** This might help a little by
#' reordering sites based on time since restoration *and* community differences, reducing the leverage of
#' sites from Fermi. Let's arrange the first axes from each dbRDA and join them with response data.
#+ axis_correlations
axis_corr <-
list(
bm_tr_its = dbrda_bm_tr_its$plot_data$sites,
bm_ab_its = dbrda_bm_ab_its$plot_data$sites,
bm_tr_amf = dbrda_bm_tr_amf$plot_data$sites,
bm_ab_amf = dbrda_bm_ab_amf$plot_data$sites
) %>%
map( ~ .x %>%
data.frame() %>%
rownames_to_column(var = "field_name") %>%
left_join(sites %>% select(field_name, yr_since), by = join_by(field_name)) %>%
left_join(fb %>% select(field_name, fungi, amf), by = join_by(field_name)) %>%
left_join(wsa %>% select(field_name, wsa), by = join_by(field_name)) %>%
rename(mass_fungi = fungi, mass_amf = amf) %>%
select(-dbRDA2)
)
#+ axis_correlation_plots_list
axis_corr_plot <-
axis_corr %>%
map( ~ .x %>%
ggpairs(columns = 2:6) +
theme_bw())
#'
#' Pairs panels were investigated (not shown), and nothing new shows up.