Diversity analysis
Packages
library(phyloseq)
library(ggpubr)
library(tidyverse)
theme_set(theme_bw())
Load phyloseq object
For detail on how to get a phyloseq object please see chapter ‘From Qiime2 into R’.
# reading in a previously saved phyloseq object
physeqB.flt <- readRDS('physeqB.flt')
physeqB.flt
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4932 taxa and 136 samples ]
## sample_data() Sample Data: [ 136 samples by 38 sample variables ]
## tax_table() Taxonomy Table: [ 4932 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4932 tips and 4931 internal nodes ]
Alpha diversity
Pre-processing of phyloseq object
#filter
`%notin%` = Negate(`%in%`)
physeqB.flt.norefs <- prune_samples(sample_data(physeqB.flt)$Name %notin% c("41AutoclavedControlT0","42AutoclavedControlT0","43AutoclavedControlT0","44AutoclavedControlT0"), physeqB.flt)
physeqB.flt.norefs <- prune_samples(sample_data(physeqB.flt.norefs)$Treatment != "Reference", physeqB.flt.norefs)
physeqB.flt.norefs <- prune_samples(sample_data(physeqB.flt.norefs)$Treatment != "Blank", physeqB.flt.norefs)
physeqB.flt.norefs <- phyloseq::prune_taxa(taxa_sums(physeqB.flt.norefs) != 0, physeqB.flt.norefs)
# checking minimum sample size
min(colSums(otu_table(physeqB.flt.norefs)))
## [1] 9214
# rarefy
rare_Bac <- phyloseq::rarefy_even_depth(physeqB.flt.norefs, sample.size = min(colSums(otu_table(physeqB.flt.norefs))), rngseed = TRUE, replace = TRUE, trimOTUs = TRUE)
# check rarefied phyloseq object
rare_Bac
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4929 taxa and 128 samples ]
## sample_data() Sample Data: [ 128 samples by 38 sample variables ]
## tax_table() Taxonomy Table: [ 4929 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4929 tips and 4928 internal nodes ]
#create diversity indices
ps.div <- phyloseq::estimate_richness(rare_Bac)
# add sample names
ps.div$`#SampleID` <- phyloseq::sample_names(physeqB.flt.norefs)
# add diversity indices to phyloseq object
meta.df <- data.frame(sample_data(physeqB.flt.norefs))
meta.df <- meta.df %>% rownames_to_column("#SampleID") %>%
left_join(ps.div, by = "#SampleID") %>% column_to_rownames("#SampleID")
# add pilou evenness (see calculation in "Numerical Ecology with R - Bocard et al")
meta.df$pielou_ev <- ps.div$Shannon/log(ps.div$Observed)
# replace metadata with temp meta.df which includes the diversity indices
physeqB.flt.norefs@sam_data <- sample_data(meta.df)
Plotting
Colors
#manually created with help of https://htmlcolorcodes.com/
cols <- c("#F9E79F", "#FDEBD0", "#F6DDCC", "#E5E8E8", "#CCD1D1", "#AAB7B8", "#707B7C", "#566573","#82E0AA", "#641E16", "#8E44AD")
cols_noheatH2O2 <- c("#F9E79F", "#E5E8E8", "#CCD1D1", "#AAB7B8", "#707B7C", "#566573","#82E0AA", "#641E16", "#8E44AD")
cols_ref <- c("#F9E79F", "#FDEBD0", "#F6DDCC", "#E5E8E8", "#CCD1D1", "#AAB7B8", "#707B7C", "#566573", "#82E0AA","#641E16", "#8E44AD", "#E74C3C")
Richness
p1 <- data.frame(phyloseq::sample_data(physeqB.flt.norefs)) %>%
ggpubr::ggboxplot(., x = "Treatment", y = "Observed", facet.by = "Week", fill = "Treatment", ylab = "Richness", xlab = "Treatment", palette = cols) +
theme_bw() +
xlab("") +
theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) +
geom_jitter(width = 0.1) +
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
ggpubr::stat_compare_means(method = "kruskal",label.x = 2)
Shannon
# Shannon
p2 <- data.frame(phyloseq::sample_data(physeqB.flt.norefs)) %>%
ggpubr::ggboxplot(., x = "Treatment", y = "Shannon", facet.by = "Week", fill = "Treatment", ylab = "Shannon (H)", xlab = "Treatment", palette = cols) +
theme_bw() +
xlab("") +
theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) +
geom_jitter(width = 0.1) +
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
ggpubr::stat_compare_means(method = "kruskal",label.x = 2)
Pielou’s evenness
# Pielou
p3 <- data.frame(phyloseq::sample_data(physeqB.flt.norefs)) %>%
ggpubr::ggboxplot(., x = "Treatment", y = "pielou_ev", facet.by = "Week", fill = "Treatment", ylab = "Pilou Evenness (J)", xlab = "Treatment", palette = cols) +
theme_bw() +
xlab("") +
theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) +
geom_jitter(width = 0.1) +
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
ggpubr::stat_compare_means(method = "kruskal",label.x = 2)
All plots together
pdf(NULL)
g1 <- ggpubr::ggarrange(p1, p2, p3, common.legend = TRUE, ncol = 1)
x = dev.off()
g1
Beta diversity
PCA
# filter and phyloseq object from alphadiversity chunk: "physeqB.flt.norefs"
# create a new phyloseq object to be less error-prone for downstream analyis
physeqPCA <- physeqB.flt.norefs # excluding Heat, H2O2, reference and blank
# normalisation and transform using the microbiome package
# I chose a centred log-ratio transform and a principle component analysis in a euclidean space.
physeqPCA <- microbiome::transform(physeqPCA, "clr")
#ordination
ordination <- phyloseq::ordinate(physeqPCA , "RDA")
p1 <- phyloseq::plot_ordination(physeqPCA, ordination , color = "Treatment", shape = "Week") +
geom_point(aes(size = Dieldrin_trans)) +
ggtitle("PCA (clr transform)") +
# scale_color_manual(values = cols_noheatH2O2)
# OR
scale_color_manual(values = cols_ref)
# p1
# ggsave("PCA.png", height=6, width=7.5, units='in', dpi=600)
knitr::include_graphics("./PCA.png")
RDA
# PERCENT FILTERVALUE FOR ALL RDAs
pcnt <- 0.25
# FORMULA FOR ALL RDAs
form <- formula(~ Treatment+Week)
# labels for arrows
# pos for arrows
xarw <- 5
yarw <- 5
# arrow length map
larw <- 1.5
# arrow length map for chromosol rda
larwch <- 1
# fixed width of x axis
xaxis = c(-6.75, 6.5)
# position of annotation
textposx = c(-4.4)
# arrow color
arrowcolor <- "grey13"
physeqB.flt.norefs <- prune_samples(sample_data(physeqB.flt)$Treatment != "Reference", physeqB.flt)
physeqB.flt.norefs <- prune_samples(sample_data(physeqB.flt.norefs)$Treatment != "Blank", physeqB.flt.norefs)
physeqB.flt.norefs <- prune_samples(sample_data(physeqB.flt.norefs)$Treatment != "Heat", physeqB.flt.norefs)
physeqB.flt.norefs <- prune_samples(sample_data(physeqB.flt.norefs)$Treatment != "H2O2", physeqB.flt.norefs)
physeqB.flt.norefs <- phyloseq::prune_taxa(taxa_sums(physeqB.flt.norefs) != 0, physeqB.flt.norefs)
physeqPCA <- physeqB.flt.norefs # excluding Heat, H2O2, reference and blank
# normalisation and transform
physeqPCA <- microbiome::transform(physeqPCA, "clr")
#ordination
ordination <- phyloseq::ordinate(physeqPCA, "RDA", "bray", formula = form )
p2 <- phyloseq::plot_ordination(physeqPCA, ordination , color = "Treatment", shape = "Week") +
geom_hline(yintercept=0, linetype="dashed", color = "gray") +
geom_vline(xintercept=0, linetype="dashed", color = "gray") +
geom_point(aes(size = Dieldrin_trans)) +
#scale_colour_gradient(low = colourlow, high = colourhigh) +
ggtitle("RDA Abundances (clr) ~ Treatment+Week ") +
scale_color_manual(values = cols_noheatH2O2)
# Now add the environmental variables as arrows to either of these p1 or p2
arrowmat <- vegan::scores(ordination, display = "bp")
arrowmat <- data.frame(arrowmat)
#rownames(arrowmat) <- arrowlabel
# Add labels, make a data.frame
arrowdf <- data.frame(labels = rownames(arrowmat), arrowmat)
# Define the arrow aesthetic mapping
arrow_map <- aes(xend = larw * RDA1,
yend = larw * RDA2,
x = 0,
y = 0,
shape = NULL,
color = NULL,
label = labels)
label_map <- aes(x = xarw * RDA1,
y = yarw * RDA2,
shape = NULL,
color = NULL,
label = labels)
arrowhead = arrow(length = unit(0.02, "npc"))
p2 <- p2 +
geom_segment(
mapping = arrow_map,
size = .5,
data = arrowdf,
color = arrowcolor,
arrow = arrowhead) +
geom_text(
mapping = label_map,
size = 4,
data = arrowdf,
show.legend = FALSE)
# p2
# save figure for publication
# ggsave("RDA.png", height=6, width=7.5, units='in', dpi=600)
knitr::include_graphics("./RDA.png")
unweighted unifrac
physeqPCA <- physeqB.flt.norefs
#ordination
ordination <- phyloseq::ordinate(physeqPCA , "PCoA", distance = 'unifrac')
p1 <- phyloseq::plot_ordination(physeqPCA, ordination , color = "Treatment", shape = "Week") +
geom_point(aes(size = Dieldrin_trans)) +
ggtitle("PCoA unweighted unifrac") +
# scale_color_manual(values = cols_noheatH2O2)
# OR
scale_color_manual(values = cols_noheatH2O2)
p1
Note: This code is an amalgamation from various sources. Apart from putting it together into a pipeline I do not take credit for it.