# Please note this is all the raw R code pulled from all the `.Rmd` files.
# Use at your own risk as this has not been tested independently of the Rmarkdown
# workflows. In other words, the code works in the Rmarkdown format, but the
# complete pipeline has not been tested using just this code. I used `knitr::purl()`
# to pull out the R code from the Rmarkdown file. I did this for you in case you
# want the code without hearing me drone on about colors or zoomable figures. The
# first part is the DADA2 workflow and the second part is the phyloseq workflow.
# Commands that are commented out are things I tried that I could never get to work.
# Any line that starts like this: `## ----` is the code chunk name and details.

########################
# BEGIN DADA2 WORKFLOW #
########################

## ----setup, include=FALSE------------------------------------------------
remove(list = ls())
library(dada2); packageVersion("dada2")
library(ShortRead); packageVersion("ShortRead")
library(ggplot2); packageVersion("ggplot2")
library(TeachingDemos)
library(ips)
library(seqinr)
library(ape)
library(rstudioapi)
library(knitr)
library(rmarkdown)

## ----set_wd, include=FALSE-----------------------------------------------
knitr::opts_knit$set(root.dir = getwd())
ptm <- proc.time()

## ----list_fastq_files----------------------------------------------------
path_X1 <- "02_MERGED/RUN01/INPUT_FILES/"
head(list.files(path_X1))
path_Y2 <- "02_MERGED/RUN02/INPUT_FILES/"
head(list.files(path_Y2))
path_Z3 <- "02_MERGED/RUN03/INPUT_FILES/"
head(list.files(path_Z3))

## ----sort_files----------------------------------------------------------
fnFs_X1 <- sort(list.files(path_X1, pattern = "_R1.merged.fastq"))
fnRs_X1 <- sort(list.files(path_X1, pattern = "_R2.merged.fastq"))
fnFs_Y2 <- sort(list.files(path_Y2, pattern = "_R1.merged.fastq"))
fnRs_Y2 <- sort(list.files(path_Y2, pattern = "_R2.merged.fastq"))
fnFs_Z3 <- sort(list.files(path_Z3, pattern = "_R1.merged.fastq"))
fnRs_Z3 <- sort(list.files(path_Z3, pattern = "_R2.merged.fastq"))

## ----split_name----------------------------------------------------------
sample.names_X1 <- sapply(strsplit(fnFs_X1, "_"), `[`, 1)
fnFs_X1 <- file.path(path_X1, fnFs_X1)
fnRs_X1 <- file.path(path_X1, fnRs_X1)
sample.names_Y2 <- sapply(strsplit(fnFs_Y2, "_"), `[`, 1)
fnFs_Y2 <- file.path(path_Y2, fnFs_Y2)
fnRs_Y2 <- file.path(path_Y2, fnRs_Y2)
sample.names_Z3 <- sapply(strsplit(fnFs_Z3, "_"), `[`, 1)
fnFs_Z3 <- file.path(path_Z3, fnFs_Z3)
fnRs_Z3 <- file.path(path_Z3, fnRs_Z3)

## ----plot_quality_scores_forward, warning=FALSE--------------------------
plotQualityProfile(fnFs_X1[9:11])
plotQualityProfile(fnFs_Y2[9:11])
plotQualityProfile(fnFs_Z3[2:4])

## ----plot_quality_scores_reverse, warning=FALSE--------------------------
plotQualityProfile(fnRs_X1[9:11])
plotQualityProfile(fnRs_Y2[9:11])
plotQualityProfile(fnRs_Z3[2:4])

## ----move_files----------------------------------------------------------
# Place filtered files in filtered/ subdirectory
filt_path_X1 <- file.path(path_X1, "filtered")
filtFs_X1 <- file.path(filt_path_X1, paste0(sample.names_X1, "_F_filt.fastq.gz"))
filtRs_X1 <- file.path(filt_path_X1, paste0(sample.names_X1, "_R_filt.fastq.gz"))
filt_path_Y2 <- file.path(path_Y2, "filtered")
filtFs_Y2 <- file.path(filt_path_Y2, paste0(sample.names_Y2, "_F_filt.fastq.gz"))
filtRs_Y2 <- file.path(filt_path_Y2, paste0(sample.names_Y2, "_R_filt.fastq.gz"))
filt_path_Z3 <- file.path(path_Z3, "filtered")
filtFs_Z3 <- file.path(filt_path_Z3, paste0(sample.names_Z3, "_F_filt.fastq.gz"))
filtRs_Z3 <- file.path(filt_path_Z3, paste0(sample.names_Z3, "_R_filt.fastq.gz")) ## ----filter-------------------------------------------------------------- out_X1 <- filterAndTrim(fnFs_X1, filtFs_X1, fnRs_X1, filtRs_X1, truncLen=c(260,160), maxN=0, maxEE=c(2,5), truncQ=2, rm.phix=TRUE, compress=TRUE, multithread=TRUE) head(out_X1) out_Y2 <- filterAndTrim(fnFs_Y2, filtFs_Y2, fnRs_Y2, filtRs_Y2, truncLen=c(260,160), maxN=0, maxEE=c(2,5), truncQ=2, rm.phix=TRUE, compress=TRUE, multithread=TRUE) head(out_Y2) out_Z3 <- filterAndTrim(fnFs_Z3, filtFs_Z3, fnRs_Z3, filtRs_Z3, truncLen=c(260,140), maxN=0, maxEE=c(2,5), truncQ=2, rm.phix=TRUE, compress=TRUE, multithread=TRUE) head(out_Z3) ## ----learn_errors_forward------------------------------------------------ errF_X1 <- learnErrors(filtFs_X1, multithread = TRUE) errF_Y2 <- learnErrors(filtFs_Y2, multithread = TRUE) errF_Z3 <- learnErrors(filtFs_Z3, multithread = TRUE) ## ----learn_errors_reverse------------------------------------------------ errR_X1 <- learnErrors(filtRs_X1, multithread = TRUE) errR_Y2 <- learnErrors(filtRs_Y2, multithread = TRUE) errR_Z3 <- learnErrors(filtRs_Z3, multithread = TRUE) ## ----plot_error, warning=FALSE------------------------------------------- plotErrors(errR_X1, nominalQ=TRUE) plotErrors(errR_Y2, nominalQ=TRUE) plotErrors(errR_Z3, nominalQ=TRUE) ## ----dereplicate_reads_F------------------------------------------------- derepFs_X1 <- derepFastq(filtFs_X1) names(derepFs_X1) <- sample.names_X1 derepFs_Y2 <- derepFastq(filtFs_Y2) names(derepFs_Y2) <- sample.names_Y2 derepFs_Z3 <- derepFastq(filtFs_Z3) names(derepFs_Z3) <- sample.names_Z3 ## ----dereplicate_reads_R------------------------------------------------- derepRs_X1 <- derepFastq(filtRs_X1) names(derepRs_X1) <- sample.names_X1 derepRs_Y2 <- derepFastq(filtRs_Y2) names(derepRs_Y2) <- sample.names_Y2 derepRs_Z3 <- derepFastq(filtRs_Z3) names(derepRs_Z3) <- sample.names_Z3 ## ----run_dada2_forward--------------------------------------------------- #Run01 dadaFs_X1 <- dada(derepFs_X1, err = errF_X1, multithread = TRUE) dadaFs_X1[[1]] #Run02 dadaFs_Y2 <- dada(derepFs_Y2, err = errF_Y2, multithread = TRUE) dadaFs_Y2[[1]] #Run03 dadaFs_Z3 <- dada(derepFs_Z3, err = errF_Z3, multithread = TRUE) dadaFs_Z3[[1]] ## ----run_dada2_reverse--------------------------------------------------- #Run01 dadaRs_X1 <- dada(derepRs_X1, err = errR_X1, multithread = TRUE) dadaRs_X1[[1]] #Run02 dadaRs_Y2 <- dada(derepRs_Y2, err = errR_Y2, multithread = TRUE) dadaRs_Y2[[1]] #Run03 dadaRs_Z3 <- dada(derepRs_Z3, err = errR_Z3, multithread = TRUE) dadaRs_Z3[[1]] ## ----merge_paired_reads-------------------------------------------------- mergers_X1 <- mergePairs(dadaFs_X1, derepFs_X1, dadaRs_X1, derepRs_X1) mergers_Y2 <- mergePairs(dadaFs_Y2, derepFs_Y2, dadaRs_Y2, derepRs_Y2) mergers_Z3 <- mergePairs(dadaFs_Z3, derepFs_Z3, dadaRs_Z3, derepRs_Z3) ## ----head_file, eval = FALSE, echo = FALSE------------------------------- ## head(mergers_X1[[1]]) ## head(mergers_Y2[[1]]) ## head(mergers_Z3[[1]]) ## ----seq_table----------------------------------------------------------- #Run01 seqtab_X1 <- makeSequenceTable(mergers_X1) dim(seqtab_X1) table(nchar(getSequences(seqtab_X1))) #Run02 seqtab_Y2 <- makeSequenceTable(mergers_Y2) dim(seqtab_Y2) table(nchar(getSequences(seqtab_Y2))) #Run03 seqtab_Z3 <- makeSequenceTable(mergers_Z3) dim(seqtab_Z3) table(nchar(getSequences(seqtab_Z3))) ## ----trim_length_length-------------------------------------------------- #Run01 seqtab_X1.2 <- 
seqtab_X1[,nchar(colnames(seqtab_X1)) %in% seq(368,380)] dim(seqtab_X1.2) table(nchar(getSequences(seqtab_X1.2))) #Run02 seqtab_Y2.2 <- seqtab_Y2[,nchar(colnames(seqtab_Y2)) %in% seq(368,380)] dim(seqtab_Y2.2) table(nchar(getSequences(seqtab_Y2.2))) #Run03 seqtab_Z3.2 <- seqtab_Z3[,nchar(colnames(seqtab_Z3)) %in% seq(368,380)] dim(seqtab_Z3.2) table(nchar(getSequences(seqtab_Z3.2))) ## ----save_RDS------------------------------------------------------------ saveRDS(seqtab_X1.2, "02_MERGED/RUN01/seqtab_X1.2.rds") saveRDS(seqtab_Y2.2, "02_MERGED/RUN02/seqtab_Y2.2.rds") saveRDS(seqtab_Z3.2, "02_MERGED/RUN03/seqtab_Z3.2.rds") ## ----proc_time_1--------------------------------------------------------- proc.time() - ptm ## ----chimera_on_ind_runs------------------------------------------------- #Run01 seqtab_X1.2.nochim <- removeBimeraDenovo(seqtab_X1.2, method="consensus", multithread=TRUE) dim(seqtab_X1.2.nochim) sum(seqtab_X1.2.nochim)/sum(seqtab_X1.2) #Run02 seqtab_Y2.2.nochim <- removeBimeraDenovo(seqtab_Y2.2, method="consensus", multithread=TRUE) dim(seqtab_Y2.2.nochim) sum(seqtab_Y2.2.nochim)/sum(seqtab_Y2.2) #Run03 seqtab_Z3.2.nochim <- removeBimeraDenovo(seqtab_Z3.2, method="consensus", multithread=TRUE) dim(seqtab_Z3.2.nochim) sum(seqtab_Z3.2.nochim)/sum(seqtab_Z3.2) ## ----build_table_to_track_reads------------------------------------------ #Run01 getN_X1 <- function(x) sum(getUniques(x)) track_X1 <- cbind(out_X1, sapply(dadaFs_X1, getN_X1), sapply(dadaRs_X1, getN_X1), sapply(mergers_X1, getN_X1), rowSums(seqtab_X1.2.nochim)) colnames(track_X1) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") rownames(track_X1) <- sample.names_X1 #Run02 getN_Y2 <- function(x) sum(getUniques(x)) track_Y2 <- cbind(out_Y2, sapply(dadaFs_Y2, getN_Y2), sapply(dadaRs_Y2, getN_Y2), sapply(mergers_Y2, getN_Y2), rowSums(seqtab_Y2.2.nochim)) colnames(track_Y2) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") rownames(track_Y2) <- sample.names_Y2 #Run03 getN_Z3 <- function(x) sum(getUniques(x)) track_Z3 <- cbind(out_Z3, sapply(dadaFs_Z3, getN_Z3), sapply(dadaRs_Z3, getN_Z3), sapply(mergers_Z3, getN_Z3), rowSums(seqtab_Z3.2.nochim)) colnames(track_Z3) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") rownames(track_Z3) <- sample.names_Z3 ## ----track_changes------------------------------------------------------- #Run01 track_X1 write.table(track_X1, "RUN01_read_changes.txt", sep = "\t", quote = FALSE, col.names=NA) #Run02 track_Y2 write.table(track_Y2, "RUN02_read_changes.txt", sep = "\t", quote = FALSE, col.names=NA) #Run03 track_Z3 write.table(track_Z3, "RUN03_read_changes.txt", sep = "\t", quote = FALSE, col.names=NA) ## ----save_rds2----------------------------------------------------------- saveRDS(seqtab_X1.2.nochim, "02_MERGED/RUN01/seqtab_X1.2.nochim.rds") saveRDS(seqtab_Y2.2.nochim, "02_MERGED/RUN02/seqtab_Y2.2.nochim.rds") saveRDS(seqtab_Z3.2.nochim, "02_MERGED/RUN03/seqtab_Z3.2.nochim.rds") ## ----proc_time_2--------------------------------------------------------- proc.time() - ptm ## ----clear_data---------------------------------------------------------- remove(list = ls()) ## ----set_wd_2, include=FALSE--------------------------------------------- knitr::opts_knit$set(root.dir = getwd()) ptm <- proc.time() ## ----read_RDS_files_combo------------------------------------------------ seqtab.1 <- readRDS("02_MERGED/RUN01/seqtab_X1.2.rds") seqtab.2 <- readRDS("02_MERGED/RUN02/seqtab_Y2.2.rds") seqtab.3 <- 
readRDS("02_MERGED/RUN03/seqtab_Z3.2.rds")

## ----order_files_combo---------------------------------------------------
rownames(seqtab.1) <- sapply(strsplit(rownames(seqtab.1), "_"), `[`, 1)
rownames(seqtab.2) <- sapply(strsplit(rownames(seqtab.2), "_"), `[`, 1)
identical(sort(rownames(seqtab.1)), sort(rownames(seqtab.2))) # Should be TRUE
seqtab.2 <- seqtab.2[rownames(seqtab.1), ]

## ----matrix_sum_combo----------------------------------------------------
samples <- rownames(seqtab.1)
seqs <- unique(c(colnames(seqtab.1), colnames(seqtab.2)))
st.sum <- matrix(0L, nrow = length(samples), ncol = length(seqs))
rownames(st.sum) <- samples
colnames(st.sum) <- seqs
st.sum[, colnames(seqtab.1)] <- st.sum[, colnames(seqtab.1)] + seqtab.1
st.sum[, colnames(seqtab.2)] <- st.sum[, colnames(seqtab.2)] + seqtab.2

## ----save_combo----------------------------------------------------------
saveRDS(st.sum, "02_MERGED/combo_run1_run2.rds")

## ----merge_combo---------------------------------------------------------
combo <- readRDS("02_MERGED/combo_run1_run2.rds")
seqtab.3 <- readRDS("02_MERGED/RUN03/seqtab_Z3.2.rds")
st.all <- mergeSequenceTables(combo, seqtab.3)

## ----chimera_on_combined_runs--------------------------------------------
seqtab <- removeBimeraDenovo(st.all, method = "consensus", multithread = TRUE)

## ----assign_tax----------------------------------------------------------
set.seed(119) # for reproducibility
tax_gg <- assignTaxonomy(seqtab, "gg_13_8_train_set_97.fa.gz", multithread = TRUE)
set.seed(911) # for reproducibility
tax_silva <- assignTaxonomy(seqtab, "silva_nr_v132_train_set.fa.gz", multithread = TRUE)

## ----save_image_combo----------------------------------------------------
save.image("combo_pipeline.rdata")

## ----proc_time_3---------------------------------------------------------
proc.time() - ptm

## ----sessionInfo, include=TRUE, echo=TRUE, results='markup'--------------
sessionInfo()
devtools::session_info()

########################
#  END DADA2 WORKFLOW  #
########################

#############################
#  BEGIN PHYLOSEQ WORKFLOW  #
#############################

## ----setup, include=FALSE------------------------------------------------
remove(list = ls())
library(dada2); packageVersion("dada2")
library(ShortRead); packageVersion("ShortRead")
library(phyloseq); packageVersion("phyloseq")
library(ggplot2); packageVersion("ggplot2")
library("plyr"); packageVersion("plyr")
library(vegan)
library(scales)
library(grid)
library(reshape2)
library(rstudioapi)
library(knitr)
library(kableExtra)
library(data.table)
library(DT)
library(rmarkdown)
library(pander)
library(formatR)
library(tidyverse)
library(gridExtra)
library(grid)
library(grDevices)
library(svgPanZoom)
library(RCurl)
library(plotly)
library(pairwiseAdonis)
sessionInfo()
set.seed(0199)

## ----set_wd, include=FALSE-----------------------------------------------
knitr::opts_knit$set(root.dir = getwd()) # This will setwd to wherever the .Rmd file is opened.
ptm <- proc.time()
start_time <- Sys.time()
#opts_chunk$set(cache=TRUE)
#formatR::tidy_app() run this in R to tidy code. How to do it here?
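## One possible answer to the question above (a sketch, untested here; the file
## names below are assumptions): formatR can tidy the purled script directly,
## or you can set the knitr chunk option `tidy = TRUE` when rendering the .Rmd.
## UNCOMMENT TO RUN formatR
#library(formatR)
#tidy_source(source = "phyloseq_workflow.R", file = "phyloseq_workflow_tidy.R",
#            width.cutoff = 80)
#knitr::opts_chunk$set(tidy = TRUE, tidy.opts = list(width.cutoff = 80))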
## UNCOMMENT TO RUN lintr #rm(list=ls()) #library(lintr) #names(lintr::default_linters) #lintr::lint( # filename = "Phyloseq_workflow.Rmd", # linters = lintr::with_defaults( # trailing_whitespace_linter = NULL, # commented_code_linter = NULL # ) #) ## ----mkdirs, include=FALSE----------------------------------------------- dir.create("TABLES/OUTPUT/PS/", recursive = TRUE) dir.create("TABLES/OUTPUT/OTHER/", recursive = TRUE) dir.create("TABLES/OUTPUT/LEfSe/", recursive = TRUE) dir.create("TABLES/OUTPUT/SUPP/", recursive = TRUE) dir.create("FIGURES/OUTPUT/", recursive = TRUE) dir.create("PS_OBJECTS/", recursive = TRUE) ## ----define_color_blind_scheme------------------------------------------- #Full palette friend_pal <- c("#009E73", "#D55E00", "#F0E442", "#CC79A7", "#56B4E9", "#E69F00", "#0072B2", "#7F7F7F", "#B6DBFF", "#000000") #Fish palette samp_pal <- c("#CC79A7", "#0072B2", "#009E73", "#56B4E9", "#E69F00") ## ----mean_bite_analysis-------------------------------------------------- library(ggthemes) all_traits <- read.csv("TABLES/INPUT/Mean_bite_characteristics.txt", header = TRUE, sep = "\t") ids <- all_traits[, 1:2] write.table(all_traits, "TABLES/OUTPUT/SUPP/Table_S1.txt", sep = "\t", row.names = FALSE, quote = FALSE, col.names = c("Host species", "Site", "Sediment depth (mm)", "Turf height(mm)", "Prop. mark on substrate", "Prop. vertical", "Prop. concave", "Prop. convex", "Articulated red coralline", "CCA/Turf on CCA", "Dictyota", "Epiphytes", "Gorgonian", "Halimeda", "Laurencia", "Sponge", "Stypopodium", "Turf")) datatable(all_traits, rownames = FALSE, width = "100%", colnames = c("Host species", "Site", "Sediment depth (mm)", "Turf height(mm)", "Prop. mark on substrate", "Prop. vertical", "Prop. concave", "Prop. convex", "Articulated red coralline", "CCA/Turf on CCA", "Dictyota", "Epiphytes", "Gorgonian", "Halimeda", "Laurencia", "Sponge", "Stypopodium", "turf"), caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Supplementary Table 1: ", htmltools::em("INSERT DESCRIPTION.")), extensions = "Buttons", options = list(columnDefs = list(list( className = "dt-left", targets = 0)), dom = "Blrtip", pageLength = 5, lengthMenu = c(5, 10, 15), buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE)) ## ----standardize_diet_variables, results = 'hide'------------------------ # First quant_traits_std <- decostand(all_traits[, 3:4], "range") / 10 # Second Mean_prop_mark_on_substrate_std <- all_traits[, 5] / (10 / 2) prop_vertical_std <- all_traits[, 6] / (10 / 2) prop_concave_std <- all_traits[, 7] / (10 / 3) prop_convex_std <- all_traits[, 8] / (10 / 3) all_traits_std <- cbind(quant_traits_std, Mean_prop_mark_on_substrate_std, prop_vertical_std, prop_concave_std, prop_convex_std, all_traits[, 9:18]) nmds <- metaMDS(all_traits_std, distance = "bray", k = 3, trymax = 40) ## ----diet_NMDS_mods------------------------------------------------------ NMDS <- data.frame(NMDS1 = nmds$points[, 1], NMDS2 = nmds$points[, 2], MDS3 = nmds$points[, 3]) stressplot(nmds) ## ----get_env_vectors----------------------------------------------------- set.seed(1) vec_sp <- envfit(nmds$points, all_traits[, 3:18], perm = 1000, choices = c(1, 2, 3)) spp_scrs <- as.data.frame(scores(vec_sp, display = "vectors")) spp_scrs$species <- rownames(spp_scrs) colnames(spp_scrs) <- c("S_MDS1", "S_MDS2", "SMDS3", "species") ## ----mean_bite_initial_plot---------------------------------------------- #Plot results using ggplot2 #Merge with data that we want to use in plotting NMDS <- 
cbind(ids, NMDS) stuff <- ggplot(NMDS) + geom_point(mapping = aes(x = NMDS1, y = NMDS2, shape = Site, colour = Species), size = 4) + geom_segment(aes(x = 0, y = 0, xend = S_MDS1, yend = S_MDS2), data = spp_scrs, arrow = arrow(length = unit(0.5, "cm")), colour = "black", inherit.aes = FALSE) + geom_text(data = spp_scrs, aes(x = S_MDS1, y = S_MDS2, label = species), size = 3) + labs(title = "bray distance on all traits (standardized)", subtitle = "Stress = 0.04") #####Now subset just the spp. scrs with the highest correlations spp_scrs_sub <- spp_scrs[-c(5, 11, 12), ] #Categorize species scores into different variable types for plotting variable_type <- as.factor(c(rep("substrate", 5), rep("algae", 8))) spp_scrs_sub <- cbind(variable_type, spp_scrs_sub) spp_scrs_substrate <- subset(spp_scrs_sub, spp_scrs_sub$variable_type == "substrate") spp_scrs_algae <- subset(spp_scrs_sub, spp_scrs_sub$variable_type == "algae") ## ----mean_bite_figure, fig.align = "center", fig.cap = "Figure 1: Bray curtis distance on all traits; stress = 0.04", out.width = "90%"---- fig_1 <- ggplot(NMDS) + geom_point(mapping = aes(x = NMDS1, y = NMDS2, shape = Site, colour = Species), size = 4) + scale_colour_manual(values = samp_pal) + geom_segment(aes(x = 0, y = 0, xend = S_MDS1, yend = S_MDS2), data = spp_scrs_substrate, arrow = arrow(length = unit(0.5, "cm")), color = "red") + geom_text(data = spp_scrs_substrate, aes(x = S_MDS1, y = S_MDS2, label = species), nudge_y = c(-0.025, -0.025, -0.05, 0.05, 0.05)) + geom_segment(aes(x = 0, y = 0, xend = S_MDS1, yend = S_MDS2), data = spp_scrs_algae, arrow = arrow(length = unit(0.5, "cm")), color = "black") + geom_text(data = spp_scrs_algae, aes(x = S_MDS1, y = S_MDS2, label = species), nudge_y = c(-0.025, 0.025, 0.025, 0.025, 0.025, -0.025, -0.05, -0.025)) + theme_classic(base_size = 15) fig_1 <- fig_1 + coord_fixed() fig_1 pdf("FIGURES/OUTPUT/Figure_1.pdf") fig_1 invisible(dev.off()) ## ----deliniate_sample_types---------------------------------------------- load("combo_pipeline.rdata") samples.out <- rownames(seqtab) subject <- sapply(strsplit(samples.out, "[[:digit:]]"), `[`, 1) # this splits the string at first instance of a digit # use the whole string for individuals # use the first two letters for genus # use the next three letters for species sample_name <- substr(samples.out, 1, 999) genus <- substr(samples.out, 1, 2) species <- substr(samples.out, 1, 5) ## ----define_variables---------------------------------------------------- #define a sample data frame samdf <- data.frame(SamName = sample_name, Gen = genus, Sp = species) rownames(samdf) <- samples.out kable(samdf[c(1, 13, 20, 30, 44), 1:3], row.names = FALSE) %>% kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "float_right") %>% column_spec(1:3, width = "3.5cm") ## ----create_ps_object---------------------------------------------------- # this create the phyloseq object ps_slv <- phyloseq(otu_table(seqtab, taxa_are_rows = FALSE), sample_data(samdf), tax_table(tax_silva)) tax_table(ps_slv) <- cbind(tax_table(ps_slv), rownames(tax_table(ps_slv))) # adding unique ASV names taxa_names(ps_slv) <- paste0("ASV", seq(ntaxa(ps_slv))) tax_table(ps_slv) <- cbind(tax_table(ps_slv), rownames(tax_table(ps_slv))) ps_slv ## ----export_seq_tax_tables----------------------------------------------- colnames(tax_table(ps_slv)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "ASV_SEQ", "ASV_ID") write.table(tax_table(ps_slv), "TABLES/OUTPUT/PS/full_tax_table.txt", sep = "\t", quote = FALSE, 
col.names = NA) write.table(t(otu_table(ps_slv)), "TABLES/OUTPUT/PS/full_seq_table.txt", sep = "\t", quote = FALSE, col.names = NA) write.table(sample_data(ps_slv), "TABLES/OUTPUT/PS/full_sample_data.txt", sep = "\t", quote = FALSE, row.names = FALSE) ## ----select_samples------------------------------------------------------ ps_slv_base <- prune_samples(c("SpAur01", "SpAur02", "SpAur03", "SpAur04", "SpAur10", "SpAur11", "SpAur12", "SpAur13", "SpVir01", "SpVir02", "SpVir03", "SpVir04", "SpVir05", "SpVir06", "SpVir07", "SpVir08", "SpVir09", "SpVir10", "SpVir11", "AcCoe01", "AcCoe02", "AcCoe03", "AcCoe04", "AcCoe05", "AcCoe06", "AcCoe07", "AcCoe08", "AcTra01", "AcTra02", "AcTra03", "AcTra04", "AcTra05", "AcTra06", "AcTra07", "AcTra08", "AcTra09", "ScTae01", "ScTae02", "ScTae03", "ScTae04", "ScTae05", "ScTae06", "ScTae07", "ScTae08", "ScTae09", "SpAur05", "SpAur06", "SpAur07", "SpAur08", "SpAur09"), ps_slv) ps_slv_base ## ----remove_ASV_with_zeros_reads----------------------------------------- ps_slv_work <- prune_taxa(taxa_sums(ps_slv_base) > 0, ps_slv_base) ps_slv_work ## ----gen_stats_raw, eval = TRUE, include = TRUE-------------------------- write.table(tax_table(ps_slv_work), "TABLES/OUTPUT/PS/trim_tax_table.txt", sep = "\t", quote = FALSE, col.names = NA) write.table(t(otu_table(ps_slv_work)), "TABLES/OUTPUT/PS/trim_seq_table.txt", sep = "\t", quote = FALSE, col.names = NA) write.table(sample_data(ps_slv_work), "TABLES/OUTPUT/PS/trim_sample_data.txt", sep = "\t", quote = FALSE, row.names = FALSE) # general stats for the dataset. sample_sum_df_raw <- data.frame(sum = sample_sums(ps_slv_work)) total_reads_raw <- sum(otu_table(ps_slv_work)) smin_raw <- as.integer(min(sample_sums(ps_slv_work))) smean_raw <- as.integer(mean(sample_sums(ps_slv_work))) smax_raw <- as.integer(max(sample_sums(ps_slv_work))) ## ----remove_cyano-------------------------------------------------------- # generate a file with Chloroplast ASVs chloro_p_ps <- subset_taxa(ps_slv_work, Order == "Chloroplast") chloro_p_tab <- as(tax_table(chloro_p_ps), "matrix") chloro_p_tab <- chloro_p_tab[, 8] chloro_p_df <- as.factor(chloro_p_tab) goodTaxaCH <- setdiff(taxa_names(ps_slv_work), chloro_p_df) ps_slv_work_no_cyano <- prune_taxa(goodTaxaCH, ps_slv_work) ps_slv_work_no_cyano ###### Summarize data total_asv_chloro_p <- length(chloro_p_df) sample_sum_chloro_p_ps <- data.frame(sum = sample_sums(chloro_p_ps)) total_reads_chloro_p_ps <- sum(otu_table(chloro_p_ps)) smin_chloro_p_ps <- min(sample_sums(chloro_p_ps)) smean_chloro_p_ps <- mean(sample_sums(chloro_p_ps)) smax_chloro_p_ps <- max(sample_sums(chloro_p_ps)) write.table(tax_table(chloro_p_ps), "TABLES/OUTPUT/PS/chloroplast_tax_table.txt", sep = "\t", quote = FALSE, col.names = NA) write.table(t(otu_table(chloro_p_ps)), "TABLES/OUTPUT/PS/chloroplast_seq_table.txt", sep = "\t", quote = FALSE, col.names = NA) write.table(sample_data(chloro_p_ps), "TABLES/OUTPUT/PS/chloroplast_sample_data.txt", sep = "\t", quote = FALSE, row.names = FALSE) ## ----remove_specific_taxa------------------------------------------------ # generate a file with mitochondria ASVs MT1_ps <- subset_taxa(ps_slv_work_no_cyano, Family == "Mitochondria") MT1 <- as(tax_table(MT1_ps), "matrix") MT1 <- MT1[, 8] MT1df <- as.factor(MT1) goodTaxa <- setdiff(taxa_names(ps_slv_work_no_cyano), MT1df) ps_slv_work_filt <- prune_taxa(goodTaxa, ps_slv_work_no_cyano) ps_slv_work_filt ###### Summarize data total_asv_MT1 <- length(MT1df) # colnames(tax_table(MT1_ps)) sample_sum_MT1_ps <- data.frame(sum = sample_sums(MT1_ps)) 
total_reads_MT1_ps <- sum(otu_table(MT1_ps)) smin_MT1_ps <- min(sample_sums(MT1_ps)) smean_MT1_ps <- mean(sample_sums(MT1_ps)) smax_MT1_ps <- max(sample_sums(MT1_ps)) ## ----gen_stats, eval = TRUE, include = TRUE------------------------------ # general stats for the dataset. #colnames(tax_table(ps_slv_work_filt)) sample_sum_df <- data.frame(sum = sample_sums(ps_slv_work_filt)) total_reads <- sum(otu_table(ps_slv_work_filt)) smin <- as.integer(min(sample_sums(ps_slv_work_filt))) smean <- as.integer(mean(sample_sums(ps_slv_work_filt))) smax <- as.integer(max(sample_sums(ps_slv_work_filt))) ## ----merge--------------------------------------------------------------- mergedGP <- merge_samples(ps_slv_work_filt, "Sp") SD <- merge_samples(sample_data(ps_slv_work_filt), "Sp") mergedGP ## ----save_ps_objects----------------------------------------------------- save(ps_slv_work_filt, mergedGP, file = "PS_OBJECTS/ps.RData") saveRDS(ps_slv_work_filt, "PS_OBJECTS/ps_slv_work_filt.rds") saveRDS(mergedGP, "PS_OBJECTS/mergedGP.rds") ## ----sample_summary_table, warning = FALSE, fig.align = "center"--------- total_reads <- sample_sums(ps_slv_work_filt) total_reads <- as.data.frame(total_reads, make.names = TRUE) total_reads <- total_reads %>% rownames_to_column("host_ID") total_asvs <- estimate_richness(ps_slv_work_filt, measures = "Observed") total_asvs <- total_asvs %>% rownames_to_column("host_ID") sam_details <- sample_data(ps_slv) sam_details <- sam_details %>% mutate(genus = case_when( Gen == "Ac" ~ "Acanthurus", Gen == "Sc" ~ "Scarus", Gen == "Sp" ~ "Sparisoma")) sam_details <- sam_details %>% mutate(species = case_when( Sp == "AcCoe"~ "coeruleus", Sp == "AcTra"~ "tractus", Sp == "ScTae"~ "taeniopterus", Sp == "SpAur"~ "aurofrenatum", Sp == "SpVir"~ "viride")) #Sp == "SpChr"~ "chrysopterum", #Sp == "ScVet"~ "vetula" sam_details <- sam_details %>% mutate(common_name = case_when( Sp == "AcCoe" ~ "blue tang surgeonfish", Sp == "AcTra" ~ "fiveband surgeonfish", Sp == "ScTae" ~ "princess parrotfish", Sp == "SpAur" ~ "redband parrotfish", Sp == "SpVir" ~ "stoplight parrotfish")) #Sp == "SpChr" ~ "redtail parrotfish", #Sp == "ScVet" ~ "queen parrotfish")) sam_details <- sam_details %>% mutate(NCBI_txid = case_when( Sp == "AcCoe" ~ "157585", Sp == "AcTra" ~ "1316013", Sp == "ScTae" ~ "544418", Sp == "SpAur" ~ "59663", Sp == "SpVir" ~ "59666")) #Sp == "SpChr" ~ "51766", #Sp == "ScVet" ~ "84543")) sam_details <- sam_details[-c(2, 3)] colnames(sam_details) <- c("host_ID", "host_genus", "host_species", "full_name", "NCBI_txid") merge_tab <- merge(sam_details, total_reads, by = "host_ID") merge_tab2 <- merge(merge_tab, total_asvs, by = "host_ID") colnames(merge_tab2) <- c("host_ID", "host_genus", "host_species", "common_name", "NCBI_txid", "total_reads", "total_ASVs") # We also have a datatable containing metrics for each host. 
Lets bring this in # and merge with the summary table metrics <- read.table("TABLES/INPUT/host_metrics.txt", sep = "\t", header = TRUE) host_details <- merge(merge_tab2, metrics, by = "host_ID") colnames(host_details) <- c("host_ID", "host_genus", "host_species", "common_name", "NCBI_txid", "total_reads", "total_ASVs", "collection_date", "phase", "weight", "total_length", "foregut_length", "midgut_length", "hindgut_length", "total_gut_length") datatable(host_details, rownames = FALSE, width = "100%", colnames = c("host_ID", "host_genus", "host_species", "common_name", "NCBI_txid", "total_reads", "total_ASVs", "Collection_date", "Phase", "Weight (g)", "Total length (cm)", "Fore gut length (cm)", "Mid gut length (cm)", "Hind gut length (cm)", "Total gut length (cm)"), caption = htmltools::tags$caption(style = "caption-side: bottom; text-align: left;", "Table: ", htmltools::em("Sample summary.")), extensions = "Buttons", options = list(columnDefs = list(list(className = "dt-left", targets = 0)), dom = "Blfrtip", pageLength = 5, lengthMenu = c(5, 10, 25, 50), buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE)) ## ----diversity_table, fig.align = "center"------------------------------- # generate the ASV table tax_asv <- table(tax_table(ps_slv_work_filt)[, "Class"], exclude = NULL, dnn = "Taxa") tax_asv <- as.data.frame(tax_asv, make.names = TRUE) #### change to Unclassified # Get levels and add "None" levels <- levels(tax_asv$Taxa) levels[length(levels) + 1] <- "Unclassified" # refactor Taxa to include "Unclassified" as a factor level # and replace NA with "Unclassified" tax_asv$Taxa <- factor(tax_asv$Taxa, levels = levels) tax_asv$Taxa[is.na(tax_asv$Taxa)] <- "Unclassified" # generate the reads table tax_reads <- factor(tax_table(ps_slv_work_filt)[, "Class"], exclude = NULL) tax_reads <- apply(otu_table(ps_slv_work_filt), MARGIN = 1, function(x) { tapply(x, INDEX = tax_reads, FUN = sum, na.rm = FALSE, simplify = TRUE)}) #RENAME NA --> Unclassified rownames(tax_reads)[72] <- "Unclassified" tax_reads <- as.data.frame(tax_reads, make.names = TRUE) tax_reads <- cbind(tax_reads, reads = rowSums(tax_reads)) #DELETE all but last column tax_reads <- tax_reads[51] tax_reads <- setDT(tax_reads, keep.rownames = TRUE)[] # merge the two tables and make everything look pretty # in an interactive table taxa_read_asv_tab <- merge(tax_reads, tax_asv, by.x = "rn", by.y = "Taxa") taxa_read_asv_tab <- mutate(taxa_read_asv_tab, prop_of_ASVs = Freq / sum(Freq), prop_of_reads = reads / sum(reads)) taxa_read_asv_tab <- taxa_read_asv_tab[c(1, 2, 5, 3, 4)] names(taxa_read_asv_tab) <- c("Class", "total_reads", "prop_of_reads", "total_ASVs", "prop_of_ASVs") taxa_read_asv_tab2 <- taxa_read_asv_tab taxa_read_asv_tab2$prop_of_reads <- round(taxa_read_asv_tab2$prop_of_reads, digits = 6) taxa_read_asv_tab2$prop_of_ASVs <- round(taxa_read_asv_tab2$prop_of_ASVs, digits = 6) ## ------------------------------------------------------------------------ #kills sci notation options(scipen = 999) write.table(taxa_read_asv_tab2, "TABLES/OUTPUT/SUPP/Table_S4.txt", sep = "\t", row.names = FALSE, quote = FALSE) datatable(taxa_read_asv_tab2, rownames = FALSE, width = "100%", colnames = c("Class", "total_reads", "prop_of_reads", "total_ASVs", "prop_of_ASVs"), caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Table: ", htmltools::em("Total reads & ASVs by Class")), extensions = "Buttons", options = list(columnDefs = list(list(className = "dt-left", targets = 0)), dom = "Blfrtip", 
pageLength = 5, lengthMenu = c(5, 10, 35, 70),
                         buttons = c("csv", "copy")))

## ----calc_rel_abund_and_merge--------------------------------------------
# calculate the averages and merge by species
ps_slv_filt_AVG <- transform_sample_counts(ps_slv_work_filt, function(x) x / sum(x))
mergedGP_BAR <- merge_samples(ps_slv_filt_AVG, "Sp")
SD_BAR <- merge_samples(sample_data(ps_slv_filt_AVG), "Sp")
# merge taxa by rank. If you choose a different rank be sure to change
# the rank throughout this code chunk
mdata_phy <- tax_glom(mergedGP_BAR, taxrank = "Class", NArm = FALSE)
mdata_phyrel <- transform_sample_counts(mdata_phy, function(x) x / sum(x))
meltd <- psmelt(mdata_phyrel)
meltd$Class <- as.character(meltd$Class)
# calculate the total relative abundance for all taxa
means <- ddply(meltd, ~Class, function(x) c(mean = mean(x$Abundance)))
means$mean <- round(means$mean, digits = 8)
# this orders them in descending fashion
taxa_means <- means[order(-means$mean), ]
# ditch the sci notation
taxa_means <- format(taxa_means, scientific = FALSE)
# RENAME NA to UNCLASSIFIED
taxa_means$Class <- gsub("NA", "Unclassified", taxa_means$Class)

## ----rel_abund_table, fig.align = 'center'-------------------------------
datatable(taxa_means, rownames = FALSE, width = "65%",
          colnames = c("Class", "mean"),
          caption = htmltools::tags$caption(style = "caption-side: bottom; text-align: left;",
                                            "Table: ",
                                            htmltools::em("Class-level relative abundance.")),
          extensions = "Buttons",
          options = list(columnDefs = list(list(className = "dt-center", targets = "_all")),
                         dom = "Blfrtip", pageLength = 10,
                         lengthMenu = c(5, 10, 50, 70), buttons = c("csv", "copy")))

## ----define_other---------------------------------------------------------
Other <- means[means$mean <= 0.02, ]$Class
# or you can choose specific taxa like this
# Other_manual <- c("list", "taxa", "in", "this", "format")

## ----metld_bar-------------------------------------------------------------
meltd[meltd$Class %in% Other, ]$Class <- "Other"
samp_names <- aggregate(meltd$Abundance, by = list(meltd$Sample), FUN = sum)[, 1]
.e <- environment()
meltd[, "Class"] <- factor(meltd[, "Class"], sort(unique(meltd[, "Class"])))
meltd <- meltd[order(meltd[, "Class"]), ]
# Here we order Classes by the Phylum they belong to.
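# (Note: factor() with an explicit `levels` vector turns any Class not listed
# into NA, so the vector below needs to cover every Class that remains after
# the "Other" lumping above.)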
meltd$Class <- factor(meltd$Class, levels = c("Bacteroidia", "Clostridia", "Erysipelotrichia", "Fusobacteriia", "Alphaproteobacteria", "Deltaproteobacteria", "Gammaproteobacteria", "Planctomycetacia", "Oxyphotobacteria", "Other")) ## ----plot_bar_fig2A, fig.align = "center", fig.cap = "Figure 2A", fig.height = 3---- fig2A <- ggplot(meltd, aes_string(x = "Sample", y = "Abundance", fill = "Class"), environment = .e, ordered = TRUE, xlab = "x-axis label", ylab = "y-axis label") fig2A <- fig2A + geom_bar(stat = "identity", position = position_stack(reverse = TRUE), width = 0.95) + coord_flip() + theme(aspect.ratio = 1 / 2) fig2A <- fig2A + scale_fill_manual(values = friend_pal) fig2A <- fig2A + theme(axis.text.x = element_text(angle = 0, hjust = 0.45, vjust = 1)) fig2A <- fig2A + guides(fill = guide_legend(override.aes = list(colour = NULL), reverse = FALSE)) + theme(legend.key = element_rect(colour = "black")) fig2A <- fig2A + labs(x = "Host species", y = "Relative abundance (% total reads)", title = "Abundance of bacterial taxa across host species") fig2A <- fig2A + theme(axis.line = element_line(colour = "black"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1)) fig2A pdf("FIGURES/OUTPUT/Figure_2A.pdf") fig2A invisible(dev.off()) ## ----gen_summary_table, warning = FALSE---------------------------------- diversity <- estimate_richness(ps_slv_work_filt, measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher")) diversity_calc <- diversity %>% rownames_to_column("host_ID") # round values diversity_calc[c(3, 5, 10)] <- round(diversity_calc[c(3, 5, 10)], 1) diversity_calc[c(4, 6, 7, 9)] <- round(diversity_calc[c(4, 6, 7, 9)], 2) diversity_calc[8] <- round(diversity_calc[8], 3) host_summary <- merge(host_details, diversity_calc) host_summary$Observed <- NULL host_summary <- host_summary[c(1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23)] write.table(host_summary, "TABLES/OUTPUT/SUPP/Table_S3.txt", sep = "\t", row.names = FALSE, quote = FALSE, col.names = c("Sample ID", "Host genus", "Host species", "Common name", "NCBI tAxID", "Collection date", "Life phase", "Weight (g)", "Total length (cm)", "Foregut length (cm)", "Midgut length (cm)", "Hindgut length (cm)", "Total gut length (cm)", "Total reads", "Total ASVs", "Chao1", "Chao1 (se)", "ACE", "ACE (se)", "Shannon", "Simpson", "InvSimpson", "Fisher")) datatable(host_summary, rownames = FALSE, width = "100%", colnames = c("Sample ID", "Host genus", "Host species", "Common name", "NCBI tAxID", "Collection date", "Life phase", "Weight (g)", "Total length (cm)", "Foregut length (cm)", "Midgut length (cm)", "Hindgut length (cm)", "Total gut length (cm)", "Total reads", "Total ASVs", "Chao1", "Chao1 (se)", "ACE", "ACE (se)", "Shannon", "Simpson", "InvSimpson", "Fisher"), caption = htmltools::tags$caption(style = "caption-side: bottom; text-align: left;", "Table: ", htmltools::em("Host-associated metadata & microbial diversity")), extensions = "Buttons", options = list(columnDefs = list(list(className = "dt-left", targets = 0)), dom = "Blfrtip", pageLength = 5, lengthMenu = c(5, 10, 50), buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE)) ## ----alpha_div_test_norm, warning = FALSE-------------------------------- # Convert to ps object sample_div <- sample_data(diversity) # Create new ps object with diversity estimates added to sample_data ps_slv_work_filt_div <- 
merge_phyloseq(ps_slv_work_filt, sample_div)
# Run Shapiro test
shapiro_test_Shan <- shapiro.test(sample_data(ps_slv_work_filt_div)$Shannon)
shapiro_test_invSimp <- shapiro.test(sample_data(ps_slv_work_filt_div)$InvSimpson)
shapiro_test_Chao1 <- shapiro.test(sample_data(ps_slv_work_filt_div)$Chao1)
shapiro_test_Observed <- shapiro.test(sample_data(ps_slv_work_filt_div)$Observed)

## ----shap_Shan, echo = FALSE---------------------------------------------
shapiro_test_Shan

## ----shap_invS, echo = FALSE---------------------------------------------
shapiro_test_invSimp

## ----shap_Choa1, echo = FALSE--------------------------------------------
shapiro_test_Chao1

## ----shap_Observed, echo = FALSE-----------------------------------------
shapiro_test_Observed

## ----normal----------------------------------------------------------------
sampledataDF <- data.frame(sample_data(ps_slv_work_filt_div))
aov.shannon <- aov(Shannon ~ Sp, data = sampledataDF)
# Call for the summary of that ANOVA, which will include P-values
summary(aov.shannon)

## ----tukey-----------------------------------------------------------------
TukeyHSD(aov.shannon)

## ----krusk_invsimp---------------------------------------------------------
#library(FSA)
#dunnTest(InvSimpson ~ Sp, data = sampledataDF, method="bh")
kruskal.test(InvSimpson ~ Sp, data = sampledataDF)

## ----krusk_chao------------------------------------------------------------
#dunnTest(Chao1 ~ Sp, data = sampledataDF, method="bh")
kruskal.test(Chao1 ~ Sp, data = sampledataDF)

## ----krusk_observed--------------------------------------------------------
#library(FSA)
#dunnTest(Observed ~ Sp, data = sampledataDF, method="bh")
kruskal.test(Observed ~ Sp, data = sampledataDF)

## ----wilcox_invsimp--------------------------------------------------------
pairwise.wilcox.test(sampledataDF$InvSimpson, sampledataDF$Sp, p.adjust.method = "fdr")

## ----wilcox_chao-----------------------------------------------------------
pairwise.wilcox.test(sampledataDF$Chao1, sampledataDF$Sp, p.adjust.method = "fdr")

## ----wilcox_observed, warning = FALSE--------------------------------------
pairwise.wilcox.test(sampledataDF$Observed, sampledataDF$Sp, p.adjust.method = "fdr")

## ----alpha_div_fig_2B, fig.align = "center", fig.cap = "Figure 2B", warning = FALSE----
fig2B <- plot_richness(ps_slv_work_filt, x = "Sp",
                       measures = c("Observed", "Shannon", "InvSimpson", "Chao1"),
                       color = "Sp", nrow = 1)
fig2B <- fig2B + geom_boxplot() + geom_jitter(width = 0.05)
fig2B <- fig2B + scale_colour_manual(values = samp_pal) +
  labs(x = "Host species", y = "Diversity",
       title = "Alpha diversity of bacterial communities in herbivorous reef fish")
#fig2B + geom_boxplot(aes(colour = black))
#fig2B <- fig2B + theme_bw() + geom_point(size = 2.5, aes(color = Sp)) +
fig2B
pdf("FIGURES/OUTPUT/Figure_2B.pdf")
fig2B
invisible(dev.off())

## ----run_correlations, warning = FALSE, message = FALSE------------------
dt <- read.table("TABLES/OUTPUT/SUPP/Table_S3.txt", sep = "\t", header = TRUE)
library(ggpubr)
scarus <- host_summary[host_summary$host_genus %in% "Scarus", ]
sparisoma <- host_summary[host_summary$host_genus %in% "Sparisoma", ]
acanthurus <- host_summary[host_summary$host_genus %in% "Acanthurus", ]
alphametric <- c("total_ASVs", "Chao1", "ACE", "Shannon", "Simpson",
                 "InvSimpson", "Fisher")
physical_char <- c("weight", "total_length", "foregut_length",
                   "midgut_length", "hindgut_length", "total_gut_length")
# Full set: not significant -> "weight", "total_length"
#"foregut_length", R = 0.32
#"midgut_length", R
= 0.50 #"hindgut_length", R = 0.17-0.34 #"total_gut_length" R = 0.50 #By genus and midgut_length : # scarus NS # sparisoma R = 0.8 # acanthurus NS # acanthurus not significant for any parameters # scarus not significant for any parameters # sparisoma significant for all parameters except hindgut_length was a bit weak # "weight", "total_length" "foregut_length" "midgut_length" # "hindgut_length" "total_gut_length" # To do all diversity metric change "y = " to y = alphametric and # ylab = alphametric par(mfrow = c(2, 3)) shan_by_length <- ggscatter(host_summary, x = "total_gut_length", y = "Shannon", add = "reg.line", conf.int = FALSE,cor.coef = TRUE, cor.method = "spearman", xlab = "total_length (cm)", ylab = "Shannon", color = "host_genus", palette = samp_pal, legend = "bottom") shan_by_weight <- ggscatter(host_summary, x = "weight", y = "Shannon", add = "reg.line", conf.int = FALSE, cor.coef = TRUE, cor.method = "spearman", xlab = "weight (g)", ylab = "Shannon", color = "host_genus", palette = samp_pal, legend = "top") grid.arrange(shan_by_length, shan_by_weight, ncol = 2) #ggscatter(host_summary, x = "total_gut_length", y = alphametric, # add = "reg.line", conf.int = FALSE, # cor.coef = TRUE, cor.method = "spearman", # xlab = "mid-gut length (cm)", ylab = alphametric, # color = "host_genus", palette = samp_pal) #, facet.by = "host_species") ###################################################### #shapiro.test(host_summary$Shannon) # => p = 0.1229 ## Shapiro-Wilk normality test for wt #shapiro.test(host_summary$midgut_length) # => p = 0.09 #ggqqplot(host_summary$Shannon) # => p = 0.1229 ## Shapiro-Wilk normality test for wt #ggqqplot(host_summary$midgut_length) # => p = 0.09 ###################################################### ## ----run_nmds, results = 'hide'------------------------------------------ set.seed(3131) ord.nmds.jsd_slv <- ordinate(ps_slv_work_filt, method = "NMDS", distance = "jsd") stressplot(ord.nmds.jsd_slv) ## ----ord_results, echo = FALSE------------------------------------------- ord.nmds.jsd_slv ## ----beta_div_fig_2C, fig.align = "center", fig.cap = "Figure 2C"-------- fig2C <- plot_ordination(ps_slv_work_filt, ord.nmds.jsd_slv, color = "Sp", label = "SamName", title = "Jensen-Shannon divergence") fig2C <- fig2C + geom_point(size = 4) + geom_point(shape = 1, size = 3.6, colour = "black", stroke = 0.75) # + xlim(-0.4, 0.4) + ylim(-0.4, 0.4) fig2C <- fig2C + scale_colour_manual(values = samp_pal) fig2C <- fig2C + theme(axis.line = element_line(colour = "black"), panel.background = element_blank(), panel.grid.major = element_line("grey"), panel.grid.minor = element_line("grey"), panel.border = element_rect(colour = "black", fill = NA, size = 1)) + theme(legend.key = element_rect(colour = "black")) fig2C <- fig2C + coord_fixed() fig2C <- fig2C + stat_ellipse(type = "t") + theme_bw() fig2C pdf("FIGURES/OUTPUT/Figure_2C.pdf") fig2C invisible(dev.off()) ## ----ordination_stats_adonis--------------------------------------------- set.seed(1911) fish.jsd <- phyloseq::distance(ps_slv_work_filt, method = "jsd") sampledf <- data.frame(sample_data(ps_slv_work_filt)) fish_adonis <- adonis(fish.jsd ~ Sp, data = sampledf, permutations = 1000) fish_adonis ## ----pairwise_adonis----------------------------------------------------- pairwise.adonis(fish.jsd, factors = sampledf$Sp, p.adjust.m = "bonferroni") ## ----betadisper---------------------------------------------------------- beta_adonis <- betadisper(fish.jsd, sampledf$Sp, bias.adjust = TRUE) beta_adonis ## 
----permutest----------------------------------------------------------- permutest(beta_adonis, pairwise = TRUE, permutations = 1000) ## ----ordination_stats_anosim--------------------------------------------- spgroup <- get_variable(ps_slv_work_filt, "Sp") fish_anosim <- anosim(distance(ps_slv_work_filt, "jsd"), grouping = spgroup) summary(fish_anosim) ## ----simper, eval = FALSE, echo = FALSE, include = FALSE----------------- ## source("HELPER_SCRIPTS/simper_pretty.R") ## #Using the function ## otutab <- as.table(otu_table(ps_slv_work_filt)) ## stuff <- simper.pretty(otu_table(ps_slv_work_filt), ## sample_data(ps_slv_work_filt), "Sp", ## perc_cutoff = 0.5, low_cutoff = "y", ## low_val = 0.01, "name") ## ----gen_lefse_input_files, results='hide'------------------------------- ########## OTU table OTU1 <- as(otu_table(ps_slv_work_filt), "matrix") # transpose if necessary # Coerce to data.frame OTUdf <- as.data.frame(t(OTU1)) setDT(OTUdf, keep.rownames = TRUE)[] write.table(OTUdf, "TABLES/OUTPUT/OTHER/seq_tab_for_core.txt", sep = "\t", row.names = FALSE, quote = FALSE) colnames(OTUdf)[1] <- "#NAME" write.table(OTUdf, "TABLES/OUTPUT/LEfSe/LEfSe_INPUT_seq_tab.txt", sep = "\t", row.names = FALSE, quote = FALSE) ############ TAX Table # Remember in the `tax_table` we added the last columns as the actual sequence # of each ASV and the ASV_ID. We do not need those here. # So lets only keep the first 6 columns (the taxonomic lineage) TAX1 <- as(tax_table(ps_slv_work_filt), "matrix") TAXdf <- as.data.frame(TAX1) setDT(TAXdf, keep.rownames = TRUE)[] colnames(TAXdf)[1] <- "#TAXONOMY" TAXdf <- TAXdf[, 1:6] write.table(TAXdf, "TABLES/OUTPUT/LEfSe/LEfSe_INPUT_tax_tab.txt", sep = "\t", row.names = FALSE, quote = FALSE) ############ Metadata file meta_file <- data.frame(NAME = sample_name, SampleType = species, Gen = genus) rownames(meta_file) <- samples.out colnames(meta_file) <- c("#NAME", "Species", "Genus") # but we still have those three samples that need to be removed meta_file <- filter(meta_file, Species != "SpChr" & Species != "ScVet") write.table(meta_file, "TABLES/OUTPUT/LEfSe/LEfSe_INPUT_metadata.txt", sep = "\t", row.names = FALSE, quote = FALSE) ## ----lefse_table--------------------------------------------------------- lefse_tab <- read.table("TABLES/INPUT/lefse_results.txt", header = TRUE, sep = "\t", check.names = FALSE) write.table(lefse_tab, "TABLES/OUTPUT/SUPP/Table_S5.txt", quote = FALSE, sep = "\t", row.names = FALSE) datatable(lefse_tab, rownames = FALSE, width = "100%", caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Table 4: ", htmltools::em("Results of LEfSe analysis.")), extensions = "FixedColumns", "Buttons", options = list(columnDefs = list(list(className = "dt-center", targets = c(1, 2, 3, 4, 5, 6, 7, 8))), dom = "Blfrtip", pageLength = 5, lengthMenu = c(5, 10, 25, 60), buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE)) ## ----get_core_shared_file------------------------------------------------ cm <- read.table("TABLES/OUTPUT/OTHER/seq_tab_for_core.txt", sep = "\t", header = TRUE, row.names = 1) cm_t <- t(cm) cm_df <- as.data.frame(cm_t) numcols <- ncol(cm_df) cm_df <- cm_df %>% tibble::rownames_to_column("Group") cm_df <- cm_df %>% mutate(label = 0.03, numOtus = numcols) %>% select(label, Group, numOtus, everything()) write.table(cm_df, "TABLES/OUTPUT/OTHER/ps_slv_work_filt.txt", quote = FALSE, sep = "\t", row.names = FALSE) ####COMBINE by fish species cm_Merge <- cm %>% tibble::rownames_to_column("ASV") cm_Merge <- 
cm_Merge %>% mutate(AcCoe = AcCoe01 + AcCoe02 + AcCoe03 + AcCoe04 + AcCoe05 + AcCoe06 + AcCoe07 + AcCoe08) cm_Merge <- cm_Merge %>% mutate(AcTra = AcTra01 + AcTra02 + AcTra03 + AcTra04 + AcTra05 + AcTra06 + AcTra07 + AcTra08 + AcTra09) cm_Merge <- cm_Merge %>% mutate(ScTra = ScTae01 + ScTae02 + ScTae03 + ScTae04 + ScTae05 + ScTae06 + ScTae07 + ScTae08 + ScTae09) cm_Merge <- cm_Merge %>% mutate(SpAur = SpAur01 + SpAur02 + SpAur03 + SpAur04 + SpAur05 + SpAur06 + SpAur07 + SpAur08 + SpAur09 + SpAur10 + SpAur11 + SpAur12 + SpAur13) cm_Merge <- cm_Merge %>% mutate(SpVir = SpVir01 + SpVir02 + SpVir03 + SpVir04 + SpVir05 + SpVir06 + SpVir07 + SpVir08 + SpVir09 + SpVir10 + SpVir11) cm_Merge <- cm_Merge %>% select(ASV, AcCoe, AcTra, ScTra, SpAur, SpVir) cm_Merge_2 <- cm_Merge[, -1] rownames(cm_Merge_2) <- cm_Merge[, 1] cm_Merge_2_t <- t(cm_Merge_2) cm_Merge_2_df <- as.data.frame(cm_Merge_2_t) cm_Merge_2_df <- cm_Merge_2_df %>% tibble::rownames_to_column("Group") cm_Merge_2_df <- cm_Merge_2_df %>% mutate(label = 0.03, numOtus = numcols) %>% select(label, Group, numOtus, everything()) write.table(cm_Merge_2_df, "TABLES/OUTPUT/OTHER/ps_slv_work_filt_combine.txt", quote = FALSE, sep = "\t", row.names = FALSE) ## ----files_for_public_search--------------------------------------------- # Object of DA ASVs lefse_asvs <- c("ASV21", "ASV25", "ASV35", "ASV44", "ASV159", "ASV17", "ASV174", "ASV22", "ASV234", "ASV60", "ASV114", "ASV268", "ASV14", "ASV226", "ASV23", "ASV29", "ASV30", "ASV48", "ASV90", "ASV98", "ASV18", "ASV41", "ASV7", "ASV43", "ASV5", "ASV54", "ASV8", "ASV9", "ASV250", "ASV12", "ASV32", "ASV34", "ASV39", "ASV224", "ASV398", "ASV127", "ASV128", "ASV151", "ASV323", "ASV359", "ASV374", "ASV91", "ASV56", "ASV6", "ASV165", "ASV284", "ASV395", "ASV450", "ASV15", "ASV2", "ASV20", "ASV298", "ASV57", "ASV69", "ASV75", "ASV82", "ASV1", "ASV70", "ASV49") # Create ps objects da_asvs <- prune_taxa(lefse_asvs, mergedGP) da_asvs_full <- prune_taxa(lefse_asvs, ps_slv_work_filt) # Create fasta file from tax_table table2format <- tax_table(da_asvs) #retain only the column with the sequences table2format_trim <- table2format[, 7] table2format_trim_df <- data.frame(row.names(table2format_trim), table2format_trim) colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ") #format fasta table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID) write.table(table2format_trim_df, "TABLES/OUTPUT/ASV_FOR_BLAST.fasta", sep = "\r", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") ## ----blast_table--------------------------------------------------------- blast_tab <- read.table("TABLES/INPUT/BLAST_results.txt", header = TRUE, sep = "\t", check.names = FALSE) write.table(blast_tab, "TABLES/OUTPUT/BLAST_RESULTS.txt", row.names = FALSE, sep = "\t", quote = FALSE) datatable(blast_tab, rownames = FALSE, caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Supplementary Table 6: ", htmltools::em("Results of BLASTn analysis.")), extensions = "FixedColumns", "Buttons", options = list(columnDefs = list(list(className = "dt-center", targets = c(1, 2, 3, 4, 5, 6, 7, 8))), dom = "Blfrtip", pageLength = 5, lengthMenu = c(5, 10, 25, 65), buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE)) ## ----insert_itol, out.width = "100%", fig.cap = "Figure 3", eval = FALSE, echo = FALSE---- ## # This is to include a static image ## itol_tree <- knitr::include_graphics("FIGURES/INPUT/Figure_3A.svg") ## itol_tree ## ----zoom, echo = FALSE, message = 
FALSE, fig.align = "center", out.width = "100%"---- # Note: in order for this to work, especially if the SVG fie # has been manipulated in Inkscape, # it is wise to do the following: # 1. do whatever you need do, # 2. resize to drawing, # 3. Save a copy, # 4. save as Optimized SVG itol_tree <- svgPanZoom("FIGURES/INPUT/Figure_3_TreeA_2.svg", controlIconsEnabled = TRUE, viewBox = FALSE, width = "100%") itol_tree ## ----tree_legend, echo = FALSE, message = FALSE, fig.align = "center"---- itol_legend <- knitr::include_graphics("FIGURES/INPUT/Figure_3_Legend_A.svg") itol_legend ## ----habitat_table------------------------------------------------------- habi_tab <- read.table("TABLES/INPUT/habitat_specificity.txt", header = TRUE, sep = "\t", check.names = FALSE) # order by habitat and host enriched habi_tab2 <- habi_tab[order(habi_tab$habitat_code, habi_tab$Enriched), ] #habi_tab <- habi_tab[, -2] #delete code column da_asvs_counts <- as.data.frame(taxa_sums(da_asvs)) colnames(da_asvs_counts) <- c("total_reads") # make rownames a column da_asvs_counts <- cbind(ASV = rownames(da_asvs_counts), da_asvs_counts) temp_table <- merge(da_asvs_counts, blast_tab, by = "ASV", all = TRUE, sort = FALSE) summ_table <- merge(temp_table, habi_tab2, by = "ASV", all = TRUE, sort = FALSE, no.dups = TRUE) summ_table <- summ_table[-c(22, 26, 27, 28, 29, 31)] summ_table <- summ_table[c(1, 22, 23, 2, 24, 4, 3, 5, 6, 7, 8, 18, 19, 20, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 25)] datatable(summ_table, rownames = FALSE, colnames = c( "ASV", "Putative habitat", "Enriched", "Total reads", "Taxon", "Num perfect hits", "Top hit acc", "% identity", "Isolation source", "Nat host", "Common name", "Collection year", "Country", "PubMed ID", "Alignment length", "Mismatches", "Gap opens", "Q. start", "Q. end", "S. start", "S. end", "Evalue", "Bit score", "Num IMNGS hits", "Sullam lifestyle"), editable = TRUE, caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Supplementary Table 6: ", htmltools::em("Assessing habitat specificity.")), extensions = "Buttons", options = list(columnDefs = list(list(className = "dt-center", targets = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))), dom = "Blfrtip", pageLength = 5, lengthMenu = c(5, 10, 25, 60), buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE)) write.table(summ_table, "TABLES/OUTPUT/SUPP/Table_S6.txt", sep = "\t", col.names = c( "ASV", "Putative habitat", "Enriched", "Total reads", "Taxon", "Num perfect hits", "Top hit acc", "% identity", "Isolation source", "Nat host", "Common name", "Collection year", "Country", "PubMed ID", "Alignment length", "Mismatches", "Gap opens", "Q. start", "Q. end", "S. start", "S. 
end", "Evalue", "Bit score", "Num IMNGS hits", "Sullam lifestyle"), row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") ## ----habitat_table_summary----------------------------------------------- habi_summary <- read.table("TABLES/INPUT/Table_1.txt", header = TRUE, sep = "\t", check.names = FALSE) datatable(habi_summary, rownames = FALSE, editable = TRUE, caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Table 1: ", htmltools::em("Summary of habitat specificity.")), extensions = "Buttons", options = list(columnDefs = list(list(className = "dt-center", targets = c(1, 2, 3, 4, 5))), dom = "Brti", buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE)) ## ----proportion_of_taxa-------------------------------------------------- # Change this to select different taxa calc_tax_prop <- subset_taxa(mergedGP, Family == "Desulfovibrionaceae") calc_tax_prop sample_sums_by_taxa <- sample_sums(calc_tax_prop) total_taxa_reads <- sum(sample_sums_by_taxa) sample_sums_by_taxa <- as.data.frame(sample_sums_by_taxa) sample_sums_by_taxa$proportion <- (sample_sums_by_taxa$sample_sums_by_taxa / total_taxa_reads) * 100 colnames(sample_sums_by_taxa) <- c("total taxa reads", "Proportion") sample_sums_by_taxa$Proportion <- round(sample_sums_by_taxa$Proportion, digits = 2) total_taxa_reads_int <- as.integer(total_taxa_reads) sample_sums_by_taxa ## ----proportion_of_taxa_cyano-------------------------------------------- # Change this to select different taxa calc_tax_prop_Cyan <- subset_taxa(mergedGP, Phylum == "Cyanobacteria") calc_tax_prop_Cyan sample_sums_by_taxa_Cyan <- sample_sums(calc_tax_prop_Cyan) total_taxa_reads_Cyan <- sum(sample_sums_by_taxa_Cyan) sample_sums_by_taxa_Cyan <- as.data.frame(sample_sums_by_taxa_Cyan) sample_sums_by_taxa_Cyan$proportion <- (sample_sums_by_taxa_Cyan$sample_sums_by_taxa_Cyan / total_taxa_reads_Cyan) * 100 colnames(sample_sums_by_taxa_Cyan) <- c("total taxa reads", "Proportion") sample_sums_by_taxa_Cyan$Proportion <- round(sample_sums_by_taxa_Cyan$Proportion, digits = 2) total_taxa_reads_Cyan_int <- as.integer(total_taxa_reads_Cyan) sample_sums_by_taxa_Cyan ## ----da_asv_bar_setup, message = FALSE----------------------------------- # calculate the averages and merge by species # grab the da_asv ps object & merge by samples daASV_mergedGP_BAR <- merge_samples(da_asvs_full, "Sp") #daASV_SD_BAR <- merge_samples(sample_data(da_asvs_full), "Sp") # calculate percent proportion daASV_AVG <- apply(t(otu_table(daASV_mergedGP_BAR)), 1, function(x) x / sum(x)) # transpose daASV_t_AVG <- t(daASV_AVG) daASV_t_AVG_df <- as.data.frame(daASV_t_AVG) ###################### # choose columns of interest da_ASV_tax <- habi_tab[c("ASV", "Taxon", "Putative_habitat")] da_ASV_tax2 <- da_ASV_tax[, -1] rownames(da_ASV_tax2) <- da_ASV_tax[, 1] ###################### # combine based on ASV column daASV_work <- merge(daASV_t_AVG_df, da_ASV_tax2, by = 0, sort = FALSE) rownames(daASV_work) <- daASV_work[, 1] daASV_work[, 1] <- NULL #daASV_work # then make column row.names daASV_work2 <- cbind(ASV = rownames(daASV_work), daASV_work) # melt the df # wide to long format? 
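# (melt() does go wide to long: with no id.vars given, reshape2 uses the
# non-numeric columns -- the ASV names, Taxon, and Putative_habitat -- as id
# variables and stacks the per-species proportion columns into variable/value
# pairs, which are renamed Sample/Proportion below.)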
daASV_work3 <- melt(daASV_work2, value.name = "ASV") colnames(daASV_work3) <- c("ASV", "Taxon", "Putative_habitat", "Sample", "Proportion") daASV_work3$Proportion <- round(daASV_work3$Proportion, digits = 4) datatable(daASV_work3, rownames = TRUE, editable = FALSE, caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Table 8: ", htmltools::em("DA ASV sample proportion.")), extensions = "Buttons", options = list(columnDefs = list(list(className = "dt-center", targets = c(1, 2, 3, 4, 5))), dom = "Blfrtip", pageLength = 5, lengthMenu = c(5, 10, 50, 100, 300), buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE)) write.table(daASV_work3, "TABLES/OUTPUT/prop_ASV_reads_by_host.txt", sep = "\t", row.names = FALSE, quote = FALSE) ## ----set_lefse_order_asvs------------------------------------------------ asv_order <- c("ASV450", "ASV165", "ASV395", "ASV284", "ASV56", "ASV6", "ASV359", "ASV128", "ASV127", "ASV91", "ASV374", "ASV151", "ASV323", "ASV398", "ASV224", "ASV39", "ASV34", "ASV12", "ASV32", "ASV250", "ASV43", "ASV54", "ASV9", "ASV5", "ASV49", "ASV8", "ASV41", "ASV18", "ASV7", "ASV90", "ASV29", "ASV98", "ASV23", "ASV30", "ASV226", "ASV48", "ASV70", "ASV1", "ASV14", "ASV298", "ASV82", "ASV75", "ASV69", "ASV57", "ASV20", "ASV15", "ASV2", "ASV268", "ASV114", "ASV234", "ASV174", "ASV60", "ASV17", "ASV22", "ASV159", "ASV44", "ASV25", "ASV21", "ASV35") asv_order_rev <- rev(asv_order) ## ----order_asvs---------------------------------------------------------- daASV_work3$ASV <- as.character(daASV_work3$ASV) daASV_work3$ASV <- factor(daASV_work3$ASV, levels = unique(daASV_work3$ASV)) daASV_work3$ASV <- factor(daASV_work3$ASV, levels = asv_order) ## ----da_asv_bar_chart, message = FALSE----------------------------------- #Bar charts ASV_bar <- ggplot(daASV_work3, aes_string(x = "ASV", y = "Proportion", fill = "Sample"), environment = .e, ordered = TRUE, xlab = "x-axis label", ylab = "y-axis label") ASV_bar <- ASV_bar + geom_bar(stat = "identity", position = position_stack(reverse = TRUE), width = 0.95) + coord_flip() + theme(aspect.ratio = 2 / 1) ASV_bar <- ASV_bar + scale_fill_manual(values = samp_pal) ASV_bar <- ASV_bar + theme(axis.text.x = element_text(angle = 0, hjust = 0.95, vjust = 1)) ASV_bar <- ASV_bar + guides(fill = guide_legend(override.aes = list(colour = NULL), reverse = FALSE)) + theme(legend.key = element_rect(colour = "black")) ASV_bar <- ASV_bar + labs(x = "Host species", y = "Proportion (% total reads)", title = "ASV Proportion by host species") ASV_bar <- ASV_bar + theme(axis.line = element_line(colour = "black"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1)) ASV_bar ## ----message = FALSE, include = FALSE, warning = FALSE------------------- pdf("FIGURES/OUTPUT/Figure_4A.pdf") ASV_bar invisible(dev.off()) ## ----heatmap_alt_setup, warning=FALSE, message = FALSE------------------ # Heatmap library(ComplexHeatmap) library(circlize) library(heatmap3) library(gdata) fig4_heat <- as.data.frame(t(otu_table(da_asvs))) # Convert habi_table to df and store in new variable fig4_tax <- as.data.frame(habi_tab2) # eliminate 1st column so can combine based on row names fig4_tax_tab <- fig4_tax[, -1] # Make new row.names from original table rownames(fig4_tax_tab) <- fig4_tax[, 1] # Reorder fig4_tax_tab <- fig4_tax_tab[c(4, 2, 3, 1, 5, 6, 7, 8)] # Select columns fig4_tax_tab <- fig4_tax_tab[c(1:4)] # Combine the two df by rowname If the matching involved 
fig4_heatmap2 <- merge(fig4_tax_tab, fig4_heat, by = 0, all = TRUE)
fig4_heatmap <- subset(fig4_heatmap2, select = -c(Row.names))
rownames(fig4_heatmap) <- fig4_heatmap2[, "Row.names"]
# make rownames a column
fig4_heatmap <- cbind(ASV = rownames(fig4_heatmap), fig4_heatmap)
fig4_heatmap$ASV <- factor(fig4_heatmap$ASV, levels = rev(asv_order))
fig4_heatmap <- fig4_heatmap[order(fig4_heatmap$ASV), ]
# combine the columns to make one name
fig4_heatmap$ID <- paste(fig4_heatmap$ASV, fig4_heatmap$Taxon,
                         fig4_heatmap$Putative_habitat, fig4_heatmap$Enriched,
                         sep = "_")
# delete the original columns
fig4_heatmap2 <- fig4_heatmap[-c(1:5)]
# reorder
fig4_heatmap2 <- fig4_heatmap2[c(6, 1, 2, 3, 4, 5)]
rownames(fig4_heatmap2) <- fig4_heatmap2[, 1]
fig4_heatmap2 <- fig4_heatmap2[-1]
#### Define Colors
taxa_colors <- unlist(lapply(row.names(fig4_heatmap2), function(x) {
  # generalists
  if (grepl("Alphaproteobacteria", x)) "#000000"
  else if (grepl("Pirellulaceae", x)) "#000000"
  else if (grepl("Rubritaleaceae", x)) "#000000"
  else if (grepl("Flavobacteriaceae", x)) "#000000"
  # fish/animal
  else if (grepl("Desulfovibrionaceae", x)) "#0072b2"
  else if (grepl("Lachnospiraceae", x)) "#f0e442"
  else if (grepl("Erysipelotrichaceae", x)) "#009e73"
  else if (grepl("Ruminococcaceae", x)) "#e69f00"
  else if (grepl("Bacteroidales", x)) "#d55e00"
  else if (grepl("Fusobacteriaceae", x)) "#56b4e9"
  else if (grepl("Vibrionaceae", x)) "#cc79a7"
  # other
  else if (grepl("Family_XIII", x)) "#808080"
  else if (grepl("Mollicutes", x)) "#808080"
  else if (grepl("Brevinemataceae", x)) "#808080"
  else if (grepl("Peptostreptococcaceae", x)) "#808080"
}))
habitat_colors <- unlist(lapply(row.names(fig4_heatmap2), function(x) {
  if (grepl("fish", x)) "#808080"
  else if (grepl("animal", x)) "#000000"
  else if (grepl("generalist", x)) "#808080"
  #else if (grepl("undetermined", x)) "#000000"
}))
heatColors <- cbind(taxa_colors, habitat_colors)
colnames(heatColors)[1] <- "Taxa"
colnames(heatColors)[2] <- "Habitat"
### SAVE/display heatmap
col <- colorRampPalette(bias = 1, c("#000033", "#66CCFF"))(16)
pdf(file = "FIGURES/OUTPUT/Figure_4B.pdf")
heatmap3(fig4_heatmap2, cexRow = 0.5, cexCol = 1, margins = c(3, 13),
         RowSideColors = heatColors, scale = "row", Colv = NA, Rowv = NA,
         revC = TRUE, balanceColor = FALSE, col = col)
invisible(dev.off())
heatmap3(fig4_heatmap2, cexRow = 0.5, cexCol = 1, margins = c(3, 13),
         RowSideColors = heatColors, scale = "row", Colv = NA, Rowv = NA,
         revC = TRUE, balanceColor = FALSE, col = col)
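# Note: in the heatmap3() calls above, scale = "row" standardizes each ASV row
# before plotting, so the colors reflect relative enrichment across hosts
# rather than absolute read counts; RowSideColors draws the Taxa/Habitat
# annotation strips from the heatColors matrix defined above, and
# Colv = NA / Rowv = NA turn off clustering so the predefined asv_order is kept.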
## ----files_for_network_analysis, eval=FALSE, include = FALSE-------------
## ###Make a graph
## #make a df from otu_table
## da_asvs_df <- as.data.frame(t(otu_table(da_asvs)))
## #copy rownames as new column
## setDT(da_asvs_df, keep.rownames = TRUE)
## #change the name
## colnames(da_asvs_df)[1] <- "ASV"
## #melt from wide to long form
## da_asvs_df2 <- melt(da_asvs_df, value.name = "ASV")
## #rename columns
## colnames(da_asvs_df2) <- c("Source", "Target", "total_reads")
## # remove rows with total_reads == 0
## da_asvs_df2 <- da_asvs_df2[!(da_asvs_df2$total_reads == 0)]
## da_asvs_df2 <- da_asvs_df2[, c(1, 3, 2)]
## write.csv(da_asvs_df2, "NETWORKS/graph.csv", quote = FALSE, row.names = FALSE)
##
## ###Make attribute
## da_asvs_tax <- tax_table(da_asvs)
## #retain only the columns with taxa info
## da_asvs_tax <- da_asvs_tax[, 1:5]
## da_asvs_tax_df <- data.frame(row.names(da_asvs_tax), da_asvs_tax)
## colnames(da_asvs_tax_df)[1] <- "Id"
## da_asvs_tax_df$Label <- da_asvs_tax_df$Id
## da_asvs_tax_df$type <- "ASV"
## # Reorder
## da_asvs_tax_df <- da_asvs_tax_df[c(1, 7, 8, 2, 3, 4, 5, 6)]
##
## da_asvs_samp <- sample_data(da_asvs)
## da_asvs_samp_df <- data.frame(row.names(da_asvs_samp), da_asvs_samp)
## colnames(da_asvs_samp_df)[1] <- "Id"
## da_asvs_samp_df$Label <- da_asvs_samp_df$Id
## da_asvs_samp_df <- da_asvs_samp_df[-c(2:4)]
## da_asvs_samp_df$type <- "Host"
## empty_columns <- c("Kingdom", "Phylum", "Class", "Order", "Family")
## da_asvs_samp_df[empty_columns] <- NA
## # combine and save
## graph_attributes <- rbind(da_asvs_tax_df, da_asvs_samp_df)
## write.csv(graph_attributes, "NETWORKS/attributes.csv", quote = FALSE,
##           row.names = FALSE)
## ----heatmap_alt_print, warning=FALSE, message = FALSE, echo = FALSE, eval = FALSE----
## fig4_heatmap2_disp <- heatmap3(fig4_heatmap2, trace = "none", density = "none",
##                                cexRow = 0.5, cexCol = 1, margins = c(3, 15),
##                                RowSideColors = heatColors, scale = "row",
##                                Colv = NA, Rowv = NA, revC = TRUE,
##                                balanceColor = FALSE, col = col)
## ----mantel_test_a, warning=FALSE, message = FALSE, fig.align = "center"----
detach("package:phyloseq", unload = TRUE)
library(ape)
library(picante)
library(ggtree)
library(tidytree)
library(treeio)
# Get phylogenetic data ------------------
# Read newick tree ------------------
tree <- read.tree("TABLES/INPUT/MANTEL_TEST/item_orders.txt")
#ggtree(tree) + geom_tiplab(color = "blue")
host_tree <- knitr::include_graphics("FIGURES/INPUT/collapse_tree.svg",
                                     dpi = 300)
host_tree
## ----fig.align = "center", warning=FALSE, message = FALSE----------------
d <- matrix(nrow = 1, ncol = 5)
colnames(d) <- c("HM379826_Acanthurus_coeruleus",
                 "LIDM544-07_Acanthurus_tractus",
                 "MXIV480-10_Scarus_taeniopterus",
                 "JQ841390_Sparisoma_aurofrenatum",
                 "JQ839595_Sparisoma_viride")
tree.p <- prune.sample(phylo = tree, samp = d)
#plot(tree.p)
# Change the names to species names
tree.p$tip.label[1] <- "Sparisoma_aurofrenatum"
tree.p$tip.label[2] <- "Sparisoma_viride"
tree.p$tip.label[3] <- "Scarus_taeniopterus"
tree.p$tip.label[4] <- "Acanthurus_tractus"
tree.p$tip.label[5] <- "Acanthurus_coeruleus"
plot(tree.p)
## ----mantel_test_b, fig.align = "center", warning=FALSE, message = FALSE----
#Transform tree into a distance matrix
trx <- cophenetic(tree.p)
#that works but I need these in alphabetical order by species
T <- dist(cophenetic(tree.p))
ordering <- sort(attr(T, "Labels"))
T.mat <- as.matrix(T)[ordering, ordering]
T <- as.dist(T.mat)
#plot cluster of distance matrix
cluster_phylo <- hclust(T, method = "ward.D")
plot(cluster_phylo, main = "Phylogenetic clustering", xlab = "Host species",
     ylab = "Distance", sub = "hellinger/bray-curtis/ward")
## ----mantel_test_c, warning=FALSE, message = FALSE-----------------------
all_traits <- read.csv(
  "TABLES/INPUT/MANTEL_TEST/Mean_bite_characteristics.csv",
  header = TRUE
)
ids <- all_traits[, 1:2]
quant_traits_std <- decostand(all_traits[, 3:4], "range") / 10
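# Note: vegan's decostand(..., "range") rescales each column to [0, 1] via
# (x - min) / (max - min); the divisions by 10, (10 / 2), and (10 / 3) above
# and below presumably bring the traits onto comparable scales before
# clustering (interpretation, not stated in the original comments).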
Mean_prop_mark_on_substrate_std <- all_traits[, 5] / (10 / 2)
prop_vertical_std <- all_traits[, 6] / (10 / 2)
prop_concave_std <- all_traits[, 7] / (10 / 3)
prop_convex_std <- all_traits[, 8] / (10 / 3)
all_traits_std <- cbind(quant_traits_std, Mean_prop_mark_on_substrate_std,
                        prop_vertical_std, prop_concave_std, prop_convex_std,
                        all_traits[, 9:18])
mean_traits <- aggregate(all_traits[, 3:18], by = list(all_traits$Species), mean)
traits <- mean_traits[, 2:17]
rownames(traits) <- as.vector(mean_traits[, 1])
Fish_species <- as.vector(mean_traits[, 1])
## ----mantel_test_d, fig.align = "center", warning=FALSE, message = FALSE----
traits_trans <- decostand(traits, method = "hellinger")
traits_dist <- vegdist(traits_trans, method = "bray")
cluster_traits <- hclust(traits_dist, method = "ward.D")
plot(cluster_traits, labels = Fish_species, main = "Ecological traits",
     xlab = "Host species", ylab = "Distance",
     sub = "hellinger/bray-curtis/ward")
## ----message = FALSE-----------------------------------------------------
# Is ecological data correlated with phylogeny
mantel(traits_dist, T, method = "pearson", permutations = 9999)
## ----mantel_test_e, warning=FALSE, message=FALSE-------------------------
asv4 <- read.delim(
  "TABLES/INPUT/MANTEL_TEST/2_da_asv_merged_fish.txt", header = T
)
asv5 <- read.delim(
  "TABLES/INPUT/MANTEL_TEST/2_da_asv_merged_animal.txt", header = T
)
asv_host <- rbind(asv4, asv5) # Merge the fish and animal datasets together
# Transpose dataframe
asv_host_t <- data.frame(t(asv_host[-1]))
colnames(asv_host_t) <- asv_host[, 1]
library(vegan)
# Create dendrogram based on similarity in asv
Fish_species <- as.vector(colnames(asv_host[2:6]))
Gut_contents <- sqrt(asv_host_t[])
#Try hellinger transformation
Gut_contents <- decostand(asv_host_t, method = "hellinger")
Gut_dist_host <- vegdist(Gut_contents, method = "bray")
cluster_gut_host <- hclust(Gut_dist_host, method = "ward.D")
plot(cluster_gut_host, labels = Fish_species, main = "Host-associated ASVs",
     xlab = "Host species", ylab = "Distance",
     sub = "hellinger/bray-curtis/ward")
## ----mantel_test_f, warning=FALSE, message=FALSE-------------------------
mantel(Gut_dist_host, T, method = "pearson", permutations = 9999)
## ----mantel_test_g, warning=FALSE, message=FALSE-------------------------
mantel(Gut_dist_host, traits_dist, method = "pearson",
       permutations = 9999) # Not associated with ecological traits
## ----mantel_test_h, warning=FALSE, message=FALSE, fig.align = "center"----
asv6 <- read.delim(
  "TABLES/INPUT/MANTEL_TEST/2_da_asv_merged_environmental.txt", header = T
)
# Transpose dataframe
asv_env_t <- data.frame(t(asv6[-1]))
colnames(asv_env_t) <- asv6[, 1]
# Create dendrogram based on similarity in asv
Fish_species <- as.vector(colnames(asv6[2:6]))
Gut_contents <- sqrt(asv_env_t[])
#Try hellinger transformation
Gut_contents <- decostand(asv_env_t, method = "hellinger")
Gut_dist_env <- vegdist(Gut_contents, method = "bray")
cluster_gut_env <- hclust(Gut_dist_env, method = "ward.D")
plot(cluster_gut_env, labels = Fish_species,
     main = "Environment-associated ASVs",
     xlab = "Host species", ylab = "Distance",
     sub = "hellinger/bray-curtis/ward")
## ----mantel_test_i, warning=FALSE, message=FALSE-------------------------
mantel(Gut_dist_env, T, method = "pearson",
       permutations = 9999) # Marginally significant correlation
## ----mantel_test_j, warning=FALSE, message=FALSE-------------------------
mantel(Gut_dist_env, traits_dist, method = "pearson", permutations = 9999)
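# Note: the partial Mantel tests below (vegan::mantel.partial) measure the
# correlation between the first two distance matrices while controlling for
# the third: environment-associated ASV distances vs. host phylogeny (T)
# holding ecological traits constant, and then vs. traits holding phylogeny
# constant.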
## ----mantel_test_k, warning=FALSE, message=FALSE-------------------------
mantel.partial(Gut_dist_env, T, traits_dist, method = "pearson",
               permutations = 9999)
mantel.partial(Gut_dist_env, traits_dist, T, method = "pearson",
               permutations = 9999)
## ----supp_fig1_calc, results = "hide", message=FALSE---------------------
library(phyloseq)
mdata_phy_all <- tax_glom(ps_slv_filt_AVG, taxrank = "Class", NArm = FALSE)
# You can choose any taxonomic level here
mdata_phyrel_all <- transform_sample_counts(
  mdata_phy_all, function(x) x / sum(x)
)
meltd_all <- psmelt(mdata_phyrel_all)
meltd_all$Class <- as.character(meltd_all$Class)
means <- ddply(meltd_all, ~Class, function(x) c(mean = mean(x$Abundance)))
# descending order
taxa_means <- means[order(-means$mean), ]
# ditch the sci notation
taxa_means <- format(taxa_means, scientific = FALSE)
# Here we conglomerate low-abundance Classes (mean <= 0.026) into "Other".
Other <- means[means$mean <= 0.026, ]$Class
meltd_all[meltd_all$Class %in% Other, ]$Class <- "Other"
samp_names <- aggregate(meltd_all$Abundance, by = list(meltd_all$Sample),
                        FUN = sum)[, 1]
.e <- environment()
meltd_all[, "Class"] <- factor(meltd_all[, "Class"],
                               sort(unique(meltd_all[, "Class"])))
meltd_all <- meltd_all[order(meltd_all[, "Class"]), ]
levels(meltd_all$Class)
# Here we order Classes by the Phylum they belong to.
meltd_all$Class <- factor(meltd_all$Class, levels = c(
  "Bacteroidia", "Clostridia", "Erysipelotrichia", "Fusobacteriia",
  "Alphaproteobacteria", "Deltaproteobacteria", "Gammaproteobacteria",
  "Planctomycetacia", "Oxyphotobacteria", "Other"))
sup_fig1 <- qplot(data = meltd_all, x = Sp, y = Abundance, fill = Class,
                  geom = "boxplot", ylab = "Relative Abundance") +
  theme(legend.position = "bottom") +
  facet_grid(Class ~ ., scales = "free_y", space = "free_y") +
  geom_jitter(width = 0.05) +
  geom_point(colour = "black", fill = "white") #+ guides(guide_legend(reverse = FALSE))
sup_fig1 <- sup_fig1 + scale_fill_manual(values = friend_pal) +
  labs(x = "Host species", y = "Relative abundance (% total reads)")
pdf("FIGURES/OUTPUT/box_and_whisker.pdf")
sup_fig1
invisible(dev.off())
## ----separate_bars_stacked, fig.align = "center", fig.cap = "Supplementary Figure 1. Relative abundance of major Classes by sample", out.width = "90%"----
sup_fig2 <- ggplot(meltd_all,
                   aes_string(x = "Sample", y = "Abundance", fill = "Class"),
                   environment = .e, Ordered = TRUE)
sup_fig2 <- sup_fig2 +
  geom_bar(stat = "identity", position = "stack") +
  facet_grid(Class ~ Sp, scales = "free", space = "free")
sup_fig2 <- sup_fig2 + scale_fill_manual(values = friend_pal)
# sup_fig2 <- sup_fig2 + theme(axis.text.x = element_text(angle = -90,
#                                                         hjust = 0))
sup_fig2 <- sup_fig2 + theme(axis.text.x = element_blank())
sup_fig2 <- sup_fig2 +
  guides(fill = guide_legend(override.aes = list(colour = NULL),
                             reverse = FALSE)) +
  theme(legend.key = element_rect(colour = "black"),
        legend.position = "bottom") +
  labs(x = "Individual samples", y = "Relative abundance (% total reads)")
pdf("FIGURES/OUTPUT/Figure_S1.pdf")
sup_fig2
invisible(dev.off())
## ----grid.arrange_plots, fig.align = "center", fig.cap = "Figure S1. Relative abundance of major Classes by sample", out.width = "90%"----
grid.arrange(sup_fig1, sup_fig2, ncol = 2)
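# Note: grid.arrange() above comes from the gridExtra package; if it is not
# attached earlier in the workflow, add library(gridExtra) before this chunk.
# As an alternative to the pdf()/dev.off() pairs above, the ggplot objects can
# also be written out with ggplot2::ggsave(), e.g.:
#   ggsave("FIGURES/OUTPUT/Figure_S1.pdf", sup_fig2)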
## ----plot_da_asv_heatmap_full, warning=FALSE, fig.cap = "Figure 4", message = FALSE, eval = FALSE, echo = FALSE----
## #Old heat map using phyloseq.
## fig4_heat <- plot_heatmap(da_asvs, "PCoA", "jsd", "Sp",
##                           low = "#000033", high = "#66CCFF",
##                           trans = log_trans(10), sample.order = "Sp",
##                           max.label = 250, taxa.order = asv_order)
## fig4_heat <- fig4_heat + coord_fixed(ratio = 1 / 12) +
##   scale_x_discrete(labels = c("AcCoe", "AcTra", "ScTae", "SpAur", "SpVir"))
##
## fig4_heat
## pdf("FIGURES/OUTPUT/fig4_da_asvs_heat.pdf")
## fig4_heat
## invisible(dev.off())
## ----picrust, eval = FALSE, echo = FALSE, include = FALSE----------------
## #make shared file
## tabble <- as(otu_table(ps_slv_work_filt), "matrix")
## tabbledf <- as.data.frame(tabble)
## numOtus <- "10565"
## df1 <- cbind(numOtus, tabbledf)
## df1 <- df1 %>% rownames_to_column("Group")
## label <- 0.03
## df2 <- cbind(label, df1)
## write.table(df2, "TABLES/OUTPUT/OTHER/ps_slv_work_filt.shared",
##             sep = "\t", quote = FALSE, row.names = FALSE)
## fasta <- tax_table(ps_slv_work_filt)
## write.table(fasta2format_trim,
##             "TABLES/OUTPUT/OTHER/ps_slv_work_filt.fasta",
##             sep = "\t", quote = FALSE, col.names = NA)
## # fix the file
## # mothur commands (run in mothur, not R):
## classify.seqs(fasta=ps_slv_work_filt.fasta, template=gg_13_5_99.fasta,
##               taxonomy=gg_13_5_99.gg.tax, processors=8)
## make.biom(shared=ps_slv_work_filt.shared, label=0.03,
##           constaxonomy=ps_slv_work_filt.gg.wang.cons.taxonomy,
##           picrust=97_otu_map.txt, reftaxonomy=gg_13_5_99.gg.tax)
##
## taxsums <- as.data.frame(taxa_sums(ps_slv_work_filt))
## write.table(taxsums, "TABLES/OUTPUT/OTHER/taxsums.txt",
##             sep = "\t", quote = FALSE)
## ----sessionInfo, include=TRUE-------------------------------------------
proc.time() - ptm
sessionInfo()
devtools::session_info()
end_time <- Sys.time()
end_time - start_time
###########################
# END PHYLOSEQ WORKFLOW   #
###########################