├── .gitignore
├── Alphadiversity.Rmd
├── Alphadiversity.html
├── Atlas.Rmd
├── Atlas.html
├── Betadiversity.Rmd
├── Betadiversity.html
├── Bimodality.Rmd
├── Bimodality.html
├── Clustering.Rmd
├── Clustering.html
├── CoAbundantTaxa.Rmd
├── CoAbundantTaxa.html
├── Comparisons.Rmd
├── Comparisons.html
├── Composition.Rmd
├── Composition.html
├── CompositionAmplicondata.html
├── CompositionAmplicondata2.Rmd
├── CompositionAmplicondata2.html
├── Contributing.Rmd
├── Contributing.html
├── Core.Rmd
├── Core.html
├── CoremicrobiotaAmplicon.Rmd
├── CoremicrobiotaAmplicon.html
├── DMM.Rmd
├── DMM.html
├── DOC.Rmd
├── DOC.html
├── Data.R
├── Data.Rmd
├── Data.html
├── Diversity.html
├── Edu.html
├── Edu
    ├── .gitignore
    ├── all.Rmd
    ├── all.html
    ├── bibliography.bib
    ├── extra.Rmd
    ├── extra.html
    ├── lm.R
    ├── lm.Rmd
    ├── lm.html
    ├── main.R
    ├── misc.R
    ├── pooled.R
    ├── pooled.Rmd
    ├── pooled.html
    ├── univariate.Rmd
    └── univariate.html
├── Experimental.html
├── HITChip.html
├── Heatmap.Rmd
├── Heatmap.html
├── Installation.Rmd
├── Installation.html
├── Interactive.Rmd
├── Interactive.html
├── LICENSE
├── Landscaping.Rmd
├── Landscaping.html
├── Misc
    ├── CompositionAmplicondata.Rmd
    ├── CompositionAmplicondata.html
    ├── CompositionAmplicondata2.Rmd
    ├── CompositionAmplicondata2.html
    ├── Experimental.Rmd
    ├── HITChip.Rmd
    ├── Probelevel.Rmd
    ├── Visualization.Rmd
    └── info.Rmd
├── Mixedmodels.Rmd
├── Mixedmodels.html
├── MovingPictures_MicrobiomeExperiment.rds
├── Negativebinomial.Rmd
├── Negativebinomial.html
├── Networks.Rmd
├── Networks.html
├── Ordination.Rmd
├── Ordination.html
├── Output.Rmd
├── Output.html
├── PERMANOVA.Rmd
├── PERMANOVA.html
├── PlotDiversity.Rmd
├── PlotDiversity.html
├── Preprocessing.Rmd
├── Preprocessing.html
├── Probelevel.html
├── RDA.Rmd
├── RDA.html
├── README.Rmd
├── README.html
├── README.md
├── Regression.Rmd
├── Regression.html
├── Rplotvio1.jpeg
├── Rplotvio2.jpeg
├── Stability.Rmd
├── Stability.html
├── TODO.Rmd
├── TODO.html
├── Template.html
├── TemporalMicrobiotaTrajectory.Rmd
├── TemporalMicrobiotaTrajectory.html
├── Themes.Rmd
├── Themes.html
├── Tutorial.Rmd
├── Tutorial.html
├── Visualization.Rmd
├── Visualization.html
├── _config.yml
├── all.Rmd
├── all.html
├── ancom
    ├── .gitignore
    ├── README.md
    ├── ancom.R
    ├── ancom.Rmd
    ├── ancom.md
    ├── figure
    │   ├── tops-1.png
    │   ├── tops-2.png
    │   └── tops-3.png
    ├── functions.R
    └── main.R
├── archive
    ├── Baxter_FITs_Microbiome_2016_fit.final.tx.1.cons.taxonomy
    ├── Baxter_FITs_Microbiome_2016_fit.final.tx.1.subsample.shared
    ├── Baxter_FITs_Microbiome_2016_mapping.csv
    ├── Crosshyb.Rmd
    ├── DiversityTable.tab
    ├── DynamicsIBD.rda
    ├── F1000
    │   └── MyArticle
    │   │   ├── F1000header.png
    │   │   ├── MyArticle.Rmd
    │   │   ├── f1000_styles.sty
    │   │   ├── frog.jpg
    │   │   └── sample.bib
    ├── HITChipAtlas.Rmd
    ├── Maturity.Rmd
    ├── Motionchart.Rmd
    ├── PhylotypeRatios.R
    ├── ProvasIdata.R
    ├── RDA
    │   ├── firstlib2.R
    │   ├── plot_rda_bagged.R
    │   └── rda_bagged.R
    ├── ROC.Rmd
    ├── affybatch.R
    ├── anticorrelation.old.R
    ├── betadiversity.R
    ├── bicor.R
    ├── bimodality_sarle.R
    ├── core_abundance.R
    ├── core_bootstrap.R
    ├── core_bootstrap2.R
    ├── core_members.R
    ├── core_richness.R
    ├── crash-handler.conf
    ├── crosshyb.tab
    ├── data.R
    ├── data_old.R
    ├── debug.R
    ├── densityplot.R
    ├── deprecated.R
    ├── devtools.R
    ├── get_ordination.R
    ├── gini.R
    ├── gini_index.R
    ├── index.Rmd
    ├── index.html
    ├── internal.R
    ├── legacy.R
    ├── limma.Rmd
    ├── linearmodel.R
    ├── lm_phyloseq.R
    ├── low_abundance.R
    ├── lower.triangle.R
    ├── make.abundancy.table.R
    ├── maturity.R
    ├── metadata.xls
    ├── misc.R
    ├── misc.Rmd
    ├── multimodality_phyloseq.R
    ├── myfile.tab
    ├── neatsort.R
    ├── nets.R
    ├── nets.Rmd
    ├── old.R
    ├── peerj32_lipids.csv
    ├── peerj32_meta.csv
    ├── peerj32_microbes.csv
    ├── pet.R
    ├── pet2.R
    ├── plot-methods.R
    ├── plot_diversity.R
    ├── plot_matrix.R
    ├── plot_potential.R
    ├── plot_taxa_prevalence.Rmd
    ├── potential_analysis.R
    ├── project.data.R
    ├── qiita1629.biom
    ├── qiita1629_mapping.csv
    ├── qiita1629_mapping_subset.csv
    ├── qiita1629_otu_table.csv
    ├── qiita1629_taxonomy_table.csv
    ├── rare_abundance.R
    ├── rare_members.R
    ├── rare_richness.R
    ├── rda.R
    ├── rda_bagged.R
    ├── read.profiling.R
    ├── read_profiling.R
    ├── reading.R
    ├── save.phylogeny.R
    ├── subject_tables.R
    ├── summarize.rpa.R
    ├── summarize.sum.R
    ├── test.R
    ├── top_abundance.R
    ├── validate.R
    └── vm.R
├── bibliography.bib
├── bioc.css
├── cleaning_taxonomy_table.Rmd
├── cleaning_taxonomy_table.html
├── core_venn.html
├── core_venn.rmd
├── crash-handler-permission
├── crash-handler.conf
├── debug.log
├── deseq2.Rmd
├── deseq2.html
├── figure
    ├── pooled3-1.png
    ├── pooled_overdispersion-1.png
    ├── pooled_pcomp-1.png
    ├── tail-1.png
    ├── univariate5-1.png
    ├── univariate6-1.png
    ├── univariate_boxplot-1.png
    └── univariate_densityplot-1.png
├── ieee.csl
├── index.html
├── index.rmd
├── info.html
├── init.R
├── limma.Rmd
├── limma.html
├── main.R
├── misc.Rmd
├── plot_sample_trajectory.R
├── plot_time_trajectory.R
├── post_hoc.Rmd
├── post_hoc.html
├── rstanarm.Rmd
└── rstanarm.html


/.gitignore:
--------------------------------------------------------------------------------
 1 | .RData
 2 | Preprocessing.md
 3 | R
 4 | vignettes
 5 | inst
 6 | vignettes
 7 | R
 8 | inst
 9 | OAtoken
10 | *_cache
11 | *_files
12 | cache/
13 | figure/
14 | *~
15 | *.md
16 | *.Rproj
17 | docs/*
18 | Temp/*
19 | .Rproj.user
20 | me
21 | wis
22 | .Rhistory


--------------------------------------------------------------------------------
/Atlas.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "HITChip Atlas"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - atlas}
22 |   %\usepackage[utf8]{inputenc}
23 | -->
24 | 
25 | 
26 | ## Intestinal microbiota diversity in 1006 western adults
27 | 
28 | The data set from [Lahti et al. Nat. Comm. 5:4344, 2014](http://www.nature.com/ncomms/2014/140708/ncomms5344/full/ncomms5344.html) has microbiota profiling of 130 genus-like taxa across 1006 normal western adults from [Data Dryad](http://doi.org/10.5061/dryad.pk75d). Load the data in R:
29 | 
30 | ```{r data2, warning=FALSE, message=FALSE}
31 | # Download the required R packages and then the HITChip Atlas data set
32 | library(microbiome)
33 | data(atlas1006)
34 | ```
35 | 
36 | Estimate ecosystem alpha diversity and related indicators for this data set: 
37 | 
38 | ```{r div-example, warning=FALSE, message=FALSE}
39 | tab <- microbiome::alpha(atlas1006, index = c("shannon", "invsimpson"))
40 | library(knitr)
41 | kable(head(tab))
42 | ```
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/Bimodality.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Bimodality analysis"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 | <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - stability}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 | 
 26 | 
 27 | Get example data - [HITChip Atlas of 130 genus-like taxa across 1006 healthy western adults](http://www.nature.com/ncomms/2014/140708/ncomms5344/full/ncomms5344.html). 
 28 | 
 29 | ```{r bistability, message=FALSE}
 30 | # Load the example data
 31 | library(microbiome)
 32 | library(dplyr)
 33 | data(atlas1006)
 34 | 
 35 | # Rename the example data
 36 | pseq <- atlas1006
 37 | 
 38 | # Focus on specific DNA extraction method
 39 | pseq <- pseq %>% subset_samples(DNA_extraction_method == "r")
 40 | 
 41 | # Use relative abundances
 42 | pseq <- microbiome::transform(pseq, "compositional")
 43 | 
 44 | # Merge rare taxa to speed up examples
 45 | pseq <- aggregate_rare(pseq, level = "Genus", detection = .1/100, prevalence = 10/100)
 46 | 
 47 | # For cross-sectional analysis, include
 48 | # only the zero time point:
 49 | pseq0 <- subset_samples(pseq, time == 0)
 50 | ```
 51 | 
 52 | 
 53 | # Bimodality indicators
 54 | 
 55 | Bimodality of the abundance distribution provides an indirect
 56 | indicator of bistability, although other explanations such as sampling
 57 | biases etc. should be controlled. Multiple bimodality scores are
 58 | available.
 59 | 
 60 | 
 61 | Multimodality score using [potential analysis with
 62 | bootstrap](http://www.nature.com/ncomms/2014/140708/ncomms5344/full/ncomms5344.html). Sarle's bimodality coefficient is available as well; and for classical test of unimodality, see the DIP test.
 63 | 
 64 | ```{r bimodality2, message=FALSE, warning=FALSE}
 65 | # Bimodality is better estimated from log10 abundances
 66 | pseq0.clr <- microbiome::transform(pseq0, "clr")
 67 | bimodality <- bimodality(pseq0.clr, method = "potential_analysis", bs.iter = 20)
 68 | ```
 69 | 
 70 | # Visualization   
 71 | 
 72 | **Visualize population densities for unimodal and bimodal groups**
 73 | 
 74 | ```{r stability2, message=FALSE, warning=FALSE, fig.width=12, fig.height=5, out.width="500px"}
 75 | # Pick the most and least bimodal taxa as examples
 76 | unimodal  <- names(sort(bimodality))[[1]]
 77 | bimodal  <- rev(names(sort(bimodality)))[[1]]
 78 | 
 79 | # Visualize population frequencies at the baseline time point
 80 | library(ggplot2)
 81 | theme_set(theme_bw(20))
 82 | p1 <- plot_density(pseq0.clr, variable = unimodal) 
 83 | p2 <- plot_density(pseq0.clr, variable = bimodal) 
 84 | library(gridExtra)
 85 | library(ggplot2)
 86 | grid.arrange(p1, p2, nrow = 1)
 87 | ```
 88 | 
 89 | 
 90 | ## Variation lineplot and bimodality hotplot
 91 | 
 92 | Pick subset of the [HITChip Atlas data set](http://doi.org/10.5061/dryad.pk75d) and plot the subject abundance variation lineplot (**Variation tip plot**) and **Bimodality hotplot** for a given taxon as in [Lahti et al. 2014](http://www.nature.com/ncomms/2014/140708/ncomms5344/full/ncomms5344.html). The Dialister has bimodal population distribution and reduced temporal stability within subjects at intermediate abundances.
 93 | 
 94 | For examples on tipping point detection, see
 95 | [Stability](Stability). We set the tipping point manually in the
 96 | following example.
 97 | 
 98 | ```{r stability-variationplot, message=FALSE, warning=FALSE, fig.show='hold', out.width="430px", eval=FALSE}
 99 | # Bimodality hotplot:
100 | # Consider a unique sample from each subject: the baseline time point 
101 | p <- hotplot(pseq0, tax, tipping.point = 0.005)
102 | print(p)
103 | 
104 | # Visualize bimodality
105 | pv <- plot_tipping(pseq, tax, tipping.point = 0.005)
106 | print(pv)
107 | ```
108 | 
109 | 


--------------------------------------------------------------------------------
/Clustering.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Cluster analysis"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - clustering}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Multivariate (infinite) Gaussian mixture model
28 | 
29 | Fit and visualize Variational Dirichlet process multivariate infinite Gaussian mixture. This variational version has been partially written in C and it is relatively fast. Kindly cite [this article](http://bioinformatics.oxfordjournals.org/content/26/21/2713.short). Note that the implementation uses diagonal covariances on the Gaussian modes. The C code was partially derived from [Honkela et al. 2008](http://www.sciencedirect.com/science/article/pii/S0925231208000659).
30 | 
31 | ```{r LCA2, fig.width=6, fig.height=5, warning=FALSE, message=FALSE, eval=FALSE}
32 | library(netresponse)
33 | 
34 | # Generate simulated data
35 | res <- generate.toydata(Dim = 2)
36 | D <- res$data
37 | component.means <- res$means
38 | component.sds   <- res$sds
39 | 
40 | # Fit the mixture
41 | m <- mixture.model(D, mixture.method = "vdp", pca.basis = FALSE)
42 | 
43 | # Plot the data, and indicate estimated modes with colors. 
44 | # If data dimensionality exceeds 2, 
45 | # the results are visualized on PCA projection
46 | # (with pca.basis = TRUE the data is projected on PCA coordinates;
47 | # without loss of information. This trick can help to avoid overlearning 
48 | # as the variational mixture relies
49 | # on diagonal covariance matrices, so the ellipsoidal axes of the 
50 | # Gaussian modes are parallel to the coordinate axes.)
51 | p <- PlotMixtureMultivariate(D, means = m$mu, sds = m$sd, ws = m$w, modes = apply(m$qofz,1,which.max))
52 | plot(p)
53 | ```
54 | 
55 | ## Univariate (infinite) Gaussian mixture model
56 | 
57 | Fit and visualize Variational Dirichlet process univariate infinite Gaussian mixture. Kindly cite [this article](http://bioinformatics.oxfordjournals.org/content/26/21/2713.short) for the code.
58 | 
59 | ```{r LCA1, fig.width=7, fig.height=5, warning=FALSE, message=FALSE, eval=FALSE}
60 | # Generate simulated bimodal univariate data
61 | x <- c(rnorm(200), rnorm(200, mean = 5))
62 | 
63 | # Variational Dirichlet process univariate Gaussian mixture
64 | m <- mixture.model(x, mixture.method = "vdp", max.responses = 10) 
65 | 
66 | # Plot the data and estimated modes
67 | p <- PlotMixtureUnivariate(x, means = m$mu, sds = m$sd, ws = m$w, binwidth = 0.1, qofz = m$qofz)
68 | 
69 | plot(p)
70 | ```
71 | 
72 | ## Clustering samples with mixed variables
73 | 
74 | Gower distance is useful for samples with mixed-type variables (binary, factor, numeric)):
75 | 
76 | ```{r clustering-gower, fig.width=10, fig.height=4, warning=FALSE, message=FALSE, eval=FALSE}
77 | # Example data
78 | data("dietswap")
79 | 
80 | library(FD)
81 | d <- gowdis(as(sample_data(dietswap), "data.frame"))
82 | 
83 | plot(hclust(d))
84 | ```
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/CoAbundantTaxa.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Identify Co-abundant taxa groups"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteIndexEntry{microbiome tutorial}
21 |   %\VignetteEngine{knitr::rmarkdown}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}
24 | -->
25 | 
26 | **Co-abundant groups of taxa**    
27 | 
28 | Get data  
29 | ```{r cag-1}
30 | library(microbiome)
31 | pseq <- atlas1006
32 | pseq.rel <- microbiome::transform(pseq, "compositional")
33 | ```
34 | 
35 | Identify co-abundant taxa. 
36 | ```{r cag-2}
37 | # get median abundances 
38 | median_abund = apply(abundances(pseq.rel), MARGIN = 1, FUN = median)
39 | 
40 | # select taxa with median abundance of more than or equal to 0.01
41 | abundant_taxa = abundances(pseq)[median_abund >= 0.01, ]
42 | # check how many taxa
43 | nrow(abundant_taxa)
44 | # check which taxa
45 | rownames(abundant_taxa)
46 | ```
47 | 
48 | 
49 | Use Spearman's correlation to identify which of the co-abundant groups show correlation.   
50 | ```{r cag-1b}
51 | # cor() is from `stats` package
52 | spearman_matrix = cor(t(abundant_taxa), method = "spearman")
53 | # hclust() is from `stats` package
54 | spearman_tree = hclust(dist(spearman_matrix), method = "ward")
55 | plot(spearman_tree)
56 | 
57 | ```
58 | 
59 | Using the `heat` function in `microbiome pkg` now compare the correlation with the tree above.  
60 | ```{r plot-heat}
61 | spearman_long_df <- reshape2::melt(spearman_matrix)
62 | head(spearman_long_df)
63 | 
64 | heat(spearman_long_df, "Var1","Var2", 
65 |      order.rows = TRUE, order.cols = TRUE) + 
66 |   theme_bw() + theme(axis.text.x = element_text(angle = 90))
67 | ```
68 | 
69 | Validation of CAGs   
70 | ```{r}
71 | # Validation of CAGs 
72 | # Randomly split the dataset, compute a correlation matrix and run a Mantel test
73 | subsample = sample(c(1:300), size = 220, replace = F)
74 | train_data = abundant_taxa[,subsample]
75 | test_data = abundant_taxa[,-subsample]
76 | train_spearman = cor(t(train_data), method = "spearman")
77 | test_spearman = cor(t(test_data), method = "spearman")
78 | vegan::mantel(as.dist(train_spearman), as.dist(test_spearman), permutations=999)
79 | ```
80 | 
81 | Further reading:  
82 | 
83 | [de la Cuesta-Zuluaga, J., Corrales-Agudelo, V., Velásquez-Mejía, E.P. et al. Gut microbiota is associated with obesity and cardiometabolic disease in a population in the midst of Westernization. Sci Rep 8, 11356 (2018).](https://doi.org/10.1038/s41598-018-29687-x)   
84 | 
85 | [Claesson, M., Jeffery, I., Conde, S. et al. Gut microbiota composition correlates with diet and health in the elderly. Nature 488, 178–184 (2012).](https://doi.org/10.1038/nature11319)  
86 | 


--------------------------------------------------------------------------------
/Comparisons.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Community comparisons"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - comparisons}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Group-wise comparisons
28 | 
29 | A number of methods for microbiota community comparisons have been proposed. For a recent benchmarking study, see [Weiss et al. (2017)](http://doi.org/10.1186/s40168-017-0237-y). For a comprehensive example workflow, see [Callahan et al. F1000 (2017)](https://f1000research.com/articles/5-1492/v2).
30 | 
31 | ### Univariate comparisons
32 | 
33 | For individual taxa, diversity indicators etc.
34 | 
35 |  * [Linear mixed effect models](Mixedmodels.html) 
36 |  * [Negative binomial test](Negativebinomial.html)
37 |  * [post-hoc testing For Kruskal-Wallis](post_hoc.html)
38 | 
39 | 
40 | Other methods, not implemented here (see [Weiss et al. (2017)](http://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-017-0237-y) for a recent survey):
41 | 
42 |  * [Zero-inflated Gaussians (ZIGs)](https://www.ncbi.nlm.nih.gov/pubmed/24076764/) (see [metagenomeSeq](https://bioconductor.org/packages/release/bioc/html/metagenomeSeq.html) Bioconductor package)
43 |  * [DESeq2](deseq2.html) and other advanced methods based on negative binomial
44 | 
45 | 
46 | ### Multivariate comparisons
47 | 
48 | For community-level multivariate comparisons
49 | 
50 |  * [Multivariate linear models (limma)](limma.html)
51 |  * [PERMANOVA](PERMANOVA.html)
52 | 
53 | 


--------------------------------------------------------------------------------
/Contributing.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "How to contribute?"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - comparisons}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ### Get in touch
28 | 
29 |   * [Research homepage](http://www.iki.fi/Leo.Lahti)
30 |   * [Twitter](https://twitter.com/openreslabs)
31 |   * [Gitter](https://gitter.im/microbiome)    
32 |   * [Mailing list](https://groups.google.com/forum/#!forum/microbiome-devel) (microbiome-devel@googlegroups.com)
33 | 
34 | ### Contribute
35 | 
36 |   * [Issue Tracker](https://github.com/microbiome/microbiome/issues) 
37 |   * [Pull requests](https://github.com/microbiome/microbiome/)
38 |   * [Star us on the Github page](https://github.com/microbiome/microbiome)
39 | 
40 | We welcome new examples and tutorials to be added in this collection,
41 | and will fully acknowledge such contributions.
42 | 
43 | You can modify one of the existing examples (.html) and make a pull request. Or, you can propose a new tutorial page. The Rmarkdown source code (..html) for all [tutorials](http://microbiome.github.io/tutorials/) is available in the Github [index.page](https://github.com/microbiome/tutorials). Fork the repository, clone it, modify the tutorials, and make a pull request.
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/DMM.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Dirichlet Multinomial Mixtures"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 | <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - DMM}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 | 
 26 | 
 27 | ## Community typing with Dirichlet Multinomial Mixtures
 28 | 
 29 | [Dirichlet Multinomial Mixtures (DMM)](https://doi.org/10.1371/journal.pone.0030126) (Quince et al. 2012) is a probabilistic method for community typing (or clustering) of microbial community profiling data. It is an infinite mixture model, which means that the method can infer the optimal number of community types. Note that the number of community types is likely to grow with data size.
 30 | 
 31 | 
 32 | Let us load example data.
 33 | 
 34 | ```{r DMM0, fig.width=6, fig.height=5, warning=FALSE, message=FALSE}
 35 | library(microbiome)
 36 | library(DirichletMultinomial)
 37 | library(reshape2)
 38 | library(magrittr)
 39 | library(dplyr)
 40 | # Load example data
 41 | data(dietswap)
 42 | pseq <- dietswap
 43 | 
 44 | # To speed up, only consider the core taxa
 45 | # that are prevalent at 0.1% relative abundance in 50% of the samples
 46 | # (note that this is not strictly correct as information is
 47 | # being discarded; one alternative would be to aggregate rare taxa)
 48 | pseq.comp <- microbiome::transform(pseq, "compositional")
 49 | taxa <- core_members(pseq.comp, detection = 0.1/100, prevalence = 50/100)
 50 | pseq <- prune_taxa(taxa, pseq)
 51 | 
 52 | # Pick the OTU count matrix
 53 | # and convert it into samples x taxa format
 54 | dat <- abundances(pseq)
 55 | count <- as.matrix(t(dat))
 56 | ```
 57 | 
 58 | 
 59 | Fit the DMM model. Let us set the maximum allowed number of community types to 3 to speed up the example.
 60 | 
 61 | ```{r DMM, fig.width=6, fig.height=5, warning=FALSE, message=FALSE, eval=TRUE}
 62 | fit <- lapply(1:3, dmn, count = count, verbose=TRUE)
 63 | ```
 64 | 
 65 | 
 66 | Check model fit with different number of mixture components using standard information criteria
 67 | 
 68 | ```{r DMMplot, fig.width=6, fig.height=5, warning=FALSE, message=FALSE, eval=TRUE}
 69 | lplc <- base::sapply(fit, DirichletMultinomial::laplace) # AIC / BIC / Laplace
 70 | aic  <- base::sapply(fit, DirichletMultinomial::AIC) # AIC / BIC / Laplace
 71 | bic  <- base::sapply(fit, DirichletMultinomial::BIC) # AIC / BIC / Laplace
 72 | #plot(lplc, type="b", xlab="Number of Dirichlet Components", ylab="Model Fit")
 73 | #lines(aic, type="b", lty = 2)
 74 | #lines(bic, type="b", lty = 3)
 75 | ```
 76 | 
 77 | Pick the optimal model
 78 | 
 79 | ```{r DMM3, fig.width=6, fig.height=5, warning=FALSE, message=FALSE,error=FALSE, eval=TRUE}
 80 | best <- fit[[which.min(unlist(lplc))]]
 81 | ```
 82 | 
 83 | Mixture parameters pi and theta
 84 | 
 85 | ```{r DMM4, fig.width=6, fig.height=5, warning=FALSE, message=FALSE, eval=TRUE}
 86 | mixturewt(best)
 87 | ```
 88 | 
 89 | Sample-component assignments
 90 | 
 91 | ```{r DMM5, fig.width=6, fig.height=5, warning=FALSE, message=FALSE, eval=TRUE} 
 92 | ass <- apply(mixture(best), 1, which.max)
 93 | ```
 94 | 
 95 | Contribution of each taxonomic group to each component
 96 | 
 97 | ```{r DMM6, fig.width=9, fig.heigth=6, out.width="400px", warning=FALSE, message=FALSE}
 98 | for (k in seq(ncol(fitted(best)))) {
 99 |   d <- melt(fitted(best))
100 |   colnames(d) <- c("OTU", "cluster", "value")
101 |   d <- subset(d, cluster == k) %>%
102 |      # Arrange OTUs by assignment strength
103 |      arrange(value) %>%
104 |      mutate(OTU = factor(OTU, levels = unique(OTU))) %>%
105 |      # Only show the most important drivers
106 |      filter(abs(value) > quantile(abs(value), 0.8))     
107 | 
108 |   p <- ggplot(d, aes(x = OTU, y = value)) +
109 |        geom_bar(stat = "identity") +
110 |        coord_flip() +
111 |        labs(title = paste("Top drivers: community type", k))
112 |   print(p)
113 | }
114 | ```
115 | 
116 | 


--------------------------------------------------------------------------------
/DOC.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Dissimilarity-Overlap analysis"
 3 | author: "Leo Lahti "
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 |    
19 | ---
20 | <!--
21 |   %\VignetteEngine{knitr::rmarkdown}
22 |   %\VignetteIndexEntry{microbiome tutorial - doc}
23 |   %\usepackage[utf8]{inputenc}
24 |   %\VignetteEncoding{UTF-8}  
25 | -->
26 | 
27 | ```{r setup, message = FALSE, warning = FALSE, results = 'hide'}
28 | #library("devtools")
29 | #install_github("microbiome/microbiome")
30 | ```
31 | 
32 | # Dissimilarity-Overlap analysis
33 | 
34 | This page provides an example of running the dissimilarity-overlap analysis as proposed in Bashan, A., Gibson, T., Friedman, J. et al. Universality of human microbial dynamics. Nature 534, 259–262 (2016). [DOI:10.1038/nature18301](https://doi.org/10.1038/nature18301).
35 | 
36 | 
37 | Load example data:
38 | 
39 | ```{r doc_data, warning=FALSE, message=FALSE}
40 | # Dissimilarity-Overlap analysis
41 | library(microbiome)
42 | data(atlas1006)
43 | ```
44 | 
45 | Estimate the overlap and dissimilarity quantities:
46 | 
47 | ```{r doc_quantities, warning=FALSE, message=FALSE}
48 | # Dissimilarity
49 | d <- phyloseq::distance(microbiome::transform(atlas1006, "compositional"), "jsd", parallel=TRUE)
50 | 
51 | # Overlap
52 | o <- overlap(atlas1006, detection = 0.2/100)
53 | ```
54 | 
55 | ```{r doc_compare, warning=FALSE, message=FALSE}
56 | # Compare
57 | dvec <- d[lower.tri(d)]
58 | ovec <- o[lower.tri(o)]
59 | 
60 | # Assess rough correlation
61 | cc <- cor(dvec, ovec, method = "spearman", use = "pairwise.complete.obs")
62 | 
63 | # Scatterplot
64 | plot(dvec, ovec, pch = ".", main = paste("Spearman rho", round(cc, 2)), las = 1, xlab = "Dissimilarity (Jensen-Shannon)", ylab = "Overlap")
65 | 
66 | #p <- data.frame(D = dvec, O = ovec) %>%
67 | #  ggplot(aes(x = D, y = O)) +
68 | #  geom_hex()
69 | ```
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/Data.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/Data.R


--------------------------------------------------------------------------------
/Data.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Data"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - data}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Importing microbiome data in R
28 | 
29 | 
30 | ### Importing standard formats (CSV, Mothur, BIOM)
31 | 
32 | The microbiome package has import functions for certain standard data formats for 16S profiling (Simple CSV, Mothur, biom). For details, see the function help. To import these, use:
33 | 
34 | ```{r read_phyloseq, eval=FALSE}
35 | # Import output CSV files generated by write_phyloseq
36 | pseq1 <- read_phyloseq(otu.file, taxonomy.file, metadata.file, type = "simple")
37 | 
38 | # Import mother .shared and .taxonomy and metadata files
39 | pseq2 <- read_phyloseq(otu.file, taxonomy.file, metadata.file, type = "mothur")
40 | 
41 | # Import BIOM files
42 | pseq3 <- read_phyloseq(otu.file, taxonomy.file, metadata.file, type = "biom")
43 | ```
44 | 
45 | You can also use additional [import functions](http://joey711.github.io/phyloseq/import-data) from the independent phyloseq R package. 
46 | 
47 | 
48 | ### Converting you own data to phyloseq format in R
49 | 
50 | Alternatively, you can read your data in R (read.table, read.csv or other standard functions) and convert into phyloseq format. The procedure is well explained in the [phyloseq tutorial](http://joey711.github.io/phyloseq/import-data) from the independent phyloseq R package. See also examples on [manipulating](Preprocessing.html) for phyloseq objects.
51 | 
52 | 
53 | 
54 | ## Microbiome example data sets
55 | 
56 | ### Intestinal microbiota profiling of 1006 Western adults
57 | 
58 | [The HITChip Atlas](Atlas.html) data set is available via the microbiome R package in phyloseq format, and via [Data Dryad](http://doi.org/10.5061/dryad.pk75d) in tabular format. This data set from [Lahti et al. Nat. Comm. 5:4344, 2014](http://www.nature.com/ncomms/2014/140708/ncomms5344/full/ncomms5344.html) comes with 130 genus-like taxonomic groups across 1006 western adults with no reported health complications. Some subjects have also short time series. Load the data in R with:
59 | 
60 | ```{r atlasdata, warning=FALSE, message=FALSE}
61 | # Data citation doi: 10.1038/ncomms5344
62 | library(microbiome)
63 | data(atlas1006) 
64 | print(atlas1006)
65 | ```
66 | 
67 | 
68 | ### Diet swap between Rural and Western populations
69 | 
70 | A two-week diet swap study between western (USA) and traditional
71 | (rural Africa) diets, reported in [O'Keefe et al. Nat. Comm. 6:6342,
72 | 2015](http://dx.doi.org/10.1038/ncomms7342). The data is also
73 | available for download from [Data
74 | Dryad](http://dx.doi.org/10.5061/dryad.1mn1n). Load in R with:
75 | 
76 | ```{r dietswap2}
77 | # Data citation doi: 10.1038/ncomms7342
78 | data(dietswap)
79 | print(dietswap)
80 | ```
81 | 
82 | 
83 | ### Intestinal microbiota versus blood metabolites
84 | 
85 | Data set from [Lahti et al. PeerJ 1:e32,
86 | 2013](https://doi.org/10.7717/peerj.32) characterizes associations
87 | between human intestinal microbiota and blood serum lipids. Note that
88 | this data set contains an additional data matrix of lipid
89 | species. Load the data in R with:
90 | 
91 | ```{r peerj2}
92 | # Data citation doi: 10.7717/peerj.32
93 | data(peerj32)
94 | print(names(peerj32))
95 | ```
96 | 


--------------------------------------------------------------------------------
/Edu/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | 


--------------------------------------------------------------------------------
/Edu/extra.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Linear models"
 3 | author: "`r Sys.Date()`"
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   rmdformats::readthedown:
 8 |     self_contained: true
 9 |     thumbnails: true
10 |     lightbox: true
11 |     gallery: true
12 |     use_bookdown: false
13 |     highlight: haddock
14 | ---
15 | <!--
16 |   %\VignetteEngine{knitr::rmarkdown}
17 |   %\VignetteIndexEntry{microbiome tutorial - atlas}
18 |   %\usepackage[utf8]{inputenc}
19 | -->
20 | 
21 | 
22 | ```{r, echo=FALSE, message=FALSE, warning=FALSE}
23 | # Handle citations
24 | require(knitcitations)
25 | require(bookdown)
26 | # cleanbib()
27 | # options("citation_format" = "pandoc")
28 | bib <- read.bibtex("bibliography.bib")
29 | #opts_chunk$set(fig.width=4, fig.height=3, par=TRUE, out.width='2in', fig.pos='H')
30 | library(knitr)
31 | knitr::opts_chunk$set(fig.path = "figure/", dev="CairoPNG")
32 | ```
33 | 
34 | 
35 | # Other usefulf models and algorithms
36 | 
37 | - envfit / bioenvfit
38 | - ordistep
39 | - mixed models
40 |     
41 | 
42 | # Unimodality vs. bi/multimodality
43 | 
44 | - How this may affect the results in the above tests
45 | - How to quantify
46 | 
47 | 
48 | # Dirichlet Mixture Model (DMM) 
49 | 
50 | - Clustering
51 | - Nonparametric models
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/Edu/lm.R:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Linear models"
  3 | author: "`r Sys.Date()`"
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   rmdformats::readthedown:
  8 |     self_contained: true
  9 |     thumbnails: true
 10 |     lightbox: true
 11 |     gallery: true
 12 |     use_bookdown: false
 13 |     highlight: haddock
 14 | ---
 15 | <!--
 16 |   %\VignetteEngine{knitr::rmarkdown}
 17 |   %\VignetteIndexEntry{microbiome tutorial - atlas}
 18 |   %\usepackage[utf8]{inputenc}
 19 | -->
 20 | 
 21 | 
 22 | ```{r, echo=FALSE, message=FALSE, warning=FALSE}
 23 | # Handle citations
 24 | require(knitcitations)
 25 | require(bookdown)
 26 | # cleanbib()
 27 | # options("citation_format" = "pandoc")
 28 | bib <- read.bibtex("bibliography.bib")
 29 | #opts_chunk$set(fig.width=4, fig.height=3, par=TRUE, out.width='2in', fig.pos='H')
 30 | library(knitr)
 31 | knitr::opts_chunk$set(fig.path = "figure/", dev="CairoPNG")
 32 | ```
 33 | 
 34 | 
 35 | # Linear models
 36 | 
 37 | This section provides a hands-on introduction to linear and
 38 | generalized linear models, including covariates.
 39 | 
 40 | 
 41 | Load example data.
 42 | 
 43 | ```{r lm1, warning=FALSE, message=FALSE}
 44 | library(microbiome)
 45 | data(dietswap)
 46 | d <- dietswap
 47 | 
 48 | # Pick microbial abundances for a given taxonomic group
 49 | taxa <- "Dialister"
 50 | ```
 51 | 
 52 | 
 53 | Construct a data.frame with the selected taxonomic group and grouping.
 54 | 
 55 | ```{r lm2, warning=FALSE, message=FALSE}
 56 | df <- data.frame(Abundance = abundances(d)[taxa,],
 57 |                  Group = meta(d)$nationality,
 58 | 		 Log10_Abundance = log10(1 + df$Abundance)		 
 59 | 		 )
 60 | 
 61 | # Compare the groups with a linear model.
 62 | # Use Log10 abundances 
 63 | res <- glm(Log10_Abundance ~ Group, data = df, family = "gaussian")
 64 | ```
 65 | 
 66 | 
 67 | Investigate model coefficients
 68 | 
 69 | ```{r lm_coefs, warning=FALSE, message=FALSE}
 70 | print(summary(res)$coefficients)
 71 | 
 72 | # The intercept equals to the mean in the first group
 73 | # The group term equals to the difference between group means
 74 | print(mean(subset(df, Group == "AAM")$Log10_Abundance))
 75 | print(mean(subset(df, Group == "AFR")$Log10_Abundance) - mean(subset(df, Group == "AAM")$Log10_Abundance))
 76 | ```
 77 | 
 78 | 
 79 | Significance with t-test assuming equal variances.
 80 | 
 81 | ```{r lm_signif, warning=FALSE, message=FALSE}
 82 | print(t.test(Log10_Abundance ~ Group, data = df, var.equal=TRUE)$p.value)
 83 | ```
 84 | 
 85 | 
 86 | Now, using the linear model allows incorporation of additional variables,
 87 | for instance potential confounders.
 88 | 
 89 | ```{r lm_glm, warning=FALSE, message=FALSE}
 90 | df$sex <- meta(d)$sex
 91 | res <- glm(Log10_Abundance ~ Group + sex, data = df, family = "gaussian")
 92 | ```
 93 | 
 94 | 
 95 | Generalized linear models
 96 | 
 97 | The GLM consists of three elements:
 98 |     1. A probability distribution (from exponential family)
 99 |     1. A linear predictor η = Xβ .
100 |     1. A link function g such that E(Y) = μ = g−1(η).
101 | 
102 | We use Poisson with (its natural) log-link.
103 | 
104 | ```{r lm_glm2, warning=FALSE, message=FALSE}
105 | res <- glm(Abundance ~ Group, data = df, family = "poisson")
106 | print(summary(res))
107 | ```
108 | 
109 | 


--------------------------------------------------------------------------------
/Edu/lm.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Linear models"
 3 | author: "`r Sys.Date()`"
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   rmdformats::readthedown:
 8 |     self_contained: true
 9 |     thumbnails: true
10 |     lightbox: true
11 |     gallery: true
12 |     use_bookdown: false
13 |     highlight: haddock
14 | ---
15 | <!--
16 |   %\VignetteEngine{knitr::rmarkdown}
17 |   %\VignetteIndexEntry{microbiome tutorial - atlas}
18 |   %\usepackage[utf8]{inputenc}
19 | -->
20 | 
21 | 
22 | # Linear models
23 | 
24 | This section provides a hands-on introduction to linear and
25 | generalized linear models, including covariates.
26 | 
27 | 
28 | Load example data.
29 | 
30 | ```{r lm1, warning=FALSE, message=FALSE}
31 | library(microbiome)
32 | data(dietswap)
33 | d <- dietswap
34 | 
35 | # Pick microbial abundances for a given taxonomic group
36 | taxa <- "Dialister"
37 | ```
38 | 
39 | 
40 | Construct a data.frame with the selected taxonomic group and grouping.
41 | 
42 | ```{r lm2, warning=FALSE, message=FALSE}
43 | df <- data.frame(Abundance = abundances(d)[taxa,],
44 |                  Group = meta(d)$nationality,
45 | 		 Log10_Abundance = log10(1 + df$Abundance)		 
46 | 		 )
47 | 
48 | # Compare the groups with a linear model.
49 | # Use Log10 abundances 
50 | res <- glm(Log10_Abundance ~ Group, data = df, family = "gaussian")
51 | ```
52 | 
53 | 
54 | Investigate model coefficients
55 | 
56 | ```{r lm_coefs, warning=FALSE, message=FALSE}
57 | print(summary(res)$coefficients)
58 | 
59 | # The intercept equals to the mean in the first group
60 | # The group term equals to the difference between group means
61 | print(mean(subset(df, Group == "AAM")$Log10_Abundance))
62 | print(mean(subset(df, Group == "AFR")$Log10_Abundance) - mean(subset(df, Group == "AAM")$Log10_Abundance))
63 | ```
64 | 
65 | 
66 | Significance with t-test assuming equal variances.
67 | 
68 | ```{r lm_signif, warning=FALSE, message=FALSE}
69 | print(t.test(Log10_Abundance ~ Group, data = df, var.equal=TRUE)$p.value)
70 | ```
71 | 
72 | 
73 | Now, using the linear model allows incorporation of additional variables,
74 | for instance potential confounders.
75 | 
76 | ```{r lm_glm, warning=FALSE, message=FALSE}
77 | df$sex <- meta(d)$sex
78 | res <- glm(Log10_Abundance ~ Group + sex, data = df, family = "gaussian")
79 | ```
80 | 
81 | 
82 | Generalized linear models
83 | 
84 | The GLM consists of three elements:
85 |     1. A probability distribution (from exponential family)
86 |     1. A linear predictor η = Xβ .
87 |     1. A link function g such that E(Y) = μ = g−1(η).
88 | 
89 | We use Poisson with (its natural) log-link.
90 | 
91 | ```{r lm_glm2, warning=FALSE, message=FALSE}
92 | res <- glm(Abundance ~ Group, data = df, family = "poisson")
93 | print(summary(res))
94 | ```
95 | 
96 | 


--------------------------------------------------------------------------------
/Edu/main.R:
--------------------------------------------------------------------------------
1 | library(rmarkdown)
2 | render("all.Rmd")
3 | 
4 | 


--------------------------------------------------------------------------------
/Edu/misc.R:
--------------------------------------------------------------------------------
1 | # Regression plot and model fit
2 | 
3 | # Residuals of the fitted model
4 | 
5 | # Offset terms
6 | 


--------------------------------------------------------------------------------
/Installation.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Installation of microbiome R tools"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock    
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - Installation}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Installing R/RStudio
28 | 
29 | **If you do not already have R/RStudio installed**, do as follows. 
30 | 
31 |   1. Install [R](http://www.r-project.org/) 
32 |   1. Install [RStudio](http://rstudio.org)
33 |   1. With Windows, install also [RTools](http://cran.r-project.org/bin/windows/Rtools/) (version corresponding to your R version)
34 | 
35 | 
36 | ## Installing microbiome R package
37 | 
38 | Open R and install the package. If the installation fails, ensure from
39 | the RStudio tools panel that you have access to the Bioconductor
40 | repository.
41 | 
42 | ```{r installation, message=FALSE, warning=FALSE, eval=FALSE}
43 | library(BiocManager)
44 | BiocManager::install("microbiome")
45 | ```
46 | 
47 | Alternatively, to install the bleeding edge (potentially unstable)
48 | development version, run in R:
49 | 
50 | ```{r microbiomeinstall, message=FALSE, warning=FALSE, eval=FALSE}
51 | library(devtools) # Load the devtools package
52 | install_github("microbiome/microbiome") # Install the package
53 | ```
54 | 
55 | ## Using the tools
56 | 
57 | Once the package has been installed, load it in R
58 | 
59 | ```{r loading, eval=TRUE, message=FALSE}
60 | library(microbiome)  
61 | ```
62 | 
63 | For a brief overview, see the [package vignette](https://bioconductor.org/packages/devel/bioc/vignettes/microbiome/inst/doc/vignette.html) and the more extensive [on-line tutorial](http://microbiome.github.io/tutorials/).
64 | 
65 | 
66 | 
67 | ## Further reading
68 | 
69 |  * [Rmarkdown tips](http://rmarkdown.rstudio.com/)
70 |  * [R cheat sheets](http://devcheatsheet.com/tag/r/)
71 |  * [Using Github with R and RStudio](http://www.molecularecologist.com/2013/11/using-github-with-r-and-rstudio/)
72 |  * [Molecular ecologist's view on code sharing](http://www.molecularecologist.com/2013/08/want-to-share-your-code/)
73 | 
74 | 
75 | ## General instructions to install R packages
76 | 
77 | Most R packages are maintained in CRAN, Bioconductor or Github. To
78 | install the package from each, use:
79 | 
80 | ```{r generalinstall, message=FALSE, warning=FALSE, eval=FALSE}
81 | # Installing from Bioconductor
82 | #source("http://www.bioconductor.org/biocLite.R")
83 | BiocManager::install("MASS")
84 | 
85 | # Installing from CRAN
86 | install.packages("sorvi")
87 | 
88 | # Installing from Github
89 | library(devtools)
90 | install_github("antagomir/netresponse")
91 | ```
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/Interactive.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Interactive examples"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - Interactive}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Interactive plots
28 | 
29 | ```{r interactive-example, warning=FALSE, message=FALSE, fig.height=5, eval = TRUE}
30 | library(ggplot2)
31 | library(rvg)
32 | library(ggiraph)
33 | library(microbiome)
34 | data("atlas1006")
35 | pseq <- atlas1006
36 | x <- microbiome::transform(atlas1006, "compositional")
37 | 
38 | mytheme_main <- theme( panel.background = element_blank(), 
39 |   panel.grid.major = element_line(colour = "#dddddd"), 
40 |   axis.ticks = element_line(colour = "#dddddd") )
41 | 
42 | mytheme_map <- theme(
43 |   panel.background = element_blank(), axis.title.x = element_blank(),
44 |   axis.text = element_blank(), axis.line.x = element_blank(),
45 |   axis.line.y = element_blank(), axis.title.y = element_blank(),
46 |   axis.ticks.x = element_blank(), axis.ticks.y = element_blank() )
47 | 
48 | df <- as(sample_data(x), "data.frame")
49 | df$Dialister <- get_sample(x, "Dialister")
50 | df$Prevotella <- get_sample(x, "Prevotella melaninogenica et rel.")
51 | df$sample <- row.names(df)
52 | 
53 | # geom_point_interactive example
54 | gg_point_1 <- ggplot(df, aes(x = Prevotella, y = Dialister, 
55 |         color = age, tooltip = sample) ) + 
56 |     geom_point_interactive(size=3)
57 | 
58 | # htmlwidget call
59 | ggiraph(code = {print(gg_point_1 + mytheme_main)})
60 | ```
61 | 
62 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2011-2014
2 | COPYRIGHT HOLDER: Leo Lahti, Jarkko Salojarvi
3 | 


--------------------------------------------------------------------------------
/Misc/CompositionAmplicondata.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Microbiome composition"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 |   <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - composition}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 |   
 26 |   
 27 | Also see [phyloseq barplot examples](http://joey711.github.io/phyloseq/plot_bar-examples.html).
 28 |   
 29 | Read example data from a [diet swap study](http://dx.doi.org/10.1038/ncomms7342):
 30 |   
 31 | ```{r composition-example1}
 32 | # Example data
 33 | library(microbiome)
 34 | library(dplyr)
 35 | data(dietswap)
 36 | 
 37 | # Just use prevalent taxa to speed up examples
 38 | # (not absolute counts used in this example)
 39 | pseq <- core(dietswap, detection = 8^2, prevalence = 90/100)
 40 | 
 41 | # Pick sample subset
 42 | library(phyloseq)
 43 | pseq2 <- subset_samples(pseq, group == "DI" & nationality == "AFR" & timepoint.within.group == 1)
 44 | ```
 45 | 
 46 | ### Composition barplots
 47 | 
 48 | Same with compositional (relative) abundances; for each sample (left), or averafged by group (right).
 49 |   
 50 | ```{r composition-example4b, fig.width=12, fig.height=5, out.width="400px", fig.show="hold", warning=FALSE, message=FALSE}
 51 | # Try another theme
 52 | # from https://github.com/hrbrmstr/hrbrthemes
 53 | library(hrbrthemes)
 54 | library(gcookbook)
 55 | library(tidyverse)
 56 | 
 57 | # Limit the analysis on core taxa and specific sample group
 58 | p <- plot_composition(pseq2,
 59 | 		      taxonomic.level = "OTU",
 60 |                       sample.sort = "nationality",
 61 |                       x.label = "nationality") +
 62 |      guides(fill = guide_legend(ncol = 1)) +
 63 |      scale_y_percent() +
 64 |      labs(x = "Samples", y = "Relative abundance (%)",
 65 |                                    title = "Relative abundance data",
 66 |                                    subtitle = "Subtitle",
 67 |                                    caption = "Caption text.") + 
 68 |      theme_ipsum(grid="Y")
 69 | print(p)  
 70 | 
 71 | # Averaged by group
 72 | p <- plot_composition(pseq2,
 73 |                       average_by = "bmi_group", transform = "compositional")
 74 | print(p)
 75 | ```
 76 | 
 77 | 
 78 | 
 79 | ### Composition heatmaps
 80 | 
 81 | 
 82 | Heatmap for CLR-transformed abundances, with samples and OTUs sorted
 83 | with the neatmap method:
 84 |   
 85 | ```{r composition-example7, fig.width=10, fig.height=4, eval=FALSE}
 86 | tmp <- plot_composition(pseq2, plot.type = "heatmap", transform = "compositional", 
 87 |             sample.sort = "neatmap", otu.sort = "neatmap", mar = c(6, 13, 1, 1))
 88 | ```
 89 | 
 90 | 
 91 | 
 92 | ### Plot taxa prevalence
 93 | 
 94 | This function
 95 | allows you to have an overview of OTU prevalences alongwith their
 96 | taxonomic affiliations. This will aid in checking if you filter OTUs
 97 | based on prevalence, then what taxonomic affliations will be lost.
 98 | 
 99 | ```{r plot_prev, fig.height=6, fig.width=8, dev="CairoPNG"}
100 | data(atlas1006)
101 | 
102 | # Use sample and taxa subset to speed up example
103 | p0 <- subset_samples(atlas1006, DNA_extraction_method == "r")
104 | 
105 | # Define detection and prevalence thresholds to filter out rare taxa
106 | p0 <- core(p0, detection = 10, prevalence = 0)
107 | 
108 | # For the available taxonomic levels
109 | plot_taxa_prevalence(p0, "Phylum", detection = 10)
110 | ```
111 | 
112 | ### Amplicon data
113 | 
114 | See further examples on [community composition for amplicon data](CompositionAmplicondata2.html).
115 | 
116 | 


--------------------------------------------------------------------------------
/Misc/Probelevel.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Probe-level analysis for phylogenetic microarrays"
 3 | author: "Leo Lahti, Sudarshan Shetty et al. `r Sys.Date()`"
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - probe}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Probe summarization
28 | 
29 | Summarize (preprocessed) oligo-level data into phylotype level; examples with simulated data; see [read_hitchip](reading) to use your own data. We use and recommend the [Robust Probabilistic Averaging (RPA)](https://github.com/antagomir/RPA/wiki) for probe summarization.
30 | 
31 | 
32 | ```{r probelevel-ex, eval=FALSE}
33 | library(microbiome)
34 | 
35 | library(HITChipDB)
36 | data.directory <- system.file("extdata", package = "microbiome")
37 | 
38 | # Read oligo-level data (here: simulated example data)
39 | probedata <- HITChipDB::read_hitchip(data.directory, method = "frpa")$probedata
40 | 
41 | # Read phylogeny map
42 | # NOTE: use phylogeny.filtered for species/L1/L2 summarization
43 | # Load taxonomy from output directory
44 | f <- system.file("inst/extdata/get_hitchip_taxonomy.R", package = "microbiome")
45 | source(f)
46 | taxonomy <- get_hitchip_taxonomy("HITChip", "filtered")
47 | 
48 | # Summarize oligos into higher level phylotypes
49 | dat <- RPA::summarize_probedata(
50 |                  probedata = probedata,
51 | 		 taxonomy = taxonomy, 
52 |                  method = "rpa",
53 | 		 level = "species")
54 | ```
55 | 
56 | 
57 | ## Retrieve probe-level data
58 | 
59 | Get probes for each probeset:
60 | 
61 | ```{r probelevel-ex2, eval=FALSE}
62 | sets <- RPA::retrieve.probesets(taxonomy, level = "species", name = NULL)
63 | ```
64 | 
65 | 
66 | Get probeset data matrix/matrices:
67 | 
68 | ```{r probelevel-ex3, eval=FALSE}
69 | set <- RPA::get.probeset("Actinomyces naeslundii", "species",
70 |        		     taxonomy, probedata, log10 = TRUE)
71 | ```
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/Misc/Visualization.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Visualization"
 3 | author: "Leo Lahti, Sudarshan Shetty et al. `r Sys.Date()`"
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - stability}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | # Experimental functions
28 | 
29 | 
30 | **Time series for individual subjects**
31 | 
32 | ```{r homogeneity-timeseries, message=FALSE, warning=FALSE, fig.height=5, fig.height=5, eval=FALSE}
33 | source(system.file("extdata/plot_longitudinal.R", package = "microbiome"))
34 | p <- plot_longitudinal(pseq, "Dialister", subject = "831", tipping.point = 0.5)
35 | print(p)
36 | ```
37 | 


--------------------------------------------------------------------------------
/Misc/info.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Rendering statistics"
 3 | author: "`r Sys.Date()`"
 4 | output:
 5 |   BiocStyle::html_document:
 6 |     number_sections: no
 7 |     toc: yes
 8 |     toc_depth: 4
 9 |     toc_float: true
10 |     self_contained: true
11 |     thumbnails: true
12 |     lightbox: true
13 |     gallery: true
14 |     use_bookdown: false
15 |     highlight: haddock    
16 | ---
17 | <!--
18 |   %\VignetteEngine{knitr::rmarkdown}
19 |   %\VignetteIndexEntry{microbiome tutorial - DMM}
20 |   %\usepackage[utf8]{inputenc}
21 |   %\VignetteEncoding{UTF-8}  
22 | -->
23 | 
24 | 
25 | ## Execution times
26 | 
27 | This visualization shows the rendering times for the tutorial pages for possible optimization purposes.
28 | 
29 | ```{r exectime, echo=FALSE, fig.width=20, fig.height=20, warning=FALSE, message=FALSE, out.width="100%"}
30 | library(knitr)
31 | theme_set(theme_bw(40))
32 | df <- as.data.frame(t(sapply(times, identity)))
33 | df$file <- rownames(df)
34 | dfm <- as_tibble(melt(df, id = "file")) %>%
35 |          filter(variable %in% c("user.self", "elapsed")) %>%
36 | 	 arrange(value) %>%
37 | 	 mutate(file = factor(file, levels = unique(file)))
38 | p <- ggplot(dfm, aes(x = file, fill = variable, y = value)) +
39 |        geom_bar(stat = "identity", position = "dodge", color = "black") +
40 |        coord_flip() + labs(x = "", y = "Time (s)")
41 | print(p)
42 | ```
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/Mixedmodels.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Comparisons of microbiome community composition"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - comparisons}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Mixed models for univariate comparisons
28 | 
29 | 
30 | Load example data:
31 | 
32 | ```{r boxplot-example_w, warning=FALSE, message=FALSE}
33 | # Load libraries
34 | library(microbiome)
35 | library(ggplot2)
36 | library(dplyr)
37 | library(IRanges)
38 | 
39 | # Probiotics intervention example data 
40 | data(peerj32) # Source: https://peerj.com/articles/32/
41 | pseq <- peerj32$phyloseq # Rename the example data
42 | ```
43 | 
44 | 
45 | Abundance boxplot
46 | 
47 | ```{r boxplot2_w, warning=FALSE, message=FALSE}
48 | p <- boxplot_abundance(pseq, x = "time", y = "Akkermansia", line = "subject") +
49 |     scale_y_log10()
50 | print(p)
51 | ```
52 | 
53 | 
54 | ## Linear model comparison with random effect subject term
55 | 
56 | Test individual taxonomic group
57 | 
58 | ```{r comparisons-lmer_w, message=FALSE, warning=FALSE}
59 | # Get sample metadata
60 | dfs <- meta(pseq)
61 | 
62 | # Add abundance as the signal to model
63 | dfs$signal <- abundances(pseq)["Akkermansia", rownames(dfs)]
64 | 
65 | # Paired comparison
66 | # with fixed group effect and random subject effect
67 | library(lme4)
68 | out <- lmer(signal ~ group + (1|subject), data = dfs)
69 | out0 <- lmer(signal ~ (1|subject), data = dfs)
70 | comp <- anova(out0, out)
71 | pv <- comp[["Pr(>Chisq)"]][[2]]
72 | print(pv)
73 | ```
74 | 
75 | 


--------------------------------------------------------------------------------
/MovingPictures_MicrobiomeExperiment.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/MovingPictures_MicrobiomeExperiment.rds


--------------------------------------------------------------------------------
/Negativebinomial.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Negative binomial"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - comparisons}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Group-wise comparisons with negative binomial
28 | 
29 | [Read more on negative binomials](http://www.ats.ucla.edu/stat/r/dae/nbreg.htm)
30 | 
31 | Load example data:
32 | 
33 | ```{r boxplot-example, warning=FALSE, message=FALSE}
34 | # Load libraries
35 | library(microbiome)
36 | library(ggplot2)
37 | library(dplyr)
38 | 
39 | # Probiotics intervention example data 
40 | data(peerj32) # Source: https://peerj.com/articles/32/
41 | pseq <- peerj32$phyloseq # Rename the example data
42 | ```
43 | 
44 | 
45 | Visually compare Akkermansia abundance between time point 1 and 2
46 | 
47 | ```{r boxplot2, warning=FALSE, message=FALSE}
48 | p <- boxplot_abundance(pseq, x = "time", y = "Akkermansia", line = "subject") + scale_y_log10()
49 | print(p)
50 | ```
51 | 
52 | Test statistical significance with negative binomial:
53 | 
54 | ```{r comparisons2b, message=FALSE, error=FALSE, warning=FALSE}
55 | library(MASS)
56 | library(tidyr)
57 | 
58 | # Analyse specific taxa
59 | tax <- "Akkermansia"
60 | 
61 | # Pick the signal (abundance) for this tax
62 | sample_data(pseq)$signal <- get_sample(pseq, tax)
63 | 
64 | # Negative binomial test with group and gender included
65 | res <- glm.nb(signal ~ group + sex, data = meta(pseq))
66 | 
67 | # Show the results
68 | print(coef(summary(res)))
69 | ```
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/Networks.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Taxonomic network visualization"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 | <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - networks}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 | 
 26 | 
 27 | Load [example data](Data.html):
 28 | 
 29 | ```{r networks1, message=FALSE, warning=FALSE}
 30 | library(microbiome)
 31 | data(dietswap)
 32 | pseq <- dietswap
 33 | 
 34 | # Keep only the prevalent taxa to speed up examples
 35 | pseq <- core(pseq, detection = 5^2, prevalence = 80/100)
 36 | pseq <- subset_samples(pseq, nationality == "AFR" & group == "DI" & bmi_group == "lean")
 37 | ```
 38 | 
 39 | 
 40 | ### Taxonomic network reconstruction 
 41 | 
 42 | See the [phyloseq tutorial](http://joey711.github.io/phyloseq/plot_network-examples) for
 43 | additional network visualization tools.
 44 | 
 45 | The widely reported compositionality bias in similarity measures can
 46 | be fixed with SpiecEasi or SparCC; the implementations are available
 47 | via the [SpiecEasi package](https://github.com/zdk123/SpiecEasi). Note
 48 | that the execution is slow.
 49 | 
 50 | ```{r networks4, warning=FALSE, message=FALSE, fig.width=10, fig.height=10}
 51 | # Pick the OTU table
 52 | library(phyloseq)
 53 | otu <- abundances(pseq)
 54 | ```
 55 | 
 56 | 
 57 | ```{r spieceasi, warning=FALSE, message=FALSE, fig.width=10, fig.height=10, eval=TRUE}
 58 | # SPIEC-EASI network reconstruction
 59 | # In practice, use more repetitions
 60 | library(devtools)
 61 | #install_github("zdk123/SpiecEasi")
 62 | library(SpiecEasi) #install_github("zdk123/SpiecEasi")
 63 | net <- spiec.easi(t(otu), method='mb', lambda.min.ratio=1e-2, nlambda=5, icov.select.params=list(rep.num=1))
 64 | 
 65 | ## Create graph object
 66 | n <- net$refit
 67 | #colnames(n) <- rownames(n) <- rownames(otu)
 68 | 
 69 | # Network format
 70 | library(network)
 71 | #netw <- network(as.matrix(n), directed = FALSE)
 72 | 
 73 | # igraph format
 74 | library(igraph)
 75 | # ig <- graph.adjacency(n, mode='undirected', add.rownames = TRUE)
 76 | 
 77 | # Network layout
 78 | # coord <- layout.fruchterman.reingold(ig)
 79 | 
 80 | ## set size of vertex to log2 mean abundance 
 81 | # vsize <- log2(rowMeans(otu))
 82 | 
 83 | # Visualize the network
 84 | # print(plot(ig, layout = coord, vertex.size = vsize, vertex.label = names(vsize)))
 85 | ```
 86 | 
 87 | 
 88 | Investigate degree distribution with the following:
 89 | 
 90 | ```{r degree, warning=FALSE, message=FALSE, fig.width=10, fig.height=7, eval=FALSE}
 91 | #dd <- degree.distribution(ig)
 92 | #plot(0:(length(dd)-1), dd, ylim = c(0,.35), type = 'b',  ylab = "Frequency", xlab = "Degree", main = "Degree Distributions")
 93 | ```
 94 | 
 95 | 
 96 | Visualize the network with [ggnet2](https://briatte.github.io/ggnet):
 97 | 
 98 | ```{r networks5, warning=FALSE, message=FALSE, fig.width=12, fig.height=7, eval = TRUE}
 99 | library(GGally)
100 | #library(ggnet)
101 | library(network)
102 | library(sna)
103 | library(ggplot2)
104 | library(intergraph) # ggnet2 works also with igraph with this
105 | 
106 | phyla <- map_levels(rownames(otu),
107 |            from = "Genus", to = "Phylum",
108 |            tax_table(pseq))
109 | 
110 | #netw %v% "Phylum" <- phyla
111 | #p <- ggnet2(netw, color = "Phylum", label = TRUE, label.size = 2)
112 | ```
113 | 
114 | 
115 | ```{r networks5_plot, warning=FALSE, message=FALSE, fig.width=12, fig.height=7, eval = TRUE}
116 | print(p)
117 | ```
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/Ordination.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Ordination analysis"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 | <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - ordination}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 | 
 26 | 
 27 | 
 28 | 
 29 | Full examples for standard ordination techniques applied to phyloseq data, based on the [phyloseq ordination tutorial](http://joey711.github.io/phyloseq/plot_ordination-examples.html). For handy wrappers for some common ordination tasks in microbiome analysis, see [landscaping examples](Landscaping.html)
 30 | 
 31 | 
 32 | Load example data:
 33 | 
 34 | ```{r ordination1, message=FALSE, warning=FALSE, eval=TRUE}
 35 | library(microbiome)
 36 | library(phyloseq)
 37 | library(ggplot2)
 38 | data(dietswap)
 39 | pseq <- dietswap
 40 | 
 41 | # Convert to compositional data
 42 | pseq.rel <- microbiome::transform(pseq, "compositional")
 43 | 
 44 | # Pick core taxa with with the given prevalence and detection limits
 45 | pseq.core <- core(pseq.rel, detection = .1/100, prevalence = 90/100)
 46 | 
 47 | # Use relative abundances for the core
 48 | pseq.core <- microbiome::transform(pseq.core, "compositional")
 49 | ```
 50 | 
 51 | 
 52 | ## Sample ordination
 53 | 
 54 | Project the samples with the given method and dissimilarity measure. 
 55 | 
 56 | ```{r ordination2, message=FALSE, warning=FALSE, results="hide"}
 57 | # Ordinate the data
 58 | set.seed(4235421)
 59 | # proj <- get_ordination(pseq, "MDS", "bray")
 60 | ord <- ordinate(pseq, "MDS", "bray")
 61 | ```
 62 | 
 63 | 
 64 | ## Multidimensional scaling (MDS / PCoA)
 65 | 
 66 | ```{r ordination-ordinate23, warning=FALSE, message=FALSE, fig.width=8, fig.height=6, fig.show="hold", out.width="400px"}
 67 | plot_ordination(pseq, ord, color = "nationality") +
 68 |                 geom_point(size = 5)
 69 | ```
 70 | 
 71 | 
 72 | ## Canonical correspondence analysis (CCA)
 73 | 
 74 | ```{r ordination-ordinate24a, warning=FALSE, message=FALSE, fig.width=8, fig.height=6, fig.show="hold", out.width="400px"}
 75 | # With samples
 76 | pseq.cca <- ordinate(pseq, "CCA")
 77 | p <- plot_ordination(pseq, pseq.cca,
 78 |        type = "samples", color = "nationality")
 79 | p <- p + geom_point(size = 4)
 80 | print(p)
 81 | 
 82 | # With taxa:
 83 | p <- plot_ordination(pseq, pseq.cca,
 84 |        type = "taxa", color = "Phylum")
 85 | p <- p + geom_point(size = 4)
 86 | print(p)
 87 | ```
 88 | 
 89 | 
 90 | ## Split plot
 91 | 
 92 | ```{r ordination-ordinate25, warning=FALSE, message=FALSE, fig.width=14, fig.height=5}
 93 | plot_ordination(pseq, pseq.cca,
 94 | 		      type = "split", shape = "nationality", 
 95 |     		      color = "Phylum", label = "nationality")
 96 | ```
 97 | 
 98 | 
 99 | 
100 | ## t-SNE
101 | 
102 | t-SNE is a popular new ordination technique.
103 | 
104 | ```{r tsne, warning=FALSE, message=FALSE, fig.width=14, fig.height=5}
105 | library(vegan)
106 | library(microbiome)
107 | library(Rtsne) # Load package
108 | set.seed(423542)
109 | 
110 | method <- "tsne"
111 | trans <- "hellinger"
112 | distance <- "euclidean"
113 | 
114 | # Distance matrix for samples
115 | ps <- microbiome::transform(pseq, trans)
116 | 
117 | # Calculate sample similarities
118 | dm <- vegdist(otu_table(ps), distance)
119 | 
120 | # Run TSNE
121 | tsne_out <- Rtsne(dm, dims = 2) 
122 | proj <- tsne_out$Y
123 | rownames(proj) <- rownames(otu_table(ps))
124 | 
125 | library(ggplot2)
126 | p <- plot_landscape(proj, legend = T, size = 1) 
127 | print(p)
128 | ```
129 | 
130 | 


--------------------------------------------------------------------------------
/Output.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Generating output files"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock 
18 | 
19 | ---
20 | <!--
21 |   %\VignetteEngine{knitr::rmarkdown}
22 |   %\VignetteIndexEntry{microbiome tutorial - output}
23 |   %\usepackage[utf8]{inputenc}
24 |   %\VignetteEncoding{UTF-8}  
25 | -->
26 | 
27 | 
28 | ## Writing diversity table into file
29 | 
30 | ```{r diversity-example4, warning=FALSE, eval = FALSE}
31 | output.dir <- "./"
32 | write.table(div.table, file = "DiversityTable.tab", sep = "\t")
33 | ```
34 | 
35 | ## Save clustering image to a file
36 | 
37 | Save in PDF:
38 | 
39 | ```{r clustering-saving, message=FALSE, warning=FALSE, eval=FALSE}
40 | pdf("myplot.pdf", width = 7, height = 7 * length(hc$order)/20)
41 | plot(hc, hang=-1, main = "Hierarchical clustering")
42 | dev.off()
43 | ```
44 | 
45 | Save in TIFF:
46 | 
47 | ```{r clustering-saving2, message=FALSE, warning=FALSE, eval=FALSE}
48 | tiff("myplot.tif", width = 480, height = 480 * length(hc$order)/20)
49 | plot(hc, hang=-1, main = "Hierarchical clustering")
50 | dev.off()
51 | ```
52 | 
53 | To save in Microsoft EMF format, try the following. If you find a
54 | way to tune figure width for emf files kindly let the admins know.
55 | 
56 | ```{r clustering-saving3, message=FALSE, warning=FALSE, eval=FALSE}
57 | plot(hc, hang=-1, main = "Hierarchical clustering")
58 | savePlot("myplot.emf", type = "emf")
59 | dev.off()
60 | ```
61 | 


--------------------------------------------------------------------------------
/PERMANOVA.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Group-wise comparisons of microbiome composition"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock    
 18 | ---
 19 | <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - comparisons}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 | 
 26 | 
 27 | ## PERMANOVA for community-level multivariate comparisons
 28 | 
 29 | PERMANOVA quantifies multivariate community-level differences between
 30 | groups.
 31 | 
 32 | 
 33 | Load example data:
 34 | 
 35 | ```{r boxplot-example, warning=FALSE, message=FALSE}
 36 | # Load libraries
 37 | library(microbiome)
 38 | library(ggplot2)
 39 | library(dplyr)
 40 | 
 41 | # Probiotics intervention example data 
 42 | data(peerj32) # Source: https://peerj.com/articles/32/
 43 | pseq <- peerj32$phyloseq # Rename the example data
 44 | 
 45 | # Pick relative abundances (compositional) and sample metadata 
 46 | pseq.rel <- microbiome::transform(pseq, "compositional")
 47 | otu <- abundances(pseq.rel)
 48 | meta <- meta(pseq.rel)
 49 | ```
 50 | 
 51 | 
 52 | ## Visualize microbiome variation
 53 | 
 54 | Visualize the population density and highlight sample groups (probiotic treatment LGG  vs Placebo):
 55 | 
 56 | ```{r comparisons_permanova_visu, error=FALSE, message=FALSE, warnings=FALSE}
 57 | p <- plot_landscape(pseq.rel, method = "NMDS", distance = "bray", col = "group", size = 3)
 58 | print(p)
 59 | ```
 60 | 
 61 | 
 62 | ## PERMANOVA significance test for group-level differences
 63 | 
 64 | Now let us evaluate whether the group (probiotics vs. placebo) has a
 65 | significant effect on overall gut microbiota composition. Perform PERMANOVA: 
 66 | 
 67 | ```{r comparisons_permanova_analyse, message=FALSE, warnings=FALSE}
 68 | # samples x species as input
 69 | library(vegan)
 70 | permanova <- adonis(t(otu) ~ group,
 71 |                data = meta, permutations=99, method = "bray")
 72 | 
 73 | # P-value
 74 | print(as.data.frame(permanova$aov.tab)["group", "Pr(>F)"])
 75 | ```
 76 | 
 77 | 
 78 | ## Checking the homogeneity condition
 79 | 
 80 | Check that variance homogeneity assumptions hold (to ensure the reliability of the results):
 81 | 
 82 | 
 83 | ```{r comparisons-permanova2, message=FALSE, warnings=FALSE}
 84 | # Note the assumption of similar multivariate spread among the groups
 85 | # ie. analogous to variance homogeneity
 86 | # Here the groups have signif. different spreads and
 87 | # permanova result may be potentially explained by that.
 88 | dist <- vegdist(t(otu))
 89 | anova(betadisper(dist, meta$group))
 90 | ```
 91 | 
 92 | ## Investigate the top factors
 93 | 
 94 | Show coefficients for the top taxa separating the groups
 95 | 
 96 | ```{r permanova_top, fig.width=5, fig.height=5, message=FALSE, error=FALSE, warnings=FALSE}
 97 | coef <- coefficients(permanova)["group1",]
 98 | top.coef <- coef[rev(order(abs(coef)))[1:20]]
 99 | par(mar = c(3, 14, 2, 1))
100 | barplot(sort(top.coef), horiz = T, las = 1, main = "Top taxa")
101 | ```
102 | 
103 | 


--------------------------------------------------------------------------------
/PlotDiversity.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Plot alpha diversity"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 | <!--
 20 |   %\VignetteIndexEntry{microbiome tutorial}
 21 |   %\VignetteEngine{knitr::rmarkdown}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}
 24 | -->
 25 | 
 26 | ## Diversity plots  
 27 | 
 28 | Using the `alpha` function in `microbiome R packge` you can calculate a wide variaty of diversity indices. Comparison and visualising group based differecences or similarities is also important. Here, we show steps from calculating diversity indices using microbiome R package and visualising the differences and/or similarities between groups. A useful R package is [ggpubr](http://www.sthda.com/english/rpkgs/ggpubr/). If you have not installed it, please install it.     
 29 | 
 30 | Load libraries and data.
 31 | 
 32 | ```{r Ad-plot, warning=FALSE, message=FALSE}
 33 | 
 34 | library(microbiome)
 35 | library(ggpubr)
 36 | library(knitr)
 37 | library(dplyr)
 38 | 
 39 | data(dietswap)
 40 | pseq <- dietswap
 41 | ```
 42 | 
 43 | 
 44 | ## Alpha diversity
 45 | 
 46 | This returns a table with selected diversity indicators. Check a separate page on [Alpha](Diversity.html) for other functions.
 47 | 
 48 | ```{r Ad-plot1, warning=FALSE, message=FALSE, results="asis"}
 49 | 
 50 | ps1 <- prune_taxa(taxa_sums(pseq) > 0, pseq)
 51 | 
 52 | tab <- microbiome::alpha(ps1, index = "all")
 53 | kable(head(tab))
 54 | 
 55 | ```
 56 | 
 57 | 
 58 | ## Prepare data for vizualisation
 59 | Now, get the metadata (sample_data) from the `phyloseq` object
 60 | 
 61 | ```{r Ad-plot2, warning=FALSE, message=FALSE, results="asis"}
 62 | 
 63 | ps1.meta <- meta(ps1)
 64 | kable(head(ps1.meta))
 65 | 
 66 | ```
 67 | 
 68 | Add the diversity table to metadata
 69 | 
 70 | ```{r Ad-plot3, warning=FALSE, message=FALSE}
 71 | 
 72 | ps1.meta$Shannon <- tab$diversity_shannon 
 73 | ps1.meta$InverseSimpson <- tab$diversity_inverse_simpson
 74 | ```
 75 | 
 76 | Let's say we want to compare differences in Shannon index between bmi group of the study subjects.  
 77 | 
 78 | ```{r Ad-plot4, warning=FALSE, message=FALSE}
 79 | 
 80 | # create a list of pairwise comaprisons
 81 | bmi <- levels(ps1.meta$bmi_group) # get the variables
 82 | 
 83 | # make a pairwise list that we want to compare.
 84 | bmi.pairs <- combn(seq_along(bmi), 2, simplify = FALSE, FUN = function(i)bmi[i])
 85 | 
 86 | print(bmi.pairs)
 87 | ```
 88 | 
 89 | Sometimes that variables can be stored as characters and sometime as factors. In such a senario `levels()` may return an empty vector. A work around for this can be found [here](https://github.com/microbiome/microbiome/issues/143).   
 90 | 
 91 | 
 92 | ## Violin plot  
 93 | 
 94 | Using `ggpubr` a violin plot will be created
 95 | 
 96 | ```{r Ad-plot5, warning=FALSE, message=FALSE, fig.width=8, fig.height=6, eval=FALSE}
 97 | #ps1.meta$'' <- alpha(ps1, index = 'shannon')
 98 | p1 <- ggviolin(ps1.meta, x = "bmi_group", y = "Shannon",
 99 |  add = "boxplot", fill = "bmi_group", palette = c("#a6cee3", "#b2df8a", "#fdbf6f")) 
100 | print(p1)
101 | ```
102 | ![violin plot](Rplotvio1.jpeg)
103 | 
104 | ## Statistics  
105 | 
106 | Pairwise comparision using non-parametric test (Wilcoxon test).
107 | 
108 | ```{r Ad-plot6, warning=FALSE, message=FALSE, eval=FALSE}
109 | p1 <- p1 + stat_compare_means(comparisons = bmi.pairs) 
110 | print(p1)
111 | ```
112 | ![violin for comparison](Rplotvio2.jpeg)
113 | 
114 | 
115 | For more information and useful tips and suggestions check the [Statistical tools for high-throughput data analysis](http://www.sthda.com/english/rpkgs/ggpubr/).  
116 | 
117 | 


--------------------------------------------------------------------------------
/RDA.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "RDA"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 | <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - rda}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 | 
 26 | 
 27 | ## RDA analysis and visualization. 
 28 | 
 29 | NOTE: These functions have unresolved issues and many
 30 | dependencies. They will require thorough revision before inclusion to
 31 | the package is possible.
 32 | 
 33 | Load the package and example data:
 34 | 
 35 | ```{r rda, warning=FALSE, message=FALSE, eval=FALSE}
 36 | library(microbiome)
 37 | data(peerj32) # Data from https://peerj.com/articles/32/
 38 | pseq <- peerj32$phyloseq # phyloseq data
 39 | 
 40 | # Only check the core taxa to speed up examples
 41 | pseq <- core(pseq, detection = 10^2, prevalence = 95/100)
 42 | 
 43 | pseq.trans <- transform(pseq, "hell") # Hellinger transform
 44 | ```
 45 | 
 46 | 
 47 | ## Bagged RDA
 48 | 
 49 | Bagged RDA provides added robustness in the analysis compared to the standard RDA. Fit bagged (bootstrap aggregated) RDA on a phyloseq object (alternatively you could apply it to the abundance matrix and covariates directly):
 50 | 
 51 | ```{r rda5, warning=FALSE, message=FALSE, eval=FALSE}
 52 | # In any real study, use bs.iter = 100 or higher
 53 | # to achieve meaningful benefits from the bagged version.
 54 | # In this example we use bs.iter = 2 just to speed up the
 55 | # example code for educational purposes
 56 | res <- rda_bagged(pseq.trans, "group", bs.iter=2)
 57 | ```
 58 | 
 59 | Visualizing bagged RDA:
 60 | 
 61 | ```{r rda6, warning=FALSE, message=FALSE, fig.width=8, fig.height=8, eval=FALSE}
 62 | plot_rda_bagged(res)
 63 | ```
 64 | 
 65 | 
 66 | 
 67 | ## Standard RDA 
 68 | 
 69 | Standard RDA for microbiota profiles versus the given (here 'time')
 70 | variable from sample metadata (see also the RDA method in
 71 | phyloseq::ordinate)
 72 | 
 73 | ```{r rda2, warning=FALSE, message=FALSE, eval=FALSE}
 74 | x <- pseq.trans
 75 | otu <- abundances(x)
 76 | metadata <- meta(x)
 77 | 
 78 | library(vegan)
 79 | rda.result <- vegan::rda(t(otu) ~ factor(metadata$time),
 80 |                          na.action = na.fail, scale = TRUE)
 81 | ```
 82 | 
 83 | Proportion explained by the given factor
 84 | 
 85 | ```{r rda2b, warning=FALSE, message=FALSE, eval=FALSE}
 86 | summary(rda.result)$constr.chi/summary(rda.result)$tot.chi
 87 | ```
 88 | 
 89 | 
 90 | ## RDA visualization
 91 | 
 92 | Visualize the standard RDA output.
 93 | 
 94 | ```{r rda4, warning=FALSE, message=FALSE, fig.width=8, fig.height=8, eval=FALSE}
 95 | plot(rda.result, choices = c(1,2), type = "points", pch = 15, scaling = 3, cex = 0.7, col = metadata$time)
 96 | points(rda.result, choices = c(1,2), pch = 15, scaling = 3, cex = 0.7, col = metadata$time)
 97 | pl <- ordihull(rda.result, metadata$time, scaling = 3, label = TRUE)
 98 | ```
 99 | 
100 | 
101 | ## RDA significance test
102 | 
103 | ```{r rda2bc, warning=FALSE, message=FALSE, eval=FALSE}
104 | permutest(rda.result) 
105 | ```
106 | 
107 | 
108 | ## RDA with confounding variables 
109 | 
110 | For more complex RDA scenarios, use the standard RDA available via the
111 | vegan R package.
112 | 
113 | ```{r rda3, warning=FALSE, message=FALSE, fig.width=8, fig.height=8, eval=FALSE}
114 | # Pick microbiota profiling data from the phyloseq object
115 | otu <- abundances(pseq.trans)
116 | 
117 | # Sample annotations
118 | metadata <- meta(pseq.trans)
119 | 
120 | # RDA with confounders using the vegan function
121 | rda.result2 <- vegan::rda(t(otu) ~ metadata$time + Condition(metadata$subject + metadata$gender))
122 | ```
123 | 
124 | 
125 | 


--------------------------------------------------------------------------------
/Regression.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Regression"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - regression}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Regression plots
28 | 
29 | Regression curve with smoothed error bars based on
30 | the [Visually-Weighted Regression](http://www.fight-entropy.com/2012/07/visually-weighted-regression.html) by Solomon M. Hsiang and Felix Schonbrodt's original code](http://www.nicebread.de/visually-weighted-watercolor-plots-new-variants-please-vote/). 
31 | 
32 | ```{r variability-regression, message=FALSE, fig.width=10, fig.height=5, warning=FALSE, dev="CairoPNG"}
33 | library(microbiome)
34 | data(atlas1006)
35 | plot_regression(diversity ~ age, meta(atlas1006))
36 | ```
37 | 
38 | 


--------------------------------------------------------------------------------
/Rplotvio1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/Rplotvio1.jpeg


--------------------------------------------------------------------------------
/Rplotvio2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/Rplotvio2.jpeg


--------------------------------------------------------------------------------
/TODO.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "TODO"
 3 | prettydoc::html_pretty:
 4 |     theme: cayman
 5 |     highlight: github
 6 | ---
 7 | <!--
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteIndexEntry{microbiome tutorial - Installation}
10 |   %\usepackage[utf8]{inputenc}
11 |   %\VignetteEncoding{UTF-8}  
12 | -->
13 | 
14 | List of issues to improve the package and tutorials:
15 | 
16 | * Merge with [microbiomeSeq]() R package
17 | * Add a list of related resources ([pathoStat]())
18 | * Lay out overall project roadmap
19 | * Consider package/tutorial collection instead of a single package
20 | 
21 | ### Missing examples
22 | 
23 |  * is_compositional
24 | 
25 | ### Related work
26 | 
27 |  * [metagenomeSeq](https://bioconductor.org/packages/release/bioc/html/metagenomeSeq.html)
28 |  * [microbiomeSeq]()
29 |  * [pathoStat]()  


--------------------------------------------------------------------------------
/Themes.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Themes"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 | <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - clustering}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 | 
 26 | 
 27 | ## Cross-plot themes
 28 | 
 29 | 
 30 | Load example data:
 31 | 
 32 | ```{r core-prevalence, message=FALSE, warning=FALSE}
 33 | # Load data
 34 | library(microbiome)
 35 | data(atlas1006)
 36 | 
 37 | # Rename the data
 38 | pseq <- subset_samples(atlas1006, DNA_extraction_method == "r")
 39 | ```
 40 | 
 41 | 
 42 | ```{r crossplot1, fig.width=6, fig.height=5, warning=FALSE, message=FALSE, fig.show="hold", out.width="300px"}
 43 | library(hrbrthemes)
 44 | library(gcookbook)
 45 | library(tidyverse)
 46 | # Themes from https://github.com/hrbrmstr/hrbrthemse
 47 | 
 48 | p <- ggplot(meta(pseq), aes(age, diversity)) +
 49 |   geom_point() +
 50 |   labs(x="Age (y)", y="Diversity (Shannon)",
 51 |        title="HITChip Atlas",
 52 |        subtitle="Age-diversity relation",
 53 |        caption="Caption shown here 'g'") 
 54 | 
 55 | 
 56 | print(p + theme_ipsum() + ggtitle("HITChip - theme_ipsum"))
 57 | print(p + theme_ipsum_rc() + ggtitle("HITChip - theme_ipsum_rc"))
 58 | ```
 59 | 
 60 | 
 61 | ## Color fill example
 62 | 
 63 | ```{r crossplot2, fig.width=6, fig.height=5, warning=FALSE, message=FALSE, fig.show="hold", out.width="300px"}
 64 | # Theme from https://github.com/hrbrmstr/hrbrthemse
 65 | p <- ggplot(uspopage, aes(x=Year, y=Thousands, fill=AgeGroup)) + 
 66 |   geom_area() +
 67 |   scale_fill_ipsum() +
 68 |   scale_x_continuous(expand=c(0,0)) +
 69 |   scale_y_comma() +
 70 |   labs(title="Age distribution of population in the U.S., 1900-2002",
 71 |        subtitle="Example data from the R Graphics Cookbook",
 72 |        caption="Source: R Graphics Cookbook") +
 73 |   theme_ipsum_rc(grid="XY") +
 74 |   theme(axis.text.x=element_text(hjust=c(0, 0.5, 0.5, 0.5, 1))) +
 75 |   theme(legend.position="bottom")
 76 | print(p)  
 77 | ```
 78 | 
 79 | ## Barplot example
 80 | 
 81 | ```{r crossplot3, fig.width=6, fig.height=5, warning=FALSE, message=FALSE, fig.show="hold", out.width="300px", eval=FALSE}
 82 | update_geom_font_defaults(font_rc_light)
 83 | df <- count(mpg, class) %>% 
 84 |   mutate(n = n*2000) %>% 
 85 |   arrange(n) %>% 
 86 |   mutate(class=factor(class, levels=class)) 
 87 | 
 88 | p <- ggplot(df, aes(class, n)) +
 89 |   geom_col() +
 90 |   geom_text(aes(label=scales::comma(n)), hjust=0, nudge_y=2000) +
 91 |   scale_y_comma(limits=c(0,150000)) +
 92 |   coord_flip() +
 93 |   labs(x="Fuel effiiency (mpg)", y="Weight (tons)",
 94 |        title="Seminal ggplot2 column chart example with commas",
 95 |        subtitle="A plot that is only useful for demonstration purposes, esp since you'd never\nreally want direct labels and axis labels",
 96 |        caption="Caption goes here") + 
 97 |   theme_ipsum_rc(grid="X")
 98 | 
 99 | print(p)
100 | ```
101 | 
102 | 
103 | ## Viridis scale
104 | 
105 | 
106 | ```{r core2, fig.width=9, fig.heigth=6, out.width="400px", warning=FALSE}
107 | # Load data
108 | library(microbiome)
109 | data(peerj32)
110 | 
111 | # Rename the data
112 | pseq <- peerj32$phyloseq
113 | 
114 | # Calculate compositional version of the data
115 | # (relative abundances)
116 | pseq.rel <- microbiome::transform(pseq, "compositional")
117 | 
118 | # With compositional (relative) abundances
119 | det <- c(0, 0.1, 0.5, 2, 5, 20)/100
120 | prevalences <- seq(.05, 1, .05)
121 | p <- plot_core(pseq.rel, prevalences = prevalences, detections = det, plot.type = "lineplot") + xlab("Relative Abundance (%)")
122 | 
123 | # Same with the viridis color palette
124 | # color-blind friendly and uniform
125 | # options: viridis, magma, plasma, inferno
126 | # https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html
127 | # Also discrete=TRUE versions available
128 | library(viridis)
129 | print(p + scale_fill_viridis())
130 | ```
131 | 


--------------------------------------------------------------------------------
/Visualization.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Visualization"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - stability}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | # Experimental functions
28 | 
29 | 
30 | **Time series for individual subjects**
31 | 
32 | ```{r homogeneity-timeseries, message=FALSE, warning=FALSE, fig.height=5, fig.height=5, eval=FALSE}
33 | source(system.file("extdata/plot_longitudinal.R", package = "microbiome"))
34 | p <- plot_longitudinal(pseq, "Dialister", subject = "831", tipping.point = 0.5)
35 | print(p)
36 | ```
37 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-dinky


--------------------------------------------------------------------------------
/ancom/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *~
3 | 


--------------------------------------------------------------------------------
/ancom/README.md:
--------------------------------------------------------------------------------
1 | # ANCOM
2 | 
3 | This minimal example on [ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277) (Mandal et al. 205) can be run with main.R (in R). This generates the summary in [ancom.md](ancom.md)
4 | 
5 | 


--------------------------------------------------------------------------------
/ancom/ancom.R:
--------------------------------------------------------------------------------
  1 | library(MASS)
  2 | library(dplyr)
  3 | 
  4 | # Load functions
  5 | source("functions.R")
  6 | 
  7 | # Load example data
  8 | library(microbiome)
  9 | data(dietswap)
 10 | pseq <- dietswap
 11 | 
 12 | # Use CLR abundances
 13 | pseq <- transform(pseq, "compositional")
 14 | 
 15 | # Initialize results
 16 | groups <- c("sex", "nationality")
 17 | pvals.ancom <- matrix(NA, nrow = ntaxa(pseq),
 18 |                             ncol = length(groups))
 19 | rownames(pvals.ancom) <- taxa(pseq)
 20 | colnames(pvals.ancom) <- groups
 21 | 
 22 | 
 23 | for (gr in groups) {
 24 | 
 25 |     # Run ANCOM for comparisons within this group
 26 |     print(gr)
 27 |   
 28 |     ps <- pseq
 29 |     sample_data(ps)$group <- unlist(meta(ps)[, gr])
 30 |     ps <- subset_samples(ps, !is.na(group))  
 31 |     
 32 |     v <- ancom(ps, "group")
 33 | 
 34 |     pvals.ancom[names(v), gr] <- v
 35 |     
 36 |   }
 37 | 
 38 | print("ANCOM for loop ok")
 39 | 
 40 | # Note: multiple testing is done per group by ANCOM
 41 | # We do not apply further correction
 42 | pvals.ancom <- pvals.ancom[which(rowMeans(pvals.ancom < 0.05) > 0),]
 43 | 
 44 | # Add fold change information
 45 | library(reshape)
 46 | dfm <- melt(pvals.ancom)
 47 | names(dfm) <- c("OTU", "Grouping", "pval.ancom")
 48 | dfm$relab.cond <- rep(NA, nrow(dfm))
 49 | dfm$relab.control <- rep(NA, nrow(dfm))
 50 | dfm$log10FC <- rep(NA, nrow(dfm))
 51 | 
 52 | # Provide a table with relative abundances and log10 fold changes
 53 | # for each group
 54 | # If log10FC is infinite it means that all controls for this tax are 0
 55 | check <- c()
 56 | 
 57 | pseq.clr <- transform(pseq, "clr")  
 58 | for (i in 1:nrow(dfm)) {
 59 | 
 60 |     tax <- as.character(dfm[i, 1])
 61 |     loc <- as.character(dfm[i, 2])    
 62 |     x <- get_sample(pseq, tax)
 63 |     x.clr <- get_sample(pseq.clr, tax)    
 64 | 
 65 |     gr <- meta(pseq)[, loc]
 66 |     N <- as.numeric(gr)
 67 |     N <- N - min(N, na.rm = TRUE)
 68 |     
 69 |     cond <- mean(x[which(N == 1)], na.rm = TRUE)
 70 |     control <- mean(x[which(N == 0)], na.rm = TRUE)
 71 | 
 72 |     cond.clr <- mean(x.clr[which(N == 1)], na.rm = TRUE)
 73 |     control.clr <- mean(x.clr[which(N == 0)], na.rm = TRUE)
 74 |     
 75 |     dfm[i,"relab.cond"]    <- 100 * cond
 76 |     dfm[i,"relab.control"] <- 100 * control
 77 | 
 78 |     # Check fold-changes with CLR transformed data
 79 |     dfm[i,"log10FC"] <- log10(exp(cond.clr - control.clr))
 80 | 
 81 |     # Verify that comp and clr abundance FC are consistent
 82 |     check <- rbind(check,
 83 |                 c(
 84 | 		    cond.clr = cond.clr,
 85 | 		    control.clr = control.clr,
 86 | 		    cond = cond,
 87 | 		    control = control,		    
 88 | 		    clr.fc = log10(exp(cond.clr - control.clr)),
 89 |     	           comp.fc = log10(cond/control)
 90 | 		   
 91 |                    ))
 92 |     
 93 | }
 94 |   
 95 | 
 96 | res.ancom <- dfm %>% arrange(pval.ancom) %>% filter(log10FC < 1)
 97 | 
 98 | 
 99 | 
100 | print("ANCOM ok")
101 | 
102 | 


--------------------------------------------------------------------------------
/ancom/ancom.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Group comparisons at the level of individual taxonomic groups"
 3 | author: "Author"
 4 | date: "`r date()`"
 5 | output: md_document
 6 | ---
 7 | 
 8 | ```{r init, echo=FALSE, warning=FALSE, message=FALSE}
 9 | library(knitr)
10 | theme_set(theme_bw(20))
11 | opts_chunk$set(fig.width=10, fig.height=10, message=FALSE, warning=FALSE)
12 | opts_chunk$set(dev="CairoPNG")
13 | padj.th <- 0.25
14 | ```
15 | 
16 | # ANCOM analysis
17 | 
18 | Significant taxa (regarding the grouping). Only the hits with adjusted p<`r padj.th` are shown. Adjustment is done per group based on ANCOM defaults.
19 | 
20 | The log10 fold changes have been calculated from CLR-transformed abundances.
21 | 
22 | ```{r deseq2, echo=FALSE, warning=FALSE, message=FALSE}
23 | tab <- subset(res.ancom, pval.ancom < 0.05) %>% arrange(desc(abs(log10FC)))
24 | tab$relab.cond <- round(tab$relab.cond, 2)
25 | tab$relab.control <- round(tab$relab.control, 2)
26 | kable(tab)
27 | ```
28 | 
29 | 
30 | Illustration of the significant genera. CLR transformed abundances (just first 3 shown).
31 | 
32 | ```{r tops, echo=FALSE, warning=FALSE, message=FALSE, fig.show="hold", out.width = "20%", fig.height=6, fig.width=5}
33 | library(ggbeeswarm)
34 | theme_set(theme_bw(20))
35 | df <- meta(pseq)
36 | ps <- transform(pseq, "clr")
37 | tab <- subset(res.ancom, pval.ancom < 0.05) %>% arrange(desc(abs(log10FC)))
38 | for (i in 1:min(nrow(tab), 3)) {
39 |   tax <- as.character(res.ancom[i, "OTU"])
40 |   group <- as.character(res.ancom[i, "Grouping"]    )
41 |   df <- meta(ps)
42 |   df$Group <- as.factor(df[, gsub("/", "\\.", group)])
43 |   df$Abundance <- abundances(ps)[tax, ]
44 |   df <- subset(df, !is.na(Group))
45 |   p <- ggplot(df, aes(x = Group, y = Abundance)) +
46 |          #geom_boxplot() +
47 | 	 geom_beeswarm() +
48 | 	 #geom_jitter(width = 0.25, alpha = 0.5) + 	 
49 | 	 # scale_y_log10() +
50 | 	 #coord_flip() +
51 | 	 theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) + 
52 | 	 labs(x = "",
53 | 	      y = "Abundance (CLR)",
54 | 	      title = paste(tax),
55 | 	      subtitle = group)
56 |   print(p)
57 | }
58 | ```
59 | 
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/ancom/figure/tops-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/ancom/figure/tops-1.png


--------------------------------------------------------------------------------
/ancom/figure/tops-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/ancom/figure/tops-2.png


--------------------------------------------------------------------------------
/ancom/figure/tops-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/ancom/figure/tops-3.png


--------------------------------------------------------------------------------
/ancom/main.R:
--------------------------------------------------------------------------------
1 | # Run analysis with example data
2 | source("ancom.R")
3 | 
4 | # Generate summaries
5 | library(knitr)
6 | knitr::knit("ancom.Rmd")
7 | 


--------------------------------------------------------------------------------
/archive/Crosshyb.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Crosshyb"
 3 | author: "Leo Lahti"
 4 | date: "`r Sys.Date()`"
 5 | bibliography: 
 6 | - bibliography.bib
 7 | 
 8 | output: 
 9 |   rmarkdown::html_vignette
10 | ---
11 | <!--
12 |   %\VignetteEngine{knitr::rmarkdown}
13 |   %\VignetteIndexEntry{microbiome tutorial - crosshyb}
14 |   %\usepackage[utf8]{inputenc}
15 |   %\VignetteEncoding{UTF-8}  
16 | -->
17 | 
18 | ## Visualizing cross-hybridization
19 | 
20 | To visualize cross-hybridization between selected taxa on HITChip (or
21 | other chips), use the following scripts. By default the groups with no
22 | cross-hyb are filtered out for clarity. Rows and columns are ordered
23 | by hierarchical clustering. The cross-hyb is shown in percentages,
24 | rounded as indicated by the rounding argument. The percentage
25 | indicates which fraction of probes for a row taxon overlaps with
26 | probes of a column taxon. This is not symmetric if the row and col
27 | taxon have a different total number of probes. For details, see
28 | help(plot_crosshyb).
29 | 
30 | ```{r chyb, warning=FALSE, fig.width=20, fig.height=20}
31 | library(microbiome, quietly = TRUE)
32 | library(dplyr)
33 | 
34 | # Pick the phylogeny which was used to summarize probes to species level
35 | tax.table <- get_hitchip_taxonomy("HITChip", "full")
36 | 
37 | # Check cross-hyb between all L2 groups
38 | res <- plot_crosshyb(tax.level = "L2", rounding = 1, show.plot = FALSE, tax.table = tax.table)
39 |     
40 | # Pick the crosshyb table and figure
41 | crosshyb.table <- res$data
42 | p <- res$plot
43 | 
44 | # Plot the figure    
45 | print(p)
46 | 
47 | # Organize the Crosshyb table
48 | suppressMessages(library(dplyr))
49 | s <- filter(res$data, crosshyb > 0)
50 | s <- s[rev(order(s$crosshyb)),]
51 | head(s)
52 | ```
53 | 
54 | 
55 | ### Further examples
56 | 
57 | Investigate species-species cross-hybridization within the Dialister L2 group
58 | 
59 | ```{r chyb2, warning=FALSE}
60 | # Select species belonging to Dialister L2 group
61 | mytaxa <- map_levels("Dialister", from = "L2", to = "species", tax.table)[[1]]
62 | 
63 | # Check cross-hyb between Dialister species
64 | res <- plot_crosshyb(tax.level = "species", selected.taxa = mytaxa, rounding = 0, tax.table = tax.table)
65 | 
66 | # Check the cross-hyb data as well
67 | library(knitr)
68 | kable(head(res$data))
69 | ```
70 | 
71 | 


--------------------------------------------------------------------------------
/archive/DiversityTable.tab:
--------------------------------------------------------------------------------
 1 | "evenness"	"richness"	"diversity"
 2 | "Sample.1"	0.839046123101901	918	6.01587586771419
 3 | "Sample.2"	0.810739894076034	728	5.75936452810286
 4 | "Sample.3"	0.85349658142741	990	6.13728643981587
 5 | "Sample.4"	0.80880377585622	698	5.63006638760482
 6 | "Sample.5"	0.804770162759795	661	5.59598750340119
 7 | "Sample.6"	0.818618485682415	334	5.32788428995849
 8 | "Sample.7"	0.828834270972343	849	5.81993383442343
 9 | "Sample.8"	0.813447510065929	403	5.21496684971292
10 | "Sample.9"	0.827701107735919	757	5.75403174217837
11 | "Sample.10"	0.85652952898239	868	6.01197293131539
12 | "Sample.11"	0.84061172939295	727	5.92423984049287
13 | "Sample.12"	0.852296304462099	864	5.98017494112037
14 | "Sample.13"	0.776055940729176	307	4.92336081081147
15 | "Sample.14"	0.762121306552714	166	4.68350349000648
16 | "Sample.15"	0.818048201679392	314	5.32965300142074
17 | "Sample.16"	0.711330612392343	249	6.64434948926102
18 | "Sample.17"	0.812135444880361	849	5.92982179142138
19 | "Sample.18"	0.792626929117248	516	5.48986635888394
20 | "Sample.19"	0.804225756552546	414	5.28372396308021
21 | "Sample.20"	0.851672544379265	1112	6.20427213572866
22 | 


--------------------------------------------------------------------------------
/archive/DynamicsIBD.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/archive/DynamicsIBD.rda


--------------------------------------------------------------------------------
/archive/F1000/MyArticle/F1000header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/archive/F1000/MyArticle/F1000header.png


--------------------------------------------------------------------------------
/archive/F1000/MyArticle/f1000_styles.sty:
--------------------------------------------------------------------------------
  1 | \ProvidesPackage{f1000_styles}
  2 | % Packages
  3 | \usepackage{authblk}
  4 | \usepackage[english]{babel}
  5 | \usepackage[utf8]{inputenc}
  6 | \usepackage[T1]{fontenc}
  7 | \usepackage[bitstream-charter]{mathdesign}
  8 | \usepackage{colortbl}
  9 | \usepackage[usenames,dvipsnames,table]{xcolor}
 10 | \usepackage{amsmath}
 11 | \usepackage{graphicx}
 12 | \usepackage{fancyhdr}
 13 | \usepackage{setspace}
 14 | \usepackage[labelsep=period,justification=justified]{caption}
 15 | \usepackage{lastpage}
 16 | \usepackage{xifthen}
 17 | \usepackage{todonotes}
 18 | \usepackage[hmargin=1.8cm,vmargin=2.2cm]{geometry}
 19 | 
 20 | % Define the versionDate and flogo commands
 21 | \newcommand{\versionDate}{\footnotesize \textit{F1000Research} 2016 - DRAFT ARTICLE (PRE-SUBMISSION)}
 22 | \newcommand{\flogo}{\includegraphics[height=12pt]{F1000header.png}}
 23 |            
 24 | % Section heading styles
 25 | \usepackage[compact]{titlesec} 
 26 | 
 27 | \titleformat*{\section}{\Large\usefont{OT1}{phv}{b}{n}\color{darkgray}}
 28 | \titleformat*{\subsection}{\large\usefont{OT1}{phv}{b}{n}\color{MidnightBlue}}
 29 | \titleformat*{\subsubsection}{\large\usefont{OT1}{phv}{b}{n}\color{MidnightBlue}}
 30 | 
 31 | % Section heading spacing (reduced space below headings)
 32 | \titlespacing\section{0pt}{3.5ex plus 1.2ex minus .2ex}{0ex}
 33 | \titlespacing\subsection{0pt}{3.25ex plus 1.2ex minus .2ex}{0ex}
 34 | \titlespacing\subsubsection{0pt}{3.25ex plus 1.2ex minus .2ex}{0ex}
 35 | 
 36 | % Caption style
 37 | \captionsetup{labelfont={color=red,bf},textfont={color=black,bf}}
 38 | 
 39 | % Table style
 40 | \definecolor{DarkKhaki}{rgb}{0.74,0.72,0.42}
 41 | \colorlet{tableheadcolor}{DarkKhaki} % Table header colour = 25% gray
 42 | \newcommand{\header}{\rowcolor{tableheadcolor}} %
 43 | \colorlet{tablerowcolor}{DarkKhaki!50} % Table row separator colour = 10% gray
 44 | \newcommand{\row}{\rowcolor{tablerowcolor}} %
 45 | \newenvironment{tabledata}[1][1]{%
 46 |   \renewcommand*{\extrarowheight}{0.1cm}%
 47 |   \tabular%
 48 | }{%
 49 |   \endtabular
 50 | }
 51 | 
 52 | % Define a title note command
 53 | \newcommand\titlenote[1]{\renewcommand\@titlenote{#1}}
 54 | \newcommand\@titlenote{}
 55 | 
 56 | % Column separation
 57 | \setlength{\columnsep}{0.25in}
 58 | 
 59 | % Page margins, headers and footers
 60 | % 1 For the main body of the article
 61 | \fancypagestyle{main}{
 62 | \newgeometry{hmargin=3.3cm,vmargin=2.2cm}
 63 | \fancyheadoffset{1.5cm} % Set header width to match front pages
 64 | \fancyfootoffset{1.5cm} % Set footer width to match front pages
 65 | \setlength{\parindent}{0pt}
 66 | \setlength{\headheight}{16.5pt}
 67 | \renewcommand{\headrulewidth}{0pt}
 68 | \renewcommand{\footrulewidth}{0pt}
 69 | \lhead{}
 70 | \chead{}
 71 | \rhead{\versionDate}
 72 | \lfoot{}
 73 | \cfoot{}
 74 | \rfoot{\footnotesize Page \thepage\ of \pageref{LastPage}}
 75 | }
 76 | 
 77 | % 2 For the front page(s)
 78 | \fancypagestyle{front}{
 79 | \newgeometry{hmargin=1.8cm,vmargin=2.2cm}
 80 | \setlength{\parindent}{0pt}
 81 | \setlength{\headheight}{16.5pt}
 82 | \renewcommand{\footrulewidth}{1pt}
 83 | \lhead{\textsc{\flogo}}
 84 | \chead{}
 85 | \rhead{\versionDate}
 86 | \lfoot{}
 87 | \cfoot{}
 88 | \rfoot{\footnotesize Page \thepage\ of \pageref{LastPage}}
 89 | }
 90 | 
 91 | %
 92 | \renewenvironment{abstract}{%
 93 | \usefont{OT1}{phv}{m}{n}
 94 | \begin{minipage}{0.65\textwidth}
 95 | \rule{\textwidth}{1pt}\\ \textcolor{Orange}{\textbf{\abstractname}}}
 96 | {\par\noindent\rule{\textwidth}{1pt}\end{minipage}}
 97 | %
 98 | \makeatletter
 99 | \renewcommand\@maketitle{%
100 | \begin{minipage}{0.95\textwidth}
101 | \vskip 2em
102 | \let\footnote\thanks 
103 | {\fontsize{20}{22}\usefont{OT1}{phv}{b}{n} \@title \par {\small{\@titlenote}} }
104 | \vskip 1.5em
105 | {\@author \par}
106 | \end{minipage}
107 | \vskip 1em \par
108 | }
109 | \makeatother
110 | \renewcommand\Authfont{\fontsize{16}{18}\usefont{OT1}{phv}{b}{n}}
111 | \renewcommand\Affilfont{\fontsize{10}{12}\usefont{OT1}{phv}{b}{n}}


--------------------------------------------------------------------------------
/archive/F1000/MyArticle/frog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/archive/F1000/MyArticle/frog.jpg


--------------------------------------------------------------------------------
/archive/F1000/MyArticle/sample.bib:
--------------------------------------------------------------------------------
 1 | @BOOK{Smith:2012qr,
 2 | 	title = {{B}ook {T}itle},
 3 | 	publisher = {Publisher},
 4 | 	author = {Smith, J.~M. and Jones, A.~B.},
 5 | 	year = {2012},
 6 | 	edition = {7th},
 7 | }
 8 | 
 9 | @ARTICLE{Smith:2013jd,
10 | 	author = {Jones, A.~B. and Smith, J.~M.},
11 | 	title = {{A}rticle {T}itle},
12 | 	journal = {Journal title},
13 | 	year = {2013},
14 | 	volume = {13},
15 | 	pages = {123-456},
16 | 	number = {52},
17 | 	month = {March},
18 | 	publisher = {Publisher}
19 | }


--------------------------------------------------------------------------------
/archive/Maturity.Rmd:
--------------------------------------------------------------------------------
 1 | ### Other community indices: Microbiome maturity index (experimental)
 2 | 
 3 | The microbiota maturity index has been adapted from [Subramanian,
 4 | S. et
 5 | al. (2014)](http://www.nature.com/nature/journal/v510/n7505/abs/nature13421.html)
 6 | and [Dogra, S. et
 7 | al. (2015)](http://mbio.asm.org/content/6/1/e02419-14.short), where
 8 | microbiota maturity index has been shown to differentiate healthy
 9 | children. In [Korpela et
10 | al. (2016)](www.nature.com/ncomms/2016/160126/ncomms10410/full/ncomms10410.html)
11 | this was calculated as the first principal coordinate from a PCoA
12 | (MDS) using only significantly age-associated genus-level taxa (all
13 | groups included). In this function NMDS is used instead. The maturity
14 | index is also adjusted for age. This artificial example is based on
15 | readily available adult data set whereas the maturity index is
16 | typically calculated from and used for babies/children:
17 | 
18 | ```{r div-example, warning=FALSE, message=FALSE, fig.path = "figure/"}
19 | library(phyloseq)
20 | data("atlas1006")
21 | pseq <- atlas1006
22 | pseq <- subset_samples(pseq, DNA_extraction_method == "r" & time == 0)
23 | pseq <- transform_phyloseq(pseq, "relative.abundance")
24 | maturity.index <- maturity(pseq)
25 | ```
26 | 
27 | 


--------------------------------------------------------------------------------
/archive/Motionchart.Rmd:
--------------------------------------------------------------------------------
 1 | ### Interactive motioncharts with googleVis
 2 | 
 3 | This provides interactive charts where you can change the variables at
 4 | X/Y axis, point size and color as you like. With time series data
 5 | moving animations are also possible.
 6 | 
 7 | 
 8 | Prepare the data:
 9 | 
10 | ```{r motionchart-example, fig.width=8, fig.height=8, message=FALSE}
11 | library(microbiome)  
12 | 
13 | data(peerj32)
14 | 
15 | # Species-level data in phyloseq format
16 | pseq <- peerj32$phyloseq
17 | 
18 | # Retrieve L2 (genus-like) data in phyloseq format
19 | pseq.L2 <- aggregate_taxa(pseq, level = "L2")
20 | 
21 | # Convert L2 to matrix format
22 | genus.matrix.log10.simulated <- log10(otu_table(pseq.L2)@.Data)
23 | 
24 | # Combine phylotype profiling data and sample metadata
25 | df <- cbind(metadata.simulated, t(genus.matrix.log10.simulated))  
26 | ```
27 | 
28 | Plot a Motion Chart using googleVis - package. Note: this requires
29 | flash and internet connection. 
30 | 
31 | Form a motion chart from example data NOTE: the data set must be given
32 | as data.frame which can contain NUMERIC and CHARACTER fields (NO
33 | FACTORS, NOR LOGICAL variables!). The FIRST FOUR FIELDS must be
34 | provided in the following order: idvar, timevar, two numeric fields,
35 | then any number of numeric and character fields.
36 | 
37 | The plot shows only the first time point.  Replace the time field with
38 | a constant to plot all in one figure using df$time <- rep(1, nrow(df))
39 | 
40 | ```{r motionchart-example-2, fig.width=8, fig.height=8, message=FALSE}
41 | library(googleVis)  
42 | 
43 | # See help(gvisMotionChart) for further details
44 | mchart <- gvisMotionChart(df, idvar = "sample", timevar = "time")  
45 | 
46 | # Plot immediately (opens in browser, requires flash)
47 | plot(mchart)  
48 | ```
49 | 
50 | Save as html (needs javascript to open!). NOTE: html file viewing does not work locally - store the html file on server and view through internet:
51 | 
52 | ```{r motionchart-example2, fig.width=8, fig.height=8, eval=FALSE}
53 | print(mchart, file="~/MotionChart.html")
54 | ```
55 | 


--------------------------------------------------------------------------------
/archive/PhylotypeRatios.R:
--------------------------------------------------------------------------------
 1 | #' @title Phylotype ratios
 2 | #' @description Calculate phylotype ratios (eg. Bacteroides vs.
 3 | #'          Prevotella etc.) for a given phylotypes vs. samples matrix
 4 | #' @param dat phylotypes vs. samples data matrix in log10 scale
 5 | #' @return phylotype pairs x samples matrix indicating the ratio 
 6 | #'                 (in log10 domain) between each unique pair
 7 | #' @export 
 8 | #' @examples 
 9 | #'   data(peerj32)
10 | #'   ratios <- PhylotypeRatios(peerj32$microbes)
11 | #' @references See citation('microbiome')
12 | #' @author Leo Lahti \email{microbiome-admin@@googlegroups.com}
13 | #' @keywords utilities
14 | PhylotypeRatios <- function(dat) {
15 |     
16 |     phylogroups <- rownames(dat)
17 |     Nratios <- (length(phylogroups)^2 - length(phylogroups))/2
18 |     Nsamples <- ncol(dat)
19 |     ratios <- list()
20 |     for (i in 1:(length(phylogroups) - 1)) {
21 |         for (j in (i + 1):length(phylogroups)) {
22 |             pt1 <- phylogroups[[i]]
23 |             pt2 <- phylogroups[[j]]
24 |             ratios[[paste(pt1, pt2, sep = "-")]] <- dat[pt1, ] - dat[pt2, ]
25 |         }
26 |     }
27 |     ratios <- do.call(cbind, ratios)
28 |     
29 |     t(ratios)
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/archive/ProvasIdata.R:
--------------------------------------------------------------------------------
 1 | # This script retrieves the data from 
 2 | # Lahti et al. PeerJ 1:e32, 2013 https://peerj.com/articles/32/
 3 | # and saves it into a more compact matrix format.
 4 | 
 5 | # Download and extract the tarball (data stored in Data/ directory)::
 6 | system("wget https://dfzljdn9uc3pi.cloudfront.net/2013/32/1/SupplementalDataS2.tar.gz")
 7 | system("tar -zxvf SupplementalDataS2.tar.gz")
 8 | 
 9 | # Read HITChip genus-level intestinal microbiota-profiling data
10 | hit <- read.csv("Data/SupplementalDataHITChipS3.tab", sep = " ", skip = 1, header = FALSE, row.names = 1)
11 | 
12 | # The taxon names are unfortunately space-separated, so they need to be
13 | # specified and read in manually from the file:
14 | h <- unlist(strsplit(readLines("Data/SupplementalDataHITChipS3.tab", n = 1), " "))
15 | inds <- list(1,2,3,4,5:8,9:11,12,13,14:17,18:21,22:25,26,27,28:30,31,32,33:36,37:40,41:44,45:48,49:52,53:56,57:60,61:64,65,66:68,69,70:73,74:77,78,79:82,83,84:87,88,89:92,93:96,97:100,101:104,105:108,109:112,113:116,117:120,121:124,125:128,129:132,133:136,137,138:141,142:145,146,147:149,150,151:154,155:158,159:162,163,164:167,168:171,172:175,176:179,180:183,184:187,188:191,192:195,196:199,200,201,202,203,204,205:208,209:212,213:216,217:220,221:224,225:228,229:232,233,234,235:238,239:242,243,244,245:248,249,250,251,252:255,256:259,260:263,264:267,268:271,272:275,276:279,280:283,284:287,288:291,292:295,296:299,300:303,304,305:307,308,309:312,313:316,317:320,321:324,325:328,329:332,333,334:337,338,339:342,343:346,347:350,351:354,355:358,359:361,362:363,364:365,366:368,369:371,372:373,374:375,376,377:379,380,381:383,384,385:387)
16 | nams <- sapply(inds, function (i) {paste(h[i], collapse = " ")})
17 | 
18 | # Add the retrieved taxon names to HITChip data columns
19 | colnames(hit) <- nams
20 | 
21 | # Read Lipidomic profiling data
22 | lip <- read.csv("Data/SupplementalDataLipidsS2.tab", sep = " ", row.names = 1)
23 | lip2 <- apply(lip, 2, function (x) as.numeric(gsub("\\,", "\\.", x)))
24 | dimnames(lip2) <- dimnames(lip)
25 | 
26 | # Write to file
27 | write.table(hit, file = "~/Rpackages/microbiome/microbiome/inst/extdata/ProvasI-L2.tab", sep = ";", quote = FALSE)
28 | write.table(lip2, file = "~/Rpackages/microbiome/microbiome/inst/extdata/ProvasI-Lipid.tab", sep = ";", quote = FALSE)
29 | 
30 | # ........................
31 | 
32 | data(peerj32)
33 | # lipids microbes meta
34 | # Taxonomy
35 | taxonomy <- GetPhylogeny("HITChip", "filtered")
36 | taxonomy <- unique(as.data.frame(taxonomy[, c("L1", "L2")]))
37 | rownames(taxonomy) <- as.vector(taxonomy[, "L2"])
38 | 
39 | # Merging data matrices into phyloseq format:
40 | pseq <- hitchip2physeq(t(otu), meta, taxonomy)
41 | 
42 | round(peerj32$microbes - min(peerj32$microbes))


--------------------------------------------------------------------------------
/archive/RDA/firstlib2.R:
--------------------------------------------------------------------------------
1 | #' @importFrom ade4 s.class
2 | #' @importFrom grDevices chull
3 | 
4 | 


--------------------------------------------------------------------------------
/archive/RDA/plot_rda_bagged.R:
--------------------------------------------------------------------------------
 1 | #' @title Plot RDA
 2 | #' @description Visualize rda_bagged output.
 3 | #' @param x Output from rda_bagged
 4 | #' @param which.bac TBA
 5 | #' @param ptype Plot type. 'spider' or 'hull'
 6 | #' @param comp TBA
 7 | #' @param cex.bac Plot size.
 8 | #' @param plot.names Plot names
 9 | #' @param group.cols Group colors.
10 | #' @param ... Other arguments to be passed
11 | #' @return TBA
12 | #' @examples
13 | #' data(peerj32)
14 | #' x <- t(peerj32$microbes)
15 | #' y <- factor(peerj32$meta$time); names(y) <- rownames(peerj32$meta)
16 | #' # Use more iterations in practice
17 | #' res <- rda_bagged(x, y, bs.iter=5)
18 | #' tmp <- plot_rda_bagged(res)
19 | #' @export
20 | #' @references See citation('microbiome') 
21 | #' @author Contact: Jarkko Salojarvi \email{microbiome-admin@@googlegroups.com}
22 | #' @keywords utilities
23 | plot_rda_bagged <- function(x, which.bac=1:nrow(x$loadings),
24 |     ptype="spider", comp=1:2, cex.bac=0.5, plot.names=TRUE,
25 |     group.cols=as.numeric(unique(Y)), ...) {
26 | 
27 |     # TODO: can we speed up this function ?
28 |     # For instance by switching from for loops to vectorization etc
29 | 
30 |     y <- cluster <- x.centroid <- y.centroid <- NULL
31 |     
32 |     bag <- x  #[which(!names(x) == 'variable')] #$bagged.rda
33 |     Y <- x$variable
34 |     
35 |     scaled.loadings <- (bag$loadings/max(abs(bag$loadings)))[, comp]
36 |     scaled.scores <- (bag$scores/max(abs(bag$scores)))[, comp]
37 |     
38 |     plot(rbind(scaled.scores, scaled.loadings), type="n",
39 |         xlab=paste(names(bag$R2)[1], 
40 |         " (", format(100 * bag$R2[1], digits=2), "%)", sep=""),
41 |     ylab=paste(names(bag$R2)[2], 
42 |         " (", format(100 * bag$R2[2], digits=2), "%)", sep=""))
43 |     
44 |     if (ptype == "spider") 
45 |         s.class(scaled.scores, factor(Y), grid=FALSE, col=group.cols,
46 |         cellipse=0.5, cpoint=0, add.plot=TRUE)
47 |     
48 |     # TODO: Same with ggplot (not ready)
49 |     skip <- TRUE
50 |     if (!skip) {
51 |         # build ggplot dataframe with points (x,y) and
52 |         # corresponding groups (cluster)
53 |         gg <- as.data.frame(scaled.scores)
54 |         names(gg) <- c("x", "y")
55 |         gg$cluster <- factor(Y)
56 |         # calculate group centroid locations
57 |         centroids <- aggregate(cbind(x, y) ~ cluster, data=gg, mean)
58 |         # merge centroid locations into ggplot dataframe
59 |         gg <- merge(gg, centroids, by="cluster",
60 |         suffixes=c("", ".centroid"))
61 |         # generate star plot...
62 |         ggplot(gg) +
63 |         geom_point(aes(x=x, y=y, color=cluster), size=3) +
64 |         geom_point(data=centroids, 
65 |                 aes(x=x, y=y, color=cluster), size=4) +
66 |         geom_segment(aes(x=x.centroid, 
67 |                 y=y.centroid, xend=x, yend=y, color=cluster))
68 |     }
69 |     
70 |     if (ptype == "hull") {
71 |         
72 |         ll <- split(rownames(scaled.scores), Y)
73 |         hulls <- lapply(ll, function(ii) ii[chull(scaled.scores[ii, ])])
74 |         for (i in 1:length(hulls)) {
75 |             polygon(scaled.scores[hulls[[i]], ], border=group.cols[i])
76 |         }
77 |     }
78 |     if (plot.names) {
79 |         text(scaled.scores, rownames(scaled.scores), cex=0.5, ...)
80 |     } else {
81 |         points(scaled.scores, ...)
82 |     }
83 |     text(scaled.loadings[which.bac, ],
84 |         rownames(scaled.loadings)[which.bac], cex=cex.bac)
85 | }
86 | 
87 | 


--------------------------------------------------------------------------------
/archive/ROC.Rmd:
--------------------------------------------------------------------------------
 1 | ### ROC analysis
 2 | 
 3 | A basic example of ROC/AUC analysis.
 4 | 
 5 | 
 6 | ### Load example data
 7 | 
 8 | ```{r roc-example, warning=FALSE, fig.path = "figure/"}
 9 | library(microbiome)
10 | #pseq <- download_microbiome("dietswap")
11 | data("dietswap")
12 | pseq <- dietswap
13 | 
14 | # Pick two groups of samples
15 | # African, DI group, time points 1 and 2
16 | # See the original publication for details: 
17 | # references provided in help(dietswap)
18 | pseq1 <- subset_samples(pseq, nationality == "AFR" & 
19 | 		     timepoint.within.group == 1 & 
20 | 		     group == "DI")
21 | pseq2 <- subset_samples(pseq, nationality == "AFR" & 
22 | 		     timepoint.within.group == 2 & 
23 | 		     group == "DI")
24 | 
25 | # Pick OTU matrix
26 | otu <- otu_table(pseq)@.Data
27 | 
28 | # Pick sample metadata
29 | meta <- sample_data(pseq)
30 | 
31 | # Define two sample groups for demonstration purpose
32 | g1 <- sample_names(pseq1)
33 | g2 <- sample_names(pseq2)
34 | 
35 | # Compare the two groups with Wilcoxon test
36 | pvalues <- c()
37 | for (tax in rownames(otu)) {
38 |   pvalues[[tax]] <- wilcox.test(otu[tax, g1], otu[tax, g2])$p.value
39 | }
40 | 
41 | # Assume there are some known true positives 
42 | # Here for instance Bacteroidetes
43 | bacteroidetes <- levelmap("Bacteroidetes", from = "L1", to = "L2", GetPhylogeny("HITChip", "filtered"))$Bacteroidetes
44 | ```
45 | 
46 | 
47 | ### Overall ROC analysis 
48 | 
49 | Based on the [xrobin/pROC](https://github.com/xrobin/pROC) package
50 | (see that page for more examples with confidence limits etc):
51 | 
52 | ```{r roc-example2, warning=FALSE, fig.path = "figure/"}
53 | library(pROC)
54 | res <- roc(names(pvalues) %in% bacteroidetes, pvalues)
55 | ```
56 | 
57 | 
58 | ### ROC/AUC value
59 | 
60 | ```{r roc-example3, warning=FALSE, fig.path = "figure/"}
61 | res$auc
62 | ```
63 | 
64 | 
65 | ### Plot ROC curve
66 | 
67 | ```{r roc-example4, warning=FALSE, fig.path = "figure/"}
68 | plot(res)
69 | ```
70 | 


--------------------------------------------------------------------------------
/archive/affybatch.R:
--------------------------------------------------------------------------------
  1 | data.dir <- system.file("extdata", package = "microbiome")
  2 | 
  3 |   # Read probe-level data
  4 |   f <- paste(data.dir, "/oligoprofile.tab", sep = "")
  5 |   tab <- read.csv(f, header = TRUE, sep = "\t", row.names = 1, as.is = TRUE)
  6 |   colnames(tab) <- unlist(strsplit(readLines(f, 1), "\t"))[-1]
  7 |   probedata <- as.matrix(tab)
  8 | 
  9 |   # Read taxonomy table
 10 |   f <- paste(data.dir, "/taxonomy.tab", sep = "")
 11 |   tab <- read.csv(f, header = TRUE, sep = "\t", as.is = TRUE)
 12 |   # Convert into phyloseq taxonomyTable format
 13 |   taxonomy <- tax_table(as.matrix(tab))     
 14 | 
 15 | require(affy)
 16 | 
 17 | # probesets
 18 | map <- unique(taxonomy[, c("species", "oligoID")]@.Data)
 19 | colnames(map) <- c("set", "probe")
 20 | rownames(map) <- map[, "probe"]
 21 | 
 22 | # Align the probe mappings with the data
 23 | # only use the probes that have mapping
 24 | coms <- intersect(rownames(probedata), rownames(map))
 25 | map <- map[coms,]
 26 | probedata <- probedata[coms,]
 27 | 
 28 | # Custom CDF
 29 | nsamples <- ncol(probedata)
 30 | nrow <- nrow(probedata)
 31 | ncol <- 1
 32 | sets <- unique(map[, "set"])
 33 | cdf <- lapply(sets, function(set) {
 34 |     tmp <- matrix(which(map[, "set"] == set), ncol = 1)
 35 |     colnames(tmp) <- "pm"
 36 |     return(tmp)
 37 | })
 38 | names(cdf) <- sets
 39 | cdfname <- list2env(cdf)
 40 | 
 41 | ab <- new("AffyBatch", exprs = probedata,
 42 | 		 cdfName = "cdfname", 
 43 |             	 #phenoData = phenoData,
 44 | 		 nrow = nrow,
 45 | 		 ncol = ncol
 46 |             #annotation = cleancdfname(cdfname, addcdf = FALSE), 
 47 |             #protocolData = protocol,
 48 | 	    #description = description, 
 49 |             #notes = notes
 50 | 	    )
 51 | 
 52 | eset <- exprs(rma(ab, background = FALSE, normalize = FALSE))
 53 | 
 54 | # Not sure if this takes normalization out - check
 55 | eset <- exprs(rpa(ab, bg.method = "none", normalization.method = NULL))
 56 | 
 57 | ## Check given species
 58 | #set <- sample(sets, 1)
 59 | ## Summary
 60 | #x <- eset[set,]
 61 | ## Probedata
 62 | #this.probes <- as.vector(map[map[, "set"] == set, "probe"])
 63 | #x2 <- probedata[this.probes,]
 64 | #cors <- cor(x, t(probedata), method = "spearman"); names(cors) <- rownames(probedata)
 65 | # Ranks wrt correct data
 66 | #sort(match(this.probes, rev(names(sort(cors)))))
 67 | # Ranks wrt random data
 68 | #sort(match(this.probes, rev(names(sample(cors)))))
 69 | 
 70 | # -----------------------------------------------
 71 | 
 72 | 
 73 | # Toydata example
 74 | 
 75 | nsamples <- 20
 76 | nrow <- 3
 77 | ncol <- 4
 78 | 
 79 | exprs <- matrix(runif(nrow * ncol * nsamples), nrow = nrow * ncol, ncol = nsamples)
 80 | colnames(exprs) <- paste("sample", 1:ncol(exprs), sep = "-")
 81 | 
 82 | # Custom CDF
 83 | maxIndx <- nrow * ncol
 84 | nsets <- 3
 85 | cdf <- lapply(seq(1, nsets), function(x) {
 86 |     tmp <- matrix(sample(maxIndx, 2), nrow = 2, ncol = 1)
 87 |     colnames(tmp) <- "pm"
 88 |     return(tmp)
 89 | })
 90 | names(cdf) <- paste("set", seq(1, nsets), sep = "")
 91 | cdf[["set1"]][, "pm"] <- c(2, 7)
 92 | 
 93 | v <- runif(nsamples)
 94 | exprs[2, ] <- v
 95 | exprs[7, ] <- v + runif(nsamples) * 0.01
 96 | 
 97 | cdfname <- list2env(cdf)
 98 | 
 99 | # OK !
100 | ab <- new("AffyBatch", exprs = exprs,
101 | 		 cdfName = "cdfname", 
102 |             	 #phenoData = phenoData,
103 | 		 nrow = nrow,
104 | 		 ncol = ncol
105 |             #annotation = cleancdfname(cdfname, addcdf = FALSE), 
106 |             #protocolData = protocol,
107 | 	    #description = description, 
108 |             #notes = notes
109 | 	    )
110 | 
111 | indices2xy(30, nc = ncol)
112 | xy2indices(x = 2, y = 3, nc = ncol)
113 | exprs(ab)
114 | 
115 | # Summarize
116 | # Jess !
117 | exprs(rma(ab, background = FALSE, normalize = FALSE))
118 | 
119 | ------------------------------------------
120 | 
121 | 


--------------------------------------------------------------------------------
/archive/anticorrelation.old.R:
--------------------------------------------------------------------------------
  1 | anticorrelation.old <- function(x, type = "interindividual", group_by = "group", method = "spearman") {
  2 | 
  3 |     # Pick metadata		     
  4 |     meta <- sample_data(x)
  5 | 
  6 |     if (!"sample" %in% names(meta)) {
  7 |       warning("Using the metadata rownames as the sample ID")
  8 |       meta$sample = rownames(sample_data(x))
  9 |     }
 10 | 
 11 |     # OTU data Log10
 12 |     otu <- log10(t(abundances(x)@.Data))
 13 | 
 14 |     # Ensure compatiblity
 15 |     if (!nrow(otu) == nrow(meta)) {
 16 |       otu <- t(otu)
 17 |     }
 18 | 
 19 |     if (!all(rownames(otu) == rownames(meta))) {
 20 |       stop("OTU table and metadata do not match.")
 21 |     }
 22 | 
 23 |     correlation <- NULL
 24 |  		         
 25 |     # Split the data by group
 26 |     group <- NULL
 27 |     if (!group_by %in% names(meta)) {
 28 |       stop(paste("The group_by variable", group_by, "is not included in sample_data(x)."))
 29 |     }
 30 |     datasets <- split(as.data.frame(otu), meta[[group_by]], drop = TRUE)
 31 | 
 32 |     if (type == "interindividual") {
 33 | 
 34 |       tmp <- setdiff(c("sample", group_by), names(meta))
 35 |       if (length(tmp) > 0) {
 36 |         stop(paste("The following variables needed by betadiversity function type=interindividual are 
 37 | 	            missing from sample metadata:", paste(tmp, collapse = ",")))
 38 |       }
 39 |   
 40 |       # Within-matrix stability NOTE: earlier this was calculated as
 41 |       # the average of upper triangular correlation matrix This is
 42 |       # heavily biased since the values are dependent Now replaced
 43 |       # by calculating correlations against the mean of the whole
 44 |       # sample set cors <- lower.triangle(cor(dat1))
 45 |       dfs <- NULL      
 46 |       for (ds in names(datasets)) {
 47 |         dat1 <- datasets[[ds]]
 48 |         cors <- as.vector(cor(t(dat1), matrix(colMeans(dat1)), method = method, use = "pairwise.complete.obs"))
 49 |         dfs <- rbind(dfs, data.frame(group = rep(ds, length(cors)),
 50 |                    	             sample = rownames(dat1),
 51 |   		   		     anticorrelation = 1 - cors))
 52 |       }
 53 |   
 54 |       pval <- anova(lm(anticorrelation ~ group, data = dfs))[["Pr(>F)"]][[1]]
 55 |       stats <- dfs %>% group_by(group) %>%
 56 |                        summarize(mean = mean(anticorrelation),
 57 | 		       sd = sd(anticorrelation))
 58 |       homogeneity <- list(data = dfs, statistics = stats, p.value = pval)
 59 | 
 60 |     } else if (type == "intraindividual") {
 61 | 
 62 |       tmp <- setdiff(c("time", "subject", "sample", group_by), names(meta))
 63 |       if (length(tmp) > 0) {
 64 |         stop(paste("The following variables needed by betadiversity function 
 65 |                type=intraindividual are missing from sample metadata:", paste(tmp, collapse = ",")))
 66 |       }
 67 | 
 68 |       if (!all(sapply(split(meta$time, meta[[group_by]]), function (x) {length(unique(x))}) == 2)) {
 69 |         stop("Two time points needed for each group for the intraindividual type. 
 70 |               Some groups are having a different number of time points.")
 71 |       }
 72 | 
 73 |       homogeneity <- list()
 74 |       dfs <- NULL
 75 |       for (ds in names(datasets)) {
 76 |       
 77 |         # Pick the data and metadata for this group
 78 |         xsub <- datasets[[ds]]        
 79 | 	msub <- meta[rownames(xsub),]
 80 | 
 81 |         # Use interindividual functionality to assess correlations
 82 | 	# within subjects. Subjects are used as groups
 83 |         datasets2 <- split(xsub, droplevels(msub[["subject"]]))
 84 | 	cors <- c()
 85 |         for (subj in names(datasets2)) {
 86 |           dats <- datasets2[[subj]]
 87 |           cors[[subj]] <- cor(unlist(dats[1,]), unlist(dats[2,]),
 88 | 	                      method = method, use = "pairwise.complete.obs")
 89 |         }
 90 | 
 91 |         dfs <- rbind(dfs, data.frame(group = rep(ds, length(cors)),
 92 |                    	             subject = names(cors),
 93 |   		   		     anticorrelation = 1 - cors))
 94 | 
 95 |       }
 96 | 
 97 |       # Between time point correlations within subjects
 98 |       # and the mean over those correlations
 99 |       pval <- anova(lm(anticorrelation ~ group, data = dfs))[["Pr(>F)"]][[1]]      
100 |       stats <- dfs %>%
101 |                  group_by(group) %>%
102 |                  summarize(mean = mean(anticorrelation, na.rm = T),
103 | 		           sd = sd(anticorrelation, na.rm = T))
104 | 			   
105 |       anticor <- list(data = dfs, statistics = stats, p.value = pval)
106 | 
107 |     }
108 |     
109 |     anticor
110 |     
111 | }
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 


--------------------------------------------------------------------------------
/archive/betadiversity.R:
--------------------------------------------------------------------------------
 1 | #' @title Beta Diversity
 2 | #' @description Quantification of beta diversity.
 3 | #' @details
 4 | #' 
 5 | #' Beta diversity quantifies community similarity between
 6 | #' two samples. At the group level, it can be used to quantify group
 7 | #' variability (or spread). This function provides a wrapper that is handy
 8 | #' for group-level comparisons of beta diversity.
 9 | #'
10 | #' When the sample size n=2, this function calculates the beta diversity
11 | #' between the two samples.
12 | #'
13 | #' When the sample size n>2, it calculates beta
14 | #' diversity of each sample against the group mean. This can be
15 | #' compared between groups in order to compare differences in group
16 | #' homogeneity. 
17 | #'
18 | #' Note that this homogeneity measure is affected by sample size.
19 | #' Subsampling or bootstrapping can be applied to equalize sample sizes
20 | #' between comparisons.
21 | #' 
22 | #' More advanced beta correlation measures are available but not implemented
23 | #' here yet.
24 | #' 
25 | #' The anticorrelation mode is a simple educational indicator that returns
26 | #' average spearman correlation between samples of the input data and
27 | #' the overall group-wise average.
28 | #' 
29 | #' @param x phyloseq object 
30 | #' @param mode Beta diversity method
31 | #' @return Vector with beta diversities; one for each sample, quantifying the
32 | #' dissimilarity of the sample from the group-level mean
33 | #' @export
34 | #' @examples 
35 | #'   # Example data
36 | #'   library(microbiome)
37 | #'   data(peerj32)
38 | #'   # Assess beta diversity among the African samples
39 | #'   # in a diet swap study
40 | #'   b <- group_diversity(subset_samples(dietswap, group == "AFR"))
41 | #' @references 
42 | #' The inter- and intra-individual homogeneity in
43 | #' Salonen et al. ISME J. 8:2218-30, 2014 are obtained as
44 | #' 1 - beta where beta is the "anticorrelation" beta diversity.
45 | #' To cite this R package, see citation('microbiome')
46 | #' @seealso the vegdist function from the \pkg{vegan} package provides many
47 | #' standard beta diversity measures
48 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
49 | #' @keywords utilities
50 | group_diversity <- function(x, mode = "anticorrelation") {
51 | 
52 |   # Abundance matrix (taxa x samples)
53 |   if (is.phyloseq(x)) {
54 |     x <- abundances(x)
55 |   }
56 | 
57 |   if (ncol(x) == 2) {
58 |     if (mode == "anticorrelation") {
59 |       ret <- 1 - cor(x[,1], x[,2], method = "spearman", use = "pairwise.complete.obs")
60 |     }
61 |   } else if (ncol(x) > 2 & mode == "anticorrelation") {
62 |     ret <- anticorrelation(x, "spearman")
63 |   }
64 | 
65 |   ret
66 |   
67 | }
68 | 
69 | 
70 | anticorrelation <- function(x, method = "spearman") {
71 | 
72 |   # Correlations calculated against the mean of the sample set
73 |   cors <- as.vector(cor(
74 |        	    x, matrix(rowMeans(x)),
75 |        	    method = method,
76 | 	    use = "pairwise.complete.obs"))
77 | 
78 |   1 - cors    
79 | 
80 | }
81 | 


--------------------------------------------------------------------------------
/archive/bimodality_sarle.R:
--------------------------------------------------------------------------------
 1 | #' @title Sarle's Bimodality Coefficient
 2 | #' @description Sarle's bimodality coefficient.
 3 | #' @param x Data vector for which bimodality will be quantified
 4 | #' @param bs.iter Bootstrap iterations
 5 | #' @param na.rm Remove NAs
 6 | #' @param type Score type ("Sarle.finite.sample" or "Sarle.asymptotic")
 7 | #' @return Bimodality score
 8 | #' @export
 9 | #' @examples
10 | #'   b <- bimodality_sarle(rnorm(100), type = "Sarle.finite.sample")
11 | #' @details The coefficient lies in (0, 1).
12 | #' 
13 | #'	    The 'Sarle.asymptotic' version is defined as
14 | #'          \deqn{b = (g^2 + 1) / k}.
15 | #'          This is coefficient of bimodality from Ellison AM Am. J. Bot. 1987, 
16 | #'          for microbiome analysis it has been used for instance in
17 | #'          Shade et al. 2014.
18 | #'
19 | #'          The formula for 'Sarle.finite.sample' (SAS 2012):
20 | #'
21 | #' 	    \deqn{b = \frac{g^2 + 1}{k + (3(n-1)^2)/((n-2)(n-3))}}
22 | #'          where n is sample size and 
23 | #' 
24 | #'          In both formulas, \eqn{g} is sample skewness and \eqn{k} is the kth
25 | #'          standardized moment (also called the sample kurtosis, or
26 | #'          excess kurtosis).
27 | #'
28 | #' @references
29 | #'   \itemize{
30 | #'     \item{}{Shade et al. mBio 5(4):e01371-14, 2014.}
31 | #'     \item{}{Ellison AM (1987) Am J Botany 74(8):1280-1288.}
32 | #'     \item{}{SAS Institute Inc. (2012). SAS/STAT 12.1 user's guide. Cary, NC.}
33 | #'     \item{}{To cite the microbiome R package, see citation('microbiome')}
34 | #'  }
35 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
36 | #' @seealso Check the dip.test from the \pkg{DIP} package for a
37 | #' classical test of multimodality.
38 | #' @keywords utilities
39 | bimodality_sarle <- function(x, bs.iter = 1, na.rm = TRUE, type = "Sarle.finite.sample") {
40 | 
41 |     g <- skewness(x, na.rm)
42 |     k <- kurtosis(x, na.rm)      
43 | 
44 |     if (type == "Sarle.asymptotic") {
45 | 
46 |       s <- (1 + g^2)/(k + 3)
47 | 
48 |     } else if (type == "Sarle.finite.sample") {
49 |     
50 |       n <- length(x)
51 |       s <- (g^2 + 1) / (k + (3*(n-1)^2)/((n-2)*(n-3)))
52 |       
53 |     }
54 | 
55 |     if (bs.iter > 1) {
56 |       s <- c()
57 |       for (i in 1:bs.iter) {
58 |         xbs <- sample(x, replace = TRUE)
59 | 	s[[i]] <- bimodality_sarle(xbs, type = type)
60 |       }
61 |       s <- mean(s)
62 |     }
63 | 
64 |     s
65 |     
66 | }
67 | 


--------------------------------------------------------------------------------
/archive/core_abundance.R:
--------------------------------------------------------------------------------
 1 | #' @title Core Abundance
 2 | #' @description Calculate total core abundance.
 3 | #' @param x \code{\link{phyloseq-class}} object
 4 | #' @param detection Detection threshold (non-negative real)
 5 | #' @param prevalence Prevalence threshold (in [0, 100])
 6 | #' @return Total core abunance vector.
 7 | #' @examples
 8 | #'  data(dietswap)
 9 | #'  a <- core_abundance(dietswap)
10 | #' @export
11 | #' @references See citation("microbiome")
12 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
13 | #' @keywords utilities
14 | core_abundance <- function(x, detection = 1, prevalence = 95) {
15 | 	      
16 |   core.taxa <- core_members(x, detection = detection,
17 |   	               prevalence = prevalence)
18 | 
19 |   # Core matrix
20 |   # FIXME: directly use the core function
21 |   xx <- abundances(prune_taxa(core.taxa, x))
22 |   xx <- matrix(xx, nrow = length(core.taxa))
23 |   
24 |   # Total sum of core abundances
25 |   ab <- colSums(xx, na.rm = TRUE)
26 | 
27 |   ab
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/archive/core_members.R:
--------------------------------------------------------------------------------
 1 | #' @title Core Taxa
 2 | #' @description Determine members of the core microbiota with given abundance
 3 | #''             and prevalences.
 4 | #' @param x \code{\link{phyloseq-class}} object
 5 | #' @param detection Detection threshold (non-negative real)
 6 | #' @param prevalence Prevalence threshold (in [0, 100])
 7 | #' @param sort Logical. Sort the taxa.
 8 | #' @return Vector of core members
 9 | #' @details For phyloseq object, lists taxa that are more prevalent with the
10 | #'   given detection. For matrix, lists columns that satisfy
11 | #'   these criteria.
12 | #' @examples
13 | #'   data(dietswap)
14 | #'   a <- core_members(dietswap, 1, 95)
15 | #' @export
16 | #' @aliases prevalent_taxa
17 | #' @references 
18 | #'   A Salonen et al. The adult intestinal core microbiota is determined by 
19 | #'   analysis depth and health status. Clinical Microbiology and Infection 
20 | #'   18(S4):16 20, 2012. 
21 | #'   To cite the microbiome R package, see citation('microbiome') 
22 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
23 | #' @keywords utilities
24 | core_members <- function(x, detection = 1, prevalence = 95,
25 |                     sort = TRUE)  {
26 | 
27 |   if (class(x) == "phyloseq") {
28 |     x <- abundances(x)
29 |   } 
30 |   taxa <- names(which(prevalence(x, detection) > prevalence))
31 | 
32 |   if (sort) {
33 |     taxa <- sort(taxa)
34 |   }
35 | 
36 |   taxa
37 | 
38 | }
39 | 


--------------------------------------------------------------------------------
/archive/core_richness.R:
--------------------------------------------------------------------------------
 1 | #' @title Core Richness
 2 | #' @description Calculate total core richness
 3 | #' @param x \code{\link{phyloseq-class}} object
 4 | #' @param detection Detection threshold (non-negative real)
 5 | #' @param prevalence Prevalence threshold (in [0, 100])
 6 | #' @return Total core abundance vector.
 7 | #' @examples
 8 | #'  data(dietswap)
 9 | #'  a <- core_richness(dietswap)
10 | #' @export
11 | #' @references See citation("microbiome")
12 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
13 | #' @keywords utilities
14 | core_richness <- function(x, detection = 1, prevalence = 95) {
15 | 	      
16 |   core.taxa <- core_members(x, detection = detection,
17 |   	               prevalence = prevalence)
18 | 
19 |   # Core matrix
20 |   # FIXME: directly use the core function
21 |   xx <- abundances(prune_taxa(core.taxa, x))
22 |   xx <- matrix(xx, nrow = length(core.taxa))
23 | 
24 |   # Core taxa richness in each sample  
25 |   ab <- colSums(xx > detection, na.rm = TRUE)
26 | 
27 |   ab
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/archive/crash-handler.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/archive/crash-handler.conf


--------------------------------------------------------------------------------
/archive/data.R:
--------------------------------------------------------------------------------
 1 | # Load the package
 2 | library(microbiome)  
 3 | 
 4 | # Define data path (here we retrieve data from R package itself)
 5 | data.directory <- system.file("extdata", package = "microbiome")
 6 | 
 7 | # Read HITChip data matrix (log10 values)
 8 | level <- "L2"
 9 | method <- "frpa"
10 | hitchip.data <- read.profiling(level = level, 
11 | 	     		       method = method, 
12 |               		       data.dir = data.directory, 
13 | 	      	       	       log10 = TRUE)  
14 | 
15 | # Oligo level data (absolute values - no log10)
16 | oligo.data <- read.profiling(level = "oligo", 
17 |                              data.dir = data.directory, 
18 | 			     log10 = FALSE)  
19 | 
20 | # Oligo-phylogeny mapping table
21 | phylogeny.info <- read.profiling(level = "phylogeny.full", 
22 |                            	 data.dir = data.directory)
23 | 
24 | # --------------------------------------------------------------
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/archive/data_old.R:
--------------------------------------------------------------------------------
 1 | #' @title Dynamics of the Human Gut Microbiome in Inflammatory Bowel Disease
 2 | #' @description This data set contains OTU level microbiota profiling with
 3 | #' Illumina HiSeq 2000 for the V4 region with 515F/806RBC for an
 4 | #' Inflammatory bowel disease (IBD) cohort reported in 
 5 | #' reported in Halfvarson J., et al. Nature Microbiology (2017).
 6 | #' \url{http://www.nature.com/articles/nmicrobiol20174}.
 7 | #' @name DynamicsIBD
 8 | #' @details The data is also available for download from Qiita (ID 1629).
 9 | #' \url{https://qiita.ucsd.edu/study/description/1629}. 
10 | #' @docType data
11 | #' @return Loads the data set in R.
12 | #' @author Sudarshan A. Shetty \email{sudarshanshetty9@@gmail.com} 
13 | #' @references 
14 | #' Halfvarson et al. Dynamics of the human gut microbiome in inflammatory
15 | #' bowel disease. Nat Microbiology. 2:17004, 2017. 
16 | #' To cite the microbiome R package, see citation('microbiome') 
17 | #' @usage data(DynamicsIBD) 
18 | #' @format The data set in \code{\link{phyloseq-class}} format.  
19 | #' @keywords data
20 | NULL
21 | 


--------------------------------------------------------------------------------
/archive/debug.R:
--------------------------------------------------------------------------------
 1 | library(microbiome)
 2 | data(peerj32); 
 3 | x <- as.matrix(peerj32$lipids)
 4 | y <- as.matrix(peerj32$microbes[, 1])
 5 | cc <- cross.correlate(x, y); 
 6 | 
 7 | 
 8 | 
 9 | p <- correlation.heatmap(cc, "X1", "X2", "Correlation")
10 | 


--------------------------------------------------------------------------------
/archive/densityplot.R:
--------------------------------------------------------------------------------
 1 | #' @title Density Plot
 2 | #' @description Density visualization for data points overlaid on cross-plot.
 3 | #' @param x Data matrix to plot. The first two columns will be visualized as a cross-plot.
 4 | #' @param main title text
 5 | #' @param x.ticks Number of ticks on the X axis
 6 | #' @param rounding Rounding for X axis tick values
 7 | #' @param add.points Plot the data points as well
 8 | #' @param col Color of the data points. NAs are marked with darkgray.
 9 | #' @param adjust Kernel width adjustment
10 | #' @param size point size
11 | #' @param legend plot legend TRUE/FALSE
12 | #' @return ggplot2 object
13 | #' @examples p <- densityplot(cbind(rnorm(100), rnorm(100)))
14 | #' @export
15 | #' @references See citation('microbiome') 
16 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
17 | #' @keywords utilities
18 | densityplot <- function(x,
19 | 	       main = NULL,
20 | 	       x.ticks = 10,
21 | 	       rounding = 0, 
22 |                add.points = TRUE,
23 | 	       col = "black",
24 | 	       adjust = 1,
25 | 	       size = 1,
26 | 	       legend = FALSE) {
27 | 
28 |     df <- x
29 |     if (!is.data.frame(df)) {
30 |       df <- as.data.frame(as.matrix(df))
31 |     }
32 | 
33 |     # Avoid warnings
34 |     x <- y <- ..density.. <- color <- NULL
35 | 
36 |     # If colors are NA:
37 |     col <- as.character(col)
38 |     col[unname(which(is.na(col)))] <- "darkgray"
39 | 
40 |     theme_set(theme_bw(20))
41 |     xvar <- colnames(df)[[1]]
42 |     yvar <- colnames(df)[[2]]
43 |     df[["x"]] <- df[, 1]
44 |     df[["y"]] <- df[, 2]
45 |     df[["color"]] <- col
46 |     df[["size"]] <- size
47 | 
48 |     # Remove NAs
49 |     df <- df[!(is.na(df[["x"]]) | is.na(df[["y"]])), ]
50 |     
51 |     # Determine bandwidth for density estimation
52 |     bw <- adjust * c(bandwidth.nrd(df[["x"]]), bandwidth.nrd(df[["y"]]))
53 |     if (any(bw == 0)) {
54 |       warning("Zero bandwidths (possibly due to small number of observations). Using minimal bandwidth.")
55 |       bw[bw == 0] = bw[bw == 0] + min(bw[!bw == 0])
56 |     }
57 | 
58 |     # Construct the figure
59 |     p <- ggplot(df)
60 |     p <- p + stat_density2d(aes(x, y, fill = ..density..), geom = "raster", h = bw, contour = FALSE)
61 |     p <- p + scale_fill_gradient(low = "white", high = "black")
62 | 
63 | 
64 |     if (add.points) {
65 |       if (length(unique(df$color)) == 1 && length(unique(df$size)) == 1) {
66 | 
67 |         p <- p + geom_point(aes(x = x, y = y), col = unique(df$color), size = unique(df$size))
68 |       } else if (length(unique(df$color)) == 1 && length(unique(df$size)) > 1) {
69 |         p <- p + geom_point(aes(x = x, y = y, size = size), col = unique(df$color))
70 |       } else if (length(unique(df$color)) > 1 && length(unique(df$size)) == 1) {
71 |         p <- p + geom_point(aes(x = x, y = y, col = color), size = unique(df$size))
72 |       } else {
73 |         p <- p + geom_point(aes(x = x, y = y, col = color, size = size))
74 |       }      
75 |     }
76 | 
77 |     p <- p + xlab(xvar) + ylab(yvar)
78 |     
79 |     if (!legend) {
80 |         p <- p + theme(legend.position = "none")
81 |     }
82 | 
83 | 
84 |     p <- p + scale_x_continuous(breaks = round(seq(floor(min(df[["x"]])), 
85 |                    ceiling(max(df[["x"]])), 
86 |                    length = x.ticks), rounding))
87 | 
88 |     if (!is.null(main)) {
89 |         p <- p + ggtitle(main)
90 |     }
91 | 
92 |     p
93 |     
94 | }
95 | 


--------------------------------------------------------------------------------
/archive/deprecated.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | #' read.profiling.old
  6 | #' 
  7 | #' Read run.profiling.script output into R
  8 | #'
  9 | #' @param method ('frpa' / 'rpa' / 'sum' / 'ave')
 10 | #' @param data.dir Profiling script output directory for reading the data. 
 11 | #'                 If not given, GUI will ask to specify the file and 
 12 | #'             	   overruns the possible level / method arguments in the 
 13 | #'             	   function call.
 14 | #' 
 15 | #' @return data matrix (phylo x samples)
 16 | #'
 17 | #' @examples 
 18 | #'   data.dir <- system.file("extdata", package = "microbiome")
 19 | #'   dat <- read.profiling('frpa', data.dir = data.dir)
 20 | #'
 21 | #' @references See citation('microbiome')
 22 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
 23 | #' @keywords utilities
 24 | 
 25 | read.profiling.old <- function(method = "frpa", data.dir) {
 26 | 
 27 |   results <- list()
 28 | 
 29 |   message(paste("Reading Chip data from", data.dir))
 30 | 
 31 |   for (level in c("L1", "L2", "species")) {
 32 | 
 33 |     if (method == "frpa" && length(grep(method, dir(data.dir))) == 0) {
 34 |       warning("frpa method not available; using rpa instead")
 35 |       method <- "rpa"
 36 |     }
 37 | 
 38 |     f <- paste(data.dir, "/", level, "-", method, ".tab", sep = "")
 39 | 
 40 |     if (file.exists(f)) {
 41 |       tab <- read.csv(f, header = TRUE, sep = "\t", row.names = 1, as.is = TRUE)
 42 |       colnames(tab) <- unlist(strsplit(readLines(f, 1), "\t"))[-1]
 43 | 
 44 |       results[[level]] <- tab
 45 |     }
 46 |   }
 47 | 
 48 |   # oligo data
 49 |   f <- paste(data.dir, "/oligoprofile.tab", sep = "")
 50 |   tab <- read.csv(f, header = TRUE, sep = "\t", row.names = 1, as.is = TRUE)
 51 |   colnames(tab) <- unlist(strsplit(readLines(f, 1), "\t"))[-1]
 52 |   results[["oligo"]] <- tab
 53 | 
 54 |   # Phylogeny
 55 |   f <- paste(data.dir, "/phylogeny.full.tab", sep = "")
 56 |   #f <- paste(data.dir, "/phylogeny.filtered.tab", sep = "")        
 57 |   tab <- read.csv(f, header = TRUE, sep = "\t", as.is = TRUE)
 58 |   tab <- polish.tax.table(tab)
 59 |   results[["taxonomy"]] <- tab  
 60 | 
 61 |   # Metadata      
 62 |   # Read simulated example metadata
 63 |   #library(gdata)
 64 |   #metadata.file <- paste(data.dir, "/metadata.xls", sep = "")
 65 |   #metadata <- read.xls(metadata.file, as.is = TRUE)
 66 |   #rownames(metadata) <- metadata$sampleID    
 67 |   f <- paste(data.dir, "/meta.tab", sep = "")
 68 |   tab <- read.csv(f, header = TRUE, sep = "\t", as.is = TRUE)
 69 |   rownames(tab) <- tab$sample
 70 |   results[["meta"]] <- tab      
 71 | 
 72 |   # Convert to numeric
 73 |   for (level in c("oligo", "species", "L1", "L2")) {
 74 | 
 75 |     tab <- results[[level]]        
 76 |     rnams <- rownames(tab)
 77 |     cnams <- colnames(tab)
 78 | 
 79 |     tab <- apply(tab, 2, as.numeric)
 80 |     rownames(tab) <- rnams
 81 |     colnames(tab) <- cnams
 82 | 
 83 |     if (any(is.na(tab))) {
 84 |         warning(paste("The", level, " matrix has ", sum(is.na(tab)), 
 85 |                       " missing values \n
 86 |                       - imputing.."))
 87 | 
 88 |         for (i in 1:ncol(tab)) {
 89 | 	  inds <- which(is.na(tab[,i]))
 90 | 	  if (length(inds) > 0) {
 91 |             tab[inds, i] <- sample(tab[-inds, i], length(inds))
 92 |   	  }
 93 | 	}
 94 | 
 95 |     }
 96 | 
 97 |     results[[level]] <- tab        
 98 | 
 99 |   }
100 |     
101 |   results
102 |     
103 | } 
104 | 
105 | 


--------------------------------------------------------------------------------
/archive/devtools.R:
--------------------------------------------------------------------------------
 1 | # Package release instructions: http://r-pkgs.had.co.nz/release.html
 2 | 
 3 | # Documentation, Build and Check
 4 | library(devtools)
 5 | document("../../")
 6 | build("../../")
 7 | check("../../")
 8 | 
 9 | # Submissions:
10 | #
11 | # release() # Submit to CRAN
12 | # submit_cran() # Submit to CRAN without all release() questions
13 | 
14 | # Utilities:
15 | #
16 | # build_win("../../") # Windows check
17 | # revdep_check("../../")
18 | # add_rstudio_project("../../")
19 | # use_build_ignore("../NEWS.md", pkg = "../../") # NEWS.md not supported by CRAN
20 | # use_package("dplyr") # add package to imports
21 | # load_all(".") # Reload the package
22 | # test() # Run tests
23 | # run_examples()
24 | 
25 | # Vignettes:
26 | #
27 | # library(knitr)
28 | # knit("../../vignettes/eurostat_tutorial.Rmd", "../../vignettes/eurostat_tutorial.md")
29 | 
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/archive/get_ordination.R:
--------------------------------------------------------------------------------
 1 | #' @title Get Ordination
 2 | #' @description Ordinate phyloseq data and merge it with sample metadata
 3 | #' @param x \code{\link{phyloseq-class}} object or a data matrix 
 4 | #' (features x samples; eg. HITChip taxa vs. samples)
 5 | #' @param method Ordination method, see phyloseq::plot_ordination
 6 | #' @param distance Ordination distance, see phyloseq::plot_ordination
 7 | #' @return data.frame with ordination coordinates and metadata
 8 | #' @examples
 9 | #' data(dietswap)
10 | #' fc <- get_ordination(dietswap)
11 | #' @seealso phyloseq::plot_ordination
12 | #' @export
13 | #' @details This is a wrapper for phyloseq ordination functions, providing
14 | #' smooth access to ordinated data.frame with full info on the projection
15 | #' and metadata necessary for further visualizations.
16 | #' @references See citation('microbiome') 
17 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
18 | #' @keywords utilities
19 | get_ordination <- function(x, method="NMDS", distance="bray") {
20 |     
21 |     x.ord <- ordinate(x, method, distance)
22 |     
23 |     # Pick the projected data (first two columns + metadata)
24 |     proj <- phyloseq::plot_ordination(x, x.ord, justDF=TRUE)
25 |     
26 |     # Rename the projection axes
27 |     names(proj)[1:2] <- paste("Comp", 1:2, sep=".")
28 |     
29 |     proj
30 |     
31 | }
32 | 
33 | 


--------------------------------------------------------------------------------
/archive/gini.R:
--------------------------------------------------------------------------------
 1 | #' @title Top Abundance Index
 2 | #' @description Calculates the community top_abundance index.
 3 | #' @param rank Optional. The rank of the dominant taxa to consider.
 4 | #' @param aggregate Aggregate (TRUE; default) the top members or not. If aggregate=TRUE, then the sum of relative abundances is returned. Otherwise the relative abundance is returned for the single taxa with the indicated rank. 
 5 | #' @inheritParams diversity
 6 | #' @return A vector of top_abundance indices
 7 | #' @export
 8 | #' @examples
 9 | #'   data(dietswap)
10 | #'   d <- top_abundance(dietswap)
11 | #' @details The top_abundance index gives the abundance of the most abundant species in [0,1]. This simple diversity index is occasionally used in ecological literature, and sometimes also called dominance. However, note that the microbiome::dominance function uses a different definition. With rank = 2, the sum of abundances for the two most abundant taxa are returned etc. However, if aggregate=FALSE, the abundance for the single n'th most dominant taxa (n = rank) is returnde instead the sum of abundances up to that rank.
12 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
13 | #' @seealso dominance, diversity
14 | #' @keywords utilities
15 | top_ab <- function(x, rank = 1, aggregate = TRUE, split = TRUE) {
16 | 
17 |   # Pick the OTU data
18 |   otu <- abundances(x)
19 | 
20 |   if (!split) {
21 |     otu <- as.matrix(rowSums(otu), nrow = nrow(otu))
22 |   }
23 | 
24 |   if (!aggregate) {
25 |     do <- apply(otu, 2, function (x) {rev(sort(x/sum(x, na.rm = TRUE)))[[rank]]})
26 |   } else {
27 |     do <- apply(otu, 2, function (x) {sum(rev(sort(x/sum(x, na.rm = TRUE)))[1:rank])})
28 |   }
29 |   names(do) <- sample_names(x)
30 |   
31 |   do
32 | 
33 | }
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/archive/gini_index.R:
--------------------------------------------------------------------------------
 1 | 
 2 | gini_index <- function() {
 3 | 
 4 | 
 5 | 
 6 | }
 7 | 
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/archive/internal.R:
--------------------------------------------------------------------------------
 1 | 
 2 | get.file.method <- function( f ) {
 3 |     
 4 |     method <- NULL
 5 |     
 6 |     if (f == "rpa") {
 7 |         method <- "rpa"
 8 |     }
 9 |     if (f == "frpa") {
10 |         method <- "frpa"
11 |     }
12 |     if (!length(grep("sum", f)) == 0) {
13 |         method <- "sum"
14 |     }
15 |     if (!length(grep("ave", f)) == 0) {
16 |         method <- "ave"
17 |     }
18 |     if (!length(grep("nmf", f)) == 0) {
19 |         method <- "nmf"
20 |     }
21 |     
22 |     method
23 |     
24 | }
25 | 
26 | get.file.level <- function(f) {
27 |     
28 |     level <- NULL
29 |     
30 |     if (!length(grep("oligo", f)) == 0) {
31 |         level <- "oligo"
32 |     }
33 |     if (!length(grep("species", f)) == 0) {
34 |         level <- "species"
35 |     }
36 |     if (!length(grep("L0", f)) == 0) {
37 |         level <- "L0"
38 |     }
39 |     if (!length(grep("L1", f)) == 0) {
40 |         level <- "L1"
41 |     }
42 |     if (!length(grep("L2", f)) == 0) {
43 |         level <- "L2"
44 |     }
45 |     if (!length(grep("phylogeny", f)) == 0) {
46 |         level <- "phylogeny.info"
47 |     }
48 |     
49 |     level
50 |     
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/archive/limma.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Limma"
  3 | author: "Leo Lahti"
  4 | date: "`r Sys.Date()`"
  5 | bibliography: 
  6 | - bibliography.bib
  7 | 
  8 | output: 
  9 |   rmarkdown::html_vignette
 10 | ---
 11 | <!--
 12 |   %\VignetteEngine{knitr::rmarkdown}
 13 |   %\VignetteIndexEntry{microbiome tutorial - limma}
 14 |   %\usepackage[utf8]{inputenc}
 15 |   %\VignetteEncoding{UTF-8}  
 16 | -->
 17 | 
 18 | 
 19 | ## Linear models with limma
 20 | 
 21 | ### Discrete variables: sex
 22 | 
 23 | Identify most significantly different taxa between males and females.
 24 | 
 25 | For further details, see [limma
 26 | homepage](http://bioinf.wehi.edu.au/limma/) and [limma User's
 27 | guide](http://www.lcg.unam.mx/~lcollado/R/resources/limma-usersguide.pdf). For
 28 | discussion on why limma is preferred over t-test, see [this
 29 | article](http://www.plosone.org/article/info:doi/10.1371/journal.pone.0012336).
 30 | 
 31 | ```{r limma-example, warning=FALSE, fig.path = "figure/"}
 32 | # Get example data
 33 | library(microbiome)
 34 | data("peerj32")
 35 | pseq <- peerj32$phyloseq
 36 | otu <- taxa_abundances(transform_phyloseq(pseq, "log10"))
 37 | meta <- sample_data(pseq)
 38 | groups <- meta$gender
 39 | 
 40 | # Compare the two groups with limma
 41 | library(limma)
 42 | 
 43 | # Prepare the design matrix which states the groups for each sample
 44 | # in the otu
 45 | design <- cbind(intercept = 1, Grp2vs1 = groups)
 46 | rownames(design) <- rownames(meta)
 47 | design <- design[colnames(otu), ]
 48 | 
 49 | # NOTE: results and p-values are given for all groupings in the design matrix
 50 | # Now focus on the second grouping ie. pairwise comparison
 51 | coef.index <- 2
 52 |      
 53 | # Fit the limma model
 54 | fit <- lmFit(otu, design)
 55 | fit <- eBayes(fit)
 56 | pvalues.limma = fit$p.value[, 2]
 57 | 
 58 | # Summarise 
 59 | kable(topTable(fit, coef = coef.index, p.value=0.05), digits = 2)
 60 | ```
 61 | 
 62 | 
 63 | ### Q-Q plot
 64 | 
 65 | 
 66 | ```{r limma-qq, warning=FALSE, fig.path = "figure/"}
 67 | qqt(fit$t[, coef.index], df = fit$df.residual + fit$df.prior)
 68 | abline(0,1)
 69 | ```
 70 | 
 71 | ### Volcano plot
 72 | 
 73 | ```{r limma-volcano, warning=FALSE, fig.path = "figure/"}
 74 | volcanoplot(fit, coef = coef.index, highlight = coef.index)
 75 | ```
 76 | 
 77 | 
 78 | 
 79 | ### Comparison between limma and t-test
 80 | 
 81 | Order the taxa with t-test for comparison and validation purposes. The
 82 | differences are small in this simulated example, but [can be
 83 | considerable in real
 84 | data](http://www.plosone.org/article/info:doi/10.1371/journal.pone.0012336).
 85 | 
 86 | ```{r limma-compairson, warning=FALSE, fig.path = "figure/"}
 87 | # Compare the two groups with t-test
 88 | library(dplyr)
 89 | pvalues.ttest <- c()
 90 | male.samples <- dplyr::filter(meta, gender == "male")$sample
 91 | female.samples <- dplyr::filter(meta, gender == "female")$sample
 92 | for (tax in rownames(otu)) {
 93 |   pvalues.ttest[[tax]] <- t.test(otu[tax, male.samples], otu[tax, female.samples])$p.value
 94 | }
 95 | # Multiple testing correction
 96 | pvalues.ttest <- p.adjust(pvalues.ttest, method = "fdr")
 97 | 
 98 | # Compare p-values between limma and t-test
 99 | taxa <- rownames(otu)
100 | plot(pvalues.ttest[taxa], pvalues.limma[taxa])
101 | abline(0,1,lty = 2)
102 | ```
103 | 
104 | ### Continuous variables
105 | 
106 | Quantify continuous associations with lm_phyloseq function. This uses
107 | the limma model to generate a table of P-values and effect sizes. Note
108 | that no confounding variables taken into account in this wrapper. See
109 | the [limma homepage](http://bioinf.wehi.edu.au/limma/) for more
110 | detailed analyses.
111 | 
112 | ```{r limma-lm-phyloseq, warning=FALSE, fig.path = "figure/"}
113 | data("atlas1006")
114 | source(system.file("extdata/lm_phyloseq.R", package = "microbiome"))
115 | tab <- lm_phyloseq(atlas1006, "age")
116 | kable(head(tab), digits = 3)
117 | ```
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/archive/linearmodel.R:
--------------------------------------------------------------------------------
 1 | #' @title Linear model with random subject effects for phyloseq
 2 | #' @description Linear model test for paired comparisons for phyloseq objects with random effect subject term.
 3 | #' @param x \code{\link{phyloseq-class}} object 
 4 | #' @param group Metadata field specifying the groups.
 5 | #' @param p.adjust.method p-value correction method for p.adjust function 
 6 | #'               (default 'BH'). For other options, see ?p.adjust
 7 | #' @return Corrected p-values. 
 8 | #' @examples 
 9 | #'   data(peerj32)
10 | #'   pval <- check_lmer(peerj32$phyloseq, "group")
11 | #' @export
12 | #' @importFrom lme4 lmer
13 | #' @references See citation('microbiome') 
14 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
15 | #' @keywords utilities
16 | check_lmer <- function (x, group, p.adjust.method = "BH") {
17 | 
18 |   # We need taxa x samples matrix
19 |   mydata <- otu_table(x)@.Data
20 |   if (!taxa_are_rows(x)) {mydata <- t(mydata)}
21 | 
22 |   metadata <- sample_data(x)
23 |   metadata$group <- droplevels(factor(metadata[[group]]))
24 | 
25 |   # Pvalues
26 |   pv <- c()
27 |   for (tax in rownames(mydata)) {
28 | 
29 |     dfs <- metadata
30 |     dfs$signal <- mydata[tax, rownames(dfs)]
31 | 
32 |     # Paired comparison 
33 |     out <- lmer(signal ~ group + (1|subject), data = dfs)
34 |     out0 <- lmer(signal ~ (1|subject), data = dfs)
35 |     comp <- anova(out0, out)
36 |     pv[[tax]] <- comp[["Pr(>Chisq)"]][[2]]
37 |     
38 |   }
39 | 
40 |   # Adjust ANOVA group-level p-values
41 |   pv <- p.adjust(pv[, "p.anova"], method = p.adjust.method)  
42 |   
43 |   # Order by p 
44 |   pv <- sort(pv)
45 | 
46 | }
47 | 
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/archive/lm_phyloseq.R:
--------------------------------------------------------------------------------
 1 | #' @title Limma test for phyloseq
 2 | #' @description Limma for a single continuous variable vs. OTUs with a phyloseq object.
 3 | #' @param x \code{\link{phyloseq-class}} object 
 4 | #' @param varname Metadata field specifying the investigated metadata variable.
 5 | #' @param transformation Transformation for the original phyloseq values. By default a log10 transformation is done to better approximate the requirements of a linear model.
 6 | #' @param p.adj.method p-value correction method for p.adjust function 
 7 | #'               (default 'BH'). For other options, see ?p.adjust
 8 | #' @return Limma output table.
 9 | #' @examples 
10 | #'  data("atlas1006")
11 | #'  tab <- lm_phyloseq(atlas1006, "age")
12 | #' @export
13 | #' @importFrom phyloseq get_variable
14 | #' @references See citation('microbiome') 
15 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
16 | #' @keywords utilities
17 | lm_phyloseq <- function (x, varname, transformation = "log10", p.adj.method = "BH") {
18 | 
19 |   # Transformation useful for linear models  
20 |   x <- transform_phyloseq(x, transformation)
21 | 
22 |   # Limma significance analysis
23 |   # Prepare the design matrix which states the variable values for each sample
24 |   design <- cbind(intercept = 1, var = get_variable(x, varname))
25 |   rownames(design) <- rownames(sample_data(x)) 
26 | 
27 |   # OTU matrix
28 |   otu <- otu_table(x)@.Data
29 |   if (!taxa_are_rows(x)) {otu <- t(otu)}
30 | 
31 |   # Remove missing vals
32 |   keep = which(rowSums(is.na(design)) == 0)
33 | 
34 |   if (length(keep) > 0) {
35 |     otu = otu[,keep]
36 |     design = design[keep,]
37 |   } else {
38 |     warning("All samples have missing values in the metadata design matrix. Halting the computation.")
39 |     return(NULL)
40 |   }
41 |   
42 |   # Fit the limma model
43 |   fit <- lmFit(otu, design)
44 |   fit2 <- eBayes(fit)
45 | 
46 |   # NOTE: results and p-values are given for all groupings in the design matrix
47 |   # Now focus on the second grouping ie. pairwise comparison
48 |   topTable(fit2, coef = 2)
49 | 
50 | }


--------------------------------------------------------------------------------
/archive/low_abundance.R:
--------------------------------------------------------------------------------
 1 | #' @title Low Abundance Index
 2 | #' @description Calculates the community low_abundance index.
 3 | #' @inheritParams core
 4 | #' @param split (Optional). Logical. Pool all samples and estimate index for the entire set.
 5 | #' @return A vector of low_abundance indices
 6 | #' @export
 7 | #' @examples
 8 | #'   data(dietswap)
 9 | #'   d <- low_abundance(dietswap, detection = 0.2/100)
10 | #' @details The low_abundance index gives the relative proportion of rare species in [0,1]. The species that are below the indicated detection threshold are considered rare. Note that population prevalence is not considered.
11 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
12 | #' @keywords utilities
13 | #' @seealso core_abundance, rarity, diversity
14 | low_abundance <- function(x, detection = 0.2/100, split = TRUE) {
15 | 
16 |   # Ensure compositional data       
17 |   xc <- transform(x, "compositional")
18 | 
19 |   if (!split) {
20 |   
21 |     otu <- rowSums(abundances(xc))
22 |     x <- otu/sum(otu)
23 |     do <- sum(x[x < detection])
24 |     
25 |   } else {
26 | 
27 |     otu <- abundances(xc)
28 |     do <- apply(otu, 2, function (x) {sum(x[x < detection])})
29 |     names(do) <- sample_names(x)
30 | 
31 |   }
32 |   
33 |   do
34 | 
35 | }
36 | 
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/archive/lower.triangle.R:
--------------------------------------------------------------------------------
 1 | #' @title Lower triangle of a matrix
 2 | #' @description Get lower triangle of a square matrix.
 3 | #' as a numeric vector such that
 4 | #' row-by-row, picking elements in the order
 5 | #' 2,1;3,1;3,2;4,1,...
 6 | #'        
 7 | #' @param mat data matrix
 8 | #'
 9 | #' @return lower triangle as vector 
10 | #'
11 | #' @export
12 | #' @examples 
13 | #'   mat <- rbind(c(1,2,3), c(4,5,6), c(7,8,9))
14 | #'   vec <- lower.triangle(mat)
15 | #' @references See citation('microbiome') 
16 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
17 | #' @keywords utilities
18 | lower.triangle <- function(mat) {
19 |     
20 |   # TODO is this easy replace with standard R functions ?
21 | 
22 |     elements <- c()
23 |     nr <- dim(mat)[[1]]
24 |     nc <- dim(mat)[[2]]
25 |     
26 |     for (i in 2:nr) {
27 |         for (j in 1:(i - 1)) {
28 |             elements <- c(elements, mat[i, j])
29 |         }
30 |     }
31 |     elements
32 | } 
33 | 


--------------------------------------------------------------------------------
/archive/make.abundancy.table.R:
--------------------------------------------------------------------------------
 1 | #' @title Abundance table
 2 | #' @description Calculate abundancies.
 3 | #' Discretize Hitchip matrix to form abundancy table
 4 | #' of form j, nj where j is number of counts and nj is number
 5 | #' of phylotypes with the corresponding counts
 6 | #' this format is often required by richness estimation
 7 | #' @param dat data matrix
 8 | #' @param det.th detection threshold
 9 | #' @param discretization.resolution discretization resolution
10 | #' @return abundancy table
11 | #' @examples 
12 | #'   data(peerj32)
13 | #'   abtab <- make.abundancy.table(10^t(peerj32$microbes), det.th = 0)
14 | #' @export
15 | #' @references 
16 | #'    To cite the microbiome R package, see citation('microbiome') 
17 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
18 | #' @keywords utilities
19 | make.abundancy.table <- function(dat, det.th, discretization.resolution = 1) {
20 |     
21 |     di <- 10^dat - 10^det.th
22 |     di[di < 0] <- 0
23 |     di <- discretization.resolution * (di)
24 |     di[di > 0 & di < 1] <- 1
25 |     di <- round(di)
26 |     
27 |     ab <- t(di)
28 |     
29 |     ab
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/archive/maturity.R:
--------------------------------------------------------------------------------
 1 | #' @title Maturity index
 2 | #' @description Calculate maturity index for a phyloseq object
 3 | #' @param x \code{\link{phyloseq-class}} object 
 4 | #' @return Maturity index
 5 | #' @examples
 6 | #'  #This is an artificial example based on readily available
 7 | #'  #adult data set whereas
 8 | #'  #the maturity index is typically calculated from and
 9 | #'  #used for babies/children:
10 | #'   data("atlas1006")
11 | #'   pseq <- atlas1006
12 | #'   pseq <- subset_samples(pseq, DNA_extraction_method == "r" & time == 0)
13 | #'   pseq <- transform_phyloseq(pseq, "relative.abundance")
14 | #'   maturity(pseq) 
15 | #' @export
16 | #' @references To cite this R package, see citation('microbiome').
17 | #' The microbiota maturity index has been adapted from the following papers:
18 | #' Subramanian S et al. Nature 510:417-421, 2014.
19 | #' Dogra S et al. mBio 6:e02419-14, 2015.
20 | #' Korpela K et al. Nat. Comm. 7:10410, 2016.
21 | #' @details Microbiota maturity index has been shown to differentiate healthy children (see the references). In Korpela et al. (2016) this was calculated as the first principal coordinate from a PCoA (MDS) using only significantly age-associated genus-level taxa (all groups included). In this function NMDS is used instead. The maturity index is also adjusted for age.
22 | #' @importFrom phyloseq get_variable
23 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
24 | #' @keywords utilities
25 | maturity <- function (x) {
26 | 
27 |   # Pick significant age-associated taxa
28 |   age.taxa <- rownames(lm_phyloseq(x, "age"))
29 | 
30 |   # Taxa x samples matrix for age-associated taxa
31 |   #otu <- get_sample(x)[age.taxa,]
32 |   pf <- prune_taxa(age.taxa, x)
33 | 
34 |   # NMDS ordination
35 |   ord <- ordinate(pf, "NMDS", "bray")
36 | 
37 |   # Maturity index
38 |   index <- scores(ord)[,1]
39 | 
40 |   # Adjust for age
41 |   ages <- get_variable(x, "age")
42 |   res <- lm(index ~ age)
43 | 
44 |   # Residuals provide the adjusted index
45 |   index.adjusted <- residuals(res)
46 | 
47 |   index.adjusted
48 | 
49 | }
50 | 
51 | 


--------------------------------------------------------------------------------
/archive/metadata.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/archive/metadata.xls


--------------------------------------------------------------------------------
/archive/misc.R:
--------------------------------------------------------------------------------
 1 | # Just apply variance stabilizing transformation
 2 | # varianceStabilizingTransformation(ds2, blind = TRUE, fitType = "parametric")
 3 | # ds2 <- estimateSizeFactors(ds2)
 4 | # ds2 <- estimateDispersions(ds2)
 5 | # abund <- getVarianceStabilizedData(ds2)
 6 | 
 7 | #dfm <- as.data.frame(otu)
 8 |   #dfm$ID <- rownames(otu)
 9 |   #dfm <- gather(dfm, ID)    
10 |   #colnames(dfm) <- c("Taxon", "Sample", "Abundance")
11 |   #dfm$Abundance <- as.numeric(as.character(dfm$Abundance))  
12 |   #dfm$Sample <- factor(as.character(dfm$Sample), levels = sort.samples)
13 |   #dfm$ID <- factor(as.character(dfm$ID), levels = sort.taxa)  
14 | 


--------------------------------------------------------------------------------
/archive/misc.Rmd:
--------------------------------------------------------------------------------
 1 | biocLite("devtools")
 2 | biocLite("phyloseq")
 3 | biocLite("ggplot2")
 4 | biocLite("dplyr")
 5 | biocLite("reshape2")
 6 | biocLite("tidyr")
 7 | biocLite("vegan")
 8 | biocLite("knitr")
 9 | biocLite("knitcitations")
10 | biocLite("compositions")
11 | biocLite("qvalue")
12 | biocLite("rmarkdown")
13 | 
14 | 


--------------------------------------------------------------------------------
/archive/multimodality_phyloseq.R:
--------------------------------------------------------------------------------
 1 | #' @title Phyloseq Multimodality Test
 2 | #' @description Calculate multimodality score based on bootstrapped
 3 | #' potential analysis.
 4 | #' @param x Phyloseq object
 5 | #' @param detection Mode detection
 6 | #' @param bw.adjust Bandwidth adjustment
 7 | #' @param bs.iterations Bootstrap iterations
 8 | #' @param detection.limit minimum accepted density for a maximum;
 9 | #'        as a multiple of kernel height
10 | #' @param verbose Verbose
11 | #' @return A list with following elements
12 | #'   \item{score}{Fraction of bootstrap samples where multiple modes
13 | #'                are observed}
14 | #'   \item{nmodes}{The most frequently observed number of modes in bootstrap
15 | #'                 sampling results}
16 | #'   \item{results}{Full results of potential_analysis_bootstrap for each
17 | #'                  row of the input matrix}
18 | #' @details Repeats potential analysis (Livina et al. 2010) multiple times
19 | #' with bootstrap sampling for each row of the input data
20 | #'  (as in Lahti et al. 2014) and returns the specified results.
21 | #' @export
22 | #' @author Leo Lahti \email{leo.lahti@@iki.fi}
23 | #' @examples 
24 | #'   data(peerj32)
25 | #'   s <- multimodality_score(
26 | #'            t(peerj32$microbes[, c("Akkermansia", "Dialister")]))
27 | #' @references
28 | #'  \itemize{
29 | #'   \item{}{Livina et al. (2010). Potential analysis reveals changing number
30 | #'           of climate states during the last 60 kyr.
31 | #'           \emph{Climate of the Past}, 6, 77-82.}
32 | #'   \item{}{Lahti et al. (2014). Tipping elements of the human intestinal
33 | #'           ecosystem. \emph{Nature Communications} 5:4344.}
34 | #'  }
35 | #' @keywords utilities
36 | multimodality_phyloseq <- function (x, detection = 1, bw.adjust = 1,
37 |                                     bs.iterations = 100, detection.limit = 1,
38 | 				    verbose = TRUE) {
39 |   x <- abundances(transform_phyloseq(x, "log10"))
40 |   msc <- multimodality_score(x, detection,
41 |       	   bw.adjust, bs.iterations, detection.limit, verbose)
42 |   msc
43 |   
44 | }
45 | 
46 | 


--------------------------------------------------------------------------------
/archive/neatsort.R:
--------------------------------------------------------------------------------
 1 | #' @title Neatmap Ordering for Matrix Features or Samples
 2 | #' @description Order features or samples based on the neatmap approach.
 3 | #' @param x A matrix.
 4 | #' @param arrange Order "features" or "samples". For matrices, it is assumed that the samples are on the columns and features are on the rows. For phyloseq objects, features are the taxa of the OTU table.
 5 | #' @param method Ordination method. Only NMDS implemented for now.
 6 | #' @param distance Distance method. See \code{\link{vegdist}} function from the \pkg{vegan} package.
 7 | #' @param first Optionally provide the name of the first sample (or feature) to start the ordering (the ordering is cyclic so we can start at any point). The choice of the first sample/feature may somewhat affect the overall ordering.
 8 | #' @param ... Arguments to pass.
 9 | #' @return Vector of ordered elements
10 | #' @export
11 | #' @examples \dontrun{
12 | #'   data(peerj32)
13 | #'   x <- peerj32$microbes
14 | #'   x <- neatsort(x, "features", method = "NMDS", distance = "bray")
15 | #'                   }
16 | #' @references This function is partially based on code derived from the \pkg{phyloseq} package. However for the original
17 | #'   neatmap approach for heatmap sorting, see (and cite):
18 | #'   Rajaram, S., & Oono, Y. (2010). NeatMap--non-clustering heat map alternatives in R. BMC Bioinformatics, 11, 45.
19 | #'
20 | #' @details This function borrows elements from the heatmap implementation in the \pkg{phyloseq} package. The row/column sorting is there
21 | #' not available as a separate function at present, however, hindering reuse in other tools. This function provides an independent
22 | #' method for easy row/column reordering for matrices. This a quick hack and using the ordination could be expanded further
23 | #' (now only NMDS is available, and the sorting is done independently for features and samples).
24 | #' @keywords utilities
25 | neatsort <- function (x, arrange, method = "NMDS", distance = "bray", first = NULL, ...) {
26 | 
27 |   if (arrange == "samples") {
28 |     x <- t(x)
29 |   }
30 |   
31 |   # Neatmap sorting for matrices with NMDS 
32 |   d <- vegdist(x, distance)
33 | 
34 |   # Order		     
35 |   # Capture the output to keep the screen clean
36 |   junk <- capture.output(
37 |     ord <- metaMDS(d, wascores = FALSE, autotransform = FALSE, noshare = FALSE), file=NULL)
38 | 
39 | 
40 |   # Order items with the  NeatMap procedure
41 |   # Reorder by the angle in radial coordinates on the 2-axis plane.
42 |   DF <- NULL
43 | 
44 |   # Define new sample ordering based on the ordination
45 |   tmp <- try({
46 |     DF <- scores(ord, choices = c(1, 2), display = "sites")}, silent = TRUE)
47 | 
48 |   if(inherits(tmp, "try-error")){
49 |     warning(paste("Order failed. Using default ordering.", sep = ""))
50 |   }
51 |   
52 |   if(!is.null(DF)){
53 |     # If the score accession worked, replace order
54 |     ordering <- rownames(x)[order(radial_theta(DF))] 
55 |   }
56 |   # Determine the starting item (OTU or sample)
57 |   if( !is.null(first) ){
58 |     ordering <- chunk_reorder(ordering, first)
59 |   }
60 | 
61 |   ordering
62 |   
63 | }
64 | 
65 | 
66 | 
67 | 


--------------------------------------------------------------------------------
/archive/nets.R:
--------------------------------------------------------------------------------
1 | ```{r networks3, warning=FALSE, message=FALSE}
2 | theme_set(theme_bw(20))
3 | p <- plot_net(pseq, maxdist = 0.2,
4 |               shape = "group", color = "nationality",
5 | 	      distance = "bray", laymeth = "auto") +
6 |      scale_colour_brewer(palette = "Accent")
7 | print(p)		 
8 | ```
9 | 


--------------------------------------------------------------------------------
/archive/nets.Rmd:
--------------------------------------------------------------------------------
 1 | ### Taxonomic network reconstruction 
 2 | 
 3 | ```{r networks1, message=FALSE, warning=FALSE}
 4 | pseq <- transform(atlas1006, "compositional")
 5 | 
 6 | # Keep only the prevalent taxa 
 7 | pseq <- core(pseq, detection = 0.2/100, prevalence = 80/100)
 8 | pseq <- subset_samples(pseq, DNA_extraction_method == "r")
 9 | ```
10 | 
11 | The compositionality bias can be fixed with SpiecEasi or SparCC; the
12 | implementations are available via the [SpiecEasi
13 | package](https://github.com/zdk123/SpiecEasi). Note that the execution
14 | is slow.
15 | 
16 | 
17 | ```{r spieceasi, warning=FALSE, message=FALSE, fig.width=10, fig.height=10, eval=TRUE}
18 | library(SpiecEasi) #install_github("zdk123/SpiecEasi")
19 | library(phyloseq)
20 | 
21 | # Pick the OTU table
22 | otu <- abundances(pseq)
23 | net <- spiec.easi(t(otu), method='mb', icov.select.params=list(rep.num=100))
24 | 
25 | ## Create graph object
26 | n <- net$refit
27 | colnames(n) <- rownames(n) <- rownames(otu)
28 | 
29 | # Network Format
30 | library(network)
31 | netw <- network(as.matrix(n), directed = FALSE)
32 | ```
33 | 
34 | 
35 | Visualize the network with [ggnet2](https://briatte.github.io/ggnet):
36 | 
37 | ```{r networks5, warning=FALSE, message=FALSE, fig.width=12, fig.height=7}
38 | library(GGally)
39 | library(ggnet)
40 | library(network)
41 | library(sna)
42 | library(ggplot2)
43 | library(intergraph) # ggnet2 works also with igraph with this
44 | 
45 | phyla <- map_levels(rownames(otu),
46 |            from = "Genus", to = "Phylum",
47 |            tax_table(pseq))
48 | 
49 | netw %v% "Phylum" <- phyla
50 | p <- ggnet2(netw, color = "Phylum", label = TRUE, label.size = 2)
51 | print(p)
52 | ```
53 | 
54 | 
55 | 
56 | See the [phyloseq tutorial](http://joey711.github.io/phyloseq/plot_network-examples) for
57 | additional network visualization tools.
58 | 
59 | 


--------------------------------------------------------------------------------
/archive/peerj32_meta.csv:
--------------------------------------------------------------------------------
 1 | time	gender	subject	sample	group
 2 | sample-1	1	F	S1	sample-1	Placebo
 3 | sample-2	2	F	S1	sample-2	Placebo
 4 | sample-3	1	F	S2	sample-3	Placebo
 5 | sample-4	2	F	S2	sample-4	Placebo
 6 | sample-5	1	F	S3	sample-5	LGG
 7 | sample-6	2	F	S3	sample-6	LGG
 8 | sample-7	1	M	S4	sample-7	Placebo
 9 | sample-8	2	M	S4	sample-8	Placebo
10 | sample-9	1	F	S5	sample-9	Placebo
11 | sample-10	2	F	S5	sample-10	Placebo
12 | sample-11	1	F	S6	sample-11	LGG
13 | sample-12	2	F	S6	sample-12	LGG
14 | sample-13	1	F	S7	sample-13	Placebo
15 | sample-14	2	F	S7	sample-14	Placebo
16 | sample-15	1	F	S8	sample-15	LGG
17 | sample-16	2	F	S8	sample-16	LGG
18 | sample-17	1	M	S9	sample-17	Placebo
19 | sample-18	2	M	S9	sample-18	Placebo
20 | sample-19	1	F	S10	sample-19	LGG
21 | sample-20	2	F	S10	sample-20	LGG
22 | sample-21	1	F	S11	sample-21	LGG
23 | sample-22	2	F	S11	sample-22	LGG
24 | sample-23	1	F	S12	sample-23	Placebo
25 | sample-24	2	F	S12	sample-24	Placebo
26 | sample-25	1	M	S13	sample-25	LGG
27 | sample-26	2	M	S13	sample-26	LGG
28 | sample-27	1	F	S14	sample-27	Placebo
29 | sample-28	2	F	S14	sample-28	Placebo
30 | sample-29	1	F	S15	sample-29	Placebo
31 | sample-30	2	F	S15	sample-30	Placebo
32 | sample-31	1	F	S16	sample-31	Placebo
33 | sample-32	2	F	S16	sample-32	Placebo
34 | sample-33	1	M	S17	sample-33	Placebo
35 | sample-34	2	M	S17	sample-34	Placebo
36 | sample-35	1	M	S18	sample-35	Placebo
37 | sample-36	2	M	S18	sample-36	Placebo
38 | sample-37	1	M	S19	sample-37	LGG
39 | sample-38	2	M	S19	sample-38	LGG
40 | sample-39	1	F	S20	sample-39	Placebo
41 | sample-40	2	F	S20	sample-40	Placebo
42 | sample-41	1	M	S21	sample-41	LGG
43 | sample-42	2	M	S21	sample-42	LGG
44 | sample-43	1	F	S22	sample-43	Placebo
45 | sample-44	2	F	S22	sample-44	Placebo
46 | 


--------------------------------------------------------------------------------
/archive/pet.R:
--------------------------------------------------------------------------------
 1 | library(microbiome)
 2 | 
 3 | # New pipeline
 4 | frpa1 <- read.profiling(data.dir = "PET", level = "species", method = "frpa")
 5 | frpa2 <- read.profiling(data.dir = "PET", level = "species", method = "sum")
 6 | 
 7 | # New pipeline 2
 8 | sum.new2 <- read_hitchip("PET", method = "sum")
 9 | frpa.new2 <- read_hitchip("PET", method = "frpa") 
10 | rpa.new2 <- read_hitchip("PET", method = "rpa") 
11 | 
12 | # Old pipeline
13 | frpa.old <- read.profiling("~/tmp/pet14/data", method = "frpa")
14 | sum.old <- read.profiling("~/tmp/pet14/data", method = "sum")
15 | 
16 | # Old and New match perfect at species level
17 | level <- "species"
18 | s <- colnames(sum.old[[level]])
19 | taxa <- intersect(rownames(sum.new[[level]]), rownames(sum.old[[level]]))
20 | plot(log10(unlist(sum.new[[level]][taxa,s])), log10(unlist(sum.old[[level]][taxa,s])))
21 | plot(log10(unlist(frpa.new[[level]][taxa,s])), log10(unlist(frpa.old[[level]][taxa,s])))
22 | 
23 | level <- "species"
24 | par(mfrow = c(2,2))
25 | plot(log10(unlist(sum.new[[level]])), log10(unlist(sum.new2$abundance.table)))
26 | plot(log10(unlist(frpa.new[[level]])), log10(unlist(frpa.new2$abundance.table)))
27 | plot(log10(unlist(frpa.new[[level]])), log10(unlist(sum.new2$abundance.table)))
28 | 
29 | # FIXME: make this work also with old output
30 | # Read and recalculate
31 | res <- read_hitchip("PET", method = "sum")
32 | pseq <- res$pseq
33 | otu <- res$abundance.table
34 | 
35 | #  pseq@otu_table
36 | dim(tax_glom(pseq, "L2")@otu_table)
37 | dim(tax_glom(pseq, "L1")@otu_table)
38 | 
39 | plot(unlist(log10(tax_glom(pseq, "species")@otu_table)), unlist(log10(pseq@otu_table)))
40 | sum.new <- read.profiling("PET", method = "sum")
41 | 
42 | 
43 | #---------------------------------------------------------------
44 | 
45 | sum.old <- read.profiling("~/tmp/pet14/data", method = "sum")
46 | sum.new <- read.profiling("PET", method = "sum")
47 | pseq0 <- read_hitchip("PET", method = "sum", detection.threshold = 0)$pseq
48 | pseq <- read_hitchip("PET", method = "sum", detection.threshold = 10^1.8)$pseq
49 | pseq.L20 <- aggregate_taxa(pseq0, level = "L2")
50 | pseq.L2 <- aggregate_taxa(pseq, level = "L2")
51 | 
52 | taxa <- intersect(rownames(L2), rownames(sum.new$L2))
53 | 
54 | coms <- colnames(sum.old$L2[taxa,])
55 | 
56 | old <- log10(sum.old$L2[taxa,coms])
57 | new.pre <- log10(sum.new$L2[taxa,coms])
58 | new0 <- log10(otu_table(pseq.L20)@.Data[taxa,coms])
59 | new.th <- log10(otu_table(pseq.L2)@.Data[taxa,coms])
60 | 
61 | par(mfrow = c(2,2))
62 | plot(unlist(old[, coms]), unlist(new0[, coms]))
63 | plot(unlist(old[, coms]), unlist(new.th[, coms]))
64 | plot(unlist(new.th[, coms]), unlist(new.pre[, coms]))
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/archive/pet2.R:
--------------------------------------------------------------------------------
 1 | library(microbiome)
 2 | 
 3 | # Old PET data, pre-calculated
 4 | d1 <- read.profiling(data.dir = "~/antagomir/pet14/data", level = "species", method = "frpa")
 5 | 
 6 | # New PET data, calculated on the fly
 7 | d2 <- read_hitchip(data.dir = "~/antagomir/pet14b/", method = "frpa")
 8 | d2 <- otu_table(d2$pseq)@.Data
 9 | 
10 | cr <- intersect(rownames(d1), rownames(d2))
11 | cc <- intersect(colnames(d1), colnames(d2))
12 | 
13 | plot(unlist(d1[cr, cc]), log10(unlist(d2[cr, cc])), pch = "."); abline(0,1)
14 | 
15 | cors <- c()
16 | for (i in 1:nrow(frpa2)) {
17 |   cors[[i]] <- cor(as.vector(log10(frpa2[cr, cc][i,])), as.vector(log10(frpa2b[cr, cc][i,])))
18 | }
19 | hist(cors)
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/archive/plot_diversity.R:
--------------------------------------------------------------------------------
  1 | #' @title Plot Indicators
  2 | #' @description Plot global indicators.
  3 | #' This function estimates a number of alpha-diversity metrics using the 
  4 | #' \code{\link{estimate_richness}} function,
  5 | #' and returns a \code{ggplot} object. 
  6 | #' The plot generated by this function will include every sample
  7 | #' in \code{physeq}, but they can be further grouped on the horizontal axis
  8 | #' through the argument to \code{x}, 
  9 | #' and shaded according to the argument to \code{color} (see below).
 10 | #' You must use untrimmed, non-normalized count data for meaningful results.
 11 | #' @param x \code{\link{phyloseq-class}} object
 12 | #' @param y A variable to map to the horizontal axis. The vertical
 13 | #'  axis will be mapped to the alpha diversity index/estimate
 14 | #'  and have units of total taxa, and/or index value (dimensionless).
 15 | #'  This parameter (\code{x}) is a character string indicating a
 16 | #'  in the dataset (nsamples(x)).
 17 | #' @param index Default is \code{NULL}. In this case
 18 | #'  all available alpha-diversity index will be included.
 19 | #'  Alternatively, you can specify one or more index
 20 | #'  as a character vector. Values must be among those supported:
 21 | #'  \code{c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher")}.
 22 | #' @param nrow Number of rows for plot faceting.
 23 | #' @param scales scales for the plot
 24 | #' @param indicate.subjects Indicate subjects by lines. The sample_data(x) must have 'subject' field.
 25 | #' @param na.rm Remove samples with missing metadata (NA)
 26 | #' @return A \code{\link{ggplot}} plot object summarizing
 27 | #'  the richness estimates, and their standard error.
 28 | #' @details If subject is among the metadata variables, the matched subjects across groups are indicated by lines.
 29 | #' @seealso 
 30 | #'  \code{\link{estimate_richness}}
 31 | #'  \code{\link{global}}
 32 | #'  \code{\link{plot_richness}}
 33 | #'  \code{\link[vegan]{estimateR}}
 34 | #'  \code{\link[vegan]{diversity}}
 35 | #' @export
 36 | #' @examples
 37 | #'   # Load gut microbiota data on 1006 western adults
 38 | #'   # (see help(atlas1006) for references and details)
 39 | #'   data(atlas1006)
 40 | #'   # Visualize Shannon diversity across bmi groups; remove cases with no bmi info
 41 | #'   x <- global(atlas1006)$diversities_shannon
 42 | #'   y <- meta(atlas1006)$bmi_group
 43 | #'   p <- plot_global(x, variable = y, na.rm = TRUE)
 44 | #' @keywords utilities
 45 | plot_global <- function(x, y, nrow = 1, scales = "free_y", indicate.subjects = FALSE, na.rm = FALSE){ 
 46 | 
 47 |   horiz <- subject <- NULL
 48 | 
 49 |   # Coerce to data.frame
 50 |   DF <- data.frame(x, y)
 51 | 
 52 |   mdf <- gather(DF, "key", "value", dplyr::ends_with(".index"))
 53 |   mdf$key <- gsub("\\.index$", "", mdf$key)
 54 |   mdf$horiz <- mdf[[variable]]
 55 | 
 56 |   if (na.rm) {
 57 |     mdf <- dplyr::filter(mdf, !is.na(horiz))
 58 |     if (nrow(mdf) == 0) {
 59 |       warning(paste("All values in", variable, "are NA. Returning NULL."))
 60 |       NULL
 61 |     }
 62 |   }
 63 | 
 64 |   # Make the ggplot  
 65 |   theme_set(theme_bw(20))
 66 | 
 67 |   if (is.factor(mdf$horiz)) {
 68 |   
 69 |     p <- ggplot(mdf, aes(x = horiz, y = value))
 70 |     
 71 |     p <- p + geom_boxplot(na.rm=TRUE)
 72 | 
 73 |     if (indicate.subjects) {
 74 |       p <- p + geom_point()
 75 |       p <- p + geom_line(aes(group = subject))
 76 |     }
 77 |     
 78 |     # Rotate horizontal axis labels, and adjust
 79 |     p <- p + theme(axis.text.x=element_text(angle=-90, vjust=0.5, hjust=0))
 80 | 	
 81 |     # Facet wrap using user-options
 82 |     p <- p + facet_wrap(~key, nrow = nrow, scales = scales)
 83 | 	
 84 |   } else if (is.vector(mdf$horiz)) {
 85 | 
 86 |     if (length(measures) > 1) {
 87 |       stop(paste("The horizontal variable", variable, "is numeric. Provide a single diversity measure."))
 88 |     }
 89 | 
 90 |     p <- plot_regression(value~horiz, mdf)
 91 |     p <- p + xlab(variable) 
 92 | 
 93 |   }
 94 | 
 95 |   # Add y-label and title
 96 |   p <- p + ylab('Diversity') 
 97 | 
 98 |   p
 99 | 
100 | }
101 | 


--------------------------------------------------------------------------------
/archive/plot_taxa_prevalence.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Plot Taxa Prevalence"
 3 | output: 
 4 |   rmarkdown::html_vignette
 5 | ---
 6 | <!--
 7 |   %\VignetteEngine{knitr::rmarkdown}
 8 |   %\VignetteIndexEntry{microbiome tutorial - Plot Taxa Prevalence}
 9 |   %\usepackage[utf8]{inputenc}
10 |   %\VignetteEncoding{UTF-8}  
11 | -->
12 | 
13 | 
14 | ## Load data
15 | 
16 | Load [example data](Data.md):  
17 | For this example we will use data from [Halfvarson J., et al. Nature Microbiology, 2017](http://www.nature.com/articles/nmicrobiol20174). It was downloaded from [Qitta](https://qiita.ucsd.edu/study/description/1629).  
18 | 
19 | 
20 | ```{r}
21 | library(microbiome)
22 | data(DynamicsIBD)
23 | p0 <- DynamicsIBD
24 | ```
25 | 
26 | ## Plot Taxa Prevalence
27 | 
28 | Phylum level:
29 | 
30 | ```{r, fig.height=6, fig.width=8}
31 | p0.f <- format_phyloseq(p0)
32 | p <- plot_taxa_prevalence(p0.f, 'Phylum')
33 | print(p)
34 | ```
35 | 
36 | You can also plot the prevalence at Order/Class/Family level.  
37 | 
38 | 


--------------------------------------------------------------------------------
/archive/project.data.R:
--------------------------------------------------------------------------------
 1 | #' @title project.data
 2 | #' @description Project high-dimensional data on two-dimensional plane
 3 | #'              by various methods
 4 | #' 
 5 | #' @param amat data matrix (samples x features)
 6 | #' @param type projection type 
 7 | #'           (options: PCA, MDS.classical, MDS.nonmetric, Sammon)
 8 | #'
 9 | #' @return projected data matrix
10 | #'
11 | #' @export
12 | #' @importFrom MASS isoMDS
13 | #' @importFrom MASS sammon
14 | #' @importFrom mixOmics spca
15 | #'
16 | #' @examples 
17 | #'   data(peerj32)
18 | #'   xy <- project.data(peerj32$microbes[,1:3])
19 | #'
20 | #' @references 
21 | #'    
22 | #'    D. Hand and H. Mannila and P. Smyth: 
23 | #'    Principles of Data Mining. MIT Press. Cambridge, MA, US (2001).
24 | #'    
25 | #'    To cite microbiome R package, see citation('microbiome') 
26 | #'
27 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
28 | #' @keywords utilities
29 | 
30 | project.data <- function(amat, type = "PCA") {
31 |     
32 |     if (type == "PCA") {
33 |         if (nrow(amat) < ncol(amat)) {
34 |             
35 |             message("More samples than features, using sparse PCA")
36 |             
37 |             ## Spca example: we are selecting 50 variables on each of the PCs
38 |             result <- spca(amat, ncomp = 2, center = TRUE, scale = TRUE, 
39 |                            keepX = rep(50, 2))
40 |             scores <- result$x
41 |         } else {
42 |             message("PCA")
43 |             pca <- princomp(amat)  # Classical PCA
44 |             scores <- pca$scores
45 |         }
46 |         tab <- data.frame(scores[, 1:2])
47 |         rownames(tab) <- rownames(amat)
48 |     } else if (type == "Sammon") {
49 |         
50 |         d <- as.dist(1 - cor(t(amat)))
51 |         # This gave the clearest visualization.  
52 |         # Tuning magic parameter could still
53 |         # improve.  Try for instance magic = 0.05.
54 |         fit <- sammon(d, k = 2)
55 |         # Plot solution
56 |         tab <- data.frame(list(Comp.1 = fit$points[, 1], 
57 |                                Comp.2 = fit$points[, 2]))
58 |         rownames(tab) <- rownames(amat)
59 |     } else if (type == "MDS.classical") {
60 |         d <- as.dist(1 - cor(t(amat)))
61 |         fit <- cmdscale(d, eig = TRUE, k = 2)  # classical MDS
62 |         tab <- data.frame(list(Comp.1 = fit$points[, 1], 
63 |                                Comp.2 = fit$points[, 2]))
64 |     } else if (type == "MDS.nonmetric") {
65 |         d <- as.dist(1 - cor(t(amat)))
66 |         fit <- isoMDS(d, k = 2)  # nonmetric MDS
67 |         tab <- data.frame(list(Comp.1 = fit$points[, 1], 
68 |                                Comp.2 = fit$points[, 2]))
69 |     }
70 |     
71 |     # TODO Kernel-PCA kpc <- kpca(~., data=as.data.frame(x.train),
72 |     # kernel='rbfdot', features = 2) Print the principal component
73 |     # vectors pcv(kpc) Plot the data projection on the components
74 |     # par(mfrow=c(2,2)) plot(rotated(kpc), col =
75 |     # as.integer(as.factor(ann[rownames(x.train),'time'])), xlab='1st
76 |     # Principal Component', ylab='2nd Principal Comp onent')
77 |     # plot(rotated(kpc), col =
78 |     # as.integer(as.factor(ann[rownames(x.train),'lipids.group'])),
79 |     # xlab='1st Principal Component', ylab='2nd Principal Component')
80 |     # embed remaining points emb <- predict(kpc, x.test)
81 |     # plot(rotated(kpc), col =
82 |     # as.integer(as.factor(ann[rownames(x.train),'lipids.group'])),
83 |     # xlab='1st Principal Component', ylab='2nd Principal Component')
84 |     # points(emb, col =
85 |     # as.integer(as.factor(ann[rownames(x.train),'lipids.group'])))
86 |     
87 |     colnames(tab) <- c("Comp.1", "Comp.2")
88 |     
89 |     tab
90 | }


--------------------------------------------------------------------------------
/archive/qiita1629.biom:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/archive/qiita1629.biom


--------------------------------------------------------------------------------
/archive/rare_abundance.R:
--------------------------------------------------------------------------------
 1 | #' @title Rare Abundance
 2 | #' @description Calculate total rare abundance
 3 | #' @param x \code{\link{phyloseq-class}} object
 4 | #' @param detection Detection threshold (non-negative real)
 5 | #' @param prevalence Prevalence threshold (in [0, 100])
 6 | #' @return Total rare abunance vector.
 7 | #' @examples
 8 | #'  data(dietswap)
 9 | #'  a <- rare_abundance(dietswap)
10 | #' @export
11 | #' @references See citation("microbiome")
12 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
13 | #' @keywords utilities
14 | rare_abundance <- function(x, detection = 1, prevalence = 95) {
15 | 	      
16 |   rare.taxa <- rare_members(x, detection = detection,
17 |   	               prevalence = prevalence)
18 | 
19 |   # Rare matrix
20 |   # FIXME: directly use the rare function
21 |   xx <- abundances(prune_taxa(rare.taxa, x))
22 |   xx <- matrix(xx, nrow = length(rare.taxa))
23 |   
24 |   # Total sum of rare abundances
25 |   ab <- colSums(xx, na.rm = TRUE)
26 | 
27 |   ab
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/archive/rare_members.R:
--------------------------------------------------------------------------------
 1 | #' @title Rare Taxa
 2 | #' @description Determine members of the rare microbiota with given abundance
 3 | #''             and prevalences.
 4 | #' @param x \code{\link{phyloseq-class}} object
 5 | #' @param detection Detection threshold (non-negative real)
 6 | #' @param prevalence Prevalence threshold (in [0, 100])
 7 | #' @param sort Logical. Sort the taxa.
 8 | #' @return Vector of rare members
 9 | #' @details For phyloseq object, lists taxa that are more prevalent with the
10 | #'   given detection. For matrix, lists columns that satisfy
11 | #'   these criteria.
12 | #' @examples
13 | #'   data(dietswap)
14 | #'   a <- rare_members(dietswap, 1, 95)
15 | #' @export
16 | #' @references 
17 | #'   To cite the microbiome R package, see citation('microbiome') 
18 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
19 | #' @keywords utilities
20 | rare_members <- function(x, detection = 1, prevalence = 95,
21 |                     sort = TRUE)  {
22 | 
23 |   # First identify core based on these criteria
24 |   cm <- core_members(x, detection, prevalence, sort)
25 | 
26 |   # Then pick rare taxa as those that do not belong to core
27 |   # ie have prevalence below the given threshold at the given
28 |   # detection limit
29 |   taxa <- setdiff(taxa(x), cm)
30 | 
31 |   if (sort) {
32 |     taxa <- sort(taxa)
33 |   }
34 | 
35 |   taxa
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/archive/rare_richness.R:
--------------------------------------------------------------------------------
 1 | #' @title Rare Richness
 2 | #' @description Calculate total rare richness
 3 | #' @param x \code{\link{phyloseq-class}} object
 4 | #' @param detection Detection threshold (non-negative real)
 5 | #' @param prevalence Prevalence threshold (in [0, 100])
 6 | #' @return Total rare abundance vector.
 7 | #' @examples
 8 | #'  data(dietswap)
 9 | #'  a <- rare_richness(dietswap)
10 | #' @export
11 | #' @references See citation("microbiome")
12 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
13 | #' @keywords utilities
14 | rare_richness <- function(x, detection = 1, prevalence = 95) {
15 | 	      
16 |   rare.taxa <- rare_members(x, detection = detection,
17 |   	               prevalence = prevalence)
18 | 
19 |   # Rare matrix
20 |   # FIXME: directly use the rare function
21 |   xx <- abundances(prune_taxa(rare.taxa, x))
22 |   xx <- matrix(xx, nrow = length(rare.taxa))
23 | 
24 |   # Rare taxa richness in each sample  
25 |   ab <- colSums(xx > detection, na.rm = TRUE)
26 | 
27 |   ab
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/archive/rda.R:
--------------------------------------------------------------------------------
 1 | #' @title RDA for phyloseq objects
 2 | #' @description RDA for phyloseq objects based on the \code{\link{rda}}
 3 | #'    function from the \pkg{vegan} package.
 4 | #' @param x \code{\link{phyloseq-class}} object
 5 | #' @param y Variable to apply in RDA visualization.
 6 | #' @param scale See help(rda)
 7 | #' @param na.action See help(rda)
 8 | #' @param ... Other arguments to be passed
 9 | #' @return rda result. See help(vegan::rda)
10 | #' @export
11 | #' @examples
12 | #'   data(peerj32) # Data from https://peerj.com/articles/32/
13 | #'   pseq <- peerj32$phyloseq
14 | #'   pseq.trans <- transform(pseq, "hell") # Hellinger transform
15 | #'   # rda.result <- rda_pseq(pseq.trans, "time", scale = TRUE)
16 | #' @references See citation("microbiome") 
17 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
18 | #' @keywords utilities
19 | rda_pseq <- function (x, y, scale = FALSE, na.action = na.fail, ...) {
20 | 
21 |   # Microbiota profiling data (44 samples x 130 bacteria)
22 |   otu <- t(abundances(x))
23 | 
24 |   # Pick the indicated annotation field
25 |   if (!y %in% sample_variables(x)) {    
26 |       stop(paste("The variable y ('", y, "') is not available in the phyloseq object i.e. sample_data(x). Only use variables listed in sample_variables(x) ie. o
27 | ne of the following: ", paste(names(sample_data(x)), collapse = " / "), sep = ""))
28 |    }
29 | 
30 |   if (!"sample" %in% sample_variables(x)) {
31 |     sample_data(x)$sample <- rownames(sample_data(x))
32 |   }
33 | 
34 |   annot <- factor(sample_data(x)[[y]])
35 |   names(annot) <- sample_data(x)$sample
36 | 
37 |   r <- vegan::rda(otu ~ annot, scale = scale, na.action = na.action) 
38 |   
39 |   r
40 | 
41 | }
42 | 
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/archive/read.profiling.R:
--------------------------------------------------------------------------------
 1 | #' @title Read profiling
 2 | #' @description Read run.profiling.script output into R
 3 | #' @param level phylogenetic level ('oligo' / 'species' / 'L1' / 'L2' / 'L0') 
 4 | #' 	  	or 'phylogeny.full', 'phylogeny.filtered'
 5 | #' @param method ('frpa' / 'rpa' / 'sum' / 'ave')
 6 | #' @param data.dir Profiling script output directory for reading the data. 
 7 | #'                 If not given, GUI will ask to specify the file and 
 8 | #'             	   overruns the possible level / method arguments in the 
 9 | #'             	   function call.
10 | #' @param log10 Logical. Logarithmize the data TRUE/FALSE. 
11 | #'              By default, the data is in original non-log scale.
12 | #' @param impute impute missing oligo signals
13 | #' @return data matrix (phylo x samples)
14 | #' @export
15 | #' @examples
16 | #'  \dontrun{
17 | #'   data.dir <- system.file("extdata", package = "microbiome")
18 | #'   dat <- read.profiling('L1', 'frpa', data.dir = data.dir)
19 | #'  }
20 | #' @references See citation('microbiome')
21 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
22 | #' @keywords utilities
23 | 
24 | read.profiling <- function(level = NULL, method = "frpa", data.dir, 
25 |                          log10 = TRUE, impute = TRUE) {
26 | 
27 |     .Deprecated("read_hitchip")
28 |     
29 |     # level <- 'oligo'; method = 'sum'; data.dir = 'test/'; log10 = TRUE
30 |     if (level %in% c("L0", "L1", "L2", "species")) {
31 |         if (method == "frpa" && length(grep(method, dir(data.dir))) == 0) {
32 |             warning("frpa method not available; using rpa instead")
33 |             method <- "rpa"
34 |         }
35 |         f <- paste(data.dir, "/", level, "-", method, ".tab", sep = "")
36 | 
37 |     } else if (level == "oligo") {
38 |         f <- paste(data.dir, "/oligoprofile.tab", sep = "")
39 |     } else if (level == "phylogeny.full") {
40 |         f <- paste(data.dir, "/phylogeny.full.tab", sep = "")
41 |     } else if (level %in% c("phylogeny.filtered")) {
42 |         f <- paste(data.dir, "/phylogeny.filtered.tab", sep = "")
43 |     } else if (level %in% c("phylogeny.info", "phylogeny.full", "phylogeny")) {
44 |         f <- paste(data.dir, "/phylogeny.full.tab", sep = "")
45 |     }
46 |     
47 |     
48 |     message(paste("Reading", f))
49 |     
50 |     if (level %in% c("L0", "L1", "L2", "species")) {
51 |         
52 |         tab <- read.csv(f, header = TRUE, sep = "\t", row.names = 1, 
53 |                         as.is = TRUE)
54 |         colnames(tab) <- unlist(strsplit(readLines(f, 1), "\t"))[-1]
55 |         
56 |     } else if (level == "oligo") {
57 |         
58 |         tab <- read.csv(f, header = TRUE, sep = "\t", row.names = 1, 
59 |                         as.is = TRUE)
60 |         colnames(tab) <- unlist(strsplit(readLines(f, 1), "\t"))[-1]
61 |         
62 |     } else if (length(grep("phylogeny", level)) > 0) {
63 |         
64 |         tab <- read.csv(f, header = TRUE, sep = "\t", as.is = TRUE)
65 |         
66 |     }
67 |     
68 |     # Convert to numeric
69 |     if (level %in% c("oligo", "species", "L0", "L1", "L2")) {
70 |         
71 |         rnams <- rownames(tab)
72 |         cnams <- colnames(tab)
73 |         tab <- apply(tab, 2, as.numeric)
74 |         rownames(tab) <- rnams
75 |         colnames(tab) <- cnams
76 |         
77 |     }
78 |     
79 |     if (impute && any(is.na(tab))) {
80 |         warning(paste("The matrix has ", sum(is.na(tab)), 
81 |                       " missing values \n                             
82 |                       - imputing."))
83 |         tab <- 10^t(impute(t(log10(tab))))
84 |     }
85 |     
86 |     if (log10 && (level %in% c("oligo", "species", "L0", "L1", "L2"))) {
87 |         message("Logarithmizing the data")
88 |         tab <- log10(tab)
89 |     }
90 |     
91 |     tab
92 |     
93 | } 
94 | 
95 | 


--------------------------------------------------------------------------------
/archive/read_profiling.R:
--------------------------------------------------------------------------------
 1 | #' Read HITChip run.profiling.script output into R
 2 | #'
 3 | #' @param data.dir Profiling script output directory for reading the data. 
 4 | #'                 If not given, GUI will ask to specify the file and 
 5 | #'             	   overruns the possible level / method arguments in the 
 6 | #'             	   function call.
 7 | #' @param method Select the preprocessing method that you like to check
 8 | #' @param verbose verbose
 9 | #' 
10 | #' @return data matrix (phylo x samples)
11 | #'
12 | #' @examples 
13 | #'   # data.dir <- system.file("extdata", package = "microbiome")
14 | #'   # dat <- read_profiling(data.dir)
15 | #'
16 | #' @references See citation('microbiome')
17 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
18 | #' @keywords utilities
19 | read_profiling <- function(data.dir, method, verbose = TRUE) {
20 | 
21 |   if (verbose) {message(paste("Reading Chip data from", data.dir))}
22 | 
23 |   results <- list()
24 | 
25 |   # Read probe-level data
26 |   f <- paste(data.dir, "/oligoprofile.tab", sep = "")
27 |   tab <- read.csv(f, header = TRUE, sep = "\t", row.names = 1, as.is = TRUE)
28 |   colnames(tab) <- unlist(strsplit(readLines(f, 1), "\t"))[-1]
29 |   results[["probedata"]] <- tab
30 | 
31 |   # Read abundance tables
32 |   #for (s in c("L0", "L1", "L2", "species")) {
33 |   #  f <- paste(data.dir, "/", s, "-", method, ".tab", sep = "")
34 |   #  if (file.exists(f)) {
35 |   #    tab <- read.csv(f, header = TRUE, sep = "\t", row.names = 1, as.is = TRUE)
36 |   #    colnames(tab) <- unlist(strsplit(readLines(f, 1), "\t"))[-1]
37 |   #    results[[s]] <- tab
38 |   #  }
39 |   #}
40 |   
41 |   # Read taxonomy table
42 |   f <- paste(data.dir, "/taxonomy.tab", sep = "")
43 |   if (!file.exists(f)) {
44 |     # Old outputs had this name
45 |     f <- paste(data.dir, "/phylogeny.filtered.tab", sep = "")    
46 |   }
47 |   taxonomy <- read.csv(f, header = TRUE, sep = "\t", as.is = TRUE)
48 |   results[["taxonomy"]] <- taxonomy
49 |   
50 |   # Read unfiltered taxonomy table
51 |   f <- paste(data.dir, "/taxonomy.full.tab", sep = "")
52 |   if (!file.exists(f)) {
53 |     # Old outputs had this name
54 |     f <- paste(data.dir, "/phylogeny.full.tab", sep = "")    
55 |   }
56 |   taxonomy.full <- read.csv(f, header = TRUE, sep = "\t", as.is = TRUE)
57 |   results[["taxonomy.full"]] <- taxonomy.full
58 | 
59 |   # Read sample metadata      
60 |   f <- paste(data.dir, "/meta.tab", sep = "")
61 |   if (file.exists(f)) {
62 |     tab <- read.csv(f, header = TRUE, sep = "\t", as.is = TRUE)
63 |     rownames(tab) <- tab$sample
64 |     meta <- tab
65 |     results[["meta"]] <- meta
66 |   }
67 | 
68 |   results
69 |     
70 | } 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/archive/reading.R:
--------------------------------------------------------------------------------
 1 | library(microbiome)
 2 | #library(phyloseq)
 3 | #fs <- list.files("~/microbiome/R/", full.names = TRUE)
 4 | #for (f in fs)  { source(f) }
 5 | 
 6 | ressum <- read_hitchip("output", method = "sum")
 7 | resrpa <- read_hitchip("output", method = "rpa")
 8 | resfrpa <- read_hitchip("output", method = "frpa")
 9 | 
10 | # Read precalculated
11 | res.frpa <- read_profiling("output", method = "frpa")
12 | res.rpa <- read_profiling("output", method = "rpa")
13 | res.sum <- read_profiling("output", method = "sum")
14 | 
15 | # Check compatibility with old function
16 | # frpa <- read.profiling(level = "L2", method = "frpa", data.dir = "output", log10 = TRUE, impute = TRUE) 
17 | 
18 | # Read and recalculate
19 | res <- read_hitchip("output", method = "sum")
20 | 
21 | res1 <- res.sum$species
22 | res2 <- res$abundance.table
23 | res3 <- res.rpa$species
24 | plot(log10(unlist(res1)), log10(unlist(res2)))
25 | plot(log10(unlist(res1)), log10(unlist(res3)))
26 | 
27 | 
28 | res1 <- res.rpa
29 | res2 <- resrpa
30 | plot(log10(unlist(res1$species)), log10(unlist(res2$abundance.table)))
31 | 
32 | 


--------------------------------------------------------------------------------
/archive/save.phylogeny.R:
--------------------------------------------------------------------------------
 1 | message("Fetching Phylogeny from the database")
 2 | library(microbiome)
 3 | library(HITChipDB)
 4 | dbuser = "root"; dbpwd = "fidipro"; dbname = "phyloarray"; host = '127.0.0.1'; port = 3307
 5 | 
 6 | message("Fetching Phylogeny from the database")
 7 | phylogeny.full <- get.phylogeny.info("16S", 
 8 | 	    		     dbuser = dbuser, 
 9 | 			     dbpwd = dbpwd, 
10 | 			     dbname = dbname, 
11 | 			     host = host, 
12 | 			     port = port,
13 | 			     chip = "HITChip")
14 | 
15 | phylogeny.filtered.orig <- GetPhylogeny("HITChip", "filtered")
16 | 
17 | # Fix taxon name
18 | phylogeny.full$L2 <- gsub("^Clostridia$", "Clostridium (sensu stricto)", as.character(phylogeny.full$L2))
19 | 
20 | # This handles also pmTm, complement and mismatch filtering
21 | # This is the phylogeny used in probe summarization into taxonomic levels
22 | rm.phylotypes <- phylotype.rm.list("HITChip") 
23 | rm.oligos <- sync.rm.phylotypes(rm.phylotypes, phylogeny.full)$oligos
24 | 
25 | phylogeny.filtered <- prune16S(phylogeny.full, pmTm.margin = 2.5, complement = 1, mismatch = 0, rmoligos = rm.oligos, remove.nonspecific.oligos = FALSE) # remove.nonspecific.oligos refers to probes with multiple L2 groups
26 | 
27 | # Remove probes that target multiple L1 groups
28 | lmap <- levelmap(NULL, level.from = "oligoID", level.to = "L1", phylogeny.info = phylogeny.filtered)
29 | hits <- sapply(lmap, length)
30 | ambiguous.l1.probes <- unique(names(which(hits > 1)))
31 | 
32 | phylogeny.filtered <- subset(phylogeny.filtered, !oligoID %in% ambiguous.l1.probes)
33 | 
34 | # Keep only relevant cols
35 | phylogeny.full <- phylogeny.full[, 1:6]; 
36 | phylogeny.filtered <- phylogeny.filtered[, 1:6]; 
37 | 
38 | # Remove duplicate rows
39 | phylogeny.full <- phylogeny.full[!duplicated(phylogeny.full),]
40 | phylogeny.filtered <- phylogeny.filtered[!duplicated(phylogeny.filtered),]
41 |  
42 | # Write to file
43 | write.table(phylogeny.filtered, file = "../extdata/phylogeny.filtered.tab", quote = F, row.names = F, sep = "\t")
44 | 
45 | 


--------------------------------------------------------------------------------
/archive/subject_tables.R:
--------------------------------------------------------------------------------
 1 | subject_tables <- function (x, meta) {
 2 | 
 3 |   stop("TODO: replaced now with timesort_subjects.R")
 4 | 
 5 |   # Focus on the signal from specific taxon
 6 |   meta$signal <- x
 7 | 
 8 |   # Pick data for each subject separately
 9 |   spl <- split(meta, as.character(meta$subject))
10 | 
11 |   tabs <- list()
12 |   cnt <- 0 
13 |   for (subj in names(spl)) {
14 | 
15 |     times <- as.numeric(spl[[subj]]$time)
16 |     signal <- as.numeric(spl[[subj]]$signal)
17 |     mintime <- which.min(times)
18 |     
19 |     # Shift in time from first time point
20 |     spl[[subj]]$time <- (times - times[[mintime]])
21 |     
22 |     # Shift in signal from first time point    		      
23 |     spl[[subj]]$shift <- (signal - signal[[mintime]])
24 | 
25 |     # Store
26 |     tabs[[subj]] <- spl[[subj]]
27 |   }
28 | 
29 |   tabs
30 | 
31 | }
32 | 
33 | 
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/archive/summarize.rpa.R:
--------------------------------------------------------------------------------
 1 | #' Description: Probeset summarization with RPA
 2 | #' 
 3 | #' Arguments:
 4 | #'   @param taxonomy oligo - phylotype matching data.frame
 5 | #'   @param level taxonomic level for the summarization. 
 6 | #'   @param probedata preprocessed probes x samples data matrix in absolute domain
 7 | #'   @param verbose print intermediate messages
 8 | #'   @param probe.parameters Optional. If probe.parameters are given,
 9 | #'          the summarization is based on these and model parameters are not
10 | #' 	    estimated. A list. One element for each probeset with the following probe vectors: 
11 | #'	    affinities, variances
12 | #' Returns:
13 | #'   @return List with two elements: abundance.table (summarized data matrix in absolute scale) and probe.parameters (RPA probe level parameter estimates)
14 | #'
15 | #' @export
16 | #' @importFrom RPA d.update.fast 
17 | #' @importFrom RPA rpa.fit
18 | #'
19 | #' @references See citation("microbiome") 
20 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
21 | #' @keywords utilities
22 | summarize.rpa <- function (taxonomy, level, probedata, verbose = TRUE, probe.parameters = NULL) {
23 | 
24 |   # Convert to log10 domain	      
25 |   oligo.data <- log10(probedata) 
26 |   probeinfo <- list()
27 |  
28 |   probesets <- retrieve.probesets(taxonomy, level = level)
29 |   # probesets <- probesets[setdiff(names(probesets), rm.species)]
30 |   nPhylotypesPerOligo <- n.phylotypes.per.oligo(taxonomy, level) 
31 | 
32 |   # initialize
33 |   summarized.matrix <- matrix(NA, nrow = length(probesets), 
34 |   		       		  ncol = ncol(oligo.data))
35 |   rownames(summarized.matrix) <- names(probesets)
36 |   colnames(summarized.matrix) <- colnames(oligo.data)
37 | 
38 |   for (set in names(probesets)) {
39 | 
40 |     # Pick expression for particular probes
41 |     probes <- probesets[[set]]
42 | 
43 |     # Pick probe data for the probeset: probes x samples
44 |     # oligo.data assumed to be already in log10
45 |     dat <- as.matrix(oligo.data[probes,])
46 |     if (length(probes) == 1)  {
47 |       dat <- as.matrix(oligo.data[probes,], nrow = length(probes))
48 |     }
49 |     rownames(dat) <- probes
50 |     colnames(dat) <- colnames(oligo.data)
51 | 
52 |     if (length(probe.parameters) > 0) {
53 | 
54 |       # Summarize with pre-calculated variances
55 |       vec <- d.update.fast(dat, probe.parameters[[set]])
56 | 
57 |     } else {
58 | 
59 |       # RPA is calculated in log domain
60 |       # Downweigh non-specific probes with priors with 10% of virtual data and
61 |       # variances set according to number of matching probes
62 |       # This will provide slight emphasis to downweigh potentially
63 |       # cross-hybridizing probes
64 |       alpha <- 1 + 0.1*ncol(dat)/2
65 |       beta <- 1 + 0.1*ncol(dat)*nPhylotypesPerOligo[probes]^2
66 |       res <- rpa.fit(dat, alpha = alpha, beta = beta)
67 |       vec <- res$mu
68 |       probeinfo[[set]] <- res$tau2
69 | 
70 |     }
71 |       
72 |     summarized.matrix[set, ] <- vec 
73 | 
74 |   }
75 | 
76 |   if (!is.null(probe.parameters)) {
77 |     probeinfo <- probe.parameters
78 |   }
79 | 
80 |   # Return the data in absolute scale					
81 |   summarized.matrix <- 10^summarized.matrix
82 | 
83 |   list(abundance.table = summarized.matrix, probeinfo = probeinfo)
84 |   
85 | }
86 | 
87 | 


--------------------------------------------------------------------------------
/archive/summarize.sum.R:
--------------------------------------------------------------------------------
 1 | #' Probeset summarization with SUM
 2 | #' 
 3 | #' Arguments:
 4 | #'   @param taxonomy oligo - phylotype matching data.frame
 5 | #'   @param level taxonomic level for the summarization. 
 6 | #'   @param probedata preprocessed probes x samples data matrix in absolute domain
 7 | #'   @param verbose print intermediate messages
 8 | #'   @param downweight.ambiguous.probes Downweight probes with multiple targets
 9 | #'
10 | #' Returns:
11 | #'   @return List with two elements: abundance.table (summarized data matrix in absolute scale) and probe.parameters used in the calculations
12 | #'
13 | #' @export
14 | #'
15 | #' @references See citation("microbiome") 
16 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
17 | #' @keywords utilities
18 | summarize.sum <- function (taxonomy, level, probedata, verbose = TRUE, downweight.ambiguous.probes = TRUE) {
19 | 
20 |   # Convert to log10 domain	      
21 |   oligo.data <- probedata
22 |   probe.parameters <- list()
23 |  
24 |   probesets <- retrieve.probesets(taxonomy, level = level)
25 | 
26 |   if (downweight.ambiguous.probes) {
27 |     nPhylotypesPerOligo <- n.phylotypes.per.oligo(taxonomy, level) 
28 |     probe.weights <- 1/nPhylotypesPerOligo
29 |   } else {
30 |     probe.weights <- rep(1, nrow(taxonomy))
31 |     names(probe.weights) <- rownames(taxonomy)
32 |   }
33 | 
34 |   # initialize
35 |   summarized.matrix <- matrix(NA, nrow = length(probesets), 
36 |   		       		  ncol = ncol(oligo.data))
37 |   rownames(summarized.matrix) <- names(probesets)
38 |   colnames(summarized.matrix) <- colnames(oligo.data)
39 | 
40 |   for (set in names(probesets)) {
41 | 
42 |     # print(set)
43 | 
44 |     # Pick expression for particular probes
45 |     probes <- probesets[[set]]
46 | 
47 |     # Pick probe data for the probeset: probes x samples
48 |     # oligo.data assumed to be already in log10
49 |     dat <- as.matrix(oligo.data[probes,])
50 |     if (length(probes) == 1)  {
51 |       dat <- as.matrix(oligo.data[probes,], nrow = length(probes))
52 |     }
53 |     rownames(dat) <- probes
54 |     colnames(dat) <- colnames(oligo.data)
55 | 
56 |     # Weight each probe by the inverse of the number of matching phylotypes
57 |     # Then calculate sum -> less specific probes are downweighted
58 |     # However, set the minimum signal to 0 in log10 scale (1 in original scale)!
59 |     if (nrow(dat) > 1) {
60 |       dat <- dat * probe.weights[rownames(dat)]
61 |       vec <- colSums(dat, na.rm = T)               
62 |     } else {
63 |       vec <- as.vector(unlist(dat))
64 |     }
65 | 
66 |     summarized.matrix[set, ] <- vec
67 | 
68 |   }
69 | 
70 |   list(abundance.table = summarized.matrix, probe.parameters = probe.weights)
71 |   
72 | }
73 | 
74 | 


--------------------------------------------------------------------------------
/archive/test.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | library(microbiome)
 4 | data.directory <- system.file("extdata", package = "microbiome")
 5 | 
 6 | method <- "sum"
 7 | 
 8 | so <- read.csv(paste("~/Rpackages/microbiome/microbiome/inst/extdata/species-", method, ".tab", sep = ""), sep = "\t", row.names = 1)
 9 | 
10 | pseq <- read_hitchip(data.directory, method = method)$pseq
11 | sn <- otu_table(pseq)@.Data
12 | 
13 | cro <- intersect(rownames(sn), rownames(so))
14 | cco <- intersect(colnames(sn), colnames(so))
15 | plot(log10(unlist(so[cro, cco])), log10(unlist(sn[cro, cco])))
16 | 
17 | cors <- c()
18 | for (i in 1:length(cro)) {
19 |   tax <- cro[[i]]
20 |   cors[[tax]] <- cor(log10(unlist(so[tax, ])), log10(unlist(sn[tax, ])))
21 | }
22 | hist(cors)
23 | 
24 | # --------------------------------
25 | 
26 | # The poorest correlate
27 | tax <- names(sort(cors))[[1]]
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/archive/top_abundance.R:
--------------------------------------------------------------------------------
 1 | #' @title Top Abundance Index
 2 | #' @description Calculates the community top_abundance index.
 3 | #' @param rank Optional. The rank of the dominant taxa to consider.
 4 | #' @param aggregate Aggregate (TRUE; default) the top members or not. If aggregate=TRUE, then the sum of relative abundances is returned. Otherwise the relative abundance is returned for the single taxa with the indicated rank. 
 5 | #' @inheritParams diversity
 6 | #' @return A vector of top_abundance indices
 7 | #' @export
 8 | #' @examples
 9 | #'   data(dietswap)
10 | #'   d <- top_abundance(dietswap)
11 | #' @details The top_abundance index gives the abundance of the most abundant species in [0,1]. This simple diversity index is occasionally used in ecological literature, and sometimes also called dominance. However, note that the microbiome::dominance function uses a different definition. With rank = 2, the sum of abundances for the two most abundant taxa are returned etc. However, if aggregate=FALSE, the abundance for the single n'th most dominant taxa (n = rank) is returnde instead the sum of abundances up to that rank.
12 | #' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
13 | #' @seealso dominance, diversity
14 | #' @keywords utilities
15 | top_abundance <- function(x, rank = 1, aggregate = TRUE, split = TRUE) {
16 | 
17 |   # Pick the OTU data
18 |   otu <- abundances(x)
19 | 
20 |   if (!split) {
21 |     otu <- as.matrix(rowSums(otu), nrow = nrow(otu))
22 |   }
23 | 
24 |   if (!aggregate) {
25 |     do <- apply(otu, 2, function (x) {rev(sort(x/sum(x, na.rm = TRUE)))[[rank]]})
26 |   } else {
27 |     do <- apply(otu, 2, function (x) {sum(rev(sort(x/sum(x, na.rm = TRUE)))[1:rank])})
28 |   }
29 |   names(do) <- sample_names(x)
30 |   
31 |   do
32 | 
33 | }
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/archive/validate.R:
--------------------------------------------------------------------------------
 1 | #' @title Validate Phyloseq
 2 | #' @description Validate phyloseq object.
 3 | #' @param x phyloseq object
 4 | #' @details Checks that the abundances and sample_data have exactly same samples.
 5 | #' @return A validated and polished phyloseq object
 6 | #' @export
 7 | #' @examples
 8 | #'   data(dietswap)
 9 | #'   validate(dietswap)
10 | validate <- function (x) {
11 | 
12 |   validated <- TRUE		  
13 | 
14 |   dat <- t(abundances(x))
15 |   meta <- sample_data(x)		         
16 |   coms <- intersect(rownames(dat), rownames(meta))
17 | 
18 |   if (length(coms) < 2) {
19 |     validated <- FALSE
20 |     warning("Check that the abundances and sample_data have more than 1 samples 
21 |              in common")
22 |     return(validated)
23 |   } else {
24 |     # Include only the common samples    
25 |     x@sam_data <- meta[coms,]
26 |     otu_table(x) <- otu_table(t(dat[coms,]), taxa_are_rows = TRUE)
27 |   }
28 | 
29 |   # Return validated and polished phyloseq object
30 |   if (validated) {
31 |     x
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/archive/vm.R:
--------------------------------------------------------------------------------
 1 | #fs <- list.files("~/HITChipDB/R/", full.names = TRUE)
 2 | #for (f in fs)  { source(f) }
 3 | #fs <- list.files("~/microbiome/R/", full.names = TRUE)
 4 | #for (f in fs)  { source(f) }
 5 | 
 6 | # -------------------------------------------
 7 | library(devtools)
 8 | install_github("microbiome/microbiome")
 9 | install_github("microbiome/HITChipDB")
10 | 
11 | res <- run.profiling.script(dbuser, dbpwd, dbname, verbose = FALSE, host = host, port = port, summarization.methods = summarization.methods, which.projects = which.projects)
12 | 
13 | projs <- list.mysql.projects(dbuser, dbpwd, dbname, host = host, port = port)
14 | 
15 | res <- run.profiling.script(dbuser, dbpwd, dbname, verbose = FALSE, host = host, port = port, summarization.methods = summarization.methods, which.projects = "TURKU PET STUDY")
16 | 
17 | # -------------------------------
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/crash-handler-permission:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/crash-handler-permission


--------------------------------------------------------------------------------
/crash-handler.conf:
--------------------------------------------------------------------------------
1 | crash-handling-enabled="1"
2 | 


--------------------------------------------------------------------------------
/debug.log:
--------------------------------------------------------------------------------
1 | [1019/142843.961:ERROR:directory_reader_win.cc(43)] FindFirstFile: The system cannot find the path specified. (0x3)
2 | 


--------------------------------------------------------------------------------
/deseq2.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "DESeq2"
 3 | author: "Leo Lahti, Sudarshan Shetty et al."
 4 | bibliography: 
 5 | - bibliography.bib
 6 | output:
 7 |   BiocStyle::html_document:
 8 |     number_sections: no
 9 |     toc: yes
10 |     toc_depth: 4
11 |     toc_float: true
12 |     self_contained: true
13 |     thumbnails: true
14 |     lightbox: true
15 |     gallery: true
16 |     use_bookdown: false
17 |     highlight: haddock
18 | ---
19 | <!--
20 |   %\VignetteEngine{knitr::rmarkdown}
21 |   %\VignetteIndexEntry{microbiome tutorial - comparisons}
22 |   %\usepackage[utf8]{inputenc}
23 |   %\VignetteEncoding{UTF-8}  
24 | -->
25 | 
26 | 
27 | ## Normalization and group-wise comparisons with DESeq2
28 | 
29 | Examples adapted from [Callahan et al. F1000 (2017)](https://f1000research.com/articles/5-1492/v2).
30 | 
31 | Load example data:
32 | 
33 | ```{r deseq2-example, warning=FALSE, message=FALSE, fig.width=8, fig.height=5}
34 | # Load libraries
35 | library(microbiome)
36 | library(ggplot2)
37 | library(magrittr)
38 | library(dplyr)
39 | # Probiotics intervention example data 
40 | data(dietswap) 
41 | 
42 | # Only check the core taxa to speed up examples
43 | pseq <- core(dietswap, detection = 50, prevalence = 80/100)
44 | ```
45 | 
46 | 
47 | Toy example, to be polished:
48 | 
49 | ```{r deseq2-example2, warning=FALSE, message=FALSE, fig.width=8, fig.height=5}
50 | library(phyloseq)
51 | library(reshape2)
52 | library(DESeq2)
53 | library(knitr)
54 | library(magrittr)
55 | # Running the DESeq2 analysis
56 | ds2 <- phyloseq_to_deseq2(pseq, ~ nationality)
57 | dds <- DESeq(ds2)
58 | res <- results(dds)
59 | df <- as.data.frame(res)
60 | df$taxon <- rownames(df)
61 | df <- df %>% arrange(log2FoldChange, padj)
62 | 
63 | library(knitr)
64 | print(head(kable((df))))
65 | ```
66 | 
67 | 
68 | ## Validating DESeq2 results
69 | 
70 | ```{r deseq2-validate, warning=FALSE, message=FALSE, fig.width=8, fig.height=5, eval=TRUE}
71 | # Identify top taxa based on standard ANOVA
72 | source(system.file("extdata/check_anova.R", package = "microbiome"))
73 | ano <- check_anova(pseq, "nationality");
74 | ano$log2FC <- log2(ano$ave.AFR) - log2(ano$ave.AAM)
75 | taxa.anova <- as.character(subset(ano, padj < 0.01 & abs(log2FC) > log2(2))$taxa)
76 | 
77 | # lowPick the top taxa based on DESEq2
78 | taxa.deseq <- subset(res, padj < 0.01 & abs(log2FoldChange) > log2(2))$taxon
79 | 
80 | # Check overlap
81 | # Most DESEq2 taxa are confirmed with ANOVA
82 | library(gplots)
83 | 
84 | # Also the est p-values are well correlated (higher not so)
85 | mf <- data.frame(df$padj, ano$padj)
86 | p <- ggplot(mf, aes(x = log10(df$padj), y = log10(ano$padj))) +
87 |        labs(x = 'DESeq2 adjusted p-value', y = 'ANOVA adjusted p-value') +
88 |        geom_point()
89 | print(p)
90 | ```
91 | 
92 | 
93 | ```{r plot_for_ven, echo=TRUE, message=F, error=F, eval=TRUE}
94 | library(venn) # Check UpSet plot instead
95 | venn( list(ANOVA = taxa.anova,DESeq2 = taxa.deseq))
96 | ```
97 | 
98 | 
99 | 


--------------------------------------------------------------------------------
/figure/pooled3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/figure/pooled3-1.png


--------------------------------------------------------------------------------
/figure/pooled_overdispersion-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/figure/pooled_overdispersion-1.png


--------------------------------------------------------------------------------
/figure/pooled_pcomp-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/figure/pooled_pcomp-1.png


--------------------------------------------------------------------------------
/figure/tail-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/figure/tail-1.png


--------------------------------------------------------------------------------
/figure/univariate5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/figure/univariate5-1.png


--------------------------------------------------------------------------------
/figure/univariate6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/figure/univariate6-1.png


--------------------------------------------------------------------------------
/figure/univariate_boxplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/figure/univariate_boxplot-1.png


--------------------------------------------------------------------------------
/figure/univariate_densityplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microbiome/tutorials/4c9c5eb321358c97369b1d2325ff81906a14ac0b/figure/univariate_densityplot-1.png


--------------------------------------------------------------------------------
/init.R:
--------------------------------------------------------------------------------
 1 | if (!requireNamespace("BiocManager", quietly = TRUE))
 2 |   install.packages("BiocManager")
 3 | library(BiocManager)
 4 | 
 5 | 
 6 | .cran_packages <- c("ggplot2", "gridExtra",'ape', 'dplyr','ggpubr', 'knitr','Cairo','devtools','data.table', 
 7 |                     'splitstackshape', 'rmarkdown', 'tidyverse', 'readxl', 'vegan','knitcitations', 'reshape', 'reshape2', 
 8 |                     'magrittr', 'vegan', 'glue','stringr','devtools','captioner', 'rstan','rstanarm',
 9 |                     'hrbrthemes', 'gcookbook','GGally', 'rvg','ggiraph','network', 'gplots',
10 |                     'intergraph', 'rmdformats', 'FD', 'hrbrthemes', 'GGally', 'lattice', 'venn', 'PMCMRplus')
11 | 
12 | .bioc_packages <- c("dada2", "phyloseq",'SummarizedExperiment','Biobase', 
13 |                    "DECIPHER",'IRanges','BiocGenerics', "phangorn",
14 |                     'BiocStyle', "microbiome", "DESeq2", 'DirichletMultinomial')
15 | 
16 | # package.version("microbiome")
17 | #[1] "1.5.31"
18 | .inst <- .cran_packages %in% installed.packages()
19 | if(any(!.inst)) {
20 |   install.packages(.cran_packages[!.inst])
21 | }
22 | sapply(.cran_packages, require, character.only = TRUE)
23 | 
24 | 
25 | library('devtools')
26 | install_github('zdk123/SpiecEasi')
27 | install_github('briatte/ggnet')
28 | 
29 | .inst <- .bioc_packages %in% installed.packages()
30 | if(any(!.inst)) {
31 |   #source("http://bioconductor.org/biocLite.R")
32 |   
33 |   BiocManager::install(.bioc_packages[!.inst], ask = F)
34 | }
35 | install_github('antagomir/netresponse')
36 | install_github('microsud/microbiomeutilities')
37 | ###
38 | if (!requireNamespace("BiocManager", quietly = TRUE))
39 |   install.packages("BiocManager")
40 | 
41 | BiocManager::install(c(.bioc_packages,"SummarizedExperiment", 'DelayedArray'))
42 | # Load packages into session, and print package version
43 | sapply(c(.cran_packages, .bioc_packages), require, character.only = TRUE)
44 | 
45 | 


--------------------------------------------------------------------------------
/limma.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Limma - linear models for microarrays"
  3 | author: "Leo Lahti, Sudarshan Shetty et al."
  4 | bibliography: 
  5 | - bibliography.bib
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 4
 11 |     toc_float: true
 12 |     self_contained: true
 13 |     thumbnails: true
 14 |     lightbox: true
 15 |     gallery: true
 16 |     use_bookdown: false
 17 |     highlight: haddock
 18 | ---
 19 | <!--
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteIndexEntry{microbiome tutorial - limma}
 22 |   %\usepackage[utf8]{inputenc}
 23 |   %\VignetteEncoding{UTF-8}  
 24 | -->
 25 | 
 26 | 
 27 | ## Two-group comparison at community level with limma
 28 | 
 29 | Load example data:
 30 | 
 31 | ```{r boxplot-example, warning=FALSE, message=FALSE, fig.width=8, fig.height=5}
 32 | # Load libraries
 33 | library(microbiome)
 34 | library(ggplot2)
 35 | library(dplyr)
 36 | 
 37 | # Probiotics intervention example data 
 38 | data(peerj32) # Source: https://peerj.com/articles/32/
 39 | pseq <- peerj32$phyloseq # Rename the example data
 40 | 
 41 | # Get OTU abundances and sample metadata
 42 | otu <- abundances(microbiome::transform(pseq, "log10"))
 43 | meta <- meta(pseq)
 44 | ```
 45 | 
 46 | 
 47 | ## Linear models with limma
 48 | 
 49 | Identify most significantly different taxa between males and females using the limma method. See [limma homepage](http://bioinf.wehi.edu.au/limma/) and [limma User's guide](https://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf) for details. For discussion on why limma is preferred over t-test, see [this
 50 | article](http://www.plosone.org/article/info:doi/10.1371/journal.pone.0012336).
 51 | 
 52 | ```{r limma-example, warning=FALSE}
 53 | # Compare the two groups with limma
 54 | library(limma)
 55 | 
 56 | # Prepare the design matrix which states the groups for each sample
 57 | # in the otu
 58 | design <- cbind(intercept = 1, Grp2vs1 = meta[["sex"]])
 59 | rownames(design) <- rownames(meta)
 60 | design <- design[colnames(otu), ]
 61 | 
 62 | # NOTE: results and p-values are given for all groupings in the design matrix
 63 | # Now focus on the second grouping ie. pairwise comparison
 64 | coef.index <- 2
 65 |      
 66 | # Fit the limma model
 67 | fit <- lmFit(otu, design)
 68 | fit <- eBayes(fit)
 69 | 
 70 | # Limma P-values
 71 | pvalues.limma = fit$p.value[, 2]
 72 | 
 73 | # Limma effect sizes
 74 | efs.limma <-  fit$coefficients[, "Grp2vs1"]
 75 | 
 76 | # Summarise
 77 | library(knitr)
 78 | kable(topTable(fit, coef = coef.index, p.value=0.1), digits = 2)
 79 | ```
 80 | 
 81 | Quantile-Quantile plot and volcano plot for limma
 82 | 
 83 | ```{r limma-qq, warning=FALSE}
 84 | # QQ
 85 | qqt(fit$t[, coef.index], df = fit$df.residual + fit$df.prior); abline(0,1)
 86 | 
 87 | # Volcano
 88 | volcanoplot(fit, coef = coef.index, highlight = coef.index)
 89 | ```
 90 | 
 91 | 
 92 | ## Comparison between limma and t-test
 93 | 
 94 | Order the taxa with t-test for comparison and validation purposes. The
 95 | differences are small in this simulated example, but can be
 96 | considerable in real
 97 | data. For discussion on why limma is preferred over t-test, see [this
 98 | article](http://www.plosone.org/article/info:doi/10.1371/journal.pone.0012336).
 99 | 
100 | 
101 | ```{r limma-compairson, warning=FALSE}
102 | # Compare the two groups with t-test
103 | library(dplyr)
104 | pvalues.ttest <- c()
105 | male.samples <- dplyr::filter(meta, sex == "male")$sample
106 | female.samples <- dplyr::filter(meta, sex == "female")$sample
107 | for (tax in rownames(otu)) {
108 |   pvalues.ttest[[tax]] <- t.test(otu[tax, male.samples], otu[tax, female.samples])$p.value
109 | }
110 | # Multiple testing correction
111 | pvalues.ttest <- p.adjust(pvalues.ttest, method = "fdr")
112 | 
113 | # Compare p-values between limma and t-test
114 | taxa <- rownames(otu)
115 | plot(pvalues.ttest[taxa], pvalues.limma[taxa])
116 | abline(0,1,lty = 2)
117 | ```
118 | 
119 | 
120 | ## Continuous variables
121 | 
122 | Rapid quantification of continuous associations can be done with the
123 | lm_phyloseq wrapper function.
124 | 
125 | This uses the limma model to generate a table of P-values and effect
126 | sizes. Note that no confounding variables taken into account in this
127 | wrapper. See the [limma homepage](http://bioinf.wehi.edu.au/limma/)
128 | for more detailed analyses.
129 | 
130 | ```{r limma-lm-phyloseq2, warning=FALSE, eval=FALSE}
131 | data(atlas1006)
132 | source(system.file("extdata/lm_phyloseq.R", package = "microbiome"))
133 | tab <- lm_phyloseq(atlas1006, "age")
134 | kable(head(tab), digits = 3)
135 | ```
136 | 
137 | 
138 | 
139 | 


--------------------------------------------------------------------------------
/main.R:
--------------------------------------------------------------------------------
 1 | library(rmarkdown)
 2 | fs <- list.files(pattern = ".Rmd$")
 3 | fs <- sample(fs)
 4 | fs <- setdiff(fs, c("info.Rmd", "misc.Rmd", "all.Rmd"))
 5 | #fs <- setdiff(fs, c("Betadiversity.Rmd"))
 6 | #fs <- setdiff(fs, c("CompositionAmplicondata.Rmd"))
 7 | #fs <- setdiff(fs, c("Composition.Rmd"))
 8 | #fs <- setdiff(fs, c("Diversity.Rmd"))
 9 | fs0 <- fs
10 | rem <- fs
11 | for (myrmdfile in rem) {
12 |   print(myrmdfile)
13 |   render(myrmdfile, output_format = "html_document")
14 |   print(paste(myrmdfile, "has been rendered"))
15 |   rem <- setdiff(rem, myrmdfile) # Remaining files to handle
16 | }
17 | 


--------------------------------------------------------------------------------
/misc.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | 
 5 | As it can be seen, we see only OTu IDs and this may not be useful to interpret the data. We need to reprccess this figure to include taxonomic information. We can do this as follows:  
 6 | 
 7 | ```{r,core-example4, fig.width=14, fig.heigth=18, fig.show='hold', out.width = '200px', warning=FALSE, eval=FALSE}
 8 | library(RColorBrewer)
 9 | library(knitr)
10 | # Core with absolute counts and vertical view:
11 | # and minimum population prevalence (given as percentage)
12 | detections <- 10^seq(log10(1), log10(max(abundances(pseq.2))/10), length = 10)
13 | 
14 | healthycore <- plot_core(pseq.2, plot.type = "heatmap", 
15 |        		 prevalences = prevalences,
16 |        		 detections = detections,
17 | 		 colours = rev(brewer.pal(5, "Spectral")),
18 | 		 min.prevalence = .9)
19 | # get the data used for plotting 
20 | df <- healthycore$data 
21 | 
22 | # get the list of OTUs
23 | list <- df$Taxa 
24 | 
25 | # check the OTU ids
26 | # print(list) 
27 | 
28 | # get the taxonomy data
29 | tax <- tax_table(pseq.2)
30 | tax <- as.data.frame(tax)
31 | 
32 | # add the OTus to last column
33 | tax$OTU <- rownames(tax)
34 | 
35 | # select taxonomy of only 
36 | # those OTUs that are used in the plot
37 | tax2 <- dplyr::filter(tax, rownames(tax) %in% list) 
38 | 
39 | # head(tax2)
40 | 
41 | # We will merege all the column into one except the Doamin as all is bacteria in this case
42 | tax.unit <- tidyr::unite(tax2, Taxa_level,c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species", "OTU"), sep = "_;", remove = TRUE)
43 | 
44 | tax.unit$Taxa_level <- gsub(pattern="[a-z]__",replacement="", tax.unit$Taxa_level)
45 | 
46 | # add this new information into the plot data df
47 | 
48 | df$Taxa <- tax.unit$Taxa_level
49 | 
50 | # you can see now we have the taxonomic information
51 | knitr::kable(head(df))
52 | 
53 | # replace the data in the plot object
54 | healthycore$data <- df
55 | 
56 | plot(healthycore + theme(axis.text.y = element_text(face="italic")))
57 | ```
58 | 
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/plot_sample_trajectory.R:
--------------------------------------------------------------------------------
  1 | # In this we demonstrate how to plot trajactory of a community over time
  2 | # We use the human microbiome time series data from Caporaso et al., 2011 Genome Biol.
  3 | 
  4 | library(microbiome)
  5 | library(jeevanuDB) # external database pkg for microbiome pkg with test data
  6 | library(dplyr)
  7 | library(ggplot2)
  8 | library(viridis)
  9 | # Example data
 10 | data("moving_pictures")
 11 | # Rename
 12 | ps <- moving_pictures
 13 | 
 14 | ps.gut <- subset_samples(ps, sample_type == "stool")
 15 | taxa_names(ps.gut) <- paste0("ASV-", seq(ntaxa(ps.gut)))
 16 | # remove asvs which are zero in all of these samples
 17 | ps.gut <- prune_taxa(taxa_sums(ps.gut) > 0, ps.gut)
 18 | 
 19 | # remove samples with less than 500 reads
 20 | ps.gut <- prune_samples(sample_sums(ps.gut) > 500, ps.gut)
 21 | 
 22 | # Covnert to relative abundances
 23 | ps.gut.rel <- microbiome::transform(ps.gut, "compositional")
 24 | 
 25 | ps.ord <- ordinate(ps.gut.rel, "PCoA")
 26 | 
 27 | ordip <- plot_ordination(ps.gut.rel, ps.ord, justDF = T)
 28 | 
 29 | # Get axis 1 and 2 variation
 30 | evals1 <- round(ps.ord$values$Eigenvalues[1] / sum(ps.ord$values$Eigenvalues) * 100, 2)
 31 | evals2 <- round(ps.ord$values$Eigenvalues[2] / sum(ps.ord$values$Eigenvalues) * 100, 2)
 32 | 
 33 | # Visualize
 34 | # theme_set(theme_bw(14))
 35 | # set colors
 36 | subject_cols <- c(F4 = "#457b9d", M3 = "#e63946")
 37 | 
 38 | # Add trajectory for given subject
 39 | dfs <- subset(ordip, host_subject_id == "F4")
 40 | # arrange according to sampling time
 41 | dfs <- dfs %>%
 42 |   arrange(days_since_experiment_start)
 43 | 
 44 | p <- ggplot(ordip, aes(x = Axis.1, y = Axis.2))
 45 | p2 <- p +
 46 |   geom_path(
 47 |     data = dfs, alpha = 0.5,
 48 |     arrow = arrow(
 49 |       angle = 15, length = unit(0.1, "inches"),
 50 |       ends = "last", type = "closed"
 51 |     )
 52 |   ) +
 53 |   geom_point(aes(color = host_subject_id), alpha = 0.6, size = 3) +
 54 |   scale_color_manual("Subject", values = subject_cols) +
 55 |   xlab(paste("PCoA 1 (", evals1, "%)", sep = "")) +
 56 |   ylab(paste("PCoA 2 (", evals2, "%)", sep = "")) +
 57 |   theme(
 58 |     panel.grid.major = element_blank(),
 59 |     panel.grid.minor = element_blank()
 60 |   )
 61 | # coord_fixed(sqrt(evals[2] / evals[1]))
 62 | 
 63 | # Print figure
 64 | print(p2)
 65 | 
 66 | # Alternatively we can just focus on one subject
 67 | 
 68 | ps.gut.rel.m3 <- subset_samples(ps.gut.rel, host_subject_id == "M3")
 69 | ps.ord.m3 <- ordinate(ps.gut.rel.m3, "PCoA")
 70 | 
 71 | ordip.m3 <- plot_ordination(ps.gut.rel.m3, ps.ord.m3, justDF = T)
 72 | 
 73 | # Get axis 1 and 2 variation
 74 | evals1 <- round(ordip.m3$values$Eigenvalues[1] / sum(ordip.m3$values$Eigenvalues) * 100, 2)
 75 | evals2 <- round(ordip.m3$values$Eigenvalues[2] / sum(ordip.m3$values$Eigenvalues) * 100, 2)
 76 | 
 77 | 
 78 | # arrange according to sampling time
 79 | ordip.m3 <- ordip.m3 %>%
 80 |   arrange(days_since_experiment_start) # important to arrange the time
 81 | 
 82 | # Visualize
 83 | # blank plot initiate
 84 | p1 <- ggplot(ordip.m3, aes(x = Axis.1, y = Axis.2))
 85 | # add layers
 86 | p3 <- p1 +
 87 |   # add arrows with geom_path
 88 |   geom_path(alpha = 0.5, arrow = arrow(
 89 |     angle = 30, length = unit(0.1, "inches"),
 90 |     ends = "last", type = "closed"
 91 |   )) +
 92 |   # add points
 93 |   geom_point(aes(color = days_since_experiment_start), size = 3) +
 94 |   # add gradient colors
 95 |   scale_color_viridis("Days from first sampling") +
 96 |   # add x and y labels
 97 |   xlab(paste("PCoA 1 (", evals1, "%)", sep = "")) +
 98 |   ylab(paste("PCoA 2 (", evals2, "%)", sep = "")) +
 99 |   # remove grids in the plot
100 |   theme(
101 |     panel.grid.major = element_blank(),
102 |     panel.grid.minor = element_blank()
103 |   )
104 | 
105 | print(p3)
106 | 


--------------------------------------------------------------------------------
/plot_time_trajectory.R:
--------------------------------------------------------------------------------
 1 | library(microbiome)
 2 | 
 3 | # Example data
 4 | data(dietswap)
 5 | # Rename
 6 | pseq <- dietswap
 7 | 
 8 | # CLR transformation to remove compositionality biases
 9 | # Add small constant on the data to avoid problems from zero counts
10 | pseq <- transform(pseq, "clr")
11 | 
12 | # Samples x taxa abundance matrix
13 | otu <- t(abundances(pseq))
14 | 
15 | # PCA
16 | proj <- princomp(otu)$scores[, 1:2]
17 | 
18 | # Pick the metadata
19 | df <- meta(pseq)
20 | 
21 | # Add projection
22 | df <- cbind(df, proj[rownames(df),])
23 | 
24 | # Visualize
25 | library(ggplot2)
26 | theme_set(theme_bw(20))
27 | p <- ggplot(df, aes(x = Comp.1, y = Comp.2)) + geom_point(aes(color = group))
28 | 
29 | # Add trajectory for given subject
30 | dfs <- subset(df, subject == "zaq")
31 | p2 <- p + geom_path(data = dfs)
32 | 
33 | # Print figure
34 | print(p2)
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------