├── LICENSE ├── docs ├── tutorial_cache │ └── html │ │ ├── __packages │ │ ├── pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdb │ │ ├── plot_plot_24f20174733d96dba5249a7d35c2c80c.rdb │ │ ├── pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdx │ │ ├── pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.RData │ │ ├── plot_plot_24f20174733d96dba5249a7d35c2c80c.rdx │ │ └── plot_plot_24f20174733d96dba5249a7d35c2c80c.RData ├── tutorial_files │ └── figure-html │ │ ├── ma_plot-1.png │ │ ├── pca_plot-1.png │ │ ├── plot_plot-1.png │ │ ├── unnamed-chunk-4-1.png │ │ ├── unnamed-chunk-5-1.png │ │ ├── unnamed-chunk-7-1.png │ │ ├── unnamed-chunk-8-1.png │ │ ├── unnamed-chunk-8-2.png │ │ ├── unnamed-chunk-8-3.png │ │ ├── unnamed-chunk-8-4.png │ │ ├── unnamed-chunk-9-1.png │ │ ├── unnamed-chunk-9-2.png │ │ ├── unnamed-chunk-9-3.png │ │ ├── unnamed-chunk-9-4.png │ │ └── coverage_threshold-1.png ├── atacr_which.html ├── differential_windows.html └── tutorial.Rmd ├── tests ├── testthat.R └── testthat │ ├── a1_smallSorted.bam │ ├── a2_smallSorted.bam │ ├── b1_smallSorted.bam │ ├── b2_smallSorted.bam │ ├── a1_smallSorted.bam.bai │ ├── a2_smallSorted.bam.bai │ ├── b1_smallSorted.bam.bai │ ├── b2_smallSorted.bam.bai │ ├── bait_genes.gff │ ├── sample_treatment_bam_mappings_for_test.csv │ ├── helper-functions.R │ ├── test_methods.R │ ├── test_differentials.R │ ├── test_normalisation.R │ ├── control_windows.txt │ ├── test_atacr.R │ └── test_loading.R ├── data ├── sim_counts.rda ├── small_counts.rda └── athal_wt_counts.rda ├── .gitignore ├── README-unnamed-chunk-2-1.png ├── .Rbuildignore ├── inst └── extdata │ ├── ATAC102 │ ├── alignedSorted.bam │ └── alignedSorted.bam.bai │ ├── ATAC103 │ ├── alignedSorted.bam │ └── alignedSorted.bam.bai │ ├── ATAC202 │ ├── alignedSorted.bam │ └── alignedSorted.bam.bai │ ├── ATAC203 │ ├── alignedSorted.bam │ └── alignedSorted.bam.bai │ ├── ATAC302 │ ├── alignedSorted.bam │ └── alignedSorted.bam.bai │ ├── ATAC303 │ ├── alignedSorted.bam │ └── alignedSorted.bam.bai │ └── 
tutorial_mappings.csv ├── man ├── simulate_counts.Rd ├── qqarb.Rd ├── treatments.Rd ├── make_tutorial_data.Rd ├── sample_pca_plot.Rd ├── print.atacr.Rd ├── gof.Rd ├── plot.atacr.Rd ├── ma_data.Rd ├── summary.atacr.Rd ├── as.data.frame.atacr.Rd ├── library_size_scaling_factors.Rd ├── library_size_normalisation_internal.Rd ├── make_csaw_params.Rd ├── get_t.Rd ├── get_bait_regions_from_gff.Rd ├── make_corrplot.Rd ├── assay_matrix_to_df.Rd ├── read_experiment_info.Rd ├── target_count_summary.Rd ├── Est.Depth.Rd ├── coverage_count_summary.Rd ├── target_count_coverage.Rd ├── as.matrix.atacr.Rd ├── median_virtual_experiment.Rd ├── as.DGEList.Rd ├── get_bait_regions_from_text.Rd ├── plot_counts.Rd ├── chromosome_coverage.Rd ├── small_counts.Rd ├── make_scanBamParam.Rd ├── text_to_gff.Rd ├── bootstrap_t.Rd ├── plot_GoF.Rd ├── sample_kmeans_cluster.Rd ├── get_expected_values.Rd ├── select_data.Rd ├── get_GoF_factors.Rd ├── estimate_GoFs.Rd ├── make_params.Rd ├── sample_correlation_plot.Rd ├── calc_quantiles.Rd ├── control_window_normalise_internal.Rd ├── view_gene.Rd ├── control_window_scaling_factors.Rd ├── observed_expected_bins.Rd ├── plot_count_by_chromosome.Rd ├── coverage_summary.Rd ├── ma_plot.Rd ├── estimate_bayes_factor.Rd ├── estimate_bayes_factor_multiclass.Rd ├── estimate_fdr.Rd ├── scale_factor_normalise.Rd ├── count_windows_under_threshold.Rd ├── edgeR_exact.Rd ├── make_UpSetR.Rd ├── estimate_fdr_multiclass.Rd ├── find_controls_by_GoF.Rd ├── edgeR_multiclass.Rd ├── windows_below_coverage_threshold_plot.Rd ├── extract_features_from_gff.Rd ├── load_atac.Rd ├── athal_wt_counts.Rd ├── load_rnaseq.Rd ├── normalise_by_window_width.Rd ├── library_size_normalisation.Rd ├── control_window_normalise.Rd ├── sim_counts.Rd └── make_counts.Rd ├── .travis.yml ├── atacr.Rproj ├── DESCRIPTION ├── NAMESPACE ├── vignettes ├── atacr_which.Rmd ├── summaries.Rmd ├── atacr.Rmd ├── normalisations.Rmd ├── differential_windows.Rmd └── loading.Rmd ├── README.Rmd ├── R ├── sims.R ├── 
methods.R ├── atacr.R ├── normalisation.R ├── loading.R └── differentials.R └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2017 2 | COPYRIGHT HOLDER: Dan MacLean 3 | -------------------------------------------------------------------------------- /docs/tutorial_cache/html/__packages: -------------------------------------------------------------------------------- 1 | base 2 | atacr 3 | bindrcpp 4 | -------------------------------------------------------------------------------- /docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/tutorial_cache/html/plot_plot_24f20174733d96dba5249a7d35c2c80c.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(atacr) 3 | 4 | test_check("atacr") 5 | -------------------------------------------------------------------------------- /data/sim_counts.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/data/sim_counts.rda -------------------------------------------------------------------------------- /data/small_counts.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/data/small_counts.rda -------------------------------------------------------------------------------- /data/athal_wt_counts.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/data/athal_wt_counts.rda 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | inst/doc 6 | atacr.Rproj 7 | *.DS_Store 8 | -------------------------------------------------------------------------------- /README-unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/README-unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /tests/testthat/a1_smallSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/a1_smallSorted.bam -------------------------------------------------------------------------------- /tests/testthat/a2_smallSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/a2_smallSorted.bam -------------------------------------------------------------------------------- /tests/testthat/b1_smallSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/b1_smallSorted.bam -------------------------------------------------------------------------------- /tests/testthat/b2_smallSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/b2_smallSorted.bam -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.Rproj\.user$ 2 | ^\.travis\.yml$ 3 | ^.*\.Rproj$ 4 | ^README\.Rmd$ 5 | ^README-.*\.png$ 6 | TODO 7 | 
docs/ 8 | -------------------------------------------------------------------------------- /tests/testthat/a1_smallSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/a1_smallSorted.bam.bai -------------------------------------------------------------------------------- /tests/testthat/a2_smallSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/a2_smallSorted.bam.bai -------------------------------------------------------------------------------- /tests/testthat/b1_smallSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/b1_smallSorted.bam.bai -------------------------------------------------------------------------------- /tests/testthat/b2_smallSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/b2_smallSorted.bam.bai -------------------------------------------------------------------------------- /inst/extdata/ATAC102/alignedSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC102/alignedSorted.bam -------------------------------------------------------------------------------- /inst/extdata/ATAC103/alignedSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC103/alignedSorted.bam -------------------------------------------------------------------------------- /inst/extdata/ATAC202/alignedSorted.bam: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC202/alignedSorted.bam -------------------------------------------------------------------------------- /inst/extdata/ATAC203/alignedSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC203/alignedSorted.bam -------------------------------------------------------------------------------- /inst/extdata/ATAC302/alignedSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC302/alignedSorted.bam -------------------------------------------------------------------------------- /inst/extdata/ATAC303/alignedSorted.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC303/alignedSorted.bam -------------------------------------------------------------------------------- /tests/testthat/bait_genes.gff: -------------------------------------------------------------------------------- 1 | Chr1 manual gene 246000 246200 . + . ID=FakeGeneA 2 | Chr1 manual gene 246700 247000 . + . 
ID=FakeGeneB -------------------------------------------------------------------------------- /inst/extdata/ATAC102/alignedSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC102/alignedSorted.bam.bai -------------------------------------------------------------------------------- /inst/extdata/ATAC103/alignedSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC103/alignedSorted.bam.bai -------------------------------------------------------------------------------- /inst/extdata/ATAC202/alignedSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC202/alignedSorted.bam.bai -------------------------------------------------------------------------------- /inst/extdata/ATAC203/alignedSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC203/alignedSorted.bam.bai -------------------------------------------------------------------------------- /inst/extdata/ATAC302/alignedSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC302/alignedSorted.bam.bai -------------------------------------------------------------------------------- /inst/extdata/ATAC303/alignedSorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC303/alignedSorted.bam.bai -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/ma_plot-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/ma_plot-1.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/pca_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/pca_plot-1.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/plot_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/plot_plot-1.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-8-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-8-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-8-2.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-8-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-8-3.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-8-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-8-4.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-9-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-9-2.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-9-3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-9-3.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/unnamed-chunk-9-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-9-4.png -------------------------------------------------------------------------------- /docs/tutorial_files/figure-html/coverage_threshold-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/coverage_threshold-1.png -------------------------------------------------------------------------------- /docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdx -------------------------------------------------------------------------------- /docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.RData -------------------------------------------------------------------------------- /docs/tutorial_cache/html/plot_plot_24f20174733d96dba5249a7d35c2c80c.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_cache/html/plot_plot_24f20174733d96dba5249a7d35c2c80c.rdx 
# Test helpers shared by the testthat suite.

# Report whether two vectors contain exactly the same set of values.
#
# Order and duplicate entries are ignored; only membership matters.
#
# a: vector of observed values
# b: vector of expected values
#
# Returns a single logical: TRUE when every value of `a` occurs in `b`
# AND every value of `b` occurs in `a`, FALSE otherwise.
expect_vectors_equal <- function(a, b) {
  # The previous implementation tested `a` against `b` twice
  # (`all a %in% b` and `setdiff(a, b)` empty), so a strict subset of `b`
  # was wrongly reported as equal. setequal() checks both directions.
  return(setequal(a, b))
}

# Report whether the names of `l` are exactly the values in `v`.
#
# l: a named object (e.g. a list); its names() are compared
# v: character vector of the names that must be present
#
# Returns a single logical: TRUE when names(l) and `v` hold the same set of
# values, FALSE when a name is missing from `l` or `l` has a name not in `v`.
expect_has_all_and_only_these_members <- function(l, v) {
  return(expect_vectors_equal(names(l), v))
}
documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | r: bioc-release 7 | r_packages: 8 | - covr 9 | 10 | warnings_are_errors: false 11 | 12 | after_success: 13 | - Rscript -e 'library(covr); codecov()' 14 | 15 | before_script: 16 | - echo "BiocParallel::register(BiocParallel::SerialParam())" > ~/.Rprofile 17 | -------------------------------------------------------------------------------- /man/qqarb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{qqarb} 4 | \alias{qqarb} 5 | \title{Named distribution qqplot} 6 | \usage{ 7 | qqarb(obs, dist = "norm") 8 | } 9 | \arguments{ 10 | \item{obs}{observed values} 11 | 12 | \item{dist}{expected distribution} 13 | } 14 | \value{ 15 | ggplot2 object 16 | } 17 | \description{ 18 | Named distribution qqplot 19 | } 20 | -------------------------------------------------------------------------------- /man/treatments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{treatments} 4 | \alias{treatments} 5 | \title{return list of treatment names} 6 | \usage{ 7 | treatments(data) 8 | } 9 | \arguments{ 10 | \item{data}{an atacr object} 11 | } 12 | \value{ 13 | char vector of unique treatment names 14 | } 15 | \description{ 16 | return list of treatment names 17 | } 18 | -------------------------------------------------------------------------------- /atacr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: 
pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /man/make_tutorial_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{make_tutorial_data} 4 | \alias{make_tutorial_data} 5 | \title{make data to follow on in the tutorial} 6 | \usage{ 7 | make_tutorial_data(write_dir = getwd()) 8 | } 9 | \arguments{ 10 | \item{write_dir}{directory to put sample files in defaults to `getwd()`} 11 | } 12 | \description{ 13 | make data to follow on in the tutorial 14 | } 15 | -------------------------------------------------------------------------------- /inst/extdata/tutorial_mappings.csv: -------------------------------------------------------------------------------- 1 | treatment,sample_name,bam_file_path 2 | 4h_mock,4h_mock_rep1,inst/extdata/ATAC102/alignedSorted.bam 3 | 4h_mock,4h_mock_rep2,inst/extdata/ATAC202/alignedSorted.bam 4 | 4h_mock,4h_mock_rep3,inst/extdata/ATAC302/alignedSorted.bam 5 | 4h_infected,4h_infected_rep1,inst/extdata/ATAC103/alignedSorted.bam 6 | 4h_infected,4h_infected_rep2,inst/extdata/ATAC203/alignedSorted.bam 7 | 4h_infected,4h_infected_rep3,inst/extdata/ATAC303/alignedSorted.bam 8 | -------------------------------------------------------------------------------- /man/sample_pca_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{sample_pca_plot} 4 | \alias{sample_pca_plot} 5 | \title{PCA plot of samples} 6 | \usage{ 7 | sample_pca_plot(data, which = "bait_windows") 8 | } 9 | \arguments{ 10 | \item{data}{atacr object} 11 | 12 | \item{which}{the 
subset of the data to plot} 13 | } 14 | \value{ 15 | ggplot object 16 | } 17 | \description{ 18 | PCA plot of samples 19 | } 20 | -------------------------------------------------------------------------------- /man/print.atacr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{print.atacr} 4 | \alias{print.atacr} 5 | \title{writes a summary of the metadata for a given atacr object} 6 | \usage{ 7 | \method{print}{atacr}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an atacr object} 11 | 12 | \item{\dots}{other options for print generic} 13 | } 14 | \description{ 15 | writes a summary of the metadata for a given atacr object 16 | } 17 | -------------------------------------------------------------------------------- /man/gof.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{gof} 4 | \alias{gof} 5 | \title{estimates Goodness of Fit for each row in a count matrix} 6 | \usage{ 7 | gof(mat) 8 | } 9 | \arguments{ 10 | \item{mat}{a count matrix usually from SummarizedExperiment::assay()} 11 | } 12 | \value{ 13 | a named vector of GoF estimates 14 | } 15 | \description{ 16 | estimates Goodness of Fit for each row in a count matrix 17 | } 18 | -------------------------------------------------------------------------------- /man/plot.atacr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{plot.atacr} 4 | \alias{plot.atacr} 5 | \title{returns summary plot of data in atacr object} 6 | \usage{ 7 | \method{plot}{atacr}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{atacr object} 11 | 12 | \item{\dots}{extra options for generic} 13 | } 14 | \value{ 15 | gridExtra plot 16 | } 17 | \description{ 18 | returns summary plot of data in atacr object 19 | } 20 | -------------------------------------------------------------------------------- /man/ma_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{ma_data} 4 | \alias{ma_data} 5 | \title{adds an 'm' and an 'a' column to an assay matrix dataframe for ma plots} 6 | \usage{ 7 | ma_data(sample_matrix) 8 | } 9 | \arguments{ 10 | \item{sample_matrix}{a SummarizedExperiment::assay from which to make the MA plot} 11 | } 12 | \description{ 13 | adds an 'm' and an 'a' column to an assay matrix dataframe for ma plots 14 | } 15 | -------------------------------------------------------------------------------- /man/summary.atacr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{summary.atacr} 4 | \alias{summary.atacr} 5 | \title{writes a detailed data summary of the atacr object} 6 | \usage{ 7 | \method{summary}{atacr}(object, ...) 
8 | } 9 | \arguments{ 10 | \item{object}{an atacr object} 11 | 12 | \item{\dots}{other options for summary generic} 13 | } 14 | \description{ 15 | writes a detailed data summary of the atacr object 16 | } 17 | -------------------------------------------------------------------------------- /man/as.data.frame.atacr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{as.data.frame.atacr} 4 | \alias{as.data.frame.atacr} 5 | \title{returns dataframe of data in atacr object} 6 | \usage{ 7 | \method{as.data.frame}{atacr}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{object to print} 11 | 12 | \item{\dots}{other options for generic} 13 | } 14 | \value{ 15 | dataframe 16 | } 17 | \description{ 18 | returns dataframe of data in atacr object 19 | } 20 | -------------------------------------------------------------------------------- /man/library_size_scaling_factors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{library_size_scaling_factors} 4 | \alias{library_size_scaling_factors} 5 | \title{calculate scaling factors for library size} 6 | \usage{ 7 | library_size_scaling_factors(se) 8 | } 9 | \arguments{ 10 | \item{se}{a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()} 11 | } 12 | \description{ 13 | calculate scaling factors for library size 14 | } 15 | -------------------------------------------------------------------------------- /man/library_size_normalisation_internal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{library_size_normalisation_internal} 4 | \alias{library_size_normalisation_internal} 5 | \title{do 
a library size normalisation} 6 | \usage{ 7 | library_size_normalisation_internal(se) 8 | } 9 | \arguments{ 10 | \item{se}{a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()} 11 | } 12 | \description{ 13 | do a library size normalisation 14 | } 15 | -------------------------------------------------------------------------------- /man/make_csaw_params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{make_csaw_params} 4 | \alias{make_csaw_params} 5 | \title{format a csaw::readParam object from the atacr::make_params() object} 6 | \usage{ 7 | make_csaw_params(p) 8 | } 9 | \arguments{ 10 | \item{p}{an object returned from atacr::make_params()} 11 | } 12 | \value{ 13 | a csaw::readParam object 14 | } 15 | \description{ 16 | format a csaw::readParam object from the atacr::make_params() object 17 | } 18 | -------------------------------------------------------------------------------- /man/get_t.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differentials.R 3 | \name{get_t} 4 | \alias{get_t} 5 | \title{gets t-statistic for two vectors of data, x and y} 6 | \usage{ 7 | get_t(data, indices) 8 | } 9 | \arguments{ 10 | \item{data}{matrix of sample data} 11 | 12 | \item{indices}{indices selected by boot::boot} 13 | } 14 | \value{ 15 | t the t statistic from Student's t-test or NA if error 16 | } 17 | \description{ 18 | gets t-statistic for two vectors of data, x and y 19 | } 20 | -------------------------------------------------------------------------------- /man/get_bait_regions_from_gff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | 
\name{get_bait_regions_from_gff} 4 | \alias{get_bait_regions_from_gff} 5 | \title{reads a gff file containing the bait regions} 6 | \usage{ 7 | get_bait_regions_from_gff(file_name) 8 | } 9 | \arguments{ 10 | \item{file_name}{path to the file containing the bait regions} 11 | } 12 | \value{ 13 | GenomicRanges object of bait regions 14 | } 15 | \description{ 16 | reads a gff file containing the bait regions 17 | } 18 | -------------------------------------------------------------------------------- /man/make_corrplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{make_corrplot} 4 | \alias{make_corrplot} 5 | \title{generate corrplot from matrix of counts} 6 | \usage{ 7 | make_corrplot(counts, method = "pearson") 8 | } 9 | \arguments{ 10 | \item{counts}{a matrix of counts} 11 | 12 | \item{method}{the correlation method to use, any supported by `cor()` is useable} 13 | } 14 | \value{ 15 | ggplot2 plot 16 | } 17 | \description{ 18 | generate corrplot from matrix of counts 19 | } 20 | -------------------------------------------------------------------------------- /man/assay_matrix_to_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{assay_matrix_to_df} 4 | \alias{assay_matrix_to_df} 5 | \title{converts SummarizedExperiment::assay matrix to a dataframe with cols 'window', 'sample' and 'count'} 6 | \usage{ 7 | assay_matrix_to_df(matrix) 8 | } 9 | \arguments{ 10 | \item{matrix}{a SummarizedExperiment::assay matrix} 11 | } 12 | \description{ 13 | converts SummarizedExperiment::assay matrix to a dataframe with cols 'window', 'sample' and 'count' 14 | } 15 | -------------------------------------------------------------------------------- /man/read_experiment_info.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{read_experiment_info} 4 | \alias{read_experiment_info} 5 | \title{Loads in a CSV file describing treatment, samples and bam files} 6 | \usage{ 7 | read_experiment_info(filename, should_be = c("treatment", "sample_name", 8 | "bam_file_path")) 9 | } 10 | \arguments{ 11 | \item{filename}{path and name of the file to load} 12 | } 13 | \description{ 14 | Loads in a CSV file describing treatment, samples and bam files 15 | } 16 | -------------------------------------------------------------------------------- /man/target_count_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{target_count_summary} 4 | \alias{target_count_summary} 5 | \title{Get a summary of reads hitting the bait and non bait windows} 6 | \usage{ 7 | target_count_summary(data) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | } 12 | \value{ 13 | a table of on target and off target read counts 14 | } 15 | \description{ 16 | Get a summary of reads hitting the bait and non bait windows 17 | } 18 | -------------------------------------------------------------------------------- /man/Est.Depth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{Est.Depth} 4 | \alias{Est.Depth} 5 | \title{Depth estimation, directly from https://github.com/cran/PoissonSeq/blob/master/R/ps_cmeans.R} 6 | \usage{ 7 | Est.Depth(n, iter = 5) 8 | } 9 | \arguments{ 10 | \item{n}{a matrix} 11 | 12 | \item{iter}{number of runs of the Depth finder.} 13 | } 14 | \value{ 15 | list of depths and means 16 | } 17 | 
\description{ 18 | Depth estimation, directly from https://github.com/cran/PoissonSeq/blob/master/R/ps_cmeans.R 19 | } 20 | -------------------------------------------------------------------------------- /man/coverage_count_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{coverage_count_summary} 4 | \alias{coverage_count_summary} 5 | \title{Get a summary of depth of coverage in the bait and non bait windows} 6 | \usage{ 7 | coverage_count_summary(data) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | } 12 | \value{ 13 | a table of on target and off target mean depths 14 | } 15 | \description{ 16 | Get a summary of depth of coverage in the bait and non bait windows 17 | } 18 | -------------------------------------------------------------------------------- /man/target_count_coverage.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{target_count_coverage} 4 | \alias{target_count_coverage} 5 | \title{Read count and mean coverage hitting the bait and non bait windows} 6 | \usage{ 7 | target_count_coverage(data) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | } 12 | \value{ 13 | a dataframe of on target and off target read counts 14 | } 15 | \description{ 16 | Read count and mean coverage hitting the bait and non bait windows 17 | } 18 | -------------------------------------------------------------------------------- /man/as.matrix.atacr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{as.matrix.atacr} 4 | 
\alias{as.matrix.atacr} 5 | \title{returns given subset of data in atacr object as a matrix} 6 | \usage{ 7 | \method{as.matrix}{atacr}(x, ..., which = "bait_windows") 8 | } 9 | \arguments{ 10 | \item{x}{an atacr object} 11 | 12 | \item{\dots}{other options for generic} 13 | 14 | \item{which}{the subset of data to work on} 15 | } 16 | \value{ 17 | matrix of counts in subset 18 | } 19 | \description{ 20 | returns given subset of data in atacr object as a matrix 21 | } 22 | -------------------------------------------------------------------------------- /man/median_virtual_experiment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{median_virtual_experiment} 4 | \alias{median_virtual_experiment} 5 | \title{a median of window values across all samples in a vector, for ma plots} 6 | \usage{ 7 | median_virtual_experiment(sample_matrix) 8 | } 9 | \arguments{ 10 | \item{sample_matrix}{counts extracted from a SummarizedExperiment object} 11 | } 12 | \value{ 13 | the median of the provided counts, columnwise 14 | } 15 | \description{ 16 | a median of window values across all samples in a vector, for ma plots 17 | } 18 | -------------------------------------------------------------------------------- /man/as.DGEList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{as.DGEList} 4 | \alias{as.DGEList} 5 | \title{returns DGEList for edgeR from atacr object} 6 | \usage{ 7 | as.DGEList(atacr, which = "bait_windows", remove.zeros = FALSE) 8 | } 9 | \arguments{ 10 | \item{atacr}{an atacr object} 11 | 12 | \item{which}{the subset of the data to work on} 13 | 14 | \item{remove.zeros}{whether to remove rows that have 0 total count.} 15 | } 16 | \value{ 17 | DGEList representing atacr data 18 | } 19 | 
\description{ 20 | returns DGEList for edgeR from atacr object 21 | } 22 | -------------------------------------------------------------------------------- /man/get_bait_regions_from_text.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{get_bait_regions_from_text} 4 | \alias{get_bait_regions_from_text} 5 | \title{reads a csv file containing the bait regions} 6 | \usage{ 7 | get_bait_regions_from_text(file_name) 8 | } 9 | \arguments{ 10 | \item{file_name}{path to a csv file containing the bait regions. File must have a header with columns `bait_name`, `seq_name`, `start`, `end`.} 11 | } 12 | \value{ 13 | GenomicRanges object of bait regions 14 | } 15 | \description{ 16 | reads a csv file containing the bait regions 17 | } 18 | -------------------------------------------------------------------------------- /tests/testthat/test_methods.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | library(atacr) 3 | 4 | context("methods") 5 | 6 | test_that("as.data.frame.atacr() returns proper dataframe",{ 7 | d <- as.data.frame(sim_counts) 8 | expect_vectors_equal(names(d), c("chromosome", "start", "stop", "strand", "sample", "count", "window_type")) 9 | expect_is(d$chromosome, "factor") 10 | expect_is(d$start, "integer") 11 | expect_is(d$stop, "integer") 12 | expect_is(d$sample, "factor") 13 | expect_is(d$count, "numeric") 14 | expect_is(d$window_type, "factor") 15 | expect_equal(levels(d$chromosome), c("synth_chrom")) 16 | 17 | }) 18 | -------------------------------------------------------------------------------- /man/plot_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{plot_counts} 4 | \alias{plot_counts} 5 | 
\title{Plot distribution of counts in given data set} 6 | \usage{ 7 | plot_counts(data, which = "bait_windows", log10 = TRUE) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to plot} 13 | 14 | \item{log10}{log 10 the counts for plotting.} 15 | } 16 | \value{ 17 | ggplot2 plot 18 | } 19 | \description{ 20 | Plot distribution of counts in given data set 21 | } 22 | -------------------------------------------------------------------------------- /man/chromosome_coverage.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{chromosome_coverage} 4 | \alias{chromosome_coverage} 5 | \title{Plot density of read counts by sample over the chromosomes} 6 | \usage{ 7 | chromosome_coverage(data, which = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to plot (default = bait and non_bait windows)} 13 | } 14 | \value{ 15 | a ggplot2 object 16 | } 17 | \description{ 18 | Plot density of read counts by sample over the chromosomes 19 | } 20 | -------------------------------------------------------------------------------- /man/small_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \docType{data} 4 | \name{small_counts} 5 | \alias{small_counts} 6 | \title{small_counts - simulated count data 7 | The data `small_counts` is basically the same thing as `sim_counts` with smaller sample of 100 bait / non-bait windows.} 8 | \format{a list of SummarizedExperiment objects} 9 | \usage{ 10 | small_counts 11 | } 12 | \description{ 13 | small_counts - simulated count data 14 | The data `small_counts` 
is basically the same thing as `sim_counts` with smaller sample of 100 bait / non-bait windows. 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/make_scanBamParam.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{make_scanBamParam} 4 | \alias{make_scanBamParam} 5 | \title{format an rsamtools::scanBamParam object from the atacr::make_params() object} 6 | \usage{ 7 | make_scanBamParam(p, example_bam) 8 | } 9 | \arguments{ 10 | \item{p}{an object returned from atacr::make_params()} 11 | 12 | \item{example_bam}{a filename pointing to a BAM file from which genome size can be taken} 13 | } 14 | \value{ 15 | an rsamtools::scanBamParam object 16 | } 17 | \description{ 18 | format an rsamtools::scanBamParam object from the atacr::make_params() object 19 | } 20 | -------------------------------------------------------------------------------- /man/text_to_gff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{text_to_gff} 4 | \alias{text_to_gff} 5 | \title{writes GFF3 version of a simple text file describing the bait region starts and stops} 6 | \usage{ 7 | text_to_gff(text_in, gff_out) 8 | } 9 | \arguments{ 10 | \item{text_in}{path to the file describing the bait regions. 
File must have a header with columns `bait_name`, `seq_name`, `start_pos`, `end_pos`.} 11 | 12 | \item{gff_out}{path to the gff file to be created} 13 | } 14 | \description{ 15 | writes GFF3 version of a simple text file describing the bait region starts and stops 16 | } 17 | -------------------------------------------------------------------------------- /man/bootstrap_t.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differentials.R 3 | \name{bootstrap_t} 4 | \alias{bootstrap_t} 5 | \title{runs bootstrap t test, wrapper required for boot::boot function} 6 | \usage{ 7 | bootstrap_t(data, iterations = 10) 8 | } 9 | \arguments{ 10 | \item{data}{matrix of sample data} 11 | 12 | \item{iterations}{number of bootstrap iterations to run} 13 | } 14 | \value{ 15 | vector of 2 items: the observed t statistic and p, calculated as the proportion of bootstrap iterations greater than the original t 16 | } 17 | \description{ 18 | runs bootstrap t test, wrapper required for boot::boot function 19 | } 20 | -------------------------------------------------------------------------------- /man/plot_GoF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{plot_GoF} 4 | \alias{plot_GoF} 5 | \title{draw count distribution of GOF estimates} 6 | \usage{ 7 | plot_GoF(atacr, which = "bait_windows", controls = NULL) 8 | } 9 | \arguments{ 10 | \item{atacr}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to plot (default = "bait_windows")} 13 | 14 | \item{controls}{character vector of window names to consider control windows} 15 | } 16 | \value{ 17 | ggplot2 object 18 | } 19 | \description{ 20 | draw count distribution of GOF estimates 21 | } 22 | 
-------------------------------------------------------------------------------- /man/sample_kmeans_cluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{sample_kmeans_cluster} 4 | \alias{sample_kmeans_cluster} 5 | \title{identify kmeans clusters for samples} 6 | \usage{ 7 | sample_kmeans_cluster(data, which = "bait_windows") 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 13 | } 14 | \value{ 15 | dataframe of cluster_id and sample name 16 | } 17 | \description{ 18 | identify kmeans clusters for samples 19 | } 20 | -------------------------------------------------------------------------------- /man/get_expected_values.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{get_expected_values} 4 | \alias{get_expected_values} 5 | \title{given a vector of values return a set of random numbers from a given 6 | distribution} 7 | \usage{ 8 | get_expected_values(obs, dist = "norm") 9 | } 10 | \arguments{ 11 | \item{obs}{vector of observed values} 12 | 13 | \item{dist}{the distribution from which to return expected values} 14 | } 15 | \value{ 16 | a vector of length obs with random variates from distribution dist 17 | } 18 | \description{ 19 | given a vector of values return a set of random numbers from a given 20 | distribution 21 | } 22 | -------------------------------------------------------------------------------- /man/select_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/differentials.R 3 | \name{select_data} 4 | \alias{select_data} 5 | \title{selects appropriate columns and names from an atacr object} 6 | \usage{ 7 | select_data(data, treatment_a, treatment_b, which = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{an atacr object} 11 | 12 | \item{treatment_a}{string naming the first treatment (numerator)} 13 | 14 | \item{treatment_b}{string naming the second treatment (denominator)} 15 | 16 | \item{which}{subset to work on. Default = NULL} 17 | } 18 | \value{ 19 | list of data to be calculated with 20 | } 21 | \description{ 22 | selects appropriate columns and names from an atacr object 23 | } 24 | -------------------------------------------------------------------------------- /man/get_GoF_factors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{get_GoF_factors} 4 | \alias{get_GoF_factors} 5 | \title{estimates sequencing depths based on windows with smallest GoF} 6 | \usage{ 7 | get_GoF_factors(atacr, which = "bait_windows") 8 | } 9 | \arguments{ 10 | \item{atacr}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 13 | } 14 | \value{ 15 | a named vector of each window's GoF estimate. 
16 | } 17 | \description{ 18 | estimates sequencing depths based on windows with smallest GoF 19 | } 20 | -------------------------------------------------------------------------------- /man/estimate_GoFs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{estimate_GoFs} 4 | \alias{estimate_GoFs} 5 | \title{estimates Goodness of Fit from atacr object} 6 | \usage{ 7 | estimate_GoFs(atacr, which = "bait_windows") 8 | } 9 | \arguments{ 10 | \item{atacr}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 13 | } 14 | \value{ 15 | the original atacr object with a new slot - 'gofs' - a named vector of each window's GoF estimate. 16 | } 17 | \description{ 18 | estimates Goodness of Fit from atacr object 19 | } 20 | -------------------------------------------------------------------------------- /man/make_params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{make_params} 4 | \alias{make_params} 5 | \title{set read filters for counting from the BAM file.} 6 | \usage{ 7 | make_params(paired_map = TRUE, minq = 30, dedup = TRUE) 8 | } 9 | \arguments{ 10 | \item{paired_map}{Should reads only be included if they are aligned in pairs. Default = TRUE} 11 | 12 | \item{minq}{The minimum mapping quality to retain a read. Default = 30} 13 | 14 | \item{dedup}{Should removal of PCR duplicates be performed. Default = TRUE} 15 | } 16 | \value{ 17 | a named vector of class "atacr_params" 18 | } 19 | \description{ 20 | set read filters for counting from the BAM file. 
21 | } 22 | -------------------------------------------------------------------------------- /man/sample_correlation_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{sample_correlation_plot} 4 | \alias{sample_correlation_plot} 5 | \title{Plot sample count correlations} 6 | \usage{ 7 | sample_correlation_plot(data, which = "bait_windows", method = "pearson") 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 13 | 14 | \item{method}{the correlation method to use. Any supported by `cor()` is useable} 15 | } 16 | \description{ 17 | Plot sample count correlations 18 | } 19 | -------------------------------------------------------------------------------- /man/calc_quantiles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{calc_quantiles} 4 | \alias{calc_quantiles} 5 | \title{report counts at each quantile for each sample} 6 | \usage{ 7 | calc_quantiles(data, quantiles = c(0.01, 0.05, 0.95, 0.99), which = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{quantiles}{a vector of quantiles to report} 13 | 14 | \item{which}{the subset of data windows to report on. 
Default = 15 | "bait_windows" and "non_bait_windows"} 16 | } 17 | \value{ 18 | list of counts at quantiles 19 | } 20 | \description{ 21 | report counts at each quantile for each sample 22 | } 23 | -------------------------------------------------------------------------------- /man/control_window_normalise_internal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{control_window_normalise_internal} 4 | \alias{control_window_normalise_internal} 5 | \title{do a control window scaling normalisation} 6 | \usage{ 7 | control_window_normalise_internal(se, window_file) 8 | } 9 | \arguments{ 10 | \item{se}{a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()} 11 | 12 | \item{window_file}{a text file containing the positions of control window/gene ranges} 13 | } 14 | \value{ 15 | SummarizedExperiment object, a copy of se with normalised values 16 | } 17 | \description{ 18 | do a control window scaling normalisation 19 | } 20 | -------------------------------------------------------------------------------- /man/view_gene.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{view_gene} 4 | \alias{view_gene} 5 | \title{coverage over gene model} 6 | \usage{ 7 | view_gene(data, gene_id, which = "bait_windows", ensembl = "plants", 8 | ens_dataset = "athaliana_eg_gene") 9 | } 10 | \arguments{ 11 | \item{data}{atacr object} 12 | 13 | \item{gene_id}{the id of the gene to plot around} 14 | 15 | \item{which}{the subset of the data to plot.} 16 | 17 | \item{ensembl}{one of 'plants', 'ensembl' - which version of ensembl to connect to} 18 | 19 | \item{ens_dataset}{which ensembl dataset to connect to} 20 | } 21 | \value{ 22 | plot object 23 | } 24 | \description{ 25 | coverage over 
gene model 26 | } 27 | -------------------------------------------------------------------------------- /man/control_window_scaling_factors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{control_window_scaling_factors} 4 | \alias{control_window_scaling_factors} 5 | \title{extract scaling factors from control windows (often from a file of control gene positions)} 6 | \usage{ 7 | control_window_scaling_factors(se, window_file) 8 | } 9 | \arguments{ 10 | \item{se}{a SummarizedExperiment object} 11 | 12 | \item{window_file}{a text file containing the positions of control window/gene ranges} 13 | } 14 | \value{ 15 | a vector of scaling factors from control genes 16 | } 17 | \description{ 18 | extract scaling factors from control windows (often from a file of control gene positions) 19 | } 20 | -------------------------------------------------------------------------------- /man/observed_expected_bins.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{observed_expected_bins} 4 | \alias{observed_expected_bins} 5 | \title{given a vector of numbers, returns the observed counts in bins of bin_width, and the expected counts from distribution dist} 6 | \usage{ 7 | observed_expected_bins(obs, dist = "pois", bin_width = 10) 8 | } 9 | \arguments{ 10 | \item{obs}{a vector of numbers} 11 | 12 | \item{dist}{a string naming distribution from which to take expected counts} 13 | 14 | \item{bin_width}{the width of the bins for the counts} 15 | } 16 | \value{ 17 | list with members observed and expected which are vectors of counts 18 | } 19 | \description{ 20 | given a vector of numbers, returns the observed counts in bins of bin_width, and the expected counts from distribution dist 21 | } 22 | 
-------------------------------------------------------------------------------- 
/man/plot_count_by_chromosome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{plot_count_by_chromosome} 4 | \alias{plot_count_by_chromosome} 5 | \title{plot the counts split by chromosome and sample} 6 | \usage{ 7 | plot_count_by_chromosome(data, which = "bait_windows", method = "bar") 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 13 | 14 | \item{method}{(bar | smooth | point) which sort of plot to return} 15 | } 16 | \value{ 17 | ggplot2 plot 18 | } 19 | \description{ 20 | plot the counts split by chromosome and sample 21 | } 22 | -------------------------------------------------------------------------------- /man/coverage_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{coverage_summary} 4 | \alias{coverage_summary} 5 | \title{Plot histograms of read counts by sample and window type} 6 | \usage{ 7 | coverage_summary(data, which = NULL, sample = NULL, log_axis = TRUE) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to plot (default = bait and non_bait windows)} 13 | 14 | \item{sample}{the sample to plot (default = all )} 15 | 16 | \item{log_axis}{use a log scale for the x-axis} 17 | } 18 | \value{ 19 | a ggplot2 object 20 | } 21 | \description{ 22 | Plot histograms of read counts by sample and window type 23 | } 24 | -------------------------------------------------------------------------------- /man/ma_plot.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{ma_plot} 4 | \alias{ma_plot} 5 | \title{plot M (log2 ratio of a windows sample count to windows all-sample median count ) versus A (log2 sum of a windows sample count to a windows all-sample median count ) for each window} 6 | \usage{ 7 | ma_plot(data, which = "bait_windows", by = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{an atacr object} 11 | 12 | \item{which}{the subset of windows to operate on} 13 | 14 | \item{by}{a vector of seqnames of the genome to view} 15 | } 16 | \description{ 17 | plot M (log2 ratio of a windows sample count to windows all-sample median count ) versus A (log2 sum of a windows sample count to a windows all-sample median count ) for each window 18 | } 19 | -------------------------------------------------------------------------------- /man/estimate_bayes_factor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differentials.R 3 | \name{estimate_bayes_factor} 4 | \alias{estimate_bayes_factor} 5 | \title{Estimate Bayes Factor and significantly different windows} 6 | \usage{ 7 | estimate_bayes_factor(atacr, treatment_a, treatment_b, which = "bait_windows", 8 | factor = 4) 9 | } 10 | \arguments{ 11 | \item{atacr}{an atacr object} 12 | 13 | \item{treatment_a}{the first treatment to consider} 14 | 15 | \item{treatment_b}{the second treatment to consider} 16 | 17 | \item{which}{the subset of windows to consider} 18 | 19 | \item{factor}{the BayesFactor at which to mark window as significant} 20 | } 21 | \value{ 22 | a dataframe 23 | } 24 | \description{ 25 | Estimate Bayes Factor and significantly different windows 26 | } 27 | -------------------------------------------------------------------------------- 
/man/estimate_bayes_factor_multiclass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differentials.R 3 | \name{estimate_bayes_factor_multiclass} 4 | \alias{estimate_bayes_factor_multiclass} 5 | \title{Estimate BayesFactor and mark significantly different windows for many experiments} 6 | \usage{ 7 | estimate_bayes_factor_multiclass(data, common_control, which = "bait_windows", 8 | factor = 4) 9 | } 10 | \arguments{ 11 | \item{data}{an atacr object} 12 | 13 | \item{common_control}{the treatment to consider the control for all other treatments} 14 | 15 | \item{which}{the subset of windows to consider} 16 | 17 | \item{factor}{the BayesFactor to consider significant} 18 | } 19 | \description{ 20 | Estimate BayesFactor and mark significantly different windows for many experiments 21 | } 22 | -------------------------------------------------------------------------------- /man/estimate_fdr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differentials.R 3 | \name{estimate_fdr} 4 | \alias{estimate_fdr} 5 | \title{Estimate FDR and significantly different windows} 6 | \usage{ 7 | estimate_fdr(data, treatment_a, treatment_b, which = "bait_windows", 8 | iterations = 10, fdr_level = 0.05) 9 | } 10 | \arguments{ 11 | \item{data}{an atacr object} 12 | 13 | \item{treatment_a}{the first treatment to consider} 14 | 15 | \item{treatment_b}{the second treatment to consider} 16 | 17 | \item{which}{the subset of windows to consider} 18 | 19 | \item{iterations}{the number of bootstrap iterations to perform} 20 | 21 | \item{fdr_level}{the level at which to mark FDR as significant} 22 | } 23 | \description{ 24 | Estimate FDR and significantly different windows 25 | } 26 | 
-------------------------------------------------------------------------------- /man/scale_factor_normalise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{scale_factor_normalise} 4 | \alias{scale_factor_normalise} 5 | \title{normalise by a provided set of scaling factors} 6 | \usage{ 7 | scale_factor_normalise(data, which = "bait_windows", scaling_factors = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 13 | 14 | \item{scaling_factors}{a vector of scaling factors to normalise by} 15 | } 16 | \value{ 17 | a SummarizedExperiment with scale normalised window values 18 | } 19 | \description{ 20 | normalise by a provided set of scaling factors 21 | } 22 | -------------------------------------------------------------------------------- /man/count_windows_under_threshold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{count_windows_under_threshold} 4 | \alias{count_windows_under_threshold} 5 | \title{count windows that have read counts below the threshold} 6 | \usage{ 7 | count_windows_under_threshold(data, which = "bait_windows", threshold = 0) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to calculate correlations either 13 | 'whole_genome', 'bait_windows' or 'non_bait_windows'} 14 | 15 | \item{threshold}{counts windows with read counts lower than this level} 16 | } 17 | \value{ 18 | dataframe of sample name, count and threshold 19 | } 20 | \description{ 21 | count windows 
that have read counts below the threshold 22 | } 23 | -------------------------------------------------------------------------------- /man/edgeR_exact.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differentials.R 3 | \name{edgeR_exact} 4 | \alias{edgeR_exact} 5 | \title{Estimate differential window counts and mark significantly different windows using edgeR exact method for two samples} 6 | \usage{ 7 | edgeR_exact(data, which = "bait_windows", treatment_a = NULL, 8 | treatment_b = NULL, remove.zeros = FALSE, sig_level = 0.05) 9 | } 10 | \arguments{ 11 | \item{data}{an atacr object} 12 | 13 | \item{which}{the subset of windows to consider} 14 | 15 | \item{sig_level}{the p_value to consider significant} 16 | 17 | \item{treatment_a}{the first treatment to consider} 18 | 19 | \item{treatment_b}{the second treatment to consider} 20 | } 21 | \description{ 22 | Estimate differential window counts and mark significantly different windows using edgeR exact method for two samples 23 | } 24 | -------------------------------------------------------------------------------- /man/make_UpSetR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \name{make_UpSetR} 4 | \alias{make_UpSetR} 5 | \title{given a dataframe from the estimate_fdr_multiclass() function, will return a 6 | list in the format suitable for UpSetR visualisation.
7 | Does not do any filtering of lists, so selected genes must be filtered before hand e.g with dplyr} 8 | \usage{ 9 | make_UpSetR(df) 10 | } 11 | \arguments{ 12 | \item{df}{dataframe from estimate_fdr_multiclass} 13 | } 14 | \value{ 15 | list of named vectors suitable for UpSetR fromList() function 16 | } 17 | \description{ 18 | given a dataframe from the estimate_fdr_multiclass() function, will return a 19 | list in the format suitable for UpSetR visualisation. 20 | Does not do any filtering of lists, so selected genes must be filtered before hand e.g with dplyr 21 | } 22 | -------------------------------------------------------------------------------- /man/estimate_fdr_multiclass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differentials.R 3 | \name{estimate_fdr_multiclass} 4 | \alias{estimate_fdr_multiclass} 5 | \title{Estimate FDR and significantly different windows for many experiments} 6 | \usage{ 7 | estimate_fdr_multiclass(data, common_control, which = "bait_windows", 8 | iterations = 10, fdr_level = 0.05) 9 | } 10 | \arguments{ 11 | \item{data}{an atacr object} 12 | 13 | \item{common_control}{the treatment to consider the control for all other treatments} 14 | 15 | \item{which}{the subset of windows to consider} 16 | 17 | \item{iterations}{the number of bootstrap iterations to perform} 18 | 19 | \item{fdr_level}{the level at which to mark FDR as significant} 20 | } 21 | \description{ 22 | Estimate FDR and significantly different windows for many experiments 23 | } 24 | -------------------------------------------------------------------------------- /man/find_controls_by_GoF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{find_controls_by_GoF} 4 | \alias{find_controls_by_GoF} 5 | 
\title{find control windows by convergence method in https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate} 6 | \usage{ 7 | find_controls_by_GoF(atacr, which = "bait_windows") 8 | } 9 | \arguments{ 10 | \item{atacr}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 13 | } 14 | \value{ 15 | a character vector of window names 16 | } 17 | \description{ 18 | find control windows by convergence method in https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate 19 | } 20 | -------------------------------------------------------------------------------- /man/edgeR_multiclass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differentials.R 3 | \name{edgeR_multiclass} 4 | \alias{edgeR_multiclass} 5 | \title{Estimate differential window counts and mark significantly different windows using edgeR glmFIT method for multiple samples with common control} 6 | \usage{ 7 | edgeR_multiclass(data, common_control, which = "bait_windows", 8 | sig_level = 0.05, remove.zeros = FALSE) 9 | } 10 | \arguments{ 11 | \item{data}{an atacr object} 12 | 13 | \item{which}{the subset of windows to consider} 14 | 15 | \item{sig_level}{the p_value to consider significant} 16 | 17 | \item{common_control}{the treatment to consider the control for all other treatments} 18 | } 19 | \description{ 20 | Estimate differential window counts and mark significantly different windows using edgeR glmFIT method for multiple samples with common control 21 | } 22 | -------------------------------------------------------------------------------- /man/windows_below_coverage_threshold_plot.Rd:
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/figures.R 3 | \name{windows_below_coverage_threshold_plot} 4 | \alias{windows_below_coverage_threshold_plot} 5 | \title{generate cumulative plot of number of windows below a threshold in samples} 6 | \usage{ 7 | windows_below_coverage_threshold_plot(data, which = "bait_windows", 8 | from = 0, to = 10) 9 | } 10 | \arguments{ 11 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 12 | 13 | \item{which}{("bait_windows") the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 14 | 15 | \item{from}{(0) the lowest threshold to consider} 16 | 17 | \item{to}{(10) the highest threshold to consider} 18 | } 19 | \value{ 20 | ggplot2 plot 21 | } 22 | \description{ 23 | generate cumulative plot of number of windows below a threshold in samples 24 | } 25 | -------------------------------------------------------------------------------- /man/extract_features_from_gff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{extract_features_from_gff} 4 | \alias{extract_features_from_gff} 5 | \title{pulls lines out of a gff file based on identifiers provided} 6 | \usage{ 7 | extract_features_from_gff(ids, gff, type = c("gene"), col = "ID", 8 | out_file = NULL, version = "3") 9 | } 10 | \arguments{ 11 | \item{ids}{character vector of ids/names of feature to extract} 12 | 13 | \item{gff}{path to gff file} 14 | 15 | \item{type}{feature type of features to extract.} 16 | 17 | \item{col}{column name of GFF file containing id to use (ID)} 18 | 19 | \item{out_file}{path of file name to write.
If NULL, no file is written} 20 | 21 | \item{version}{which gff version to export (Default is "3")} 22 | } 23 | \value{ 24 | GenomicRanges or NULL with GFF outfile. 25 | } 26 | \description{ 27 | pulls lines out of a gff file based on identifiers provided 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat/test_differentials.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | library(atacr) 3 | 4 | context("differential count functions") 5 | 6 | test_that("get_t() returns proper value", { 7 | expect_equal( unname(get_t(1:100, c(1:10, 90:100))), -64.6472, tolerance = 0.0000001) 8 | }) 9 | 10 | test_that("select_comparisons() extracts proper columns", { 11 | l <- select_comparisons(sim_counts, "treatment", "control") 12 | expect_has_all_and_only_these_members(l, c("treatment_a_data", "treatment_b_data")) 13 | expect_vectors_equal(l$treatment_a_data, c("treatment_001", "treatment_002", "treatment_003")) 14 | expect_vectors_equal(l$treatment_b_data, c("control_001", "control_002", "control_003")) 15 | }) 16 | 17 | test_that("estimate_fdr() returns proper dataframe", { 18 | expect_vectors_equal(names(estimate_fdr(sim_counts, "control", "treatment")), c("window", "t", "p_value", "fdr", "mean_count_a", "mean_count_b", "sd_a", "sd_b", "log2fc", "is_sig")) 19 | }) 20 | -------------------------------------------------------------------------------- /man/load_atac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{load_atac} 4 | \alias{load_atac} 5 | \title{populate the result object with the RangedSummarizedExperiment from the bam files from ATAC seq data.
Called from make_counts() when is_rnaseq == FALSE.} 6 | \usage{ 7 | load_atac(result, width, filter_params, window_file) 8 | } 9 | \arguments{ 10 | \item{result}{list from make_counts()} 11 | 12 | \item{width}{an integer of the width of the bins the bait regions will be divided into} 13 | 14 | \item{filter_params}{a params object, described in atacr::make_counts()} 15 | 16 | \item{window_file}{a filename of a CSV file with the bait regions} 17 | } 18 | \value{ 19 | a list with window counts for bait/non-bait windows 20 | } 21 | \description{ 22 | populate the result object with the RangedSummarizedExperiment from the bam files from ATAC seq data. Called from make_counts() when is_rnaseq == FALSE. 23 | } 24 | -------------------------------------------------------------------------------- /man/athal_wt_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \docType{data} 4 | \name{athal_wt_counts} 5 | \alias{athal_wt_counts} 6 | \title{athal_wt_counts - real capture RNASeq count data 7 | The data `athal_wt_counts` are real, experimentally derived counts from untreated WT Arabidopsis leaves for 52 baits, each set of baits representing a gene. Three replicates are provided for each gene. This data set is intended to be used in resampling procedures for making test data sets.} 8 | \format{a named vector of counts} 9 | \usage{ 10 | athal_wt_counts 11 | } 12 | \description{ 13 | athal_wt_counts - real capture RNASeq count data 14 | The data `athal_wt_counts` are real, experimentally derived counts from untreated WT Arabidopsis leaves for 52 baits, each set of baits representing a gene. Three replicates are provided for each gene. This data set is intended to be used in resampling procedures for making test data sets. 
15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/load_rnaseq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{load_rnaseq} 4 | \alias{load_rnaseq} 5 | \title{populate the result object with the RangedSummarizedExperiment from the bam files from RNA seq data. Called from make_counts() when is_rnaseq == TRUE.} 6 | \usage{ 7 | load_rnaseq(result, filter_params, window_file, gene_id_col = "ID") 8 | } 9 | \arguments{ 10 | \item{result}{list from make_counts()} 11 | 12 | \item{filter_params}{a params object, described in atacr::make_counts()} 13 | 14 | \item{window_file}{a filename of a CSV file with the bait regions} 15 | 16 | \item{gene_id_col}{a character string stating which attribute name to take from the final column of the GFF file to use for the window name in RNASeq data. Usually this is the name of the gene. Default = ID.} 17 | } 18 | \description{ 19 | populate the result object with the RangedSummarizedExperiment from the bam files from RNA seq data. Called from make_counts() when is_rnaseq == TRUE. 20 | } 21 | -------------------------------------------------------------------------------- /man/normalise_by_window_width.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{normalise_by_window_width} 4 | \alias{normalise_by_window_width} 5 | \title{normalise counts by window width (counts / window width)} 6 | \usage{ 7 | normalise_by_window_width(data, which = "bait_windows", per = 1000) 8 | } 9 | \arguments{ 10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 11 | 12 | \item{which}{the subset of the data to normalise. 
Default = bait_windows} 13 | 14 | \item{per}{= the expression count / width gives the reads in the window divided by the width, so a 3000 nt gene with 30000 reads mapping to it will have a read count of just 10. Setting this parameter allows you to represent the counts per some other number of nts. Default = 1000, so gives the reads per kb of the gene.} 15 | } 16 | \value{ 17 | SummarizedExperiment object with normalised counts 18 | } 19 | \description{ 20 | normalise counts by window width (counts / window width) 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat/test_normalisation.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | library(atacr) 3 | 4 | 5 | context("normalisation functions") 6 | 7 | 8 | test_that("library_size_normalisation_internal() returns proper values", { 9 | 10 | expected_counts <- matrix(c(2.5, 4, 4.375, 5, 5, 5, 7.5, 6, 5.625), nrow=3) 11 | expected_se <- SummarizedExperiment::SummarizedExperiment(assays=list(counts=expected_counts)) 12 | in_se <- SummarizedExperiment::SummarizedExperiment(assays=list(counts=matrix(1:9, nrow=3))) 13 | out_se <- library_size_normalisation_internal(in_se) 14 | 15 | expect_equal(out_se, expected_se) 16 | 17 | }) 18 | 19 | test_that("get_scaling_factors() gets proper values", { 20 | expected_factors <- c(2,1,0.666666667) 21 | tm <- matrix(c(rep(1,3), rep(2,3), rep(3,3)), nrow=3) 22 | expect_equal(get_scaling_factors(tm), expected_factors) 23 | }) 24 | 25 | 26 | test_that("scale_normalise() returns proper values", { 27 | expected_mat <- matrix(rep(2,9), nrow=3) 28 | tm <- matrix(c(rep(1,3), rep(2,3), rep(3,3)), nrow=3) 29 | expect_equal(scale_normalise(tm, c(2,1,0.66667)), expected_mat, tolerance = 1e-05) 30 | }) 31 | -------------------------------------------------------------------------------- /man/library_size_normalisation.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{library_size_normalisation} 4 | \alias{library_size_normalisation} 5 | \title{performs a whole library size normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that} 6 | \usage{ 7 | library_size_normalisation(data, which = "bait_windows", 8 | by_treatment = FALSE) 9 | } 10 | \arguments{ 11 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()} 12 | 13 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 14 | 15 | \item{by_treatment}{(FALSE) will group the assay into different treatments and normalise each separately - assumes that within treatment groups the samples should show little difference, but between sample treatment groups could show lots of difference between windows.} 16 | } 17 | \value{ 18 | a SummarizedExperiment object with a new, normalised assay matrix 19 | } 20 | \description{ 21 | performs a whole library size normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that 22 | } 23 | -------------------------------------------------------------------------------- /man/control_window_normalise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalisation.R 3 | \name{control_window_normalise} 4 | \alias{control_window_normalise} 5 | \title{performs control window based normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that} 6 | \usage{ 7 | control_window_normalise(data, window_file, which = "bait_windows", 8 | by_treatment = FALSE) 9 | } 10 | \arguments{ 11 | \item{data}{a list of 
SummarizedExperiment objects from atacr::make_counts()} 12 | 13 | \item{window_file}{a text file containing the positions of control window/gene ranges} 14 | 15 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'} 16 | 17 | \item{by_treatment}{should normalisation be done by all experiments (one median virtual experiment to compare all samples to) OR should normalisation be done by each treatment type (one median virtual experiment for each different treatment type)} 18 | } 19 | \value{ 20 | a vector of scaling factors from control genes 21 | } 22 | \description{ 23 | performs control window based normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that 24 | } 25 | -------------------------------------------------------------------------------- /tests/testthat/control_windows.txt: -------------------------------------------------------------------------------- 1 | "bait_name","seq_name","start_pos","end_pos" 2 | "AT4G05320_1","Chr4",2716013,2716132 3 | "AT4G05320_14","Chr4",2717352,2717471 4 | "AT4G05320_15","Chr4",2717455,2717574 5 | "AT4G05320_16","Chr4",2717558,2717677 6 | "AT4G05320_18","Chr4",2717764,2717883 7 | "AT4G05320_19","Chr4",2717867,2717986 8 | "AT4G05320_2","Chr4",2716116,2716235 9 | "AT4G05320_20","Chr4",2717970,2718089 10 | "AT4G05320_22","Chr4",2718176,2718295 11 | "AT4G05320_23","Chr4",2718279,2718398 12 | "AT4G05320_24","Chr4",2718382,2718501 13 | "AT4G05320_25","Chr4",2718485,2718604 14 | "AT4G05320_26","Chr4",2718588,2718707 15 | "AT4G05320_27","Chr4",2718691,2718810 16 | "AT4G05320_28","Chr4",2718794,2718913 17 | "AT4G05320_29","Chr4",2718897,2719016 18 | "AT4G05320_3","Chr4",2716219,2716338 19 | "AT4G05320_30","Chr4",2719000,2719119 20 | "AT4G05320_31","Chr4",2719103,2719222 21 | "AT4G05320_32","Chr4",2719206,2719325 22 | "AT4G05320_33","Chr4",2719309,2719428 23 | "AT4G05320_34","Chr4",2719412,2719531 24 |
"AT4G05320_35","Chr4",2719515,2719634 25 | "AT4G05320_36","Chr4",2719618,2719737 26 | "AT4G05320_38","Chr4",2719824,2719943 27 | "AT4G05320_39","Chr4",2719927,2720046 28 | "AT4G05320_4","Chr4",2716322,2716441 29 | "AT4G05320_40","Chr4",2720030,2720149 30 | "AT4G05320_41","Chr4",2720133,2720252 31 | "AT4G05320_42","Chr4",2720236,2720355 32 | "AT4G05320_43","Chr4",2720339,2720458 33 | "AT4G05320_44","Chr4",2720441,2720560 34 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: atacr 2 | Type: Package 3 | Title: Analysing Capture Seq Count Data 4 | Version: 0.4.14 5 | Authors@R: c( 6 | person("Dan","MacLean", email="dan.maclean@tsl.ac.uk", role=c("aut", "cre")), 7 | person("Ram-Krishna", "Shrestha", email="ram-krishna.shrestha@tsl.ac.uk", role="aut")) 8 | Description: This package helps with the analysis of count data from RNA and ATAC capture-seq experiments. 9 | Using BioConductor RangedSummarizedExperiment objects, atacr implements a set of helper 10 | functions and quality control plots specific to the analysis of particularly windows. 11 | Especially, atacr is useful for performing control window based between sample normalizations and for 12 | easily running non-standard tests for differentially accessible windows in common reference designs. 
13 | Depends: R (>= 3.0.0) 14 | License: MIT + file LICENSE 15 | LazyData: TRUE 16 | Imports: 17 | BayesFactor, 18 | boot, 19 | biomaRt, 20 | corrplot, 21 | csaw, 22 | dplyr, 23 | edgeR, 24 | fitdistrplus, 25 | GenomeGraphs, 26 | GenomicAlignments, 27 | GenomicRanges, 28 | grid, 29 | gridExtra, 30 | ggjoy, 31 | ggplot2, 32 | ggthemes, 33 | heatmap3, 34 | IRanges, 35 | magrittr, 36 | methods, 37 | plyr, 38 | RColorBrewer, 39 | reshape, 40 | reshape2, 41 | Rsamtools, 42 | rtracklayer, 43 | S4Vectors, 44 | stringr, 45 | SummarizedExperiment, 46 | tidyr 47 | RoxygenNote: 6.0.1 48 | Suggests: testthat, 49 | knitr, 50 | pander, 51 | rmarkdown, 52 | UpSetR 53 | VignetteBuilder: knitr 54 | biocViews: 55 | -------------------------------------------------------------------------------- /man/sim_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/atacr.R 3 | \docType{data} 4 | \name{sim_counts} 5 | \alias{sim_counts} 6 | \alias{sim_counts} 7 | \title{sim_counts - simulated count data} 8 | \format{A SummarizedExperiment object} 9 | \usage{ 10 | sim_counts 11 | 12 | sim_counts 13 | } 14 | \description{ 15 | The data `sim_counts` is a simulated data set with computer generated window counts for three replicates of each of two conditions in experiments with 500 bait and non-bait windows. We'll set each experiment to have 10 \% of windows differentially accessible at a difference of approximately 2 fold. 16 | 17 | The data `sim_counts` is a simulated data set with computer generated window counts for three replicates of each of two conditions in experiments with 500 bait and non-bait windows. We'll set each experiment to have 10 \% of windows differentially accessible at a difference of approximately 2 fold. 18 | } 19 | \details{ 20 | Counts in bait windows for "control" samples will be modelled as \eqn{C \sim NB(\mu = 30, size = 10\mu)}. 
21 | 22 | Counts in bait windows for "treatment" samples will be modelled as \eqn{C \cdot unif(0.8,1.2)}. 23 | 24 | Differentially accessible bait windows will be modelled as \eqn{C_{1..50} \cdot \mathcal{N}( \mu=2,\sigma = \mu/2)} 25 | 26 | Counts in bait windows for "control" samples will be modelled as \eqn{C \sim NB(\mu = 30, size = 10\mu)}. 27 | 28 | Counts in bait windows for "treatment" samples will be modelled as \eqn{C \cdot unif(0.8,1.2)}. 29 | 30 | Differentially accessible bait windows will be modelled as \eqn{C_{1..50} \cdot \mathcal{N}( \mu=2,\sigma = \mu/2)} 31 | } 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /man/make_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loading.R 3 | \name{make_counts} 4 | \alias{make_counts} 5 | \title{load BAM files and calculate window coverage} 6 | \usage{ 7 | make_counts(window_file, sample_treatment_file, width = 50, 8 | filter_params = make_params(), with_df = FALSE, is_rnaseq = FALSE, 9 | gene_id_col = "ID") 10 | } 11 | \arguments{ 12 | \item{window_file}{A filename of a CSV file with the bait regions} 13 | 14 | \item{sample_treatment_file}{A filename of a CSV file that lists treatments, samples and bam file paths} 15 | 16 | \item{width}{an integer of the width of the bins the bait regions will be divided into} 17 | 18 | \item{filter_params}{a params object from atacr::make_params() that define how reads will be extracted from the BAM files. Optionally, for greater control, either a csaw::readParam() (for ATACseq) or Rsamtools::ScanBamParam() object for RNASeq can be provided. 
See http://bioconductor.org/packages/release/bioc/manuals/csaw/man/csaw.pdf or https://www.rdocumentation.org/packages/Rsamtools/versions/1.24.0/topics/ScanBamParam for details} 19 | 20 | \item{with_df}{attach a dataframe version of the data Default = FALSE} 21 | 22 | \item{is_rnaseq}{a boolean stating whether this is RNASeq data. Default = FALSE} 23 | 24 | \item{gene_id_col}{a character string stating which attribute name to take from the final column of the GFF file to use for the window name in RNASeq data. Usually this is the name of the gene. Default = ID.} 25 | } 26 | \value{ 27 | a list of metadata and RangedSummarizedExperiment objects with read count in windows for whole genome, bait windows and non-bait windows for each sample 28 | } 29 | \description{ 30 | load BAM files and calculate window coverage 31 | } 32 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as.data.frame,atacr) 4 | S3method(as.matrix,atacr) 5 | S3method(plot,atacr) 6 | S3method(print,atacr) 7 | S3method(summary,atacr) 8 | export(as.DGEList) 9 | export(calc_quantiles) 10 | export(chromosome_coverage) 11 | export(control_window_normalise) 12 | export(control_window_scaling_factors) 13 | export(count_windows_under_threshold) 14 | export(coverage_count_summary) 15 | export(coverage_summary) 16 | export(edgeR_exact) 17 | export(edgeR_multiclass) 18 | export(estimate_GoFs) 19 | export(estimate_bayes_factor) 20 | export(estimate_bayes_factor_multiclass) 21 | export(estimate_fdr) 22 | export(estimate_fdr_multiclass) 23 | export(extract_features_from_gff) 24 | export(find_controls_by_GoF) 25 | export(get_GoF_factors) 26 | export(get_expected_values) 27 | export(library_size_normalisation) 28 | export(library_size_scaling_factors) 29 | export(ma_data) 30 | export(ma_plot) 31 | export(make_UpSetR) 32 | 
export(make_counts) 33 | export(make_params) 34 | export(make_tutorial_data) 35 | export(median_virtual_experiment) 36 | export(normalise_by_window_width) 37 | export(observed_expected_bins) 38 | export(plot_GoF) 39 | export(plot_count_by_chromosome) 40 | export(plot_counts) 41 | export(qqarb) 42 | export(sample_correlation_plot) 43 | export(sample_kmeans_cluster) 44 | export(sample_pca_plot) 45 | export(scale_factor_normalise) 46 | export(simulate_counts) 47 | export(target_count_coverage) 48 | export(target_count_summary) 49 | export(text_to_gff) 50 | export(treatments) 51 | export(view_gene) 52 | export(windows_below_coverage_threshold_plot) 53 | importFrom(SummarizedExperiment,rbind) 54 | importFrom(graphics,hist) 55 | importFrom(magrittr,"%>%") 56 | importFrom(methods,as) 57 | importFrom(stats,cor) 58 | importFrom(stats,cor.test) 59 | importFrom(stats,kmeans) 60 | importFrom(stats,median) 61 | importFrom(stats,p.adjust) 62 | importFrom(stats,quantile) 63 | importFrom(stats,rlnorm) 64 | importFrom(stats,rnbinom) 65 | importFrom(stats,rnorm) 66 | importFrom(stats,rpois) 67 | importFrom(stats,runif) 68 | importFrom(stats,sd) 69 | importFrom(stats,start) 70 | importFrom(stats,t.test) 71 | importFrom(stats,window) 72 | importFrom(utils,capture.output) 73 | importFrom(utils,read.csv) 74 | importFrom(utils,str) 75 | -------------------------------------------------------------------------------- /vignettes/atacr_which.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "atacr objects and the which argument" 3 | author: "Dan MacLean" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{atacr which} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ## The `atacr` Object 13 | 14 | When `make_counts()` is run, an `atacr` object is returned. This is a simple, somewhat informal object based on the R list type. 
It is basically an R list with the following members: 15 | 16 | 1. treatments - a character vector of treatment names 17 | 2. samples - a character vector of sample names 18 | 3. bam_files - a character vector of paths for the used BAM files 19 | 4. bait_regions - a `GenomicRanges::GRanges` object describing the bait window regions 20 | 5. bait_windows - a `RangedSummarizedExperiment` object containing the counts in the windows in `bait_regions` 21 | 6. non_bait_windows - a `RangedSummarizedExperiment` object containing the counts in the windows in the regions outside `bait_regions` 22 | 7. whole_genome - the union of bait_windows and non_bait_windows 23 | 8. dataframe - an optional member and the result of calling `as.data.frame()` on the `atacr` object 24 | 25 | ### Column Order 26 | 27 | The `RangedSummarizedExperiment` objects carry the count data. They are organised as a matrix with rows representing windows and columns representing different samples. Their order is conserved and is the same as that in the `treatments`, `samples` and `bam_files`. 28 | 29 | ## The 'which' argument 30 | 31 | Many of the functions allow you to state which member of the `atacr` list (really a `RangedSummarizedExperiment`) you wish to apply the function to with the `which` argument, e.g. 32 | 33 | ```{r, eval=FALSE} 34 | plot_counts(counts, which = "bait_windows", log10 = FALSE) 35 | ``` 36 | 37 | ## Adding members to the `atacr` object 38 | 39 | In this way, the results of functions that return `RangedSummarizedExperiment`s can be added as new members of the list and worked on just like the built-in ones; this is especially useful for normalisations.
40 | 41 | ```{r, eval=FALSE} 42 | 43 | counts$by_sample <- library_size_normalisation(counts, 44 | by_treatment = TRUE) 45 | 46 | plot_counts(counts, which = "by_sample", log10 = FALSE) 47 | ``` 48 | 49 | -------------------------------------------------------------------------------- /vignettes/summaries.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summaries" 3 | author: "Dan MacLean" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | fig_caption: yes 8 | vignette: > 9 | %\VignetteIndexEntry{Summaries} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | The `atacr` package provides functions for getting quick summaries of your data. An overview comes from `summary()` 15 | 16 | ```{r, echo=FALSE, eval=TRUE} 17 | library(atacr) 18 | counts <- simulate_counts() 19 | ``` 20 | 21 | ```{r, echo=TRUE, eval=TRUE} 22 | summary(counts) 23 | ``` 24 | 25 | which shows the on and off target hit counts, the quantiles and the mean read depths. 26 | 27 | The count distributions across the bait and non-bait windows by sample can be plotted quickly with `coverage_summary()`. 28 | 29 | ```{r, echo=TRUE, eval=TRUE, fig.width=7} 30 | coverage_summary(counts) 31 | ``` 32 | 33 | ## Diagnostic plots 34 | 35 | It is possible to inspect the coverage in a given data set by looking at the raw counts. 36 | 37 | ```{r, fig.width=7} 38 | plot_counts(counts, which = "bait_windows", log10 = FALSE) 39 | ``` 40 | 41 | ### Low counts in windows 42 | 43 | The number of windows below a threshold for each experiment can be seen with `windows_below_coverage_threshold_plot()`, and you can set the lower and upper bounds with the `from` and `to` arguments.
44 | 45 | ```{r, echo=TRUE, eval=TRUE, fig.width=7} 46 | 47 | windows_below_coverage_threshold_plot(counts, from = 5, to = 25) 48 | ``` 49 | 50 | 51 | ### MA plots 52 | 53 | MA plots of sample count versus all sample median count - to highlight odd looking experiments and extreme outliers - can be displayed with `ma_plot()`. By default this will use the `bait_windows` data, but you can set the `which` argument to use other subsets, e.g. `non_bait_windows` 54 | ```{r, fig.width=7} 55 | ma_plot(counts) 56 | ``` 57 | 58 | ### Per chromosome plots 59 | 60 | These are bar charts of coverage at the windows across the chromosomes (`seqnames`) provided in the data. 61 | 62 | ```{r, fig.width=7, fig.height=7, fig.cap="The simulated data here are spread randomly across the chromosome."} 63 | plot_count_by_chromosome(counts) 64 | ``` 65 | 66 | ### Sample comparison plots 67 | 68 | A matrix of correlation between counts in the samples can be plotted with the `sample_correlation_plot()` function. In this plot the colour and size scale of the dots represent the Pearson correlation coefficient. Pairwise comparisons with _p_ < 0.05 have a blank space. 69 | 70 | ```{r, fig.width=7} 71 | sample_correlation_plot(counts) 72 | ``` 73 | 74 | A PCA plot that clusters the most similar samples can also be generated using the `sample_pca_plot()` function. 
75 | 76 | ```{r, fig.width=7} 77 | sample_pca_plot(counts) 78 | ``` 79 | -------------------------------------------------------------------------------- /tests/testthat/test_atacr.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | library(atacr) 3 | 4 | 5 | context("summary and count functions") 6 | 7 | test_that("target_count_summary() returns proper dataframe", { 8 | 9 | smry <- target_count_summary(sim_counts) 10 | expect_is(smry, "data.frame") #right class 11 | expect_vectors_equal(colnames(smry), c("sample","percent_on_target", "on_target", "off_target")) 12 | 13 | }) 14 | 15 | test_that("coverage_count_summary() returns proper dataframe", { 16 | 17 | smry <- coverage_count_summary(sim_counts) 18 | expect_is(smry, "data.frame") 19 | expect_vectors_equal(colnames(smry), c("on_target", "off_target", "sample")) 20 | 21 | }) 22 | 23 | test_that("target_count_coverage() returns proper dataframe", { 24 | cov <- target_count_coverage(sim_counts) 25 | expect_is(cov, "data.frame") 26 | expect_vectors_equal(colnames(cov), c("sample", "target", "count_sum", "mean_coverage")) 27 | }) 28 | 29 | test_that("target_count_coverage() returns proper sized dataframe", { 30 | cov <- target_count_coverage(sim_counts) 31 | expect_length(cov$count_sum, 12) 32 | expect_equal(nrow(cov[cov$target == "on_target",]), 6) 33 | expect_equal(nrow(cov[cov$target == "off_target",]), 6) 34 | }) 35 | 36 | test_that("target_count_coverage() returns proper values in dataframe", { 37 | cov <- target_count_coverage(sim_counts) 38 | expect_vectors_equal(cov$count_sum,c(15001.00,15170.00,14976.00,16665.77,16755.63,16640.31,355.00,359.00,360.00,364.00,405.00,376.00)) 39 | }) 40 | 41 | test_that("sample_kmeans() returns proper dataframe and proper values in the dataframe", { 42 | k <- sample_kmeans_cluster(sim_counts) 43 | expect_vectors_equal(colnames(k), c("cluster_id", "sample")) 44 | expect_vectors_equal(k$cluster_id, 
c(1,1,1,2,2,2)) 45 | }) 46 | 47 | test_that("count_windows_under_threshold() returns proper dataframe with proper values", { 48 | th <- count_windows_under_threshold(sim_counts, threshold=15) 49 | expect_vectors_equal(colnames(th), c("count", "threshold", "sample")) 50 | expect_vectors_equal(th$count, c(0,0,0,4,4,4)) 51 | }) 52 | 53 | test_that("calc_quantiles() returns list() when threshold == NULL", { 54 | l <- calc_quantiles(sim_counts) 55 | expect_is(l, "list") 56 | expect_has_all_and_only_these_members(l, c("bait_windows", "non_bait_windows")) 57 | }) 58 | 59 | test_that("get_expected_values() returns right random numbers",{ 60 | set.seed(1234) 61 | expect_vectors_equal(get_expected_values(c(1,2,3,4,1,2,3,4),dist="norm"),c(1.057280,2.831591,3.796155,-0.303645,3.012902,3.104852,1.813054,1.846650)) 62 | }) 63 | 64 | test_that("observed_expected_bins() gives right values", { 65 | l <- observed_expected_bins(c(1,2,3,4,1,2,3,4)) 66 | expect_has_all_and_only_these_members(l, c("observed", "expected")) 67 | expect_vectors_equal(l$observed, c(8)) 68 | expect_vectors_equal(l$expected, c(8)) 69 | }) 70 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) 5 | [![Build Status](https://travis-ci.org/TeamMacLean/atacr.svg?branch=master)](https://travis-ci.org/TeamMacLean/atacr) 6 | [![codecov](https://codecov.io/gh/TeamMacLean/atacr/branch/master/graph/badge.svg)](https://codecov.io/gh/TeamMacLean/atacr) 7 | 8 | --- 9 | 10 | [![minimal R version](https://img.shields.io/badge/R%3E%3D-3.0.0-6666ff.svg)](https://cran.r-project.org/) 11 | 
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/atacr)](https://cran.r-project.org/package=atacr) 12 | [![packageversion](https://img.shields.io/badge/Package%20version-0.4.14-orange.svg?style=flat-square)](commits/master) 13 | 14 | --- 15 | 16 | [![Last-changedate](https://img.shields.io/badge/last%20change-`r gsub('-', '--', Sys.Date())`-yellowgreen.svg)](/commits/master) 17 | 18 | 19 | 20 | ```{r, echo = FALSE, warning=FALSE, message=FALSE} 21 | knitr::opts_chunk$set( 22 | collapse = TRUE, 23 | comment = "#>", 24 | fig.path = "README-" 25 | ) 26 | devtools::load_all("~/Desktop/atacr") 27 | 28 | ``` 29 | 30 | # atacR 31 | 32 | Helps with the analysis of count data from RNA-capture-seq and ATAC-capture-seq experiments. Using BioConductor RangedSummarizedExperiment objects, atacr implements a set of helper functions and quality control plots specific to the analysis of counts of reads in windows across genomes. Especially, atacr is useful for performing sample normalizations and for easily running bootstrap and Bayes factor tests for differentially accessible windows in common reference designs. 33 | 34 | ## Installation 35 | 36 | You can install atacR from github with: 37 | 38 | ```{r gh-installation, eval = FALSE} 39 | # install.packages("devtools") 40 | devtools::install_github("TeamMacLean/atacr") 41 | ``` 42 | 43 | ## Documentation 44 | 45 | You can read documentation on the following topics 46 | 47 | 1. [Tutorial - A worked example](https://teammaclean.github.io/atacr) 48 | 2. [atacR - General Overview](https://teammaclean.github.io/atacr/atacr.html) 49 | 3. [Loading Data](https://teammaclean.github.io/atacr/loading.html) 50 | 4. [Summaries of Data](https://teammaclean.github.io/atacr/summaries.html) 51 | 5. [Normalising Data](https://teammaclean.github.io/atacr/normalisations.html) 52 | 6. [Differential Windows](https://teammaclean.github.io/atacr/differential_windows.html) 53 | 7. 
[Subsetting Data](https://teammaclean.github.io/atacr/atacr_which.html) 54 | 55 | ## Quick start: 56 | 57 | ```{r example, echo=TRUE, fig.height=7, fig.width=7} 58 | library(atacr) 59 | summary(sim_counts) 60 | ``` 61 | 62 | ```{r} 63 | plot(sim_counts) 64 | ``` 65 | -------------------------------------------------------------------------------- /vignettes/atacr.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using atacr for Enriched RNAseq and ATACseq analysis" 3 | author: "Dan MacLean" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{atacr} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | _atacr_ is a package for creating statistics and diagnostic plots for short read sequence data from capture enriched RNAseq and ATACseq experiments. 13 | 14 | This vignette provides a brief overview of the capabilities of `atacr`. 15 | 16 | ## Sample data 17 | 18 | > The function `simulate_counts()` will give us a small simulated data set of three replicates from a control and treatment. Each of the six sets of counts follows a mixed distribution of 10 counts drawn from a log-normal distribution with logmean 4 and SD 1, and 40 counts with logmean 10 and SD 1. This mimics the enrichment pattern we see with capture enriched data. 10 of the counts are multiplied by a value drawn from the normal distribution with mean 2 and SD 1 so can appear differentially expressed. These counts represent bait-windows - regions of the genome for which baits were designed. The bait-window counts are mixed with 50 non-bait-windows which have 0 counts. 
19 | 20 | ```{r} 21 | library(atacr) 22 | counts <- simulate_counts() 23 | ``` 24 | 25 | ## Experiment Summary Information 26 | 27 | It's very easy to get information on the coverage for bait/non-bait windows on a per sample basis. 28 | 29 | ```{r, echo=TRUE, fig.height=7, fig.width=7} 30 | summary(counts) 31 | plot(counts) 32 | ``` 33 | 34 | 35 | These plots can be generated individually with the following functions: 36 | 37 | ```{r, eval=FALSE} 38 | coverage_summary(counts) 39 | chromosome_coverage(counts) 40 | ``` 41 | 42 | ## QC Plots 43 | 44 | ### Plot for coverage by sequence and sample 45 | ```{r, fig.height=7, fig.width=7} 46 | plot_count_by_chromosome(counts) 47 | ``` 48 | 49 | 50 | ### Correlations between sample counts 51 | ```{r, fig.height=7, fig.width=7} 52 | sample_correlation_plot(counts) 53 | ``` 54 | 55 | 56 | 57 | ### Count windows below a threshold. 58 | ```{r, fig.height=7, fig.width=7} 59 | windows_below_coverage_threshold_plot(counts, which = "bait_windows", from=0, to=1000) 60 | ``` 61 | 62 | 63 | ### MA plots 64 | ```{r, fig.height=7, fig.width=7} 65 | ma_plot(counts) 66 | ``` 67 | 68 | ## Normalisation 69 | 70 | Normalisation strategies are easy to implement with `atacr` and helpful functions are included 71 | 72 | ```{r, fig.height=7, fig.width=7} 73 | counts$library_size_normalised <- library_size_normalisation(counts) 74 | ma_plot(counts, which = "library_size_normalised") 75 | ``` 76 | 77 | Normalisation by control windows. Requires a text file with the control window positions 78 | 79 | ```{r, eval=FALSE} 80 | window_file <- "control_windows.txt" 81 | counts$control_window_normalisation <- control_window_normalise(counts, window_file) 82 | ``` 83 | 84 | ## Detect differentially expressed/accessible windows 85 | 86 | Using a simple bootstrap _t_-test method for simple two-way comparisons. 
87 | ```{r, results = "asis" } 88 | 89 | result <- estimate_fdr(sim_counts, "treatment", "control", which = "bait_windows") 90 | 91 | pander::pandoc.table(head(result)) 92 | ``` 93 | This can also be done for multiclass designs with multiple samples against a common reference. 94 | 95 | ```{r, results = "asis"} 96 | multi_result <- estimate_fdr_multiclass(sim_counts, "control", which = "bait_windows") 97 | pander::pandoc.table(head(multi_result)) 98 | ``` 99 | 100 | -------------------------------------------------------------------------------- /R/sims.R: -------------------------------------------------------------------------------- 1 | #' simulate counts and return an atacr object 2 | #' 3 | #' Builds a synthetic data set of three control and three treatment samples over 100 windows of width 50 on a single sequence, `synth_chrom`. 50 rows carry counts and 50 rows are all zero; the row order is shuffled. 4 | #' @return a list of class `atacr` with members `whole_genome`, `treatments`, `sample_names`, `bam_files`, `bait_regions`, `bait_windows`, `non_bait_windows` and `dataframe` 5 | #' @export 6 | simulate_counts <- function() { 7 | # Each of the six sets of counts follows a mixed distribution of 10 counts drawn from a log-normal distribution with logmean 4 and SD 1, and 40 counts with logmean 10 and SD 1. This mimics the enrichment pattern we see with capture enriched data. 10 of the counts are multiplied by a value drawn from the normal distribution with mean 2 and SD 1 so can appear differentially expressed. These counts represent bait-windows - regions of the genome for which baits were designed and selected. 
8 | num_windows <- 100 #50 bait windows, 50 non bait windows 9 | reps <- 3 10 | 11 | # NOTE(review): `col.names` is passed through as in the original call; it looks like it ends up as an ordinary column of colData rather than as row names - confirm before changing 12 | col_data <- S4Vectors::DataFrame(Treatment = c(rep("control", reps), rep("treatment", reps)), 13 | col.names = c( 14 | sprintf("control_%03d", 1:reps), 15 | sprintf("treatment_%03d", 1:reps) 16 | )) 17 | 18 | # consecutive windows of width 50 starting at 1, 51, 101, ... 19 | row_ranges <- GenomicRanges::GRanges( 20 | rep("synth_chrom", num_windows), 21 | IRanges::IRanges(seq(1, (num_windows * 50) , by = 50), width = 50), 22 | strand = sample(c("+", "-"), num_windows, TRUE), 23 | feature_id = sprintf("window_%06d", 1:num_windows) 24 | ) 25 | 26 | names(row_ranges) <- sprintf("window_%06d", 1:num_windows) 27 | 28 | a <- 29 | floor(c( 30 | rlnorm(10, meanlog = 4, sdlog = 1), 31 | rlnorm(40, meanlog = 10, sdlog = 1) 32 | )) #basic two peak dist 33 | # the other two controls are the first control scaled per window by |N(1, 1)| 34 | b <- floor(a * abs(rnorm(50, 1, sd = 1))) 35 | c <- floor(a * abs(rnorm(50, 1, sd = 1))) 36 | # treatments: the first 10 (low count) windows are scaled by |N(2, 1)|, the remaining 40 by |N(1, 1)| 37 | d <- 38 | floor(a * abs(c(rnorm(10, 2, sd = 1), rnorm(40, 1, sd = 1)))) 39 | e <- 40 | floor(a * abs(c(rnorm(10, 2, sd = 1), rnorm(40, 1, sd = 1)))) 41 | f <- 42 | floor(a * abs(c(rnorm(10, 2, sd = 1), rnorm(40, 1, sd = 1)))) 43 | 44 | # 50 all-zero rows per sample stand in for the non bait windows 45 | blank <- rep(0, 50) 46 | a <- c(a, blank) 47 | b <- c(b, blank) 48 | c <- c(c, blank) 49 | d <- c(d, blank) 50 | e <- c(e, blank) 51 | f <- c(f, blank) 52 | 53 | counts <- 54 | data.frame( 55 | control_001 = a, 56 | control_002 = b, 57 | control_003 = c, 58 | treatment_001 = d, 59 | treatment_002 = e, 60 | treatment_003 = f 61 | ) 62 | 63 | # shuffle the row order, then label rows by position so they line up with row_ranges 64 | counts <- as.matrix(counts[sample(nrow(counts)), ]) 65 | row.names(counts) <- sprintf("window_%06d", 1:num_windows) 66 | 67 | se <- SummarizedExperiment::SummarizedExperiment( 68 | assays = list(counts = counts), 69 | rowRanges = row_ranges, 70 | colData = col_data 71 | ) 72 | 73 | r <- list() 74 | class(r) <- c("atacr", "list") 75 | r$whole_genome <- se 76 | r$treatments <- c(rep("control", reps), rep("treatment", reps)) 77 | r$sample_names <- 78 | c(sprintf("control_%03d", 1:reps), 79 | sprintf("treatment_%03d", 1:reps)) 80 | r$bam_files <- "no.bam" 
81 | 82 | # rows with a non-zero count in the first control are taken as the bait windows; every other row is non bait 83 | bw <- which(counts[, 'control_001'] > 0) 84 | non_bw <- setdiff(seq_len(num_windows), bw) 85 | 86 | # window i spans (i - 1) * 50 + 1 to i * 50, the same coordinates as row_ranges above 87 | start_pos <- (bw - 1) * 50 + 1 88 | end_pos <- start_pos + 49 89 | seq_names <- rep("synth_chrom", length(bw)) 90 | r$bait_regions <- GenomicRanges::GRanges( 91 | seqnames = S4Vectors::Rle(seq_names), 92 | ranges = IRanges::IRanges( 93 | start_pos, 94 | end = end_pos, 95 | names = sprintf("bait_%02d", 1:length(bw)) 96 | ) 97 | ) 98 | 99 | r$bait_windows <- r$whole_genome[bw,] 100 | # `bw` holds integer positions, so `!bw` is all FALSE and selects no rows; index by the complementary positions instead 101 | r$non_bait_windows <- r$whole_genome[non_bw,] 102 | r$whole_genome@rowRanges@ranges@NAMES <- 103 | as.character(r$whole_genome@rowRanges) 104 | r$bait_windows@rowRanges@ranges@NAMES <- 105 | as.character(r$bait_windows@rowRanges) 106 | r$non_bait_windows@rowRanges@ranges@NAMES <- 107 | as.character(r$non_bait_windows@rowRanges) 108 | r$dataframe <- as.data.frame(r) 109 | return(r) 110 | } 111 | -------------------------------------------------------------------------------- /R/methods.R: -------------------------------------------------------------------------------- 1 | 2 | # writes a short metadata overview of an atacr object with cat(), so the return value is NULL 3 | meta_summary <- function(atcr) { 4 | samples = paste(unique(atcr$sample_names), collapse = ",") 5 | treatments = paste(unique(atcr$treatments), collapse = ",") 6 | sample_count = length(unique(atcr$sample_names)) 7 | treat_count = length(unique(atcr$treatments)) 8 | return( 9 | cat( 10 | "ATAC-seq experiment of", 11 | treat_count, 12 | "treatments in", 13 | sample_count, 14 | "samples\n", 15 | "Treatments:", 16 | treatments, 17 | "\n", 18 | "Samples:", 19 | samples, 20 | "\n", 21 | "Bait regions used:", 22 | length(atcr$bait_regions), 23 | "\n", 24 | "Total Windows:", 25 | length(atcr$whole_genome) , 26 | "\n" 27 | 28 | ) 29 | ) 30 | } 31 | 32 | #' writes a summary of the metadata for a given atacr object 33 | #' @export 34 | #' @param x an atacr object 35 | #' @param \dots other options for print generic 36 | print.atacr <- function(x, ...) 
{ 37 | meta_summary(x) 38 | invisible(x) 39 | } 40 | 41 | #' writes a detailed data summary of the atacr object 42 | #' @export 43 | #' @param object an atacr object 44 | #' @param \dots other options for summary generic 45 | summary.atacr <- function(object, ...) { 46 | atcr <- object 47 | meta <- meta_summary(atcr) # meta_summary() prints the header itself and returns NULL, so `meta` adds nothing to the cat() call below 48 | on_target <- 49 | paste(capture.output(target_count_summary(atcr)), collapse = "\n") 50 | coverage <- 51 | paste(capture.output(coverage_count_summary(atcr)), collapse = "\n") 52 | quantiles <- 53 | paste(capture.output(calc_quantiles(atcr)), collapse = "\n") 54 | return( 55 | cat( 56 | meta, 57 | "\n", 58 | "On/Off target read counts:\n", 59 | on_target, 60 | "\n", 61 | "Quantiles:", 62 | "\n", 63 | quantiles, 64 | "\n", 65 | "Read depths:\n", 66 | coverage 67 | ) 68 | ) 69 | 70 | } 71 | #' returns given subset of data in atacr object as a matrix 72 | #' @export 73 | #' @param x an atacr object 74 | #' @param \dots other options for generic 75 | #' @param which the subset of data to work on 76 | #' @return matrix of counts in subset 77 | as.matrix.atacr <- function(x, ..., which = "bait_windows") { 78 | atcr <- x 79 | return(SummarizedExperiment::assay(atcr[[which]])) 80 | } 81 | 82 | #' returns dataframe of data in atacr object 83 | #' @export 84 | #' @param x an atacr object 85 | #' @param \dots other options for generic 86 | #' @return dataframe of counts in long format (one row per window and sample); the stored `dataframe` member is returned unchanged when it is present 87 | as.data.frame.atacr <- function(x, ...) 
{ 88 | atcr <- x 89 | if (is.null(atcr[["dataframe"]])) { 90 | bw <- as.matrix.atacr(atcr, which = "bait_windows") 91 | nbw <- as.matrix.atacr(atcr, which = "non_bait_windows") 92 | bw_df <- reshape2::melt(bw) 93 | colnames(bw_df) <- c("name", "sample", "count") 94 | bw_df$window_type <- factor(rep("bait_windows", nrow(bw_df))) 95 | nbw_df <- reshape2::melt(nbw) 96 | colnames(nbw_df) <- c("name", "sample", "count") 97 | nbw_df$window_type <- 98 | factor(rep("non_bait_windows", nrow(nbw_df))) 99 | df <- rbind(bw_df, nbw_df) 100 | df$name <- stringr::str_replace(df$name, "-$", "minus") # spell out a trailing strand sign so its '-' is not treated as a field separator below 101 | df$name <- stringr::str_replace(df$name, "\\+$", "plus") 102 | name <- NULL #deal with NSE of devtools::check() 103 | df <- 104 | tidyr::separate(df, name, c("chromosome", "start", "stop", "strand"), sep = 105 | '[-:]') 106 | df$start <- as.integer(df$start) 107 | df$stop <- as.integer(df$stop) 108 | df$chromosome <- factor(df$chromosome) 109 | # the result is not stored on `atcr`: assigning to this local copy would never reach the caller's object 110 | return(df) 111 | } 112 | else{ 113 | return(atcr[["dataframe"]]) 114 | } 115 | 116 | } 117 | 118 | #' returns summary plot of data in atacr object 119 | #' @method plot atacr 120 | #' @export 121 | #' @param x atacr object 122 | #' @param \dots extra options for generic 123 | #' @return gridExtra plot 124 | plot.atacr <- function(x, ...) 
{ 125 | atcr <- x 126 | #histogram of coverages by sample and window type 127 | p1 <- coverage_summary(atcr) 128 | 129 | #density of coverage by chromosome region, bait windows 130 | p2 <- chromosome_coverage(atcr) 131 | 132 | return(gridExtra::grid.arrange(p1, p2, nrow = 2)) 133 | 134 | } 135 | -------------------------------------------------------------------------------- /tests/testthat/test_loading.R: -------------------------------------------------------------------------------- 1 | library(atacr) 2 | Sys.setenv("R_TESTS" = "") 3 | 4 | context("loading BAM files") 5 | 6 | test_that("get_bait_regions_from_text() gets correct bait regions", { 7 | regions <- get_bait_regions_from_text('individual_bait_regions.txt') 8 | 9 | expect_is(regions, "GRanges") #right class 10 | expect_that(levels(regions@seqnames), equals(c("Chr1", "Chr2", "Chr3", "Chr4", "Chr5"))) #right seqnames 11 | expect_that(regions[1]@ranges@NAMES, equals("AT1G01680_1")) #right first name 12 | expect_that(regions[1]@ranges@start, equals(249021)) #right start 13 | expect_that(regions[1]@ranges@width, equals(120)) #right calculated width 14 | expect_that(length(regions), equals(2219)) #right number of regions 15 | 16 | }) 17 | 18 | 19 | 20 | all_atac <- make_counts('individual_bait_regions.gff', 21 | 'sample_treatment_bam_mappings_for_test.csv', 22 | filter_params = make_params(paired_map = FALSE, minq=1, dedup = F)) 23 | 24 | filtered_atac <- 25 | make_counts('individual_bait_regions.gff', 26 | 'sample_treatment_bam_mappings_for_test.csv') 27 | 28 | filtered_rnaseq <- 29 | make_counts('bait_genes.gff', 30 | 'sample_treatment_bam_mappings_for_test.csv', 31 | is_rnaseq = TRUE 32 | ) 33 | 34 | all_rnaseq <- 35 | make_counts('bait_genes.gff', 36 | 'sample_treatment_bam_mappings_for_test.csv', 37 | is_rnaseq = TRUE, 38 | filter_params = NULL 39 | ) 40 | 41 | 42 | 43 | test_that("when loading RNASeq, genome subsections are RangedSummarizedExperiments", 44 | { 45 | expect_is(all_rnaseq$whole_genome, 
"RangedSummarizedExperiment") 46 | expect_is(all_rnaseq$bait_windows, "RangedSummarizedExperiment") 47 | expect_is(all_rnaseq$non_bait_windows, 48 | "RangedSummarizedExperiment") 49 | 50 | }) 51 | 52 | test_that("when loading ATACSeq, genome subsections are RangedSummarizedExperiments", 53 | { 54 | expect_is(all_atac$whole_genome, "RangedSummarizedExperiment") 55 | expect_is(all_atac$bait_windows, "RangedSummarizedExperiment") 56 | expect_is(all_atac$non_bait_windows, "RangedSummarizedExperiment") 57 | 58 | }) 59 | 60 | test_that("when loading bam files for ATACSeq, region names load correctly", { 61 | # check that first range in each set of windows (genome, bait, none_baits) has right rownames - presumably is parsed correctly... this tests whether the windows are loaded correctly 62 | 63 | expect_that(names(all_atac$whole_genome)[1], equals("Chr1:1-50")) 64 | expect_that(names(all_atac$non_bait_windows)[1], equals("Chr1:1-50")) 65 | expect_that(names(all_atac$bait_windows)[1], 66 | equals("Chr1:245951-246000")) 67 | 68 | }) 69 | 70 | test_that("when loading BAM files for RNAseq, region names are computed correctly", 71 | { 72 | expect_that(names(all_rnaseq$non_bait_windows)[1], 73 | equals("Chr1:1-246000")) 74 | expect_that(names(all_rnaseq$non_bait_windows)[2], 75 | equals("Chr1:246201-246700")) 76 | expect_that(names(all_rnaseq$bait_windows)[1], equals("FakeGeneA")) 77 | expect_that(names(all_rnaseq$bait_windows)[2], equals("FakeGeneB")) 78 | 79 | }) 80 | 81 | test_that("when loading BAM files for RNAseq, poor reads are filtered properly", { 82 | 83 | expect_that(unname(SummarizedExperiment::assay(filtered_rnaseq$bait_windows)["FakeGeneA",]), equals(c(0,4,3,70))) 84 | expect_that(unname(SummarizedExperiment::assay(filtered_rnaseq$bait_windows)["FakeGeneB",]), equals( c(3,7,7,186))) 85 | 86 | }) 87 | 88 | test_that("when loading BAM files for ATACseq, poor reads are filtered properly", { 89 | 90 | 
expect_that(unname(SummarizedExperiment::assay(filtered_atac$bait_windows)["Chr1:246251-246300",]), equals(c(0,1,1,8))) 91 | 92 | }) 93 | 94 | p <- make_csaw_params(make_params()) 95 | 96 | test_that("make_csaw_params() returns properly populated object", { 97 | expect_is(p, "readParam") 98 | expect_that(unname(p@pe), equals(c("both"))) 99 | expect_that(p@max.frag, equals(500)) 100 | expect_that(p@dedup, equals(TRUE)) 101 | expect_that(p@minq, equals(30)) 102 | expect_that(p@forward, equals(NA)) 103 | }) 104 | 105 | q <- make_scanBamParam(make_params(), filtered_rnaseq$bam_files[1]) 106 | test_that("make_scanBamParam() returns properly populated object", { 107 | 108 | expect_is(q, "ScanBamParam") 109 | expect_that(unname(q@flag), equals(c(2045,1023))) 110 | expect_that(q@simpleCigar, equals(FALSE)) 111 | expect_that(q@reverseComplement, equals(FALSE)) 112 | expect_that(q@mapqFilter, equals(30)) 113 | }) 114 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) 2 | [![Build Status](https://travis-ci.org/TeamMacLean/atacr.svg?branch=master)](https://travis-ci.org/TeamMacLean/atacr) 3 | [![codecov](https://codecov.io/gh/TeamMacLean/atacr/branch/master/graph/badge.svg)](https://codecov.io/gh/TeamMacLean/atacr) 4 | 5 | ----------------------------------------- 6 | 7 | [![minimal R version](https://img.shields.io/badge/R%3E%3D-3.0.0-6666ff.svg)](https://cran.r-project.org/) 8 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/atacr)](https://cran.r-project.org/package=atacr) 9 | ![package_version](https://img.shields.io/badge/Package%20version-0.4.14-orange.svg?style=flat-square) 10 | 11 | 12 | --------------------------------------- 13 | 14 | 
![Last-changedate](https://img.shields.io/badge/last%20change-"2018--05--15"-yellowgreen.svg) 15 | 16 | 17 | atacr 18 | ===== 19 | 20 | Helps with the analysis of count data from RNA-capture-seq and ATAC-capture-seq experiments. Using BioConductor RangedSummarizedExperiment objects, atacr implements a set of helper functions and quality control plots specific to the analysis of counts of reads in windows across genomes. Especially, atacr is useful for performing sample normalizations and for easily running bootstrap and Bayes factor tests for differentially accessible windows in common reference designs. 21 | 22 | Installation 23 | ------------ 24 | 25 | You can install atacr from github with: 26 | 27 | ``` r 28 | # install.packages("devtools") 29 | devtools::install_github("TeamMacLean/atacr") 30 | ``` 31 | 32 | Documentation 33 | -------------- 34 | 35 | You can read documentation on the following topics 36 | 37 | 1. [Tutorial - A worked example](https://teammaclean.github.io/atacr) 38 | 2. [atacR - General Overview](https://teammaclean.github.io/atacr/atacr.html) 39 | 3. [Loading Data](https://teammaclean.github.io/atacr/loading.html) 40 | 3. [Summaries of Data](https://teammaclean.github.io/atacr/summaries.html) 41 | 4. [Normalising Data](https://teammaclean.github.io/atacr/normalisations.html) 42 | 5. [Differential Windows](https://teammaclean.github.io/atacr/differential_windows.html) 43 | 6. 
[Subsetting Data](https://teammaclean.github.io/atacr/atacr_which.html) 44 | 45 | Quick start: 46 | ------------ 47 | 48 | ``` r 49 | library(atacr) 50 | summary(sim_counts) 51 | #> ATAC-seq experiment of 2 treatments in 6 samples 52 | #> Treatments: control,treatment 53 | #> Samples: control_001,control_002,control_003,treatment_001,treatment_002,treatment_003 54 | #> Bait regions used: 500 55 | #> Total Windows: 1000 56 | #> 57 | #> On/Off target read counts: 58 | #> sample off_target on_target percent_on_target 59 | #> 1 control_001 312 15160 97.98345 60 | #> 2 control_002 347 14777 97.70563 61 | #> 3 control_003 339 15115 97.80639 62 | #> 4 treatment_001 321 16955 98.14193 63 | #> 5 treatment_002 346 16490 97.94488 64 | #> 6 treatment_003 335 17064 98.07460 65 | #> Quantiles: 66 | #> $bait_windows 67 | #> control_001 control_002 control_003 treatment_001 treatment_002 68 | #> 1% 19.99 16.99 19 16.99 16.00 69 | #> 5% 22.00 20.00 22 20.00 19.00 70 | #> 95% 40.00 40.00 39 63.00 65.05 71 | #> 99% 45.00 46.00 44 109.00 89.03 72 | #> treatment_003 73 | #> 1% 16.00 74 | #> 5% 21.00 75 | #> 95% 61.00 76 | #> 99% 109.06 77 | #> 78 | #> $non_bait_windows 79 | #> control_001 control_002 control_003 treatment_001 treatment_002 80 | #> 1% 0 0 0.00 0 0.00 81 | #> 5% 0 0 0.00 0 0.00 82 | #> 95% 3 4 3.05 3 3.05 83 | #> 99% 4 4 4.00 4 4.00 84 | #> treatment_003 85 | #> 1% 0 86 | #> 5% 0 87 | #> 95% 3 88 | #> 99% 4 89 | #> 90 | #> Read depths: 91 | #> sample off_target on_target 92 | #> 1 control_001 0.624 30.320 93 | #> 2 control_002 0.694 29.554 94 | #> 3 control_003 0.678 30.230 95 | #> 4 treatment_001 0.642 33.910 96 | #> 5 treatment_002 0.692 32.980 97 | #> 6 treatment_003 0.670 34.128 98 | ``` 99 | 100 | ``` r 101 | plot(sim_counts) 102 | #> Picking joint bandwidth of 0.0243 103 | #> Picking joint bandwidth of 0.0582 104 | ``` 105 | 106 | ![](README-unnamed-chunk-2-1.png) 107 | -------------------------------------------------------------------------------- 
/vignettes/normalisations.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Normalisations" 3 | author: "Dan MacLean" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | fig_caption: yes 8 | vignette: > 9 | %\VignetteIndexEntry{Normalisations} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | Normalisations help make the count estimates more easily comparable between experiments. `atacr` provides a few options for this. 15 | 16 | ## Library size normalisation 17 | 18 | This is the simplest, but probably the least useful normalisation. The total counts are scaled such that each sample has a similar total count to account for different sequencing depths. This procedure can be done in one step with `library_size_normalisation()` 19 | 20 | ```{r, echo=FALSE, eval=TRUE} 21 | library(atacr) 22 | counts <- simulate_counts() 23 | ``` 24 | 25 | ```{r} 26 | normalised_counts <- library_size_normalisation(counts) 27 | ``` 28 | 29 | The `by_treatment` option will group the samples into different treatments and normalise each separately. This method assumes that within treatment groups the samples should show little difference, but between sample treatment groups could show lots of difference and prevents the treatment structure affecting the wider experiment. 30 | 31 | ```{r,eval=FALSE} 32 | by_sample_normalised_counts <- library_size_normalisation(counts, 33 | by_treatment = TRUE) 34 | ``` 35 | 36 | ## Control window normalisation 37 | 38 | This option allows you to perform a scaling of the data based on user-specified control regions, usually these will be genomic windows corresponding to baits from control genes/regions. A one-step option is to provide these control window locations to `control_window_normalise()` in a separate file. 
39 | 40 | ```{r, eval=FALSE} 41 | control_window_normalised_counts <- control_window_normalise(counts, "my_controls.csv") 42 | ``` 43 | 44 | The control window file should be a simple `.csv` file with header and columns `seq_name,start_pos,end_pos,bait_name`. 45 | 46 | 47 | ## Finding internal scaling factors 48 | 49 | A better way to normalise will often be to find the least variable windows in your sample and scale by those. `atacr` provides a method for doing this by `goodness of fit` as described previously in [Li et al, 2012](https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate) and [on Harold Pimentel's blog](https://haroldpimentel.wordpress.com/2014/12/08/in-rna-seq-2-2-between-sample-normalization/). 50 | 51 | Essentially, Goodness of Fit (GoF) is a method of estimating variability over samples for each window. Each window gets a GoF, the lower it is, the lower the variability. These should then be the best ones to use as controls for scaling. The vector of normalisation factors for each sample can be obtained using `get_GoF_factors()` 52 | 53 | ```{r} 54 | gof_norm_factors <- get_GoF_factors(counts) 55 | gof_norm_factors 56 | ``` 57 | 58 | ## Applying scaling factors 59 | 60 | If you have a set of scaling factors from `get_GoF_factors()` or some other package or function, then it is possible to apply them to the data using the `scale_factor_normalise()` function. 
61 | 62 | ```{r, fig.width=7 } 63 | gof_normalised_counts <- scale_factor_normalise(counts, 64 | scaling_factors = gof_norm_factors) 65 | 66 | ## You can add the normalised counts to a slot on the original object 67 | counts$normalised_counts <- gof_normalised_counts 68 | 69 | plot_counts(counts, which = "normalised_counts") 70 | ``` 71 | 72 | ## Comparing sets of potential control windows 73 | 74 | To allow comparison of the GoF metric of different sets of windows (e.g. those determined by `get_GoF_factors()` or your own list) we can plot the distribution of 'control' windows against the rest using `plot_GoF()`; we just need a vector of names of windows to use as controls. 75 | 76 | The `atacr` function `find_controls_by_GoF()` is useful here, it returns a vector of window names used by the normalisation that can be plugged into the plot. Alternatively, you can supply a character vector of your own window names. 77 | 78 | ```{r, fig.width=7} 79 | auto_controls <- find_controls_by_GoF(counts) 80 | head(auto_controls) 81 | 82 | plot_GoF(counts, controls = auto_controls) 83 | 84 | ``` 85 | 86 | ## Further normalisations by window size and other factors 87 | 88 | The normalisations described here are not sensitive to factors such as window size and the counts from them may need to be corrected further, especially for RNAseq data with different window sizes. There are many packages in the Bioconductor libraries and on CRAN that can be used for this, check out the [edgeR](http://bioconductor.org/packages/release/bioc/html/edgeR.html), [DESeq](https://bioconductor.org/packages/release/bioc/html/DESeq.html) and [csaw](https://bioconductor.org/packages/release/bioc/html/csaw.html) packages among others. 
89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /vignettes/differential_windows.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Differentially accessible or expressed windows" 3 | author: "Dan MacLean" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | fig_caption: yes 8 | vignette: > 9 | %\VignetteIndexEntry{Differential Windows} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | ```{r echo = FALSE} 14 | knitr::opts_chunk$set( 15 | message = FALSE, 16 | warning = FALSE 17 | ) 18 | ``` 19 | 20 | Finding windows that correspond to differentially expressed or accessible windows is possible with two related functions in `atacr` - `estimate_fdr()` which implements bootstrap _t_-tests, via the boot package and `estimate_bayes_factor()` which implements a Bayes factor ANOVA using the BayesFactor package. A tidy dataframe of results is returned in each case. 21 | 22 | ```{r, echo=FALSE, eval=TRUE} 23 | library(atacr) 24 | normalized_counts <- simulate_counts() 25 | result <- estimate_fdr(normalized_counts, 26 | treatment_a = "treatment", 27 | treatment_b = "control") 28 | ``` 29 | 30 | ### Bootstrap _t_-tests 31 | 32 | For simple comparison of two treatments with bootstrap _t_ tests, provide treatment 'a' and 'b' names and the number of bootstrap iterations (default is 10, which is fast for testing code, but useless analytically). You can set the threshold for marking as significant with `fdr_level`. 
33 | 34 | ```{r, echo=TRUE, eval=FALSE} 35 | result <- estimate_fdr(normalized_counts, 36 | treatment_a = "treatment", 37 | treatment_b = "control", 38 | iterations = 100000, 39 | fdr_level = 0.01) 40 | ``` 41 | ```{r, echo=FALSE, eval=TRUE} 42 | head(result) 43 | ``` 44 | 45 | The output has columns as follows: 46 | 47 | * `window` - the name of the window with data on this row 48 | * `t` - the value of the _t_ statistic for the first (non-bootstrap) iteration 49 | * `p_value` - the computed _p_ value for the window 50 | * `fdr` - the false discovery rate at this window 51 | * `mean_count_a` - the mean count for treatment 'a' 52 | * `mean_count_b` - the mean count for treatment 'b' 53 | * `sd_a` - standard deviation for treatment 'a' 54 | * `sd_b` - standard deviation for treatment 'b' 55 | * `log2_fc` - log 2 of the ratio of the mean counts 56 | * `is_sig` - flag showing whether window was significant according to the level set in the function with parameter `fdr_level` 57 | 58 | 59 | To analyse all treatments against a common comparison at once you can use the wrapper function `estimate_fdr_multiclass()` which requires the name of the common comparison treatment. 60 | 61 | 62 | 63 | ```{r, echo=TRUE, eval=FALSE} 64 | multi_result <- estimate_fdr_multiclass(normalized_counts, 65 | common_control = "control", 66 | iterations = 100000, 67 | fdr_level = 0.01) 68 | 69 | head(multi_result) 70 | ``` 71 | 72 | ```{r, echo=FALSE, eval=TRUE} 73 | multi_result <- estimate_fdr_multiclass(normalized_counts, 74 | common_control = "control") 75 | head(multi_result) 76 | ``` 77 | 78 | The results here have two extra columns: 79 | 80 | * a - the name of the treatment 81 | * b - the name of the common control 82 | 83 | ### Bayes Factor Analysis 84 | 85 | A similar pair of functions is available for Bayes factor analysis. `estimate_bayes_factor()` performs the two-way comparison. The `factor` argument sets the Bayes factor at which to mark the window as having different counts. 
86 | 87 | ```{r} 88 | result_bf <- estimate_bayes_factor(normalized_counts, 89 | treatment_a = "treatment", 90 | treatment_b = "control", 91 | factor = 2.0) 92 | 93 | head(result_bf) 94 | ``` 95 | 96 | 97 | Again, a `estimate_bayes_factor_multiclass()` function works for all comparisons to a common control. 98 | 99 | The results data frame is similar to that from the Bootstrap _t_ methods, with a `factor` column in place of the `t` and `fdr` columns. 100 | 101 | ### EdgeR analysis 102 | 103 | The single comparison edgeR analysis returns a dataframe similar to the above methods. 104 | 105 | In all the runs of edgeR the `estimateDisp()` function is used. This means that the `edgeR_exact()` methods will be increasingly less useful as a greater proportion of windows show differential counts. edgeR is the most powerful method when only a few genes are showing differential counts, use the other methods in other cases. 106 | 107 | You can tell edgeR to ignore data with zero counts in all samples using `remove_zeros` 108 | 109 | ```{r} 110 | result_edger <- edgeR_exact(normalized_counts, 111 | treatment_a = "treatment", 112 | treatment_b = "control", 113 | remove_zeros = TRUE) 114 | 115 | head(result_edger) 116 | ``` 117 | 118 | The edgeR multiclass variant, `edgeR_multiclass()` also uses the `estimateDisp()` function in all cases. The `edgeR_multiclass()` function does not return a dataframe, instead it returns the native `DGELRT` objects (see [the DGELRT manual](https://www.rdocumentation.org/packages/edgeR/versions/3.14.0/topics/DGELRT-class) for more information) from each comparison in a `list()` object with names as per the treatment used. 
119 | 120 | ```{r} 121 | edgeR_multiclass(normalized_counts,"mock", 122 | remove_zeros = TRUE) 123 | ``` 124 | -------------------------------------------------------------------------------- /vignettes/loading.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Loading data" 3 | author: "Dan MacLean" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | fig_caption: yes 8 | vignette: > 9 | %\VignetteIndexEntry{Loading Data} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | 15 | All counts are computed from sorted, indexed BAM files using the `make_counts()` function. This function requires two files: 16 | 17 | 1. A GFF [1] file of bait regions on the genome 18 | 2. A csv file showing the sample -> treatment -> bam file mappings for the experiment. 19 | 20 | The mapping file has the following structure: 21 | 22 | "sample_name", | "bam_file_path", | "treatment" 23 | ---------------|------------------|------------ 24 | "control_001", | "data/control1/aligned_merged_sorted.bam" | "control" 25 | "control_002", | "data/control2/aligned_merged_sorted.bam" | "control" 26 | "control_003", | "data/control3/aligned_merged_sorted.bam" | "control" 27 | "treatment_001", | "data/treatment1/aligned_merged_sorted.bam" | "treatment" 28 | "treatment_002", | "data/treatment2/aligned_merged_sorted.bam" | "treatment" 29 | "treatment_003", | "data/treatment3/aligned_merged_sorted.bam" | "treatment" 30 | 31 | The BAM indices (`.bai` files) are presumed to be with the BAM files. 32 | 33 | ## Differences between ATACseq and RNAseq data within `atacr`. 34 | 35 | As far as `atacr` is concerned, ATACseq data is counted into equal sized windows within the bait windows - so that you end up with many more regions with counts, than you have baits. This behaviour means you can find regions of smaller than bait size that are differentially accessible. 
Conversely, RNAseq data is counted into one window per region declared in the GFF file, so you get just one expression estimate per gene/transcript. 36 | 37 | ## Loading ATACseq data 38 | 39 | ATACseq is the default data type expected in `atacr`. The `make_counts()` call is the simplest in this case. 40 | 41 | ```{r, echo=TRUE, eval=FALSE} 42 | counts <- make_counts("bait_regions.gff", 43 | "sample_treatment_mapping.csv") 44 | ``` 45 | 46 | ### Set genomic window width 47 | 48 | The width of the genomic windows in which to compute counts across the defined bait regions is set to 50 nt, to change this use the `width` parameter to the size of the windows you want to use, e.g. 100 nt. 49 | 50 | ```{r, echo=TRUE, eval=FALSE} 51 | counts <- make_counts("bait_regions.gff", 52 | "sample_treatment_mapping.csv", 53 | width = 100) 54 | ``` 55 | 56 | ## Loading RNAseq data 57 | 58 | When loading RNAseq data it is necessary to set the `is_rnaseq` option in `make_counts()` 59 | 60 | ```{r, echo = TRUE, eval=FALSE} 61 | 62 | counts <- make_counts("bait_regions.gff", 63 | "sample_treatment_mapping.csv", 64 | is_rnaseq = TRUE) 65 | ``` 66 | 67 | ### Setting quality filters when computing counts from BAM files 68 | 69 | `atacr` allows you to set values determining which reads will be included in counts. By default a simple filter object can be passed from the `make_params()` function to the `filter_params` argument of `make_counts()`. 70 | 71 | ```{r, echo = TRUE, eval = FALSE} 72 | 73 | my_params = make_params( 74 | paired_map = TRUE, 75 | minq = 30, 76 | dedup = TRUE 77 | ) 78 | 79 | counts <- make_counts("bait_regions.gff", 80 | "sample_treatment_mapping.csv", 81 | is_rnaseq = TRUE, 82 | filter_params = my_params ) 83 | ``` 84 | 85 | 86 | The `paired_map` option sets whether reads must be mapped as pairs to be counted, `TRUE` is the default. The `dedup` option removes reads that seem like PCR duplicates to the aligner; `TRUE` is the default. 
`minq` sets the minimum PHRED mapping quality score for a read to be counted, `30` is the default 87 | 88 | ### Advanced Quality filters RNAseq 89 | 90 | If you require greater control over mapping filters for read counts from RNAseq, you can use an `Rsamtools::ScanBamParam()` object instead. See [https://www.rdocumentation.org/packages/Rsamtools/versions/1.24.0/topics/ScanBamParam](https://www.rdocumentation.org/packages/Rsamtools/versions/1.24.0/topics/ScanBamParam) for details 91 | 92 | ### Advanced Quality filters ATACseq 93 | 94 | For greater control over mapping filters for read counts when using ATACseq data, use a `csaw::readParam()` object. See [http://bioconductor.org/packages/release/bioc/manuals/csaw/man/csaw.pdf](http://bioconductor.org/packages/release/bioc/manuals/csaw/man/csaw.pdf) for details. 95 | 96 | 97 | ## Region names 98 | 99 | Region names are loaded from the GFF file. As GFF is a bit of a fluid format different files may encode this information differently. By default, `make_counts()` will look into the attribute (final) column in the GFF and use the attribute called `ID`. To use a different attribute set `gene_id_col` 100 | 101 | ```{r, eval=FALSE, echo=TRUE} 102 | counts <- make_counts("bait_regions.gff", 103 | "sample_treatment_mapping.csv", 104 | gene_id_col = "GENE_NAME") 105 | ``` 106 | 107 | ## Output - an `atacr` object 108 | 109 | The result of `make_counts()` is an `atacr` object of counts, basically an R `list` with slots for counts from bait windows, non-bait windows, the sample and BAM information. The count information is held in 'SummarizedExperiment' objects from Bioconductor. See [http://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html](http://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html) for details. 110 | 111 | ## Saving a count object 112 | 113 | Computing the `atacr` count object can take a while, especially when you are analysing many BAM files. 
#' Get a summary of reads hitting the bait and non bait windows
#'
#' Reshapes the long-format output of target_count_coverage() so that each
#' sample has its summed on-target and off-target read counts side by side,
#' then adds the percentage of those reads that were on target.
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @return a table with one row per sample holding the on target and off target
#'   read counts and a `percent_on_target` column
target_count_summary <- function(data) {
  df <- target_count_coverage(data)
  # target_count_coverage() calls its mean column `mean_coverage`; drop that
  # column so only the summed counts are reshaped (the mirror image of what
  # coverage_count_summary() does with `count_sum`).
  df$mean_coverage <- NULL
  on_target <- off_target <- NULL #deal with devtools::check()
  d <- df %>%
    reshape::cast(sample ~ target, value = "count_sum") %>%
    dplyr::mutate(percent_on_target = (on_target / (on_target + off_target)) * 100)
  return(d)
}
#' Get a summary of depth of coverage in the bait and non bait windows
#'
#' Reshapes the output of target_count_coverage() so each sample has its mean
#' on-target and off-target coverage side by side.
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @return a table of on target and off target mean depths, one row per sample
coverage_count_summary <- function(data) {
  coverage <- target_count_coverage(data)
  # only the mean coverage is wanted here, so discard the summed counts
  coverage$count_sum <- NULL
  reshape::cast(coverage, sample ~ target, value = "mean_coverage")
}

#' Read count and mean coverage hitting the bait and non bait windows
#'
#' Produces a long-format table with one row per sample and target class
#' ("on_target" for `bait_windows`, "off_target" for `non_bait_windows`).
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @return a dataframe with columns `sample`, `target`, `count_sum` (column sum
#'   of the assay) and `mean_coverage` (column mean of the assay)
target_count_coverage <- function(data) {
  on_counts <- SummarizedExperiment::assay(data$bait_windows)
  off_counts <- SummarizedExperiment::assay(data$non_bait_windows)

  # one label per sample column, bait samples first and non-bait second
  n_on <- length(colnames(on_counts))
  n_off <- length(colnames(off_counts))
  target <- factor(rep(c("on_target", "off_target"), times = c(n_on, n_off)))

  count_sum <- c(colSums(on_counts), colSums(off_counts))
  mean_coverage <- c(colMeans(on_counts), colMeans(off_counts))

  data.frame(
    sample = names(count_sum),
    target = target,
    count_sum = count_sum,
    mean_coverage = mean_coverage
  )
}

#' identify kmeans clusters for samples
#'
#' Runs stats::kmeans() on the samples (columns) of the chosen assay, asking
#' for as many clusters as there are distinct treatments.
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @param which the subdivision of the genome to cluster on, either
#'   'whole_genome', 'bait_windows' or 'non_bait_windows'
#' @return dataframe of cluster_id and sample name, ordered by cluster then sample
sample_kmeans_cluster <- function(data, which = "bait_windows") {
  window_counts <- SummarizedExperiment::assay(data[[which]])
  n_clusters <- length(unique(data$treatments))
  # kmeans clusters rows, so transpose to put samples on the rows
  fit <- kmeans(t(window_counts), n_clusters)
  clusters <- data.frame(cluster_id = fit$cluster)
  clusters$sample <- rownames(clusters)
  cluster_id <- NULL # keeps devtools::check() quiet about the column name below
  dplyr::arrange(clusters, cluster_id, sample)
}

#' count windows that have read counts at or below the threshold
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @param which the subdivision of the genome to count in, either
#'   'whole_genome', 'bait_windows' or 'non_bait_windows'
#' @param threshold windows whose read count is less than or equal to this
#'   value are counted
#' @return dataframe of sample name, count and threshold
count_windows_under_threshold <-
  function(data,
           which = "bait_windows",
           threshold = 0) {
    window_counts <- SummarizedExperiment::assay(data[[which]])
    n_below <- apply(window_counts, MARGIN = 2, function(sample_counts) {
      sum(sample_counts <= threshold)
    })
    result <- data.frame(
      sample = names(n_below),
      count = n_below,
      threshold = rep(threshold, length(n_below))
    )
    rownames(result) <- NULL
    result
  }
#' report counts at each quantile for each sample
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @param quantiles a vector of quantiles to report
#' @param which the subset of data windows to report on. Default (NULL) reports
#'   on both "bait_windows" and "non_bait_windows"
#' @return when `which` is NULL, a list with members `bait_windows` and
#'   `non_bait_windows`, each a matrix of per-sample quantiles; otherwise a
#'   single matrix of per-sample quantiles for the requested subset
calc_quantiles <-
  function(data,
           quantiles = c(.01, .05, 0.95, 0.99),
           which = NULL) {
    # quantiles of every sample (column) of a count matrix
    per_sample_quantiles <- function(mat) {
      apply(mat, MARGIN = 2, quantile, probs = quantiles)
    }

    if (!is.null(which)) {
      return(per_sample_quantiles(as.matrix(data, which = which)))
    }

    bait_q <- per_sample_quantiles(as.matrix(data))
    non_bait_q <- per_sample_quantiles(as.matrix(data, which = "non_bait_windows"))
    list(bait_windows = bait_q, non_bait_windows = non_bait_q)
  }



# TRUE when every value in the chosen assay equals its integer coercion.
se_contains_only_integers <- function(data, which) {
  values <- SummarizedExperiment::assay(data[[which]])
  all(values == as.integer(values))
}



#' given a vector of values return a set of random numbers from a given
#' distribution
#'
#' The distribution parameters are estimated from `obs`: mean and sd for
#' "norm", mean for "pois", and a fitdistrplus fit for "nbinom". Any other
#' value of `dist` yields the normal variates.
#' @export
#' @param obs vector of observed values
#' @param dist the distribution from which to return expected values, one of
#'   "norm", "pois" or "nbinom"
#' @return a vector of length obs with random variates from distribution dist
get_expected_values <- function(obs, dist = "norm") {
  n <- length(obs)
  # The normal draw is always made first, even when it is replaced below, so
  # the random number stream is consumed the same way for every `dist`.
  expected <- rnorm(n, mean = mean(obs), sd = sd(obs))
  if (dist == "pois") {
    expected <- rpois(n, lambda = mean(obs))
  }
  if (dist == "nbinom") {
    fit <- fitdistrplus::fitdist(obs, "nbinom")
    expected <- rnbinom(n,
                        size = fit$estimate[["size"]],
                        mu = fit$estimate[["mu"]])
  }
  expected
}
#' given a vector of numbers returns the observed counts in bins of bin_width,
#' and the counts expected in the same bins under a chosen distribution
#' @export
#' @param obs a vector of numbers
#' @param dist a string naming distribution from which to take expected counts
#' @param bin_width the width of the bins for the counts
#' @return list with members observed and expected which are vectors of counts
observed_expected_bins <-
  function(obs,
           dist = "pois",
           bin_width = 10) {
    expected <- get_expected_values(obs, dist)

    # shared breaks spanning both vectors, padded by one bin at the top so the
    # maximum always falls inside a bin
    pooled <- c(obs, expected)
    breaks <- seq(min(pooled), max(pooled) + bin_width, by = bin_width)

    bin_counts <- function(values) {
      hist(values, breaks = breaks, plot = FALSE)$counts
    }
    list(observed = bin_counts(obs), expected = bin_counts(expected))
  }

#' a median of window values across all samples in a vector, for ma plots
#' @export
#' @param sample_matrix counts extracted from a SummarizedExperiment object
#' @return the median of the provided counts for each row (window), taken
#'   across the columns (samples)
median_virtual_experiment <- function(sample_matrix) {
  apply(sample_matrix, MARGIN = 1, FUN = median)
}

# M value for MA plots: log2 ratio of test to control.
emm <- function(test, control) {
  log2(test) - log2(control)
}

# A value for MA plots: mean of the log2 test and control values.
ay <- function(test, control) {
  0.5 * (log2(test) + log2(control))
}
#' given a dataframe from the estimate_fdr_multiclass() function, will return a
#' list in the format suitable for UpSetR visualisation.
#'
#' Each window is labelled "up" or "down" from the sign of `log2_fc`, combined
#' with the treatment name in column `a`, and the window names are grouped by
#' that label. Does not do any filtering of lists, so selected genes must be
#' filtered before hand e.g with dplyr
#' @export
#' @param df dataframe from estimate_fdr_multiclass
#' @return list of named vectors suitable for UpSetR fromList() function
make_UpSetR <- function(df) {
  log2_fc <- direction <- a <- NULL # keeps devtools::check() quiet about column names
  labelled <- dplyr::mutate(
    df,
    direction = ifelse(log2_fc > 0, "up", "down"),
    category = paste0(direction, "_", a)
  )
  by_category <- split(labelled, labelled$category)
  lapply(by_category, function(group) {
    as.vector(dplyr::select(group, window)$window)
  })
}
#' estimates Goodness of Fit for each row in a count matrix
#'
#' For every row the observed counts are compared with the counts expected if
#' the row total were split across columns in proportion to each column's share
#' of the grand total; the statistic is the sum over columns of
#' (observed - expected)^2 / expected.
#' @param mat a count matrix usually from SummarizedExperiment::assay()
#' @return a named vector of GoF estimates
gof <- function(mat){

  #see https://haroldpimentel.wordpress.com/?s=TMM#paperList
  # https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate
  # https://github.com/cran/PoissonSeq/blob/3d9bc4b1744cb45714d4442b5a879b6e0c68b4a2/R/ps_other.R
  pseudo_val <- 1e-10 # keeps the denominator non-zero for empty rows/columns

  column_share <- colSums(mat) / sum(mat)
  expected <- rowSums(mat) %*% t(column_share)
  rowSums((mat - expected) ^ 2 / (expected + pseudo_val))
}
#' estimates Goodness of Fit from atacr object
#' @export
#' @param atacr a list of SummarizedExperiment objects from atacr::make_counts()
#' @param which the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'
#' @return the original atacr object with a new slot - 'gofs' - a named vector of each windows GoF estimate.
estimate_GoFs <- function(atacr, which = "bait_windows"){
  mat <- SummarizedExperiment::assay(atacr[[which]])
  atacr$gofs <- gof(mat)
  return(atacr)
}

#' Depth estimation, directly from https://github.com/cran/PoissonSeq/blob/master/R/ps_cmeans.R
#'
#' Starting from each column's share of the grand total, repeatedly (1) scores
#' every row by its goodness of fit to the current column proportions,
#' (2) keeps the rows whose score lies between the 25th and 75th percentiles
#' (inclusive) and (3) re-estimates the column proportions from the kept rows.
#' @param n a count matrix (rows are windows, columns are samples)
#' @param iter number of refinement passes of the depth finder
#' @return list with `cmeans`, the estimated column proportions (summing to 1),
#'   and `keep`, a logical vector marking the rows used in the final pass
#'   (NULL when `iter` is 0)
Est.Depth <- function(n, iter=5)
{
  SMALL.VAL <- 1e-8
  cmeans <- colSums(n) / sum(n)
  keep <- NULL

  # seq_len() so that iter = 0 performs no passes (1:0 would run twice)
  for (i in seq_len(iter))
  {
    n0 <- rowSums(n) %*% t(cmeans)
    prop <- rowSums((n - n0) ^ 2 / (n0 + SMALL.VAL))
    qs <- quantile(prop, c(0.25, 0.75))
    keep <- (prop >= qs[1]) & (prop <= qs[2])

    # drop = FALSE: with few rows only one may be kept, and the subset must
    # stay a matrix for colMeans() to work
    cmeans <- colMeans(n[keep, , drop = FALSE])
    cmeans <- cmeans / sum(cmeans)
  }

  return(list(cmeans=cmeans, keep=keep))
}
#' estimates per-sample scaling factors based on windows with smallest GoF
#'
#' Takes the relative sequencing depths estimated by Est.Depth(), centres them
#' on their geometric mean and inverts them, so multiplying a sample's counts
#' by its factor brings the samples to a common depth.
#' @export
#' @param atacr a list of SummarizedExperiment objects from atacr::make_counts()
#' @param which the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'
#' @return a vector with one scaling factor per sample
get_GoF_factors <- function(atacr, which = "bait_windows"){

  m <- SummarizedExperiment::assay(atacr[[which]])
  seq.depth <- Est.Depth(n=m, iter=5)$cmeans
  # depth / geometric-mean depth, inverted to give a multiplicative factor
  seq.depth <- 1 / (exp(log(seq.depth) - mean(log(seq.depth))) )
  return(seq.depth)
}

#' find control windows by convergence method in https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate
#' @export
#' @param atacr a list of SummarizedExperiment objects from atacr::make_counts()
#' @param which the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'
#' @return a character vector of the names of the windows kept by Est.Depth()
find_controls_by_GoF <- function(atacr, which = "bait_windows"){

  m <- SummarizedExperiment::assay(atacr[[which]])
  keep <- Est.Depth(n = m, iter = 5)$keep
  # index the names directly: subsetting the matrix first would drop to a
  # plain vector (and lose the row name) when only one window is kept
  controls <- rownames(m)[keep]
  return(controls)

}
#' performs a whole library size normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @param which the subdivision of the genome to normalise, either 'whole_genome', 'bait_windows' or 'non_bait_windows'
#' @param by_treatment (FALSE) will group the assay into different treatments and normalise each separately - assumes that within treatment groups the samples should show little difference, but between sample treatment groups could show lots of difference between windows.
#' @return a SummarizedExperiment object with a new, normalised assay matrix
library_size_normalisation <- function(data, which = "bait_windows", by_treatment = FALSE ){
  d <- data[[which]]
  if ( !by_treatment ){
    return( library_size_normalisation_internal( d ) )
  }

  # Write each treatment's normalised columns back by sample name so that
  # every column stays under its own sample, whatever order the treatments
  # are visited in.
  full_mat <- SummarizedExperiment::assay(d)
  for (treatment in unique(data$treatments) ){
    samples <- names_from_treatment(data, treatment)
    treat_norm <- library_size_normalisation_internal( d[, samples] )
    full_mat[, samples] <- SummarizedExperiment::assay( treat_norm )
  }
  SummarizedExperiment::assay(d) <- full_mat
  return( d )
}

# Mean count per window within each treatment; returns a matrix with one
# column per treatment.
average_matrix_by_sample <- function(data, which = "bait_windows") {
  l <- list()
  m <- SummarizedExperiment::assay(data[[which]])
  for (treatment in unique(data$treatments) ){
    samples <- names_from_treatment(data, treatment)
    # drop = FALSE keeps a one-sample treatment as a matrix for apply()
    l[[treatment]] <- apply(m[, samples, drop = FALSE], 1, mean)
  }
  return(do.call(cbind, l))

}


# Sample names whose treatment label equals `treatment`.
names_from_treatment <- function(data, treatment){
  return(data$sample_names[which(data$treatments == treatment)] )
}

#' return list of treatment names
#' @export
#' @param data an atacr object
#' @return char vector of unique treatment names
treatments <- function(data){
  return( unique(data$treatments))
}

# Treatment label(s) recorded for the sample called `sample_name`.
treatment_from_name <- function(data, sample_name){
  return(data$treatments[which(data$sample_names == sample_name)] )
}


#' do a library size normalisation
#' @param se a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()
#' @return a copy of `se` whose assay has been scaled by library_size_scaling_factors()
library_size_normalisation_internal <- function(se){

  scaling_factors <- library_size_scaling_factors( se )
  normalised_sample_matrix <- scale_normalise(SummarizedExperiment::assay( se ), scaling_factors)
  se_copy <- se
  SummarizedExperiment::assay(se_copy) <- normalised_sample_matrix
  return(se_copy)
}

#' calculate scaling factors for library size
#' @export
#' @param se a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()
#' @return a vector with one scaling factor per sample (column)
library_size_scaling_factors <- function( se ){ # nocov start
  sample_matrix <- SummarizedExperiment::assay( se )
  return(get_scaling_factors(sample_matrix))
} # nocov end

# One factor per column: the total of the median virtual experiment divided by
# that column's total.
get_scaling_factors <- function( sample_matrix ){
  mve_sum <- sum(median_virtual_experiment( sample_matrix ))
  scaling_factors <- sapply(colSums( sample_matrix ), function(x){ mve_sum / x })
  return(scaling_factors)
}

# Multiply every column of `sample_matrix` by the matching scaling factor.
scale_normalise <- function( sample_matrix, scaling_factors){ #nocov start
  # nrow is given explicitly because diag(x) with a single number builds an
  # identity matrix of that size instead of a 1 x 1 matrix holding the value
  scaled <- sample_matrix %*% diag(scaling_factors, nrow = length(scaling_factors))
  return(scaled)
} #nocov end

#' normalise by a provided set of scaling factors
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @param which the subdivision of the genome to normalise, either 'whole_genome', 'bait_windows' or 'non_bait_windows'
#' @param scaling_factors a vector of scaling factors to normalise by, one per sample (column); must be supplied
#' @return a SummarizedExperiment with scale normalised window values
scale_factor_normalise <- function(data, which = "bait_windows", scaling_factors = NULL){
  if (is.null(scaling_factors)) {
    stop("scaling_factors must be provided, one value per sample")
  }
  se <- data[[which]]
  normalised_sample_matrix <- scale_normalise(SummarizedExperiment::assay( se ), scaling_factors)
  se_copy <- se
  SummarizedExperiment::assay(se_copy) <- normalised_sample_matrix
  return(se_copy)
}
#' extract scaling factors from control windows (often from a file of control gene positions)
#'
#' Keeps the rows of `se` whose ranges overlap any region read from
#' `window_file` and computes the scaling factors from those rows only.
#' @export
#' @param se a SummarizedExperiment object
#' @param window_file a text file containing the positions of control window/gene ranges
#' @return a vector of scaling factors from control genes
control_window_scaling_factors <- function( se, window_file){
  control_regions <- get_bait_regions_from_text( window_file )
  in_control <- IRanges::overlapsAny( SummarizedExperiment::rowRanges( se ), control_regions )
  control_counts <- SummarizedExperiment::assay( se[in_control, ] )
  get_scaling_factors(control_counts)
}

#' do a control window scaling normalisation
#' @param se a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()
#' @param window_file a text file containing the positions of control window/gene ranges
#' @return SummarizedExperiment object, a copy of se with normalised values
control_window_normalise_internal <- function( se, window_file ){
  factors <- control_window_scaling_factors( se, window_file)
  scaled_counts <- scale_normalise(SummarizedExperiment::assay( se ), factors)
  normalised <- se
  SummarizedExperiment::assay(normalised) <- scaled_counts
  normalised
}
control_window_normalise <- function(data, window_file, which = "bait_windows", by_treatment = FALSE ){
  d <- data[[which]]
  if (!by_treatment) {
    return( control_window_normalise_internal(d, window_file ))
  }

  # Normalise each treatment's samples against their own control-window
  # scaling factors, collecting one normalised matrix per treatment.
  l <- list()
  for (treatment in unique(data$treatments) ){
    # assumes names_from_treatment() returns the column (sample) names of this treatment - TODO confirm
    samples <- names_from_treatment(data, treatment)
    treat_norm <- control_window_normalise_internal( d[, samples], window_file )
    l[[treatment]] <- SummarizedExperiment::assay( treat_norm )
  }
  full_mat <- do.call(cbind, l)

  # cbind() concatenates the columns treatment by treatment. When the samples
  # of a treatment are not contiguous in the original object, that order
  # differs from the object's own column order, so restore it before assigning.
  sample_order <- colnames(d)
  if (!is.null(sample_order) && !is.null(colnames(full_mat)) &&
      !anyDuplicated(sample_order) && setequal(sample_order, colnames(full_mat))) {
    full_mat <- full_mat[, sample_order, drop = FALSE]
  }

  SummarizedExperiment::assay(d) <- full_mat
  return( d )
}

#' normalise counts by window width (counts / window width)
#' @export
#' @param data a list of SummarizedExperiment objects from atacr::make_counts()
#' @param which the subset of the data to normalise. Default = bait_windows
#' @param per the expression count / width gives the reads in the window divided by the width, so a 3000 nt gene with 30000 reads mapping to it will have a read count of just 10. Setting this parameter allows you to represent the counts per some other number of nts. Default = 1000, so gives the reads per kb of the gene.
#' @return SummarizedExperiment object with normalised counts
normalise_by_window_width <- function(data, which = "bait_windows", per= 1000){
  windows <- data[[which]]
  # width of each window (row), expressed in units of `per` nucleotides
  width_units <- windows@rowRanges@ranges@width / per
  counts <- SummarizedExperiment::assay(windows)
  # the vector recycles down the rows, so each row is divided by its own width
  SummarizedExperiment::assay(windows) <- counts / width_units
  return(windows)
}
-------------------------------------------------------------------------------- /docs/atacr_which.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | atacr objects and the which argument 18 | 19 | 20 | 21 | 22 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 |

atacr objects and the which argument

72 |

Dan MacLean

73 |

2018-03-21

74 | 75 | 76 | 77 |
78 |

The atacr Object

79 |

When make_counts() is run, an atacr object is returned. This is a simple, somewhat informal object based on the R list type. It is basically an R list with the following members:

80 |
    81 |
  1. treatments - a character vector of treatment names
  2. 82 |
  3. sample_names - a character vector of sample names
  4. 83 |
  5. bam_files - a character vector of paths for the used BAM files
  6. 84 |
  7. bait_regions - a GenomicRanges::GRanges object describing the bait window regions
  8. 85 |
  9. bait_windows - a RangedSummarizedExperiment object containing the counts in the windows in bait_regions
  10. 86 |
  11. non_bait_windows - a RangedSummarizedExperiment object containing the counts in the windows in the regions outside bait_regions
  12. 87 |
  13. whole_genome - the union of bait_windows and non_bait_windows
  14. 88 |
  15. dataframe - an optional member and the result of calling as.data.frame() on the atacr object
  16. 89 |
90 |
91 |

Column Order

92 |

The RangedSummarizedExperiment objects carry the count data. They are organised as a matrix with rows representing windows and columns representing samples. The column order is conserved and is the same as the order of the treatments, sample_names and bam_files members.

93 |
94 |
95 |
96 |

The ‘which’ argument

97 |

Many of the functions let you state, with the which argument, which member of the atacr list (really a RangedSummarizedExperiment) the function should be applied to, e.g.

98 |
plot_counts(counts, which = "bait_windows", log10 = FALSE)
99 |
100 |
101 |

Adding members to the atacr object

102 |

Functions that return a RangedSummarizedExperiment can therefore be used to add new members to the list, and you can then work on those members just as you would with the built-in ones. This is especially useful for normalisations.

103 |
counts$by_sample <- library_size_normalisation(counts, 
104 |                                              by_treatment = TRUE)
105 | 
106 | plot_counts(counts, which = "by_sample", log10 = FALSE)
107 |
108 | 109 | 110 | 111 | 112 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /R/loading.R: -------------------------------------------------------------------------------- 1 | #' load BAM files and calculate window coverage 2 | #' @export 3 | #' @param window_file A filename of a CSV file with the bait regions 4 | #' @param sample_treatment_file A filename of a CSV file that lists treatments, samples and bam file paths 5 | #' @param width an integer of the width of the bins the bait regions will be divided into 6 | #' @param filter_params a params object from atacr::make_params() that define how reads will be extracted from the BAM files. Optionally, for greater control, either a csaw::readParam() (for ATACseq) or Rsamtools::ScanBamParam() object for RNASeq can be provided. See http://bioconductor.org/packages/release/bioc/manuals/csaw/man/csaw.pdf or https://www.rdocumentation.org/packages/Rsamtools/versions/1.24.0/topics/ScanBamParam for details 7 | #' @param is_rnaseq a boolean stating whether this is RNASeq data. Default = FALSE 8 | #' @param gene_id_col a character string stating which attribute name to take from the final column of the GFF file to use for the window name in RNASeq data. Usually this is the name of the gene. Default = ID. 
#' @param with_df attach a dataframe version of the data Default = FALSE
#' @return a list of metadata and RangedSummarizedExperiment objects with read count in windows for whole genome, bait windows and non-bait windows for each sample
make_counts <-
  function(window_file,
           sample_treatment_file,
           width = 50,
           filter_params = make_params(), #csaw::readParam(minq = 50),
           with_df = FALSE,
           is_rnaseq = FALSE,
           gene_id_col = "ID") {
    result <- list()
    class(result) <- c("atacr", "list")

    sample_treatment_file_mapping <-
      read_experiment_info(sample_treatment_file)
    result$bam_files <-
      as.character(sample_treatment_file_mapping$bam_file_path)
    result$treatments <-
      as.character(sample_treatment_file_mapping$treatment)
    result$sample_names <-
      as.character(sample_treatment_file_mapping$sample_name)

    if (!is_rnaseq) {
      result <- load_atac(result, width, filter_params, window_file)
    }
    else {
      # forward gene_id_col: without it load_rnaseq() always falls back to its
      # own default and the caller's choice is ignored
      result <- load_rnaseq(result, filter_params, window_file, gene_id_col = gene_id_col)
    }

    if (with_df) {
      result$dataframe <- as.data.frame(result)
    }
    return(result)
  }

#' set read filters for counting from the BAM file.
#' @export
#' @param paired_map Should reads only be included if they are aligned in pairs. Default = TRUE
#' @param minq The minimum mapping quality to retain a read. Default = 30
#' @param dedup Should removal of PCR duplicates be performed. Default = TRUE
#' @return a named vector of class "atacr_params". The values are combined with c(), so with the default arguments the logical flags are stored as 1/0.
make_params <- function(paired_map = TRUE, minq = 30, dedup = TRUE){

  params <- c(paired_map, minq, dedup)
  names(params) <- c("paired_map", "minq", "dedup")
  class(params) <- c("atacr_params")
  return(params)

}

#' reads a csv file containing the bait regions
#' @param file_name path to a csv file containing the bait regions.
#' File must have a header with columns `bait_name`, `seq_name`, `start_pos`, `end_pos`.
#' @return GenomicRanges object of bait regions
get_bait_regions_from_text <- function(file_name) {
  df <- read.csv(file_name, sep = ",", header = TRUE)

  if ( !all(c("bait_name", "seq_name", "start_pos", "end_pos") %in% colnames(df)) ) {
    stop("File must have a header with columns bait_name, seq_name, start_pos, end_pos.")
  }

  bait_regions <- GenomicRanges::GRanges(
    seqnames = S4Vectors::Rle(df$seq_name),
    ranges = IRanges::IRanges(
      df$start_pos,
      end = df$end_pos,
      # read.csv() may return a factor here (default before R 4.0)
      names = as.character(df$bait_name)
    )
  )

  return(bait_regions)

}

#' reads a gff file containing the bait regions
#' @param file_name path to the file containing the bait regions
#' @return GenomicRanges object of bait regions
get_bait_regions_from_gff <- function(file_name) {
  gff <- rtracklayer::import.gff(file_name)
  bait_regions <- as(gff, "GRanges")
  #bait_regions <- bait_regions[bait_regions$type %in% c("gene")]
  return(bait_regions)
}

#' format a csaw::readParam object from the atacr::make_params() object
#' @param p an object returned from atacr::make_params()
#' @return a csaw::readParam object
make_csaw_params <- function(p){
  # make_params() stores everything in one named vector, so the flags arrive
  # as named numbers; extract plain, correctly typed scalars before passing on.
  paired_map <- as.logical(p[["paired_map"]])
  return(
    csaw::readParam(
      minq = p[["minq"]],
      dedup = as.logical(p[["dedup"]]),
      pe = if (paired_map) "both" else "none"
    )
  )
}

#' populate the result object with the RangedSummarizedExperiment from the bam files from ATAC seq data. Called from make_counts() when is_rnaseq == FALSE.
#' @param result list from make_counts()
#' @param width an integer of the width of the bins the bait regions will be divided into
#' @param filter_params a params object, described in atacr::make_counts()
#' @param window_file a filename of a GFF file with the bait regions
#' @return a list with window counts for bait/non-bait windows
load_atac <- function(result, width, filter_params, window_file) {

  if ("atacr_params" %in% class(filter_params) ) {
    filter_params <- make_csaw_params(filter_params)
  }

  result$whole_genome <-
    csaw::windowCounts(
      result$bam_files,
      bin = TRUE,
      filter = 0,
      width = width,
      param = filter_params
    )

  ## name samples
  colnames(result$whole_genome) <- result$sample_names

  ## collect bait and non bait regions
  result$bait_regions <- get_bait_regions_from_gff(window_file)

  keep <-
    IRanges::overlapsAny(SummarizedExperiment::rowRanges(result$whole_genome),
                         result$bait_regions)

  result$bait_windows <- result$whole_genome[keep,]
  result$non_bait_windows <- result$whole_genome[!keep,]

  ## name the windows after the character form of their ranges
  result$whole_genome@rowRanges@ranges@NAMES <-
    as.character(result$whole_genome@rowRanges)
  result$bait_windows@rowRanges@ranges@NAMES <-
    as.character(result$bait_windows@rowRanges)
  result$non_bait_windows@rowRanges@ranges@NAMES <-
    as.character(result$non_bait_windows@rowRanges)

  return(result)
}

#' format a Rsamtools::ScanBamParam object from the atacr::make_params() object
#' @param p an object returned from atacr::make_params()
#' @param example_bam a filename pointing to a BAM file from which genome size can be taken
#' @return an Rsamtools::ScanBamParam object
make_scanBamParam <- function(p, example_bam){

  # presumably declared to silence R CMD check notes about the bare column
  # names used in dplyr::rename() below
  seqnames <- seqlength <- NULL

  # one range per reference sequence, spanning position 1 to its full length
  ranges <- Rsamtools::idxstatsBam(example_bam)
  ranges <- dplyr::mutate(ranges, start = 1)
  ranges <- dplyr::rename(ranges, seqname = seqnames, end = seqlength)
  ranges <- unlist(GenomicRanges::makeGRangesListFromDataFrame(ranges))

  dedup <- as.logical(p[["dedup"]])
  paired_map <- as.logical(p[["paired_map"]])

  return(Rsamtools::ScanBamParam(
    flag = Rsamtools::scanBamFlag(
      # A scanBamFlag value of NA means "do not filter on this flag".
      # Negating or passing the option straight through would, when the option
      # is off, select ONLY duplicates / ONLY improperly paired reads.
      isDuplicate = if (dedup) FALSE else NA,
      isProperPair = if (paired_map) TRUE else NA
    ),
    mapqFilter = p[["minq"]],
    which = ranges
  ))
}

#' populate the result object with the RangedSummarizedExperiment from the bam files from RNA seq data. Called from make_counts() when is_rnaseq == TRUE.
#' @param result list from make_counts()
#' @param filter_params a params object, described in atacr::make_counts()
#' @param window_file a filename of a GFF file with the bait regions
#' @param gene_id_col a character string stating which attribute name to take from the final column of the GFF file to use for the window name in RNASeq data. Usually this is the name of the gene. Default = ID.
load_rnaseq <-
  function(result,
           filter_params,
           window_file,
           gene_id_col = "ID") {

    if ("atacr_params" %in% class(filter_params) ) {
      filter_params <- make_scanBamParam(filter_params, result$bam_files[1])
    }

    bams <- Rsamtools::BamFileList(result$bam_files)
    names(bams) <- result$sample_names

    result$bait_regions <- get_bait_regions_from_gff(window_file)
    non_bait_regions <-
      GenomicRanges::gaps(result$bait_regions) #the intergene regions

    result$bait_windows <-
      GenomicAlignments::summarizeOverlaps(
        features = result$bait_regions,
        reads = bams,
        ignore.strand = TRUE,
        param = filter_params
      )
    result$non_bait_windows <-
      GenomicAlignments::summarizeOverlaps(
        features = non_bait_regions,
        reads = bams,
        ignore.strand = TRUE,
        param = filter_params
      )

    # Name bait windows from the requested GFF attribute when it exists,
    # otherwise from their coordinates.
    if (c(gene_id_col) %in% names(result$bait_regions@elementMetadata@listData)) {
      result$bait_windows@rowRanges@ranges@NAMES <-
        as.character(result$bait_regions@elementMetadata@listData[[gene_id_col]])
    }
    else {
      result$bait_windows@rowRanges@ranges@NAMES <- make_range_names(
        # seqnames is run-length encoded; expand it to one value per range
        # (the raw @values slot holds one entry per run and would misalign
        # with start/width)
        as.character(result$bait_regions@seqnames),
        result$bait_regions@ranges@start,
        result$bait_regions@ranges@width
      )
    }

    result$non_bait_windows@rowRanges@ranges@NAMES <- make_range_names(
      as.character(non_bait_regions@seqnames),
      non_bait_regions@ranges@start,
      non_bait_regions@ranges@width
    )

    # NOTE(review): presumably the GFF metadata columns are cleared so the
    # bait and non-bait objects can be rbind()-ed below - confirm
    result$bait_windows@rowRanges@elementMetadata@listData <- list()
    result$whole_genome <-
      rbind(result$bait_windows, result$non_bait_windows)
    colnames(result$whole_genome) <-
      colnames(result$bait_windows) <-
      colnames(result$non_bait_windows) <- result$sample_names

    return(result)
  }

# Build "chr:start-end" labels. Coordinates are inclusive, so a range that
# starts at `start` and is `width` long ends at start + width - 1.
make_range_names <- function(chr, start, width) {
  end <- start + width - 1
  return(paste0(chr, ":", start, "-", end))
}

#' Loads in a CSV file describing treatment, samples and bam files
#' @param filename path and name of the file to load
#' @param should_be character vector of column names the file must contain
#' @return a data.frame of the file contents; stops with an error if any required column is missing
read_experiment_info <- function(filename, should_be = c("treatment", "sample_name", "bam_file_path")) {
  ## read in file with columns 'treatment, sample_name, bam_file_path'
  info <- read.csv(filename, header = TRUE, sep = ",")
  if (all(should_be %in% colnames(info))) {
    return(info)
  }
  else{
    stop("experiment mapping file should have headings: ", paste0(should_be, collapse = " "))
  }
}

#' pulls lines out of a gff file based on identifiers provided
#' @export
#' @param ids character vector of ids/names of feature to extract
#' @param gff path to gff file
#' @param type feature type of features to extract.
#' @param col column name of GFF file containing id to use (ID)
#' @param out_file path of file name to write. If NULL, no file is written
#' @param version which gff version to export (Default is "3")
#' @return GenomicRanges or NULL with GFF outfile.
extract_features_from_gff <- function(ids, gff, type = c("gene"), col="ID", out_file = NULL, version = "3"){
  gff <- rtracklayer::import.gff(gff, "GFF")
  gene_features <- as(gff, "GRanges")
  gene_features <- gene_features[ gene_features$type %in% type ]
  gene_features <- gene_features[ gene_features@elementMetadata@listData[[col]] %in% ids ]
  if ( is.null(out_file) ) {
    return(gene_features)
  }
  else{
    rtracklayer::export.gff(gene_features, out_file, version = version)
  }

}

#' returns DGEList for edgeR from atacr object
#' @export
#' @param atacr an atacr object
#' @param which the subset of the data to work on
#' @param remove.zeros whether to remove rows that have 0 total count.
#' @return DGEList representing atacr data
as.DGEList <- function(atacr, which = "bait_windows", remove.zeros = FALSE ){
  edgeR::DGEList(SummarizedExperiment::assay(atacr[[which]]), group = atacr$treatments, remove.zeros = remove.zeros)

}
#' writes GFF3 version of a simple text file describing the bait region starts and stops
#' @export
#' @param text_in path to the file describing the bait regions. File must have a header with columns `bait_name`, `seq_name`, `start_pos`, `end_pos`.
#' @param gff_out path to the gff file to be created
#' @return the value of rtracklayer::export.gff3(); called for its side effect of writing `gff_out`
text_to_gff <- function(text_in, gff_out){
  if ( is.null(text_in) || is.null( gff_out ) ) {
    stop("must provide an input text file AND output text file")
  }

  bait_regions <- get_bait_regions_from_text(text_in)
  rtracklayer::export.gff3(bait_regions, gff_out)
}


#' make files to load tutorial data
#'
#' Writes a sample/treatment/BAM mapping CSV and copies the packaged
#' bait_regions.gff into `write_dir`.
#'
#' @param write_dir directory to put sample files in defaults to `getwd()`
#' @return a list with the paths `bait_regions_file` and `mapping_file`
#' @export
make_tutorial_data <- function(write_dir = getwd() ){

  out_mappings <- file.path(write_dir, "sample_treatment_bam_mappings.csv")

  # Ask for the extdata directory itself rather than deriving it from the path
  # of one file inside it: system.file() returns "" for a missing file, which
  # would make the directory lookup silently come back empty.
  extdata_dir <- system.file("extdata", package = "atacr")
  dir_names <- list.files(extdata_dir, include.dirs = TRUE, pattern = "ATAC", full.names = TRUE )

  # the mapping below describes exactly 3 mock + 3 infected samples
  if (length(dir_names) != 6) {
    stop("expected 6 ATAC sample directories in the atacr extdata directory, found ", length(dir_names))
  }

  df <- data.frame(
    "treatment" = c( rep("mock", 3), rep("infected", 3)),
    "sample_name" = c(paste0("mock_rep", 1:3), paste0("infected_rep", 1:3)),
    "bam_file_path" = file.path(dir_names, "alignedSorted.bam" )
  )

  write.csv(df, file = out_mappings, quote = FALSE, row.names = FALSE)

  out_gff <- file.path(write_dir, "bait_regions.gff")
  gff_source <- system.file("extdata/", "bait_regions.gff", package = "atacr")
  if (!nzchar(gff_source)) {
    warning("bait_regions.gff was not found in the atacr extdata directory; ", out_gff, " was not written")
  }
  else {
    file.copy(gff_source, out_gff )
  }
  return( list(bait_regions_file = out_gff, mapping_file = out_mappings))
}

-------------------------------------------------------------------------------- /R/differentials.R: --------------------------------------------------------------------------------
#' gets t-statistic for two vectors of data, x and y
#' @param data vector of sample data; after resampling, the first half is taken as x and the second half as y
#' @param indices indices selected by boot::boot
#' @return t the t statistic from Student's t-test or NA if the test raised a warning or an error
get_t <- function(data,indices){
  d <- data[indices]

  e <- length(d)
  f <- floor(e/2)
  x <- d[1:f]
  y <- d[(f+1):e]


  stat <- tryCatch({
    t.test(x,y)$statistic
  },
  warning = function(w){
    return(NA)
  },
  error = function(e){
    return(NA)
  },
  finally = {}
  )
  return(stat)
}


#'runs bootstrap t test, wrapper required for boot::boot function
#' @param data vector of sample data for one window
#' @param iterations number of bootstrap iterations to run
#' @return vector of 2 values, observed value t statistic and p, calculated as the proportion of bootstrap iterations more extreme than the original t (greater when t is non-negative, smaller when t is negative)
bootstrap_t <- function(data, iterations=10){
  boot_res <- boot::boot(data, statistic = get_t, R = iterations)
  original <- boot_res$t0
  bootstraps <- boot_res$t
  p <- (sum(bootstraps > original) / iterations)
  if ( is.nan(original) | is.na(original) ) {
    # no observed statistic: propagate the missing value as p
    p <- original
  }
  else if (original < 0) {
    p <- sum(bootstraps < original) / iterations
  }
  return(c(original, p))
}

# Split the count matrix of `which` into the columns of each treatment.
select_comparisons <- function(data, treatment_a, treatment_b, which = "bait_windows"){
  l <- list()
  sample_matrix <- SummarizedExperiment::assay(data[[which]])
  treatment_a_cols <- data$sample_names[which(data$treatments == treatment_a) ]
  treatment_b_cols <- data$sample_names[which(data$treatments == treatment_b) ]
  # drop = FALSE keeps a matrix even when a treatment has a single sample
  l$treatment_a_data <- sample_matrix[, treatment_a_cols, drop = FALSE]
  l$treatment_b_data <- sample_matrix[, treatment_b_cols, drop = FALSE]
  return(l)
}

# Per-window mean count of each treatment, as a two column data.frame.
get_means <- function(data){

  mean_count_a <- apply(data$comparisons$treatment_a_data, 1, mean)
  mean_count_b <- apply(data$comparisons$treatment_b_data, 1, mean)
  result <- data.frame(
    c1 = mean_count_a,
    c2 = mean_count_b
  )
  colnames(result) <- c(paste0("mean_", data$treatment_a_name), paste0("mean_", data$treatment_b_name) )
  return( result )
}

# Per-window standard deviation of each treatment, as a two column data.frame.
get_sd <- function(data){
  sd_a <- apply(data$comparisons$treatment_a_data, 1, sd)
  sd_b <- apply(data$comparisons$treatment_b_data, 1, sd)
  result <- data.frame(
    c1 = sd_a,
    c2 = sd_b
  )
  colnames(result) <- c(paste0("sd_", data$treatment_a_name), paste0("sd_", data$treatment_b_name) )
  return( result )
}

# log2 of (mean of treatment a / mean of treatment b) for each window.
get_fc <- function(data){
  means <- get_means(data)
  return(data.frame(log2_fold_change = log2(means[,1] / means[,2])))
}

#' selects appropriate columns and names from an atacr object for a two treatment comparison
#' @param data an atacr object
#' @param treatment_a string naming the first treatment (numerator)
#' @param treatment_b string naming the second treatment (denominator)
#' @param which subset to work on Default = NULL
#' @return list of data to be calculated with
select_data <- function(data, treatment_a, treatment_b, which = NULL){

  comparison_list <- select_comparisons(data, treatment_a, treatment_b, which = which)
  # use the full element names rather than relying on `$` partial matching
  comparison_matrix <- cbind(comparison_list$treatment_a_data, comparison_list$treatment_b_data )

  return(
    list(
      counts = comparison_matrix,
      comparisons = comparison_list,
      treatment_a_names = data$sample_names[which(data$treatments == treatment_a)],
      treatment_b_names = data$sample_names[which(data$treatments == treatment_b)],
      treatment_a_name = treatment_a,
      treatment_b_name = treatment_b
    )
  )

}

# Stop unless both treatments have at least 3 replicates and equal numbers of them.
check_data <- function(d, treatment_a, treatment_b){

  if( length(d$treatment_a_names) < 3 || length(d$treatment_b_names) < 3 ){
    message <- paste("Need at least 3 replicates to perform estimate bootstrap t value. Have", length(d$treatment_a_names), "for", treatment_a, "and", length(d$treatment_b_names), "for", treatment_b)
    stop(message)
  }

  if(length(d$treatment_a_names) != length(d$treatment_b_names) ){
    message <- paste("Must have equal number of replicates in each treatment. Have", length(d$treatment_a_names), "for", treatment_a, "and", length(d$treatment_b_names), "for", treatment_b)
    stop(message)
  }

}

#' Estimate FDR and significantly different windows
#' @export
#' @param data an atacr object
#' @param treatment_a the first treatment to consider
#' @param treatment_b the second treatment to consider
#' @param which the subset of windows to consider
#' @param iterations the number of bootstrap iterations to perform
#' @param fdr_level the level at which to mark FDR as significant
#' @return dataframe of counts and statistics
estimate_fdr <- function(data, treatment_a, treatment_b, which = "bait_windows", iterations=10,fdr_level=0.05){


  d <- select_data(data, treatment_a, treatment_b, which)
  check_data(d, treatment_a, treatment_b)

  working_df <- as.data.frame(d$counts)
  row.names(working_df) <- rownames(d$counts)

  # only windows with at least one read are tested
  selected_df <- working_df[rowSums(working_df) > 0,]

  # apply() returns one column per window: row 1 is t, row 2 is p
  boot_matrix <- apply(selected_df, 1, bootstrap_t, iterations = iterations)

  selected_result <- as.data.frame(t(boot_matrix)) %>%
    dplyr::rename("fdr" = V2) %>%
    dplyr::mutate(window = colnames(boot_matrix))

  working_df$window <- row.names(working_df)

  result <- dplyr::left_join(working_df, selected_result, by = "window") %>%
    dplyr::mutate(is_sig = fdr <= fdr_level) %>%
    dplyr::bind_cols( get_means(d) ) %>%
    dplyr::bind_cols( get_sd(d) ) %>%
    dplyr::bind_cols( get_fc(d))
  return(result)

}

#' Estimate FDR and significantly different windows for many experiments
#' @export
#' @param data an atacr object
#' @param common_control the treatment to consider the control for all other treatments
#' @param which the subset of windows to consider
#' @param iterations the number of bootstrap iterations to perform
#' @param fdr_level the level at which to mark FDR as significant
#' @return a dataframe of statistics with one block of rows per treatment-vs-control comparison
estimate_fdr_multiclass <- function(data, common_control, which = "bait_windows", iterations = 10,fdr_level = 0.05) {
  # unique(): data$treatments holds one entry per sample, so without it every
  # comparison would be repeated once per replicate
  treatments <- unique(data$treatments[data$treatments != common_control])

  r <- list()
  for (i in seq_along(treatments)) {
    tr <- treatments[i]
    ct <- common_control

    df <- estimate_fdr(data,
                       tr,
                       ct,
                       which = which,
                       iterations = iterations,
                       fdr_level = fdr_level)
    df$a <- rep(tr, nrow(df))
    df$b <- rep(ct, nrow(df))
    # give the treatment-specific columns fixed names so results can be stacked
    colnames(df)[grep("mean_", colnames(df))] <- c("mean_a", "mean_b")
    colnames(df)[grep("sd_", colnames(df))] <- c("sd_a", "sd_b")
    df <- df[, c("window", "fdr", "is_sig", "mean_a", "mean_b", "sd_a", "sd_b", "log2_fold_change", "a", "b")]
    r[[i]] <- df
  }
  return(do.call(rbind, r))

}

# Bayes factor from BayesFactor::ttestBF for one window's counts, comparing
# the samples of treatment a with those of treatment b.
bayes_t <- function(counts, treatment_a_names, treatment_b_names){

  a <- counts[treatment_a_names]
  b <- counts[treatment_b_names]
  bf <- BayesFactor::ttestBF(a,b)
  return(bf@bayesFactor$bf)
}


#' Estimate Bayes Factor and
#' significantly different windows
#' @export
#' @param atacr an atacr object
#' @param treatment_a the first treatment to consider
#' @param treatment_b the second treatment to consider
#' @param which the subset of windows to consider
#' @param factor the BayesFactor at which to mark window as significant
#' @return a dataframe of counts and statistics
estimate_bayes_factor <- function(atacr, treatment_a, treatment_b, which = "bait_windows", factor = 4){

  d <- select_data(atacr, treatment_a, treatment_b, which)
  check_data(d, treatment_a, treatment_b)

  working_df <- as.data.frame(d$counts)
  row.names(working_df) <- rownames(d$counts)

  # only windows with at least one read are tested
  selected_df <- working_df[rowSums(working_df) > 0,]

  selected_result <- apply(selected_df, 1, bayes_t, treatment_a_names = d$treatment_a_names, treatment_b_names = d$treatment_b_names)

  selected_result <- data.frame(
    bayes_factor = selected_result,
    window = row.names(selected_df)
  )

  working_df$window <- row.names(working_df)
  result <- dplyr::left_join(working_df, selected_result, by = "window") %>%
    dplyr::mutate(is_sig = bayes_factor >= factor) %>%
    dplyr::bind_cols( get_means(d) ) %>%
    dplyr::bind_cols( get_sd(d) ) %>%
    dplyr::bind_cols( get_fc(d))
  return(result)
}

#' Estimate BayesFactor and mark significantly different windows for many experiments
#' @export
#' @param data an atacr object
#' @param common_control the treatment to consider the control for all other treatments
#' @param which the subset of windows to consider
#' @param factor the BayesFactor to consider significant
#' @return a dataframe of counts and statistics
estimate_bayes_factor_multiclass <- function(data, common_control, which = "bait_windows", factor = 4) {
  treatments <- unique(data$treatments[data$treatments != common_control])
  r <- list()
  for (i in seq_along(treatments)) {
    tr <- treatments[i]
    ct <- common_control
    df <- estimate_bayes_factor(data,
                                tr,
                                ct,
                                which = which,
                                factor = factor)
    df$a <- rep(tr, nrow(df))
    df$b <- rep(ct, nrow(df))
    # give the treatment-specific columns fixed names so results can be stacked
    colnames(df)[grep("mean_", colnames(df))] <- c("mean_a", "mean_b")
    colnames(df)[grep("sd_", colnames(df))] <- c("sd_a", "sd_b")
    df <- df[, c("window", "bayes_factor", "is_sig", "mean_a", "mean_b", "sd_a", "sd_b", "log2_fold_change", "a", "b")]
    r[[i]] <- df
  }

  return(do.call(rbind, r))

}
#' Estimate differential window counts and mark significantly different windows using edgeR exact method for two samples
#' @export
#' @param atacr an atacr object
#' @param which the subset of windows to consider
#' @param treatment_a the first treatment to consider
#' @param treatment_b the second treatment to consider
#' @param remove_zeros apply edgeR remove.zeros argument
#' @param sig_level the p_value to consider significant
#' @return a dataframe of counts and statistics
edgeR_exact <- function(atacr, which = "bait_windows", treatment_a = NULL, treatment_b = NULL, remove_zeros = FALSE, sig_level = 0.05 ){

  data <- select_data(atacr, treatment_a, treatment_b, which)
  working_df <- as.data.frame(data$counts)
  row.names(working_df) <- rownames(data$counts)

  group <- c(rep(treatment_a, length(data$treatment_a_names)), rep(treatment_b, length(data$treatment_b_names)) )

  dg <- edgeR::DGEList(data$counts, group = group, remove.zeros = remove_zeros)
  dg <- edgeR::estimateDisp(dg)
  et <- edgeR::exactTest(dg)

  selected_result <- data.frame(
    window = rownames(et$table),
    p_value = et$table$PValue
  )

  working_df$window <- row.names(working_df)

  # left join: windows absent from the edgeR table get NA for p_value/is_sig
  result <- dplyr::left_join(working_df, selected_result, by = "window") %>%
    dplyr::mutate(is_sig = p_value <= sig_level) %>%
    dplyr::bind_cols( get_means(data) ) %>%
    dplyr::bind_cols( get_sd(data) ) %>%
    dplyr::bind_cols( get_fc(data))
  return(result)
}
#' Estimate differential window counts and mark significantly different windows using edgeR glmFIT method for multiple samples with common control
#' @export
#' @param data an atacr object
#' @param common_control the treatment to consider the control for all other treatments
#' @param which the subset of windows to consider
#' @param sig_level the p_value to consider significant (currently unused by the active code)
#' @param remove_zeros apply edgeR remove.zeros argument
#' @return a list, named by treatment, holding the edgeR::glmQLFTest() result of each treatment-vs-control comparison
edgeR_multiclass <- function(data, common_control, which = "bait_windows", sig_level = 0.05, remove_zeros = FALSE){

  ctrl_idcs <- which(data$treatments == common_control)
  other_idcs <- which(data$treatments != common_control)
  if (length(ctrl_idcs) == 0) {
    stop("common_control '", common_control, "' is not one of the treatments")
  }
  new_order <- c(ctrl_idcs, other_idcs)

  # Control samples come first, so taking levels in order of appearance makes
  # the control the reference (intercept) level, and coefficient i of the
  # design corresponds to treatment_levels[i].
  ordered_treatments <- as.character(data$treatments[ new_order ])
  treatment_levels <- unique(ordered_treatments)
  treatments <- factor(ordered_treatments, levels = treatment_levels)
  samples <- data$sample_names[ new_order ]

  df <- data.frame(sample = samples, treatment = treatments)
  design <- model.matrix(~treatment, data = df)

  # reorder the count columns the same way as the design rows; otherwise the
  # samples are matched to the wrong treatment
  counts <- SummarizedExperiment::assay(data[[which]])[, new_order, drop = FALSE]
  dglist <- edgeR::DGEList(counts, remove.zeros = remove_zeros)

  dglist <- edgeR::estimateDisp(dglist, design)
  fit <- edgeR::glmQLFit(dglist, design)

  dgelrts <- list()

  # coefficient 1 is the intercept (control); test every other treatment
  for (i in seq_along(treatment_levels)[-1]) {
    curr_t <- treatment_levels[i]
    dgelrts[[curr_t]] <- edgeR::glmQLFTest(fit, coef = i)
  }

  return(dgelrts)
  # result <- list()
  #
  # for(n in names(dgelrts)){
  #   tb <- dgelrts[[n]]$table
  #   df <- data.frame(
  #     window = rownames(tb),
  #     p_value = tb$PValue,
  #     f = tb$F
  #   )
  #
  #
  #   dlist <- select_data(data, n, common_control, which)
  #   df$is_sig <- (df$p_value <= sig_level)
  #
  #   df <- cbind(df, get_means(dlist$comparisons))
  #
  #   #add sd
  #   df <- cbind(df, get_sd(dlist$comparisons))
  #   #add log2 fc
  #   df <- get_fc(df)
  #   df$a <- rep(n, nrow(df))
  #   df$b <- rep(common_control, nrow(df))
  #   result[[n]] <- df
  #
  # }
  # result <- do.call(rbind, result)
  # rownames(result) <- NULL
  # return(result)



}
-------------------------------------------------------------------------------- /docs/differential_windows.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | Differentially accessible or expressed windows 18 | 19 | 20 | 21 | 22 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 |

Differentially accessible or expressed windows

72 |

Dan MacLean

73 |

2018-03-21

74 | 75 | 76 | 77 |

Finding differentially expressed or accessible windows is possible with two related functions in atacr: estimate_fdr(), which implements bootstrap t-tests via the boot package, and estimate_bayes_factor(), which implements a Bayes factor ANOVA using the BayesFactor package. A tidy dataframe of results is returned in each case.

78 |
79 |

Bootstrap t-tests

80 |

For simple comparison of two treatments with bootstrap t tests, provide treatment ‘a’ and ‘b’ names and the number of bootstrap iterations (default is 10, which is fast for testing code, but useless analytically). You can set the threshold for marking as significant with fdr_level.

81 |
 result <- estimate_fdr(normalized_counts,
 82 |               treatment_a =  "treatment",
 83 |               treatment_b = "control",
 84 |               iterations = 100000,
 85 |               fdr_level = 0.01)
86 |
##                  window         t p_value       fdr mean_count_a
 87 | ## 1    synth_chrom:1-50:- -1.938336     0.1 0.2130435    5632.6667
 88 | ## 2  synth_chrom:51-100:+ -1.218827     0.2 0.3062500    9758.6667
 89 | ## 3 synth_chrom:101-150:-  4.107091     0.0 0.0000000     205.3333
 90 | ## 4 synth_chrom:151-200:- -1.510404     0.2 0.3062500   15202.6667
 91 | ## 5 synth_chrom:251-300:-  3.308530     0.0 0.0000000   39171.0000
 92 | ## 6 synth_chrom:301-350:-  1.400435     0.3 0.3868421      62.0000
 93 | ##   mean_count_b        sd_a         sd_b   log2_fc is_sig
 94 | ## 1  15382.66667  6377.90407  5935.290164 -1.449416  FALSE
 95 | ## 2  20613.66667  4435.08403 14774.507279 -1.078845  FALSE
 96 | ## 3     83.66667    50.46121     9.291573  1.295243   TRUE
 97 | ## 4  36699.33333 19444.72315 15152.091814 -1.271429  FALSE
 98 | ## 5  15567.33333 12216.06745  1859.435488  1.331264   TRUE
 99 | ## 6     12.33333    59.80803    14.011900  2.329705  FALSE
100 |

The output has columns as follows:

101 | 113 |

To analyse all treatments against a common comparison at once you can use the wrapper function estimate_fdr_multiclass(), which requires the name of the common comparison treatment.

114 |
multi_result <-  estimate_fdr_multiclass(normalized_counts,
115 |               common_control = "control",
116 |               iterations = 100000,
117 |               fdr_level = 0.01)
118 | 
119 | head(multi_result)
120 |
##                  window         t p_value       fdr mean_count_a
121 | ## 1    synth_chrom:1-50:- -1.938336     0.0 0.0000000    5632.6667
122 | ## 2  synth_chrom:51-100:+ -1.218827     0.0 0.0000000    9758.6667
123 | ## 3 synth_chrom:101-150:-  4.107091     0.0 0.0000000     205.3333
124 | ## 4 synth_chrom:151-200:- -1.510404     0.1 0.1689655   15202.6667
125 | ## 5 synth_chrom:251-300:-  3.308530     0.0 0.0000000   39171.0000
126 | ## 6 synth_chrom:301-350:-  1.400435     0.1 0.1689655      62.0000
127 | ##   mean_count_b        sd_a         sd_b   log2_fc is_sig         a       b
128 | ## 1  15382.66667  6377.90407  5935.290164 -1.449416   TRUE treatment control
129 | ## 2  20613.66667  4435.08403 14774.507279 -1.078845   TRUE treatment control
130 | ## 3     83.66667    50.46121     9.291573  1.295243   TRUE treatment control
131 | ## 4  36699.33333 19444.72315 15152.091814 -1.271429  FALSE treatment control
132 | ## 5  15567.33333 12216.06745  1859.435488  1.331264   TRUE treatment control
133 | ## 6     12.33333    59.80803    14.011900  2.329705  FALSE treatment control
134 |

The results here have two extra columns:

135 | 139 |
140 |
141 |

Bayes Factor Analysis

142 |

A similar pair of functions is available for Bayes factor analysis. estimate_bayes_factor() performs the two-way comparison. The factor argument sets the Bayes factor at which to mark the window as having different counts.

143 |
result_bf <-  estimate_bayes_factor(normalized_counts,
144 |                            treatment_a =  "treatment",
145 |                            treatment_b = "control",
146 |                                 factor = 2.0)
147 | 
148 | head(result_bf)
149 |
##                  window bayes_factor is_sig mean_count_a mean_count_b
150 | ## 1    synth_chrom:1-50:-   0.20483396  FALSE    5632.6667  15382.66667
151 | ## 2  synth_chrom:51-100:+  -0.20139051  FALSE    9758.6667  20613.66667
152 | ## 3 synth_chrom:101-150:-   1.39100629  FALSE     205.3333     83.66667
153 | ## 4 synth_chrom:151-200:-  -0.04237361  FALSE   15202.6667  36699.33333
154 | ## 5 synth_chrom:251-300:-   0.98251046  FALSE   39171.0000  15567.33333
155 | ## 6 synth_chrom:301-350:-  -0.10371176  FALSE      62.0000     12.33333
156 | ##          sd_a         sd_b   log2_fc
157 | ## 1  6377.90407  5935.290164 -1.449416
158 | ## 2  4435.08403 14774.507279 -1.078845
159 | ## 3    50.46121     9.291573  1.295243
160 | ## 4 19444.72315 15152.091814 -1.271429
161 | ## 5 12216.06745  1859.435488  1.331264
162 | ## 6    59.80803    14.011900  2.329705
163 |

Again, an estimate_bayes_factor_multiclass() function works for all comparisons to a common control.

164 |

The results data frame is similar to that from the Bootstrap t methods, with a bayes_factor column in place of the t, p_value and fdr columns.

165 |
166 | 167 | 168 | 169 | 170 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /docs/tutorial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "A worked example with atacR" 3 | author: "Dan MacLean" 4 | date: "`r Sys.Date()`" 5 | output: html_document 6 | 7 | --- 8 | ```{r, echo = FALSE} 9 | knitr::opts_chunk$set( 10 | warning = FALSE, 11 | message = FALSE 12 | ) 13 | ``` 14 | 15 | ## About the experiment 16 | 17 | The experiment we'll be running through is an ATAC-cap-seq experiment of _Arabidopsis_ plant leaves taken from plants exposed to either a mock (water) treatment or infected with a pathogen. We have the bare minimum replicate number, just three independent samples for each of the mock or infected treatments. The reads are paired-end and 50 nt long. The BAM files are sorted using `samtools` from SAM files generated by `BWA` but at the start no further processing has been done. The paths to the BAM files and the bait region coordinates on your system are described above. 18 | 19 | ## Preparing input files 20 | 21 | The first step of any `atacR` analysis is to build the input files, these are basically files listing where the important data files are on your system. 22 | You'll need a file listing the BAM files and a file listing the bait windows. These are described more fully in the [loading vignette](loading.html). 23 | 24 | The `atacR` package comes with a small set of ATAC-cap-seq data built in. It is installed along with the package, and we'll use that data in this tutorial. 25 | 26 | A convenience function is built into `atacR` that will find the built-in data and build the files you need to follow this tutorial. All you need to do is decide where those files should be written. Below we write them to the `Desktop`. Two files will appear on the `Desktop` - `bait_regions.gff` and `sample_treatment_bam_mappings.csv`. 
If you inspect these you'll see the structure. 27 | 28 | 29 | ```{r} 30 | library(atacr) 31 | input_files <- make_tutorial_data("~/Desktop/") 32 | input_files 33 | ``` 34 | 35 | The object `input_files` holds the paths to the input files we made, so we'll use that to get going. 36 | 37 | ## Generating read counts 38 | 39 | Once we have the files ready, we can begin analysis. We can extract information from the files and make the counts we're interested in with the `make_counts()` function. In this step we'll set the read filter parameters to decide which reads and alignments in the BAM file are of sufficiently good quality to be counted. 40 | 41 | 42 | Depending on whether you have ATAC-cap-seq or RNA-cap-seq this function does slightly different things. If you have ATAC-cap-seq, this function divides the bait regions in the genome into sub-windows of a fixed width. If you have RNA-cap-seq the whole bait region is considered to be a single window. 43 | 44 | Below we will use the default window sizes (50nt, non-overlapping) and read filters (described in the [loading vignette](loading.html) ). As this is ATAC-cap-seq data we need to specify that too. 45 | 46 | ```{r} 47 | counts <- make_counts(input_files$bait_regions_file, 48 | input_files$mapping_file, 49 | is_rnaseq = FALSE 50 | ) 51 | ``` 52 | 53 | The resulting object `counts` has a few slots containing information. The most important are `bait_windows` which describes the windows in the bait regions and `non_bait_windows` which describes all the spaces in between the `bait_windows`. By default all functions will work on `bait_windows` but you can change the subset using the `which` parameter (see the [atacr which](atacr_which.html) vignette for more information). 54 | 55 | ## Summarising data 56 | 57 | Once everything is loaded, it is a good idea to check the counts object is as you expect. The `summary()` function does this. 
58 | 59 | ### Summary statistics 60 | 61 | ```{r, } 62 | summary(counts) 63 | ``` 64 | 65 | The summary is very long but worthwhile. A feature of `atacR` is that it keeps counts in non-bait region windows. Non-bait region windows are those outside the bait regions. The non-bait regions are not the same size as the bait window regions - A single non-bait window covers all the space between the last window of one bait region and the first window of the next. 66 | 67 | - The `treatments` line gives the two classes of data that `atacR` understands you have, here `mock` and `infected`. 68 | - The `samples` line gives the samples and replicate information 69 | - The `Bait regions used` line gives the bait region count 70 | - The `Total Windows` line tells how many windows those baits are divided into. 71 | - The `On/Off target read counts` section tells how many reads are in the windows (`on_target`) and how many are outside (`off_target`) for each sample 72 | - The `Quantiles` section shows the read count at each quantile for each sample in the windows in bait regions or non-bait regions 73 | - The `Read depths` section shows the `on_target` and `off_target` region average read depths. 74 | 75 | As we can see the coverage in this small sample is relatively low - that's an artefact of small files to keep the tutorial running quite quickly. But most windows have an average of ~ 10 counts and the off-target reads are very low. < 1 %. 76 | 77 | ### Summary and QC plots 78 | 79 | The `atacR` package has a range of summary and QC plots for visualising different aspects of the data. 80 | 81 | The samples can be inspected through plots. The standard `plot` function creates a few summary style plots enabling you to view coverage distribution and region density. As it summarises windows, the more windows you have, the slower it runs! 
82 | 83 | ```{r plot_plot, cache=TRUE, eval=TRUE} 84 | plot(counts) 85 | ``` 86 | 87 | A coverage threshold plot can reveal the number of windows that have coverage lower than a given value. 88 | 89 | ```{r coverage_threshold, eval=TRUE} 90 | windows_below_coverage_threshold_plot(counts) 91 | ``` 92 | Here we can see that `mock_rep1` and `mock_rep3` have fewer windows below the coverage threshold, so are generally better covered. 93 | 94 | We can see from all these that although the read mapping and filtering is specific to the bait regions, quite a lot of windows (~ 2000 in each sample) have counts of 0, which indicates that some of the DNA in the sequence regions was not sampled. You may wish to play with window size settings to see how robust this phenomenon is to window size. Increasing the window size will likely reduce the zero count windows number by merging counts from adjacent windows. Decreasing the window size will likely increase zero count windows. The level of granularity you use will be study dependent and if you intend to conclude absence of counts (e.g. for detection of closed chromatin) then you'll want to be very careful with comparison to specific control windows to support that conclusion. 95 | 96 | ### Specific window counts 97 | 98 | You can examine specific window counts quite easily. The internal object holding the data is of class `SummarizedExperiment`, which is part of BioConductor, so you can use functions in standard BioConductor packages to interrogate them. Here's how you might get information on specific window counts. 99 | 100 | First you must create a region of interest. Use the [GenomicRanges](https://bioconductor.org/packages/release/bioc/html/GenomicRanges.html) package to do this. 
101 | 102 | ```{r, eval=TRUE} 103 | roi <- GenomicRanges::GRanges(seqnames = "Chr1", ranges = 245951:246250) 104 | ``` 105 | 106 | Next, subset the window set of interest with [IRanges](https://bioconductor.org/packages/release/bioc/html/IRanges.html) 107 | 108 | ```{r, eval=TRUE} 109 | small_section <- IRanges::subsetByOverlaps(counts$bait_windows, roi) 110 | ``` 111 | 112 | The resulting object is a [SummarizedExperiment](https://www.bioconductor.org/packages/devel/bioc/vignettes/SummarizedExperiment/inst/doc/SummarizedExperiment.html) which doesn't print literally, as it can be quite big. To get at the actual count matrix, use the `assay` function. 113 | 114 | ```{r, eval=TRUE} 115 | SummarizedExperiment::assay(small_section) 116 | ``` 117 | 118 | #### Sample reproducibility 119 | 120 | A PCA plot can be used to examine the similarity between the different samples. Here we can see that two of the infected replicates are way off from each other and the other more similar samples. You can use these plots to identify any samples that are extremely different from the others. As all of the infected samples are quite different in different ways, we may be seeing just a large amount of experimental variability in our results, which can be important too. So we'll proceed with the data, keeping in mind that variability may be large and for particular treatments, we may need to gather more replicates. 121 | 122 | ```{r pca_plot, cache =TRUE, eval = TRUE} 123 | sample_pca_plot(counts) 124 | ``` 125 | 126 | An MA plot can show you eccentricities in each sample ([see the wiki page for more information](https://en.wikipedia.org/wiki/MA_plot)). In the `atacR` MA plot a common reference is used: the median value for each window serves as the common denominator for every sample. 127 | 128 | ```{r ma_plot, eval=TRUE} 129 | ma_plot(counts) 130 | ``` 131 | 132 | In this MA plot we see some structure in the data; the strong lines in each subplot indicate the points with zero for the count. 
The infected overall show higher counts than the mock. The usual assumption of most windows not changing between samples may not hold here, as the clouds of points seem quite shifted between mock and infected. 133 | 134 | ## Normalisation 135 | 136 | The normalisation step helps us to reduce systematic between-sample variability. Sequence data are hard to normalise, and cannot be normalised well by simple scaling. For RNASeq data there are numerous methods such as FPKM etc that sort of normalise. The best approaches with ATAC-cap-seq data are to find the least varying windows, then calculate factors and use those to scale the rest of the data with. 137 | 138 | `atacR` provides three types of normalisation. These are 139 | 140 | 1. Library size 141 | 2. Scale factor 142 | 3. Goodness of Fit 143 | 144 | The best of these is 3. Goodness of Fit. It is fast, automatically finds the least varying and best features in the data to normalise with and does a reasonable job of between-sample normalisation. It is usually the best one to choose. It is particularly useful when you don't know whether many windows will be changing or just a few will be, as it should perform the same regardless. 145 | 146 | The Library size normalisation is the most basic and the one that most studies seem to use for normalisation - the basis of this is that each count is divided by the mean count for all samples in that treatment the sample. For most ATAC purposes this will be underpowered, because the low number of windows or high proportions of changing windows will cause skew between samples. This method is useful when you have reasonably high counts (> 20 mean) and you are certain few windows (< 10%) will display differential counts. 147 | 148 | The Scale factor normalisation is provided to allow interaction with other normalisations from other packages. With this you provide a number for each sample and the counts in each sample are divided by the respective number. 
It is only useful when you have some other method that generates factors that you wish to use to scale counts. 149 | 150 | Check out the [normalisations vignette](normalisations.html) for further information. 151 | 152 | ### Goodness of Fit normalisation 153 | 154 | Here we'll run Goodness of Fit (GoF) on the sample data. The first step is to run the GoF code and find the most stable windows across the samples to use to normalise. 155 | 156 | ```{r, GoF, eval = TRUE} 157 | auto_controls <- find_controls_by_GoF(counts) 158 | ``` 159 | 160 | We can use these to check the selected control windows have lower GoF than the non-selected windows using the `plot_GoF()` function. 161 | 162 | ```{r, eval = TRUE} 163 | plot_GoF(counts, controls = auto_controls) 164 | ``` 165 | They are better. They have a lower, spikier mean Goodness of Fit. The Non-control data has a long tail distribution so the difference is quite pronounced. So we can now generate the normalisation factors and apply them. We'll save the resulting information to a new slot in the counts object. Then we'll plot the pre- and post- normalised data to see the effects of the normalisation. 166 | 167 | ```{r} 168 | gof_norm_factors <- get_GoF_factors(counts) 169 | 170 | gof_normalised_counts <- scale_factor_normalise(counts, 171 | scaling_factors = gof_norm_factors) 172 | 173 | counts$normalised_counts <- gof_normalised_counts 174 | 175 | 176 | plot_counts(counts) 177 | plot_counts(counts, which = "normalised_counts") 178 | ma_plot(counts) 179 | ma_plot(counts, which = "normalised_counts") 180 | 181 | ``` 182 | 183 | We can see that the distributions get a little closer to each other and that the spread in the data in MA plots is reduced a little. The variability in these data is quite high though. See the [normalisations vignette](normalisations.html) for further discussion. 
184 | 185 | ## Differential window counts 186 | 187 | Once you are happy with the normalisation, you can try to estimate which windows have differential counts. `atacR` gives you three methods. 188 | 189 | 1. edgeR exact test - this is a wrapper around the edgeR method for single factor designs, using the `estimateDisp` method. This method was designed for genome wide studies so works best when only a few (~5 %) of the windows are expected to have differential counts. It is the most sensitive in this situation though. 190 | 2. bootstrap _t_ test - this is a brute force method that uses resampling of each window's sample counts and recalculating of the Student's _t_ statistic to come up with a background distribution of _t_. If the observed _t_ is at the edges of this distribution, differential counts are called. This method is useful when any number of the windows may show differential counts. 191 | 3. Bayes Factor test - this calculates the [Bayes factor](http://bayesfactor.blogspot.co.uk/2014/02/the-bayesfactor-package-this-blog-is.html) for each window. The ratio of the Bayes factor for control and test is returned on a window by window basis. If the ratio is over a given number (4 by default) a differential count is called. This method is useful when any number of the windows may show differential counts. 192 | 193 | See the [differential windows vignette](differential_windows.html) for further discussion. 194 | 195 | We can perform differential analysis in the following ways; we'll use the `which` argument (see [which vignette](atacr_which.html) ) to make sure we analyse the normalised counts with the bootstrap and Bayes factor methods (the edgeR exact test below is run on the `bait_windows` counts). 
196 | 197 | ```{r differential, eval = TRUE} 198 | 199 | 200 | edgeRexact_result <- edgeR_exact(counts, 201 | which = "bait_windows", 202 | treatment_a = "infected", 203 | treatment_b = "mock", 204 | remove_zeros = TRUE) 205 | 206 | bootstrap_result <- estimate_fdr(counts, 207 | which = "normalised_counts", 208 | treatment_a = "infected", 209 | treatment_b = "mock", 210 | iterations = 10 211 | ) 212 | 213 | bayesfactor_result <- estimate_bayes_factor(counts, 214 | treatment_a = "infected", 215 | treatment_b = "mock", 216 | which = "normalised_counts" 217 | ) 218 | 219 | ``` 220 | 221 | The resulting dataframe holds the result of these calculations 222 | 223 | ```{r} 224 | 225 | head(bootstrap_result) 226 | 227 | head(bayesfactor_result) 228 | 229 | head(edgeRexact_result) 230 | ``` 231 | 232 | 233 | Each of these methods works on single factor designs, there is a `multiclass` variant that works on common control designs. See the [differential windows vignette](differential_windows.html) for further discussion of these. 234 | 235 | ```{r} 236 | bf_multi <- estimate_bayes_factor_multiclass(counts, "mock", 237 | factor = 0.5, 238 | which = "normalised_counts" 239 | ) 240 | 241 | head(bf_multi) 242 | 243 | fdr_multi <- estimate_fdr_multiclass(counts, "mock", 244 | fdr_level = 0.05, 245 | which = "normalised_counts" 246 | ) 247 | 248 | head(fdr_multi) 249 | 250 | ``` 251 | 252 | The `edgeR_multiclass()` function does not return a dataframe, instead it returns the native `DGELRT` objects (see [the DGELRT manual](https://www.rdocumentation.org/packages/edgeR/versions/3.14.0/topics/DGELRT-class) for more information) from each comparison in a `list()` object with names as per the treatment used. 253 | 254 | ```{r} 255 | edgeR_multiclass(counts,"mock", 256 | remove_zeros = TRUE, 257 | which = "bait_windows") 258 | ``` 259 | 260 | --------------------------------------------------------------------------------