├── LICENSE
├── docs
    ├── tutorial_cache
    │   └── html
    │   │   ├── __packages
    │   │   ├── pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdb
    │   │   ├── plot_plot_24f20174733d96dba5249a7d35c2c80c.rdb
    │   │   ├── pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdx
    │   │   ├── pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.RData
    │   │   ├── plot_plot_24f20174733d96dba5249a7d35c2c80c.rdx
    │   │   └── plot_plot_24f20174733d96dba5249a7d35c2c80c.RData
    ├── tutorial_files
    │   └── figure-html
    │   │   ├── ma_plot-1.png
    │   │   ├── pca_plot-1.png
    │   │   ├── plot_plot-1.png
    │   │   ├── unnamed-chunk-4-1.png
    │   │   ├── unnamed-chunk-5-1.png
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   ├── unnamed-chunk-8-2.png
    │   │   ├── unnamed-chunk-8-3.png
    │   │   ├── unnamed-chunk-8-4.png
    │   │   ├── unnamed-chunk-9-1.png
    │   │   ├── unnamed-chunk-9-2.png
    │   │   ├── unnamed-chunk-9-3.png
    │   │   ├── unnamed-chunk-9-4.png
    │   │   └── coverage_threshold-1.png
    ├── atacr_which.html
    ├── differential_windows.html
    └── tutorial.Rmd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── a1_smallSorted.bam
    │   ├── a2_smallSorted.bam
    │   ├── b1_smallSorted.bam
    │   ├── b2_smallSorted.bam
    │   ├── a1_smallSorted.bam.bai
    │   ├── a2_smallSorted.bam.bai
    │   ├── b1_smallSorted.bam.bai
    │   ├── b2_smallSorted.bam.bai
    │   ├── bait_genes.gff
    │   ├── sample_treatment_bam_mappings_for_test.csv
    │   ├── helper-functions.R
    │   ├── test_methods.R
    │   ├── test_differentials.R
    │   ├── test_normalisation.R
    │   ├── control_windows.txt
    │   ├── test_atacr.R
    │   └── test_loading.R
├── data
    ├── sim_counts.rda
    ├── small_counts.rda
    └── athal_wt_counts.rda
├── .gitignore
├── README-unnamed-chunk-2-1.png
├── .Rbuildignore
├── inst
    └── extdata
    │   ├── ATAC102
    │       ├── alignedSorted.bam
    │       └── alignedSorted.bam.bai
    │   ├── ATAC103
    │       ├── alignedSorted.bam
    │       └── alignedSorted.bam.bai
    │   ├── ATAC202
    │       ├── alignedSorted.bam
    │       └── alignedSorted.bam.bai
    │   ├── ATAC203
    │       ├── alignedSorted.bam
    │       └── alignedSorted.bam.bai
    │   ├── ATAC302
    │       ├── alignedSorted.bam
    │       └── alignedSorted.bam.bai
    │   ├── ATAC303
    │       ├── alignedSorted.bam
    │       └── alignedSorted.bam.bai
    │   └── tutorial_mappings.csv
├── man
    ├── simulate_counts.Rd
    ├── qqarb.Rd
    ├── treatments.Rd
    ├── make_tutorial_data.Rd
    ├── sample_pca_plot.Rd
    ├── print.atacr.Rd
    ├── gof.Rd
    ├── plot.atacr.Rd
    ├── ma_data.Rd
    ├── summary.atacr.Rd
    ├── as.data.frame.atacr.Rd
    ├── library_size_scaling_factors.Rd
    ├── library_size_normalisation_internal.Rd
    ├── make_csaw_params.Rd
    ├── get_t.Rd
    ├── get_bait_regions_from_gff.Rd
    ├── make_corrplot.Rd
    ├── assay_matrix_to_df.Rd
    ├── read_experiment_info.Rd
    ├── target_count_summary.Rd
    ├── Est.Depth.Rd
    ├── coverage_count_summary.Rd
    ├── target_count_coverage.Rd
    ├── as.matrix.atacr.Rd
    ├── median_virtual_experiment.Rd
    ├── as.DGEList.Rd
    ├── get_bait_regions_from_text.Rd
    ├── plot_counts.Rd
    ├── chromosome_coverage.Rd
    ├── small_counts.Rd
    ├── make_scanBamParam.Rd
    ├── text_to_gff.Rd
    ├── bootstrap_t.Rd
    ├── plot_GoF.Rd
    ├── sample_kmeans_cluster.Rd
    ├── get_expected_values.Rd
    ├── select_data.Rd
    ├── get_GoF_factors.Rd
    ├── estimate_GoFs.Rd
    ├── make_params.Rd
    ├── sample_correlation_plot.Rd
    ├── calc_quantiles.Rd
    ├── control_window_normalise_internal.Rd
    ├── view_gene.Rd
    ├── control_window_scaling_factors.Rd
    ├── observed_expected_bins.Rd
    ├── plot_count_by_chromosome.Rd
    ├── coverage_summary.Rd
    ├── ma_plot.Rd
    ├── estimate_bayes_factor.Rd
    ├── estimate_bayes_factor_multiclass.Rd
    ├── estimate_fdr.Rd
    ├── scale_factor_normalise.Rd
    ├── count_windows_under_threshold.Rd
    ├── edgeR_exact.Rd
    ├── make_UpSetR.Rd
    ├── estimate_fdr_multiclass.Rd
    ├── find_controls_by_GoF.Rd
    ├── edgeR_multiclass.Rd
    ├── windows_below_coverage_threshold_plot.Rd
    ├── extract_features_from_gff.Rd
    ├── load_atac.Rd
    ├── athal_wt_counts.Rd
    ├── load_rnaseq.Rd
    ├── normalise_by_window_width.Rd
    ├── library_size_normalisation.Rd
    ├── control_window_normalise.Rd
    ├── sim_counts.Rd
    └── make_counts.Rd
├── .travis.yml
├── atacr.Rproj
├── DESCRIPTION
├── NAMESPACE
├── vignettes
    ├── atacr_which.Rmd
    ├── summaries.Rmd
    ├── atacr.Rmd
    ├── normalisations.Rmd
    ├── differential_windows.Rmd
    └── loading.Rmd
├── README.Rmd
├── R
    ├── sims.R
    ├── methods.R
    ├── atacr.R
    ├── normalisation.R
    ├── loading.R
    └── differentials.R
└── README.md


/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2017
2 | COPYRIGHT HOLDER: Dan MacLean
3 | 


--------------------------------------------------------------------------------
/docs/tutorial_cache/html/__packages:
--------------------------------------------------------------------------------
1 | base
2 | atacr
3 | bindrcpp
4 | 


--------------------------------------------------------------------------------
/docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/tutorial_cache/html/plot_plot_24f20174733d96dba5249a7d35c2c80c.rdb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(atacr)
3 | # Entry point used by R CMD check: runs the package's testthat suite for atacr.
4 | test_check("atacr")
5 | 


--------------------------------------------------------------------------------
/data/sim_counts.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/data/sim_counts.rda


--------------------------------------------------------------------------------
/data/small_counts.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/data/small_counts.rda


--------------------------------------------------------------------------------
/data/athal_wt_counts.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/data/athal_wt_counts.rda


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | inst/doc
6 | atacr.Rproj
7 | *.DS_Store
8 | 


--------------------------------------------------------------------------------
/README-unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/README-unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/tests/testthat/a1_smallSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/a1_smallSorted.bam


--------------------------------------------------------------------------------
/tests/testthat/a2_smallSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/a2_smallSorted.bam


--------------------------------------------------------------------------------
/tests/testthat/b1_smallSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/b1_smallSorted.bam


--------------------------------------------------------------------------------
/tests/testthat/b2_smallSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/b2_smallSorted.bam


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^\.Rproj\.user$
2 | ^\.travis\.yml$
3 | ^.*\.Rproj$
4 | ^README\.Rmd$
5 | ^README-.*\.png$
6 | TODO
7 | docs/
8 | 


--------------------------------------------------------------------------------
/tests/testthat/a1_smallSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/a1_smallSorted.bam.bai


--------------------------------------------------------------------------------
/tests/testthat/a2_smallSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/a2_smallSorted.bam.bai


--------------------------------------------------------------------------------
/tests/testthat/b1_smallSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/b1_smallSorted.bam.bai


--------------------------------------------------------------------------------
/tests/testthat/b2_smallSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/tests/testthat/b2_smallSorted.bam.bai


--------------------------------------------------------------------------------
/inst/extdata/ATAC102/alignedSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC102/alignedSorted.bam


--------------------------------------------------------------------------------
/inst/extdata/ATAC103/alignedSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC103/alignedSorted.bam


--------------------------------------------------------------------------------
/inst/extdata/ATAC202/alignedSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC202/alignedSorted.bam


--------------------------------------------------------------------------------
/inst/extdata/ATAC203/alignedSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC203/alignedSorted.bam


--------------------------------------------------------------------------------
/inst/extdata/ATAC302/alignedSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC302/alignedSorted.bam


--------------------------------------------------------------------------------
/inst/extdata/ATAC303/alignedSorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC303/alignedSorted.bam


--------------------------------------------------------------------------------
/tests/testthat/bait_genes.gff:
--------------------------------------------------------------------------------
1 | Chr1	manual	gene	246000	246200	.	+	.	ID=FakeGeneA
2 | Chr1	manual	gene	246700	247000	.	+	.	ID=FakeGeneB


--------------------------------------------------------------------------------
/inst/extdata/ATAC102/alignedSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC102/alignedSorted.bam.bai


--------------------------------------------------------------------------------
/inst/extdata/ATAC103/alignedSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC103/alignedSorted.bam.bai


--------------------------------------------------------------------------------
/inst/extdata/ATAC202/alignedSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC202/alignedSorted.bam.bai


--------------------------------------------------------------------------------
/inst/extdata/ATAC203/alignedSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC203/alignedSorted.bam.bai


--------------------------------------------------------------------------------
/inst/extdata/ATAC302/alignedSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC302/alignedSorted.bam.bai


--------------------------------------------------------------------------------
/inst/extdata/ATAC303/alignedSorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/inst/extdata/ATAC303/alignedSorted.bam.bai


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/ma_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/ma_plot-1.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/pca_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/pca_plot-1.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/plot_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/plot_plot-1.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-8-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-8-2.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-8-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-8-3.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-8-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-8-4.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-9-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-9-2.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-9-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-9-3.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/unnamed-chunk-9-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/unnamed-chunk-9-4.png


--------------------------------------------------------------------------------
/docs/tutorial_files/figure-html/coverage_threshold-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_files/figure-html/coverage_threshold-1.png


--------------------------------------------------------------------------------
/docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.rdx


--------------------------------------------------------------------------------
/docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_cache/html/pca_plot_d5010f9ca43af72d0bc587807a7bf3d9.RData


--------------------------------------------------------------------------------
/docs/tutorial_cache/html/plot_plot_24f20174733d96dba5249a7d35c2c80c.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_cache/html/plot_plot_24f20174733d96dba5249a7d35c2c80c.rdx


--------------------------------------------------------------------------------
/docs/tutorial_cache/html/plot_plot_24f20174733d96dba5249a7d35c2c80c.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeamMacLean/atacr/HEAD/docs/tutorial_cache/html/plot_plot_24f20174733d96dba5249a7d35c2c80c.RData


--------------------------------------------------------------------------------
/tests/testthat/sample_treatment_bam_mappings_for_test.csv:
--------------------------------------------------------------------------------
1 | treatment,sample_name,bam_file_path
2 | test,test_1,a1_smallSorted.bam
3 | test,test_2,a2_smallSorted.bam
4 | control,control_1,b1_smallSorted.bam
5 | control,control_2,b2_smallSorted.bam
6 | 


--------------------------------------------------------------------------------
/man/simulate_counts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sims.R
 3 | \name{simulate_counts}
 4 | \alias{simulate_counts}
 5 | \title{simulate counts and return an atacr object}
 6 | \usage{
 7 | simulate_counts()
 8 | }
 9 | \description{
10 | simulate counts and return an atacr object
11 | }
12 | 


--------------------------------------------------------------------------------
/tests/testthat/helper-functions.R:
--------------------------------------------------------------------------------
 1 | 
 2 | expect_vectors_equal <- function(a, b) {
 3 |   # TRUE only when `a` and `b` hold the same distinct values (order and
 4 |   # duplicates ignored). Both directions are tested: checking setdiff(a, b)
 5 |   # alone would also accept an `a` that is merely a subset of `b`.
 6 |   only_in_a <- setdiff(a, b)
 7 |   return(length(only_in_a) == 0 && length(setdiff(b, a)) == 0)
 8 | }
 9 | 
10 | expect_has_all_and_only_these_members <- function(l, v) {
11 |   # Compare the element names of `l` with the expected names in `v`.
12 |   expect_vectors_equal(names(l), v)
13 | }


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
 2 | 
 3 | language: R
 4 | sudo: false
 5 | cache: packages
 6 | r: bioc-release
 7 | r_packages:
 8 |   - covr
 9 | 
10 | warnings_are_errors: false
11 | 
12 | after_success:
13 |   - Rscript -e 'library(covr); codecov()'
14 | 
15 | before_script:
16 |   - echo "BiocParallel::register(BiocParallel::SerialParam())" > ~/.Rprofile
17 | 


--------------------------------------------------------------------------------
/man/qqarb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{qqarb}
 4 | \alias{qqarb}
 5 | \title{Named distribution qqplot}
 6 | \usage{
 7 | qqarb(obs, dist = "norm")
 8 | }
 9 | \arguments{
10 | \item{obs}{observed values}
11 | 
12 | \item{dist}{expected distribution}
13 | }
14 | \value{
15 | ggplot2 object
16 | }
17 | \description{
18 | Named distribution qqplot
19 | }
20 | 


--------------------------------------------------------------------------------
/man/treatments.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{treatments}
 4 | \alias{treatments}
 5 | \title{return list of treatment names}
 6 | \usage{
 7 | treatments(data)
 8 | }
 9 | \arguments{
10 | \item{data}{an atacr object}
11 | }
12 | \value{
13 | char vector of unique treatment names
14 | }
15 | \description{
16 | return list of treatment names
17 | }
18 | 


--------------------------------------------------------------------------------
/atacr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/man/make_tutorial_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{make_tutorial_data}
 4 | \alias{make_tutorial_data}
 5 | \title{make data to follow on in the tutorial}
 6 | \usage{
 7 | make_tutorial_data(write_dir = getwd())
 8 | }
 9 | \arguments{
10 | \item{write_dir}{directory to put sample files in; defaults to `getwd()`}
11 | }
12 | \description{
13 | make data to follow on in the tutorial
14 | }
15 | 


--------------------------------------------------------------------------------
/inst/extdata/tutorial_mappings.csv:
--------------------------------------------------------------------------------
1 | treatment,sample_name,bam_file_path
2 | 4h_mock,4h_mock_rep1,inst/extdata/ATAC102/alignedSorted.bam
3 | 4h_mock,4h_mock_rep2,inst/extdata/ATAC202/alignedSorted.bam
4 | 4h_mock,4h_mock_rep3,inst/extdata/ATAC302/alignedSorted.bam
5 | 4h_infected,4h_infected_rep1,inst/extdata/ATAC103/alignedSorted.bam
6 | 4h_infected,4h_infected_rep2,inst/extdata/ATAC203/alignedSorted.bam
7 | 4h_infected,4h_infected_rep3,inst/extdata/ATAC303/alignedSorted.bam
8 | 


--------------------------------------------------------------------------------
/man/sample_pca_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{sample_pca_plot}
 4 | \alias{sample_pca_plot}
 5 | \title{PCA plot of samples}
 6 | \usage{
 7 | sample_pca_plot(data, which = "bait_windows")
 8 | }
 9 | \arguments{
10 | \item{data}{atacr object}
11 | 
12 | \item{which}{the subset of the data to plot}
13 | }
14 | \value{
15 | ggplot object
16 | }
17 | \description{
18 | PCA plot of samples
19 | }
20 | 


--------------------------------------------------------------------------------
/man/print.atacr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/methods.R
 3 | \name{print.atacr}
 4 | \alias{print.atacr}
 5 | \title{writes a summary of the metadata for a given atacr object}
 6 | \usage{
 7 | \method{print}{atacr}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{an atacr object}
11 | 
12 | \item{\dots}{other options for print generic}
13 | }
14 | \description{
15 | writes a summary of the metadata for a given atacr object
16 | }
17 | 


--------------------------------------------------------------------------------
/man/gof.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{gof}
 4 | \alias{gof}
 5 | \title{estimates Goodness of Fit for each row in a count matrix}
 6 | \usage{
 7 | gof(mat)
 8 | }
 9 | \arguments{
10 | \item{mat}{a count matrix usually from SummarizedExperiment::assay()}
11 | }
12 | \value{
13 | a named vector of GoF estimates
14 | }
15 | \description{
16 | estimates Goodness of Fit for each row in a count matrix
17 | }
18 | 


--------------------------------------------------------------------------------
/man/plot.atacr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/methods.R
 3 | \name{plot.atacr}
 4 | \alias{plot.atacr}
 5 | \title{returns summary plot of data in atacr object}
 6 | \usage{
 7 | \method{plot}{atacr}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{atacr object}
11 | 
12 | \item{\dots}{extra options for generic}
13 | }
14 | \value{
15 | gridExtra plot
16 | }
17 | \description{
18 | returns summary plot of data in atacr object
19 | }
20 | 


--------------------------------------------------------------------------------
/man/ma_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{ma_data}
 4 | \alias{ma_data}
 5 | \title{adds an 'm' and an 'a' column to an assay matrix dataframe for ma plots}
 6 | \usage{
 7 | ma_data(sample_matrix)
 8 | }
 9 | \arguments{
10 | \item{sample_matrix}{a SummarizedExperiment::assay from which to make the MA plot}
11 | }
12 | \description{
13 | adds an 'm' and an 'a' column to an assay matrix dataframe for ma plots
14 | }
15 | 


--------------------------------------------------------------------------------
/man/summary.atacr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/methods.R
 3 | \name{summary.atacr}
 4 | \alias{summary.atacr}
 5 | \title{writes a detailed data summary of the atacr object}
 6 | \usage{
 7 | \method{summary}{atacr}(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{an atacr object}
11 | 
12 | \item{\dots}{other options for summary generic}
13 | }
14 | \description{
15 | writes a detailed data summary of the atacr object
16 | }
17 | 


--------------------------------------------------------------------------------
/man/as.data.frame.atacr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/methods.R
 3 | \name{as.data.frame.atacr}
 4 | \alias{as.data.frame.atacr}
 5 | \title{returns dataframe of data in atacr object}
 6 | \usage{
 7 | \method{as.data.frame}{atacr}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{atacr object to convert to a dataframe}
11 | 
12 | \item{\dots}{other options for generic}
13 | }
14 | \value{
15 | dataframe
16 | }
17 | \description{
18 | returns dataframe of data in atacr object
19 | }
20 | 


--------------------------------------------------------------------------------
/man/library_size_scaling_factors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{library_size_scaling_factors}
 4 | \alias{library_size_scaling_factors}
 5 | \title{calculate scaling factors for library size}
 6 | \usage{
 7 | library_size_scaling_factors(se)
 8 | }
 9 | \arguments{
10 | \item{se}{a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()}
11 | }
12 | \description{
13 | calculate scaling factors for library size
14 | }
15 | 


--------------------------------------------------------------------------------
/man/library_size_normalisation_internal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{library_size_normalisation_internal}
 4 | \alias{library_size_normalisation_internal}
 5 | \title{do a library size normalisation}
 6 | \usage{
 7 | library_size_normalisation_internal(se)
 8 | }
 9 | \arguments{
10 | \item{se}{a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()}
11 | }
12 | \description{
13 | do a library size normalisation
14 | }
15 | 


--------------------------------------------------------------------------------
/man/make_csaw_params.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{make_csaw_params}
 4 | \alias{make_csaw_params}
 5 | \title{format a csaw::readParam object from the atacr::make_params() object}
 6 | \usage{
 7 | make_csaw_params(p)
 8 | }
 9 | \arguments{
10 | \item{p}{an object returned from atacr::make_params()}
11 | }
12 | \value{
13 | a csaw::readParam object
14 | }
15 | \description{
16 | format a csaw::readParam object from the atacr::make_params() object
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_t.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{get_t}
 4 | \alias{get_t}
 5 | \title{gets t-statistic for two vectors of data, x and y}
 6 | \usage{
 7 | get_t(data, indices)
 8 | }
 9 | \arguments{
10 | \item{data}{matrix of sample data}
11 | 
12 | \item{indices}{indices selected by boot::boot}
13 | }
14 | \value{
15 | t the t statistic from Student's t-test or NA if error
16 | }
17 | \description{
18 | gets t-statistic for two vectors of data, x and y
19 | }
20 | 


--------------------------------------------------------------------------------
/man/get_bait_regions_from_gff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{get_bait_regions_from_gff}
 4 | \alias{get_bait_regions_from_gff}
 5 | \title{reads a gff file containing the bait regions}
 6 | \usage{
 7 | get_bait_regions_from_gff(file_name)
 8 | }
 9 | \arguments{
10 | \item{file_name}{path to the file containing the bait regions}
11 | }
12 | \value{
13 | GenomicRanges object of bait regions
14 | }
15 | \description{
16 | reads a gff file containing the bait regions
17 | }
18 | 


--------------------------------------------------------------------------------
/man/make_corrplot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{make_corrplot}
 4 | \alias{make_corrplot}
 5 | \title{generate corrplot from matrix of counts}
 6 | \usage{
 7 | make_corrplot(counts, method = "pearson")
 8 | }
 9 | \arguments{
10 | \item{counts}{a matrix of counts}
11 | 
12 | \item{method}{the correlation method to use, any supported by `cor()` is useable}
13 | }
14 | \value{
15 | ggplot2 plot
16 | }
17 | \description{
18 | generate corrplot from matrix of counts
19 | }
20 | 


--------------------------------------------------------------------------------
/man/assay_matrix_to_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{assay_matrix_to_df}
 4 | \alias{assay_matrix_to_df}
 5 | \title{converts SummarizedExperiment::assay matrix to a dataframe with cols 'window', 'sample' and 'count'}
 6 | \usage{
 7 | assay_matrix_to_df(matrix)
 8 | }
 9 | \arguments{
10 | \item{matrix}{a SummarizedExperiment::assay matrix}
11 | }
12 | \description{
13 | converts SummarizedExperiment::assay matrix to a dataframe with cols 'window', 'sample' and 'count'
14 | }
15 | 


--------------------------------------------------------------------------------
/man/read_experiment_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{read_experiment_info}
 4 | \alias{read_experiment_info}
 5 | \title{Loads in a CSV file describing treatment, samples and bam files}
 6 | \usage{
 7 | read_experiment_info(filename, should_be = c("treatment", "sample_name",
 8 |   "bam_file_path"))
 9 | }
10 | \arguments{
11 | \item{filename}{path and name of the file to load}
12 | }
13 | \description{
14 | Loads in a CSV file describing treatment, samples and bam files
15 | }
16 | 


--------------------------------------------------------------------------------
/man/target_count_summary.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{target_count_summary}
 4 | \alias{target_count_summary}
 5 | \title{Get a summary of reads hitting the bait and non bait windows}
 6 | \usage{
 7 | target_count_summary(data)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | }
12 | \value{
13 | a table of on target and off target read counts
14 | }
15 | \description{
16 | Get a summary of reads hitting the bait and non bait windows
17 | }
18 | 


--------------------------------------------------------------------------------
/man/Est.Depth.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{Est.Depth}
 4 | \alias{Est.Depth}
 5 | \title{Depth estimation, directly from https://github.com/cran/PoissonSeq/blob/master/R/ps_cmeans.R}
 6 | \usage{
 7 | Est.Depth(n, iter = 5)
 8 | }
 9 | \arguments{
10 | \item{n}{a matrix}
11 | 
12 | \item{iter}{number of runs of the depth finder.}
13 | }
14 | \value{
15 | list of depths and means
16 | }
17 | \description{
18 | Depth estimation, directly from https://github.com/cran/PoissonSeq/blob/master/R/ps_cmeans.R
19 | }
20 | 


--------------------------------------------------------------------------------
/man/coverage_count_summary.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{coverage_count_summary}
 4 | \alias{coverage_count_summary}
 5 | \title{Get a summary of depth of coverage in the bait and non bait windows}
 6 | \usage{
 7 | coverage_count_summary(data)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | }
12 | \value{
13 | a table of on target and off target mean depths
14 | }
15 | \description{
16 | Get a summary of depth of coverage in the bait and non bait windows
17 | }
18 | 


--------------------------------------------------------------------------------
/man/target_count_coverage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{target_count_coverage}
 4 | \alias{target_count_coverage}
 5 | \title{Read count and mean coverage hitting the bait and non bait windows}
 6 | \usage{
 7 | target_count_coverage(data)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | }
12 | \value{
13 | a dataframe of on target and off target read counts
14 | }
15 | \description{
16 | Read count and mean coverage hitting the bait and non bait windows
17 | }
18 | 


--------------------------------------------------------------------------------
/man/as.matrix.atacr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/methods.R
 3 | \name{as.matrix.atacr}
 4 | \alias{as.matrix.atacr}
 5 | \title{returns given subset of data in atacr object as a matrix}
 6 | \usage{
 7 | \method{as.matrix}{atacr}(x, ..., which = "bait_windows")
 8 | }
 9 | \arguments{
10 | \item{x}{an atacr object}
11 | 
12 | \item{\dots}{other options for generic}
13 | 
14 | \item{which}{the subset of data to work on}
15 | }
16 | \value{
17 | matrix of counts in subset
18 | }
19 | \description{
20 | returns given subset of data in atacr object as a matrix
21 | }
22 | 


--------------------------------------------------------------------------------
/man/median_virtual_experiment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{median_virtual_experiment}
 4 | \alias{median_virtual_experiment}
 5 | \title{a median of window values across all samples in a vector, for ma plots}
 6 | \usage{
 7 | median_virtual_experiment(sample_matrix)
 8 | }
 9 | \arguments{
10 | \item{sample_matrix}{counts extracted from a SummarizedExperiment object}
11 | }
12 | \value{
13 | the median of the provided counts, columnwise
14 | }
15 | \description{
16 | a median of window values across all samples in a vector, for ma plots
17 | }
18 | 


--------------------------------------------------------------------------------
/man/as.DGEList.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{as.DGEList}
 4 | \alias{as.DGEList}
 5 | \title{returns DGEList for edgeR from atacr object}
 6 | \usage{
 7 | as.DGEList(atacr, which = "bait_windows", remove.zeros = FALSE)
 8 | }
 9 | \arguments{
10 | \item{atacr}{an atacr object}
11 | 
12 | \item{which}{the subset of the data to work on}
13 | 
14 | \item{remove.zeros}{whether to remove rows that have 0 total count.}
15 | }
16 | \value{
17 | DGEList representing atacr data
18 | }
19 | \description{
20 | returns DGEList for edgeR from atacr object
21 | }
22 | 


--------------------------------------------------------------------------------
/man/get_bait_regions_from_text.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{get_bait_regions_from_text}
 4 | \alias{get_bait_regions_from_text}
 5 | \title{reads a csv file containing the bait regions}
 6 | \usage{
 7 | get_bait_regions_from_text(file_name)
 8 | }
 9 | \arguments{
10 | \item{file_name}{path to a csv file containing the bait regions. File must have a header with columns `bait_name`, `seq_name`, `start`, `end`.}
11 | }
12 | \value{
13 | GenomicRanges object of bait regions
14 | }
15 | \description{
16 | reads a csv file containing the bait regions
17 | }
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test_methods.R:
--------------------------------------------------------------------------------
 1 | Sys.setenv("R_TESTS" = "")
 2 | library(atacr)
 3 | 
 4 | context("methods")
 5 | 
 6 | test_that("as.data.frame.atacr() returns proper dataframe",{
 7 |   # flatten sim_counts, then check column names, column classes and chromosome levels
 8 |   counts_df <- as.data.frame(sim_counts)
 9 |   expect_vectors_equal(names(counts_df), c("chromosome", "start", "stop", "strand", "sample", "count", "window_type"))
10 |   expect_is(counts_df$chromosome, "factor")
11 |   expect_is(counts_df$sample, "factor")
12 |   expect_is(counts_df$window_type, "factor")
13 |   expect_is(counts_df$start, "integer")
14 |   expect_is(counts_df$stop, "integer")
15 |   expect_is(counts_df$count, "numeric")
16 |   expect_equal(levels(counts_df$chromosome), c("synth_chrom"))
17 | })
18 | 


--------------------------------------------------------------------------------
/man/plot_counts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{plot_counts}
 4 | \alias{plot_counts}
 5 | \title{Plot distribution of counts in given data set}
 6 | \usage{
 7 | plot_counts(data, which = "bait_windows", log10 = TRUE)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to plot}
13 | 
14 | \item{log10}{log 10 the counts for plotting.}
15 | }
16 | \value{
17 | ggplot2 plot
18 | }
19 | \description{
20 | Plot distribution of counts in given data set
21 | }
22 | 


--------------------------------------------------------------------------------
/man/chromosome_coverage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{chromosome_coverage}
 4 | \alias{chromosome_coverage}
 5 | \title{Plot density of read counts by sample over the chromosomes}
 6 | \usage{
 7 | chromosome_coverage(data, which = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to plot (default = bait and non_bait windows)}
13 | }
14 | \value{
15 | a ggplot2 object
16 | }
17 | \description{
18 | Plot density of read counts by sample over the chromosomes
19 | }
20 | 


--------------------------------------------------------------------------------
/man/small_counts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \docType{data}
 4 | \name{small_counts}
 5 | \alias{small_counts}
 6 | \title{small_counts - simulated count data
 7 | The data `small_counts` is basically the same thing as `sim_counts` with smaller sample of 100 bait / non-bait windows.}
 8 | \format{a list of SummarizedExperiment objects}
 9 | \usage{
10 | small_counts
11 | }
12 | \description{
13 | small_counts - simulated count data
14 | The data `small_counts` is basically the same thing as `sim_counts` with smaller sample of 100 bait / non-bait windows.
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/man/make_scanBamParam.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{make_scanBamParam}
 4 | \alias{make_scanBamParam}
 5 | \title{format an rsamtools::scanBamParam object from the atacr::make_params() object}
 6 | \usage{
 7 | make_scanBamParam(p, example_bam)
 8 | }
 9 | \arguments{
10 | \item{p}{an object returned from atacr::make_params()}
11 | 
12 | \item{example_bam}{a filename pointing to a BAM file from which genome size can be taken}
13 | }
14 | \value{
15 | an rsamtools::scanBamParam object
16 | }
17 | \description{
18 | format an rsamtools::scanBamParam object from the atacr::make_params() object
19 | }
20 | 


--------------------------------------------------------------------------------
/man/text_to_gff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{text_to_gff}
 4 | \alias{text_to_gff}
 5 | \title{writes GFF3 version of a simple text file describing the bait region starts and stops}
 6 | \usage{
 7 | text_to_gff(text_in, gff_out)
 8 | }
 9 | \arguments{
10 | \item{text_in}{path to the file describing the bait regions. File must have a header with columns `bait_name`, `seq_name`, `start_pos`, `end_pos`.}
11 | 
12 | \item{gff_out}{path to the gff file to be created}
13 | }
14 | \description{
15 | writes GFF3 version of a simple text file describing the bait region starts and stops
16 | }
17 | 


--------------------------------------------------------------------------------
/man/bootstrap_t.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{bootstrap_t}
 4 | \alias{bootstrap_t}
 5 | \title{runs bootstrap t test, wrapper required for boot::boot function}
 6 | \usage{
 7 | bootstrap_t(data, iterations = 10)
 8 | }
 9 | \arguments{
10 | \item{data}{matrix of sample data}
11 | 
12 | \item{iterations}{number of bootstrap iterations to run}
13 | }
14 | \value{
15 | vector of 2 items, the observed t statistic and p, calculated as the proportion of bootstrap iterations greater than the original t
16 | }
17 | \description{
18 | runs bootstrap t test, wrapper required for boot::boot function
19 | }
20 | 


--------------------------------------------------------------------------------
/man/plot_GoF.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{plot_GoF}
 4 | \alias{plot_GoF}
 5 | \title{draw count distribution of GOF estimates}
 6 | \usage{
 7 | plot_GoF(atacr, which = "bait_windows", controls = NULL)
 8 | }
 9 | \arguments{
10 | \item{atacr}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to plot (default = "bait_windows")}
13 | 
14 | \item{controls}{character vector of window names to consider control windows}
15 | }
16 | \value{
17 | ggplot2 object
18 | }
19 | \description{
20 | draw count distribution of GOF estimates
21 | }
22 | 


--------------------------------------------------------------------------------
/man/sample_kmeans_cluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{sample_kmeans_cluster}
 4 | \alias{sample_kmeans_cluster}
 5 | \title{identify kmeans clusters for samples}
 6 | \usage{
 7 | sample_kmeans_cluster(data, which = "bait_windows")
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
13 | }
14 | \value{
15 | dataframe of cluster_id and sample name
16 | }
17 | \description{
18 | identify kmeans clusters for samples
19 | }
20 | 


--------------------------------------------------------------------------------
/man/get_expected_values.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{get_expected_values}
 4 | \alias{get_expected_values}
 5 | \title{given a vector of values return a set of random numbers from a given
 6 | distribution}
 7 | \usage{
 8 | get_expected_values(obs, dist = "norm")
 9 | }
10 | \arguments{
11 | \item{obs}{vector of observed values}
12 | 
13 | \item{dist}{the distribution from which to return expected values}
14 | }
15 | \value{
16 | a vector the same length as obs, with random variates from distribution dist
17 | }
18 | \description{
19 | given a vector of values return a set of random numbers from a given
20 | distribution
21 | }
22 | 


--------------------------------------------------------------------------------
/man/select_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{select_data}
 4 | \alias{select_data}
 5 | \title{selects appropriate columns and names from an atacr object}
 6 | \usage{
 7 | select_data(data, treatment_a, treatment_b, which = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{an atacr object}
11 | 
12 | \item{treatment_a}{string naming the first treatment (numerator)}
13 | 
14 | \item{treatment_b}{string naming the second treatment (denominator)}
15 | 
16 | \item{which}{subset to work on. Default = NULL}
17 | }
18 | \value{
19 | list of data to be calculated with
20 | }
21 | \description{
22 | selects appropriate columns and names from an atacr object
23 | }
24 | 


--------------------------------------------------------------------------------
/man/get_GoF_factors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{get_GoF_factors}
 4 | \alias{get_GoF_factors}
 5 | \title{estimates sequencing depths based on windows with smallest GoF}
 6 | \usage{
 7 | get_GoF_factors(atacr, which = "bait_windows")
 8 | }
 9 | \arguments{
10 | \item{atacr}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
13 | }
14 | \value{
15 | a named vector of each window's GoF estimate.
16 | }
17 | \description{
18 | estimates sequencing depths based on windows with smallest GoF
19 | }
20 | 


--------------------------------------------------------------------------------
/man/estimate_GoFs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{estimate_GoFs}
 4 | \alias{estimate_GoFs}
 5 | \title{estimates Goodness of Fit from atacr object}
 6 | \usage{
 7 | estimate_GoFs(atacr, which = "bait_windows")
 8 | }
 9 | \arguments{
10 | \item{atacr}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
13 | }
14 | \value{
15 | the original atacr object with a new slot - 'gofs' - a named vector of each window's GoF estimate.
16 | }
17 | \description{
18 | estimates Goodness of Fit from atacr object
19 | }
20 | 


--------------------------------------------------------------------------------
/man/make_params.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{make_params}
 4 | \alias{make_params}
 5 | \title{set read filters for counting from the BAM file.}
 6 | \usage{
 7 | make_params(paired_map = TRUE, minq = 30, dedup = TRUE)
 8 | }
 9 | \arguments{
10 | \item{paired_map}{Should reads only be included if they are aligned in pairs. Default = TRUE}
11 | 
12 | \item{minq}{The minimum mapping quality to retain a read. Default = 30}
13 | 
14 | \item{dedup}{Should removal of PCR duplicates be performed. Default = TRUE}
15 | }
16 | \value{
17 | a named vector of class "atacr_params"
18 | }
19 | \description{
20 | set read filters for counting from the BAM file.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/sample_correlation_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{sample_correlation_plot}
 4 | \alias{sample_correlation_plot}
 5 | \title{Plot sample count correlations}
 6 | \usage{
 7 | sample_correlation_plot(data, which = "bait_windows", method = "pearson")
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
13 | 
14 | \item{method}{the correlation method to use. Any supported by `cor()` is useable}
15 | }
16 | \description{
17 | Plot sample count correlations
18 | }
19 | 


--------------------------------------------------------------------------------
/man/calc_quantiles.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{calc_quantiles}
 4 | \alias{calc_quantiles}
 5 | \title{report counts at each quantile for each sample}
 6 | \usage{
 7 | calc_quantiles(data, quantiles = c(0.01, 0.05, 0.95, 0.99), which = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{quantiles}{a vector of quantiles to report}
13 | 
14 | \item{which}{the subset of data windows to report on. Default =
15 | "bait_windows" and "non_bait_windows"}
16 | }
17 | \value{
18 | list of counts at quantiles
19 | }
20 | \description{
21 | report counts at each quantile for each sample
22 | }
23 | 


--------------------------------------------------------------------------------
/man/control_window_normalise_internal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{control_window_normalise_internal}
 4 | \alias{control_window_normalise_internal}
 5 | \title{do a control window scaling normalisation}
 6 | \usage{
 7 | control_window_normalise_internal(se, window_file)
 8 | }
 9 | \arguments{
10 | \item{se}{a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()}
11 | 
12 | \item{window_file}{a text file containing the positions of control window/gene ranges}
13 | }
14 | \value{
15 | SummarizedExperiment object, a copy of se with normalised values
16 | }
17 | \description{
18 | do a control window scaling normalisation
19 | }
20 | 


--------------------------------------------------------------------------------
/man/view_gene.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{view_gene}
 4 | \alias{view_gene}
 5 | \title{coverage over gene model}
 6 | \usage{
 7 | view_gene(data, gene_id, which = "bait_windows", ensembl = "plants",
 8 |   ens_dataset = "athaliana_eg_gene")
 9 | }
10 | \arguments{
11 | \item{data}{atacr object}
12 | 
13 | \item{gene_id}{the id of the gene to plot around}
14 | 
15 | \item{which}{the subset of the data to plot.}
16 | 
17 | \item{ensembl}{one of 'plants', 'ensembl' - which version of ensembl to connect to}
18 | 
19 | \item{ens_dataset}{which ensembl dataset to connect to}
20 | }
21 | \value{
22 | plot object
23 | }
24 | \description{
25 | coverage over gene model
26 | }
27 | 


--------------------------------------------------------------------------------
/man/control_window_scaling_factors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{control_window_scaling_factors}
 4 | \alias{control_window_scaling_factors}
 5 | \title{extract scaling factors from control windows (often from a file of control gene positions)}
 6 | \usage{
 7 | control_window_scaling_factors(se, window_file)
 8 | }
 9 | \arguments{
10 | \item{se}{a SummarizedExperiment object}
11 | 
12 | \item{window_file}{a text file containing the positions of control window/gene ranges}
13 | }
14 | \value{
15 | a vector of scaling factors from control genes
16 | }
17 | \description{
18 | extract scaling factors from control windows (often from a file of control gene positions)
19 | }
20 | 


--------------------------------------------------------------------------------
/man/observed_expected_bins.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{observed_expected_bins}
 4 | \alias{observed_expected_bins}
 5 | \title{given a vector of numbers returns the observed counts in bins of bin_width, and the expected counts from the given distribution}
 6 | \usage{
 7 | observed_expected_bins(obs, dist = "pois", bin_width = 10)
 8 | }
 9 | \arguments{
10 | \item{obs}{a vector of numbers}
11 | 
12 | \item{dist}{a string naming distribution from which to take expected counts}
13 | 
14 | \item{bin_width}{the width of the bins for the counts}
15 | }
16 | \value{
17 | list with members observed and expected which are vectors of counts
18 | }
19 | \description{
20 | given a vector of numbers returns the observed counts in bins of bin_width, and the expected counts from the given distribution
21 | }
22 | 


--------------------------------------------------------------------------------
/man/plot_count_by_chromosome.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{plot_count_by_chromosome}
 4 | \alias{plot_count_by_chromosome}
 5 | \title{plot the counts split by chromosome and sample}
 6 | \usage{
 7 | plot_count_by_chromosome(data, which = "bait_windows", method = "bar")
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
13 | 
14 | \item{method}{(bar | smooth | point) which sort of plot to return}
15 | }
16 | \value{
17 | ggplot2 plot
18 | }
19 | \description{
20 | plot the counts split by chromosome and sample
21 | }
22 | 


--------------------------------------------------------------------------------
/man/coverage_summary.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{coverage_summary}
 4 | \alias{coverage_summary}
 5 | \title{Plot histograms of read counts by sample and window type}
 6 | \usage{
 7 | coverage_summary(data, which = NULL, sample = NULL, log_axis = TRUE)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to plot (default = bait and non_bait windows)}
13 | 
14 | \item{sample}{the sample to plot (default = all )}
15 | 
16 | \item{log_axis}{use a log scale for the x-axis}
17 | }
18 | \value{
19 | a ggplot2 object
20 | }
21 | \description{
22 | Plot histograms of read counts by sample and window type
23 | }
24 | 


--------------------------------------------------------------------------------
/man/ma_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{ma_plot}
 4 | \alias{ma_plot}
 5 | \title{plot M (log2 ratio of a window's sample count to the window's all-sample median count) versus A (log2 sum of a window's sample count to a window's all-sample median count) for each window}
 6 | \usage{
 7 | ma_plot(data, which = "bait_windows", by = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{an atacr object}
11 | 
12 | \item{which}{the subset of windows to operate on}
13 | 
14 | \item{by}{a vector of seqnames of the genome to view}
15 | }
16 | \description{
17 | plot M (log2 ratio of a window's sample count to the window's all-sample median count) versus A (log2 sum of a window's sample count to a window's all-sample median count) for each window
18 | }
19 | 


--------------------------------------------------------------------------------
/man/estimate_bayes_factor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{estimate_bayes_factor}
 4 | \alias{estimate_bayes_factor}
 5 | \title{Estimate Bayes Factor and significantly different windows}
 6 | \usage{
 7 | estimate_bayes_factor(atacr, treatment_a, treatment_b, which = "bait_windows",
 8 |   factor = 4)
 9 | }
10 | \arguments{
11 | \item{atacr}{an atacr object}
12 | 
13 | \item{treatment_a}{the first treatment to consider}
14 | 
15 | \item{treatment_b}{the second treatment to consider}
16 | 
17 | \item{which}{the subset of windows to consider}
18 | 
19 | \item{factor}{the BayesFactor at which to mark window as significant}
20 | }
21 | \value{
22 | a dataframe
23 | }
24 | \description{
25 | Estimate Bayes Factor and significantly different windows
26 | }
27 | 


--------------------------------------------------------------------------------
/man/estimate_bayes_factor_multiclass.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{estimate_bayes_factor_multiclass}
 4 | \alias{estimate_bayes_factor_multiclass}
 5 | \title{Estimate BayesFactor and mark significantly different windows for many experiments}
 6 | \usage{
 7 | estimate_bayes_factor_multiclass(data, common_control, which = "bait_windows",
 8 |   factor = 4)
 9 | }
10 | \arguments{
11 | \item{data}{an atacr object}
12 | 
13 | \item{common_control}{the treatment to consider the control for all other treatments}
14 | 
15 | \item{which}{the subset of windows to consider}
16 | 
17 | \item{factor}{the BayesFactor to consider significant}
18 | }
19 | \description{
20 | Estimate BayesFactor and mark significantly different windows for many experiments
21 | }
22 | 


--------------------------------------------------------------------------------
/man/estimate_fdr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{estimate_fdr}
 4 | \alias{estimate_fdr}
 5 | \title{Estimate FDR and significantly different windows}
 6 | \usage{
 7 | estimate_fdr(data, treatment_a, treatment_b, which = "bait_windows",
 8 |   iterations = 10, fdr_level = 0.05)
 9 | }
10 | \arguments{
11 | \item{data}{an atacr object}
12 | 
13 | \item{treatment_a}{the first treatment to consider}
14 | 
15 | \item{treatment_b}{the second treatment to consider}
16 | 
17 | \item{which}{the subset of windows to consider}
18 | 
19 | \item{iterations}{the number of bootstrap iterations to perform}
20 | 
21 | \item{fdr_level}{the level at which to mark FDR as significant}
22 | }
23 | \description{
24 | Estimate FDR and significantly different windows
25 | }
26 | 


--------------------------------------------------------------------------------
/man/scale_factor_normalise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{scale_factor_normalise}
 4 | \alias{scale_factor_normalise}
 5 | \title{normalise by a provided set of scaling factors}
 6 | \usage{
 7 | scale_factor_normalise(data, which = "bait_windows", scaling_factors = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
13 | 
14 | \item{scaling_factors}{a vector of scaling factors to normalise by}
15 | }
16 | \value{
17 | a SummarizedExperiment with scale normalised window values
18 | }
19 | \description{
20 | normalise by a provided set of scaling factors
21 | }
22 | 


--------------------------------------------------------------------------------
/man/count_windows_under_threshold.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{count_windows_under_threshold}
 4 | \alias{count_windows_under_threshold}
 5 | \title{count windows that have read counts below the threshold}
 6 | \usage{
 7 | count_windows_under_threshold(data, which = "bait_windows", threshold = 0)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to calculate correlations either
13 | 'whole_genome', 'bait_windows' or 'non_bait_windows'}
14 | 
15 | \item{threshold}{counts windows with read counts lower than this level}
16 | }
17 | \value{
18 | dataframe of sample name, count and threshold
19 | }
20 | \description{
21 | count windows that have read counts below the threshold
22 | }
23 | 


--------------------------------------------------------------------------------
/man/edgeR_exact.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{edgeR_exact}
 4 | \alias{edgeR_exact}
 5 | \title{Estimate differential window counts  and mark significantly different windows using edgeR exact method for two samples}
 6 | \usage{
 7 | edgeR_exact(data, which = "bait_windows", treatment_a = NULL,
 8 |   treatment_b = NULL, remove.zeros = FALSE, sig_level = 0.05)
 9 | }
10 | \arguments{
11 | \item{data}{an atacr object}
12 | 
13 | \item{which}{the subset of windows to consider}
14 | 
15 | \item{sig_level}{the p_value to consider significant}
16 | 
17 | \item{treatment_a, treatment_b}{the first and second treatments to compare}
18 | }
19 | \description{
20 | Estimate differential window counts  and mark significantly different windows using edgeR exact method for two samples
21 | }
22 | 


--------------------------------------------------------------------------------
/man/make_UpSetR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \name{make_UpSetR}
 4 | \alias{make_UpSetR}
 5 | \title{given a dataframe from the estimate_fdr_multiclass() function, will return a
 6 | list in the format suitable for UpSetR visualisation.
 7 | Does not do any filtering of lists, so selected genes must be filtered before hand e.g with dplyr}
 8 | \usage{
 9 | make_UpSetR(df)
10 | }
11 | \arguments{
12 | \item{df}{dataframe from estimate_fdr_multiclass}
13 | }
14 | \value{
15 | list of named vectors suitable for UpSetR fromList() function
16 | }
17 | \description{
18 | given a dataframe from the estimate_fdr_multiclass() function, will return a
19 | list in the format suitable for UpSetR visualisation.
20 | Does not do any filtering of lists, so selected genes must be filtered before hand e.g with dplyr
21 | }
22 | 


--------------------------------------------------------------------------------
/man/estimate_fdr_multiclass.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{estimate_fdr_multiclass}
 4 | \alias{estimate_fdr_multiclass}
 5 | \title{Estimate FDR and significantly different windows for many experiments}
 6 | \usage{
 7 | estimate_fdr_multiclass(data, common_control, which = "bait_windows",
 8 |   iterations = 10, fdr_level = 0.05)
 9 | }
10 | \arguments{
11 | \item{data}{an atacr object}
12 | 
13 | \item{common_control}{the treatment to consider the control for all other treatments}
14 | 
15 | \item{which}{the subset of windows to consider}
16 | 
17 | \item{iterations}{the number of bootstrap iterations to perform}
18 | 
19 | \item{fdr_level}{the level at which to mark FDR as significant}
20 | }
21 | \description{
22 | Estimate FDR and significantly different windows for many experiments
23 | }
24 | 


--------------------------------------------------------------------------------
/man/find_controls_by_GoF.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{find_controls_by_GoF}
 4 | \alias{find_controls_by_GoF}
 5 | \title{find control windows by convergence method in https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate}
 6 | \usage{
 7 | find_controls_by_GoF(atacr, which = "bait_windows")
 8 | }
 9 | \arguments{
10 | \item{atacr}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
13 | }
14 | \value{
15 | a character vector of window names
16 | }
17 | \description{
18 | find control windows by convergence method in https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate
19 | }
20 | 


--------------------------------------------------------------------------------
/man/edgeR_multiclass.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differentials.R
 3 | \name{edgeR_multiclass}
 4 | \alias{edgeR_multiclass}
 5 | \title{Estimate differential window counts  and mark significantly different windows using edgeR glmFIT method for multiple samples with common control}
 6 | \usage{
 7 | edgeR_multiclass(data, common_control, which = "bait_windows",
 8 |   sig_level = 0.05, remove.zeros = FALSE)
 9 | }
10 | \arguments{
11 | \item{data}{an atacr object}
12 | 
13 | \item{which}{the subset of windows to consider}
14 | 
15 | \item{sig_level}{the p_value to consider significant}
16 | 
17 | \item{common_control}{the treatment to consider the control for all other treatments}
18 | }
19 | \description{
20 | Estimate differential window counts  and mark significantly different windows using edgeR glmFIT method for multiple samples with common control
21 | }
22 | 


--------------------------------------------------------------------------------
/man/windows_below_coverage_threshold_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/figures.R
 3 | \name{windows_below_coverage_threshold_plot}
 4 | \alias{windows_below_coverage_threshold_plot}
 5 | \title{generate cumulative plot of number of windows below a threshold in samples}
 6 | \usage{
 7 | windows_below_coverage_threshold_plot(data, which = "bait_windows",
 8 |   from = 0, to = 10)
 9 | }
10 | \arguments{
11 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
12 | 
13 | \item{which}{("bait_windows") the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
14 | 
15 | \item{from}{(0) the lowest threshold to consider}
16 | 
17 | \item{to}{(10) the highest threshold to consider}
18 | }
19 | \value{
20 | ggplot2 plot
21 | }
22 | \description{
23 | generate cumulative plot of number of windows below a threshold in samples
24 | }
25 | 


--------------------------------------------------------------------------------
/man/extract_features_from_gff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{extract_features_from_gff}
 4 | \alias{extract_features_from_gff}
 5 | \title{pulls lines out of a gff file based on identifiers provided}
 6 | \usage{
 7 | extract_features_from_gff(ids, gff, type = c("gene"), col = "ID",
 8 |   out_file = NULL, version = "3")
 9 | }
10 | \arguments{
11 | \item{ids}{character vector of ids/names of feature to extract}
12 | 
13 | \item{gff}{path to gff file}
14 | 
15 | \item{type}{feature type of features to extract.}
16 | 
17 | \item{col}{column name of GFF file containing id to use (ID)}
18 | 
19 | \item{out_file}{path of file name to write. If NULL, no file is written}
20 | 
21 | \item{version}{which gff version to export (Default is "3")}
22 | }
23 | \value{
24 | GenomicRanges or NULL with GFF outfile.
25 | }
26 | \description{
27 | pulls lines out of a gff file based on identifiers provided
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test_differentials.R:
--------------------------------------------------------------------------------
 1 | # Tests for the differential-count helpers: get_t(), select_comparisons() and estimate_fdr().
 2 | # Clear the R_TESTS environment variable, then attach the package under test.
 3 | Sys.setenv("R_TESTS" = "")
 4 | library(atacr)
 5 | 
 6 | context("differential count functions")
 7 | 
 8 | test_that("get_t() returns proper value", {
 9 |   first_sample <- 1:100
10 |   second_sample <- c(1:10, 90:100)
11 |   # Names are stripped so only the numeric value is compared.
12 |   observed_t <- unname(get_t(first_sample, second_sample))
13 |   expect_equal(observed_t, -64.6472, tolerance = 1e-07)
14 | })
15 | 
16 | test_that("select_comparisons() extracts proper columns", {
17 |   treatment_samples <- c("treatment_001", "treatment_002", "treatment_003")
18 |   control_samples <- c("control_001", "control_002", "control_003")
19 |   picked <- select_comparisons(sim_counts, "treatment", "control")
20 |   # Check the member names first, then the sample names stored under each member.
21 |   expect_has_all_and_only_these_members(picked, c("treatment_a_data", "treatment_b_data"))
22 |   expect_vectors_equal(picked$treatment_a_data, treatment_samples)
23 |   expect_vectors_equal(picked$treatment_b_data, control_samples)
24 | })
25 | 
26 | test_that("estimate_fdr() returns proper dataframe", {
27 |   # Only the column names of the result are checked here, not the values.
28 |   wanted_columns <- c("window", "t", "p_value", "fdr", "mean_count_a", "mean_count_b", "sd_a", "sd_b", "log2fc", "is_sig")
29 |   fdr_table <- estimate_fdr(sim_counts, "control", "treatment")
30 |   expect_vectors_equal(names(fdr_table), wanted_columns)
31 | })
32 | 


--------------------------------------------------------------------------------
/man/load_atac.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{load_atac}
 4 | \alias{load_atac}
 5 | \title{populate the result object with the RangedSummarizedExperiment from the bam files from ATAC seq data. Called from make_counts() when is_rnaseq == FALSE.}
 6 | \usage{
 7 | load_atac(result, width, filter_params, window_file)
 8 | }
 9 | \arguments{
10 | \item{result}{list from make_counts()}
11 | 
12 | \item{width}{an integer of the width of the bins the bait regions will be divided into}
13 | 
14 | \item{filter_params}{a params object, described in atacr::make_counts()}
15 | 
16 | \item{window_file}{a filename of a CSV file with the bait regions}
17 | }
18 | \value{
19 | a list with window counts for bait/non-bait windows
20 | }
21 | \description{
22 | populate the result object with the RangedSummarizedExperiment from the bam files from ATAC seq data. Called from make_counts() when is_rnaseq == FALSE.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/athal_wt_counts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \docType{data}
 4 | \name{athal_wt_counts}
 5 | \alias{athal_wt_counts}
 6 | \title{athal_wt_counts - real capture RNASeq count data
 7 | The data `athal_wt_counts` are real, experimentally derived counts from untreated WT Arabidopsis leaves for 52 baits, each set of baits representing a gene. Three replicates are provided for each gene. This data set is intended to be used in resampling procedures for making test data sets.}
 8 | \format{a named vector of counts}
 9 | \usage{
10 | athal_wt_counts
11 | }
12 | \description{
13 | athal_wt_counts - real capture RNASeq count data
14 | The data `athal_wt_counts` are real, experimentally derived counts from untreated WT Arabidopsis leaves for 52 baits, each set of baits representing a gene. Three replicates are provided for each gene. This data set is intended to be used in resampling procedures for making test data sets.
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/man/load_rnaseq.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{load_rnaseq}
 4 | \alias{load_rnaseq}
 5 | \title{populate the result object with the RangedSummarizedExperiment from the bam files from RNA seq data. Called from make_counts() when is_rnaseq == TRUE.}
 6 | \usage{
 7 | load_rnaseq(result, filter_params, window_file, gene_id_col = "ID")
 8 | }
 9 | \arguments{
10 | \item{result}{list from make_counts()}
11 | 
12 | \item{filter_params}{a params object, described in atacr::make_counts()}
13 | 
14 | \item{window_file}{a filename of a CSV file with the bait regions}
15 | 
16 | \item{gene_id_col}{a character string stating which attribute name to take from the final column of the GFF file to use for the window name in RNASeq data. Usually this is the name of the gene. Default = ID.}
17 | }
18 | \description{
19 | populate the result object with the RangedSummarizedExperiment from the bam files from RNA seq data. Called from make_counts() when is_rnaseq == TRUE.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/normalise_by_window_width.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{normalise_by_window_width}
 4 | \alias{normalise_by_window_width}
 5 | \title{normalise counts by window width (counts / window width)}
 6 | \usage{
 7 | normalise_by_window_width(data, which = "bait_windows", per = 1000)
 8 | }
 9 | \arguments{
10 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
11 | 
12 | \item{which}{the subset of the data to normalise. Default = bait_windows}
13 | 
14 | \item{per}{= the expression count / width gives the reads in the window divided by the width, so a 3000 nt gene with 30000 reads mapping to it will have a read count of just 10. Setting this parameter allows you to represent the counts per some other number of nts. Default = 1000, so gives the reads per kb of the gene.}
15 | }
16 | \value{
17 | SummarizedExperiment object with normalised counts
18 | }
19 | \description{
20 | normalise counts by window width (counts / window width)
21 | }
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test_normalisation.R:
--------------------------------------------------------------------------------
 1 | # Tests for the normalisation helpers.
 2 | # Clear the R_TESTS environment variable, then attach the package under test.
 3 | Sys.setenv("R_TESTS" = "")
 4 | library(atacr)
 5 | 
 6 | context("normalisation functions")
 7 | 
 8 | test_that("library_size_normalisation_internal() returns proper values", {
 9 |   # Local helper: wrap a matrix as the `counts` assay of a SummarizedExperiment.
10 |   as_counts_se <- function(m) {
11 |     SummarizedExperiment::SummarizedExperiment(assays = list(counts = m))
12 |   }
13 | 
14 |   normalised <- library_size_normalisation_internal(as_counts_se(matrix(1:9, nrow = 3)))
15 |   wanted_values <- c(2.5, 4, 4.375, 5, 5, 5, 7.5, 6, 5.625)
16 | 
17 |   expect_equal(normalised, as_counts_se(matrix(wanted_values, nrow = 3)))
18 | })
19 | 
20 | test_that("get_scaling_factors() gets proper values", {
21 |   # 3 x 3 input whose columns are constant 1, 2 and 3.
22 |   column_constant <- matrix(rep(c(1, 2, 3), each = 3), nrow = 3)
23 |   expect_equal(get_scaling_factors(column_constant), c(2, 1, 0.666666667))
24 | })
25 | 
26 | test_that("scale_normalise() returns proper values", {
27 |   column_constant <- matrix(rep(c(1, 2, 3), each = 3), nrow = 3)
28 |   supplied_factors <- c(2, 1, 0.66667)
29 |   # The expected result is a 3 x 3 matrix of 2s, compared within the given tolerance.
30 |   all_twos <- matrix(2, nrow = 3, ncol = 3)
31 |   expect_equal(scale_normalise(column_constant, supplied_factors), all_twos, tolerance = 1e-05)
32 | })
33 | 


--------------------------------------------------------------------------------
/man/library_size_normalisation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{library_size_normalisation}
 4 | \alias{library_size_normalisation}
 5 | \title{performs a whole library size normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that}
 6 | \usage{
 7 | library_size_normalisation(data, which = "bait_windows",
 8 |   by_treatment = FALSE)
 9 | }
10 | \arguments{
11 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
12 | 
13 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
14 | 
15 | \item{by_treatment}{(FALSE) will group the assay into different treatments and normalise each separately - assumes that within treatment groups the samples should show little difference, but between sample treatment groups could show lots of difference between windows.}
16 | }
17 | \value{
18 | a SummarizedExperiment object with a new, normalised assay matrix
19 | }
20 | \description{
21 | performs a whole library size normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that
22 | }
23 | 


--------------------------------------------------------------------------------
/man/control_window_normalise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalisation.R
 3 | \name{control_window_normalise}
 4 | \alias{control_window_normalise}
 5 | \title{performs control window based normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that}
 6 | \usage{
 7 | control_window_normalise(data, window_file, which = "bait_windows",
 8 |   by_treatment = FALSE)
 9 | }
10 | \arguments{
11 | \item{data}{a list of SummarizedExperiment objects from atacr::make_counts()}
12 | 
13 | \item{window_file}{a text file containing the positions of control window/gene ranges}
14 | 
15 | \item{which}{the subdivision of the genome to calculate correlations either 'whole_genome', 'bait_windows' or 'non_bait_windows'}
16 | 
17 | \item{by_treatment}{should normalisation be done by all experiments (one median virtual experiment to compare all samples to) OR should normalisation be done by each treatment type (one median virtual experiment for each different treatment type)}
18 | }
19 | \value{
20 | a vector of scaling factors from control genes
21 | }
22 | \description{
23 | performs control window based normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that
24 | }
25 | 


--------------------------------------------------------------------------------
/tests/testthat/control_windows.txt:
--------------------------------------------------------------------------------
 1 | "bait_name","seq_name","start_pos","end_pos"
 2 | "AT4G05320_1","Chr4",2716013,2716132
 3 | "AT4G05320_14","Chr4",2717352,2717471
 4 | "AT4G05320_15","Chr4",2717455,2717574
 5 | "AT4G05320_16","Chr4",2717558,2717677
 6 | "AT4G05320_18","Chr4",2717764,2717883
 7 | "AT4G05320_19","Chr4",2717867,2717986
 8 | "AT4G05320_2","Chr4",2716116,2716235
 9 | "AT4G05320_20","Chr4",2717970,2718089
10 | "AT4G05320_22","Chr4",2718176,2718295
11 | "AT4G05320_23","Chr4",2718279,2718398
12 | "AT4G05320_24","Chr4",2718382,2718501
13 | "AT4G05320_25","Chr4",2718485,2718604
14 | "AT4G05320_26","Chr4",2718588,2718707
15 | "AT4G05320_27","Chr4",2718691,2718810
16 | "AT4G05320_28","Chr4",2718794,2718913
17 | "AT4G05320_29","Chr4",2718897,2719016
18 | "AT4G05320_3","Chr4",2716219,2716338
19 | "AT4G05320_30","Chr4",2719000,2719119
20 | "AT4G05320_31","Chr4",2719103,2719222
21 | "AT4G05320_32","Chr4",2719206,2719325
22 | "AT4G05320_33","Chr4",2719309,2719428
23 | "AT4G05320_34","Chr4",2719412,2719531
24 | "AT4G05320_35","Chr4",2719515,2719634
25 | "AT4G05320_36","Chr4",2719618,2719737
26 | "AT4G05320_38","Chr4",2719824,2719943
27 | "AT4G05320_39","Chr4",2719927,2720046
28 | "AT4G05320_4","Chr4",2716322,2716441
29 | "AT4G05320_40","Chr4",2720030,2720149
30 | "AT4G05320_41","Chr4",2720133,2720252
31 | "AT4G05320_42","Chr4",2720236,2720355
32 | "AT4G05320_43","Chr4",2720339,2720458
33 | "AT4G05320_44","Chr4",2720441,2720560
34 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: atacr
 2 | Type: Package
 3 | Title: Analysing Capture Seq Count Data
 4 | Version: 0.4.14
 5 | Authors@R: c(
 6 |   person("Dan","MacLean", email="dan.maclean@tsl.ac.uk", role=c("aut", "cre")),
 7 |   person("Ram-Krishna", "Shrestha", email="ram-krishna.shrestha@tsl.ac.uk", role="aut"))
 8 | Description: This package helps with the analysis of count data from RNA and ATAC capture-seq experiments.
 9 |     Using BioConductor RangedSummarizedExperiment objects, atacr implements a set of helper
10 |     functions and quality control plots specific to the analysis of read counts in windows.
11 |     Especially, atacr is useful for performing control window based between sample normalizations and for
12 |     easily running non-standard tests for differentially accessible windows in common reference designs.
13 | Depends: R (>= 3.0.0)
14 | License: MIT + file LICENSE
15 | LazyData: TRUE
16 | Imports:
17 |     BayesFactor,
18 |     boot,
19 |     biomaRt,
20 |     corrplot,
21 |     csaw,
22 |     dplyr,
23 |     edgeR,
24 |     fitdistrplus,
25 |     GenomeGraphs,
26 |     GenomicAlignments,
27 |     GenomicRanges,
28 |     grid,
29 |     gridExtra,
30 |     ggjoy,
31 |     ggplot2,
32 |     ggthemes,
33 |     heatmap3,
34 |     IRanges,
35 |     magrittr,
36 |     methods,
37 |     plyr,
38 |     RColorBrewer,
39 |     reshape,
40 |     reshape2,
41 |     Rsamtools,
42 |     rtracklayer,
43 |     S4Vectors,
44 |     stringr,
45 |     SummarizedExperiment,
46 |     tidyr
47 | RoxygenNote: 6.0.1
48 | Suggests: testthat,
49 |     knitr,
50 |     pander,
51 |     rmarkdown,
52 |     UpSetR
53 | VignetteBuilder: knitr
54 | biocViews: 
55 | 


--------------------------------------------------------------------------------
/man/sim_counts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atacr.R
 3 | \docType{data}
 4 | \name{sim_counts}
 5 | \alias{sim_counts}
 6 | \title{sim_counts - simulated count data}
 7 | \format{A SummarizedExperiment object}
 8 | \usage{
 9 | sim_counts
10 | }
11 | \description{
12 | The data `sim_counts` is a simulated data set with computer generated window counts for three replicates of each of two conditions in experiments with 500 bait and non-bait windows. We'll set each experiment to have 10 \% of windows differentially accessible at a difference of approximately 2 fold.
13 | }
14 | \details{
15 | Counts in bait windows for "control" samples  will be modelled as \eqn{C \sim NB(\mu = 30, size = 10\mu)}.
16 | 
17 | Counts in bait windows for "treatment" samples will be modelled as \eqn{C \cdot unif(0.8,1.2)}.
18 | 
19 | Differentially accessible bait windows will be modelled as \eqn{C_{1..50} \cdot \mathcal{N}( \mu=2,\sigma = \mu/2)}
20 | }
21 | \keyword{datasets}
22 | 


--------------------------------------------------------------------------------
/man/make_counts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading.R
 3 | \name{make_counts}
 4 | \alias{make_counts}
 5 | \title{load BAM files and calculate window coverage}
 6 | \usage{
 7 | make_counts(window_file, sample_treatment_file, width = 50,
 8 |   filter_params = make_params(), with_df = FALSE, is_rnaseq = FALSE,
 9 |   gene_id_col = "ID")
10 | }
11 | \arguments{
12 | \item{window_file}{A filename of a CSV file with the bait regions}
13 | 
14 | \item{sample_treatment_file}{A filename of a CSV file that lists treatments, samples and bam file paths}
15 | 
16 | \item{width}{an integer of the width of the bins the bait regions will be divided into}
17 | 
18 | \item{filter_params}{a params object from atacr::make_params()  that define how reads will be extracted from the BAM files. Optionally, for greater control, either a csaw::readParam() (for ATACseq) or Rsamtools::ScanBamParam() object for RNASeq can be provided. See http://bioconductor.org/packages/release/bioc/manuals/csaw/man/csaw.pdf or https://www.rdocumentation.org/packages/Rsamtools/versions/1.24.0/topics/ScanBamParam for details}
19 | 
20 | \item{with_df}{attach a dataframe version of the data Default = FALSE}
21 | 
22 | \item{is_rnaseq}{a boolean stating whether this is RNASeq data. Default = FALSE}
23 | 
24 | \item{gene_id_col}{a character string stating which attribute name to take from the final column of the GFF file to use for the window name in RNASeq data. Usually this is the name of the gene. Default = ID.}
25 | }
26 | \value{
27 | a list of metadata and RangedSummarizedExperiment objects with read count in windows for whole genome, bait windows and non-bait windows for each sample
28 | }
29 | \description{
30 | load BAM files and calculate window coverage
31 | }
32 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(as.data.frame,atacr)
 4 | S3method(as.matrix,atacr)
 5 | S3method(plot,atacr)
 6 | S3method(print,atacr)
 7 | S3method(summary,atacr)
 8 | export(as.DGEList)
 9 | export(calc_quantiles)
10 | export(chromosome_coverage)
11 | export(control_window_normalise)
12 | export(control_window_scaling_factors)
13 | export(count_windows_under_threshold)
14 | export(coverage_count_summary)
15 | export(coverage_summary)
16 | export(edgeR_exact)
17 | export(edgeR_multiclass)
18 | export(estimate_GoFs)
19 | export(estimate_bayes_factor)
20 | export(estimate_bayes_factor_multiclass)
21 | export(estimate_fdr)
22 | export(estimate_fdr_multiclass)
23 | export(extract_features_from_gff)
24 | export(find_controls_by_GoF)
25 | export(get_GoF_factors)
26 | export(get_expected_values)
27 | export(library_size_normalisation)
28 | export(library_size_scaling_factors)
29 | export(ma_data)
30 | export(ma_plot)
31 | export(make_UpSetR)
32 | export(make_counts)
33 | export(make_params)
34 | export(make_tutorial_data)
35 | export(median_virtual_experiment)
36 | export(normalise_by_window_width)
37 | export(observed_expected_bins)
38 | export(plot_GoF)
39 | export(plot_count_by_chromosome)
40 | export(plot_counts)
41 | export(qqarb)
42 | export(sample_correlation_plot)
43 | export(sample_kmeans_cluster)
44 | export(sample_pca_plot)
45 | export(scale_factor_normalise)
46 | export(simulate_counts)
47 | export(target_count_coverage)
48 | export(target_count_summary)
49 | export(text_to_gff)
50 | export(treatments)
51 | export(view_gene)
52 | export(windows_below_coverage_threshold_plot)
53 | importFrom(SummarizedExperiment,rbind)
54 | importFrom(graphics,hist)
55 | importFrom(magrittr,"%>%")
56 | importFrom(methods,as)
57 | importFrom(stats,cor)
58 | importFrom(stats,cor.test)
59 | importFrom(stats,kmeans)
60 | importFrom(stats,median)
61 | importFrom(stats,p.adjust)
62 | importFrom(stats,quantile)
63 | importFrom(stats,rlnorm)
64 | importFrom(stats,rnbinom)
65 | importFrom(stats,rnorm)
66 | importFrom(stats,rpois)
67 | importFrom(stats,runif)
68 | importFrom(stats,sd)
69 | importFrom(stats,start)
70 | importFrom(stats,t.test)
71 | importFrom(stats,window)
72 | importFrom(utils,capture.output)
73 | importFrom(utils,read.csv)
74 | importFrom(utils,str)
75 | 


--------------------------------------------------------------------------------
/vignettes/atacr_which.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "atacr objects and the which argument"
 3 | author: "Dan MacLean"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{atacr which}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ## The `atacr` Object
13 | 
14 | When `make_counts()` is run, an `atacr` object is returned. This is a simple, somewhat informal object based on the R list type. It is basically an R list with the following members:
15 | 
16 |   1. treatments - a character vector of treatment names
 17 |   2. sample_names - a character vector of sample names
18 |   3. bam_files - a character vector of paths for the used BAM files
 19 |   4. bait_regions - a `GenomicRanges::GRanges` object describing the bait window regions 
20 |   5. bait_windows - a `RangedSummarizedExperiment` object containing the counts in the windows in `bait_regions`
 21 |   6. non_bait_windows - a `RangedSummarizedExperiment` object containing the counts in the windows in the regions outside `bait_regions`
22 |   7. whole_genome - the union of bait_windows and non_bait_windows
23 |   8. dataframe - an optional member and the result of calling `as.data.frame()` on the `atacr` object
24 |   
25 | ### Column Order
26 | 
 27 | The `RangedSummarizedExperiment` objects carry the count data. They are organised as a matrix with rows representing windows and columns representing the different samples. The column order is conserved and is the same as that in `treatments`, `sample_names` and `bam_files`.
28 | 
29 | ## The 'which' argument
30 | 
31 | Many of the functions allow you to state which member of the `atacr` list (really a `RangedSummarizedExperiment`) you wish to apply the function to with the `which` argument, e.g
32 | 
33 | ```{r, eval=FALSE}
34 | plot_counts(counts, which = "bait_windows", log10 = FALSE)
35 | ```
36 | 
37 | ## Adding members to the `atacr` object
38 | 
 39 | Functions that return a `RangedSummarizedExperiment` can be used to add new members to the list, and the new members can then be worked on just like the built-in ones. This is especially useful for normalisations.
40 | 
41 | ```{r, eval=FALSE}
42 | 
43 | counts$by_sample <- library_size_normalisation(counts, 
44 |                                              by_treatment = TRUE)
45 | 
46 | plot_counts(counts, which = "by_sample", log10 = FALSE)
47 | ```
48 | 
49 | 


--------------------------------------------------------------------------------
/vignettes/summaries.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Summaries"
 3 | author: "Dan MacLean"
 4 | date: "`r Sys.Date()`"
 5 | output:
 6 |   rmarkdown::html_vignette:
 7 |     fig_caption: yes
 8 | vignette: >
 9 |   %\VignetteIndexEntry{Summaries}
10 |   %\VignetteEngine{knitr::rmarkdown}
11 |   %\VignetteEncoding{UTF-8}
12 | ---
13 | 
14 | The `atacr` package provides functions for getting quick summaries of your data. An overview comes from `summary()`
15 | 
16 | ```{r, echo=FALSE, eval=TRUE}
17 | library(atacr)
18 | counts <- simulate_counts()
19 | ```
20 | 
21 | ```{r, echo=TRUE, eval=TRUE}
22 | summary(counts)
23 | ```
24 | 
25 | which shows the on and off target hit counts, the quantiles and the mean read depths.
26 | 
27 | The count distributions across the bait and non-bait windows by sample can be plotted quickly with `coverage_summary()`.
28 | 
29 | ```{r, echo=TRUE, eval=TRUE, fig.width=7}
30 | coverage_summary(counts)
31 | ```
32 | 
33 | ## Diagnostic plots 
34 | 
 35 | It is possible to look at the coverage in a given data set and at the raw counts.
36 | 
37 | ```{r, fig.width=7}
38 | plot_counts(counts, which = "bait_windows", log10 = FALSE)
39 | ```
40 | 
41 | ### Low counts in windows
42 | 
 43 | The number of windows below a threshold for each experiment can be seen with `windows_below_coverage_threshold_plot()`, and you can set the lower and upper bounds with the `from` and `to` arguments.
44 | 
45 | ```{r, echo=TRUE, eval=TRUE, fig.width=7}
46 | 
47 | windows_below_coverage_threshold_plot(counts, from = 5, to = 25)
48 | ```
49 | 
50 | 
51 | ### MA plots
52 | 
53 | MA plots of sample count versus all sample median count - to highlight odd looking experiments and extreme outliers - can be displayed with `ma_plot()`. By default this will use the `bait_windows` data, but you can set the `which` argument to use other subsets, e.g `non_bait_windows` 
54 | ```{r, fig.width=7}
55 | ma_plot(counts)
56 | ```
57 | 
58 | ### Per chromosome plots
59 | 
60 | These are bar charts of coverage at the windows across the chromosomes (`seqnames`) provided in the data.
61 | 
62 | ```{r, fig.width=7, fig.height=7, fig.cap="The simulated data here are spread randomly across the chromosome."}
63 | plot_count_by_chromosome(counts)
64 | ```
65 | 
66 | ### Sample comparison plots
67 | 
 68 | A matrix of correlations between the counts in the samples can be plotted with the `sample_correlation_plot()` function. In this plot the colour and size of the dots represent the Pearson correlation coefficient. Pairwise comparisons with _p_ < 0.05 have a blank space.
69 | 
70 | ```{r, fig.width=7}
71 | sample_correlation_plot(counts)
72 | ```
73 | 
 74 | A PCA plot that clusters the most similar samples can also be generated using the `sample_pca_plot()` function.  
75 | 
76 | ```{r, fig.width=7}
77 | sample_pca_plot(counts)
78 | ```
79 | 


--------------------------------------------------------------------------------
/tests/testthat/test_atacr.R:
--------------------------------------------------------------------------------
 1 | Sys.setenv("R_TESTS" = "")
 2 | library(atacr)
 3 | 
 4 | # Tests for the summary and count helpers, run against the `sim_counts` data.
 5 | # expect_vectors_equal() and expect_has_all_and_only_these_members() are not
 6 | # defined in this file - presumably they come from helper-functions.R.
 7 | context("summary and count functions")
 8 | 
 9 | test_that("target_count_summary() returns proper dataframe", {
10 | 
11 |   smry <- target_count_summary(sim_counts)
12 |   expect_is(smry, "data.frame") #right class
13 |   expect_vectors_equal(colnames(smry), c("sample","percent_on_target", "on_target", "off_target"))
14 | 
15 | })
16 | 
17 | test_that("coverage_count_summary() returns proper dataframe", {
18 | 
19 |   smry <- coverage_count_summary(sim_counts)
20 |   expect_is(smry, "data.frame")
21 |   expect_vectors_equal(colnames(smry), c("on_target", "off_target", "sample"))
22 | 
23 | })
24 | 
25 | test_that("target_count_coverage() returns proper dataframe", {
26 |   cov <- target_count_coverage(sim_counts)
27 |   expect_is(cov, "data.frame")
28 |   expect_vectors_equal(colnames(cov), c("sample", "target", "count_sum", "mean_coverage"))
29 | })
30 | 
31 | test_that("target_count_coverage() returns proper sized dataframe", {
32 |   cov <- target_count_coverage(sim_counts)
33 |   expect_length(cov$count_sum, 12)
34 |   expect_equal(nrow(cov[cov$target == "on_target",]), 6)
35 |   expect_equal(nrow(cov[cov$target == "off_target",]), 6)
36 | })
37 | 
38 | test_that("target_count_coverage() returns proper values in dataframe", {
39 |   cov <- target_count_coverage(sim_counts)
40 |   expect_vectors_equal(cov$count_sum,c(15001.00,15170.00,14976.00,16665.77,16755.63,16640.31,355.00,359.00,360.00,364.00,405.00,376.00))
41 | })
42 | 
43 | test_that("sample_kmeans_cluster() returns proper dataframe and proper values in the dataframe", {
44 |   k <- sample_kmeans_cluster(sim_counts)
45 |   expect_vectors_equal(colnames(k), c("cluster_id", "sample"))
46 |   # The numeric label given to a k-means cluster is arbitrary, so check the
47 |   # grouping (first three samples together, last three together, the two
48 |   # groups apart) rather than the literal labels 1 and 2.
49 |   ids <- as.character(k$cluster_id)
50 |   expect_length(ids, 6)
51 |   expect_equal(length(unique(ids[1:3])), 1)
52 |   expect_equal(length(unique(ids[4:6])), 1)
53 |   expect_false(ids[1] == ids[4])
54 | })
55 | 
56 | test_that("count_windows_under_threshold() returns proper dataframe with proper values", {
57 |   th <- count_windows_under_threshold(sim_counts, threshold=15)
58 |   expect_vectors_equal(colnames(th), c("count", "threshold", "sample"))
59 |   expect_vectors_equal(th$count, c(0,0,0,4,4,4))
60 | })
61 | 
62 | test_that("calc_quantiles() returns list() when threshold == NULL", {
63 |   l <- calc_quantiles(sim_counts)
64 |   expect_is(l, "list")
65 |   expect_has_all_and_only_these_members(l, c("bait_windows", "non_bait_windows"))
66 | })
67 | 
68 | test_that("get_expected_values() returns right random numbers",{
69 |   # fixed seed: the expected vector below is only valid for this seed
70 |   set.seed(1234)
71 |   expect_vectors_equal(get_expected_values(c(1,2,3,4,1,2,3,4),dist="norm"),c(1.057280,2.831591,3.796155,-0.303645,3.012902,3.104852,1.813054,1.846650))
72 | })
73 | 
74 | test_that("observed_expected_bins() gives right values", {
75 |   l <- observed_expected_bins(c(1,2,3,4,1,2,3,4))
76 |   expect_has_all_and_only_these_members(l, c("observed", "expected"))
77 |   expect_vectors_equal(l$observed, c(8))
78 |   expect_vectors_equal(l$expected, c(8))
79 | })
80 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
 5 | [![Build Status](https://travis-ci.org/TeamMacLean/atacr.svg?branch=master)](https://travis-ci.org/TeamMacLean/atacr)
 6 | [![codecov](https://codecov.io/gh/TeamMacLean/atacr/branch/master/graph/badge.svg)](https://codecov.io/gh/TeamMacLean/atacr)
 7 |  
 8 | ---
 9 |  
10 | [![minimal R version](https://img.shields.io/badge/R%3E%3D-3.0.0-6666ff.svg)](https://cran.r-project.org/)
11 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/atacr)](https://cran.r-project.org/package=atacr)
12 | [![packageversion](https://img.shields.io/badge/Package%20version-0.4.14-orange.svg?style=flat-square)](commits/master)
13 |  
14 | ---
15 |  
16 | [![Last-changedate](https://img.shields.io/badge/last%20change-`r gsub('-', '--', Sys.Date())`-yellowgreen.svg)](/commits/master)
17 | 
18 | <!-- README.md is generated from README.Rmd. Please edit that file -->
19 | 
 20 | ```{r, echo = FALSE, warning=FALSE, message=FALSE}
21 | knitr::opts_chunk$set(
22 |   collapse = TRUE,
23 |   comment = "#>",
24 |   fig.path = "README-"
25 | )
26 | devtools::load_all("~/Desktop/atacr")
27 | 
28 | ```
29 | 
30 | # atacR
31 | 
32 | Helps with the analysis of count data from RNA-capture-seq and ATAC-capture-seq experiments. Using BioConductor RangedSummarizedExperiment objects, atacr implements a set of helper functions and quality control plots specific to the analysis of counts of reads in windows across genomes. Especially, atacr is useful for performing sample normalizations and for easily running bootstrap and Bayes factor tests for differentially accessible windows in common reference designs.
33 | 
34 | ## Installation
35 | 
36 | You can install atacR from github with:
37 | 
38 | ```{r gh-installation, eval = FALSE}
39 | # install.packages("devtools")
40 | devtools::install_github("TeamMacLean/atacr")
41 | ```
42 | 
43 | ## Documentation
44 | 
45 | You can read documentation on the following topics
46 | 
47 |   1. [Tutorial - A worked example](https://teammaclean.github.io/atacr)
48 |   2. [atacR - General Overview](https://teammaclean.github.io/atacr/atacr.html)
49 |   3. [Loading Data](https://teammaclean.github.io/atacr/loading.html)
 50 |   4. [Summaries of Data](https://teammaclean.github.io/atacr/summaries.html)
 51 |   5. [Normalising Data](https://teammaclean.github.io/atacr/normalisations.html)
 52 |   6. [Differential Windows](https://teammaclean.github.io/atacr/differential_windows.html)
 53 |   7. [Subsetting Data](https://teammaclean.github.io/atacr/atacr_which.html)
54 | 
55 | ## Quick start:
56 | 
57 | ```{r example, echo=TRUE, fig.height=7, fig.width=7}
58 | library(atacr)
59 | summary(sim_counts)
60 | ```
61 | 
62 | ```{r}
63 | plot(sim_counts)
64 | ```
65 | 


--------------------------------------------------------------------------------
/vignettes/atacr.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using atacr for Enriched RNAseq and ATACseq analysis"
  3 | author: "Dan MacLean"
  4 | date: "`r Sys.Date()`"
  5 | output: rmarkdown::html_vignette
  6 | vignette: >
  7 |   %\VignetteIndexEntry{atacr}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\VignetteEncoding{UTF-8}
 10 | ---
 11 | 
 12 | _atacr_ is a package for creating statistics and diagnostic plots for short read sequence data from capture enriched RNAseq and ATACseq experiments.
 13 | 
 14 | This vignette provides a brief overview of the capabilities of `atacr`.
 15 | 
 16 | ## Sample data
 17 | 
 18 | > The function `simulate_counts()` will give us a small simulated data set of three replicates from a control and treatment. Each of the six sets of counts follows a mixed distribution of 10 counts drawn from a log-normal distribution with logmean 4 and SD 1, and 40 counts with logmean 10 and SD 1. This mimics the enrichment pattern we see with capture enriched data. 10 of the counts are multiplied by a value drawn from the normal distribution with mean 2 and SD 1 so can appear differentially expressed. These counts represent bait-windows - regions of the genome for which baits were designed. The bait-window counts are mixed with 50 non-bait-windows which have 0 counts.   
 19 | 
 20 | ```{r}
 21 | library(atacr)
 22 | counts <-simulate_counts()
 23 | ```
 24 | 
 25 | ## Experiment Summary Information
 26 | 
 27 | It's very easy to get information on the coverage for bait/non-bait windows on a per sample basis
 28 | 
 29 | ```{r, echo=TRUE, fig.height=7, fig.width=7}
 30 | summary(counts)
 31 | plot(counts)
 32 | ```
 33 | 
 34 | 
 35 | These plots can be generated individually with the following functions
 36 | 
 37 | ```{r, eval=FALSE}
 38 | coverage_summary(counts)
 39 | chromosome_coverage(counts)
 40 | ```
 41 | 
 42 | ## QC Plots
 43 | 
 44 | ### Plot for coverage by sequence and sample
 45 | ```{r,  fig.height=7, fig.width=7}
 46 | plot_count_by_chromosome(counts)
 47 | ```
 48 | 
 49 | 
 50 | ### Correlations between sample counts
 51 | ```{r, fig.height=7, fig.width=7}
 52 | sample_correlation_plot(counts)
 53 | ```
 54 | 
 55 | 
 56 | 
 57 | ### Count windows below a threshold. 
 58 | ```{r, fig.height=7, fig.width=7}
 59 | windows_below_coverage_threshold_plot(counts, which = "bait_windows", from=0, to=1000)
 60 | ```
 61 | 
 62 | 
 63 | ### MA plots
 64 | ```{r, fig.height=7, fig.width=7}
 65 | ma_plot(counts)
 66 | ```
 67 | 
 68 | ## Normalisation
 69 | 
 70 | Normalisation strategies are easy to implement with `atacr` and helpful functions are included
 71 | 
 72 | ```{r, fig.height=7, fig.width=7}
 73 | counts$library_size_normalised <- library_size_normalisation(counts)
 74 | ma_plot(counts, which = "library_size_normalised")
 75 | ```
 76 | 
 77 | Normalisation by control windows. Requires a text file with the control window positions
 78 | 
 79 | ```{r, eval=FALSE}
 80 | window_file <- "control_windows.txt"
 81 | counts$control_window_normalisation <- control_window_normalise(sim_counts, window_file)
 82 | ```
 83 | 
 84 | ## Detect differentially expressed/accessible windows
 85 | 
 86 | Using a simple bootstrap _t_-test method for simple two-way comparisons.
 87 | ```{r, results = "asis" }
 88 | 
 89 | result <- estimate_fdr(sim_counts, "treatment", "control", which = "bait_windows")
 90 | 
 91 | pander::pandoc.table(head(result))
 92 | ```
 93 | This can also be done for multiclass designs with multiple samples against a common reference.
 94 | 
 95 | ```{r, results = "asis"}
 96 | multi_result <- estimate_fdr_multiclass(sim_counts, "control", which = "bait_windows")
 97 | pander::pandoc.table(head(multi_result))
 98 | ```
 99 | 
100 | 


--------------------------------------------------------------------------------
/R/sims.R:
--------------------------------------------------------------------------------
  1 | #' simulate counts and return an atacr object
  2 | #'
  3 | #' Builds a synthetic experiment of 100 windows, each 50 bases wide, on one
  4 | #' sequence called `synth_chrom`, for three control and three treatment
  5 | #' samples. Fifty windows carry simulated bait counts and fifty are non-bait
  6 | #' windows with zero counts; the windows are shuffled before being placed on
  7 | #' the sequence. Values come from R's random number generator, so call
  8 | #' `set.seed()` beforehand when reproducible output is needed.
  9 | #'
 10 | #' @return a list of class `c("atacr", "list")` with the members
 11 | #'   `whole_genome`, `treatments`, `sample_names`, `bam_files`,
 12 | #'   `bait_regions`, `bait_windows`, `non_bait_windows` and `dataframe`
 13 | #' @export
 14 | simulate_counts <- function() {
 15 |   # Each of the six sets of counts follows a mixed distribution of 10 counts
 16 |   # drawn from a log-normal distribution with logmean 4 and SD 1, and 40
 17 |   # counts with logmean 10 and SD 1. This mimics the enrichment pattern we see
 18 |   # with capture enriched data. 10 of the counts are multiplied by a value
 19 |   # drawn from the normal distribution with mean 2 and SD 1 so can appear
 20 |   # differentially expressed. These counts represent bait-windows - regions of
 21 |   # the genome for which baits were designed and selected.
 22 |   num_windows <- 100 #50 bait windows, 50 non bait windows
 23 |   num_bait <- 50
 24 |   window_width <- 50
 25 |   reps <- 3
 26 | 
 27 |   treatments <- c(rep("control", reps), rep("treatment", reps))
 28 |   sample_names <- c(sprintf("control_%03d", 1:reps), sprintf("treatment_%03d", 1:reps))
 29 |   window_names <- sprintf("window_%06d", 1:num_windows)
 30 | 
 31 |   # NOTE(review): `col.names` appears to end up as an ordinary column of the
 32 |   # DataFrame; `row.names` may have been intended - confirm before changing.
 33 |   col_data <- S4Vectors::DataFrame(Treatment = treatments, col.names = sample_names)
 34 | 
 35 |   # window i covers bases (i - 1) * window_width + 1 to i * window_width
 36 |   row_ranges <- GenomicRanges::GRanges(
 37 |     rep("synth_chrom", num_windows),
 38 |     IRanges::IRanges(seq(1, (num_windows * window_width), by = window_width), width = window_width),
 39 |     strand = sample(c("+", "-"), num_windows, TRUE),
 40 |     feature_id = window_names
 41 |   )
 42 |   names(row_ranges) <- window_names
 43 | 
 44 |   # control_001: the basic two peak distribution
 45 |   a <-
 46 |     floor(c(
 47 |       rlnorm(10, meanlog = 4, sdlog = 1),
 48 |       rlnorm(40, meanlog = 10, sdlog = 1)
 49 |     ))
 50 |   # remaining controls: control_001 scaled by |N(1, 1)| noise
 51 |   b <- floor(a * abs(rnorm(50, 1, sd = 1)))
 52 |   c <- floor(a * abs(rnorm(50, 1, sd = 1)))
 53 |   # treatments: the first 10 windows are scaled by |N(2, 1)|, the rest by |N(1, 1)|
 54 |   d <-
 55 |     floor(a * abs(c(rnorm(10, 2, sd = 1), rnorm(40, 1, sd = 1))))
 56 |   e <-
 57 |     floor(a * abs(c(rnorm(10, 2, sd = 1), rnorm(40, 1, sd = 1))))
 58 |   f <-
 59 |     floor(a * abs(c(rnorm(10, 2, sd = 1), rnorm(40, 1, sd = 1))))
 60 | 
 61 |   # non-bait windows: zero counts in every sample
 62 |   blank <- rep(0, num_windows - num_bait)
 63 |   counts <-
 64 |     data.frame(
 65 |       control_001 = c(a, blank),
 66 |       control_002 = c(b, blank),
 67 |       control_003 = c(c, blank),
 68 |       treatment_001 = c(d, blank),
 69 |       treatment_002 = c(e, blank),
 70 |       treatment_003 = c(f, blank)
 71 |     )
 72 | 
 73 |   # Shuffle the windows. Keeping the permutation lets the bait rows be found
 74 |   # exactly afterwards: rows 1..num_bait of the unshuffled table are the baits,
 75 |   # even if one of their simulated counts happened to floor to 0.
 76 |   perm <- sample(nrow(counts))
 77 |   counts <- as.matrix(counts[perm, ])
 78 |   row.names(counts) <- window_names
 79 | 
 80 |   se <- SummarizedExperiment::SummarizedExperiment(
 81 |     assays = list(counts = counts),
 82 |     rowRanges = row_ranges,
 83 |     colData = col_data
 84 |   )
 85 | 
 86 |   r <- list()
 87 |   class(r) <- c("atacr", "list")
 88 |   r$whole_genome <- se
 89 |   r$treatments <- treatments
 90 |   r$sample_names <- sample_names
 91 |   r$bam_files <- "no.bam"
 92 | 
 93 |   bw <- which(perm <= num_bait)                 # row indices of bait windows
 94 |   nbw <- setdiff(seq_len(num_windows), bw)      # every other row
 95 | 
 96 |   # bait regions use the same coordinates as the windows they describe
 97 |   start_pos <- (bw - 1) * window_width + 1
 98 |   end_pos <- start_pos + window_width - 1
 99 |   seq_names <- rep("synth_chrom", length(bw))
100 |   r$bait_regions <- GenomicRanges::GRanges(
101 |     seqnames = S4Vectors::Rle(seq_names),
102 |     ranges = IRanges::IRanges(
103 |       start_pos,
104 |       end = end_pos,
105 |       names = sprintf("bait_%02d", seq_along(bw))
106 |     )
107 |   )
108 | 
109 |   r$bait_windows <- r$whole_genome[bw, ]
110 |   # `!bw` on integer indices is FALSE everywhere and selected no rows at all;
111 |   # the complement has to be taken as an index set
112 |   r$non_bait_windows <- r$whole_genome[nbw, ]
113 | 
114 |   # rename every window to the text form of its range; as.data.frame() on the
115 |   # object splits these names back into chromosome/start/stop/strand
116 |   r$whole_genome@rowRanges@ranges@NAMES <-
117 |     as.character(r$whole_genome@rowRanges)
118 |   r$bait_windows@rowRanges@ranges@NAMES <-
119 |     as.character(r$bait_windows@rowRanges)
120 |   r$non_bait_windows@rowRanges@ranges@NAMES <-
121 |     as.character(r$non_bait_windows@rowRanges)
122 |   r$dataframe <- as.data.frame(r)
123 |   return(r)
124 | }
125 | 


--------------------------------------------------------------------------------
/R/methods.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # Writes a short description of an atacr object to the console with cat():
  4 | # the number and names of the unique treatments and samples, followed by the
  5 | # lengths of `bait_regions` and `whole_genome`.
  6 | # atcr: list-like object with `sample_names`, `treatments`, `bait_regions`
  7 | #   and `whole_genome` members.
  8 | # Returns the value of the cat() call.
  9 | meta_summary <- function(atcr) {
 10 |   uniq_samples <- unique(atcr$sample_names)
 11 |   uniq_treatments <- unique(atcr$treatments)
 12 |   # cat() puts one space between consecutive arguments, so each printed line
 13 |   # after the first begins with a space and has one before its "\n".
 14 |   cat(
 15 |     "ATAC-seq experiment of",
 16 |     length(uniq_treatments),
 17 |     "treatments in",
 18 |     length(uniq_samples),
 19 |     "samples\n",
 20 |     "Treatments:",
 21 |     paste(uniq_treatments, collapse = ","),
 22 |     "\n",
 23 |     "Samples:",
 24 |     paste(uniq_samples, collapse = ","),
 25 |     "\n",
 26 |     "Bait regions used:", length(atcr$bait_regions), "\n",
 27 |     "Total Windows:", length(atcr$whole_genome),
 28 |     "\n"
 29 |   )
 30 | }
 31 | 
 32 | #' prints the metadata summary of an atacr object
 33 | #' @export
 34 | #' @param x an atacr object
 35 | #' @param \dots further arguments passed by the print generic; not used here
 36 | print.atacr <- function(x, ...) {
 37 |   meta_summary(atcr = x) # writes the summary text to the console
 38 |   return(invisible(x))
 39 | }
 40 | 
 41 | #' prints a detailed data summary of the atacr object
 42 | #' @export
 43 | #' @param object an atacr object
 44 | #' @param \dots other options for summary generic; not used here
 45 | summary.atacr <- function(object, ...) {
 46 |   # meta_summary() prints its block immediately; the value it returns is
 47 |   # handed to cat() below as the first argument
 48 |   header <- meta_summary(object)
 49 | 
 50 |   # each table is printed into a character vector, then joined into one
 51 |   # newline separated string
 52 |   target_txt <- paste(
 53 |     capture.output(target_count_summary(object)),
 54 |     collapse = "\n"
 55 |   )
 56 |   depth_txt <- paste(
 57 |     capture.output(coverage_count_summary(object)),
 58 |     collapse = "\n"
 59 |   )
 60 |   quantile_txt <- paste(
 61 |     capture.output(calc_quantiles(object)),
 62 |     collapse = "\n"
 63 |   )
 64 |   cat(
 65 |     header, "\n",
 66 |     "On/Off target read counts:\n", target_txt, "\n",
 67 |     "Quantiles:", "\n", quantile_txt, "\n",
 68 |     "Read depths:\n", depth_txt
 69 |   )
 70 | }
 71 | #' extracts the counts of one subset of an atacr object as a matrix
 72 | #' @export
 73 | #' @param x an atacr object
 74 | #' @param \dots other options for generic; not used here
 75 | #' @param which name of the member of `x` to take the counts from
 76 | #' @return the result of `SummarizedExperiment::assay()` on that member
 77 | as.matrix.atacr <- function(x, ..., which = "bait_windows") {
 78 |   selected <- x[[which]]
 79 |   SummarizedExperiment::assay(selected)
 80 | }
 81 | 
 82 | #' returns dataframe of data in atacr object
 83 | #' @export
 84 | #' @param x an atacr object
 85 | #' @param \dots other options for generic
 86 | #' @return dataframe with one row per window and sample
 87 | as.data.frame.atacr <- function(x, ...) {
 88 |   # a data frame already stored on the object is returned as is
 89 |   if (!is.null(x[["dataframe"]])) {
 90 |     return(x[["dataframe"]])
 91 |   }
 92 | 
 93 |   # long format for one member: one row per window/sample cell of its matrix
 94 |   melt_windows <- function(window_type) {
 95 |     long <- reshape2::melt(as.matrix.atacr(x, which = window_type))
 96 |     colnames(long) <- c("name", "sample", "count")
 97 |     long$window_type <- factor(rep(window_type, nrow(long)))
 98 |     long
 99 |   }
100 |   df <- rbind(melt_windows("bait_windows"), melt_windows("non_bait_windows"))
101 | 
102 |   # spell the strand out so that a trailing "-" is not split on below
103 |   df$name <- stringr::str_replace(df$name, "-$", "minus")
104 |   df$name <- stringr::str_replace(df$name, "\\+$", "plus")
105 |   name <- NULL #deal with NSE of devtools::check()
106 |   df <- tidyr::separate(df, name, c("chromosome", "start", "stop", "strand"),
107 |     sep = "[-:]")
108 |   df$start <- as.integer(df$start)
109 |   df$stop <- as.integer(df$stop)
110 |   df$chromosome <- factor(df$chromosome)
111 | 
112 |   # x is a copy local to this call, so storing df on it would never reach the
113 |   # caller; callers that want the cache assign the result themselves, e.g.
114 |   # `r$dataframe <- as.data.frame(r)`
115 |   df
116 | }
117 | 
118 | #' draws a two panel summary plot of the data in an atacr object
119 | #' @method plot atacr
120 | #' @export
121 | #' @param x atacr object
122 | #' @param \dots extra options for generic; not used here
123 | #' @return the result of `gridExtra::grid.arrange()`
124 | plot.atacr <- function(x, ...) {
125 |   panels <- list(
126 |     # histogram of coverages by sample and window type
127 |     coverage = coverage_summary(x),
128 |     # density of coverage by chromosome region, bait windows
129 |     chromosome = chromosome_coverage(x)
130 |   )
131 | 
132 |   # stack the two panels in one column, coverage first
133 |   gridExtra::grid.arrange(panels$coverage, panels$chromosome, nrow = 2)
134 | }
135 | 


--------------------------------------------------------------------------------
/tests/testthat/test_loading.R:
--------------------------------------------------------------------------------
  1 | library(atacr)
  2 | Sys.setenv("R_TESTS" = "")
  3 | 
  4 | context("loading BAM files")
  5 | 
  6 | # NOTE(review): the fixtures 'individual_bait_regions.txt' and
  7 | # 'individual_bait_regions.gff' are read relative to this directory - confirm
  8 | # they are committed alongside the tests.
  9 | test_that("get_bait_regions_from_text() gets correct bait regions", {
 10 |   regions <- get_bait_regions_from_text('individual_bait_regions.txt')
 11 | 
 12 |   expect_is(regions, "GRanges") #right class
 13 |   expect_equal(levels(regions@seqnames), c("Chr1", "Chr2", "Chr3", "Chr4", "Chr5")) #right seqnames
 14 |   expect_equal(regions[1]@ranges@NAMES, "AT1G01680_1") #right first name
 15 |   expect_equal(regions[1]@ranges@start, 249021) #right start
 16 |   expect_equal(regions[1]@ranges@width, 120) #right calculated width
 17 |   expect_equal(length(regions), 2219) #right number of regions
 18 | 
 19 | })
 20 | 
 21 | 
 22 | # Shared fixtures, built once and reused by the tests below.
 23 | # all_atac: ATAC-seq counts with paired_map = FALSE, minq = 1, dedup = FALSE
 24 | all_atac <-   make_counts('individual_bait_regions.gff',
 25 |   'sample_treatment_bam_mappings_for_test.csv',
 26 |   filter_params = make_params(paired_map = FALSE, minq=1, dedup = FALSE))
 27 | 
 28 | # filtered_atac: ATAC-seq counts with the default filter_params
 29 | filtered_atac <-
 30 |   make_counts('individual_bait_regions.gff',
 31 |     'sample_treatment_bam_mappings_for_test.csv')
 32 | 
 33 | # filtered_rnaseq: RNA-seq counts with the default filter_params
 34 | filtered_rnaseq <-
 35 |   make_counts('bait_genes.gff',
 36 |     'sample_treatment_bam_mappings_for_test.csv',
 37 |     is_rnaseq = TRUE
 38 |     )
 39 | 
 40 | # all_rnaseq: RNA-seq counts with filter_params = NULL
 41 | all_rnaseq <-
 42 |   make_counts('bait_genes.gff',
 43 |     'sample_treatment_bam_mappings_for_test.csv',
 44 |     is_rnaseq = TRUE,
 45 |     filter_params = NULL
 46 |   )
 47 | 
 48 | 
 49 | 
 50 | test_that("when loading RNASeq, genome subsections are RangedSummarizedExperiments",
 51 |   {
 52 |     expect_is(all_rnaseq$whole_genome, "RangedSummarizedExperiment")
 53 |     expect_is(all_rnaseq$bait_windows, "RangedSummarizedExperiment")
 54 |     expect_is(all_rnaseq$non_bait_windows,
 55 |       "RangedSummarizedExperiment")
 56 | 
 57 |   })
 58 | 
 59 | test_that("when loading ATACSeq, genome subsections are RangedSummarizedExperiments",
 60 |   {
 61 |     expect_is(all_atac$whole_genome, "RangedSummarizedExperiment")
 62 |     expect_is(all_atac$bait_windows, "RangedSummarizedExperiment")
 63 |     expect_is(all_atac$non_bait_windows, "RangedSummarizedExperiment")
 64 | 
 65 |   })
 66 | 
 67 | test_that("when loading bam files for ATACSeq, region names load correctly", {
 68 |   # the first range in each set of windows (whole genome, bait, non-bait) must
 69 |   # have the right row name; this checks that the windows were loaded correctly
 70 | 
 71 |   expect_equal(names(all_atac$whole_genome)[1], "Chr1:1-50")
 72 |   expect_equal(names(all_atac$non_bait_windows)[1], "Chr1:1-50")
 73 |   expect_equal(names(all_atac$bait_windows)[1],
 74 |     "Chr1:245951-246000")
 75 | 
 76 | })
 77 | 
 78 | test_that("when loading BAM files for RNAseq, region names are computed correctly",
 79 |   {
 80 |     expect_equal(names(all_rnaseq$non_bait_windows)[1],
 81 |       "Chr1:1-246000")
 82 |     expect_equal(names(all_rnaseq$non_bait_windows)[2],
 83 |       "Chr1:246201-246700")
 84 |     expect_equal(names(all_rnaseq$bait_windows)[1], "FakeGeneA")
 85 |     expect_equal(names(all_rnaseq$bait_windows)[2], "FakeGeneB")
 86 | 
 87 |   })
 88 | 
 89 | test_that("when loading BAM files for RNAseq, poor reads are filtered properly", {
 90 | 
 91 |   expect_equal(unname(SummarizedExperiment::assay(filtered_rnaseq$bait_windows)["FakeGeneA",]), c(0,4,3,70))
 92 |   expect_equal(unname(SummarizedExperiment::assay(filtered_rnaseq$bait_windows)["FakeGeneB",]), c(3,7,7,186))
 93 | 
 94 | })
 95 | 
 96 | test_that("when loading BAM files for ATACseq, poor reads are filtered properly", {
 97 | 
 98 |   expect_equal(unname(SummarizedExperiment::assay(filtered_atac$bait_windows)["Chr1:246251-246300",]), c(0,1,1,8))
 99 | 
100 | })
101 | 
102 | # parameter objects built from the default make_params()
103 | p <- make_csaw_params(make_params())
104 | 
105 | test_that("make_csaw_params() returns properly populated object", {
106 |   expect_is(p, "readParam")
107 |   expect_equal(unname(p@pe), c("both"))
108 |   expect_equal(p@max.frag, 500)
109 |   expect_equal(p@dedup, TRUE)
110 |   expect_equal(p@minq, 30)
111 |   expect_equal(p@forward, NA)
112 | })
113 | 
114 | q <- make_scanBamParam(make_params(), filtered_rnaseq$bam_files[1])
115 | test_that("make_scanBamParam() returns properly populated object", {
116 | 
117 |   expect_is(q, "ScanBamParam")
118 |   expect_equal(unname(q@flag), c(2045,1023))
119 |   expect_equal(q@simpleCigar, FALSE)
120 |   expect_equal(q@reverseComplement, FALSE)
121 |   expect_equal(q@mapqFilter, 30)
122 | })
123 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
  2 | [![Build Status](https://travis-ci.org/TeamMacLean/atacr.svg?branch=master)](https://travis-ci.org/TeamMacLean/atacr)
  3 | [![codecov](https://codecov.io/gh/TeamMacLean/atacr/branch/master/graph/badge.svg)](https://codecov.io/gh/TeamMacLean/atacr)
  4 |  
  5 | -----------------------------------------
  6 |  
  7 | [![minimal R version](https://img.shields.io/badge/R%3E%3D-3.0.0-6666ff.svg)](https://cran.r-project.org/)
  8 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/atacr)](https://cran.r-project.org/package=atacr)
  9 | ![package_version](https://img.shields.io/badge/Package%20version-0.4.14-orange.svg?style=flat-square)
 10 | 
 11 | 
 12 | ---------------------------------------
 13 |  
 14 | ![Last-changedate](https://img.shields.io/badge/last%20change-"2018--05--15"-yellowgreen.svg)
 15 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 16 | 
 17 | atacr
 18 | =====
 19 | 
 20 | Helps with the analysis of count data from RNA-capture-seq and ATAC-capture-seq experiments. Using BioConductor RangedSummarizedExperiment objects, atacr implements a set of helper functions and quality control plots specific to the analysis of counts of reads in windows across genomes. Especially, atacr is useful for performing sample normalizations and for easily running bootstrap and Bayes factor tests for differentially accessible windows in common reference designs.
 21 | 
 22 | Installation
 23 | ------------
 24 | 
 25 | You can install atacr from github with:
 26 | 
 27 | ``` r
 28 | # install.packages("devtools")
 29 | devtools::install_github("TeamMacLean/atacr")
 30 | ```
 31 | 
 32 | Documentation
 33 | --------------
 34 | 
 35 | You can read documentation on the following topics
 36 | 
 37 |   1. [Tutorial - A worked example](https://teammaclean.github.io/atacr)
 38 |   2. [atacR - General Overview](https://teammaclean.github.io/atacr/atacr.html)
 39 |   3. [Loading Data](https://teammaclean.github.io/atacr/loading.html)
 40 |   4. [Summaries of Data](https://teammaclean.github.io/atacr/summaries.html)
 41 |   5. [Normalising Data](https://teammaclean.github.io/atacr/normalisations.html)
 42 |   6. [Differential Windows](https://teammaclean.github.io/atacr/differential_windows.html)
 43 |   7. [Subsetting Data](https://teammaclean.github.io/atacr/atacr_which.html)
 44 | 
 45 | Quick start:
 46 | ------------
 47 | 
 48 | ``` r
 49 | library(atacr)
 50 | summary(sim_counts)
 51 | #> ATAC-seq experiment of 2 treatments in 6 samples
 52 | #>  Treatments: control,treatment 
 53 | #>  Samples: control_001,control_002,control_003,treatment_001,treatment_002,treatment_003 
 54 | #>  Bait regions used: 500 
 55 | #>  Total Windows: 1000 
 56 | #>  
 57 | #>  On/Off target read counts:
 58 | #>           sample off_target on_target percent_on_target
 59 | #> 1   control_001        312     15160          97.98345
 60 | #> 2   control_002        347     14777          97.70563
 61 | #> 3   control_003        339     15115          97.80639
 62 | #> 4 treatment_001        321     16955          98.14193
 63 | #> 5 treatment_002        346     16490          97.94488
 64 | #> 6 treatment_003        335     17064          98.07460 
 65 | #>  Quantiles: 
 66 | #>  $bait_windows
 67 | #>     control_001 control_002 control_003 treatment_001 treatment_002
 68 | #> 1%        19.99       16.99          19         16.99         16.00
 69 | #> 5%        22.00       20.00          22         20.00         19.00
 70 | #> 95%       40.00       40.00          39         63.00         65.05
 71 | #> 99%       45.00       46.00          44        109.00         89.03
 72 | #>     treatment_003
 73 | #> 1%          16.00
 74 | #> 5%          21.00
 75 | #> 95%         61.00
 76 | #> 99%        109.06
 77 | #> 
 78 | #> $non_bait_windows
 79 | #>     control_001 control_002 control_003 treatment_001 treatment_002
 80 | #> 1%            0           0        0.00             0          0.00
 81 | #> 5%            0           0        0.00             0          0.00
 82 | #> 95%           3           4        3.05             3          3.05
 83 | #> 99%           4           4        4.00             4          4.00
 84 | #>     treatment_003
 85 | #> 1%              0
 86 | #> 5%              0
 87 | #> 95%             3
 88 | #> 99%             4
 89 | #>  
 90 | #>  Read depths:
 91 | #>           sample off_target on_target
 92 | #> 1   control_001      0.624    30.320
 93 | #> 2   control_002      0.694    29.554
 94 | #> 3   control_003      0.678    30.230
 95 | #> 4 treatment_001      0.642    33.910
 96 | #> 5 treatment_002      0.692    32.980
 97 | #> 6 treatment_003      0.670    34.128
 98 | ```
 99 | 
100 | ``` r
101 | plot(sim_counts)
102 | #> Picking joint bandwidth of 0.0243
103 | #> Picking joint bandwidth of 0.0582
104 | ```
105 | 
106 | ![](README-unnamed-chunk-2-1.png)
107 | 


--------------------------------------------------------------------------------
/vignettes/normalisations.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Normalisations"
 3 | author: "Dan MacLean"
 4 | date: "`r Sys.Date()`"
 5 | output: 
 6 |   rmarkdown::html_vignette:
 7 |     fig_caption: yes
 8 | vignette: >
 9 |   %\VignetteIndexEntry{Normalisations}
10 |   %\VignetteEngine{knitr::rmarkdown}
11 |   %\VignetteEncoding{UTF-8}
12 | ---
13 | 
14 | Normalisations help make the count estimates more easily comparable between experiments. `atacr` provides a few options for this.
15 | 
16 | ## Library size normalisation
17 | 
18 | This is the simplest, but probably the least useful normalisation. The total counts are scaled such that each sample has a similar total count to account for different sequencing depths. This procedure can be done in one step with `library_size_normalisation()`
19 | 
20 | ```{r, echo=FALSE, eval=TRUE}
21 | library(atacr)
22 | counts <- simulate_counts()
23 | ```
24 | 
25 | ```{r}
26 | normalised_counts <- library_size_normalisation(counts)
27 | ```
28 | 
29 | The `by_treatment` option will group the samples by treatment and normalise each group separately. This method assumes that samples within a treatment group should show little difference, while samples from different treatment groups could show lots of difference; normalising each group separately prevents the treatment structure from affecting the wider experiment.
30 | 
31 | ```{r,eval=FALSE}
32 | by_sample_normalised_counts <- library_size_normalisation(counts, 
33 |                                              by_treatment = TRUE)
34 | ```
35 | 
36 | ## Control window normalisation
37 | 
38 | This option allows you to perform a scaling of the data based on user-specified control regions, usually these will be genomic windows corresponding to baits from control genes/regions. A one-step option is to provide these control window locations to `control_window_normalise()` in a separate file.
39 | 
40 | ```{r, eval=FALSE}
41 | control_window_normalised_counts <- control_window_normalise(counts, "my_controls.csv")
42 | ```
43 | 
44 | The control window file should be a simple `.csv` file with header and columns `seq_name,start_pos,end_pos,bait_name`.
45 | 
46 | 
47 | ## Finding internal scaling factors
48 | 
49 | A better way to normalise will often be to find the least variable windows in your sample and scale by those. `atacr` provides a method for doing this by `goodness of fit` as described previously in [Li et al, 2012](https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate) and [on Harold Pimentel's blog](https://haroldpimentel.wordpress.com/2014/12/08/in-rna-seq-2-2-between-sample-normalization/).
50 | 
51 | Essentially, Goodness of Fit (GoF) is a method of estimating variability over samples for each window. Each window gets a GoF, the lower it is, the lower the variability. These should then be the best ones to use as controls for scaling. The vector of normalisation factors for each sample can be obtained using `get_GoF_factors()`
52 | 
53 | ```{r}
54 | gof_norm_factors <- get_GoF_factors(counts)
55 | gof_norm_factors
56 | ```
57 | 
58 | ## Applying scaling factors 
59 | 
60 | If you have a set of scaling factors from `get_GoF_factors()` or some other package or function, then it is possible to apply them to the data using the `scale_factor_normalise()` function.
61 | 
62 | ```{r, fig.width=7 }
63 | gof_normalised_counts <- scale_factor_normalise(counts, 
64 |                                           scaling_factors = gof_norm_factors)
65 | 
66 | ## You can add the normalised counts to a slot on the original object
67 | counts$normalised_counts <- gof_normalised_counts
68 | 
69 | plot_counts(counts, which = "normalised_counts")
70 | ```
71 | 
72 | ## Comparing sets of potential control windows
73 | 
74 | To allow comparison of the GoF metric of different sets of windows (e.g. those determined by `get_GoF_factors()` or your own list) we can plot the distribution of 'control' windows against the rest using `plot_GoF()`. We just need a vector of names of windows to use as controls.
75 | 
76 | The `atacr` function `find_controls_by_GoF()` is useful here: it returns a vector of window names used by the normalisation that can be plugged into the plot. Alternatively, you can supply a character vector of your own window names.
77 | 
78 | ```{r, fig.width=7}
79 | auto_controls <- find_controls_by_GoF(counts)
80 | head(auto_controls)
81 | 
82 | plot_GoF(counts, controls = auto_controls)
83 | 
84 | ```
85 | 
86 | ## Further normalisations by window size and other factors
87 | 
88 | The normalisations described here are not sensitive to factors such as window size and the counts from them may need to be corrected further, especially for RNAseq data with different window sizes. There are many packages in the Bioconductor libraries and on CRAN that can be used for this, check out the [edgeR](http://bioconductor.org/packages/release/bioc/html/edgeR.html), [DESeq](https://bioconductor.org/packages/release/bioc/html/DESeq.html) and [csaw](https://bioconductor.org/packages/release/bioc/html/csaw.html) packages among others.
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/vignettes/differential_windows.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Differentially accessible or expressed windows"
  3 | author: "Dan MacLean"
  4 | date: "`r Sys.Date()`"
  5 | output: 
  6 |   rmarkdown::html_vignette:
  7 |     fig_caption: yes
  8 | vignette: >
  9 |   %\VignetteIndexEntry{Differential Windows}
 10 |   %\VignetteEngine{knitr::rmarkdown}
 11 |   %\VignetteEncoding{UTF-8}
 12 | ---
 13 | ```{r echo = FALSE}
 14 | knitr::opts_chunk$set(
 15 |   message = FALSE,
 16 |   warning = FALSE
 17 | )
 18 | ```
 19 | 
 20 | Finding differentially expressed or accessible windows is possible with two related functions in `atacr` - `estimate_fdr()`, which implements bootstrap _t_-tests via the boot package, and `estimate_bayes_factor()`, which implements a Bayes factor ANOVA using the BayesFactor package. A tidy dataframe of results is returned in each case.
 21 | 
 22 | ```{r, echo=FALSE, eval=TRUE}
 23 | library(atacr)
 24 | normalized_counts <- simulate_counts()
 25 | result <- estimate_fdr(normalized_counts,
 26 |              treatment_a =  "treatment",
 27 |               treatment_b = "control")
 28 | ```
 29 | 
 30 | ### Bootstrap  _t_-tests 
 31 | 
 32 | For simple comparison of two treatments with bootstrap _t_ tests, provide treatment 'a' and 'b' names and the number of bootstrap iterations (default is 10, which is fast for testing code, but useless analytically). You can set the threshold for marking as significant with `fdr_level`.
 33 | 
 34 | ```{r, echo=TRUE, eval=FALSE}
 35 |  result <- estimate_fdr(normalized_counts,
 36 |               treatment_a =  "treatment",
 37 |               treatment_b = "control",
 38 |               iterations = 100000,
 39 |               fdr_level = 0.01)
 40 | ```
 41 | ```{r, echo=FALSE, eval=TRUE}
 42 | head(result)
 43 | ```
 44 | 
 45 | The output has columns as follows:
 46 | 
 47 |   * `window` - the name of the window with data on this row
 48 |   * `t` - the value of the _t_ statistic for the first (non-bootstrap) iteration
 49 |   * `p_value` - the computed _p_ value for the window
 50 |   * `fdr` - the false discovery rate at this window
 51 |   * `mean_count_a` - the mean count for treatment 'a'
 52 |   * `mean_count_b` - the mean count for treatment 'b'
 53 |   * `sd_a` - standard deviation for treatment 'a'
 54 |   * `sd_b` - standard deviation for treatment 'b'
 55 |   * `log2_fc` - log 2 of the ratio of the mean counts
 56 |   * `is_sig` - flag showing whether window was significant according to the level set in the function with parameter `fdr_level`
 57 |   
 58 |   
 59 | To analyse all treatments against a common comparison at once you can use the wrapper function `estimate_fdr_multiclass()` which requires the name of the common comparison treatment 
 60 | 
 61 | 
 62 | 
 63 | ```{r, echo=TRUE, eval=FALSE}
 64 | multi_result <-  estimate_fdr_multiclass(normalized_counts,
 65 |               common_control = "control",
 66 |               iterations = 100000,
 67 |               fdr_level = 0.01)
 68 | 
 69 | head(multi_result)
 70 | ```
 71 | 
 72 | ```{r, echo=FALSE, eval=TRUE}
 73 | multi_result <-  estimate_fdr_multiclass(normalized_counts,
 74 |               common_control = "control")
 75 | head(multi_result)
 76 | ```
 77 | 
 78 | The results here have two extra columns:
 79 | 
 80 |   * a - the name of the treatment 
 81 |   * b - the name of the common control
 82 |   
 83 | ### Bayes Factor Analysis
 84 | 
 85 | A similar pair of functions is available for Bayes factor analysis. Use `estimate_bayes_factor()` for the two-way comparison. The `factor` argument sets the Bayes factor at which to mark the window as having different counts.
 86 | 
 87 | ```{r}
 88 | result_bf <-  estimate_bayes_factor(normalized_counts,
 89 |                            treatment_a =  "treatment",
 90 |                            treatment_b = "control",
 91 |                                 factor = 2.0)
 92 | 
 93 | head(result_bf)
 94 | ```
 95 | 
 96 | 
 97 | Again, an `estimate_bayes_factor_multiclass()` function works for all comparisons to a common control.
 98 | 
 99 | The results data frame is similar to that from the Bootstrap _t_ methods, with a `factor` column in place of the `t` and `fdr` columns.
100 | 
101 | ### EdgeR analysis
102 | 
103 | The single comparison edgeR analysis returns a dataframe similar to the above methods.
104 | 
105 | In all the runs of edgeR the `estimateDisp()` function is used. This means that the `edgeR_exact()` methods will be increasingly less useful as a greater proportion of windows show differential counts. edgeR is the most powerful method when only a few genes are showing differential counts; use the other methods in other cases.
106 | 
107 | You can tell edgeR to ignore data with zero counts in all samples using `remove_zeros`
108 | 
109 | ```{r}
110 | result_edger <-  edgeR_exact(normalized_counts,
111 |                            treatment_a =  "treatment",
112 |                            treatment_b = "control",
113 |                            remove_zeros = TRUE)
114 | 
115 | head(result_edger)
116 | ```
117 | 
118 | The edgeR multiclass variant, `edgeR_multiclass()` also uses the `estimateDisp()` function in all cases. The `edgeR_multiclass()` function does not return a dataframe, instead it returns the native `DGELRT` objects (see [the DGELRT manual](https://www.rdocumentation.org/packages/edgeR/versions/3.14.0/topics/DGELRT-class) for more information) from each comparison in a `list()` object with names as per the treatment used.
119 | 
120 | ```{r}
121 | edgeR_multiclass(normalized_counts,"mock", 
122 |   remove_zeros = TRUE)
123 | ```
124 | 


--------------------------------------------------------------------------------
/vignettes/loading.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Loading data"
  3 | author: "Dan MacLean"
  4 | date: "`r Sys.Date()`"
  5 | output: 
  6 |   rmarkdown::html_vignette:
  7 |     fig_caption: yes
  8 | vignette: >
  9 |   %\VignetteIndexEntry{Loading Data}
 10 |   %\VignetteEngine{knitr::rmarkdown}
 11 |   %\VignetteEncoding{UTF-8}
 12 | ---
 13 | 
 14 | 
 15 | All counts are computed from sorted, indexed BAM files using the `make_counts()` function. This function requires two files:
 16 | 
 17 |   1. A GFF [1] file of bait regions on the genome
 18 |   2. A csv file showing the sample -> treatment -> bam file mappings for the experiment.
 19 |     
 20 | The mapping file has the following structure:
 21 | 
 22 | "sample_name", | "bam_file_path", | "treatment"
 23 | ---------------|------------------|------------
 24 | "control_001", | "data/control1/aligned_merged_sorted.bam" | "control"
 25 | "control_002", | "data/control2/aligned_merged_sorted.bam" | "control"
 26 | "control_003", | "data/control3/aligned_merged_sorted.bam" | "control"
 27 | "treatment_001", | "data/treatment1/aligned_merged_sorted.bam" | "treatment"
 28 | "treatment_002", | "data/treatment2/aligned_merged_sorted.bam" | "treatment"
 29 | "treatment_003", | "data/treatment3/aligned_merged_sorted.bam" | "treatment"
 30 | 
 31 | The BAM indices (`.bai` files) are presumed to be with the BAM files.
 32 | 
 33 | ## Differences between ATACseq and RNAseq data within `atacr`. 
 34 | 
 35 | As far as `atacr` is concerned, ATACseq data is counted into equal sized windows within the bait windows - so that you end up with many more regions with counts, than you have baits. This behaviour means you can find regions of smaller than bait size that are differentially accessible. Conversely, RNAseq data is counted into one window per region declared in the GFF file, so you get just one expression estimate per gene/transcript. 
 36 | 
 37 | ## Loading ATACseq data
 38 | 
 39 | ATACseq is the default data type expected in `atacr`. The `make_counts()` call is the simplest in this case.
 40 | 
 41 | ```{r, echo=TRUE, eval=FALSE}
 42 | counts <- make_counts("bait_regions.gff", 
 43 |                       "sample_treatment_mapping.csv")
 44 | ```
 45 | 
 46 | ### Set genomic window width
 47 | 
 48 | The width of the genomic windows in which to compute counts across the defined bait regions defaults to 50 nt. To change this, set the `width` parameter to the size of the windows you want to use, e.g. 100 nt.
 49 | 
 50 | ```{r, echo=TRUE, eval=FALSE}
 51 | counts <- make_counts("bait_regions.gff", 
 52 |                       "sample_treatment_mapping.csv",
 53 |                       width = 100)
 54 | ```
 55 | 
 56 | ## Loading RNAseq data
 57 | 
 58 | When loading RNAseq data it is necessary to set the `is_rnaseq` option in `make_counts()`
 59 | 
 60 | ```{r, echo = TRUE, eval=FALSE}
 61 | 
 62 | counts <- make_counts("bait_regions.gff", 
 63 |                       "sample_treatment_mapping.csv",
 64 |                        is_rnaseq = TRUE)
 65 | ```
 66 | 
 67 | ### Setting quality filters when computing counts from BAM files
 68 | 
 69 | `atacr` allows you to set values determining which reads will be included in counts. By default a simple filter object can be passed from the `make_params()` function to the `filter_params` argument of `make_counts()`. 
 70 | 
 71 | ```{r, echo = TRUE, eval = FALSE}
 72 | 
 73 | my_params = make_params(
 74 |     paired_map = TRUE, 
 75 |     minq = 30, 
 76 |     dedup = TRUE
 77 |   )
 78 | 
 79 | counts <- make_counts("bait_regions.gff", 
 80 |                       "sample_treatment_mapping.csv",
 81 |                        is_rnaseq = TRUE,
 82 |                        filter_params = my_params )
 83 | ```
 84 | 
 85 | 
 86 | The `paired_map` option sets whether reads must be mapped as pairs to be counted; `TRUE` is the default. The `dedup` option removes reads that seem like PCR duplicates to the aligner; `TRUE` is the default. `minq` sets the minimum PHRED mapping quality score for a read to be counted; `30` is the default.
 87 | 
 88 | ### Advanced Quality filters RNAseq
 89 | 
 90 | If you require greater control over mapping filters for read counts from RNAseq, you can use an `Rsamtools::ScanBamParam()` object instead. See [https://www.rdocumentation.org/packages/Rsamtools/versions/1.24.0/topics/ScanBamParam](https://www.rdocumentation.org/packages/Rsamtools/versions/1.24.0/topics/ScanBamParam) for details
 91 | 
 92 | ### Advanced Quality filters ATACseq
 93 | 
 94 | For greater control over mapping filters for read counts when using ATACseq data, use a `csaw::readParam()` object. See [http://bioconductor.org/packages/release/bioc/manuals/csaw/man/csaw.pdf](http://bioconductor.org/packages/release/bioc/manuals/csaw/man/csaw.pdf) for details. 
 95 | 
 96 | 
 97 | ## Region names
 98 | 
 99 | Region names are loaded from the GFF file. As GFF is a bit of a fluid format different files may encode this information differently. By default, `make_counts()` will look into the attribute (final) column in the GFF and use the attribute called `ID`. To use a different attribute set `gene_id_col`
100 | 
101 | ```{r, eval=FALSE, echo=TRUE}
102 | counts <- make_counts("bait_regions.gff", 
103 |                       "sample_treatment_mapping.csv",
104 |                        gene_id_col = "GENE_NAME")
105 | ```
106 | 
107 | ## Output - an `atacr` object
108 | 
109 | The result of `make_counts()` is an `atacr` object of counts, basically an R `list` with slots for counts from bait windows, non-bait windows, the sample and BAM information. The count information is held in 'SummarizedExperiment' objects from Bioconductor. See [http://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html](http://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html) for details.
110 | 
111 | ## Saving a count object
112 | 
113 | Computing the `atacr` count object can take a while, especially when you are analysing many BAM files. It can be useful to save the object after computation. This can be done with base R's `saveRDS()` function.
114 | 
115 | ```{r echo=TRUE, eval=FALSE}
116 | saveRDS(counts, file="my_output_file.rds")
117 | 
118 | reloaded_counts <- readRDS("my_output_file.rds")
119 | ```
120 | 
121 | [1] <http://gmod.org/wiki/GFF3#GFF3> 
122 | 


--------------------------------------------------------------------------------
/R/atacr.R:
--------------------------------------------------------------------------------
  1 | # Some useful keyboard shortcuts for package authoring:
  2 | #
  3 | #   Build and Reload Package:  'Cmd + Shift + B'
  4 | #   Check Package:             'Cmd + Shift + E'
  5 | #   Test Package:              'Cmd + Shift + T'
  6 | 
  7 | # stop devtools::check() complain about elements in ggplot and dplyr packages
  8 | if (getRversion() >= "2.15.1")
  9 |   utils::globalVariables(c("."))
 10 | 
 11 | #' @importFrom magrittr %>%
 12 | #' @importFrom graphics hist
 13 | #' @importFrom stats cor kmeans median p.adjust quantile rnbinom rnorm rpois runif sd start t.test window cor.test
 14 | #' @importFrom utils capture.output read.csv str
 15 | #' @importFrom methods as
 16 | #' @importFrom SummarizedExperiment rbind
 17 | #' @importFrom stats rlnorm
 18 | no_func <- function(x) {
 19 |   # placeholder so the roxygen @importFrom tags above have an object to attach to
 20 |   FALSE
 21 | }
 22 | 
 23 | #' Get a summary of reads hitting the bait and non bait windows
 24 | #' @export
 25 | #' @param data a list of SummarizedExperiment objects from atacr::make_counts()
 26 | #' @return a table with one row per sample giving the on target and off
 27 | #'   target read counts and the percentage of reads that were on target
 28 | target_count_summary <- function(data) {
 29 |   on_target <- off_target <- NULL #deal with devtools::check()
 30 |   df <- target_count_coverage(data)
 31 |   # keep only the read sums: the coverage column built by
 32 |   # target_count_coverage() is named `mean_coverage`, not `means`
 33 |   df$mean_coverage <- NULL
 34 |   wide <- reshape::cast(df, sample ~ target, value = "count_sum")
 35 |   dplyr::mutate(wide,
 36 |     "percent_on_target" = ((on_target / (on_target + off_target)) * 100))
 37 | }
 38 | 
 39 | #' Summarise mean depth of coverage in the bait and non bait windows
 40 | #' @export
 41 | #' @param data a list of SummarizedExperiment objects from atacr::make_counts()
 42 | #' @return a table of on target and off target mean depths
 43 | coverage_count_summary <- function(data) {
 44 |   coverage <- target_count_coverage(data)
 45 |   coverage$count_sum <- NULL # only the mean_coverage values are reshaped
 46 |   reshape::cast(coverage, sample ~ target, value = "mean_coverage")
 47 | }
 48 | 
 49 | #' Read count and mean coverage hitting the bait and non bait windows
 50 | #' @export
 51 | #' @param data a list of SummarizedExperiment objects from atacr::make_counts()
 52 | #' @return a dataframe with a row per sample and target class holding the
 53 | #'   summed read count (count_sum) and mean count per window (mean_coverage)
 54 | target_count_coverage <- function(data) {
 55 |   bait <- SummarizedExperiment::assay(data$bait_windows)
 56 |   non_bait <- SummarizedExperiment::assay(data$non_bait_windows)
 57 | 
 58 |   # label every column (sample) of each matrix with its target class
 59 |   labels <- c(
 60 |     rep("on_target", length(colnames(bait))),
 61 |     rep("off_target", length(colnames(non_bait)))
 62 |   )
 63 |   totals <- c(colSums(bait), colSums(non_bait))
 64 |   averages <- c(colMeans(bait), colMeans(non_bait))
 65 | 
 66 |   data.frame(
 67 |     sample = names(totals),
 68 |     target = factor(labels),
 69 |     count_sum = totals,
 70 |     mean_coverage = averages
 71 |   )
 72 | }
 73 | 
 74 | #' assign samples to kmeans clusters
 75 | #' @export
 76 | #' @param data  a list of SummarizedExperiment objects from atacr::make_counts()
 77 | #' @param which the subdivision of the genome to cluster on, either 'whole_genome', 'bait_windows' or 'non_bait_windows'
 78 | #' @return dataframe of cluster_id and sample name, sorted by cluster_id then sample
 79 | sample_kmeans_cluster <- function(data, which = "bait_windows") {
 80 |   cluster_id <- NULL #deal with devtools::check()
 81 |   window_counts <- SummarizedExperiment::assay(data[[which]])
 82 |   # one cluster per distinct treatment; t() puts the samples on the rows
 83 |   n_clusters <- length(unique(data$treatments))
 84 |   fit <- kmeans(t(window_counts), n_clusters)
 85 |   assignments <- data.frame(cluster_id = fit$cluster)
 86 |   assignments$sample <- rownames(assignments)
 87 |   dplyr::arrange(assignments, cluster_id, sample)
 88 | }
 89 | 
 90 | #' count windows whose read count is at or below the threshold
 91 | #' @export
 92 | #' @param data  a list of SummarizedExperiment objects from atacr::make_counts()
 93 | #' @param which the subdivision of the genome to count windows in, either
 94 | #'   'whole_genome', 'bait_windows' or 'non_bait_windows'
 95 | #' @param threshold windows with read counts less than or equal to this
 96 | #'   level are counted
 97 | #' @return dataframe of sample name, count and threshold
 98 | count_windows_under_threshold <- function(data, which = "bait_windows",
 99 |                                           threshold = 0) {
100 |   window_counts <- SummarizedExperiment::assay(data[[which]])
101 | 
102 |   # the comparison is inclusive (<=), so windows sitting exactly on the
103 |   # threshold are counted as well
104 |   n_under <- apply(window_counts <= threshold, MARGIN = 2, sum)
105 | 
106 |   result <- data.frame(
107 |     sample = names(n_under),
108 |     count = n_under,
109 |     threshold = rep(threshold, length(n_under))
110 |   )
111 |   rownames(result) <- NULL
112 |   result
113 | }
114 | 
115 | #' report counts at each quantile for each sample
116 | #' @export
117 | #' @param data a list of SummarizedExperiment objects from atacr::make_counts()
118 | #' @param quantiles a vector of quantiles to report
119 | #' @param which the subset of data windows to report on. Default is NULL,
120 | #'   which reports on both "bait_windows" and "non_bait_windows"
121 | #' @return when which is NULL, a list with members bait_windows and
122 | #'   non_bait_windows holding the per-sample quantiles of each subset;
123 | #'   otherwise the per-sample quantiles of the requested subset only
124 | calc_quantiles <- function(data,
125 |                            quantiles = c(.01, .05, 0.95, 0.99),
126 |                            which = NULL) {
127 |   # quantiles of every column (sample) of a count matrix
128 |   per_sample <- function(mat) {
129 |     apply(mat, MARGIN = 2, quantile, probs = quantiles)
130 |   }
131 | 
132 |   # a single named subset was requested
133 |   if (!is.null(which)) {
134 |     return(per_sample(as.matrix(data, which = which)))
135 |   }
136 | 
137 |   # no subset given: as.matrix() is called without `which` for the bait
138 |   # windows (relies on the default of the as.matrix method for data)
139 |   bait <- as.matrix(data)
140 |   non_bait <- as.matrix(data, which = "non_bait_windows")
141 |   list(
142 |     bait_windows = per_sample(bait),
143 |     non_bait_windows = per_sample(non_bait)
144 |   )
145 | }
146 | 
147 | 
148 | # TRUE when every value in the selected assay equals its as.integer() coercion
149 | se_contains_only_integers <- function(data, which) {
150 |   values <- SummarizedExperiment::assay(data[[which]])
151 |   all(values == as.integer(values))
152 | }
153 | 
154 | 
155 | 
156 | #' random variates from a distribution parameterised by a vector of values
157 | #' @export
158 | #' @param obs vector of observed values
159 | #' @param dist the distribution from which to return expected values, "norm",
160 | #'   "pois" or "nbinom"; any other value returns the normal variates
161 | #' @return a vector of length obs with random variates from distribution dist
162 | get_expected_values <- function(obs, dist = "norm") {
163 |   n <- length(obs)
164 |   # the normal draw is always made first and is the fallback result
165 |   expected <- rnorm(n, mean = mean(obs), sd = sd(obs))
166 |   if (dist == "pois") {
167 |     expected <- rpois(n, lambda = mean(obs))
168 |   } else if (dist == "nbinom") {
169 |     fit <- fitdistrplus::fitdist(obs, "nbinom")
170 |     expected <- rnbinom(n, size = fit$estimate[["size"]], mu = fit$estimate[["mu"]])
171 |   }
172 |   expected
173 | }
174 | 
175 | #' given a vector of numbers returns the observed and expected counts in bins of bin_width
176 | #' @export
177 | #' @param obs a vector of numbers
178 | #' @param dist a string naming distribution from which to take expected counts
179 | #' @param bin_width the width of the bins for the counts
180 | #' @return list with members observed and expected which are vectors of counts
181 | observed_expected_bins <- function(obs, dist = "pois", bin_width = 10) {
182 |   expected <- get_expected_values(obs, dist)
183 | 
184 |   # one set of breaks spanning both vectors so the bins line up; the extra
185 |   # bin_width makes the last break reach past the largest value
186 |   pooled <- c(obs, expected)
187 |   bin_edges <- seq(min(pooled), max(pooled) + bin_width, by = bin_width)
188 | 
189 |   count_in_bins <- function(values) {
190 |     hist(values, breaks = bin_edges, plot = FALSE)$counts
191 |   }
192 | 
193 |   list(observed = count_in_bins(obs), expected = count_in_bins(expected))
194 | }
195 | 
196 | #' a median of window values across all samples in a vector, for ma plots
197 | #' @export
198 | #' @param sample_matrix counts extracted from a SummarizedExperiment object
199 | #' @return the median of the provided counts for each row (window)
200 | median_virtual_experiment <- function(sample_matrix) {
201 |   apply(X = sample_matrix, MARGIN = 1, FUN = median)
202 | }
203 | 
204 | # log2 ratio of test to control, computed as a difference of logs
205 | # (presumably the M value of an MA plot)
206 | emm <- function(test, control) log2(test) - log2(control)
207 | 
208 | # mean of the log2 test and log2 control values
209 | # (presumably the A value of an MA plot)
210 | ay <- function(test, control) 0.5 * (log2(test) + log2(control))
211 | 
212 | #' given a dataframe from the estimate_fdr_multiclass() function, will return a
213 | #' list of window names per direction and treatment, suitable for UpSetR visualisation.
214 | #' Does not do any filtering of lists, so selected genes must be filtered before hand e.g with dplyr
215 | #' @export
216 | #' @param df dataframe from estimate_fdr_multiclass
217 | #' @return named list of vectors of window names suitable for UpSetR fromList() function
218 | make_UpSetR <- function(df) {
219 |   log2_fc <- direction <- a <- NULL #deal with devtools::check()
220 |   # label each row "up_<a>" or "down_<a>" from the sign of log2_fc
221 |   labelled <- dplyr::mutate(
222 |     df,
223 |     direction = ifelse(log2_fc > 0, "up", "down"),
224 |     category = paste0(direction, "_", a)
225 |   )
226 |   # one list element per category, holding that category's window names
227 |   by_category <- split(labelled, labelled$category)
228 |   lapply(by_category, function(x) as.vector(dplyr::select(x, window)$window))
229 | }
230 | 
231 | #' sim_counts - simulated count data
232 | #'
233 | #' The data `sim_counts` is a simulated data set with computer generated window counts for three replicates of each of two conditions in experiments with 500 bait and non-bait windows. We'll set each experiment to have 10 \% of windows differentially accessible at a difference of approximately 2 fold.
234 | #'
235 | #'
236 | #' Counts in bait windows for "control" samples  will be modelled as \eqn{C \sim NB(\mu = 30, size = 10\mu)}.
237 | #'
238 | #' Counts in bait windows for "treatment" samples will be modelled as \eqn{C \cdot unif(0.8,1.2)}.
239 | #'
240 | #' Differentially accessible bait windows will be modelled as \eqn{C_{1..50} \cdot \mathcal{N}( \mu=2,\sigma = \mu/2)}
241 | #' @format A SummarizedExperiment object
242 | "sim_counts"
243 | 
244 | #' Simulated count data
245 | #'
246 | #' The data `sim_counts` is a simulated data set with computer generated window counts for three replicates of each of two conditions in experiments with 500 bait and non-bait windows. We'll set each experiment to have 10 \% of windows differentially accessible at a difference of approximately 2 fold.
247 | #'
248 | #' Counts in bait windows for "control" samples  will be modelled as \eqn{C \sim NB(\mu = 30, size = 10\mu)}.
249 | #'
250 | #' Counts in bait windows for "treatment" samples will be modelled as \eqn{C \cdot unif(0.8,1.2)}.
251 | #'
252 | #' Differentially accessible bait windows will be modelled as \eqn{C_{1..50} \cdot \mathcal{N}( \mu=2,\sigma = \mu/2)}
253 | #' @format A list of SummarizedExperiment objects
254 | "sim_counts"
255 | 
256 | #' small_counts - simulated count data
257 | #' The data `small_counts` is basically the same thing as `sim_counts` with smaller sample of 100 bait / non-bait windows.
258 | #' @format a list of SummarizedExperiment objects
259 | "small_counts"
260 | 
261 | #' athal_wt_counts - real capture RNASeq count data
262 | #' The data `athal_wt_counts` are real, experimentally derived counts from untreated WT Arabidopsis leaves for 52 baits, each set of baits representing a gene. Three replicates are provided for each gene. This data set is intended to be used in resampling procedures for making test data sets.
263 | #' @format a named vector of counts
264 | "athal_wt_counts"
265 | 


--------------------------------------------------------------------------------
/R/normalisation.R:
--------------------------------------------------------------------------------
  1 | #' estimates Goodness of Fit for each row in a count matrix
  2 | #'
  3 | #' @param mat a count matrix usually from SummarizedExperiment::assay()
  4 | #' @return a named vector of GoF estimates
  5 | gof <- function(mat){
  6 |   # background reading for the statistic:
  7 |   # https://haroldpimentel.wordpress.com/?s=TMM#paperList
  8 |   # https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate
  9 |   # https://github.com/cran/PoissonSeq/blob/3d9bc4b1744cb45714d4442b5a879b6e0c68b4a2/R/ps_other.R
 10 |   tiny <- 1e-10 # keeps the division defined when an expected count is 0
 11 |   col_share <- colSums(mat) / sum(mat) # each column's fraction of all counts
 12 |   # expected count per cell: the row total split by the column shares
 13 |   expected <- rowSums(mat) %*% t(col_share)
 14 |   residual <- mat - expected
 15 |   rowSums(residual ^ 2 / (expected + tiny))
 16 | }
 17 | 
 18 | #' adds per-window Goodness of Fit estimates to an atacr object
 19 | #' @export
 20 | #' @param atacr a list of SummarizedExperiment objects from atacr::make_counts()
 21 | #' @param which the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'
 22 | #' @return the original atacr object with a new member - 'gofs' - holding the GoF estimate of each window in the selected subdivision
 23 | estimate_GoFs <- function(atacr, which = "bait_windows"){
 24 |   window_counts <- SummarizedExperiment::assay(atacr[[which]])
 25 |   atacr$gofs <- gof(window_counts)
 26 |   atacr
 27 | }
 28 | 
 29 | #' Depth estimation, adapted from https://github.com/cran/PoissonSeq/blob/master/R/ps_cmeans.R
 30 | #' @param n a count matrix, one row per window and one column per sample
 31 | #' @param iter number of refinement passes of the Depth finder (default 5); 0 performs no pass
 32 | #' @return list with `cmeans`, the per-column depth proportions (they sum to 1), and `keep`, a logical vector marking the rows used in the last pass (NULL when no pass was run)
 33 | Est.Depth <- function(n, iter=5)
 34 | {
 35 |   SMALL.VAL <- 1e-8  # added to the denominator so that a zero expectation never divides by zero
 36 |   cmeans <- colSums(n) / sum(n)
 37 |   keep <- NULL
 38 |   # seq_len() rather than 1 : iter, which would count down (1, 0) and run twice for iter = 0
 39 |   for (i in seq_len(iter))
 40 |   {
 41 |     n0 <- rowSums(n) %*% t(cmeans)
 42 |     prop <- rowSums((n - n0) ^ 2 / (n0 + SMALL.VAL))
 43 |     qs <- quantile(prop, c(0.25, 0.75))
 44 |     keep <- (prop >= qs[1]) & (prop <= qs[2])  # rows whose statistic lies between the two quartiles
 45 |     # drop = FALSE keeps a matrix even when a single row is retained, so colMeans() still works
 46 |     cmeans <- colMeans(n[keep, , drop = FALSE])
 47 |     cmeans <- cmeans / sum(cmeans)
 48 |   }
 49 | 
 50 |   return(list(cmeans=cmeans, keep=keep))
 51 | }
 52 | 
 53 | 
 54 | 
 55 | #' estimates per-sample scaling factors from the sequencing depths found by Est.Depth() on windows with mid-range GoF
 56 | #' @export
 57 | #' @param atacr a list of SummarizedExperiment objects from atacr::make_counts()
 58 | #' @param which the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'
 59 | #' @return a numeric vector with one scaling factor per sample: the reciprocal of the sample's estimated depth relative to the geometric mean of all estimated depths
 60 | get_GoF_factors <- function(atacr, which = "bait_windows"){
 61 |   window_counts <- SummarizedExperiment::assay(atacr[[which]])
 62 |   log_depth <- log(Est.Depth(n=window_counts, iter=5)$cmeans)
 63 |   # centring on the log scale divides every depth by the geometric mean of the depths
 64 |   relative_depth <- exp(log_depth - mean(log_depth))
 65 |   return(1 / relative_depth)
 66 | }
 67 | 
 68 | #' find control windows by convergence method in https://academic.oup.com/biostatistics/article/13/3/523/248016/Normalization-testing-and-false-discovery-rate
 69 | #' @export
 70 | #' @param atacr a list of SummarizedExperiment objects from atacr::make_counts()
 71 | #' @param which the subdivision of the genome to calculate GoF either 'whole_genome', 'bait_windows' or 'non_bait_windows'
 72 | #' @return a character vector of window names
 73 | find_controls_by_GoF <- function(atacr, which = "bait_windows"){
 74 |   m <- SummarizedExperiment::assay(atacr[[which]])
 75 |   kept_rows <- Est.Depth(n = m, iter = 5)$keep
 76 |   # index the names themselves: rownames(m[kept_rows, ]) is NULL when a single row is kept, because that subset drops to a plain vector
 77 |   controls <- rownames(m)[kept_rows]
 78 |   return(controls)
 79 | }
 80 | 
 81 | 
 82 | #' performs a whole library size normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that
 83 | #' @export
 84 | #' @param data a list of SummarizedExperiment objects from atacr::make_counts()
 85 | #' @param which the subdivision of the genome to normalise either 'whole_genome', 'bait_windows' or 'non_bait_windows'
 86 | #' @param by_treatment (FALSE) will group the assay into different treatments and normalise each separately - assumes that within treatment groups the samples should show little difference, but between sample treatment groups could show lots of difference between windows.
 87 | #' @return a SummarizedExperiment object with a new, normalised assay matrix; its columns stay in the sample order of the selected object
 88 | library_size_normalisation <- function(data, which = "bait_windows", by_treatment = FALSE ){
 89 |     d <- data[[which]]
 90 |     if ( !by_treatment ){
 91 |       return( library_size_normalisation_internal( d ) )
 92 |     }
 93 |     l <- list(); normalised_order <- character(0)
 94 |     for (treatment in unique(data$treatments) ){
 95 |       samples <- names_from_treatment(data, treatment)
 96 |       treat_norm <- library_size_normalisation_internal( d[,samples] )
 97 |       l[[treatment]] <- SummarizedExperiment::assay( treat_norm )
 98 |       normalised_order <- c(normalised_order, samples)  # remember which sample each cbind-ed column belongs to
 99 |     }
100 |     full_mat <- do.call(cbind, l)
101 |     # the columns are now grouped by treatment; label them and put them back into the column order of d,
102 |     # otherwise samples that are interleaved between treatments would receive another sample's values
103 |     colnames(full_mat) <- normalised_order
104 |     SummarizedExperiment::assay(d) <- full_mat[, colnames(d), drop = FALSE]
105 |     return( d )
106 | }
107 | #' mean count of every window within each treatment; returns a matrix with one column per treatment
108 | #' @noRd
109 | average_matrix_by_sample <- function(data, which = "bait_windows") {
110 |   l <- list()
111 |   m <- SummarizedExperiment::assay(data[[which]])
112 |   for (treatment in unique(data$treatments) ){
113 |     samples <- names_from_treatment(data, treatment)
114 |     # drop = FALSE keeps a one-column matrix for a treatment with a single sample; apply() fails on a plain vector
115 |     l[[treatment]] <- apply(m[, samples, drop = FALSE], 1, mean)
116 |   }
117 |   return(do.call(cbind, l))
118 | }
119 | 
120 | # names of the samples whose treatment label equals `treatment` (data is an atacr object)
121 | names_from_treatment <- function(data, treatment){
122 |   data$sample_names[which(data$treatments == treatment)]
123 | }
124 | 
125 | #' list the distinct treatment names of an atacr object
126 | #' @export
127 | #' @param data an atacr object
128 | #' @return char vector of unique treatment names, in order of first appearance
129 | treatments <- function(data){
130 |   unique(data$treatments)
131 | }
132 | # treatment label recorded for every sample that is called `sample_name` (data is an atacr object)
133 | treatment_from_name <- function(data, sample_name){
134 |   data$treatments[which(data$sample_names == sample_name)]
135 | }
136 | 
137 | 
138 | #' scale every sample of a SummarizedExperiment by its library size scaling factor
139 | #' @param se a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()
140 | #' @return a copy of `se` whose assay holds the scaled values
141 | library_size_normalisation_internal <- function(se){
142 |   factors <- library_size_scaling_factors( se )
143 |   scaled_counts <- scale_normalise(SummarizedExperiment::assay( se ), factors)
144 |   normalised_se <- se
145 |   SummarizedExperiment::assay(normalised_se) <- scaled_counts
146 |   normalised_se
147 | }
148 | 
149 | #' calculate scaling factors for library size
150 | #' @export
151 | #' @param se a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()
152 | #' @return a vector with one scaling factor per sample (column) of `se`
153 | library_size_scaling_factors <- function( se ){ # nocov start
154 |   get_scaling_factors(SummarizedExperiment::assay( se ))
155 | } # nocov end
156 | # per-column factors: the summed median_virtual_experiment() of the matrix divided by each column total
157 | get_scaling_factors <- function( sample_matrix ){
158 |   target_total <- sum(median_virtual_experiment( sample_matrix ))
159 |   column_totals <- colSums( sample_matrix )
160 |   sapply(column_totals, function(total){ target_total / total })
161 | }
162 | # multiplies column j of sample_matrix by scaling_factors[j] (one factor per column)
163 | scale_normalise <- function( sample_matrix, scaling_factors){ #nocov start
164 |   # nrow is given explicitly: diag(x) on a single number builds an identity matrix of that size, not a 1 x 1 matrix holding x
165 |   return(sample_matrix %*% diag(scaling_factors, nrow = length(scaling_factors)))
166 | } #nocov end
167 | 
168 | #' normalise by a provided set of scaling factors
169 | #' @export
170 | #' @param data a list of SummarizedExperiment objects from atacr::make_counts()
171 | #' @param which the subdivision of the genome to normalise either 'whole_genome', 'bait_windows' or 'non_bait_windows'
172 | #' @param scaling_factors a vector of scaling factors to normalise by, one per sample (column); required - the NULL default raises an error
173 | #' @return a SummarizedExperiment with scale normalised window values
174 | scale_factor_normalise <- function(data, which = "bait_windows", scaling_factors = NULL){
175 |   se <-  data[[which]]
176 |   if (length(scaling_factors) != ncol(se)) stop("scaling_factors must contain exactly one value per sample (column) of the selected assay")
177 |   # se is a local copy, so replacing its assay leaves the object stored in `data` untouched
178 |   SummarizedExperiment::assay(se) <- scale_normalise(SummarizedExperiment::assay( se ), scaling_factors)
179 |   return(se)
180 | }
181 | 
182 | 
183 | #' extract scaling factors from control windows (often from a file of control gene positions)
184 | #' @export
185 | #' @param se a SummarizedExperiment object
186 | #' @param window_file a text file containing the positions of control window/gene ranges (read with get_bait_regions_from_text())
187 | #' @return a vector of scaling factors from control genes
188 | control_window_scaling_factors <- function( se, window_file){
189 |   control_regions <- get_bait_regions_from_text( window_file )
190 |   window_ranges <- SummarizedExperiment::rowRanges( se )
191 |   # only the windows that overlap a control region contribute to the factors
192 |   in_control <- IRanges::overlapsAny( window_ranges, control_regions )
193 |   control_counts <- SummarizedExperiment::assay( se[in_control, ] )
194 |   return(get_scaling_factors(control_counts))
195 | }
196 | #' scale every sample of a SummarizedExperiment by its control window scaling factor
197 | #' @param se a SummarizedExperiment object such as 'bait_windows' from atacr::make_counts()
198 | #' @param window_file a text file containing the positions of control window/gene ranges
199 | #' @return SummarizedExperiment object, a copy of se with normalised values
200 | control_window_normalise_internal <- function( se, window_file ){
201 |   # the factors come from the control windows only, but are applied to every window of se
202 |   factors <- control_window_scaling_factors( se, window_file)
203 |   scaled_counts <- scale_normalise(SummarizedExperiment::assay( se ), factors)
204 |   normalised_se <- se
205 |   SummarizedExperiment::assay(normalised_se) <- scaled_counts
206 |   normalised_se
207 | }
208 | #' performs control window based normalisation of the selected set of windows, calculates a median virtual experiment and normalises to that
209 | #' @export
210 | #' @param data a list of SummarizedExperiment objects from atacr::make_counts()
211 | #' @param which the subdivision of the genome to normalise either 'whole_genome', 'bait_windows' or 'non_bait_windows'
212 | #' @param window_file a text file containing the positions of control window/gene ranges
213 | #' @param by_treatment should normalisation be done by all experiments (one median virtual experiment to compare all samples to) OR should normalisation be done by each treatment type (one median virtual experiment for each different treatment type)
214 | #' @return a SummarizedExperiment object with a new, normalised assay matrix; its columns stay in the sample order of the selected object
215 | control_window_normalise <- function(data, window_file, which = "bait_windows", by_treatment = FALSE ){
216 |     d <- data[[which]]
217 |     if(!by_treatment) return( control_window_normalise_internal(d, window_file ))
218 |     l <- list()
219 |     normalised_order <- character(0)
220 |     for (treatment in unique(data$treatments) ){
221 |       samples <- names_from_treatment(data, treatment)
222 |       treat_norm <- control_window_normalise_internal( d[,samples], window_file )
223 |       l[[treatment]] <- SummarizedExperiment::assay( treat_norm )
224 |       normalised_order <- c(normalised_order, samples)  # remember which sample each cbind-ed column belongs to
225 |     }
226 |     full_mat <- do.call(cbind, l)
227 |     # the columns are now grouped by treatment; label them and restore the column order of d so that interleaved samples keep their own values
228 |     colnames(full_mat) <- normalised_order
229 |     SummarizedExperiment::assay(d) <- full_mat[, colnames(d), drop = FALSE]
230 |     return( d )
231 | }
232 | 
233 | #' normalise counts by window width (counts / window width)
234 | #' @export
235 | #' @param data a list of SummarizedExperiment objects from atacr::make_counts()
236 | #' @param which the subset of the data to normalise. Default = bait_windows
237 | #' @param per the number of nucleotides the counts are expressed per. Plain count / width gives the reads in the window divided by the width, so a 3000 nt gene with 30000 reads mapping to it will have a read count of just 10. Setting this parameter allows you to represent the counts per some other number of nts. Default = 1000, so gives the reads per kb of the gene.
238 | #' @return SummarizedExperiment object with normalised counts
239 | normalise_by_window_width <- function(data, which = "bait_windows",  per= 1000){
240 |   se <- data[[which]]
241 |   window_widths <- se@rowRanges@ranges@width
242 |   counts <- SummarizedExperiment::assay(se)
243 |   # the widths recycle down the rows, so row i is divided by (width of window i / per)
244 |   SummarizedExperiment::assay(se) <- (counts / (window_widths / per))
245 |   return(se)
246 | }
247 | 


--------------------------------------------------------------------------------
/docs/atacr_which.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | 
  3 | <html xmlns="http://www.w3.org/1999/xhtml">
  4 | 
  5 | <head>
  6 | 
  7 | <meta charset="utf-8" />
  8 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  9 | <meta name="generator" content="pandoc" />
 10 | 
 11 | <meta name="viewport" content="width=device-width, initial-scale=1">
 12 | 
 13 | <meta name="author" content="Dan MacLean" />
 14 | 
 15 | <meta name="date" content="2018-03-21" />
 16 | 
 17 | <title>atacr objects and the which argument</title>
 18 | 
 19 | 
 20 | 
 21 | <style type="text/css">code{white-space: pre;}</style>
 22 | <style type="text/css">
 23 | div.sourceCode { overflow-x: auto; }
 24 | table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
 25 |   margin: 0; padding: 0; vertical-align: baseline; border: none; }
 26 | table.sourceCode { width: 100%; line-height: 100%; }
 27 | td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
 28 | td.sourceCode { padding-left: 5px; }
 29 | code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
 30 | code > span.dt { color: #902000; } /* DataType */
 31 | code > span.dv { color: #40a070; } /* DecVal */
 32 | code > span.bn { color: #40a070; } /* BaseN */
 33 | code > span.fl { color: #40a070; } /* Float */
 34 | code > span.ch { color: #4070a0; } /* Char */
 35 | code > span.st { color: #4070a0; } /* String */
 36 | code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
 37 | code > span.ot { color: #007020; } /* Other */
 38 | code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
 39 | code > span.fu { color: #06287e; } /* Function */
 40 | code > span.er { color: #ff0000; font-weight: bold; } /* Error */
 41 | code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
 42 | code > span.cn { color: #880000; } /* Constant */
 43 | code > span.sc { color: #4070a0; } /* SpecialChar */
 44 | code > span.vs { color: #4070a0; } /* VerbatimString */
 45 | code > span.ss { color: #bb6688; } /* SpecialString */
 46 | code > span.im { } /* Import */
 47 | code > span.va { color: #19177c; } /* Variable */
 48 | code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
 49 | code > span.op { color: #666666; } /* Operator */
 50 | code > span.bu { } /* BuiltIn */
 51 | code > span.ex { } /* Extension */
 52 | code > span.pp { color: #bc7a00; } /* Preprocessor */
 53 | code > span.at { color: #7d9029; } /* Attribute */
 54 | code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
 55 | code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
 56 | code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
 57 | code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
 58 | </style>
 59 | 
 60 | 
 61 | 
 62 | <link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0
Aborder%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%
23800080%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0Aa%5Bhref%5E%3D%22https%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0A%0Acode%20%3E%20span%2Ekw%20%7B%20color%3A%20%23555%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Edt%20%7B%20color%3A%20%23902000%3B%20%7D%20%0Acode%20%3E%20span%2Edv%20%7B%20color%3A%20%2340a070%3B%20%7D%20%0Acode%20%3E%20span%2Ebn%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Efl%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Ech%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Est%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Eco%20%7B%20color%3A%20%23888888%3B%20font%2Dstyle%3A%20italic%3B%20%7D%20%0Acode%20%3E%20span%2Eot%20%7B%20color%3A%20%23007020%3B%20%7D%20%0Acode%20%3E%20span%2Eal%20%7B%20color%3A%20%23ff0000%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Efu%20%7B%20color%3A%20%23900%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%20code%20%3E%20span%2Eer%20%7B%20color%3A%20%23a61717%3B%20background%2Dcolor%3A%20%23e3d2d2%3B%20%7D%20%0A" rel="stylesheet" type="text/css" />
 63 | 
 64 | </head>
 65 | 
 66 | <body>
 67 | 
 68 | 
 69 | 
 70 | 
 71 | <h1 class="title toc-ignore">atacr objects and the which argument</h1>
 72 | <h4 class="author"><em>Dan MacLean</em></h4>
 73 | <h4 class="date"><em>2018-03-21</em></h4>
 74 | 
 75 | 
 76 | 
 77 | <div id="the-atacr-object" class="section level2">
 78 | <h2>The <code>atacr</code> Object</h2>
 79 | <p>When <code>make_counts()</code> is run, an <code>atacr</code> object is returned. This is a simple, somewhat informal object based on the R list type. It is basically an R list with the following members:</p>
 80 | <ol style="list-style-type: decimal">
 81 | <li>treatments - a character vector of treatment names</li>
 82 | <li>sample_names - a character vector of sample names</li>
 83 | <li>bam_files - a character vector of paths for the used BAM files</li>
 84 | <li>bait_regions - a <code>GenomicRanges::GRanges</code> object describing the bait window regions</li>
 85 | <li>bait_windows - a <code>RangedSummarizedExperiment</code> object containing the counts in the windows in <code>bait_regions</code></li>
 86 | <li>non_bait_windows - a <code>RangedSummarizedExperiment</code> object containing the counts in the windows in the regions outside <code>bait_regions</code></li>
 87 | <li>whole_genome - the union of bait_windows and non_bait_windows</li>
 88 | <li>dataframe - an optional member and the result of calling <code>as.data.frame()</code> on the <code>atacr</code> object</li>
 89 | </ol>
 90 | <div id="column-order" class="section level3">
 91 | <h3>Column Order</h3>
 92 | <p>The <code>RangedSummarizedExperiment</code> objects carry the count data. They are organised as a matrix with rows representing windows and columns representing samples. The column order is conserved and is the same as that in <code>treatments</code>, <code>sample_names</code> and <code>bam_files</code>.</p>
 93 | </div>
 94 | </div>
 95 | <div id="the-which-argument" class="section level2">
 96 | <h2>The ‘which’ argument</h2>
 97 | <p>Many of the functions allow you to state which member of the <code>atacr</code> list (really a <code>RangedSummarizedExperiment</code>) you wish to apply the function to with the <code>which</code> argument, e.g.:</p>
 98 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plot_counts</span>(counts, <span class="dt">which =</span> <span class="st">&quot;bait_windows&quot;</span>, <span class="dt">log10 =</span> <span class="ot">FALSE</span>)</code></pre></div>
 99 | </div>
100 | <div id="adding-members-to-the-atacr-object" class="section level2">
101 | <h2>Adding members to the <code>atacr</code> object</h2>
102 | <p>In this way you can use functions that return <code>RangedSummarizedExperiment</code>s to become new members in the list and work on them as with the built in ones, this is especially useful for normalisations.</p>
103 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">counts<span class="op">$</span>by_sample &lt;-<span class="st"> </span><span class="kw">library_size_normalisation</span>(counts, 
104 |                                              <span class="dt">by_treatment =</span> <span class="ot">TRUE</span>)
105 | 
106 | <span class="kw">plot_counts</span>(counts, <span class="dt">which =</span> <span class="st">&quot;by_sample&quot;</span>, <span class="dt">log10 =</span> <span class="ot">FALSE</span>)</code></pre></div>
107 | </div>
108 | 
109 | 
110 | 
111 | <!-- dynamically load mathjax for compatibility with self-contained -->
112 | <script>
113 |   (function () {
114 |     var script = document.createElement("script");
115 |     script.type = "text/javascript";
116 |     script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
117 |     document.getElementsByTagName("head")[0].appendChild(script);
118 |   })();
119 | </script>
120 | 
121 | </body>
122 | </html>
123 | 


--------------------------------------------------------------------------------
/R/loading.R:
--------------------------------------------------------------------------------
  1 | #' load BAM files and calculate window coverage
  2 | #' @export
  3 | #' @param window_file A filename of a GFF file with the bait regions (both loaders read it with get_bait_regions_from_gff())
  4 | #' @param sample_treatment_file A filename of a CSV file that lists treatments, samples and bam file paths
  5 | #' @param width an integer of the width of the bins the bait regions will be divided into (used for ATAC data only; it is not passed on for RNASeq data)
  6 | #' @param filter_params a params object from atacr::make_params()  that define how reads will be extracted from the BAM files. Optionally, for greater control, either a csaw::readParam() (for ATACseq) or Rsamtools::ScanBamParam() object for RNASeq can be provided. See http://bioconductor.org/packages/release/bioc/manuals/csaw/man/csaw.pdf or https://www.rdocumentation.org/packages/Rsamtools/versions/1.24.0/topics/ScanBamParam for details
  7 | #' @param is_rnaseq a boolean stating whether this is RNASeq data. Default = FALSE
  8 | #' @param gene_id_col a character string stating which attribute name to take from the final column of the GFF file to use for the window name in RNASeq data. Usually this is the name of the gene. Default = ID.
  9 | #' @param with_df attach a dataframe version of the data Default = FALSE
 10 | #' @return a list of metadata and RangedSummarizedExperiment objects with read count in windows for whole genome, bait windows and non-bait windows for each sample
 11 | make_counts <-
 12 |   function(window_file,
 13 |     sample_treatment_file,
 14 |     width = 50,
 15 |     filter_params = make_params(), #csaw::readParam(minq = 50),
 16 |     with_df = FALSE,
 17 |     is_rnaseq = FALSE,
 18 |     gene_id_col = "ID") {
 19 |     result <- list()
 20 |     class(result) <- c("atacr", "list")
 21 |     # sample metadata: BAM paths, treatment labels and sample names, stored as character vectors
 22 |     sample_treatment_file_mapping <-
 23 |       read_experiment_info(sample_treatment_file)
 24 |     result$bam_files <-
 25 |       as.character(sample_treatment_file_mapping$bam_file_path)
 26 |     result$treatments <-
 27 |       as.character(sample_treatment_file_mapping$treatment)
 28 |     result$sample_names <-
 29 |       as.character(sample_treatment_file_mapping$sample_name)
 30 | 
 31 |     if (!is_rnaseq) {
 32 |       result <- load_atac(result, width, filter_params, window_file)
 33 |     }
 34 |     else {
 35 |       result <- load_rnaseq(result, filter_params, window_file, gene_id_col = gene_id_col)  # forward the caller's gene_id_col instead of always falling back to the loader's default
 36 |     }
 37 | 
 38 |     if (with_df) {
 39 |       result$dataframe <- as.data.frame(result)
 40 |     }
 41 |     return(result)
 42 |   }
 43 | 
 44 | #' set read filters for counting from the BAM file.
 45 | #' @export
 46 | #' @param paired_map Should reads only be included if they are aligned in pairs. Default = TRUE
 47 | #' @param minq The minimum mapping quality to retain a read. Default = 30
 48 | #' @param dedup Should removal of PCR duplicates be performed. Default = TRUE
 49 | #' @return a named vector of class "atacr_params" with elements `paired_map`, `minq` and `dedup`
 50 | make_params <- function(paired_map = TRUE, minq = 30, dedup = TRUE){
 51 |   # c() puts the flags and the numeric minq into one atomic vector, so TRUE / FALSE are stored as 1 / 0
 52 |   values <- c(paired_map, minq, dedup)
 53 |   structure(
 54 |     values,
 55 |     names = c("paired_map", "minq", "dedup"),
 56 |     class = c("atacr_params"))
 57 | }
 58 | 
 59 | #' reads a csv file containing the bait regions
 60 | #' @param file_name path to a csv file containing the bait regions. File must have a header with columns `bait_name`, `seq_name`, `start_pos`, `end_pos`.
 61 | #' @return GenomicRanges object of bait regions, one range per row of the file, named by `bait_name`
 62 | get_bait_regions_from_text <- function(file_name) {
 63 |   regions_table <- read.csv(file_name, sep = ",", header = TRUE)
 64 |   required_cols <- c("bait_name", "seq_name", "start_pos", "end_pos")
 65 |   # refuse files that lack any of the expected header names
 66 |   if ( length(setdiff(required_cols, colnames(regions_table))) > 0 ) {
 67 |     stop("File must have a header with columns bait_name, seq_name, start_pos, end_pos.")
 68 |   }
 69 | 
 70 |   window_spans <- IRanges::IRanges(
 71 |     regions_table$start_pos,
 72 |     end = regions_table$end_pos,
 73 |     names = regions_table$bait_name
 74 |   )
 75 |   bait_regions <- GenomicRanges::GRanges(
 76 |     seqnames = S4Vectors::Rle(regions_table$seq_name),
 77 |     ranges = window_spans
 78 |   )
 79 |   return(bait_regions)
 80 | }
 81 | 
 82 | #' reads a gff file containing the bait regions
 83 | #' @param file_name path to the GFF file containing the bait regions
 84 | #' @return GenomicRanges object of bait regions; every feature imported from the file is kept
 85 | get_bait_regions_from_gff <- function(file_name) {
 86 |   imported <- rtracklayer::import.gff(file_name)
 87 |   # no filtering by feature type is applied; the disabled line below would keep gene features only
 88 |   #bait_regions <- bait_regions[bait_regions$type %in% c("gene")]
 89 |   as(imported, "GRanges")
 90 | }
 91 | 
 92 | #' build the csaw::readParam object that corresponds to an atacr::make_params() object
 93 | #' @param p an object returned from atacr::make_params()
 94 | #' @return a csaw::readParam object
 95 | make_csaw_params <- function(p){
 96 |   # pe is "both" when paired mapping was requested and "none" otherwise
 97 |   pe_mode <- ifelse(p["paired_map"], "both", "none")
 98 |   read_filter <- csaw::readParam(
 99 |     minq = p["minq"],
100 |     dedup = p["dedup"],
101 |     pe = pe_mode)
102 |   return(read_filter)
103 | }
104 | 
105 | #' populate the result object with the RangedSummarizedExperiment from the bam files from ATAC seq data. Called from make_counts() when is_rnaseq == FALSE.
106 | #' @param result list from make_counts()
107 | #' @param width an integer of the width of the bins the bait regions will be divided into
108 | #' @param filter_params a params object, described in atacr::make_counts()
109 | #' @param window_file  a filename of a GFF file with the bait regions
110 | #' @return a list with window counts for whole genome, bait and non-bait windows
111 | load_atac <- function(result, width, filter_params, window_file) {
112 |   ## translate atacr's own parameter object into the csaw equivalent
113 |   if (inherits(filter_params, "atacr_params")) {
114 |     filter_params <- make_csaw_params(filter_params)
115 |   }
116 | 
117 |   ## count reads from every BAM file in bins of `width`
118 |   genome_counts <- csaw::windowCounts(
119 |     result$bam_files,
120 |     bin = TRUE,
121 |     filter = 0,
122 |     width = width,
123 |     param = filter_params
124 |   )
125 |   ## name samples
126 |   colnames(genome_counts) <- result$sample_names
127 |   result$whole_genome <- genome_counts
128 | 
129 |   ## collect bait and non bait regions
130 |   result$bait_regions <- get_bait_regions_from_gff(window_file)
131 |   in_bait <- IRanges::overlapsAny(
132 |     SummarizedExperiment::rowRanges(genome_counts),
133 |     result$bait_regions
134 |   )
135 |   ## windows overlapping a bait region versus all remaining windows
136 |   result$bait_windows <- genome_counts[in_bait,]
137 |   result$non_bait_windows <- genome_counts[!in_bait,]
138 | 
139 |   ## name the windows
140 |   ## each window is named with the character form of its own genomic range
141 |   for (member in c("whole_genome", "bait_windows", "non_bait_windows")) {
142 |     result[[member]]@rowRanges@ranges@NAMES <-
143 |       as.character(result[[member]]@rowRanges)
144 |   }
145 | 
146 | 
147 |   return(result)
148 | }
149 | 
150 | #' build the Rsamtools::ScanBamParam object that corresponds to an atacr::make_params() object
151 | #' @param p an object returned from atacr::make_params()
152 | #' @param example_bam a filename pointing to a BAM file from which genome size can be taken
153 | #' @return an Rsamtools::ScanBamParam object
154 | make_scanBamParam <- function(p, example_bam){
155 |   # NOTE(review): presumably defined only so the bare column names in dplyr::rename() do not trigger undefined-variable notes - confirm
156 |   seqnames <- seqlength <- NULL
157 |   # one range per reference sequence of the BAM index, from position 1 to the sequence length
158 |   ranges <- Rsamtools::idxstatsBam(example_bam)
159 |   ranges <- dplyr::mutate(ranges, start = 1)
160 |   ranges <- dplyr::rename(ranges, seqname = seqnames, end = seqlength)
161 |   ranges <- unlist(GenomicRanges::makeGRangesListFromDataFrame(ranges))
162 |   # a scanBamFlag() value of NA means "do not filter on this flag"; a switched-off option must map to NA, because TRUE / FALSE would select only duplicates / only improperly paired reads
163 |   return(Rsamtools::ScanBamParam(
164 |     flag = Rsamtools::scanBamFlag(
165 |       isDuplicate = if (isTRUE(as.logical(p[["dedup"]]))) FALSE else NA,
166 |       isProperPair = if (isTRUE(as.logical(p[["paired_map"]]))) TRUE else NA
167 |     ),
168 |     mapqFilter = p["minq"],
169 |     which = ranges
170 |   ))
171 | }
172 | 
#' populate the result object with the RangedSummarizedExperiment from the bam files from RNA seq data. Called from make_counts() when is_rnaseq == TRUE.
#'
#' Reads are counted over the bait regions read from `window_file` and over
#' the gaps between them; the two count sets are stored as `bait_windows` and
#' `non_bait_windows` and row-bound into `whole_genome`.
#'
#' @param result list from make_counts(); `bam_files` and `sample_names` are read from it
#' @param filter_params a params object, described in atacr::make_counts(). An object of class `atacr_params` is converted with make_scanBamParam(); anything else is passed on unchanged
#' @param window_file  a filename of a GFF file with the bait regions
#' @param gene_id_col a character string stating which attribute name to take from the final column of the GFF file to use for the window name in RNASeq data. Usually this is the name of the gene. Default = ID.
#' @return the `result` list with `bait_regions`, `bait_windows`, `non_bait_windows` and `whole_genome` filled in
load_rnaseq <-
  function(result,
    filter_params,
    window_file,
    gene_id_col = "ID") {

    if (inherits(filter_params, "atacr_params")) {
      filter_params <- make_scanBamParam(filter_params, result$bam_files[1])
    }

    bams <- Rsamtools::BamFileList(result$bam_files)
    names(bams) <- result$sample_names

    result$bait_regions <- get_bait_regions_from_gff(window_file)
    non_bait_regions <-
      GenomicRanges::gaps(result$bait_regions) #the intergene regions

    result$bait_windows <-
      GenomicAlignments::summarizeOverlaps(
        features = result$bait_regions,
        reads = bams,
        ignore.strand = TRUE,
        param = filter_params
      )
    result$non_bait_windows <-
      GenomicAlignments::summarizeOverlaps(
        features = non_bait_regions,
        reads = bams,
        ignore.strand = TRUE,
        param = filter_params
      )

    # Name bait windows by the requested GFF attribute when it exists,
    # otherwise by their coordinates.
    bait_metadata <- result$bait_regions@elementMetadata@listData
    if (gene_id_col %in% names(bait_metadata)) {
      bait_names <- as.character(bait_metadata[[gene_id_col]])
    }
    else {
      bait_names <- make_range_names(
        # as.character() expands the run-length encoded seqnames to one value
        # per range; `@seqnames@values` holds only one value per run and would
        # be recycled against the wrong ranges
        as.character(result$bait_regions@seqnames),
        result$bait_regions@ranges@start,
        result$bait_regions@ranges@width
      )
    }
    result$bait_windows@rowRanges@ranges@NAMES <- bait_names

    result$non_bait_windows@rowRanges@ranges@NAMES <- make_range_names(
      as.character(non_bait_regions@seqnames),
      non_bait_regions@ranges@start,
      non_bait_regions@ranges@width
    )

    # drop the per-feature metadata columns of the bait windows before the two
    # experiments are row-bound (presumably so their row metadata match)
    result$bait_windows@rowRanges@elementMetadata@listData <- list()
    result$whole_genome <-
      rbind(result$bait_windows, result$non_bait_windows)
    colnames(result$whole_genome) <-
      colnames(result$bait_windows) <-
      colnames(result$non_bait_windows) <- result$sample_names

    return(result)
  }
237 | 
#' build "chr:start-end" names for a set of ranges
#' @param chr vector of sequence names, one per range
#' @param start vector of 1-based start positions
#' @param width vector of range widths
#' @return character vector of names of the form "chr:start-end", where end is the last position inside the range
make_range_names <- function(chr, start, width) {
  # a range of `width` positions starting at `start` ends at start + width - 1
  end <- start + width - 1L
  # format() keeps large coordinates in plain digits; paste0() on a double
  # such as 200000 would otherwise print "2e+05"
  plain <- function(x) format(x, scientific = FALSE, trim = TRUE)
  return(paste0(chr, ":", plain(start), "-", plain(end)))
}
242 | 
#' Loads the CSV file that describes treatments, samples and bam files
#' @param filename path and name of the file to load
#' @param should_be character vector of column headings the file must contain
#' @return a data.frame holding the contents of the file; an error is raised when any heading in `should_be` is absent
read_experiment_info <- function(filename, should_be = c("treatment",     "sample_name",   "bam_file_path")) {
  ## read in file with columns 'treatment, sample_name, bam_file_path'
  info <- read.csv(filename, header = TRUE, sep = ",")
  headings_present <- all(should_be %in% colnames(info))
  if (!headings_present) {
    stop("experiment mapping file should have headings: ", paste0(should_be, collapse = " "))
  }
  return(info)
}
255 | 
#' pulls features out of a gff file based on the identifiers provided
#' @export
#' @param ids character vector of ids/names of feature to extract
#' @param gff path to gff file
#' @param type feature type of features to extract.
#' @param col column name of GFF file containing id to use (ID)
#' @param out_file path of file name to write. If NULL, no file is written
#' @param version which gff version to export (Default is "3")
#' @return the selected features as a GRanges object when `out_file` is NULL; otherwise the features are written to `out_file` and the value of rtracklayer::export.gff() is returned
extract_features_from_gff <- function(ids, gff, type = c("gene"), col="ID", out_file = NULL, version = "3"){
  imported <- rtracklayer::import.gff(gff, "GFF")
  features <- as(imported, "GRanges")

  # first narrow to the requested feature types ...
  has_wanted_type <- features$type %in% type
  features <- features[ has_wanted_type ]

  # ... then to the features whose `col` attribute is one of `ids`
  id_values <- features@elementMetadata@listData[[col]]
  features <- features[ id_values %in% ids ]

  if ( !is.null(out_file) ) {
    rtracklayer::export.gff(features, out_file, version = version)
  }
  else{
    return(features)
  }

}
278 | 
#' builds an edgeR DGEList from one subset of an atacr object
#' @export
#' @param atacr an atacr object
#' @param which the subset of the data to work on
#' @param remove.zeros whether to remove rows that have 0 total count.
#' @return DGEList holding the counts of the chosen subset, grouped by `atacr$treatments`
as.DGEList <- function(atacr, which = "bait_windows", remove.zeros = FALSE ){
  count_matrix <- SummarizedExperiment::assay(atacr[[which]])
  edgeR::DGEList(count_matrix, group = atacr$treatments, remove.zeros = remove.zeros)
}
#' converts a simple text file of bait region starts and stops into a GFF3 file
#' @export
#' @param text_in path to the file describing the bait regions. File must have a header with columns `bait_name`, `seq_name`, `start_pos`, `end_pos`.
#' @param gff_out path to the gff file to be created
#' @return the value of rtracklayer::export.gff3(); called for the side effect of writing `gff_out`
text_to_gff <- function(text_in, gff_out){
  both_given <- !is.null(text_in) && !is.null(gff_out)
  if ( !both_given ) {
    stop("must provide an input text file AND output text file")
  }

  regions <- get_bait_regions_from_text(text_in)
  rtracklayer::export.gff3(regions, gff_out)
}
302 | 
303 | 
#' make files to load tutorial data
#'
#' Writes `sample_treatment_bam_mappings.csv` and a copy of the packaged
#' `bait_regions.gff` into `write_dir`. The mapping file lists three `mock`
#' and three `infected` samples, whose BAM paths point at the `ATAC*`
#' directories found next to the packaged GFF file.
#'
#' @param write_dir directory to put sample files in defaults to `getwd()`
#' @return list with elements `bait_regions_file` and `mapping_file`, the paths of the two files
#' @export
make_tutorial_data <- function(write_dir = getwd() ){

  out_mappings <- file.path(write_dir, "sample_treatment_bam_mappings.csv")
  out_gff <- file.path(write_dir, "bait_regions.gff")

  # the sample directories sit alongside the packaged bait_regions.gff
  extdata_dir <- dirname(system.file("extdata", "bait_regions.gff", package = "atacr"))
  dir_names <- list.files(extdata_dir, include.dirs = TRUE, pattern = "ATAC", full.names = TRUE)

  treatment_labels <- c( rep("mock", 3), rep("infected", 3))
  replicate_numbers <- c(1:3, 1:3)
  df <- data.frame(
    "treatment" = treatment_labels,
    "sample_name" = paste0(treatment_labels, "_rep", replicate_numbers),
    "bam_file_path" = file.path(dir_names, "alignedSorted.bam" )
  )

  write.csv(df, file = out_mappings, quote = FALSE, row.names = FALSE)

  file.copy(system.file("extdata/", "bait_regions.gff", package = "atacr"), out_gff )
  return( list(bait_regions_file = out_gff, mapping_file = out_mappings))
}
328 | 


--------------------------------------------------------------------------------
/R/differentials.R:
--------------------------------------------------------------------------------
  1 | #' gets t-statistic for two vectors of data, x and y
  2 | #' @param data matrix of sample data
  3 | #' @param indices indices selected by boot::boot
  4 | #' @return t the t statistic from Student's t-test or NA if error
  5 | get_t <- function(data,indices){
  6 |   d <- data[indices]
  7 | 
  8 |   e <- length(d)
  9 |   f <- floor(e/2)
 10 |   x <- d[1:f]
 11 |   y <- d[(f+1):e]
 12 | 
 13 | 
 14 |   stat <- tryCatch({
 15 |     t.test(x,y)$statistic
 16 |   },
 17 |     warning = function(w){
 18 |       return(NA)
 19 |     },
 20 |     error = function(e){
 21 |       return(NA)
 22 |     },
 23 |     finally = {}
 24 |   )
 25 |   return(stat)
 26 | }
 27 | 
 28 | 
 29 | #'runs bootstrap t test, wrapper required for boot::boot function
 30 | #' @param data matrix of sample data
 31 | #' @param iterations number of bootstrap iterations to run
 32 | #' @return vector of 2 columns, observed value t statisitc and p, calculated as proportion of bootstrap iterations greater than original t
 33 | bootstrap_t <- function(data, iterations=10){
 34 |   boot_res <- boot::boot(data, statistic = get_t, R = iterations)
 35 |   original <- boot_res$t0
 36 |   bootstraps <- boot_res$t
 37 |   p <- (sum(bootstraps > original) / iterations)
 38 |   if ( is.nan(original) | is.na(original) ) {
 39 |     p <- original
 40 |   }
 41 |   else if (original < 0) {
 42 |     p <- sum(bootstraps < original) / iterations
 43 |   }
 44 |   return(c(original, p))
 45 | }
 46 | 
 47 | select_comparisons <- function(data, treatment_a, treatment_b, which = "bait_windows"){
 48 |     l <- list()
 49 |     sample_matrix <- SummarizedExperiment::assay(data[[which]])
 50 |     treatment_a_cols <- data$sample_names[which(data$treatments == treatment_a) ]
 51 |     treatment_b_cols <- data$sample_names[which(data$treatments == treatment_b) ]
 52 |     l$treatment_a_data <- sample_matrix[,treatment_a_cols]
 53 |     l$treatment_b_data <- sample_matrix[,treatment_b_cols]
 54 |     return(l)
 55 | }
 56 | 
 57 | get_means <- function(data){
 58 | 
 59 |   mean_count_a <- apply(data$comparisons$treatment_a_data, 1, mean)
 60 |   mean_count_b <- apply(data$comparisons$treatment_b_data, 1, mean)
 61 |   result <- data.frame(
 62 |     c1 = mean_count_a,
 63 |     c2 = mean_count_b
 64 |   )
 65 |   colnames(result) <- c(paste0("mean_", data$treatment_a_name), paste0("mean_", data$treatment_b_name) )
 66 |   return( result )
 67 | }
 68 | 
 69 | get_sd <- function(data){
 70 |   sd_a <- apply(data$comparisons$treatment_a_data, 1, sd)
 71 |   sd_b <- apply(data$comparisons$treatment_b_data, 1, sd)
 72 |   result <- data.frame(
 73 |     c1 = sd_a,
 74 |     c2 = sd_b
 75 |   )
 76 |   colnames(result) <- c(paste0("sd_", data$treatment_a_name), paste0("sd_", data$treatment_b_name) )
 77 |   return( result )
 78 | }
 79 | 
 80 | get_fc <- function(data){
 81 |   means <- get_means(data)
 82 |   return(data.frame(log2_fold_change = log2(means[,1] / means[,2])))
 83 | }
 84 | 
 85 | #' selects appropriate columns and names from a
 86 | #' @param data an atacr object
 87 | #' @param treatment_a string naming the first treatment (numerator)
 88 | #' @param treatment_b string naming the second treatment (denominator)
 89 | #' @param which subset to work on Default = NULL
 90 | #' @return list of data to be calculated with
 91 | select_data <- function(data, treatment_a, treatment_b, which = NULL){
 92 | 
 93 |   comparison_list <- select_comparisons(data, treatment_a, treatment_b, which = which)
 94 |   comparison_matrix <- cbind(comparison_list$treatment_a, comparison_list$treatment_b )
 95 | 
 96 |   return(
 97 |     list(
 98 |       counts = comparison_matrix,
 99 |       comparisons = comparison_list,
100 |       treatment_a_names = data$sample_names[which(data$treatments == treatment_a)],
101 |       treatment_b_names = data$sample_names[which(data$treatments == treatment_b)],
102 |       treatment_a_name = treatment_a,
103 |       treatment_b_name = treatment_b
104 |       )
105 |     )
106 | 
107 | }
108 | 
#' stops unless both treatments have at least 3, and equally many, replicates
#' @param d list as returned by select_data(); `treatment_a_names` and `treatment_b_names` are counted
#' @param treatment_a name of the first treatment, used in the error message
#' @param treatment_b name of the second treatment, used in the error message
#' @return invisible NULL when the checks pass
check_data <- function(d, treatment_a, treatment_b){

  n_a <- length(d$treatment_a_names)
  n_b <- length(d$treatment_b_names)

  too_few <- n_a < 3 | n_b < 3
  if (too_few) {
    stop(paste("Need at least 3 replicates to perform estimate bootstrap t value. Have", n_a, "for", treatment_a, "and", n_b, "for", treatment_b))
  }

  if (n_a != n_b) {
    stop(paste("Must have equal number of replicates in each treatment. Have", n_a, "for", treatment_a, "and", n_b, "for", treatment_b))
  }

}
122 | 
#' Estimate FDR and significantly different windows
#'
#' Each window with a non-zero total count is passed to bootstrap_t(). The
#' `fdr` column holds the second value bootstrap_t() returns for the window;
#' no further adjustment is applied in this function. Windows with a zero
#' total count are kept in the output with NA statistics.
#'
#' @export
#' @param data an atacr object
#' @param treatment_a the first treatment to consider
#' @param treatment_b the second treatment to consider
#' @param which the subset of windows to consider
#' @param iterations the number of bootstrap iterations to perform
#' @param fdr_level the level at which to mark FDR as significant
#' @return dataframe of counts and statistics: the per-sample counts, `window`, `t`, `fdr`, `is_sig`, the per-treatment means and standard deviations and `log2_fold_change`
estimate_fdr <- function(data, treatment_a, treatment_b, which = "bait_windows", iterations=10,fdr_level=0.05){

  d <- select_data(data, treatment_a, treatment_b, which)
  check_data(d, treatment_a, treatment_b)

  working_df <- as.data.frame(d$counts)
  row.names(working_df) <- rownames(d$counts)

  selected_df <- working_df[rowSums(working_df) > 0, , drop = FALSE]
  selected_counts <- as.matrix(selected_df)

  # vapply() always yields a 2 x n matrix, also for zero or one tested window,
  # where apply() would return a bare vector
  boot_stats <- vapply(
    seq_len(nrow(selected_counts)),
    function(i) as.numeric(bootstrap_t(selected_counts[i, ], iterations = iterations)),
    numeric(2)
  )

  # name the columns explicitly instead of relying on the names that
  # as.data.frame() generates for an unnamed matrix column
  selected_result <- data.frame(
    t = boot_stats[1, ],
    fdr = boot_stats[2, ],
    window = row.names(selected_df),
    stringsAsFactors = FALSE
  )

  working_df$window <- row.names(working_df)

  result <- dplyr::left_join(working_df, selected_result, by = "window")
  result$is_sig <- result$fdr <= fdr_level
  result <- dplyr::bind_cols(result, get_means(d), get_sd(d), get_fc(d))
  return(result)

}
163 | 
#' Estimate FDR and significantly different windows for many experiments
#'
#' Runs estimate_fdr() once for every distinct treatment other than
#' `common_control`, each time against `common_control`, and stacks the
#' results.
#'
#' @export
#' @param data an atacr object
#' @param common_control the treatment to consider the control for all other treatments
#' @param which the subset of windows to consider
#' @param iterations the number of bootstrap iterations to perform
#' @param fdr_level the level at which to mark FDR as significant
#' @return a dataframe with columns `window`, `fdr`, `is_sig`, `mean_a`, `mean_b`, `sd_a`, `sd_b`, `log2_fold_change`, `a` and `b`; NULL when there is no treatment other than the control
estimate_fdr_multiclass <- function(data, common_control, which = "bait_windows", iterations = 10,fdr_level = 0.05) {
  # as.character() keeps factor treatments as labels; unique() makes sure each
  # treatment is compared once rather than once per replicate
  all_treatments <- as.character(data$treatments)
  treatments <- unique(all_treatments[!is.na(all_treatments) & all_treatments != common_control])

  kept_columns <- c("window",  "fdr", "is_sig", "mean_a", "mean_b", "sd_a", "sd_b", "log2_fold_change", "a", "b")

  r <- lapply(treatments, function(tr) {
    df <- estimate_fdr(data,
                    tr,
                    common_control,
                    which = which,
                    iterations = iterations,
                    fdr_level = fdr_level)
    df$a <- rep(tr, nrow(df))
    df$b <- rep(common_control, nrow(df))
    # anchored so sample columns that merely contain "mean_"/"sd_" are left alone
    colnames(df)[grep("^mean_", colnames(df))] <- c("mean_a", "mean_b")
    colnames(df)[grep("^sd_", colnames(df))] <- c("sd_a", "sd_b")
    df[, kept_columns]
  })
  return(do.call(rbind, r))

}
197 | 
#' Bayes factor for a two-sample t test on one window
#' @param counts named vector of counts for one window, names being sample names
#' @param treatment_a_names names of the samples of the first treatment
#' @param treatment_b_names names of the samples of the second treatment
#' @return the Bayes factor from BayesFactor::ttestBF(), on the natural (not log) scale
bayes_t <- function(counts, treatment_a_names, treatment_b_names){

  a <- counts[treatment_a_names]
  b <- counts[treatment_b_names]
  bf <- BayesFactor::ttestBF(a,b)
  # the `bf` column of the @bayesFactor slot stores the LOG Bayes factor;
  # extractBF() converts it back so it can be compared with a plain threshold
  return(as.numeric(BayesFactor::extractBF(bf, onlybf = TRUE)))
}
205 | 
206 | 
#' Estimate Bayes Factor and significantly different windows
#'
#' Each window with a non-zero total count is passed to bayes_t(). Windows
#' with a zero total count are kept in the output with NA statistics.
#'
#' @export
#' @param atacr an atacr object
#' @param treatment_a the first treatment to consider
#' @param treatment_b the second treatment to consider
#' @param which the subset of windows to consider
#' @param factor the BayesFactor at which to mark window as significant
#' @return a dataframe of counts and statistics: the per-sample counts, `window`, `bayes_factor`, `is_sig`, the per-treatment means and standard deviations and `log2_fold_change`
estimate_bayes_factor <- function(atacr, treatment_a, treatment_b, which = "bait_windows", factor = 4){

  d <- select_data(atacr, treatment_a, treatment_b, which)
  check_data(d, treatment_a, treatment_b)

  working_df <- as.data.frame(d$counts)
  row.names(working_df) <- rownames(d$counts)

  selected_df <- working_df[rowSums(working_df) > 0, , drop = FALSE]
  selected_counts <- as.matrix(selected_df)

  # vapply() returns a plain numeric vector for any number of windows,
  # including zero
  bayes_factors <- vapply(
    seq_len(nrow(selected_counts)),
    function(i) as.numeric(bayes_t(selected_counts[i, ],
                                   treatment_a_names = d$treatment_a_names,
                                   treatment_b_names = d$treatment_b_names)),
    numeric(1)
  )

  # keep `window` as character so it has the same type as the join key below
  selected_result <- data.frame(
    bayes_factor = bayes_factors,
    window = row.names(selected_df),
    stringsAsFactors = FALSE
  )

  working_df$window <- row.names(working_df)
  result <- dplyr::left_join(working_df, selected_result, by = "window")
  result$is_sig <- result$bayes_factor >= factor
  result <- dplyr::bind_cols(result, get_means(d), get_sd(d), get_fc(d))
  return(result)
}
240 | 
#' Estimate BayesFactor and mark significantly different windows for many experiments
#'
#' Runs estimate_bayes_factor() once for every distinct treatment other than
#' `common_control`, each time against `common_control`, and stacks the
#' results.
#'
#' @export
#' @param data an atacr object
#' @param common_control the treatment to consider the control for all other treatments
#' @param which the subset of windows to consider
#' @param factor the BayesFactor to consider significant
#' @return a dataframe with columns `window`, `bayes_factor`, `is_sig`, `mean_a`, `mean_b`, `sd_a`, `sd_b`, `log2_fold_change`, `a` and `b`; NULL when there is no treatment other than the control
estimate_bayes_factor_multiclass <- function(data, common_control, which = "bait_windows", factor = 4) {
  # as.character() keeps factor treatments as labels rather than integer codes
  all_treatments <- as.character(data$treatments)
  treatments <- unique(all_treatments[!is.na(all_treatments) & all_treatments != common_control])

  kept_columns <- c("window",  "bayes_factor", "is_sig", "mean_a", "mean_b", "sd_a", "sd_b", "log2_fold_change", "a", "b")

  r <- lapply(treatments, function(tr) {
    df <- estimate_bayes_factor(data,
      tr,
      common_control,
      which = which,
      factor = factor)
    df$a <- rep(tr, nrow(df))
    df$b <- rep(common_control, nrow(df))
    # anchored so sample columns that merely contain "mean_"/"sd_" are left alone
    colnames(df)[grep("^mean_", colnames(df))] <- c("mean_a", "mean_b")
    colnames(df)[grep("^sd_", colnames(df))] <- c("sd_a", "sd_b")
    df[, kept_columns]
  })

  return(do.call(rbind, r))

}
#' Estimate differential window counts  and mark significantly different windows using edgeR exact method for two samples
#' @export
#' @param atacr an atacr object
#' @param which the subset of windows to consider
#' @param treatment_a the first treatment to consider
#' @param treatment_b the second treatment to consider
#' @param remove_zeros apply edgeR remove.zeros argument
#' @param sig_level the p_value to consider significant
#' @return a dataframe of counts and statistics: the per-sample counts, `window`, `p_value`, `is_sig`, the per-treatment means and standard deviations and `log2_fold_change`
edgeR_exact <- function(atacr, which = "bait_windows", treatment_a = NULL, treatment_b = NULL, remove_zeros = FALSE, sig_level = 0.05 ){

  # the NULL defaults only exist to keep the argument order; both are required
  if (is.null(treatment_a) || is.null(treatment_b)) {
    stop("must provide both treatment_a and treatment_b")
  }

  data <- select_data(atacr, treatment_a, treatment_b, which)
  working_df <- as.data.frame(data$counts)
  row.names(working_df) <- rownames(data$counts)

  group <- c(rep(treatment_a, length(data$treatment_a_names)), rep(treatment_b, length(data$treatment_b_names)) )

  dg <- edgeR::DGEList(data$counts, group = group, remove.zeros = remove_zeros)
  dg <- edgeR::estimateDisp(dg)
  et <- edgeR::exactTest(dg)

  # keep `window` as character so it has the same type as the join key below
  selected_result <- data.frame(
    window = rownames(et$table),
    p_value = et$table$PValue,
    stringsAsFactors = FALSE
  )

  working_df$window <- row.names(working_df)

  # windows absent from the test table (e.g. dropped by remove_zeros) get NA
  result <- dplyr::left_join(working_df, selected_result, by = "window")
  result$is_sig <- result$p_value <= sig_level
  result <- dplyr::bind_cols(result, get_means(data), get_sd(data), get_fc(data))
  return(result)
}
#' Estimate differential window counts  and mark significantly different windows using edgeR glmFIT method for multiple samples with common control
#'
#' Samples are ordered with the control samples first, and the same order is
#' applied to the count columns so each design row describes the matching
#' count column. The treatment factor has `common_control` as its first level,
#' so design coefficient `i` compares the i-th treatment level with the control.
#'
#' @export
#' @param data an atacr object
#' @param common_control the treatment to consider the control for all other treatments
#' @param which the subset of windows to consider
#' @param sig_level the p_value to consider significant (currently unused: the raw test objects are returned)
#' @param remove_zeros apply edgeR remove.zeros argument
#' @return a list of "DGELRT" objects, one per non-control treatment, named by treatment
edgeR_multiclass <- function(data, common_control, which = "bait_windows", sig_level = 0.05, remove_zeros = FALSE){

  sample_treatments <- as.character(data$treatments)
  ctrl_idcs <- base::which(sample_treatments == common_control)
  other_idcs <- base::which(sample_treatments != common_control)
  if (length(ctrl_idcs) == 0) {
    stop("no samples found for common_control: ", common_control)
  }
  new_order <- c(ctrl_idcs, other_idcs)

  ordered_treatments <- sample_treatments[ new_order ]
  samples <- data$sample_names[ new_order ]

  # control first, then the other treatments in order of appearance; fixing
  # the levels makes the control the baseline instead of the alphabetically
  # first treatment
  treatment_levels <- unique(ordered_treatments)

  df <- data.frame(sample = samples, treatment = factor(ordered_treatments, levels = treatment_levels))
  design <- stats::model.matrix(~treatment, data = df)

  # reorder the count columns the same way as the design rows
  counts <- SummarizedExperiment::assay(data[[which]])[, new_order, drop = FALSE]
  dglist <- edgeR::DGEList(counts, remove.zeros = remove_zeros)

  dglist <- edgeR::estimateDisp(dglist, design)
  fit <- edgeR::glmQLFit(dglist, design)

  dgelrts <- list()

  # coefficient 1 is the intercept (the control); seq_along()[-1] is empty
  # when only the control is present, where 2:num_levels would count down
  for (i in seq_along(treatment_levels)[-1]) {
    dgelrts[[ treatment_levels[i] ]] <- edgeR::glmQLFTest(fit, coef = i)
  }

  # TODO: a tidy per-window data frame (p value, is_sig at sig_level, means,
  # sd, log2 fold change), as returned by the other estimators, is not built yet
  return(dgelrts)
}
373 | 


--------------------------------------------------------------------------------
/docs/differential_windows.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | 
  3 | <html xmlns="http://www.w3.org/1999/xhtml">
  4 | 
  5 | <head>
  6 | 
  7 | <meta charset="utf-8" />
  8 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  9 | <meta name="generator" content="pandoc" />
 10 | 
 11 | <meta name="viewport" content="width=device-width, initial-scale=1">
 12 | 
 13 | <meta name="author" content="Dan MacLean" />
 14 | 
 15 | <meta name="date" content="2018-03-21" />
 16 | 
 17 | <title>Differentially accessible or expressed windows</title>
 18 | 
 19 | 
 20 | 
 21 | <style type="text/css">code{white-space: pre;}</style>
 22 | <style type="text/css">
 23 | div.sourceCode { overflow-x: auto; }
 24 | table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
 25 |   margin: 0; padding: 0; vertical-align: baseline; border: none; }
 26 | table.sourceCode { width: 100%; line-height: 100%; }
 27 | td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
 28 | td.sourceCode { padding-left: 5px; }
 29 | code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
 30 | code > span.dt { color: #902000; } /* DataType */
 31 | code > span.dv { color: #40a070; } /* DecVal */
 32 | code > span.bn { color: #40a070; } /* BaseN */
 33 | code > span.fl { color: #40a070; } /* Float */
 34 | code > span.ch { color: #4070a0; } /* Char */
 35 | code > span.st { color: #4070a0; } /* String */
 36 | code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
 37 | code > span.ot { color: #007020; } /* Other */
 38 | code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
 39 | code > span.fu { color: #06287e; } /* Function */
 40 | code > span.er { color: #ff0000; font-weight: bold; } /* Error */
 41 | code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
 42 | code > span.cn { color: #880000; } /* Constant */
 43 | code > span.sc { color: #4070a0; } /* SpecialChar */
 44 | code > span.vs { color: #4070a0; } /* VerbatimString */
 45 | code > span.ss { color: #bb6688; } /* SpecialString */
 46 | code > span.im { } /* Import */
 47 | code > span.va { color: #19177c; } /* Variable */
 48 | code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
 49 | code > span.op { color: #666666; } /* Operator */
 50 | code > span.bu { } /* BuiltIn */
 51 | code > span.ex { } /* Extension */
 52 | code > span.pp { color: #bc7a00; } /* Preprocessor */
 53 | code > span.at { color: #7d9029; } /* Attribute */
 54 | code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
 55 | code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
 56 | code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
 57 | code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
 58 | </style>
 59 | 
 60 | 
 61 | 
 62 | <link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0
Aborder%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%
23800080%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0Aa%5Bhref%5E%3D%22https%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0A%0Acode%20%3E%20span%2Ekw%20%7B%20color%3A%20%23555%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Edt%20%7B%20color%3A%20%23902000%3B%20%7D%20%0Acode%20%3E%20span%2Edv%20%7B%20color%3A%20%2340a070%3B%20%7D%20%0Acode%20%3E%20span%2Ebn%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Efl%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Ech%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Est%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Eco%20%7B%20color%3A%20%23888888%3B%20font%2Dstyle%3A%20italic%3B%20%7D%20%0Acode%20%3E%20span%2Eot%20%7B%20color%3A%20%23007020%3B%20%7D%20%0Acode%20%3E%20span%2Eal%20%7B%20color%3A%20%23ff0000%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Efu%20%7B%20color%3A%20%23900%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%20code%20%3E%20span%2Eer%20%7B%20color%3A%20%23a61717%3B%20background%2Dcolor%3A%20%23e3d2d2%3B%20%7D%20%0A" rel="stylesheet" type="text/css" />
 63 | 
 64 | </head>
 65 | 
 66 | <body>
 67 | 
 68 | 
 69 | 
 70 | 
 71 | <h1 class="title toc-ignore">Differentially accessible or expressed windows</h1>
 72 | <h4 class="author"><em>Dan MacLean</em></h4>
 73 | <h4 class="date"><em>2018-03-21</em></h4>
 74 | 
 75 | 
 76 | 
 77 | <p>Finding windows that are differentially expressed or differentially accessible is possible with two related functions in <code>atacr</code>: <code>estimate_fdr()</code>, which implements bootstrap <em>t</em>-tests via the boot package, and <code>estimate_bayes_factor()</code>, which computes a Bayes factor <em>t</em>-test using the BayesFactor package. A tidy dataframe of results is returned in each case.</p>
 78 | <div id="bootstrap-t-tests" class="section level3">
 79 | <h3>Bootstrap <em>t</em>-tests</h3>
 80 | <p>For simple comparison of two treatments with bootstrap <em>t</em> tests, provide treatment ‘a’ and ‘b’ names and the number of bootstrap iterations (default is 10, which is fast for testing code, but useless analytically). You can set the threshold for marking as significant with <code>fdr_level</code>.</p>
 81 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"> result &lt;-<span class="st"> </span><span class="kw">estimate_fdr</span>(normalized_counts,
 82 |               <span class="dt">treatment_a =</span>  <span class="st">&quot;treatment&quot;</span>,
 83 |               <span class="dt">treatment_b =</span> <span class="st">&quot;control&quot;</span>,
 84 |               <span class="dt">iterations =</span> <span class="dv">100000</span>,
 85 |               <span class="dt">fdr_level =</span> <span class="fl">0.01</span>)</code></pre></div>
 86 | <pre><code>##                  window         t p_value       fdr mean_count_a
 87 | ## 1    synth_chrom:1-50:- -1.938336     0.1 0.2130435    5632.6667
 88 | ## 2  synth_chrom:51-100:+ -1.218827     0.2 0.3062500    9758.6667
 89 | ## 3 synth_chrom:101-150:-  4.107091     0.0 0.0000000     205.3333
 90 | ## 4 synth_chrom:151-200:- -1.510404     0.2 0.3062500   15202.6667
 91 | ## 5 synth_chrom:251-300:-  3.308530     0.0 0.0000000   39171.0000
 92 | ## 6 synth_chrom:301-350:-  1.400435     0.3 0.3868421      62.0000
 93 | ##   mean_count_b        sd_a         sd_b   log2_fc is_sig
 94 | ## 1  15382.66667  6377.90407  5935.290164 -1.449416  FALSE
 95 | ## 2  20613.66667  4435.08403 14774.507279 -1.078845  FALSE
 96 | ## 3     83.66667    50.46121     9.291573  1.295243   TRUE
 97 | ## 4  36699.33333 19444.72315 15152.091814 -1.271429  FALSE
 98 | ## 5  15567.33333 12216.06745  1859.435488  1.331264   TRUE
 99 | ## 6     12.33333    59.80803    14.011900  2.329705  FALSE</code></pre>
100 | <p>The output has columns as follows:</p>
101 | <ul>
102 | <li><code>window</code> - the name of the window with data on this row</li>
103 | <li><code>t</code> - the value of the <em>t</em> statistic for the first (non-bootstrap) iteration</li>
104 | <li><code>p_value</code> - the computed <em>p</em> value for the window</li>
105 | <li><code>fdr</code> - the false discovery rate at this window</li>
106 | <li><code>mean_count_a</code> - the mean count for treatment ‘a’</li>
107 | <li><code>mean_count_b</code> - the mean count for treatment ‘b’</li>
108 | <li><code>sd_a</code> - standard deviation for treatment ‘a’</li>
109 | <li><code>sd_b</code> - standard deviation for treatment ‘b’</li>
110 | <li><code>log2_fc</code> - log 2 of the ratio of the mean counts</li>
111 | <li><code>is_sig</code> - flag showing whether window was significant according to the level set in the function with parameter <code>fdr_level</code></li>
112 | </ul>
113 | <p>To analyse all treatments against a common comparison at once you can use the wrapper function <code>estimate_fdr_multiclass()</code> which requires the name of the common comparison treatment</p>
114 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">multi_result &lt;-<span class="st">  </span><span class="kw">estimate_fdr_multiclass</span>(normalized_counts,
115 |               <span class="dt">common_control =</span> <span class="st">&quot;control&quot;</span>,
116 |               <span class="dt">iterations =</span> <span class="dv">100000</span>,
117 |               <span class="dt">fdr_level =</span> <span class="fl">0.01</span>)
118 | 
119 | <span class="kw">head</span>(multi_result)</code></pre></div>
120 | <pre><code>##                  window         t p_value       fdr mean_count_a
121 | ## 1    synth_chrom:1-50:- -1.938336     0.0 0.0000000    5632.6667
122 | ## 2  synth_chrom:51-100:+ -1.218827     0.0 0.0000000    9758.6667
123 | ## 3 synth_chrom:101-150:-  4.107091     0.0 0.0000000     205.3333
124 | ## 4 synth_chrom:151-200:- -1.510404     0.1 0.1689655   15202.6667
125 | ## 5 synth_chrom:251-300:-  3.308530     0.0 0.0000000   39171.0000
126 | ## 6 synth_chrom:301-350:-  1.400435     0.1 0.1689655      62.0000
127 | ##   mean_count_b        sd_a         sd_b   log2_fc is_sig         a       b
128 | ## 1  15382.66667  6377.90407  5935.290164 -1.449416   TRUE treatment control
129 | ## 2  20613.66667  4435.08403 14774.507279 -1.078845   TRUE treatment control
130 | ## 3     83.66667    50.46121     9.291573  1.295243   TRUE treatment control
131 | ## 4  36699.33333 19444.72315 15152.091814 -1.271429  FALSE treatment control
132 | ## 5  15567.33333 12216.06745  1859.435488  1.331264   TRUE treatment control
133 | ## 6     12.33333    59.80803    14.011900  2.329705  FALSE treatment control</code></pre>
134 | <p>The results here have two extra columns:</p>
135 | <ul>
136 | <li>a - the name of the treatment</li>
137 | <li>b - the name of the common control</li>
138 | </ul>
139 | </div>
140 | <div id="bayes-factor-analysis" class="section level3">
141 | <h3>Bayes Factor Analysis</h3>
142 | <p>A similar pair of functions is available for Bayes factor analysis. Use <code>estimate_bayes_factor()</code> for the two-way comparison. The <code>factor</code> argument sets the Bayes factor at which to mark the window as having different counts.</p>
143 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">result_bf &lt;-<span class="st">  </span><span class="kw">estimate_bayes_factor</span>(normalized_counts,
144 |                            <span class="dt">treatment_a =</span>  <span class="st">&quot;treatment&quot;</span>,
145 |                            <span class="dt">treatment_b =</span> <span class="st">&quot;control&quot;</span>,
146 |                                 <span class="dt">factor =</span> <span class="fl">2.0</span>)
147 | 
148 | <span class="kw">head</span>(result_bf)</code></pre></div>
149 | <pre><code>##                  window bayes_factor is_sig mean_count_a mean_count_b
150 | ## 1    synth_chrom:1-50:-   0.20483396  FALSE    5632.6667  15382.66667
151 | ## 2  synth_chrom:51-100:+  -0.20139051  FALSE    9758.6667  20613.66667
152 | ## 3 synth_chrom:101-150:-   1.39100629  FALSE     205.3333     83.66667
153 | ## 4 synth_chrom:151-200:-  -0.04237361  FALSE   15202.6667  36699.33333
154 | ## 5 synth_chrom:251-300:-   0.98251046  FALSE   39171.0000  15567.33333
155 | ## 6 synth_chrom:301-350:-  -0.10371176  FALSE      62.0000     12.33333
156 | ##          sd_a         sd_b   log2_fc
157 | ## 1  6377.90407  5935.290164 -1.449416
158 | ## 2  4435.08403 14774.507279 -1.078845
159 | ## 3    50.46121     9.291573  1.295243
160 | ## 4 19444.72315 15152.091814 -1.271429
161 | ## 5 12216.06745  1859.435488  1.331264
162 | ## 6    59.80803    14.011900  2.329705</code></pre>
163 | <p>Again, a <code>estimate_bayes_factor_multiclass()</code> function works for all comparisons to a common control.</p>
164 | <p>The results data frame is similar to that from the Bootstrap <em>t</em> methods, with a <code>bayes_factor</code> column in place of the <code>t</code>, <code>p_value</code> and <code>fdr</code> columns.</p>
165 | </div>
166 | 
167 | 
168 | 
169 | <!-- dynamically load mathjax for compatibility with self-contained -->
170 | <script>
171 |   (function () {
172 |     var script = document.createElement("script");
173 |     script.type = "text/javascript";
174 |     script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
175 |     document.getElementsByTagName("head")[0].appendChild(script);
176 |   })();
177 | </script>
178 | 
179 | </body>
180 | </html>
181 | 


--------------------------------------------------------------------------------
/docs/tutorial.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "A worked example with atacR"
  3 | author: "Dan MacLean"
  4 | date: "`r Sys.Date()`"
  5 | output: html_document
  6 | 
  7 | ---
  8 | ```{r, echo = FALSE}
  9 | knitr::opts_chunk$set(
 10 |   warning = FALSE,
 11 |   message = FALSE
 12 | )
 13 | ```
 14 | 
 15 | ## About the experiment
 16 | 
 17 | The experiment we'll be running through is an ATAC-cap-seq experiment of _Arabidopsis_ plant leaves taken from plants exposed to either a mock (water) treatment or infected with a pathogen. We have the bare minimum replicate number, just three independent samples for each of the mock or infected treatments. The reads are paired-end and 50 nt long. The BAM files are sorted using `samtools` from SAM files generated by `BWA` but at the start no further processing has been done. The paths to the BAM files and the bait region coordinates on your system are described below. 
 18 | 
 19 | ## Preparing input files
 20 | 
 21 | The first step of any `atacR` analysis is to build the input files, these are basically files listing where the important data files are on your system.
 22 | You'll need a file listing the BAM files and a file listing the bait windows. These are described more fully in the [loading vignette](loading.html).
 23 | 
 24 | The `atacR` package comes with a small set of ATAC-cap-seq data built in. It is installed along with the package, and we'll use that data in this tutorial.
 25 | 
 26 | A convenience function is built into `atacR` that will find the built-in data and build the files you need to follow this tutorial. All you need to do is decide where those files should be written. Below we write them to the `Desktop`. Two files will appear on the `Desktop` - `bait_regions.gff` and `sample_treatment_bam_mappings.csv`. If you inspect these you'll see the structure. 
 27 | 
 28 | 
 29 | ```{r}
 30 | library(atacr)
 31 | input_files <- make_tutorial_data("~/Desktop/")
 32 | input_files
 33 | ```
 34 | 
 35 | The object `input_files` holds the paths to the input files we made, so we'll use that to get going.
 36 | 
 37 | ## Generating read counts 
 38 | 
 39 | Once we have the files ready, we can begin analysis. We can extract information from the files and make the counts we're interested in with the `make_counts()` function. In this step we'll set the read filter parameters to decide which reads and alignments in the BAM file are of sufficiently good quality to be counted. 
 40 | 
 41 | 
 42 | Depending on whether you have ATAC-cap-seq or RNA-cap-seq this function does slightly different things. If you have ATAC-cap-seq, this function divides the bait regions in the genome into sub-windows of a fixed width. If you have RNA-cap-seq the whole bait region is considered to be a single window. 
 43 | 
 44 | Below we will use the default window sizes (50nt, non-overlapping) and read filters (described in the [loading vignette](loading.html) ). As this is ATAC-cap-seq data we need to specify that too.
 45 | 
 46 | ```{r}
 47 | counts <- make_counts(input_files$bait_regions_file, 
 48 |                       input_files$mapping_file,
 49 |                       is_rnaseq = FALSE
 50 |            )
 51 | ```
 52 | 
 53 | The resulting object `counts` has a few slots containing information. The most important are `bait_windows` which describes the windows in the bait regions and `non_bait_windows` which describes all the spaces in between the `bait_windows`. By default all functions will work on `bait_windows` but you can change the subset using the `which` parameter (see the [atacr which](atacr_which.html) vignette for more information).
 54 | 
 55 | ## Summarising data
 56 | 
 57 | Once everything is loaded, it is a good idea to check the counts object is as you expect. The `summary()` function does this.
 58 | 
 59 | ### Summary statistics
 60 | 
 61 | ```{r, }
 62 | summary(counts)
 63 | ```
 64 | 
 65 | The summary is very long but worthwhile. A feature of `atacR` is that it keeps counts in non-bait region windows. Non-bait region windows are those outside the bait regions. The non-bait regions are not the same size as the bait window regions - A single non-bait window covers all the space between the last window of one bait region and the first window of the next.
 66 | 
 67 |   - The `treatments` line gives the two classes of data that `atacR` understands you have, here `mock` and `infected`.
 68 |   - The `samples` line gives the samples and replicate information
 69 |   - The `Bait regions used` line gives the bait region count
 70 |   - The `Total Windows` line tells how many windows those baits are divided into.
 71 |   - The `On/Off target read counts` section tells how many reads are in the windows (`on_target`) and how many are outside (`off_target`) for each sample
 72 |   - The `Quantiles` section shows the read count at each quantile for each sample in the windows in bait regions or non-bait regions
 73 |   - The `Read depths` section shows the `on_target` and `off_target` region average read depths.
 74 |   
 75 | As we can see the coverage in this small sample is relatively low - that's an artefact of small files to keep the tutorial running quite quickly. But most windows have an average of ~ 10 counts and the off-target reads are very low (< 1 %).  
 76 |   
 77 | ### Summary and QC plots
 78 | 
 79 | The `atacR` package has a range of summary and QC plots for visualising different aspects of the data.
 80 | 
 81 | The samples can be inspected through plots. The standard `plot` function creates a few summary style plots enabling you to view coverage distribution and region density. As it summarises windows, the more windows you have, the slower it runs!
 82 | 
 83 | ```{r plot_plot, cache=TRUE, eval=TRUE}
 84 | plot(counts)
 85 | ```
 86 | 
 87 | A coverage threshold plot can reveal the number of windows that have coverage lower than a given value. 
 88 | 
 89 | ```{r coverage_threshold, eval=TRUE}
 90 | windows_below_coverage_threshold_plot(counts)
 91 | ```
 92 | Here we can see that `mock_rep1` and `mock_rep3` have fewer windows below the coverage threshold, so are generally better covered. 
 93 | 
 94 | We can see from all these that although the read mapping and filtering is specific to the bait regions, quite a lot of windows (~ 2000 in each sample) have counts of 0, which indicates that some of the DNA in the sequence regions was not sampled. You may wish to play with window size settings to see how robust this phenomenon is to window size. Increasing the window size will likely reduce the zero count windows number by merging counts from adjacent windows. Decreasing the window size will likely increase zero count windows. The level of granularity you use will be study dependent and if you intend to conclude absence of counts (e.g. for detection of closed chromatin) then you'll want to be very careful with comparison to specific control windows to make that comparison. 
 95 | 
 96 | ### Specific window counts
 97 | 
 98 | You can examine specific window counts quite easily. The internal object holding the data is of class `SummarizedExperiment`, which is part of BioConductor, so you can use functions in standard BioConductor packages to interrogate them. Here's how you might get information on specific window counts.
 99 | 
100 | First you must create a region of interest. Use the [GenomicRanges](https://bioconductor.org/packages/release/bioc/html/GenomicRanges.html) package to do this.
101 | 
102 | ```{r, eval=TRUE}
103 | roi <- GenomicRanges::GRanges(seqnames = "Chr1", ranges = 245951:246250)
104 | ```
105 | 
106 | Next, subset the window set of interest with [IRanges](https://bioconductor.org/packages/release/bioc/html/IRanges.html).
107 | 
108 | ```{r, eval=TRUE}
109 | small_section <- IRanges::subsetByOverlaps(counts$bait_windows, roi)
110 | ```
111 | 
112 | The resulting object is a [SummarizedExperiment](https://www.bioconductor.org/packages/devel/bioc/vignettes/SummarizedExperiment/inst/doc/SummarizedExperiment.html) which doesn't print literally, as it can be quite big. To get at the actual count matrix, use the `assay` function.
113 | 
114 | ```{r, eval=TRUE}
115 | SummarizedExperiment::assay(small_section)
116 | ```
117 | 
118 | #### Sample reproducibility
119 | 
120 | A PCA plot can be used to examine the similarity between the different samples. Here we can see that two of the infected replicates are way off from each other and the other more similar samples. You can use these plots to identify any samples that are extremely different from the others. As all of the infected samples are quite different in different ways, we may be seeing just a large amount of experimental variability in our results, which can be important too. So we'll proceed with the data, keeping in mind that variability may be large and for particular treatments, we may need to gather more replicates.  
121 | 
122 | ```{r pca_plot, cache =TRUE, eval = TRUE}
123 | sample_pca_plot(counts)
124 | ```
125 | 
126 | An MA plot can show you eccentricities in each sample ([see the wiki page for more information](https://en.wikipedia.org/wiki/MA_plot)). In the `atacR` MA plot a common reference is used: the median value for a window serves as a common denominator for each sample.  
127 | 
128 | ```{r ma_plot, eval=TRUE}
129 | ma_plot(counts)
130 | ```
131 | 
132 | In this MA plot we see some structure in the data, the strong lines in each subplot indicate the points with zero for the count. The infected overall show higher counts than the mock. The usual assumption of most windows not changing between samples may not hold here, as the clouds of points seem quite shifted between mock and infected.  
133 | 
134 | ## Normalisation
135 | 
136 | The normalisation step helps us to reduce systematic between-sample variability. Sequence data are hard to normalise, and cannot be normalised well by simple scaling. For RNASeq data there are numerous methods such as FPKM etc that sort of normalise. The best approaches with ATAC-cap-seq data are to find the least varying windows, then calculate factors  and use those to scale the rest of the data with. 
137 | 
138 | `atacR` provides three types of normalisation. These are 
139 | 
140 |   1. Library size 
141 |   2. Scale factor 
142 |   3. Goodness of Fit 
143 |   
144 | The best of these is 3. Goodness of Fit. It is fast, automatically finds the least varying and best features in the data to normalise with and does a reasonable job of between-sample normalisation. It is usually the best one to choose. It is particularly useful when you don't know whether many windows will be changing or just a few will be, as it should perform the same regardless.  
145 | 
146 | The Library size normalisation is the most basic and the one that most studies seem to use for normalisation - the basis of this is that each count is divided by the mean count for all samples in that treatment. For most ATAC purposes this will be underpowered, because the low number of windows or high proportions of changing windows will cause skew between samples. This method is useful when you have reasonably high counts (> 20 mean) and you are certain few windows (< 10%) will display differential counts.
147 | 
148 | The Scale factor normalisation is provided to allow interaction with other normalisation from other packages. With this you provide a number for each sample and the counts in each sample are divided by the respective number. It is only useful when you have some other method that generates factors that you wish to use to scale counts. 
149 | 
150 | Check out the [normalisations vignette](normalisations.html) for further information.
151 | 
152 | ### Goodness of Fit normalisation
153 | 
154 | Here we'll run Goodness of Fit (GoF) on the sample data. First step is to run the GoF code and find the most stable windows across the samples to use to normalise.
155 | 
156 | ```{r, GoF, eval = TRUE}
157 | auto_controls <- find_controls_by_GoF(counts)
158 | ```
159 | 
160 | We can use these to check the selected control windows have lower GoF than the non-selected windows using the `plot_GoF()` function
161 | 
162 | ```{r, eval = TRUE}
163 | plot_GoF(counts, controls = auto_controls)
164 | ```
165 | They are better. They have a lower, spikier mean Goodness of Fit. The Non-control data has a long tail distribution so the difference is quite pronounced. So we can now generate the normalisation factors and apply them. We'll save the resulting information to a new slot in the counts object. Then we'll plot the pre- and post- normalised data to see the effects of the normalisation.
166 | 
167 | ```{r}
168 | gof_norm_factors <- get_GoF_factors(counts)
169 | 
170 | gof_normalised_counts <- scale_factor_normalise(counts, 
171 |                                           scaling_factors = gof_norm_factors)
172 | 
173 | counts$normalised_counts <- gof_normalised_counts
174 | 
175 | 
176 | plot_counts(counts)
177 | plot_counts(counts, which = "normalised_counts")
178 | ma_plot(counts)
179 | ma_plot(counts, which = "normalised_counts")
180 | 
181 | ```
182 | 
183 | We can see that the distributions get a little closer to each other and that the spread in the data in MA plots is reduced a little. The variability in these data is quite high though. See the [normalisations vignette](normalisations.html) for further discussion.
184 | 
185 | ## Differential window counts
186 | 
187 | Once you are happy with the normalisation, you can try to estimate which windows have differential counts. `atacR` gives you three methods. 
188 | 
189 |   1. edgeR exact test - this is a wrapper around the edgeR method for single factor designs, using the `estimateDispersion` method. This method was designed for genome wide studies so works best when only a few (~5 %) of the windows are expected to have differential counts. It is the most sensitive in this situation though.
190 |   2. bootstrap _t_ test - this is a brute force method that uses resampling of each window's sample counts and recalculating of the Student's _t_ statistic to come up with a background distribution of _t_. If the observed _t_ is at the edges of this distribution, differential counts are called. This method is useful when any number of the windows may show differential counts. 
191 |   3. Bayes Factor test, this calculates the [Bayes factor](http://bayesfactor.blogspot.co.uk/2014/02/the-bayesfactor-package-this-blog-is.html) for each window. The ratio of the Bayes factor for control and test is returned on a window by window basis. If the ratio is over a given number (4 by default) a differential count is called.  This method is useful when any number of the windows may show differential counts. 
192 |   
193 | See the [differential windows vignette](differential_windows.html) for further discussion
194 | 
195 | We can perform differential analysis in the following ways, we'll use the `which` argument (see [which vignette](atacr_which.html) ) to make sure we analyse the normalised counts.
196 | 
197 | ```{r differential, eval = TRUE}
198 | 
199 | 
200 |  edgeRexact_result <-  edgeR_exact(counts,
201 |                 which = "bait_windows",
202 |                 treatment_a =  "infected",
203 |                treatment_b = "mock",
204 |                 remove_zeros = TRUE)
205 |  
206 | bootstrap_result <- estimate_fdr(counts,
207 |               which = "normalised_counts",
208 |               treatment_a =  "infected",
209 |               treatment_b = "mock",
210 |               iterations = 10
211 | )
212 | 
213 | bayesfactor_result <- estimate_bayes_factor(counts,
214 |               treatment_a = "infected",
215 |               treatment_b =  "mock",
216 |               which = "normalised_counts"
217 |               )
218 | 
219 | ```
220 | 
221 | The resulting dataframe holds the result of these calculations
222 | 
223 | ```{r}
224 | 
225 | head(bootstrap_result)
226 | 
227 | head(bayesfactor_result)
228 | 
229 | head(edgeRexact_result)
230 | ```
231 | 
232 | 
233 | Each of these methods works on single factor designs; there is also a `multiclass` variant of each that works on common control designs. See the [differential windows vignette](differential_windows.html) for further discussion of these.
234 | 
235 | ```{r}
236 | bf_multi <- estimate_bayes_factor_multiclass(counts, "mock",
237 |               factor = 0.5,
238 |               which = "normalised_counts"
239 |               )
240 | 
241 | head(bf_multi)
242 | 
243 | fdr_multi <- estimate_fdr_multiclass(counts, "mock",
244 |              fdr_level = 0.05,
245 |               which = "normalised_counts"
246 |              )
247 | 
248 | head(fdr_multi)
249 | 
250 | ```
251 | 
252 | The `edgeR_multiclass()` function does not return a dataframe, instead it returns the native `DGELRT` objects (see [the DGELRT manual](https://www.rdocumentation.org/packages/edgeR/versions/3.14.0/topics/DGELRT-class) for more information) from each comparison in a `list()` object with names as per the treatment used.
253 | 
254 | ```{r}
255 | edgeR_multiclass(counts,"mock", 
256 |   remove_zeros = TRUE, 
257 |   which = "bait_windows")
258 | ```
259 | 
260 | 


--------------------------------------------------------------------------------