├── inst
    ├── extdata
    │   └── indrops
    │   │   ├── 2018-01-01_bcbio
    │   │       ├── bcbio-nextgen.log
    │   │       ├── bcbio-nextgen-commands.log
    │   │       ├── project-summary.yaml
    │   │       └── programs.txt
    │   │   ├── metadata.csv
    │   │   └── multiplexed-AAAAAAAA
    │   │       ├── multiplexed-AAAAAAAA.mtx.rownames
    │   │       ├── multiplexed-AAAAAAAA.mtx.colnames
    │   │       └── multiplexed-AAAAAAAA-barcodes.tsv
    └── rmarkdown
    │   └── templates
    │       └── quality-control
    │           ├── template.yaml
    │           └── skeleton
    │               └── skeleton.Rmd
├── tests
    ├── testthat
    │   ├── .gitignore
    │   ├── test-show.R
    │   ├── helper-globals.R
    │   ├── helper-cache.R
    │   ├── test-updateObject.R
    │   ├── test-plotReadsPerCell.R
    │   ├── test-bcbioSingleCell.R
    │   └── test-filterCells.R
    └── testthat.R
├── .gitignore
├── pkgdown
    └── extra.css
├── data
    └── bcb.rda
├── .Rbuildignore
├── R
    ├── data.R
    ├── AllGenerics.R
    ├── AllGlobals.R
    ├── reexports.R
    ├── show-methods.R
    ├── internal-barcodes.R
    ├── extract-methods.R
    ├── package.R
    ├── AllClasses.R
    ├── internal-import.R
    ├── updateObject-methods.R
    ├── AllGenerators.R
    └── plotReadsPerCell-methods.R
├── man
    ├── bcbioSingleCellTestsUrl.Rd
    ├── bcb.Rd
    ├── show.Rd
    ├── bcbioSingleCell-class.Rd
    ├── updateObject.Rd
    ├── reexports.Rd
    ├── bcbioSingleCell-package.Rd
    ├── plotReadsPerCell.Rd
    ├── extract.Rd
    └── bcbioSingleCell.Rd
├── package.Rproj
├── _pkgdown.yml
├── data-raw
    └── bcb.R
├── .lintr
├── DESCRIPTION
├── todo.org
├── README.md
├── NAMESPACE
└── LICENSE


/inst/extdata/indrops/2018-01-01_bcbio/bcbio-nextgen.log:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/testthat/.gitignore:
--------------------------------------------------------------------------------
1 | *.rda
2 | subsetPerSample
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .RData
2 | .Rcheck
3 | .Rhistory
4 | .Rproj.user
5 | docs/
6 | 


--------------------------------------------------------------------------------
/pkgdown/extra.css:
--------------------------------------------------------------------------------
1 | @import url("https://steinbaugh.com/css/pkgdown.css");
2 | 


--------------------------------------------------------------------------------
/data/bcb.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hbc/bcbioSingleCell/HEAD/data/bcb.rda


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | ## nolint start
2 | library(testthat)
3 | library(bcbioSingleCell)
4 | ## nolint end
5 | 
6 | test_check("bcbioSingleCell")
7 | 


--------------------------------------------------------------------------------
/inst/extdata/indrops/metadata.csv:
--------------------------------------------------------------------------------
1 | fileName,description,index,sequence,sampleName,aggregate
2 | multiplexed.fastq.gz,multiplexed,1,TTTTTTTT,rep_1,sample
3 | 


--------------------------------------------------------------------------------
/tests/testthat/test-show.R:
--------------------------------------------------------------------------------
1 | test_that("bcbioSingleCell", {
2 |     firstLine <- capture.output(show(bcb))[[1L]]
3 |     expect_true(grepl(pattern = "^bcbioSingleCell", x = firstLine))
4 | })
5 | 


--------------------------------------------------------------------------------
/tests/testthat/helper-globals.R:
--------------------------------------------------------------------------------
1 | ## nolint start
2 | data <- utils::data
3 | hasInternet <- goalie::hasInternet
4 | ## nolint end
5 | 
6 | data(bcb, envir = environment())
7 | 


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/quality-control/template.yaml:
--------------------------------------------------------------------------------
1 | name: Quality Control
2 | description: >
3 |   Template for single-cell RNA-seq quality control report.
4 | create_dir: false
5 | 


--------------------------------------------------------------------------------
/tests/testthat/helper-cache.R:
--------------------------------------------------------------------------------
1 | ## Cache the test file via AcidDevTools and keep only the returned `cacheDir`
2 | ## element, which the unit tests use to locate the cached file.
3 | cacheDir <- AcidDevTools::cacheTestFiles(
4 |     pkg = .pkgName,
5 |     files = "bcbioSingleCell_0.1.0.rds"
6 | )[["cacheDir"]]
7 | 


--------------------------------------------------------------------------------
/inst/extdata/indrops/2018-01-01_bcbio/bcbio-nextgen-commands.log:
--------------------------------------------------------------------------------
1 | cellularBarcodeCutoff: --cb_cutoff 1000
2 | level: --genemap Homo_sapiens.GRCh38.90-tx2gene.tsv
3 | umiType: umis fastqtransform --separate_cb /XXX/umis/harvard-indrop-v3-transform.json
4 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^CONTRIBUTING\.md$
 3 | ^LICENSE$
 4 | ^Meta$
 5 | ^[-._a-zA-Z0-9]+\.(R|Rcheck|Rproj|gz|html|org|pdf|png|sh|tar|txt|xz|yaml|yml|zip)$
 6 | ^[._][-._a-zA-Z0-9]+$
 7 | ^\.Rproj\.user$
 8 | ^cran-comments\.md$
 9 | ^data-raw$
10 | ^doc$
11 | ^docs$
12 | ^pkgdown$
13 | ^todo\.org$
14 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' bcbio single-cell RNA-seq example data set
 2 | #'
 3 | #' Harvard inDrops v3 example data
 4 | #'
 5 | #' @author Michael Steinbaugh
 6 | #' @note Updated 2019-08-12.
 7 | #' @usage data(bcb)
 8 | #' @return `bcbioSingleCell`.
 9 | #'
10 | #' @examples
11 | #' data(bcb)
12 | #' class(bcb)
13 | "bcb"
14 | 


--------------------------------------------------------------------------------
/tests/testthat/test-updateObject.R:
--------------------------------------------------------------------------------
 1 | test_that("bcbioSingleCell", {
 2 |     updated <- updateObject(bcb)
 3 |     expect_s4_class(updated, "bcbioSingleCell")
 4 | })
 5 | 
 6 | test_that("v0.1 update", {
 7 |     legacyFile <- file.path(cacheDir, "bcbioSingleCell_0.1.0.rds")
 8 |     legacy <- import(legacyFile)
 9 |     expect_s4_class(updateObject(legacy), "bcbioSingleCell")
10 | })
11 | 


--------------------------------------------------------------------------------
/R/AllGenerics.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | #' @name plotReadsPerCell
 3 | #' @rdname plotReadsPerCell
 4 | #' @usage plotReadsPerCell(object, ...)
 5 | NULL
 6 | 
 7 | #' @export
 8 | #' @name show
 9 | #' @rdname show
10 | #' @usage show(object)
11 | NULL
12 | 
13 | #' @export
14 | #' @name updateObject
15 | #' @rdname updateObject
16 | #' @usage updateObject(object, ..., verbose = FALSE)
17 | NULL
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plotReadsPerCell.R:
--------------------------------------------------------------------------------
1 | ## Example dataset doesn't have a cellular barcode cutoff because we removed the
2 | ## bcbio commands log file (which conflicts with Travis CI).
3 | test_that("geom", {
4 |     geoms <- eval(formals(`plotReadsPerCell,bcbioSingleCell`)[["geom"]])
5 |     for (i in seq_along(geoms)) {
6 |         expect_s3_class(plotReadsPerCell(bcb, geom = geoms[[i]]), "ggplot")
7 |     }
8 | })
9 | 


--------------------------------------------------------------------------------
/man/bcbioSingleCellTestsUrl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGlobals.R
 3 | \docType{data}
 4 | \name{bcbioSingleCellTestsUrl}
 5 | \alias{bcbioSingleCellTestsUrl}
 6 | \title{Cache URL}
 7 | \format{
 8 | An object of class \code{character} of length 1.
 9 | }
10 | \usage{
11 | bcbioSingleCellTestsUrl
12 | }
13 | \description{
14 | Cache URL
15 | }
16 | \examples{
17 | bcbioSingleCellTestsUrl
18 | }
19 | \keyword{internal}
20 | 


--------------------------------------------------------------------------------
/package.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 4
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/R/AllGlobals.R:
--------------------------------------------------------------------------------
 1 | .pkgName <- packageName()
 2 | .pkgVersion <- packageVersion(.pkgName)
 3 | 
 4 | ## Supported plot geom choices. This is also defined in AcidPlots.
 5 | .geom <- c("histogram", "ecdf", "violin", "ridgeline", "boxplot")
 6 | 
 7 | ## We're adding an additional raw reads column (pre-UMI disambiguation).
 8 | .metricsCols <- c("nRead", metricsCols)
 9 | 
10 | .requiredAssays <- "counts" # NOTE(review): presumably checked at validity.
11 | 
12 | #' Cache URL
13 | #' @keywords internal
14 | #' @export
15 | #' @examples
16 | #' bcbioSingleCellTestsUrl
17 | bcbioSingleCellTestsUrl <- "https://r.acidgenomics.com/testdata/bcbiosinglecell"
18 | 


--------------------------------------------------------------------------------
/man/bcb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{bcb}
 5 | \alias{bcb}
 6 | \title{bcbio single-cell RNA-seq example data set}
 7 | \format{
 8 | An object of class \code{bcbioSingleCell} with 50 rows and 100 columns.
 9 | }
10 | \usage{
11 | data(bcb)
12 | }
13 | \value{
14 | \code{bcbioSingleCell}.
15 | }
16 | \description{
17 | Harvard inDrops v3 example data
18 | }
19 | \note{
20 | Updated 2019-08-12.
21 | }
22 | \examples{
23 | data(bcb)
24 | class(bcb)
25 | }
26 | \author{
27 | Michael Steinbaugh
28 | }
29 | \keyword{datasets}
30 | 


--------------------------------------------------------------------------------
/man/show.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/show-methods.R
 3 | \name{show}
 4 | \alias{show}
 5 | \alias{show,bcbioSingleCell-method}
 6 | \title{Show an object}
 7 | \usage{
 8 | show(object)
 9 | 
10 | \S4method{show}{bcbioSingleCell}(object)
11 | }
12 | \arguments{
13 | \item{object}{Object.}
14 | }
15 | \value{
16 | Console output.
17 | }
18 | \description{
19 | Show an object
20 | }
21 | \note{
22 | Updated 2022-05-09.
23 | }
24 | \examples{
25 | data(bcb)
26 | 
27 | ## bcbioSingleCell ====
28 | show(bcb)
29 | }
30 | \author{
31 | Michael Steinbaugh
32 | }
33 | 


--------------------------------------------------------------------------------
/inst/extdata/indrops/2018-01-01_bcbio/project-summary.yaml:
--------------------------------------------------------------------------------
 1 | date: '2018-01-01 00:00:00.000000'
 2 | upload: /n/data1/XXX/final
 3 | bcbio_system: /n/app/bcbio/dev/galaxy/bcbio_system.yaml
 4 | samples:
 5 | - description: multiplexed-AAAAAAAA
 6 |   dirs:
 7 |     config: /n/app/bcbio/dev/galaxy
 8 |     fastq: null
 9 |     flowcell: null
10 |     galaxy: /n/app/bcbio/dev/galaxy
11 |     work: /n/scratch2/XXX/data/bcbio
12 |   genome_build: hg38
13 |   genome_resources:
14 |     rnaseq:
15 |       transcripts: /n/app/bcbio/dev/genomes/Hsapiens/hg38/rnaseq/ref-transcripts.gtf
16 |   metadata:
17 |     batch: null
18 |     phenotype: ''
19 | 


--------------------------------------------------------------------------------
/man/bcbioSingleCell-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllClasses.R
 3 | \docType{class}
 4 | \name{bcbioSingleCell-class}
 5 | \alias{bcbioSingleCell-class}
 6 | \title{bcbio single-cell RNA-seq data set}
 7 | \description{
 8 | \code{bcbioSingleCell} is an S4 class that extends \code{SingleCellExperiment}, and is
 9 | designed to store a bcbio single-cell RNA-seq analysis. This class contains
10 | read counts saved as a sparse matrix (\code{sparseMatrix}), sample metadata, and
11 | cell quality control metrics.
12 | }
13 | \note{
14 | Updated 2022-05-09.
15 | }
16 | \author{
17 | Michael Steinbaugh, Rory Kirchner
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-bcbioSingleCell.R:
--------------------------------------------------------------------------------
 1 | uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell")
 2 | 
 3 | ## Minimal mode, with no metadata or annotations.
 4 | ## This is fast but doesn't slot a lot of useful info.
 5 | test_that("Minimal mode", {
 6 |     x <- bcbioSingleCell(uploadDir = uploadDir)
 7 |     expect_s4_class(x, "bcbioSingleCell")
 8 | })
 9 | 
10 | ## Sample metadata loaded from a user-defined CSV file.
11 | ## The path is passed by name so it is matched to `sampleMetadataFile`.
12 | test_that("User-defined metadata", {
13 |     x <- bcbioSingleCell(
14 |         uploadDir = uploadDir,
15 |         sampleMetadataFile = file.path(uploadDir, "metadata.csv")
16 |     )
17 |     expect_s4_class(x, "bcbioSingleCell")
18 | })
19 | 
20 | ## Automatic organism annotations from AnnotationHub.
21 | test_that("AnnotationHub", {
22 |     x <- bcbioSingleCell(
23 |         uploadDir = uploadDir,
24 |         organism = "Homo sapiens"
25 |     )
26 |     expect_s4_class(x, "bcbioSingleCell")
27 | })
28 | 


--------------------------------------------------------------------------------
/R/reexports.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | #' @importFrom AcidGenerics calculateMetrics
 3 | AcidGenerics::calculateMetrics
 4 | 
 5 | #' @export
 6 | #' @importFrom AcidGenerics filterCells
 7 | AcidGenerics::filterCells
 8 | 
 9 | #' @export
10 | #' @importFrom AcidGenerics plotBarcodeRanks
11 | AcidGenerics::plotBarcodeRanks
12 | 
13 | #' @export
14 | #' @importFrom AcidGenerics plotCellCounts
15 | AcidGenerics::plotCellCounts
16 | 
17 | #' @export
18 | #' @importFrom AcidGenerics plotCountsPerCell
19 | AcidGenerics::plotCountsPerCell
20 | 
21 | #' @export
22 | #' @importFrom AcidGenerics plotCountsVsFeatures
23 | AcidGenerics::plotCountsVsFeatures
24 | 
25 | #' @export
26 | #' @importFrom AcidGenerics plotFeaturesPerCell
27 | AcidGenerics::plotFeaturesPerCell
28 | 
29 | #' @export
30 | #' @importFrom AcidGenerics plotMitoRatio
31 | AcidGenerics::plotMitoRatio
32 | 
33 | #' @export
34 | #' @importFrom AcidGenerics plotNovelty
35 | AcidGenerics::plotNovelty
36 | 
37 | #' @export
38 | #' @importFrom AcidGenerics plotQc
39 | AcidGenerics::plotQc
40 | 


--------------------------------------------------------------------------------
/inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA.mtx.rownames:
--------------------------------------------------------------------------------
 1 | ENSG00000071082
 2 | ENSG00000100316
 3 | ENSG00000106631
 4 | ENSG00000108821
 5 | ENSG00000112306
 6 | ENSG00000115414
 7 | ENSG00000125691
 8 | ENSG00000133112
 9 | ENSG00000137818
10 | ENSG00000138326
11 | ENSG00000140988
12 | ENSG00000142534
13 | ENSG00000142541
14 | ENSG00000142937
15 | ENSG00000143947
16 | ENSG00000147403
17 | ENSG00000147604
18 | ENSG00000156508
19 | ENSG00000159251
20 | ENSG00000164692
21 | ENSG00000167244
22 | ENSG00000167526
23 | ENSG00000168542
24 | ENSG00000174748
25 | ENSG00000177600
26 | ENSG00000184009
27 | ENSG00000186468
28 | ENSG00000197756
29 | ENSG00000198034
30 | ENSG00000198695
31 | ENSG00000198712
32 | ENSG00000198727
33 | ENSG00000198763
34 | ENSG00000198786
35 | ENSG00000198804
36 | ENSG00000198840
37 | ENSG00000198886
38 | ENSG00000198888
39 | ENSG00000198899
40 | ENSG00000198938
41 | ENSG00000205542
42 | ENSG00000225972
43 | ENSG00000229117
44 | ENSG00000229344
45 | ENSG00000233927
46 | ENSG00000237973
47 | ENSG00000248527
48 | ENSG00000256618
49 | ENSG00000269028
50 | ENSG00000282105
51 | 


--------------------------------------------------------------------------------
/man/updateObject.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/updateObject-methods.R
 3 | \name{updateObject}
 4 | \alias{updateObject}
 5 | \alias{updateObject,bcbioSingleCell-method}
 6 | \title{Update object}
 7 | \usage{
 8 | updateObject(object, ..., verbose = FALSE)
 9 | 
10 | \S4method{updateObject}{bcbioSingleCell}(object, ..., verbose = FALSE)
11 | }
12 | \arguments{
13 | \item{object}{Object.}
14 | 
15 | \item{...}{Additional arguments.}
16 | 
17 | \item{verbose}{\code{logical(1)}.
18 | Run the function with verbose output.}
19 | }
20 | \value{
21 | Modified object.
22 | }
23 | \description{
24 | Update object
25 | }
26 | \note{
27 | Updated 2023-12-04.
28 | }
29 | \examples{
30 | data(bcb)
31 | 
32 | ## bcbioSingleCell ====
33 | updateObject(bcb)
34 | 
35 | ## Example that depends on remote file.
36 | ## > x <- import(
37 | ## >     con = file.path(
38 | ## >         bcbioSingleCellTestsUrl,
39 | ## >         "bcbioSingleCell_0.1.0.rds"
40 | ## >     )
41 | ## > )
42 | ## > x <- updateObject(x)
43 | ## > x
44 | }
45 | \author{
46 | Michael Steinbaugh
47 | }
48 | 


--------------------------------------------------------------------------------
/inst/extdata/indrops/2018-01-01_bcbio/programs.txt:
--------------------------------------------------------------------------------
 1 | bamtools,2.4.0
 2 | bcbio-nextgen,1.0.6a0-d2b5b522
 3 | bcbio-variation,0.2.6
 4 | bcftools,1.6
 5 | bedtools,2.26.0
 6 | biobambam,2.0.79
 7 | bioconductor-bubbletree,2.6.0
 8 | bowtie2,2.2.8
 9 | bwa,0.7.16
10 | chanjo,
11 | cnvkit,0.9.0
12 | cufflinks,2.2.1
13 | cutadapt,1.14
14 | fastqc,0.11.5
15 | featurecounts,1.4.4
16 | freebayes,1.1.0.46
17 | gatk,3.8
18 | gatk-framework,3.6.24
19 | gatk4,4.0b6
20 | gemini,0.20.1
21 | grabix,0.1.8
22 | hisat2,2.1.0
23 | htseq,0.9.1
24 | lumpy-sv,0.2.13
25 | manta,1.1.0
26 | metasv,0.4.0
27 | mirdeep2,2.0.0.7
28 | mutect,1.1.5
29 | novoalign,3.07.00
30 | novosort,V3.00.02
31 | oncofuse,1.1.1
32 | phylowgs,20150714
33 | picard,2.13
34 | platypus-variant,0.8.1.1
35 | preseq,2.0.2
36 | qualimap,2.2.2a
37 | rna-star,
38 | rtg-tools,3.8.4
39 | sailfish,0.10.1
40 | salmon,0.8.2
41 | sambamba,0.6.6
42 | samblaster,0.1.24
43 | samtools,1.6
44 | scalpel,0.5.3
45 | seqbuster,3.1
46 | snpeff,4.3i
47 | vardict,2017.04.18
48 | vardict-java,1.5.1
49 | variant-effect-predictor,87
50 | varscan,2.4.3
51 | vcflib,1.0.0_rc1
52 | vt,2015.11.10
53 | wham,1.7.0.311
54 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reexports.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{calculateMetrics}
 7 | \alias{filterCells}
 8 | \alias{plotBarcodeRanks}
 9 | \alias{plotCellCounts}
10 | \alias{plotCountsPerCell}
11 | \alias{plotCountsVsFeatures}
12 | \alias{plotFeaturesPerCell}
13 | \alias{plotMitoRatio}
14 | \alias{plotNovelty}
15 | \alias{plotQc}
16 | \title{Objects exported from other packages}
17 | \keyword{internal}
18 | \description{
19 | These objects are imported from other packages. Follow the links
20 | below to see their documentation.
21 | 
22 | \describe{
23 |   \item{AcidGenerics}{\code{\link[AcidGenerics]{calculateMetrics}}, \code{\link[AcidGenerics]{filterCells}}, \code{\link[AcidGenerics]{plotBarcodeRanks}}, \code{\link[AcidGenerics]{plotCellCounts}}, \code{\link[AcidGenerics]{plotCountsPerCell}}, \code{\link[AcidGenerics]{plotCountsVsFeatures}}, \code{\link[AcidGenerics]{plotFeaturesPerCell}}, \code{\link[AcidGenerics]{plotMitoRatio}}, \code{\link[AcidGenerics]{plotNovelty}}, \code{\link[AcidGenerics]{plotQc}}}
24 | }}
25 | 
26 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | authors:
 2 |   Michael Steinbaugh:
 3 |     href: https://mike.steinbaugh.com/
 4 |   Rory Kirchner:
 5 |     href: https://github.com/roryk/
 6 |   Mary Piper:
 7 |     href: https://github.com/marypiper/
 8 |   Victor Barrera:
 9 |     href: https://github.com/vbarrera/
10 |   Shannan Ho Sui:
11 |     href: https://github.com/sjhosui/
12 |   Harvard Chan Bioinformatics Core:
13 |     href: https://bioinformatics.sph.harvard.edu/
14 |   Acid Genomics:
15 |     href: https://acidgenomics.com/
16 | navbar:
17 |   components:
18 |      acid:
19 |        icon: "fas fa-vial fa-lg"
20 |        href: https://acidgenomics.com/
21 |   structure:
22 |     right: [acid, github]
23 | news:
24 |   - one_page: false
25 | reference:
26 |   - title: S4 classes
27 |     contents:
28 |     - starts_with("bcbioSingleCell")
29 |     - extract
30 |     - show
31 |     - updateObject
32 |   - title: Plots
33 |     contents:
34 |     - starts_with("plot")
35 |   - title: Example data
36 |     contents:
37 |     - bcb
38 |   - title: Reexports
39 |     contents:
40 |     - reexports
41 | template:
42 |   bootstrap: 5
43 | url: https://r.acidgenomics.com/packages/bcbiosinglecell
44 | 


--------------------------------------------------------------------------------
/man/bcbioSingleCell-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/package.R
 3 | \docType{package}
 4 | \name{bcbioSingleCell-package}
 5 | \title{bcbioSingleCell}
 6 | \description{
 7 | Import and analyze \href{https://bcbio-nextgen.readthedocs.io/}{bcbio} single-cell
 8 | RNA-seq data.
 9 | }
10 | \seealso{
11 | Useful links:
12 | \itemize{
13 |   \item \url{https://r.acidgenomics.com/packages/bcbiosinglecell/}
14 |   \item \url{https://github.com/hbc/bcbioSingleCell/}
15 |   \item Report bugs at \url{https://github.com/hbc/bcbioSingleCell/issues/}
16 | }
17 | 
18 | }
19 | \author{
20 | \strong{Maintainer}: Michael Steinbaugh \email{mike@steinbaugh.com} (\href{https://orcid.org/0000-0002-2403-2221}{ORCID})
21 | 
22 | Authors:
23 | \itemize{
24 |   \item Rory Kirchner \email{roryk@alum.mit.edu} (\href{https://orcid.org/0000-0003-4814-5885}{ORCID})
25 |   \item Mary Piper \email{mary.piper@gmail.com} (\href{https://orcid.org/0000-0003-2699-3840}{ORCID})
26 |   \item Victor Barrera \email{barrera.vic@gmail.com} (\href{https://orcid.org/0000-0003-0590-4634}{ORCID})
27 |   \item Shannan Ho Sui \email{shosui@hsph.harvard.edu} (\href{https://orcid.org/0000-0002-6191-4709}{ORCID})
28 | }
29 | 
30 | Other contributors:
31 | \itemize{
32 |   \item Harvard Chan Bioinformatics Core \email{bioinformatics@hsph.harvard.edu} [copyright holder, funder]
33 |   \item Acid Genomics [copyright holder, funder]
34 | }
35 | 
36 | }
37 | \keyword{internal}
38 | 


--------------------------------------------------------------------------------
/man/plotReadsPerCell.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/plotReadsPerCell-methods.R
 3 | \name{plotReadsPerCell}
 4 | \alias{plotReadsPerCell}
 5 | \alias{plotReadsPerCell,bcbioSingleCell-method}
 6 | \title{Plot read counts per cell}
 7 | \usage{
 8 | plotReadsPerCell(object, ...)
 9 | 
10 | \S4method{plotReadsPerCell}{bcbioSingleCell}(
11 |   object,
12 |   interestingGroups = NULL,
13 |   geom = c("histogram", "ecdf", "violin", "ridgeline", "boxplot"),
14 |   cutoffLine = FALSE,
15 |   title = "Reads per cell"
16 | )
17 | }
18 | \arguments{
19 | \item{object}{Object.}
20 | 
21 | \item{interestingGroups}{\code{character}.
22 | Groups of interest to use for visualization.
23 | Corresponds to factors describing the columns of the object.}
24 | 
25 | \item{geom}{\code{character(1)}.
26 | Plot type. Uses \code{\link[base:match.arg]{match.arg()}} internally and defaults to
27 | the first argument in the \code{character} vector.}
28 | 
29 | \item{cutoffLine}{\code{logical(1)}.
30 | Include a line marking the cutoff.}
31 | 
32 | \item{title}{\code{character(1)}.
33 | Title.}
34 | 
35 | \item{...}{Additional arguments.}
36 | }
37 | \value{
38 | \code{ggplot}.
39 | }
40 | \description{
41 | Plot the distribution of read counts for all unfiltered cellular barcodes.
42 | }
43 | \note{
44 | Updated 2023-12-04.
45 | }
46 | \examples{
47 | data(bcb)
48 | 
49 | ## bcbioSingleCell ====
50 | plotReadsPerCell(bcb, geom = "histogram")
51 | plotReadsPerCell(bcb, geom = "ecdf")
52 | }
53 | \author{
54 | Michael Steinbaugh, Rory Kirchner
55 | }
56 | 


--------------------------------------------------------------------------------
/tests/testthat/test-filterCells.R:
--------------------------------------------------------------------------------
 1 | bcb <- calculateMetrics(bcb)
 2 | 
 3 | test_that("sampleNames", {
 4 |     expected <- c("multiplexed_AAAAAAAA" = "rep_1")
 5 |     expect_identical(object = sampleNames(bcb), expected = expected)
 6 | })
 7 | 
 8 | ## With default arguments, the dimensions are expected to be unchanged.
 9 | test_that("No filtering", {
10 |     filtered <- filterCells(bcb)
11 |     expect_s4_class(filtered, "bcbioSingleCell")
12 |     expect_identical(dim(filtered), dim(bcb))
13 | })
14 | 
15 | ## Refer to the quality control R Markdown for actual recommended cutoffs.
16 | ## These are skewed, and designed to work with our minimal dataset.
17 | test_that("Parameterized cutoff tests", {
18 |     ## Each case pairs one cutoff argument with the expected dimensions.
19 |     cases <- list(
20 |         list(args = list("minCounts" = 2000L), dim = c(50L, 35L)),
21 |         list(args = list("maxCounts" = 2500L), dim = c(50L, 88L)),
22 |         list(args = list("minFeatures" = 45L), dim = c(50L, 95L)),
23 |         list(args = list("maxFeatures" = 49L), dim = c(50L, 81L)),
24 |         list(args = list("maxMitoRatio" = 0.1), dim = c(50L, 22L)),
25 |         list(args = list("minNovelty" = 0.5), dim = c(50L, 81L)),
26 |         list(args = list("minCellsPerFeature" = 95L), dim = c(45L, 100L))
27 |     )
28 |     for (case in cases) {
29 |         args <- case[["args"]]
30 |         args[["object"]] <- bcb
31 |         x <- do.call(what = filterCells, args = args)
32 |         expect_s4_class(x, "bcbioSingleCell")
33 |         expect_s4_class(metadata(x)[["filterCells"]], "SimpleList")
34 |         expect_true(metadata(x)[["subset"]])
35 |         expect_identical(dim(x), case[["dim"]])
36 |     }
37 | })
38 | 
39 | test_that("Expected cutoff failure", {
40 |     expect_error(
41 |         object = filterCells(bcb, minCounts = Inf),
42 |         regexp = "No cells passed"
43 |     )
44 | })
45 | 


--------------------------------------------------------------------------------
/data-raw/bcb.R:
--------------------------------------------------------------------------------
 1 | ## inDrops example data
 2 | ## Using harvard-indrop-v3 barcodes.
 3 | ## Updated 2022-06-09.
 4 | ## nolint start
 5 | suppressPackageStartupMessages({
 6 |     library(devtools)
 7 |     library(usethis)
 8 |     library(pipette)
 9 | })
10 | ## nolint end
11 | load_all()
12 | limit <- structure(2e6L, class = "object_size") # nolint
13 | ## Minimal example bcbio upload directory.
14 | ## Keep the top 50 genes (rows) and the cells (columns) in the barcodes file.
15 | uploadDir <- file.path("..", "inst", "extdata", "indrops")
16 | sample <- "multiplexed-AAAAAAAA"
17 | countsFile <- file.path(
18 |     uploadDir,
19 |     sample,
20 |     paste0(sample, ".mtx")
21 | )
22 | rownamesFile <- file.path(
23 |     uploadDir,
24 |     sample,
25 |     paste0(sample, ".mtx.rownames")
26 | )
27 | colnamesFile <- file.path(
28 |     uploadDir,
29 |     sample,
30 |     paste0(sample, ".mtx.colnames")
31 | )
32 | barcodesFile <- file.path(
33 |     uploadDir,
34 |     sample,
35 |     paste0(sample, "-barcodes.tsv")
36 | )
37 | stopifnot(all(file.exists(
38 |     c(countsFile, rownamesFile, colnamesFile, barcodesFile)
39 | )))
40 | barcodes <- import(barcodesFile, colnames = FALSE)
41 | export(object = barcodes, con = barcodesFile, colnames = FALSE)
42 | counts <- import(countsFile)
43 | topGenes <-
44 |     counts |>
45 |     Matrix::rowSums() |>
46 |     sort(decreasing = TRUE) |>
47 |     head(n = 50L)
48 | genes <- sort(names(topGenes))
49 | cells <- barcodes[[1L]] # first column of the barcodes file
50 | counts <- counts[genes, cells]
51 | export(object = counts, con = countsFile) # overwrites the input matrix
52 | ## Create bcbioSingleCell object.
53 | bcb <- bcbioSingleCell(
54 |     uploadDir = uploadDir,
55 |     sampleMetadataFile = file.path(uploadDir, "metadata.csv"),
56 |     organism = "Homo sapiens",
57 |     ensemblRelease = 90L
58 | )
59 | stopifnot(
60 |     object.size(bcb) < limit,
61 |     validObject(bcb)
62 | )
63 | use_data(bcb, compress = "xz", overwrite = TRUE)
64 | 


--------------------------------------------------------------------------------
/R/show-methods.R:
--------------------------------------------------------------------------------
 1 | #' Show an object
 2 | #'
 3 | #' @name show
 4 | #' @author Michael Steinbaugh
 5 | #' @note Updated 2022-05-09.
 6 | #'
 7 | #' @inheritParams AcidRoxygen::params
 8 | #'
 9 | #' @return Console output.
10 | #'
11 | #' @examples
12 | #' data(bcb)
13 | #'
14 | #' ## bcbioSingleCell ====
15 | #' show(bcb)
16 | NULL
17 | 
18 | 
19 | 
20 | ## Print the object class, followed by the version when one is supplied.
21 | ## Updated 2019-07-24.
22 | .showHeader <- function(object, version = NULL) {
23 |     header <- paste(class(object), version)
24 |     cat(header, sep = "\n")
25 | }
26 | 
27 | 
28 | 
29 | ## Using the same internal method for bcbioSingleCell and CellRanger.
30 | ## Prints the header, a summary of selected metadata values, and then the
31 | ## SingleCellExperiment display.
32 | ## Updated 2019-08-08.
33 | `show,bcbioSingleCell` <- # nolint
34 |     function(object) {
35 |         validObject(object)
36 |         meta <- metadata(object)
37 |         rowRangesMeta <- metadata(rowRanges(object))
38 |         .showHeader(object, version = meta[["version"]])
39 |         ## `filtered` flags whether `filterCells` metadata is present.
40 |         info <- list(
41 |             uploadDir = meta[["uploadDir"]],
42 |             dates = as.character(c(
43 |                 bcbio = meta[["runDate"]],
44 |                 R = meta[["date"]]
45 |             )),
46 |             level = meta[["level"]],
47 |             sampleMetadataFile = meta[["sampleMetadataFile"]],
48 |             organism = meta[["organism"]],
49 |             gffFile = meta[["gffFile"]],
50 |             annotationHub = rowRangesMeta[["annotationHub"]],
51 |             ensemblRelease = rowRangesMeta[["release"]],
52 |             genomeBuild = rowRangesMeta[["build"]],
53 |             interestingGroups = meta[["interestingGroups"]],
54 |             filtered = isSubset("filterCells", names(meta))
55 |         )
56 |         showSlotInfo(info)
57 |         ## Extend the SingleCellExperiment method.
58 |         sceLines <- capture.output(show(as(object, "SingleCellExperiment")))
59 |         cat(sceLines, sep = "\n")
60 |     }
61 | 
62 | 
63 | 
64 | #' @rdname show
65 | #' @export
66 | setMethod(
67 |     f = "show",
68 |     signature = signature(object = "bcbioSingleCell"),
69 |     definition = `show,bcbioSingleCell`
70 | )
71 | 


--------------------------------------------------------------------------------
/inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA.mtx.colnames:
--------------------------------------------------------------------------------
  1 | AAACACTA-CTTCGATT
  2 | AAACTACA-CCACATTA
  3 | AACTGCCT-GCAAGGAC
  4 | AAGAAGGT-TCTGTGGT
  5 | AAGCCTTC-TAAATAGG
  6 | AATAAGGA-CCACATTA
  7 | AATCGAAG-CCCAAGCA
  8 | AATCGTTC-CCCTAACC
  9 | ACCCTCAA-CTGCGTTG
 10 | ACCTGAAG-GAGCGGTA
 11 | ACTAATTG-CTTTAATC
 12 | ACTAGAGC-TCGACACC
 13 | AGAAACCA-ATACTCTT
 14 | AGCTCCAC-CCTGACAC
 15 | AGGTAAGC-TCCCAATC
 16 | ATATGCAA-GGCGGTTT
 17 | ATCAATCG-GTTGTCAT
 18 | ATCGCGCT-AGAGGTGG
 19 | CAACGCAG-CTCGCGTA
 20 | CACAACCT-GGAGAAGC
 21 | CCATGCAT-TTCCGCTC
 22 | CCCGTTCT-AAAGCCTA
 23 | CCGAGATC-ATGGGCAC
 24 | CCGATACG-CAAGAGGG
 25 | CCGGAAAT-GTTGTCAT
 26 | CCTACGCT-AGCAGAAC
 27 | CGTGTGTT-AGGAAGAC
 28 | CGTTTCGT-GACAGATA
 29 | CTAGCACG-AATCGGGT
 30 | CTAGTAGG-TTGAGGGT
 31 | CTCACATC-ACCCACGA
 32 | CTCCTCCA-CGTATTTC
 33 | CTCCTCCA-TGTACACG
 34 | CTCTATAG-GCAAAGCC
 35 | CTGTCGCA-TTTAACAG
 36 | CTGTTAAA-AATGAATG
 37 | CTGTTAAA-GACAGATA
 38 | CTTAGGCC-AGAAGTCC
 39 | CTTAGTGT-TCCAGGGA
 40 | CTTCTACG-CTTCTTCG
 41 | CTTTATCC-CTACCGTT
 42 | GACACCTG-CTCAGAAT
 43 | GACTAGCG-GAAGTGCC
 44 | GAGAAACC-ACAGCGGA
 45 | GAGTGTAC-ACGCAGAG
 46 | GATTAAAG-ACTAGCCA
 47 | GATTACTT-GAGAATTG
 48 | GATTTCCC-ATGTTGGC
 49 | GCAAACTG-CTTCAGGT
 50 | GCCAACAT-CGTGGATA
 51 | GCCTGGTA-CGCTCTCA
 52 | GCGCTGAT-GACAAAGG
 53 | GCGTGCAA-TCTGTGGT
 54 | GGAACGAA-TTGCACGC
 55 | GGCGACAA-TTCCGAGT
 56 | GGCTTTGC-AACCCTTG
 57 | GGCTTTGC-AGCGAAGT
 58 | GGCTTTGC-TTAGGACC
 59 | GGGATTAC-AAATGTCG
 60 | GGTTGAGA-CTGTCTGG
 61 | GGTTGAGA-GAGAGTAT
 62 | GTAAGCCG-CGATTGAT
 63 | GTAATCTG-CGCTAATA
 64 | GTACGCTT-CCCAAGCA
 65 | GTACGGAC-CAACAAAT
 66 | GTCCACTA-CTTCTGGA
 67 | GTCCACTA-GACAAAGG
 68 | GTCCGTCA-ATACTCTT
 69 | GTCTAATC-GGCCCTTA
 70 | GTGAACTC-CAAGAGGG
 71 | GTGAGGCA-CAGTTTGC
 72 | GTGATAAA-CGCTCTCA
 73 | GTGCCCAT-GTGTCGGA
 74 | GTGGTGCT-GGAGAAGC
 75 | GTTACTAG-AGAAGTTA
 76 | GTTACTAG-CCCTTGGT
 77 | GTTCTGCT-TGGCTACC
 78 | TAATCCAT-CGGAATTT
 79 | TACCGCTC-CCCATAGC
 80 | TACGCGAG-TGTAGTTT
 81 | TACGTTCG-TTGATCTA
 82 | TAGGCTTT-CGGACAAC
 83 | TAGTAGCC-TAGTGTTT
 84 | TATTAGCG-CCCTAACC
 85 | TCAGCCTC-TGCAAGGG
 86 | TCCGACAC-GGGAGGTA
 87 | TCGCAATC-CGAACGTA
 88 | TCGGTCAT-AGCACCAC
 89 | TCTAAACT-CTCTTGAC
 90 | TCTTTGAC-CGCTCTCA
 91 | TGAGAGCG-CCTATTCA
 92 | TGAGAGCG-GAAGTGCC
 93 | TGAGCACA-TGCTATTT
 94 | TGCGACTA-CCGTGTTT
 95 | TGCGACTA-TTCACATA
 96 | TGCTTCAT-GCAGGGTA
 97 | TGCTTGGG-CAACAAAT
 98 | TGGACGGA-TTGTTTAC
 99 | TGGGAATT-ATATAGGA
100 | TGTTATCA-ACGCAGAG
101 | 


--------------------------------------------------------------------------------
/R/internal-barcodes.R:
--------------------------------------------------------------------------------
 1 | #' Raw reads per cellular barcode
 2 | #'
 3 | #' Read counts prior to UMI disambiguation.
 4 | #'
 5 | #' @author Michael Steinbaugh
 6 | #' @keywords internal
 7 | #' @note Updated 2019-08-08.
 8 | #' @noRd
 9 | #'
10 | #' @param list `list`.
11 | #' Cellular barcodes per sample.
12 | #'
13 | #' @return `integer`.
14 | #' Cell identifiers are the names and raw reads are the values.
15 | .nRead <- function(list) {
16 |     assert(
17 |         is.list(list),
18 |         hasNames(list),
19 |         is.integer(list[[1L]]),
20 |         hasNames(list[[1L]])
21 |     )
22 |     if (hasLength(list, n = 1L)) {
23 |         list[[1L]]
24 |     } else {
25 |         ## This will unlist using a "." separator.
26 |         ## Renaming "." to "_" in names.
27 |         x <- unlist(list, use.names = TRUE)
28 |         names(x) <- makeNames(names(x))
29 |         x
30 |     }
31 | }
32 | 
33 | 
34 | 
#' Obtain the raw, unfiltered cellular barcode read counts
#'
#' Reads the `cellularBarcodes` list from the object metadata, converts each
#' element into a `DataFrame` with `sampleId`, `cellId` and `nRead` columns,
#' stacks these, and left joins the sample data by `sampleId`.
#'
#' @note Updated 2023-12-04.
#' @noRd
#'
#' @param object `bcbioSingleCell`.
#'
#' @return `DataFrame`.
.rawMetrics <- function(object) {
    assert(is(object, "bcbioSingleCell"))
    barcodes <- metadata(object)[["cellularBarcodes"]]
    assert(
        is.list(barcodes),
        msg = sprintf(
            fmt = paste(
                "Object does not contain unfiltered cellular barcodes.",
                "Has {.fun %s} been applied?",
                "This step drops them."
            ),
            "filterCells"
        )
    )
    ## List class is already guaranteed by the check above.
    assert(hasNames(barcodes))
    ## One data frame per element, keyed by element name and barcode names.
    perSample <- function(sampleId, reads) {
        DataFrame(
            sampleId = as.factor(sampleId),
            cellId = as.factor(names(reads)),
            nRead = reads,
            row.names = NULL
        )
    }
    stacked <- unlist(
        DataFrameList(Map(
            f = perSample,
            sampleId = names(barcodes),
            reads = barcodes
        )),
        use.names = FALSE
    )
    ## Expose the sample data row names as the join key.
    sampleMeta <- sampleData(object)
    sampleMeta[["sampleId"]] <- as.factor(rownames(sampleMeta))
    out <- leftJoin(stacked, sampleMeta, by = "sampleId")
    assert(
        is(out, "DataFrame"),
        !hasRownames(out),
        isSubset(c("sampleId", "cellId", "nRead"), colnames(out)),
        is.integer(out[["nRead"]])
    )
    out
}
83 | 


--------------------------------------------------------------------------------
/man/extract.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract-methods.R
 3 | \name{extract}
 4 | \alias{extract}
 5 | \alias{[,bcbioSingleCell,ANY,ANY,ANY-method}
 6 | \title{Extract or replace parts of an object}
 7 | \usage{
 8 | \S4method{[}{bcbioSingleCell,ANY,ANY,ANY}(x, i, j, ..., drop = FALSE)
 9 | }
10 | \arguments{
11 | \item{x}{Object.}
12 | 
13 | \item{i}{Indices specifying elements to extract or replace. Indices are \code{numeric} or
14 | \code{character} vectors, empty (\code{missing}), or \code{NULL}.
15 | 
16 | For more information:
17 | 
18 | \if{html}{\out{<div class="sourceCode">}}\preformatted{help(topic = "Extract", package = "base")
19 | }\if{html}{\out{</div>}}}
20 | 
21 | \item{j}{Indices specifying elements to extract or replace. Indices are \code{numeric} or
22 | \code{character} vectors, empty (\code{missing}), or \code{NULL}.
23 | 
24 | For more information:
25 | 
26 | \if{html}{\out{<div class="sourceCode">}}\preformatted{help(topic = "Extract", package = "base")
27 | }\if{html}{\out{</div>}}}
28 | 
29 | \item{...}{Additional arguments.}
30 | 
31 | \item{drop}{For matrices and arrays.  If \code{TRUE} the result is
32 |     coerced to the lowest possible dimension (see the examples).  This
33 |     only works for extracting elements, not for the replacement.  See
34 |     \code{\link[base]{drop}} for further details.
35 |   }
36 | }
37 | \value{
38 | \code{bcbioSingleCell}.
39 | }
40 | \description{
41 | Extract genes by row and cells by column.
42 | }
43 | \details{
44 | Refer to \code{cellToSample()} and \code{selectSamples()} if sample-level extraction is
45 | desired. Note that \code{sampleId} is slotted into \code{colData} and defines the
46 | cell-to-sample mappings.
47 | 
48 | Unfiltered cellular barcode distributions for the entire dataset, including
49 | cells not kept in the matrix will be dropped in favor of the \code{nCount} column
50 | of \code{colData()}.
51 | }
52 | \note{
53 | Updated 2021-09-10.
54 | }
55 | \examples{
56 | ## bcbioSingleCell ====
57 | data(bcb)
58 | 
59 | cells <- head(colnames(bcb))
60 | head(cells)
61 | genes <- head(rownames(bcb))
62 | head(genes)
63 | 
64 | ## Subset by cell identifiers.
65 | bcb[, cells]
66 | 
67 | ## Subset by genes.
68 | bcb[genes, ]
69 | 
70 | ## Subset by both genes and cells.
71 | bcb[genes, cells]
72 | }
73 | \references{
74 | Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
75 |   \emph{The New S Language}.
76 |   Wadsworth & Brooks/Cole.
77 | }
78 | \author{
79 | Michael Steinbaugh
80 | }
81 | 


--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
 1 | linters: linters_with_defaults(
 2 |     # Updated 2023-07-24.
 3 |     # > help(topic = "linters", package = "lintr")
 4 |     T_and_F_symbol_linter(),
 5 |     absolute_path_linter(),
 6 |     any_duplicated_linter(),
 7 |     any_is_na_linter(),
 8 |     assignment_linter(),
 9 |     backport_linter(),
10 |     brace_linter(),
11 |     class_equals_linter(),
12 |     commas_linter(),
13 |     commented_code_linter(),
14 |     condition_message_linter(),
15 |     conjunct_test_linter(),
16 |     consecutive_assertion_linter(),
17 |     cyclocomp_linter(complexity_limit = 30L),
18 |     duplicate_argument_linter(),
19 |     equals_na_linter(),
20 |     expect_comparison_linter(),
21 |     expect_identical_linter(),
22 |     expect_length_linter(),
23 |     expect_named_linter(),
24 |     expect_not_linter(),
25 |     expect_null_linter(),
26 |     expect_s3_class_linter(),
27 |     expect_s4_class_linter(),
28 |     expect_true_false_linter(),
29 |     extraction_operator_linter(),
30 |     function_left_parentheses_linter(),
31 |     ifelse_censor_linter(),
32 |     implicit_integer_linter(),
33 |     indentation_linter(indent = 4L, hanging_indent_style = "tidy"),
34 |     infix_spaces_linter(),
35 |     inner_combine_linter(),
36 |     line_length_linter(length = 80L),
37 |     literal_coercion_linter(),
38 |     missing_argument_linter(),
39 |     missing_package_linter(),
40 |     namespace_linter(),
41 |     nested_ifelse_linter(),
42 |     # This is currently returning too many false positives.
43 |     # > nonportable_path_linter = NULL,
44 |     numeric_leading_zero_linter(),
45 |     object_length_linter(length = 40L),
46 |     object_name_linter(styles = "camelCase"),
47 |     object_usage_linter(),
48 |     outer_negation_linter(),
49 |     package_hooks_linter(),
50 |     paren_body_linter(),
51 |     paste_linter(),
52 |     pipe_call_linter(),
53 |     pipe_continuation_linter(),
54 |     quotes_linter(),
55 |     redundant_ifelse_linter(),
56 |     regex_subset_linter(),
57 |     semicolon_linter(),
58 |     seq_linter(),
59 |     spaces_inside_linter(),
60 |     spaces_left_parentheses_linter(),
61 |     sprintf_linter(),
62 |     system_file_linter(),
63 |     todo_comment_linter(),
64 |     trailing_blank_lines_linter(),
65 |     trailing_whitespace_linter(),
66 |     undesirable_function_linter(),
67 |     undesirable_operator_linter(),
68 |     unnecessary_concatenation_linter(),
69 |     unreachable_code_linter(),
70 |     vector_logic_linter(),
71 |     whitespace_linter(),
72 |     yoda_test_linter())
73 | exclude: "# nolint"
74 | exclude_start: "# nolint start"
75 | exclude_end: "# nolint end"
76 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: bcbioSingleCell
 2 | Title: Bcbio Single-Cell RNA-Seq
 3 | Description: R package for bcbio single-cell RNA-seq analysis.
 4 | Version: 0.7.1
 5 | Date: 2023-12-04
 6 | Authors@R: c(
 7 |     person(
 8 |         given = "Michael",
 9 |         family = "Steinbaugh",
10 |         email = "mike@steinbaugh.com",
11 |         role = c("aut", "cre"),
12 |         comment = c(ORCID = "0000-0002-2403-2221")
13 |     ),
14 |     person(
15 |         given = "Rory",
16 |         family = "Kirchner",
17 |         email = "roryk@alum.mit.edu",
18 |         role = "aut",
19 |         comment = c(ORCID = "0000-0003-4814-5885")
20 |     ),
21 |     person(
22 |         given = "Mary",
23 |         family = "Piper",
24 |         email = "mary.piper@gmail.com",
25 |         role = "aut",
26 |         comment = c(ORCID = "0000-0003-2699-3840")
27 |     ),
28 |     person(
29 |         given = "Victor",
30 |         family = "Barrera",
31 |         email = "barrera.vic@gmail.com",
32 |         role = "aut",
33 |         comment = c(ORCID = "0000-0003-0590-4634")
34 |     ),
35 |     person(
36 |         given = "Shannan",
37 |         family = "Ho Sui",
38 |         email = "shosui@hsph.harvard.edu",
39 |         role = "aut",
40 |         comment = c(ORCID = "0000-0002-6191-4709")
41 |     ),
42 |     person(
43 |         given = "Harvard Chan Bioinformatics Core",
44 |         email = "bioinformatics@hsph.harvard.edu",
45 |         role = c("cph", "fnd")
46 |     ),
47 |     person(
48 |         given = "Acid Genomics",
49 |         role = c("cph", "fnd")
50 |     ))
51 | URL:
52 |     https://r.acidgenomics.com/packages/bcbiosinglecell/,
53 |     https://github.com/hbc/bcbioSingleCell/
54 | BugReports: https://github.com/hbc/bcbioSingleCell/issues/
55 | License: AGPL-3
56 | Encoding: UTF-8
57 | LazyData: false
58 | Depends: R (>= 4.3)
59 | Imports:
60 |     AcidBase (>= 0.7.0),
61 |     AcidCLI (>= 0.3.0),
62 |     AcidExperiment (>= 0.5.0),
63 |     AcidGenerics (>= 0.7.1),
64 |     AcidGenomes (>= 0.6.0),
65 |     AcidMarkdown (>= 0.3.0),
66 |     AcidPlots (>= 0.7.0),
67 |     AcidPlyr (>= 0.5.0),
68 |     AcidSingleCell (>= 0.4.0),
69 |     BiocGenerics (>= 0.46.0),
70 |     IRanges (>= 2.34.0),
71 |     S4Vectors (>= 0.38.0),
72 |     SingleCellExperiment (>= 1.22.0),
73 |     SummarizedExperiment (>= 1.30.0),
74 |     bcbioBase (>= 0.9.0),
75 |     ggplot2 (>= 3.4.3),
76 |     ggridges (>= 0.5.4),
77 |     goalie (>= 0.7.1),
78 |     pipette (>= 0.14.0),
79 |     syntactic (>= 0.7.0),
80 |     methods,
81 |     parallel,
82 |     utils
83 | Suggests:
84 |     AcidDevTools (>= 0.7.1),
85 |     Biostrings (>= 2.68.0),
86 |     basejump (>= 0.18.0),
87 |     rmarkdown (>= 2.25),
88 |     testthat (>= 3.1.10),
89 |     graphics
90 | Additional_repositories: https://r.acidgenomics.com
91 | Config/testthat/edition: 3
92 | Config/testthat/parallel: true
93 | Roxygen: list(markdown = TRUE)
94 | RoxygenNote: 7.2.3
95 | 


--------------------------------------------------------------------------------
/R/extract-methods.R:
--------------------------------------------------------------------------------
 1 | #' Extract or replace parts of an object
 2 | #'
 3 | #' Extract genes by row and cells by column.
 4 | #'
 5 | #' Refer to `cellToSample()` and `selectSamples()` if sample-level extraction is
 6 | #' desired. Note that `sampleId` is slotted into `colData` and defines the
 7 | #' cell-to-sample mappings.
 8 | #'
 9 | #' Unfiltered cellular barcode distributions for the entire dataset, including
10 | #' cells not kept in the matrix, will be dropped in favor of the `nCount` column
11 | #' of `colData()`.
12 | #'
13 | #' @name extract
14 | #' @author Michael Steinbaugh
15 | #' @inherit base::Extract params references
16 | #' @note Updated 2021-09-10.
17 | #'
18 | #' @inheritParams AcidRoxygen::params
19 | #'
20 | #' @return `bcbioSingleCell`.
21 | #'
22 | #' @examples
23 | #' ## bcbioSingleCell ====
24 | #' data(bcb)
25 | #'
26 | #' cells <- head(colnames(bcb))
27 | #' head(cells)
28 | #' genes <- head(rownames(bcb))
29 | #' head(genes)
30 | #'
31 | #' ## Subset by cell identifiers.
32 | #' bcb[, cells]
33 | #'
34 | #' ## Subset by genes.
35 | #' bcb[genes, ]
36 | #'
37 | #' ## Subset by both genes and cells.
38 | #' bcb[genes, cells]
39 | NULL
40 | 
41 | 
42 | 
## Internal `[` method for `bcbioSingleCell`.
##
## Subsets genes (rows, `i`) and cells (columns, `j`) via the
## `SingleCellExperiment` method, then rebuilds a `bcbioSingleCell` object.
##
## - `x`: `bcbioSingleCell` object.
## - `i`, `j`: Row and column indices. When missing, all rows / columns are
##   selected.
## - `drop`: Must be `FALSE`; any other value fails the assert check.
##
## Returns `x` itself when the primary assay is unchanged by the extraction.
## Otherwise returns a new `bcbioSingleCell`. When the dimensions changed, the
## `cellularBarcodes`, `filterCells` and `filterGenes` metadata elements are
## removed and `subset = TRUE` is stashed in the metadata.
##
## Updated 2019-08-20.
`extract,bcbioSingleCell` <- # nolint
    function(x, i, j, ..., drop = FALSE) {
        validObject(x)
        assert(identical(drop, FALSE))
        ## Genes (rows).
        if (missing(i)) {
            i <- seq_len(nrow(x))
        }
        ## Cells (columns).
        if (missing(j)) {
            j <- seq_len(ncol(x))
        }
        ## Subset using SCE method.
        sce <- as(x, "SingleCellExperiment")
        sce <- sce[i, j, drop = drop]
        ## Early return original object, if unmodified.
        if (identical(assay(sce), assay(x))) {
            return(x)
        }
        ## Determine whether we should stash subset in metadata.
        ## Compare the dimensions after extraction rather than the index
        ## lengths: a logical index always has the full length even when it
        ## drops elements, and a negative index can match the dimension
        ## length by coincidence, so index lengths can miss a real subset.
        subset <- !identical(dim(sce), dim(x))
        ## Metadata ------------------------------------------------------------
        metadata <- metadata(sce)
        if (isTRUE(subset)) {
            metadata[["cellularBarcodes"]] <- NULL
            metadata[["filterCells"]] <- NULL
            metadata[["filterGenes"]] <- NULL
            metadata[["subset"]] <- TRUE
        }
        metadata <- Filter(f = Negate(is.null), x = metadata)
        metadata(sce) <- metadata
        ## Return --------------------------------------------------------------
        sce <- droplevels2(sce)
        new(Class = "bcbioSingleCell", sce)
    }



#' @rdname extract
#' @export
setMethod(
    "[",
    signature(
        x = "bcbioSingleCell",
        i = "ANY",
        j = "ANY",
        drop = "ANY"
    ),
    definition = `extract,bcbioSingleCell`
)
98 | 


--------------------------------------------------------------------------------
/man/bcbioSingleCell.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGenerators.R
 3 | \name{bcbioSingleCell}
 4 | \alias{bcbioSingleCell}
 5 | \title{bcbio single-cell RNA-seq data set}
 6 | \usage{
 7 | bcbioSingleCell(
 8 |   uploadDir,
 9 |   sampleMetadataFile = NULL,
10 |   organism = NULL,
11 |   ensemblRelease = NULL,
12 |   genomeBuild = NULL,
13 |   gffFile = NULL,
14 |   transgeneNames = NULL,
15 |   interestingGroups = "sampleName"
16 | )
17 | }
18 | \arguments{
19 | \item{uploadDir}{\code{character(1)}.
20 | Final upload directory path.}
21 | 
22 | \item{sampleMetadataFile}{\code{character(1)}.
23 | Sample metadata file path. CSV or TSV is preferred, but Excel worksheets
24 | are also supported. Check the documentation for conventions and required
25 | columns.}
26 | 
27 | \item{organism}{\code{character(1)}.
28 | Full Latin organism name (e.g. \code{"Homo sapiens"}).}
29 | 
30 | \item{ensemblRelease}{\code{integer(1)}.
31 | Ensembl release version (e.g. \code{100}). We recommend setting this value if
32 | possible, for improved reproducibility. When left unset, the latest release
33 | available via AnnotationHub/ensembldb is used. Note that the latest version
34 | available can vary, depending on the versions of AnnotationHub and
35 | ensembldb in use.}
36 | 
37 | \item{genomeBuild}{\code{character(1)}.
38 | Ensembl genome build assembly name (e.g. \code{"GRCh38"}). If set \code{NULL},
39 | defaults to the most recent build available.
40 | \emph{Note:} don't pass in UCSC build IDs (e.g. \code{"hg38"}).}
41 | 
42 | \item{gffFile}{\code{character(1)}.
43 | GFF/GTF (General Feature Format) file.
44 | Generally, we recommend using GTF (GFFv2) instead of GFFv3.}
45 | 
46 | \item{transgeneNames}{\code{character}.
47 | Vector indicating which assay rows denote transgenes (e.g. EGFP, TDTOMATO).}
48 | 
49 | \item{interestingGroups}{\code{character}.
50 | Groups of interest to use for visualization.
51 | Corresponds to factors describing the columns of the object.}
52 | }
53 | \value{
54 | \code{bcbioSingleCell}.
55 | }
56 | \description{
57 | \code{bcbioSingleCell} is an S4 class that extends \code{SingleCellExperiment}, and is
58 | designed to store a bcbio single-cell RNA-seq analysis. This class contains
59 | read counts saved as a sparse matrix (\code{sparseMatrix}), sample metadata, and
60 | cell quality control metrics.
61 | }
62 | \note{
63 | Updated 2023-09-21.
64 | }
65 | \section{Remote data}{
66 | 
67 | 
68 | When working in RStudio, we recommend connecting to the bcbio-nextgen run
69 | directory as a remote connection over
70 | \href{https://github.com/osxfuse/osxfuse/wiki/SSHFS}{sshfs}.
71 | }
72 | 
73 | \examples{
74 | uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell")
75 | 
76 | x <- bcbioSingleCell(uploadDir)
77 | print(x)
78 | 
79 | x <- bcbioSingleCell(
80 |     uploadDir = uploadDir,
81 |     sampleMetadataFile = file.path(uploadDir, "metadata.csv")
82 | )
83 | print(x)
84 | }
85 | \seealso{
86 | \itemize{
87 | \item \code{SingleCellExperiment::SingleCellExperiment()}.
88 | \item \code{.S4methods(class = "bcbioSingleCell")}.
89 | }
90 | }
91 | \author{
92 | Michael Steinbaugh
93 | }
94 | 


--------------------------------------------------------------------------------
/inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA-barcodes.tsv:
--------------------------------------------------------------------------------
  1 | "AAACACTA-CTTCGATT"	71802
  2 | "AAACTACA-CCACATTA"	76590
  3 | "AACTGCCT-GCAAGGAC"	106100
  4 | "AAGAAGGT-TCTGTGGT"	74449
  5 | "AAGCCTTC-TAAATAGG"	105540
  6 | "AATAAGGA-CCACATTA"	35645
  7 | "AATCGAAG-CCCAAGCA"	68099
  8 | "AATCGTTC-CCCTAACC"	84804
  9 | "ACCCTCAA-CTGCGTTG"	66194
 10 | "ACCTGAAG-GAGCGGTA"	57162
 11 | "ACTAATTG-CTTTAATC"	63990
 12 | "ACTAGAGC-TCGACACC"	82100
 13 | "AGAAACCA-ATACTCTT"	56283
 14 | "AGCTCCAC-CCTGACAC"	58527
 15 | "AGGTAAGC-TCCCAATC"	82639
 16 | "ATATGCAA-GGCGGTTT"	68647
 17 | "ATCAATCG-GTTGTCAT"	72148
 18 | "ATCGCGCT-AGAGGTGG"	77010
 19 | "CAACGCAG-CTCGCGTA"	78838
 20 | "CACAACCT-GGAGAAGC"	51457
 21 | "CCATGCAT-TTCCGCTC"	68447
 22 | "CCCGTTCT-AAAGCCTA"	106986
 23 | "CCGAGATC-ATGGGCAC"	90386
 24 | "CCGATACG-CAAGAGGG"	131259
 25 | "CCGGAAAT-GTTGTCAT"	61199
 26 | "CCTACGCT-AGCAGAAC"	86060
 27 | "CGTGTGTT-AGGAAGAC"	124799
 28 | "CGTTTCGT-GACAGATA"	76444
 29 | "CTAGCACG-AATCGGGT"	90582
 30 | "CTAGTAGG-TTGAGGGT"	66021
 31 | "CTCACATC-ACCCACGA"	57006
 32 | "CTCCTCCA-CGTATTTC"	94587
 33 | "CTCCTCCA-TGTACACG"	58778
 34 | "CTCTATAG-GCAAAGCC"	82448
 35 | "CTGTCGCA-TTTAACAG"	111111
 36 | "CTGTTAAA-AATGAATG"	51878
 37 | "CTGTTAAA-GACAGATA"	93666
 38 | "CTTAGGCC-AGAAGTCC"	111183
 39 | "CTTAGTGT-TCCAGGGA"	57623
 40 | "CTTCTACG-CTTCTTCG"	67106
 41 | "CTTTATCC-CTACCGTT"	92126
 42 | "GACACCTG-CTCAGAAT"	49189
 43 | "GACTAGCG-GAAGTGCC"	91684
 44 | "GAGAAACC-ACAGCGGA"	50844
 45 | "GAGTGTAC-ACGCAGAG"	64944
 46 | "GATTAAAG-ACTAGCCA"	71098
 47 | "GATTACTT-GAGAATTG"	65073
 48 | "GATTTCCC-ATGTTGGC"	84194
 49 | "GCAAACTG-CTTCAGGT"	52167
 50 | "GCCAACAT-CGTGGATA"	38063
 51 | "GCCTGGTA-CGCTCTCA"	51643
 52 | "GCGCTGAT-GACAAAGG"	47118
 53 | "GCGTGCAA-TCTGTGGT"	66552
 54 | "GGAACGAA-TTGCACGC"	52399
 55 | "GGCGACAA-TTCCGAGT"	61851
 56 | "GGCTTTGC-AACCCTTG"	54214
 57 | "GGCTTTGC-AGCGAAGT"	70085
 58 | "GGCTTTGC-TTAGGACC"	92908
 59 | "GGGATTAC-AAATGTCG"	60698
 60 | "GGTTGAGA-CTGTCTGG"	101810
 61 | "GGTTGAGA-GAGAGTAT"	126962
 62 | "GTAAGCCG-CGATTGAT"	92347
 63 | "GTAATCTG-CGCTAATA"	79897
 64 | "GTACGCTT-CCCAAGCA"	77318
 65 | "GTACGGAC-CAACAAAT"	70943
 66 | "GTCCACTA-CTTCTGGA"	65666
 67 | "GTCCACTA-GACAAAGG"	59151
 68 | "GTCCGTCA-ATACTCTT"	74739
 69 | "GTCTAATC-GGCCCTTA"	77431
 70 | "GTGAACTC-CAAGAGGG"	118550
 71 | "GTGAGGCA-CAGTTTGC"	58023
 72 | "GTGATAAA-CGCTCTCA"	47077
 73 | "GTGCCCAT-GTGTCGGA"	61911
 74 | "GTGGTGCT-GGAGAAGC"	84629
 75 | "GTTACTAG-AGAAGTTA"	57592
 76 | "GTTACTAG-CCCTTGGT"	94832
 77 | "GTTCTGCT-TGGCTACC"	99905
 78 | "TAATCCAT-CGGAATTT"	64021
 79 | "TACCGCTC-CCCATAGC"	62870
 80 | "TACGCGAG-TGTAGTTT"	73071
 81 | "TACGTTCG-TTGATCTA"	80071
 82 | "TAGGCTTT-CGGACAAC"	110058
 83 | "TAGTAGCC-TAGTGTTT"	79818
 84 | "TATTAGCG-CCCTAACC"	52979
 85 | "TCAGCCTC-TGCAAGGG"	125515
 86 | "TCCGACAC-GGGAGGTA"	120160
 87 | "TCGCAATC-CGAACGTA"	113926
 88 | "TCGGTCAT-AGCACCAC"	104007
 89 | "TCTAAACT-CTCTTGAC"	70414
 90 | "TCTTTGAC-CGCTCTCA"	96402
 91 | "TGAGAGCG-CCTATTCA"	68229
 92 | "TGAGAGCG-GAAGTGCC"	87673
 93 | "TGAGCACA-TGCTATTT"	84576
 94 | "TGCGACTA-CCGTGTTT"	34262
 95 | "TGCGACTA-TTCACATA"	101690
 96 | "TGCTTCAT-GCAGGGTA"	128690
 97 | "TGCTTGGG-CAACAAAT"	69455
 98 | "TGGACGGA-TTGTTTAC"	118717
 99 | "TGGGAATT-ATATAGGA"	94737
100 | "TGTTATCA-ACGCAGAG"	50886
101 | 


--------------------------------------------------------------------------------
/R/package.R:
--------------------------------------------------------------------------------
 1 | #' bcbioSingleCell
 2 | #'
 3 | #' Import and analyze [bcbio](https://bcbio-nextgen.readthedocs.io/) single-cell
 4 | #' RNA-seq data.
 5 | #'
 6 | #' @aliases NULL
 7 | #' @keywords internal
 8 | "_PACKAGE"
 9 | 
10 | 
11 | 
12 | ## S4 classes ==================================================================
13 | 
14 | #' @importClassesFrom SingleCellExperiment SingleCellExperiment
15 | NULL
16 | 
17 | 
18 | 
19 | ## S4 generics and methods =====================================================
20 | 
21 | #' @importFrom AcidExperiment sampleNames
22 | #' @importFrom AcidGenerics calculateMetrics camelCase droplevels2
23 | #' interestingGroups interestingGroups<- leftJoin makeDimnames makeLabel
24 | #' makeNames metrics plotReadsPerCell sampleData
25 | #' @importFrom BiocGenerics counts updateObject
26 | #' @importFrom S4Vectors cbind do.call droplevels lapply mcols mcols<-
27 | #' metadata metadata<-
28 | #' @importFrom SummarizedExperiment assayNames assay assays assays<- colData
29 | #' colData<- rowData rowData<- rowRanges rowRanges<-
30 | #' @importFrom methods coerce show
31 | #' @importFrom pipette import
32 | NULL
33 | 
34 | #' @importMethodsFrom AcidExperiment calculateMetrics interestingGroups
35 | #' interestingGroups<- metrics sampleData sampleNames
36 | #' @importMethodsFrom AcidPlyr leftJoin
37 | #' @importMethodsFrom AcidSingleCell sampleData
38 | #' @importMethodsFrom pipette import
39 | #' @importMethodsFrom syntactic camelCase makeDimnames makeLabel makeNames
40 | NULL
41 | 
42 | 
43 | 
44 | ## Standard functions ==========================================================
45 | 
46 | #' @importFrom AcidBase metricsCols printString realpath showSlotInfo
47 | #' standardizeCall strMatch
48 | #' @importFrom AcidCLI abort alert alertSuccess alertWarning h1 h2
49 | #' separator toInlineString
50 | #' @importFrom AcidExperiment detectLanes droplevels2 importSampleData
51 | #' matchInterestingGroups minimalSampleData
52 | #' @importFrom AcidGenomes emptyRanges makeGRangesFromEnsembl makeGRangesFromGff
53 | #' @importFrom AcidMarkdown markdownPlots
54 | #' @importFrom AcidPlots !!! .data acid_geom_abline acid_geom_label
55 | #' acid_geom_label_average acid_geom_label_repel acid_scale_color_discrete
56 | #' acid_scale_fill_discrete syms
57 | #' @importFrom AcidSingleCell makeSingleCellExperiment mapCellsToSamples
58 | #' @importFrom IRanges DataFrameList
59 | #' @importFrom S4Vectors DataFrame SimpleList
60 | #' @importFrom bcbioBase getBarcodeCutoffFromCommands getGtfFileFromYaml
61 | #' getLevelFromCommands getSampleDataFromYaml getUmiTypeFromCommands
62 | #' importDataVersions importProgramVersions projectDir runDate sampleDirs
63 | #' @importFrom ggplot2 aes facet_wrap geom_boxplot geom_histogram geom_step
64 | #' geom_violin ggplot labs scale_x_continuous scale_y_continuous stat_ecdf vars
65 | #' @importFrom ggridges geom_density_ridges
66 | #' @importFrom goalie allAreDirectories allAreFiles areDisjointSets areSetEqual
67 | #' assert hasLength hasNames hasRownames hasValidDimnames isADirectory isAFile
68 | #' isAUrl isAny isCharacter isDirectory isFile isFlag isInt isString isSubset
69 | #' requireNamespaces validate validateClasses
70 | #' @importFrom methods .hasSlot as as<- is new setClass slot slot<- validObject
71 | #' @importFrom parallel mcMap mclapply
72 | #' @importFrom utils capture.output packageName packageVersion
73 | NULL
74 | 


--------------------------------------------------------------------------------
/todo.org:
--------------------------------------------------------------------------------
 1 | #+TITLE: bcbioSingleCell
 2 | #+STARTUP: content
 3 | * Development
 4 | ** TODO Need to address this note now popping up in build checks.
 5 |     Note: found 88 marked UTF-8 strings
 6 | ** TODO Need to ensure R Markdown renders correctly without goalie issues.
 7 | ** TODO Require valid names in all slots.
 8 | ** TODO Add monocle celldataset coercion method.
 9 | ** TODO Need to run BFG on the repo and remove old example R data.
10 | ** TODO `bcbioSingleCell()`: Explain genome annotation priority in better detail, following the logic defined in `bcbioRNASeq()`.
11 | ** TODO Improve support for transcript-level counts?
12 |     Should we even allow this at this point?
13 | ** TODO Vignette using example bcbio data and 10X pbmc data.
14 | ** TODO Look into BarcodeInflectionsPlot, now in Seurat 3.
15 | * pointillism
16 | ** TODO Move markers out of Google Sheets and into the package, as simple CSV files.
17 | ** TODO Use pseudobulk approach for DE with sample replicates
18 | ** TODO `plotFeature()`: Add `pointsAsNumbers` argument support.
19 | ** TODO `diffExp()`: Add internal support for accessing design with `design()` generic.
20 | ** TODO `KnownMarkers` S4: switch from DataFrame to SplitDataFrameList inheritance?
21 | ** TODO Improve SeuratMarkers class.
22 |     This needs to error if the input data.frame contains `cluster` column.
23 |     Consider only using `SeuratMarkers` as a single generator but returning `SeuratMarkers` or `SeuratMarkersPerCluster` automatically.
24 |     Allow generator to work with empty ranges?
25 | ** TODO Improve plotFeature.
26 |     Add pointsAsNumbers support. Is there a way to facet wrap these instead of using plot grid? Then we can easily support a title. We're using continuous color here, so the formal won't match… argument "color" is missing, with no default.
27 | ** TODO `findMarkers()`: Consider adding `progress` option or BPPARAM support.
28 | ** TODO Switch to `Misc()` to access the `@misc` slot?
29 | ** TODO Consider splitting `SeuratMarkers` class into a `DataFrameList`, per cluster.
30 | ** TODO Stacked bar plot for relative cell abundances per cluster or sample type.
31 | ** TODO Put the resolution in the plot title for t-SNE.
32 | ** TODO Add `write = TRUE` argument support for marking looping functions, to write CSVs automatically to disk.
33 | ** TODO Improve assert checks for `findMarkers()`
34 | * syntactic
35 | ** TODO saveData: Need to harden against accidental pipe to this function.
36 | ** TODO Check that renaming mode renames symlinks themselves, not the resolved file.
37 |     I ran into this attempting to name album artist symlinks in iTunes.
38 | ** TODO VIGNETTE. Seriously, work on it.
39 | ** TODO Functions need to convert accent marks if possible. Particularly useful for file names.
40 | ** TODO Ensure nM returns as nm instead of n_m.
41 | ** TODO Rename mode: Need to look for and strip ' - ' out automatically.
42 |     Otherwise this will return '-minus-' in the file name, which is annoying. Need to improve the internal sanitization in R for this.
43 | ** TODO Rename mode needs to also make extension lowercase.
44 | ** TODO Need to add recursive rename mode support (koopa only?).
45 |     Maybe this is easiest to implement on the R side of things. Need to rename files first, then directories, sorted. Need to work from lowest levels up.
46 | ** TODO nMito, nCoding looks weird with makeLabel plural.
47 |     See bcbioSingleCell example.
48 | 


--------------------------------------------------------------------------------
/R/AllClasses.R:
--------------------------------------------------------------------------------
  1 | #' bcbio single-cell RNA-seq data set
  2 | #'
  3 | #' S4 class extending `SingleCellExperiment` that is designed to store a
  4 | #' bcbio single-cell RNA-seq analysis. Objects of this class hold the read
  5 | #' counts as a sparse matrix (`sparseMatrix`), along with the sample metadata
  6 | #' and the cell quality control metrics.
  7 | #'
  8 | #' @author Michael Steinbaugh, Rory Kirchner
  9 | #' @note Updated 2022-05-09.
 10 | #' @export
 11 | setClass(
 12 |     "bcbioSingleCell",
 13 |     contains = "SingleCellExperiment"
 14 | )
 15 | ## Validity method for `bcbioSingleCell`.
 16 | ##
 17 | ## Checks run in sequence. The first `validate()` / `validateClasses()` result
 18 | ## that is not `TRUE` is returned as-is; `TRUE` is returned only when every
 19 | ## check passes.
 20 | setValidity(
 21 |     Class = "bcbioSingleCell",
 22 |     method = function(object) {
 23 |         colData <- colData(object)
 24 |         metadata <- metadata(object)
 25 |         ## NOTE(review): `sampleData` is not referenced below. Presumably the
 26 |         ## call is kept so that `sampleData()` failures surface here; confirm
 27 |         ## before removing.
 28 |         sampleData <- sampleData(object)
 29 |         ## Return invalid for all objects older than v0.1.
 30 |         ## The threshold is a string rather than a double: a double would be
 31 |         ## coerced via `as.character()` before the version comparison.
 32 |         version <- metadata[["version"]]
 33 |         ok <- validate(
 34 |             is(version, "package_version"),
 35 |             version >= "0.1"
 36 |         )
 37 |         if (!isTRUE(ok)) {
 38 |             return(ok)
 39 |         }
 40 |         ## Check for legacy bcbio slot.
 41 |         ok <- validate(!.hasSlot(object, "bcbio"))
 42 |         if (!isTRUE(ok)) {
 43 |             return(ok)
 44 |         }
 45 |         ## Assays --------------------------------------------------------------
 46 |         ok <- validate(isSubset("counts", names(assays(object))))
 47 |         if (!isTRUE(ok)) {
 48 |             return(ok)
 49 |         }
 50 |         ## Row data ------------------------------------------------------------
 51 |         ok <- validate(
 52 |             is(rowRanges(object), "GenomicRanges"),
 53 |             is(rowData(object), "DataFrame")
 54 |         )
 55 |         if (!isTRUE(ok)) {
 56 |             return(ok)
 57 |         }
 58 |         ## Column data ---------------------------------------------------------
 59 |         ok <- validate(
 60 |             ## Require that metrics columns are defined.
 61 |             isSubset(.metricsCols, colnames(colData)),
 62 |             ## Ensure that `interestingGroups` isn't slotted in colData.
 63 |             areDisjointSets("interestingGroups", colnames(colData))
 64 |         )
 65 |         if (!isTRUE(ok)) {
 66 |             return(ok)
 67 |         }
 68 |         ## Metadata ------------------------------------------------------------
 69 |         ## NOTE(review): entries listing several class names presumably accept
 70 |         ## any one of them -- confirm against `validateClasses()`.
 71 |         df <- c("DFrame", "DataFrame")
 72 |         ok <- validateClasses(
 73 |             object = metadata,
 74 |             expected = list(
 75 |                 allSamples = "logical",
 76 |                 bcbioCommandsLog = "character",
 77 |                 bcbioLog = "character",
 78 |                 dataVersions = df,
 79 |                 date = "Date",
 80 |                 ensemblRelease = "integer",
 81 |                 genomeBuild = "character",
 82 |                 gffFile = "character",
 83 |                 interestingGroups = "character",
 84 |                 lanes = "integer",
 85 |                 level = "character",
 86 |                 organism = "character",
 87 |                 pipeline = "character",
 88 |                 programVersions = df,
 89 |                 projectDir = "character",
 90 |                 runDate = "Date",
 91 |                 sampleDirs = "character",
 92 |                 sampleMetadataFile = "character",
 93 |                 sessionInfo = c("sessionInfo", "session_info"),
 94 |                 umiType = "character",
 95 |                 uploadDir = "character",
 96 |                 version = "package_version",
 97 |                 wd = "character",
 98 |                 yaml = "list"
 99 |             ),
100 |             subset = TRUE
101 |         )
102 |         if (!isTRUE(ok)) {
103 |             return(ok)
104 |         }
105 |         ## Check that level is defined, and that `sampleName` is not stored
106 |         ## in the metadata.
107 |         ok <- validate(
108 |             !isSubset("sampleName", names(metadata)),
109 |             isSubset(metadata[["level"]], c("genes", "transcripts"))
110 |         )
111 |         if (!isTRUE(ok)) {
112 |             return(ok)
113 |         }
114 |         TRUE
115 |     }
116 | )
117 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # bcbioSingleCell
  2 | 
  3 | [![Install with Bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/recipes/r-bcbiosinglecell/README.html)
  4 | ![Lifecycle: retired](https://img.shields.io/badge/lifecycle-retired-red.svg)
  5 | 
  6 | **NOTE: [bcbio-nextgen][bcbio] is no longer under active development.**
  7 | Refer to the [notice of discontinuation][] for additional details.
  8 | 
  9 | [R][] package for [bcbio][] single-cell RNA-seq analysis.
 10 | 
 11 | ## Installation
 12 | 
 13 | This is an R package.
 14 | 
 15 | ```r
 16 | if (!requireNamespace("BiocManager", quietly = TRUE)) {
 17 |     install.packages("BiocManager")
 18 | }
 19 | install.packages(
 20 |     pkgs = "bcbioSingleCell",
 21 |     repos = c(
 22 |         "https://r.acidgenomics.com",
 23 |         BiocManager::repositories()
 24 |     ),
 25 |     dependencies = TRUE
 26 | )
 27 | ```
 28 | 
 29 | ### [Conda][] method
 30 | 
 31 | Configure [Conda][] to use the [Bioconda][] channels.
 32 | 
 33 | ```sh
 34 | # Don't install recipe into base environment.
 35 | conda create --name='r-bcbiosinglecell' 'r-bcbiosinglecell'
 36 | conda activate 'r-bcbiosinglecell'
 37 | R
 38 | ```
 39 | 
 40 | ## Load bcbio single-cell RNA-seq data
 41 | 
 42 | ```r
 43 | library(bcbioSingleCell)
 44 | object <- bcbioSingleCell(
 45 |     uploadDir = file.path("indrops", "final"),
 46 |     interestingGroups = c("genotype", "treatment"),
 47 |     sampleMetadataFile = "sample_metadata.csv",
 48 |     organism = "Homo sapiens",
 49 |     ensemblRelease = 90L
 50 | )
 51 | ```
 52 | 
 53 | This will return a `bcbioSingleCell` object, which is an extension of the
 54 | [Bioconductor][] [SingleCellExperiment][sce] container class. Consult the
 55 | `bcbioSingleCell()` constructor function documentation for detailed information
 56 | on the supported parameters:
 57 | 
 58 | ```r
 59 | help(topic = "bcbioSingleCell", package = "bcbioSingleCell")
 60 | ```
 61 | 
 62 | ## Sample metadata examples
 63 | 
 64 | ### FASTQ files with samples multiplexed by index barcode
 65 | 
 66 | This is our current recommended method for analyzing an inDrops dataset.
 67 | The sample index barcodes are multiplexed per FASTQ set. For Illumina
 68 | sequencing data, the raw binary base call (BCL) data must be converted into
 69 | FASTQs (split into `R1`-`R4` files) using [bcl2fastq][].
 70 | 
 71 | The inDrops library version is automatically detected by bcbio, but ensure that
 72 | the sample index sequences provided match the library version when attempting to
 73 | create a `bcbioSingleCell` object.
 74 | 
 75 | Consult the bcbio documentation for more information on how to configure an
 76 | inDrops run prior to loading into R with the `bcbioSingleCell()` function.
 77 | 
 78 | | description | index | sequence | sampleName | aggregate | genotype |
 79 | | ----------- | ----- | -------- | ---------- | --------- | -------- |
 80 | | indrops1    | 1     | CTCTCTAT | sample1_1  | sample1   | wildtype |
 81 | | indrops1    | 2     | TATCCTCT | sample2_1  | sample2   | knockout |
 82 | | indrops1    | 3     | GTAAGGAG | sample3_1  | sample3   | wildtype |
 83 | | indrops1    | 4     | ACTGCATA | sample4_1  | sample4   | knockout |
 84 | | indrops2    | 1     | CTCTCTAT | sample1_2  | sample1   | wildtype |
 85 | | indrops2    | 2     | TATCCTCT | sample2_2  | sample2   | knockout |
 86 | | indrops2    | 3     | GTAAGGAG | sample3_2  | sample3   | wildtype |
 87 | | indrops2    | 4     | ACTGCATA | sample4_2  | sample4   | knockout |
 88 | 
 89 | Note that bcbio currently outputs the reverse complement index sequence in the
 90 | sample directory names (e.g. `sample-ATAGAGAG`). Define the forward index
 91 | barcode in the `sequence` column here, not the reverse complement. The reverse
 92 | complement will be calculated automatically and added as the `revcomp` column
 93 | in the sample metadata.
 94 | 
 95 | ### FASTQ files demultiplexed per sample
 96 | 
 97 | This is our current method for handling 10X Genomics Chromium and Illumina
 98 | SureCell cell barcodes.
 99 | 
100 | | description | genotype |
101 | | ----------- | -------- |
102 | | sample1     | wildtype |
103 | | sample2     | knockout |
104 | | sample3     | wildtype |
105 | | sample4     | knockout |
106 | 
107 | ### Invalid object
108 | 
109 | If you encounter a `validObject` error when attempting to load a
110 | `bcbioSingleCell` object from a previous analysis, run this step to update the
111 | object to the current version of the package:
112 | 
113 | ```r
114 | object <- updateObject(object)
115 | validObject(object)
116 | ## [1] TRUE
117 | ```
118 | 
119 | ## References
120 | 
121 | The papers and software cited in our workflows are available as a [shared
122 | library](https://paperpile.com/shared/C8EMxl) on [Paperpile][].
123 | 
124 | [bcbio]: https://bcbio-nextgen.readthedocs.io/
125 | [bcl2fastq]: https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html
126 | [bioconda]: https://bioconda.github.io/
127 | [bioconductor]: https://bioconductor.org/
128 | [conda]: https://conda.io/
129 | [notice of discontinuation]: https://github.com/bcbio/bcbio-nextgen/issues/3749
130 | [paperpile]: https://paperpile.com/
131 | [r]: https://www.r-project.org/
132 | [sce]: https://bioconductor.org/packages/SingleCellExperiment/
133 | 


--------------------------------------------------------------------------------
/R/internal-import.R:
--------------------------------------------------------------------------------
  1 | #' Import bcbio counts from sample directories
  2 | #'
  3 | #' Samples that come back empty from the per-sample import are skipped.
  4 | #'
  5 | #' @author Michael Steinbaugh
  6 | #' @keywords internal
  7 | #' @note Updated 2023-08-17.
  8 | #' @noRd
  9 | #'
 10 | #' @param sampleDirs `character`.
 11 | #' Named sample directory paths.
 12 | #'
 13 | #' @return `Matrix` / `matrix`.
 14 | .importCounts <-
 15 |     function(sampleDirs) {
 16 |         assert(allAreDirectories(sampleDirs), hasNames(sampleDirs))
 17 |         alert("Importing counts.")
 18 |         list <- mcMap(
 19 |             sampleId = names(sampleDirs),
 20 |             dir = sampleDirs,
 21 |             f = function(sampleId, dir) {
 22 |                 counts <- .importCountsPerSample(dir)
 23 |                 ## Skipped samples come back as `NULL`; return early so they are
 24 |                 ## filtered out below instead of erroring on the steps that follow.
 25 |                 if (is.null(counts)) {
 26 |                     return(NULL)
 27 |                 }
 28 |                 ## Prefix cell barcodes with sample identifier when we're
 29 |                 ## loading counts from multiple samples.
 30 |                 if (length(sampleDirs) > 1L) {
 31 |                     colnames(counts) <-
 32 |                         paste(sampleId, colnames(counts), sep = "_")
 33 |                 }
 34 |                 ## Ensure names are valid.
 35 |                 makeDimnames(counts)
 36 |             }
 37 |         )
 38 |         ## Remove any empty items in list, which can result from low quality
 39 |         ## samples with empty matrices in bcbio pipeline.
 40 |         list <- Filter(f = Negate(is.null), x = list)
 41 |         assert(
 42 |             hasLength(list),
 43 |             msg = sprintf(
 44 |                 "bcbio didn't return any cells.\nCheck your '%s' setting.",
 45 |                 "minimum_barcode_depth"
 46 |             )
 47 |         )
 48 |         ## Bind the matrices.
 49 |         do.call(cbind, list)
 50 |     }
 51 | 
 52 | 
 53 | 
 54 | #' Import counts per sample from sparse matrix
 55 | #'
 56 | #' Counts are always in Matrix Market Exchange (MEX/MTX) format.
 57 | #'
 58 | #' Loading one matrix per sample, instead of the giant combined matrix, may
 59 | #' be advantageous because the per-sample imports can be run in parallel.
 60 | #'
 61 | #' The row and column name files are loaded first. If either is empty, the
 62 | #' MatrixMarket file is not loaded, since that would error. The bcbio pipeline
 63 | #' will output empty files for very low quality samples with no cells that
 64 | #' pass filtering.
 65 | #'
 66 | #' @author Michael Steinbaugh
 67 | #' @keywords internal
 68 | #' @note Updated 2020-01-20.
 69 | #' @noRd
 70 | #'
 71 | #' @param dir `character(1)`.
 72 | #' Sample directory path.
 73 | #'
 74 | #' @return `sparseMatrix`, or `NULL` when the sample is skipped.
 75 | .importCountsPerSample <- # nolint
 76 |     function(dir) {
 77 |         assert(isADirectory(dir))
 78 |         sampleBasename <- basename(dir)
 79 |         ## All three files must exist, even if they are empty.
 80 |         mtxFile <- file.path(dir, paste0(sampleBasename, ".mtx"))
 81 |         dimnamesFiles <- paste0(mtxFile, c(".rownames", ".colnames"))
 82 |         assert(allAreFiles(c(mtxFile, dimnamesFiles)))
 83 |         ## Genes/transcripts (features) first, then cellular barcodes.
 84 |         features <- import(dimnamesFiles[[1L]], format = "lines")
 85 |         barcodes <- import(dimnamesFiles[[2L]], format = "lines")
 86 |         ## Nothing to import when either set of names is empty.
 87 |         if (length(features) == 0L || length(barcodes) == 0L) {
 88 |             alertWarning(sprintf("Skipped {.path %s}.", sampleBasename))
 89 |             return(NULL)
 90 |         }
 91 |         ## Import counts.
 92 |         counts <- import(mtxFile)
 93 |         assert(
 94 |             identical(length(features), nrow(counts)),
 95 |             identical(length(barcodes), ncol(counts))
 96 |         )
 97 |         rownames(counts) <- features
 98 |         colnames(counts) <- barcodes
 99 |         alert(sprintf("Imported {.path %s}.", sampleBasename))
100 |         counts
101 |     }
102 | 
103 | 
104 | 
105 | #' Import raw cellular barcode read list
106 | #'
107 | #' Reads the number of pre-UMI disambiguated reads per cellular barcode.
108 | #'
109 | #' @author Michael Steinbaugh
110 | #' @keywords internal
111 | #' @note Updated 2023-08-17.
112 | #' @noRd
113 | #'
114 | #' @param sampleDirs `character`.
115 | #' Named sample directory paths.
116 | #'
117 | #' @return `list`.
118 | #' One named integer vector per sample, holding the pre-filtered cellular
119 | #' barcode counts (`nCount`).
120 | .importReads <-
121 |     function(sampleDirs) {
122 |         assert(
123 |             allAreDirectories(sampleDirs),
124 |             hasNames(sampleDirs)
125 |         )
126 |         alert("Importing unfiltered cellular barcode distributions.")
127 |         sampleIds <- names(sampleDirs)
128 |         tsvNames <- paste0(basename(sampleDirs), "-barcodes.tsv")
129 |         tsvFiles <- realpath(file.path(sampleDirs, tsvNames))
130 |         names(tsvFiles) <- sampleIds
131 |         ## Each file becomes an integer vector of read counts whose names
132 |         ## are the cellular barcodes passed through `makeNames()`.
133 |         readsPerSample <- mclapply(
134 |             X = tsvFiles,
135 |             FUN = function(tsvFile) {
136 |                 df <- import(
137 |                     con = tsvFile,
138 |                     format = "tsv",
139 |                     colnames = c("barcode", "n")
140 |                 )
141 |                 reads <- as.integer(df[["n"]])
142 |                 names(reads) <- makeNames(df[["barcode"]])
143 |                 reads
144 |             }
145 |         )
146 |         names(readsPerSample) <- sampleIds
147 |         readsPerSample
148 |     }
149 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | # Generated by roxygen2: do not edit by hand
  2 | 
  3 | export(bcbioSingleCell)
  4 | export(bcbioSingleCellTestsUrl)
  5 | export(calculateMetrics)
  6 | export(filterCells)
  7 | export(plotBarcodeRanks)
  8 | export(plotCellCounts)
  9 | export(plotCountsPerCell)
 10 | export(plotCountsVsFeatures)
 11 | export(plotFeaturesPerCell)
 12 | export(plotMitoRatio)
 13 | export(plotNovelty)
 14 | export(plotQc)
 15 | export(plotReadsPerCell)
 16 | export(show)
 17 | export(updateObject)
 18 | exportClasses(bcbioSingleCell)
 19 | exportMethods("[")
 20 | exportMethods(plotReadsPerCell)
 21 | exportMethods(show)
 22 | exportMethods(updateObject)
 23 | importClassesFrom(SingleCellExperiment,SingleCellExperiment)
 24 | importFrom(AcidBase,metricsCols)
 25 | importFrom(AcidBase,printString)
 26 | importFrom(AcidBase,realpath)
 27 | importFrom(AcidBase,showSlotInfo)
 28 | importFrom(AcidBase,standardizeCall)
 29 | importFrom(AcidBase,strMatch)
 30 | importFrom(AcidCLI,abort)
 31 | importFrom(AcidCLI,alert)
 32 | importFrom(AcidCLI,alertSuccess)
 33 | importFrom(AcidCLI,alertWarning)
 34 | importFrom(AcidCLI,h1)
 35 | importFrom(AcidCLI,h2)
 36 | importFrom(AcidCLI,separator)
 37 | importFrom(AcidCLI,toInlineString)
 38 | importFrom(AcidExperiment,detectLanes)
 39 | importFrom(AcidExperiment,droplevels2)
 40 | importFrom(AcidExperiment,importSampleData)
 41 | importFrom(AcidExperiment,matchInterestingGroups)
 42 | importFrom(AcidExperiment,minimalSampleData)
 43 | importFrom(AcidExperiment,sampleNames)
 44 | importFrom(AcidGenerics,"interestingGroups<-")
 45 | importFrom(AcidGenerics,calculateMetrics)
 46 | importFrom(AcidGenerics,camelCase)
 47 | importFrom(AcidGenerics,droplevels2)
 48 | importFrom(AcidGenerics,filterCells)
 49 | importFrom(AcidGenerics,interestingGroups)
 50 | importFrom(AcidGenerics,leftJoin)
 51 | importFrom(AcidGenerics,makeDimnames)
 52 | importFrom(AcidGenerics,makeLabel)
 53 | importFrom(AcidGenerics,makeNames)
 54 | importFrom(AcidGenerics,metrics)
 55 | importFrom(AcidGenerics,plotBarcodeRanks)
 56 | importFrom(AcidGenerics,plotCellCounts)
 57 | importFrom(AcidGenerics,plotCountsPerCell)
 58 | importFrom(AcidGenerics,plotCountsVsFeatures)
 59 | importFrom(AcidGenerics,plotFeaturesPerCell)
 60 | importFrom(AcidGenerics,plotMitoRatio)
 61 | importFrom(AcidGenerics,plotNovelty)
 62 | importFrom(AcidGenerics,plotQc)
 63 | importFrom(AcidGenerics,plotReadsPerCell)
 64 | importFrom(AcidGenerics,sampleData)
 65 | importFrom(AcidGenomes,emptyRanges)
 66 | importFrom(AcidGenomes,makeGRangesFromEnsembl)
 67 | importFrom(AcidGenomes,makeGRangesFromGff)
 68 | importFrom(AcidMarkdown,markdownPlots)
 69 | importFrom(AcidPlots,"!!!")
 70 | importFrom(AcidPlots,.data)
 71 | importFrom(AcidPlots,acid_geom_abline)
 72 | importFrom(AcidPlots,acid_geom_label)
 73 | importFrom(AcidPlots,acid_geom_label_average)
 74 | importFrom(AcidPlots,acid_geom_label_repel)
 75 | importFrom(AcidPlots,acid_scale_color_discrete)
 76 | importFrom(AcidPlots,acid_scale_fill_discrete)
 77 | importFrom(AcidPlots,syms)
 78 | importFrom(AcidSingleCell,makeSingleCellExperiment)
 79 | importFrom(AcidSingleCell,mapCellsToSamples)
 80 | importFrom(BiocGenerics,counts)
 81 | importFrom(BiocGenerics,updateObject)
 82 | importFrom(IRanges,DataFrameList)
 83 | importFrom(S4Vectors,"mcols<-")
 84 | importFrom(S4Vectors,"metadata<-")
 85 | importFrom(S4Vectors,DataFrame)
 86 | importFrom(S4Vectors,SimpleList)
 87 | importFrom(S4Vectors,cbind)
 88 | importFrom(S4Vectors,do.call)
 89 | importFrom(S4Vectors,droplevels)
 90 | importFrom(S4Vectors,lapply)
 91 | importFrom(S4Vectors,mcols)
 92 | importFrom(S4Vectors,metadata)
 93 | importFrom(SummarizedExperiment,"assays<-")
 94 | importFrom(SummarizedExperiment,"colData<-")
 95 | importFrom(SummarizedExperiment,"rowData<-")
 96 | importFrom(SummarizedExperiment,"rowRanges<-")
 97 | importFrom(SummarizedExperiment,assay)
 98 | importFrom(SummarizedExperiment,assayNames)
 99 | importFrom(SummarizedExperiment,assays)
100 | importFrom(SummarizedExperiment,colData)
101 | importFrom(SummarizedExperiment,rowData)
102 | importFrom(SummarizedExperiment,rowRanges)
103 | importFrom(bcbioBase,getBarcodeCutoffFromCommands)
104 | importFrom(bcbioBase,getGtfFileFromYaml)
105 | importFrom(bcbioBase,getLevelFromCommands)
106 | importFrom(bcbioBase,getSampleDataFromYaml)
107 | importFrom(bcbioBase,getUmiTypeFromCommands)
108 | importFrom(bcbioBase,importDataVersions)
109 | importFrom(bcbioBase,importProgramVersions)
110 | importFrom(bcbioBase,projectDir)
111 | importFrom(bcbioBase,runDate)
112 | importFrom(bcbioBase,sampleDirs)
113 | importFrom(ggplot2,aes)
114 | importFrom(ggplot2,facet_wrap)
115 | importFrom(ggplot2,geom_boxplot)
116 | importFrom(ggplot2,geom_histogram)
117 | importFrom(ggplot2,geom_step)
118 | importFrom(ggplot2,geom_violin)
119 | importFrom(ggplot2,ggplot)
120 | importFrom(ggplot2,labs)
121 | importFrom(ggplot2,scale_x_continuous)
122 | importFrom(ggplot2,scale_y_continuous)
123 | importFrom(ggplot2,stat_ecdf)
124 | importFrom(ggplot2,vars)
125 | importFrom(ggridges,geom_density_ridges)
126 | importFrom(goalie,allAreDirectories)
127 | importFrom(goalie,allAreFiles)
128 | importFrom(goalie,areDisjointSets)
129 | importFrom(goalie,areSetEqual)
130 | importFrom(goalie,assert)
131 | importFrom(goalie,hasLength)
132 | importFrom(goalie,hasNames)
133 | importFrom(goalie,hasRownames)
134 | importFrom(goalie,hasValidDimnames)
135 | importFrom(goalie,isADirectory)
136 | importFrom(goalie,isAFile)
137 | importFrom(goalie,isAUrl)
138 | importFrom(goalie,isAny)
139 | importFrom(goalie,isCharacter)
140 | importFrom(goalie,isDirectory)
141 | importFrom(goalie,isFile)
142 | importFrom(goalie,isFlag)
143 | importFrom(goalie,isInt)
144 | importFrom(goalie,isString)
145 | importFrom(goalie,isSubset)
146 | importFrom(goalie,requireNamespaces)
147 | importFrom(goalie,validate)
148 | importFrom(goalie,validateClasses)
149 | importFrom(methods,"as<-")
150 | importFrom(methods,"slot<-")
151 | importFrom(methods,.hasSlot)
152 | importFrom(methods,as)
153 | importFrom(methods,coerce)
154 | importFrom(methods,is)
155 | importFrom(methods,new)
156 | importFrom(methods,setClass)
157 | importFrom(methods,show)
158 | importFrom(methods,slot)
159 | importFrom(methods,validObject)
160 | importFrom(parallel,mcMap)
161 | importFrom(parallel,mclapply)
162 | importFrom(pipette,import)
163 | importFrom(utils,capture.output)
164 | importFrom(utils,packageName)
165 | importFrom(utils,packageVersion)
166 | importMethodsFrom(AcidExperiment,"interestingGroups<-")
167 | importMethodsFrom(AcidExperiment,calculateMetrics)
168 | importMethodsFrom(AcidExperiment,interestingGroups)
169 | importMethodsFrom(AcidExperiment,metrics)
170 | importMethodsFrom(AcidExperiment,sampleData)
171 | importMethodsFrom(AcidExperiment,sampleNames)
172 | importMethodsFrom(AcidPlyr,leftJoin)
173 | importMethodsFrom(AcidSingleCell,sampleData)
174 | importMethodsFrom(pipette,import)
175 | importMethodsFrom(syntactic,camelCase)
176 | importMethodsFrom(syntactic,makeDimnames)
177 | importMethodsFrom(syntactic,makeLabel)
178 | importMethodsFrom(syntactic,makeNames)
179 | 


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/quality-control/skeleton/skeleton.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | ## Updated 2023-10-05.
  3 | params:
  4 |   title: "Quality control and filtering"
  5 | 
  6 |   ## bcbioSingleCell object.
  7 |   bcb_file: !r file.path("data", "bcb.rds")
  8 | 
  9 |   ## Counts: Expected number of UMI-disambiguated counts per cell.
 10 |   min_counts: 1000
 11 |   max_counts: !r Inf
 12 | 
 13 |   ## Features: Expected number of features (i.e. genes) per cell.
 14 |   min_features: 500
 15 |   max_features: !r Inf
 16 | 
 17 |   ## Novelty score: log10 features per count.
 18 |   min_novelty: 0.85
 19 | 
 20 |   ## Mitochondrial abundance.
 21 |   ## Low quality / stressed cells tend to be above 20%.
 22 |   max_mito_ratio: 0.1
 23 | 
 24 |   ## Minimum number of cells per feature.
 25 |   ## This will remove features with very low expression.
 26 |   min_cells_per_feature: 10
 27 | 
 28 |   ## Expected number of cells per sample.
 29 |   ## Not recommended by default.
 30 |   n_cells: !r Inf
 31 | 
 32 |   ## Where to save the filtered output.
 33 |   data_dir: !r file.path("data")
 34 | 
 35 |   ## Where to export results of the analysis.
 36 |   output_dir: !r file.path("results", Sys.Date(), "quality-control")
 37 | 
 38 | title: "`r params[['title']]`"
 39 | author: "`r getOption('author')`"
 40 | date: "`r Sys.Date()`"
 41 | 
 42 | ## This file is generated by the `prepareTemplate()` step in setup chunk.
 43 | bibliography: "bibliography.bib"
 44 | ---
 45 | 
 46 | ```{r setup, cache=FALSE, message=FALSE}
 47 | ## nolint start
 48 | suppressPackageStartupMessages({
 49 |     library(goalie)
 50 |     library(basejump)
 51 |     library(ggplot2)
 52 |     library(bcbioSingleCell)
 53 | })
 54 | prepareTemplate()
 55 | source("_setup.R")
 56 | ## nolint end
 57 | ```
 58 | 
 59 | ```{r header, child="_header.Rmd"}
 60 | ```
 61 | 
 62 | # Load `bcbioSingleCell` object
 63 | 
 64 | ```{r load-object}
 65 | object <- import(params[["bcb_file"]])
 66 | assert(
 67 |     is(object, "bcbioSingleCell"),
 68 |     validObject(object)
 69 | )
 70 | print(object)
 71 | ```
 72 | 
 73 | [bcbio][] run data was imported from:  
 74 | **`r metadata(object)[["uploadDir"]]`**.
 75 | 
 76 | # Sample metadata
 77 | 
 78 | ```{r sample-data}
 79 | sampleData(object)
 80 | ```
 81 | 
 82 | # Reads per cell {.tabset}
 83 | 
 84 | These are counts of how many reads are assigned to a given cellular barcode. It
 85 | is normal for single cell RNA-seq data to contain a large number of low
 86 | complexity barcodes. The bcbio pipeline filters out most of these barcodes, and
 87 | here we have applied a threshold cutoff of a minimum of
 88 | `r metadata(object)[["cellularBarcodeCutoff"]]` reads per cell. The unfiltered
 89 | read count distributions are shown here.
 90 | 
 91 | ## Histogram
 92 | 
 93 | For high quality data, the proportional histogram should contain a single large
 94 | peak that represents cells that were encapsulated. If we see a strong shoulder,
 95 | or a bimodal distribution of the cells, that can indicate a couple of problems.
 96 | It might be that there is free-floating RNA, which happens when cells are dying.
 97 | It could also be that there is a set of cells that failed for some reason.
 98 | Finally, it could also be that there are biologically different types of cells,
 99 | and one type is much smaller than the other. If this is the case we would
100 | expect to see less RNA being sequenced from the smaller cells.
101 | 
102 | ```{r plot-reads-per-cell-histogram}
103 | plotReadsPerCell(
104 |     object = object,
105 |     geom = "histogram",
106 |     interestingGroups = "sampleName"
107 | )
108 | ```
109 | 
110 | ## ECDF
111 | 
112 | An empirical cumulative distribution function (ECDF) plot will show the
113 | frequency distribution of the reads per cell. You can see that the vast
114 | majority of low complexity barcodes plateau below 1000 reads per cell.
115 | 
116 | ```{r plot-reads-per-cell-ecdf}
117 | plotReadsPerCell(object = object, geom = "ecdf")
118 | ```
119 | 
120 | # UMI counts per cell {.tabset}
121 | 
122 | Now let's assess the distribution of unique molecular identifier
123 | (UMI)-deconvoluted counts per cell. In general, the distributions should be
124 | relatively uniform per sample. Here we are also including violin and ridgeline
125 | plots, with the average number of counts per cell labeled.
126 | 
127 | ```{r plot-counts-per-cell-prefilter}
128 | markdownHeader("Violin", level = 2L)
129 | plotCountsPerCell(
130 |     object = object,
131 |     geom = "violin",
132 |     min = params[["min_counts"]],
133 |     max = params[["max_counts"]]
134 | )
135 | 
136 | markdownHeader("Ridgeline", level = 2L)
137 | plotCountsPerCell(
138 |     object = object,
139 |     geom = "ridgeline",
140 |     min = params[["min_counts"]],
141 |     max = params[["max_counts"]]
142 | )
143 | 
144 | markdownHeader("Histogram", level = 2L)
145 | plotCountsPerCell(
146 |     object = object,
147 |     geom = "histogram",
148 |     min = params[["min_counts"]],
149 |     max = params[["max_counts"]]
150 | )
151 | 
152 | markdownHeader("ECDF", level = 2L)
153 | plotCountsPerCell(
154 |     object = object,
155 |     geom = "ecdf",
156 |     interestingGroups = "sampleName",
157 |     min = params[["min_counts"]],
158 |     max = params[["max_counts"]]
159 | )
160 | ```
161 | 
162 | # Filter cells by UMI count
163 | 
164 | Let's apply this step first and then proceed to evaluating gene detection,
165 | mitochondrial transcript abundance, and novelty scores.
166 | 
167 | ```{r filter-cells-by-count}
168 | object <- filterCells(
169 |     object = object,
170 |     minCounts = params[["min_counts"]],
171 |     maxCounts = params[["max_counts"]]
172 | )
173 | ```
174 | 
175 | Let's take a look at the UMI per cell distributions after this filtering step.
176 | Note that we haven't applied very strict filtering here -- we're going to cut
177 | off the "low quality" cells based on the gene detection rate, novelty score,
178 | and mitochondrial abundance.
179 | 
180 | ```{r plot-counts-per-cell-postfilter}
181 | plotCountsPerCell(
182 |     object = object,
183 |     geom = "histogram",
184 |     min = params[["min_counts"]],
185 |     max = params[["max_counts"]]
186 | )
187 | ```
188 | 
189 | # Genes detected per cell {.tabset}
190 | 
191 | Here by "detected", we mean genes with a non-zero count measurement per cell.
192 | Seeing gene detection in the range of `500`-`5000` is normal for most
193 | single-cell experiments.
194 | 
195 | ```{r plot-features-per-cell}
196 | markdownHeader("Violin", level = 2L)
197 | plotFeaturesPerCell(
198 |     object = object,
199 |     geom = "violin",
200 |     min = min(params[["min_features"]]),
201 |     max = max(params[["max_features"]])
202 | )
203 | 
204 | markdownHeader("Ridgeline", level = 2L)
205 | plotFeaturesPerCell(
206 |     object = object,
207 |     geom = "ridgeline",
208 |     min = min(params[["min_features"]]),
209 |     max = max(params[["max_features"]])
210 | )
211 | 
212 | markdownHeader("Histogram", level = 2L)
213 | plotFeaturesPerCell(
214 |     object = object,
215 |     geom = "histogram",
216 |     min = min(params[["min_features"]]),
217 |     max = max(params[["max_features"]])
218 | )
219 | 
220 | markdownHeader("ECDF", level = 2L)
221 | plotFeaturesPerCell(
222 |     object = object,
223 |     geom = "ecdf",
224 |     min = min(params[["min_features"]]),
225 |     max = max(params[["max_features"]])
226 | )
227 | ```
228 | 
229 | # UMIs vs. features detected
230 | 
231 | If we graph out the total number of UMI counts per cell vs. the genes detected
232 | per cell, we can assess whether there is a large population of low quality
233 | cells with low counts and/or gene detection.
234 | 
235 | ```{r plot-counts-vs-features}
236 | plotCountsVsFeatures(object)
237 | ```
238 | 
239 | # Novelty score {.tabset}
240 | 
241 | Another way to QC the data is to look for less novelty, that is, cells that
242 | have fewer genes detected per count than other cells. We can see that the
243 | samples where we sequenced each cell less have a higher overall novelty; that
244 | is because we have not started saturating the sequencing for any given gene
245 | in these samples. Outlier cells in these samples might be cells that have a
246 | less complex RNA species than other cells. Sometimes we can detect contamination
247 | with low complexity cell types like red blood cells via this metric.
248 | 
249 | ```{r plot-novelty}
250 | markdownHeader("Violin", level = 2L)
251 | plotNovelty(
252 |     object = object,
253 |     geom = "violin",
254 |     min = min(params[["min_novelty"]])
255 | )
256 | 
257 | markdownHeader("Ridgeline", level = 2L)
258 | plotNovelty(
259 |     object = object,
260 |     geom = "ridgeline",
261 |     min = min(params[["min_novelty"]])
262 | )
263 | 
264 | markdownHeader("Histogram", level = 2L)
265 | plotNovelty(
266 |     object = object,
267 |     geom = "histogram",
268 |     min = min(params[["min_novelty"]])
269 | )
270 | 
271 | markdownHeader("ECDF", level = 2L)
272 | plotNovelty(
273 |     object = object,
274 |     geom = "ecdf",
275 |     min = min(params[["min_novelty"]])
276 | )
277 | ```
278 | 
279 | # Mitochondrial abundance {.tabset}
280 | 
281 | We evaluate overall mitochondrial gene expression as a biomarker of cellular
282 | stress during sample preparation.
283 | 
284 | ```{r plot-mito-ratio}
285 | markdownHeader("Violin", level = 2L)
286 | plotMitoRatio(
287 |     object = object,
288 |     geom = "violin",
289 |     max = max(params[["max_mito_ratio"]])
290 | )
291 | 
292 | markdownHeader("Ridgeline", level = 2L)
293 | plotMitoRatio(
294 |     object = object,
295 |     geom = "ridgeline",
296 |     max = max(params[["max_mito_ratio"]])
297 | )
298 | 
299 | markdownHeader("Histogram", level = 2L)
300 | plotMitoRatio(
301 |     object = object,
302 |     geom = "histogram",
303 |     max = max(params[["max_mito_ratio"]])
304 | )
305 | 
306 | markdownHeader("ECDF", level = 2L)
307 | plotMitoRatio(
308 |     object = object,
309 |     geom = "ecdf",
310 |     max = max(params[["max_mito_ratio"]])
311 | )
312 | ```
313 | 
314 | # Filter cells
315 | 
316 | ```{r filter-cells}
317 | object <- filterCells(
318 |     object = object,
319 |     minCounts = params[["min_counts"]],
320 |     maxCounts = params[["max_counts"]],
321 |     minFeatures = params[["min_features"]],
322 |     maxFeatures = params[["max_features"]],
323 |     maxMitoRatio = params[["max_mito_ratio"]],
324 |     minNovelty = params[["min_novelty"]],
325 |     nCells = params[["n_cells"]],
326 |     minCellsPerFeature = params[["min_cells_per_feature"]]
327 | )
328 | ```
329 | 
330 | ```{r plot-filtered-qc}
331 | plotQc(object, geom = "violin")
332 | ```
333 | 
334 | # Save filtered data
335 | 
336 | ```{r save-filtered}
337 | name <- basenameSansExt(params[["bcb_file"]])
338 | assignAndSaveData(
339 |     name = paste(name, "filtered", sep = "_"),
340 |     object = object,
341 |     dir = params[["data_dir"]]
342 | )
343 | ```
344 | 
345 | ```{r export}
346 | export(
347 |     object = object,
348 |     con = params[["output_dir"]],
349 |     compress = TRUE
350 | )
351 | ```
352 | 
353 | ```{r footer, child="_footer.Rmd"}
354 | ```
355 | 
356 | ```{r links, child="_links.Rmd"}
357 | ```
358 | 


--------------------------------------------------------------------------------
/R/updateObject-methods.R:
--------------------------------------------------------------------------------
  1 | #' Update object
  2 | #'
  3 | #' @name updateObject
  4 | #' @author Michael Steinbaugh
  5 | #' @note Updated 2023-12-04.
  6 | #'
  7 | #' @inheritParams AcidRoxygen::params
  8 | #'
  9 | #' @return Modified object.
 10 | #'
 11 | #' @examples
 12 | #' data(bcb)
 13 | #'
 14 | #' ## bcbioSingleCell ====
 15 | #' updateObject(bcb)
 16 | #'
 17 | #' ## Example that depends on remote file.
 18 | #' ## > x <- import(
 19 | #' ## >     con = file.path(
 20 | #' ## >         bcbioSingleCellTestsUrl,
 21 | #' ## >         "bcbioSingleCell_0.1.0.rds"
 22 | #' ## >     )
 23 | #' ## > )
 24 | #' ## > x <- updateObject(x)
 25 | #' ## > x
 26 | NULL
 27 | 
 28 | 
 29 | 
 30 | ## Updated 2022-05-09.
 31 | `updateObject,bcbioSingleCell` <- # nolint
 32 |     function(object, ..., verbose = FALSE) {
 33 |         assert(isFlag(verbose))
 34 |         if (isTRUE(verbose)) {
 35 |             h1("Update object")
 36 |         }
 37 |         sce <- as(object, "SingleCellExperiment")
 38 |         cells <- colnames(sce)
 39 |         assays <- assays(sce)
 40 |         rowRanges <- rowRanges(sce)
 41 |         colData <- colData(sce)
 42 |         metadata <- metadata(sce)
 43 |         version <- metadata[["version"]]
 44 |         assert(is(version, "package_version"))
 45 |         if (isTRUE(verbose)) {
 46 |             alert(sprintf(
 47 |                 fmt = "Upgrading {.var %s} from version %s to %s.",
 48 |                 "bcbioSingleCell",
 49 |                 as.character(version),
 50 |                 as.character(.pkgVersion)
 51 |             ))
 52 |         }
 53 |         ## Assays --------------------------------------------------------------
 54 |         if (isTRUE(verbose)) {
 55 |             h2("Assays")
 56 |         }
 57 |         ## Ensure raw counts are always named "counts".
 58 |         if (isSubset("assay", names(assays))) {
 59 |             ## Versions < 0.1 (e.g. 0.0.21).
 60 |             if (isTRUE(verbose)) {
 61 |                 alert(sprintf(
 62 |                     "Renaming {.var %s} to {.var %s}.",
 63 |                     "assay", "counts"
 64 |                 ))
 65 |             }
 66 |             names(assays)[names(assays) == "assay"] <- "counts"
 67 |         } else if (isSubset("raw", names(assays))) {
 68 |             if (isTRUE(verbose)) {
 69 |                 alert(sprintf(
 70 |                     "Renaming {.var %s} assay to {.var %s}.",
 71 |                     "raw", "counts"
 72 |                 ))
 73 |             }
 74 |             names(assays)[names(assays) == "raw"] <- "counts"
 75 |         }
 76 |         assays <- Filter(Negate(is.null), assays)
 77 |         ## Assert the required assays exist, then put them first, in order.
 78 |         assert(isSubset(.requiredAssays, names(assays)))
 79 |         assays <- assays[unique(c(.requiredAssays, names(assays)))]
 80 |         ## Row data ------------------------------------------------------------
 81 |         if (hasNames(mcols(rowRanges))) {
 82 |             mcols(rowRanges) <-
 83 |                 camelCase(mcols(rowRanges), strict = TRUE)
 84 |         }
 85 |         ## Column data ---------------------------------------------------------
 86 |         if (isTRUE(verbose)) {
 87 |             h2("Column data")
 88 |         }
 89 |         colnames(colData) <- camelCase(colnames(colData), strict = TRUE)
 90 |         if (isSubset(c("nCount", "nUmi"), colnames(colData))) {
 91 |             if (isTRUE(verbose)) {
 92 |                 alert(sprintf(
 93 |                     "Renaming {.var %s} to {.var %s}.",
 94 |                     "nCount", "nRead"
 95 |                 ))
 96 |             }
 97 |             colnames(colData)[colnames(colData) == "nCount"] <- "nRead"
 98 |             if (isTRUE(verbose)) {
 99 |                 alert(sprintf(
100 |                     "Renaming {.var %s} to {.var %s}.",
101 |                     "nUmi", "nCount"
102 |                 ))
103 |             }
104 |             colnames(colData)[colnames(colData) == "nUmi"] <- "nCount"
105 |         }
106 |         if (isSubset("nGene", colnames(colData))) {
107 |             if (isTRUE(verbose)) {
108 |                 alert(sprintf(
109 |                     "Renaming {.var %s} to {.var %s}.",
110 |                     "nGene", "nFeature"
111 |                 ))
112 |             }
113 |             colnames(colData)[colnames(colData) == "nGene"] <- "nFeature"
114 |             if (isTRUE(verbose)) {
115 |                 alert(sprintf(
116 |                     "Renaming {.var %s} to {.var %s}.",
117 |                     "log10GenesPerUmi", "log10FeaturesPerCount"
118 |                 ))
119 |             }
120 |             colnames(colData)[colnames(colData) == "log10GenesPerUmi"] <-
121 |                 "log10FeaturesPerCount"
122 |         }
123 |         ## Move sampleData into colData.
124 |         if (isSubset("sampleData", names(metadata))) {
125 |             sampleData <- metadata[["sampleData"]]
126 |         } else if (isSubset("sampleMetadata", names(metadata))) {
127 |             sampleData <- metadata[["sampleMetadata"]]
128 |         } else {
129 |             sampleData <- NULL
130 |         }
131 |         if (!is.null(sampleData)) {
132 |             colnames(sampleData) <-
133 |                 camelCase(colnames(sampleData), strict = TRUE)
134 |             if (isTRUE(verbose)) {
135 |                 alert(sprintf(
136 |                     "Moving {.var %s} from {.fun %s} into {.fun %s}.",
137 |                     "sampleData", "metadata", "colData"
138 |                 ))
139 |             }
140 |             assert(isSubset("sampleId", colnames(sampleData)))
141 |             sampleData <- as(sampleData, "DataFrame")
142 |             colData <- colData[
143 |                 ,
144 |                 setdiff(colnames(colData), colnames(sampleData)),
145 |                 drop = FALSE
146 |             ]
147 |             if (isTRUE(verbose)) {
148 |                 alert("Mapping cells to samples.")
149 |             }
150 |             c2s <- mapCellsToSamples(
151 |                 cells = cells,
152 |                 samples = as.character(sampleData[["sampleId"]])
153 |             )
154 |             assert(is.factor(c2s))
155 |             colData[["sampleId"]] <- c2s
156 |             sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
157 |             colData <- leftJoin(x = colData, y = sampleData, by = "sampleId")
158 |             assert(
159 |                 is(colData, "DataFrame"),
160 |                 identical(rownames(colData), colnames(object))
161 |             )
162 |             ## Ensure rows are ordered to match the object.
163 |             colData <- colData[cells, , drop = FALSE]
164 |         }
165 |         ## Metadata ------------------------------------------------------------
166 |         if (isTRUE(verbose)) {
167 |             h2("Metadata")
168 |         }
169 |         ## dataVersions.
170 |         dataVersions <- metadata[["dataVersions"]]
171 |         if (is(dataVersions, "data.frame")) {
172 |             if (isTRUE(verbose)) {
173 |                 alert(sprintf(
174 |                     "Setting {.var %s} as {.cls %s}.",
175 |                     "dataVersions", "DataFrame"
176 |                 ))
177 |             }
178 |             metadata[["dataVersions"]] <- as(dataVersions, "DataFrame")
179 |         }
180 |         ## ensemblRelease.
181 |         if (isSubset("ensemblVersion", names(metadata))) {
182 |             if (isTRUE(verbose)) {
183 |                 alert(sprintf(
184 |                     "Renaming {.var %s} to {.var %s}.",
185 |                     "ensemblVersion", "ensemblRelease"
186 |                 ))
187 |             }
188 |             names(metadata)[
189 |                 names(metadata) == "ensemblVersion"
190 |             ] <- "ensemblRelease"
191 |         }
192 |         if (
193 |             is.numeric(metadata[["ensemblRelease"]]) &&
194 |                 !is.integer(metadata[["ensemblRelease"]])
195 |         ) {
196 |             if (isTRUE(verbose)) {
197 |                 alert(sprintf(
198 |                     "Setting {.var %s} as integer.",
199 |                     "ensemblRelease"
200 |                 ))
201 |             }
202 |             metadata[["ensemblRelease"]] <-
203 |                 as.integer(metadata[["ensemblRelease"]])
204 |         }
205 |         ## Update the version, if necessary.
206 |         if (!identical(metadata[["version"]], .pkgVersion)) {
207 |             metadata[["originalVersion"]] <- metadata[["version"]]
208 |             metadata[["version"]] <- .pkgVersion
209 |         }
210 |         ## gffFile.
211 |         if (isSubset("gtfFile", names(metadata))) {
212 |             if (isTRUE(verbose)) {
213 |                 alert(sprintf(
214 |                     "Renaming {.var %s} to {.var %s}.",
215 |                     "gtfFile", "gffFile"
216 |                 ))
217 |             }
218 |             names(metadata)[names(metadata) == "gtfFile"] <- "gffFile"
219 |         }
220 |         if (!isSubset("gffFile", names(metadata))) {
221 |             if (isTRUE(verbose)) {
222 |                 alert(sprintf(
223 |                     "Setting {.var %s} as {.val %s}.",
224 |                     "gffFile", "empty character"
225 |                 ))
226 |             }
227 |             metadata[["gffFile"]] <- character()
228 |         }
229 |         ## lanes.
230 |         if (!is.integer(metadata[["lanes"]])) {
231 |             if (isTRUE(verbose)) {
232 |                 alert(sprintf(
233 |                     "Setting {.var %s} as {.val %s}.",
234 |                     "lanes", "integer"
235 |                 ))
236 |             }
237 |             metadata[["lanes"]] <- as.integer(metadata[["lanes"]])
238 |         }
239 |         ## level.
240 |         if (!isSubset("level", names(metadata))) {
241 |             if (isTRUE(verbose)) {
242 |                 alert(sprintf(
243 |                     "Setting {.var %s} as {.val %s}.",
244 |                     "level", "genes"
245 |                 ))
246 |             }
247 |             metadata[["level"]] <- "genes"
248 |         }
249 |         ## programVersions.
250 |         if (
251 |             !isSubset("programVersions", names(metadata)) &&
252 |                 isSubset("programs", names(metadata))
253 |         ) {
254 |             if (isTRUE(verbose)) {
255 |                 alert(sprintf(
256 |                     "Renaming {.var %s} to {.var %s}.",
257 |                     "programs", "programVersions"
258 |                 ))
259 |             }
260 |             names(metadata)[names(metadata) == "programs"] <- "programVersions"
261 |         }
262 |         programVersions <- metadata[["programVersions"]]
263 |         if (is(programVersions, "data.frame")) {
264 |             metadata[["programVersions"]] <- as(programVersions, "DataFrame")
265 |         }
266 |         ## sampleMetadataFile.
267 |         if (!is.character(metadata[["sampleMetadataFile"]])) {
268 |             if (isTRUE(verbose)) {
269 |                 alert(sprintf(
270 |                     "Setting {.var %s} as {.val %s}.",
271 |                     "sampleMetadataFile", "empty character"
272 |                 ))
273 |             }
274 |             metadata[["sampleMetadataFile"]] <- character()
275 |         }
276 |         ## sessionInfo.
277 |         if (isSubset("utilsSessionInfo", names(metadata))) {
278 |             if (isTRUE(verbose)) {
279 |                 alert(sprintf("Simplifying stashed {.var %s}.", "sessionInfo"))
280 |             }
281 |             names(metadata)[
282 |                 names(metadata) == "utilsSessionInfo"
283 |             ] <- "sessionInfo"
284 |             metadata[["devtoolsSessionInfo"]] <- NULL
285 |         }
286 |         ## Drop legacy slots.
287 |         keep <- setdiff(
288 |             x = names(metadata),
289 |             y = c("cellToSample", "sampleData", "sampleMetadata")
290 |         )
291 |         metadata <- metadata[keep]
292 |         ## Return --------------------------------------------------------------
293 |         assays(sce) <- assays
294 |         rowRanges(sce) <- rowRanges
295 |         colData(sce) <- colData
296 |         metadata(sce) <- metadata
297 |         bcb <- new(Class = "bcbioSingleCell", sce)
298 |         validObject(bcb)
299 |         if (isTRUE(verbose)) {
300 |             alertSuccess(sprintf(
301 |                 "Update of {.var %s} object was successful.",
302 |                 "bcbioSingleCell"
303 |             ))
304 |         }
305 |         bcb
306 |     }
307 | 
308 | 
309 | 
310 | #' @rdname updateObject
311 | #' @export
312 | setMethod(
313 |     f = "updateObject",
314 |     signature = signature(object = "bcbioSingleCell"),
315 |     definition = `updateObject,bcbioSingleCell`
316 | )
317 | 


--------------------------------------------------------------------------------
/R/AllGenerators.R:
--------------------------------------------------------------------------------
  1 | #' @inherit bcbioSingleCell-class title description
  2 | #' @author Michael Steinbaugh
  3 | #' @note Updated 2023-09-21.
  4 | #' @export
  5 | #'
  6 | #' @inheritParams AcidSingleCell::makeSingleCellExperiment
  7 | #' @inheritParams AcidRoxygen::params
  8 | #'
  9 | #' @section Remote data:
 10 | #'
 11 | #' When working in RStudio, we recommend connecting to the bcbio-nextgen run
 12 | #' directory as a remote connection over
 13 | #' [sshfs](https://github.com/osxfuse/osxfuse/wiki/SSHFS).
 14 | #'
 15 | #' @return `bcbioSingleCell`.
 16 | #'
 17 | #' @seealso
 18 | #' - `SingleCellExperiment::SingleCellExperiment()`.
 19 | #' - `.S4methods(class = "bcbioSingleCell")`.
 20 | #'
 21 | #' @examples
 22 | #' uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell")
 23 | #'
 24 | #' x <- bcbioSingleCell(uploadDir)
 25 | #' print(x)
 26 | #'
 27 | #' x <- bcbioSingleCell(
 28 | #'     uploadDir = uploadDir,
 29 | #'     sampleMetadataFile = file.path(uploadDir, "metadata.csv")
 30 | #' )
 31 | #' print(x)
 32 | bcbioSingleCell <-
 33 |     function(uploadDir,
 34 |              sampleMetadataFile = NULL,
 35 |              organism = NULL,
 36 |              ensemblRelease = NULL,
 37 |              genomeBuild = NULL,
 38 |              gffFile = NULL,
 39 |              transgeneNames = NULL,
 40 |              interestingGroups = "sampleName") {
 41 |         assert(
 42 |             isADirectory(uploadDir),
 43 |             isString(sampleMetadataFile, nullOk = TRUE),
 44 |             isString(organism, nullOk = TRUE),
 45 |             isInt(ensemblRelease, nullOk = TRUE),
 46 |             isString(genomeBuild, nullOk = TRUE),
 47 |             isString(gffFile, nullOk = TRUE),
 48 |             isCharacter(transgeneNames, nullOk = TRUE),
 49 |             isCharacter(interestingGroups)
 50 |         )
 51 |         ## Require `gffFile` to pass either the file or the URL check.
 52 |         if (isString(gffFile)) {
 53 |             assert(isAFile(gffFile) || isAUrl(gffFile))
 54 |         }
 55 |         h1("bcbioSingleCell")
 56 |         alert("Importing bcbio-nextgen single-cell RNA-seq run")
 57 |         ## Run info ------------------------------------------------------------
 58 |         uploadDir <- realpath(uploadDir)
 59 |         projectDir <- projectDir(uploadDir)
 60 |         sampleDirs <- sampleDirs(uploadDir)
 61 |         lanes <- detectLanes(sampleDirs)
 62 |         yaml <- import(file.path(projectDir, "project-summary.yaml"))
 63 |         dataVersions <-
 64 |             importDataVersions(file.path(projectDir, "data_versions.csv"))
 65 |         assert(is(dataVersions, "DataFrame"))
 66 |         programVersions <-
 67 |             importProgramVersions(file.path(projectDir, "programs.txt"))
 68 |         assert(is(programVersions, "DataFrame"))
 69 |         log <- import(file.path(projectDir, "bcbio-nextgen.log"))
 70 |         ## This step enables our minimal dataset to pass checks.
 71 |         tryCatch(
 72 |             expr = assert(isCharacter(log)),
 73 |             error = function(e) {
 74 |                 alertWarning(sprintf(
 75 |                     "{.file %s} file is empty.",
 76 |                     "bcbio-nextgen.log"
 77 |                 ))
 78 |             }
 79 |         )
 80 |         commandsLog <-
 81 |             import(file.path(projectDir, "bcbio-nextgen-commands.log"))
 82 |         ## This step enables our minimal dataset to pass checks.
 83 |         tryCatch(
 84 |             expr = assert(isCharacter(commandsLog)),
 85 |             error = function(e) {
 86 |                 alertWarning(
 87 |                     "{.file bcbio-nextgen-commands.log} file is empty."
 88 |                 )
 89 |             }
 90 |         )
 91 |         cutoff <- getBarcodeCutoffFromCommands(commandsLog)
 92 |         level <- getLevelFromCommands(commandsLog)
 93 |         umiType <- getUmiTypeFromCommands(commandsLog)
 94 |         ## Check to see if we're dealing with a multiplexed platform.
 95 |         multiplexed <- any(vapply(
 96 |             X = c("dropseq", "indrop"),
 97 |             FUN = function(pattern) {
 98 |                 grepl(pattern = pattern, x = umiType)
 99 |             },
100 |             FUN.VALUE = logical(1L)
101 |         ))
102 |         ## Sample metadata -----------------------------------------------------
103 |         h2("Sample metadata")
104 |         allSamples <- TRUE
105 |         sampleData <- NULL
106 |         if (isString(sampleMetadataFile)) {
107 |             sampleData <- importSampleData(
108 |                 file = sampleMetadataFile,
109 |                 lanes = lanes,
110 |                 pipeline = "bcbio"
111 |             )
112 |             ## Error on incorrect reverse complement input.
113 |             if (isSubset("sequence", colnames(sampleData))) {
114 |                 sampleDirSequence <- strMatch(
115 |                     x = names(sampleDirs),
116 |                     pattern = "^.+_([ACGT]+)$"
117 |                 )[, 2L]
118 |                 assert(
119 |                     !identical(
120 |                         sort(sampleDirSequence),
121 |                         sort(as.character(sampleData[["sequence"]]))
122 |                     ),
123 |                     msg = paste(
124 |                         "It appears that the reverse complement sequence of",
125 |                         "the i5 index barcodes were input into the sample",
126 |                         "metadata 'sequence' column. bcbio outputs the revcomp",
127 |                         "into the sample directories, but the forward sequence",
128 |                         "should be used in the R package."
129 |                     )
130 |                 )
131 |             }
132 |             ## Allow sample selection with this file.
133 |             if (nrow(sampleData) < length(sampleDirs)) {
134 |                 sampleDirs <- sampleDirs[rownames(sampleData)]
135 |                 alert(sprintf(
136 |                     fmt = "Loading a subset of samples: %s.",
137 |                     toInlineString(basename(sampleDirs), n = 5L)
138 |                 ))
139 |                 allSamples <- FALSE
140 |             }
141 |         }
142 |         ## Assays (counts) -----------------------------------------------------
143 |         h2("Counts")
144 |         ## Note that we're now allowing transcript-level counts.
145 |         counts <- .importCounts(sampleDirs = sampleDirs)
146 |         assert(hasValidDimnames(counts))
147 |         ## Row data (genes/transcripts) ----------------------------------------
148 |         h2("Feature metadata")
149 |         ## Annotation priority:
150 |         ## 1. AnnotationHub.
151 |         ## - Requires `organism` to be declared.
152 |         ## - Ensure that Ensembl release and genome build match.
153 |         ## 2. GTF/GFF file. Use the bcbio GTF if possible.
154 |         ## 3. Fall back to slotting empty ranges. This is offered as support for
155 |         ## complex datasets (e.g. multiple organisms).
156 |         if (isString(organism) && is.numeric(ensemblRelease)) {
157 |             ## AnnotationHub (ensembldb).
158 |             alert("{.fun makeGRangesFromEnsembl}")
159 |             rowRanges <- makeGRangesFromEnsembl(
160 |                 organism = organism,
161 |                 level = level,
162 |                 genomeBuild = genomeBuild,
163 |                 release = ensemblRelease
164 |             )
165 |         } else {
166 |             ## GTF/GFF file.
167 |             if (is.null(gffFile)) {
168 |                 ## Attempt to use bcbio GTF automatically.
169 |                 gffFile <- getGtfFileFromYaml(yaml)
170 |             }
171 |             if (!is.null(gffFile)) {
172 |                 alert("{.fun makeGRangesFromGff}")
173 |                 gffFile <- realpath(gffFile)
174 |                 rowRanges <- makeGRangesFromGff(file = gffFile, level = level)
175 |             } else {
176 |                 alertWarning("Slotting empty ranges into {.fun rowRanges}.")
177 |                 rowRanges <- emptyRanges(rownames(counts))
178 |             }
179 |         }
180 |         assert(is(rowRanges, "GenomicRanges"))
181 |         ## Attempt to get genome build and Ensembl release if not declared.
182 |         ## Note that these will remain NULL when using GTF file (see above).
183 |         if (is.null(genomeBuild)) {
184 |             genomeBuild <- metadata(rowRanges)[["genomeBuild"]]
185 |         }
186 |         if (is.null(ensemblRelease)) {
187 |             ensemblRelease <- metadata(rowRanges)[["ensemblRelease"]]
188 |         }
189 |         ## Column data ---------------------------------------------------------
190 |         h2("Column data")
191 |         colData <- DataFrame(row.names = colnames(counts))
192 |         ## Generate automatic sample metadata, if necessary.
193 |         if (is.null(sampleData)) {
194 |             if (isTRUE(multiplexed)) {
195 |                 ## Multiplexed samples without user-defined metadata.
196 |                 alertWarning(sprintf(
197 |                     fmt = paste(
198 |                         "{.var %s} is recommended for",
199 |                         "multiplexed samples (e.g. {.val %s})."
200 |                     ),
201 |                     "sampleMetadataFile", umiType
202 |                 ))
203 |                 sampleData <- minimalSampleData(basename(sampleDirs))
204 |             } else {
205 |                 sampleData <- getSampleDataFromYaml(yaml)
206 |             }
207 |         }
208 |         assert(isSubset(rownames(sampleData), names(sampleDirs)))
209 |         ## Join `sampleData` into cell-level `colData`.
210 |         if (identical(nrow(sampleData), 1L)) {
211 |             colData[["sampleId"]] <- as.factor(rownames(sampleData))
212 |         } else {
213 |             colData[["sampleId"]] <- mapCellsToSamples(
214 |                 cells = rownames(colData),
215 |                 samples = rownames(sampleData)
216 |             )
217 |         }
218 |         sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
219 |         ## Need to ensure the `sampleId` factor levels match up, otherwise we'll
220 |         ## get a warning during the `leftJoin()` call below.
221 |         assert(areSetEqual(
222 |             x = levels(colData[["sampleId"]]),
223 |             y = levels(sampleData[["sampleId"]])
224 |         ))
225 |         levels(sampleData[["sampleId"]]) <- levels(colData[["sampleId"]])
226 |         colData <- leftJoin(colData, sampleData, by = "sampleId")
227 |         assert(
228 |             is(colData, "DataFrame"),
229 |             hasRownames(colData)
230 |         )
231 |         ## Metadata ------------------------------------------------------------
232 |         h2("Metadata")
233 |         cbList <- .importReads(sampleDirs = sampleDirs)
234 |         runDate <- runDate(projectDir)
235 |         interestingGroups <- camelCase(interestingGroups, strict = TRUE)
236 |         assert(isSubset(interestingGroups, colnames(sampleData)))
237 |         metadata <- list(
238 |             "allSamples" = allSamples,
239 |             "bcbioCommandsLog" = commandsLog,
240 |             "bcbioLog" = log,
241 |             "call" = standardizeCall(),
242 |             "cellularBarcodeCutoff" = cutoff,
243 |             "cellularBarcodes" = cbList,
244 |             "dataVersions" = dataVersions,
245 |             "ensemblRelease" = as.integer(ensemblRelease),
246 |             "genomeBuild" = as.character(genomeBuild),
247 |             "gffFile" = as.character(gffFile),
248 |             "interestingGroups" = interestingGroups,
249 |             "lanes" = lanes,
250 |             "level" = level,
251 |             "organism" = as.character(organism),
252 |             "pipeline" = "bcbio",
253 |             "programVersions" = programVersions,
254 |             "projectDir" = projectDir,
255 |             "runDate" = runDate,
256 |             "sampleDirs" = sampleDirs,
257 |             "sampleMetadataFile" = as.character(sampleMetadataFile),
258 |             "umiType" = umiType,
259 |             "uploadDir" = uploadDir,
260 |             "version" = .pkgVersion,
261 |             "yaml" = yaml
262 |         )
263 |         ## SingleCellExperiment ------------------------------------------------
264 |         object <- makeSingleCellExperiment(
265 |             assays = SimpleList("counts" = counts),
266 |             rowRanges = rowRanges,
267 |             colData = colData,
268 |             metadata = metadata,
269 |             transgeneNames = transgeneNames
270 |         )
271 |         ## Return --------------------------------------------------------------
272 |         ## Always prefilter, removing very low quality cells and/or genes.
273 |         object <- calculateMetrics(object = object, prefilter = TRUE)
274 |         ## Bind the `nRead` column into the cell metrics. These are the number
275 |         ## of raw read counts prior to UMI disambiguation that bcbio uses for
276 |         ## initial filtering (`minimum_barcode_depth` in YAML).
277 |         colData <- colData(object)
278 |         nRead <- .nRead(cbList)
279 |         assert(
280 |             is.integer(nRead),
281 |             isSubset(rownames(colData), names(nRead)),
282 |             areDisjointSets("nRead", colnames(colData))
283 |         )
284 |         colData[["nRead"]] <- unname(nRead[rownames(colData)])
285 |         colData <- colData[, sort(colnames(colData)), drop = FALSE]
286 |         colData(object) <- colData
287 |         bcb <- new(Class = "bcbioSingleCell", object)
288 |         alertSuccess("bcbio single-cell RNA-seq run imported successfully.")
289 |         bcb
290 |     }
291 | 


--------------------------------------------------------------------------------
/R/plotReadsPerCell-methods.R:
--------------------------------------------------------------------------------
  1 | #' @name plotReadsPerCell
  2 | #' @author Michael Steinbaugh, Rory Kirchner
  3 | #' @inherit AcidGenerics::plotReadsPerCell
  4 | #' @note Updated 2023-12-04.
  5 | #'
  6 | #' @inheritParams AcidRoxygen::params
  7 | #' @param ... Additional arguments.
  8 | #'
  9 | #' @param cutoffLine `logical(1)`.
 10 | #' Include a line marking the cutoff.
 11 | #'
 12 | #' @examples
 13 | #' data(bcb)
 14 | #'
 15 | #' ## bcbioSingleCell ====
 16 | #' plotReadsPerCell(bcb, geom = "histogram")
 17 | #' plotReadsPerCell(bcb, geom = "ecdf")
 18 | NULL
 19 | 
 20 | 
 21 | 
 22 | #' Proportional cellular barcodes data
 23 | #'
 24 | #' Modified version of Allon Klein Lab MATLAB code.
 25 | #'
 26 | #' @author Michael Steinbaugh, Rory Kirchner
 27 | #' @keywords internal
 28 | #' @note Updated 2022-05-07.
 29 | #' @noRd
 30 | #'
 31 | #' @param data `DataFrame`.
 32 | #' Raw read counts per cellular barcode.
 33 | #' Return from `.rawMetrics()` function.
 34 | #' @param sampleData `DataFrame`. Sample metadata, with sample IDs as row names.
 35 | #' @param breaks `integer(1)`. Number of histogram cells requested per sample.
 36 | #'
 37 | #' @return `DataFrame`.
 38 | .proportionalReadsPerCell <-
 39 |     function(data,
 40 |              sampleData,
 41 |              breaks = 100L) {
 42 |         assert(
 43 |             requireNamespaces("graphics"),
 44 |             is(data, "DataFrame"),
 45 |             isSubset(c("nRead", "sampleId"), colnames(data)),
 46 |             is.integer(data[["nRead"]]),
 47 |             is.factor(data[["sampleId"]]),
 48 |             is(sampleData, "DataFrame"),
 49 |             isInt(breaks)
 50 |         )
 51 |         sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
 52 |         samples <- levels(data[["sampleId"]])
 53 |         perSample <- DataFrameList(lapply(
 54 |             X = samples,
 55 |             FUN = function(sampleId) {
 56 |                 keep <- which(data[["sampleId"]] == sampleId)
 57 |                 cells <- data[keep, , drop = FALSE]
 58 |                 ## Histogram of log10-transformed counts.
 59 |                 h <- graphics::hist(
 60 |                     x = log10(cells[["nRead"]]),
 61 |                     breaks = breaks,
 62 |                     plot = FALSE
 63 |                 )
 64 |                 ## Klein Lab MATLAB code reference.
 65 |                 ## counts: fLog; mids: xLog
 66 |                 weighted <- h[["counts"]] * (10L^h[["mids"]])
 67 |                 DataFrame(
 68 |                     "sampleId" = factor(sampleId),
 69 |                     "log10Read" = h[["mids"]],
 70 |                     "proportion" = weighted / sum(weighted)
 71 |                 )
 72 |             }
 73 |         ))
 74 |         ## Collapse the per-sample list, then join the sample metadata columns.
 75 |         out <- unlist(perSample, recursive = FALSE, use.names = FALSE)
 76 |         leftJoin(out, sampleData, by = "sampleId")
 77 |     }
 78 | 
 79 | 
 80 | 
 81 | #' Plot proportional reads per cell histogram
 82 | #'
 83 | #' @note Updated 2023-08-16.
 84 | #' @noRd
 85 | #'
 86 | #' @param data Return from `.proportionalReadsPerCell()` function.
 87 | #'
 88 | #' @return `ggplot`.
 89 | .plotReadsPerCellHistogram <-
 90 |     function(data,
 91 |              min = 0L) {
 92 |         assert(is(data, "DataFrame"))
 93 |         p <- ggplot(
 94 |             data = as.data.frame(data),
 95 |             mapping = aes(
 96 |                 x = .data[["log10Read"]],
 97 |                 y = .data[["proportion"]],
 98 |                 color = .data[["interestingGroups"]]
 99 |             )
100 |         ) +
101 |             geom_step(
102 |                 alpha = 0.75,
103 |                 linewidth = 1L
104 |             ) +
105 |             labs(
106 |                 x = "log10 reads per cell",
107 |                 y = "proportion of reads"
108 |             )
109 |         ## Cutoff line.
110 |         if (min > 0L) {
111 |             p <- p + acid_geom_abline(xintercept = log10(min))
112 |         }
113 |         ## Color palette.
114 |         p <- p + acid_scale_color_discrete()
115 |         ## Facets.
116 |         facets <- NULL
117 |         if (isSubset("aggregate", colnames(data))) {
118 |             facets <- c(facets, "aggregate")
119 |         }
120 |         if (is.character(facets)) {
121 |             p <- p + facet_wrap(
122 |                 facets = vars(!!!syms(facets)),
123 |                 scales = "free"
124 |             )
125 |         }
126 |         ## Return.
127 |         p
128 |     }
129 | 
130 | 
131 | 
## Boxplot of "nRead" per "sampleName" on a log10 y-axis, filled by
## "interestingGroups". Draws a horizontal cutoff line when `min` is positive.
## Updated 2023-08-16.
.plotReadsPerCellBoxplot <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        df <- as.data.frame(data)
        hasAggregate <- isSubset("aggregate", colnames(data))
        p <- ggplot(
            data = df,
            mapping = aes(
                x = .data[["sampleName"]],
                y = .data[["nRead"]],
                fill = .data[["interestingGroups"]]
            )
        )
        ## Outlier points are suppressed via `outlier.shape = NA`.
        p <- p + geom_boxplot(color = "black", outlier.shape = NA)
        p <- p + scale_y_continuous(trans = "log10")
        p <- p + acid_geom_label_average(
            data = df,
            col = "nRead",
            digits = 0L
        )
        p <- p + labs(x = NULL, y = "reads per cell")
        if (min > 0L) {
            p <- p + acid_geom_abline(yintercept = min)
        }
        p <- p + acid_scale_fill_discrete()
        ## Facet only when an "aggregate" column is defined.
        if (hasAggregate) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
176 | 
177 | 
178 | 
## Empirical cumulative distribution of "nRead" on a log10 x-axis, colored by
## "interestingGroups". Draws a vertical cutoff line when `min` is positive.
## Updated 2023-08-16.
.plotReadsPerCellEcdf <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        df <- as.data.frame(data)
        hasAggregate <- isSubset("aggregate", colnames(data))
        p <- ggplot(
            data = df,
            mapping = aes(
                x = .data[["nRead"]],
                color = .data[["interestingGroups"]]
            )
        )
        p <- p + stat_ecdf(geom = "step", linewidth = 1L)
        p <- p + labs(x = "reads per cell", y = "frequency")
        p <- p + scale_x_continuous(trans = "log10")
        if (min > 0L) {
            p <- p + acid_geom_abline(xintercept = min)
        }
        p <- p + acid_scale_color_discrete()
        ## Facet only when an "aggregate" column is defined.
        if (hasAggregate) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
217 | 
218 | 
219 | 
## Ridgeline density of "nRead" per "sampleName" on a log10 x-axis, filled by
## "interestingGroups". Draws a vertical cutoff line when `min` is positive.
## Updated 2023-08-16.
.plotReadsPerCellRidgeline <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        df <- as.data.frame(data)
        hasAggregate <- isSubset("aggregate", colnames(data))
        p <- ggplot(
            data = df,
            mapping = aes(
                x = .data[["nRead"]],
                y = .data[["sampleName"]],
                fill = .data[["interestingGroups"]]
            )
        )
        p <- p + geom_density_ridges(
            alpha = 0.75,
            color = "black",
            panel_scaling = TRUE,
            scale = 10L
        )
        p <- p + scale_x_continuous(trans = "log10")
        p <- p + acid_geom_label_average(
            data = df,
            col = "nRead",
            digits = 0L
        )
        p <- p + labs(x = "reads per cell", y = NULL)
        if (min > 0L) {
            p <- p + acid_geom_abline(xintercept = min)
        }
        p <- p + acid_scale_fill_discrete()
        ## Facet only when an "aggregate" column is defined.
        if (hasAggregate) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
268 | 
269 | 
270 | 
## Violin plot of "nRead" per "sampleName" on a log10 y-axis, filled by
## "interestingGroups". Draws a horizontal cutoff line when `min` is positive.
## Updated 2023-08-16.
.plotReadsPerCellViolin <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        df <- as.data.frame(data)
        hasAggregate <- isSubset("aggregate", colnames(data))
        p <- ggplot(
            data = df,
            mapping = aes(
                x = .data[["sampleName"]],
                y = .data[["nRead"]],
                fill = .data[["interestingGroups"]]
            )
        )
        p <- p + geom_violin(color = "black", scale = "count")
        p <- p + scale_y_continuous(trans = "log10")
        p <- p + acid_geom_label_average(
            data = df,
            col = "nRead",
            digits = 0L
        )
        p <- p + labs(x = NULL, y = "reads per cell")
        if (min > 0L) {
            p <- p + acid_geom_abline(yintercept = min)
        }
        p <- p + acid_scale_fill_discrete()
        ## Facet only when an "aggregate" column is defined.
        if (hasAggregate) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
318 | 
319 | 
320 | 
## Method body for `plotReadsPerCell()` on `bcbioSingleCell` objects.
## Resolves the interesting groups and geom, determines the cutoff line and
## subtitle from the object metadata, then dispatches to the geom-specific
## internal plotting function.
## Updated 2023-08-16.
`plotReadsPerCell,bcbioSingleCell` <- # nolint
    function(object,
             interestingGroups = NULL,
             geom,
             cutoffLine = FALSE,
             title = "Reads per cell") {
        validObject(object)
        assert(isString(title, nullOk = TRUE))
        interestingGroups(object) <-
            matchInterestingGroups(object, interestingGroups)
        interestingGroups <- interestingGroups(object)
        geom <- match.arg(geom)
        ## Minimum reads per barcode cutoff (for unfiltered data).
        ## Start from the values used when "filterCells" metadata is defined,
        ## and only override them when it is not.
        min <- 0L
        subtitle <- NULL
        if (is.null(metadata(object)[["filterCells"]])) {
            cutoff <- metadata(object)[["cellularBarcodeCutoff"]]
            subtitle <- paste("cutoff", cutoff, sep = " = ")
            if (isTRUE(cutoffLine)) {
                min <- cutoff
            }
        }
        assert(isInt(min))
        ## This step will intentionally error for filtered objects.
        data <- .rawMetrics(object)
        p <- switch(
            EXPR = geom,
            "boxplot" = .plotReadsPerCellBoxplot(data = data, min = min),
            "ecdf" = .plotReadsPerCellEcdf(data = data, min = min),
            "histogram" = {
                ## The histogram is drawn from per-sample binned proportions
                ## rather than the raw per-barcode metrics.
                data <- .proportionalReadsPerCell(
                    data = data,
                    sampleData = sampleData(object)
                )
                .plotReadsPerCellHistogram(data = data, min = min)
            },
            "ridgeline" = .plotReadsPerCellRidgeline(data = data, min = min),
            "violin" = .plotReadsPerCellViolin(data = data, min = min)
        )
        ## Add title and subtitle containing cutoff information.
        ## The same legend title is applied to both color and fill scales.
        legendTitle <- paste(interestingGroups, collapse = ":\n")
        p <- p + labs(
            title = title,
            subtitle = subtitle,
            color = legendTitle,
            fill = legendTitle
        )
        p
    }

## Define the supported `geom` choices, consumed by `match.arg()` above.
formals(`plotReadsPerCell,bcbioSingleCell`)[["geom"]] <- # nolint
    .geom
411 | 
412 | 
413 | 
## Register the S4 method for `bcbioSingleCell` objects.
#' @rdname plotReadsPerCell
#' @export
setMethod(
    "plotReadsPerCell",
    signature = signature(object = "bcbioSingleCell"),
    definition = `plotReadsPerCell,bcbioSingleCell`
)
421 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU AFFERO GENERAL PUBLIC LICENSE
  2 |                        Version 3, 19 November 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU Affero General Public License is a free, copyleft license for
 11 | software and other kinds of works, specifically designed to ensure
 12 | cooperation with the community in the case of network server software.
 13 | 
 14 |   The licenses for most software and other practical works are designed
 15 | to take away your freedom to share and change the works.  By contrast,
 16 | our General Public Licenses are intended to guarantee your freedom to
 17 | share and change all versions of a program--to make sure it remains free
 18 | software for all its users.
 19 | 
 20 |   When we speak of free software, we are referring to freedom, not
 21 | price.  Our General Public Licenses are designed to make sure that you
 22 | have the freedom to distribute copies of free software (and charge for
 23 | them if you wish), that you receive source code or can get it if you
 24 | want it, that you can change the software or use pieces of it in new
 25 | free programs, and that you know you can do these things.
 26 | 
 27 |   Developers that use our General Public Licenses protect your rights
 28 | with two steps: (1) assert copyright on the software, and (2) offer
 29 | you this License which gives you legal permission to copy, distribute
 30 | and/or modify the software.
 31 | 
 32 |   A secondary benefit of defending all users' freedom is that
 33 | improvements made in alternate versions of the program, if they
 34 | receive widespread use, become available for other developers to
 35 | incorporate.  Many developers of free software are heartened and
 36 | encouraged by the resulting cooperation.  However, in the case of
 37 | software used on network servers, this result may fail to come about.
 38 | The GNU General Public License permits making a modified version and
 39 | letting the public access it on a server without ever releasing its
 40 | source code to the public.
 41 | 
 42 |   The GNU Affero General Public License is designed specifically to
 43 | ensure that, in such cases, the modified source code becomes available
 44 | to the community.  It requires the operator of a network server to
 45 | provide the source code of the modified version running there to the
 46 | users of that server.  Therefore, public use of a modified version, on
 47 | a publicly accessible server, gives the public access to the source
 48 | code of the modified version.
 49 | 
 50 |   An older license, called the Affero General Public License and
 51 | published by Affero, was designed to accomplish similar goals.  This is
 52 | a different license, not a version of the Affero GPL, but Affero has
 53 | released a new version of the Affero GPL which permits relicensing under
 54 | this license.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                        TERMS AND CONDITIONS
 60 | 
 61 |   0. Definitions.
 62 | 
 63 |   "This License" refers to version 3 of the GNU Affero General Public License.
 64 | 
 65 |   "Copyright" also means copyright-like laws that apply to other kinds of
 66 | works, such as semiconductor masks.
 67 | 
 68 |   "The Program" refers to any copyrightable work licensed under this
 69 | License.  Each licensee is addressed as "you".  "Licensees" and
 70 | "recipients" may be individuals or organizations.
 71 | 
 72 |   To "modify" a work means to copy from or adapt all or part of the work
 73 | in a fashion requiring copyright permission, other than the making of an
 74 | exact copy.  The resulting work is called a "modified version" of the
 75 | earlier work or a work "based on" the earlier work.
 76 | 
 77 |   A "covered work" means either the unmodified Program or a work based
 78 | on the Program.
 79 | 
 80 |   To "propagate" a work means to do anything with it that, without
 81 | permission, would make you directly or secondarily liable for
 82 | infringement under applicable copyright law, except executing it on a
 83 | computer or modifying a private copy.  Propagation includes copying,
 84 | distribution (with or without modification), making available to the
 85 | public, and in some countries other activities as well.
 86 | 
 87 |   To "convey" a work means any kind of propagation that enables other
 88 | parties to make or receive copies.  Mere interaction with a user through
 89 | a computer network, with no transfer of a copy, is not conveying.
 90 | 
 91 |   An interactive user interface displays "Appropriate Legal Notices"
 92 | to the extent that it includes a convenient and prominently visible
 93 | feature that (1) displays an appropriate copyright notice, and (2)
 94 | tells the user that there is no warranty for the work (except to the
 95 | extent that warranties are provided), that licensees may convey the
 96 | work under this License, and how to view a copy of this License.  If
 97 | the interface presents a list of user commands or options, such as a
 98 | menu, a prominent item in the list meets this criterion.
 99 | 
100 |   1. Source Code.
101 | 
102 |   The "source code" for a work means the preferred form of the work
103 | for making modifications to it.  "Object code" means any non-source
104 | form of a work.
105 | 
106 |   A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 | 
111 |   The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form.  A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 | 
122 |   The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities.  However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work.  For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 | 
135 |   The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 | 
139 |   The Corresponding Source for a work in source code form is that
140 | same work.
141 | 
142 |   2. Basic Permissions.
143 | 
144 |   All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met.  This License explicitly affirms your unlimited
147 | permission to run the unmodified Program.  The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work.  This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 | 
152 |   You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force.  You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright.  Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 | 
163 |   Conveying under any other circumstances is permitted solely under
164 | the conditions stated below.  Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 | 
167 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 | 
169 |   No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 | 
175 |   When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 | 
183 |   4. Conveying Verbatim Copies.
184 | 
185 |   You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 | 
193 |   You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 | 
196 |   5. Conveying Modified Source Versions.
197 | 
198 |   You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 | 
202 |     a) The work must carry prominent notices stating that you modified
203 |     it, and giving a relevant date.
204 | 
205 |     b) The work must carry prominent notices stating that it is
206 |     released under this License and any conditions added under section
207 |     7.  This requirement modifies the requirement in section 4 to
208 |     "keep intact all notices".
209 | 
210 |     c) You must license the entire work, as a whole, under this
211 |     License to anyone who comes into possession of a copy.  This
212 |     License will therefore apply, along with any applicable section 7
213 |     additional terms, to the whole of the work, and all its parts,
214 |     regardless of how they are packaged.  This License gives no
215 |     permission to license the work in any other way, but it does not
216 |     invalidate such permission if you have separately received it.
217 | 
218 |     d) If the work has interactive user interfaces, each must display
219 |     Appropriate Legal Notices; however, if the Program has interactive
220 |     interfaces that do not display Appropriate Legal Notices, your
221 |     work need not make them do so.
222 | 
223 |   A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit.  Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 | 
233 |   6. Conveying Non-Source Forms.
234 | 
235 |   You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 | 
240 |     a) Convey the object code in, or embodied in, a physical product
241 |     (including a physical distribution medium), accompanied by the
242 |     Corresponding Source fixed on a durable physical medium
243 |     customarily used for software interchange.
244 | 
245 |     b) Convey the object code in, or embodied in, a physical product
246 |     (including a physical distribution medium), accompanied by a
247 |     written offer, valid for at least three years and valid for as
248 |     long as you offer spare parts or customer support for that product
249 |     model, to give anyone who possesses the object code either (1) a
250 |     copy of the Corresponding Source for all the software in the
251 |     product that is covered by this License, on a durable physical
252 |     medium customarily used for software interchange, for a price no
253 |     more than your reasonable cost of physically performing this
254 |     conveying of source, or (2) access to copy the
255 |     Corresponding Source from a network server at no charge.
256 | 
257 |     c) Convey individual copies of the object code with a copy of the
258 |     written offer to provide the Corresponding Source.  This
259 |     alternative is allowed only occasionally and noncommercially, and
260 |     only if you received the object code with such an offer, in accord
261 |     with subsection 6b.
262 | 
263 |     d) Convey the object code by offering access from a designated
264 |     place (gratis or for a charge), and offer equivalent access to the
265 |     Corresponding Source in the same way through the same place at no
266 |     further charge.  You need not require recipients to copy the
267 |     Corresponding Source along with the object code.  If the place to
268 |     copy the object code is a network server, the Corresponding Source
269 |     may be on a different server (operated by you or a third party)
270 |     that supports equivalent copying facilities, provided you maintain
271 |     clear directions next to the object code saying where to find the
272 |     Corresponding Source.  Regardless of what server hosts the
273 |     Corresponding Source, you remain obligated to ensure that it is
274 |     available for as long as needed to satisfy these requirements.
275 | 
276 |     e) Convey the object code using peer-to-peer transmission, provided
277 |     you inform other peers where the object code and Corresponding
278 |     Source of the work are being offered to the general public at no
279 |     charge under subsection 6d.
280 | 
281 |   A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 | 
285 |   A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling.  In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage.  For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product.  A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 | 
298 |   "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source.  The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 | 
306 |   If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information.  But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 | 
317 |   The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed.  Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 | 
325 |   Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 | 
331 |   7. Additional Terms.
332 | 
333 |   "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law.  If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 | 
342 |   When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it.  (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.)  You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 | 
349 |   Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 | 
353 |     a) Disclaiming warranty or limiting liability differently from the
354 |     terms of sections 15 and 16 of this License; or
355 | 
356 |     b) Requiring preservation of specified reasonable legal notices or
357 |     author attributions in that material or in the Appropriate Legal
358 |     Notices displayed by works containing it; or
359 | 
360 |     c) Prohibiting misrepresentation of the origin of that material, or
361 |     requiring that modified versions of such material be marked in
362 |     reasonable ways as different from the original version; or
363 | 
364 |     d) Limiting the use for publicity purposes of names of licensors or
365 |     authors of the material; or
366 | 
367 |     e) Declining to grant rights under trademark law for use of some
368 |     trade names, trademarks, or service marks; or
369 | 
370 |     f) Requiring indemnification of licensors and authors of that
371 |     material by anyone who conveys the material (or modified versions of
372 |     it) with contractual assumptions of liability to the recipient, for
373 |     any liability that these contractual assumptions directly impose on
374 |     those licensors and authors.
375 | 
376 |   All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10.  If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term.  If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 | 
386 |   If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 | 
391 |   Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 | 
395 |   8. Termination.
396 | 
397 |   You may not propagate or modify a covered work except as expressly
398 | provided under this License.  Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 | 
403 |   However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 | 
410 |   Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 | 
417 |   Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License.  If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 | 
423 |   9. Acceptance Not Required for Having Copies.
424 | 
425 |   You are not required to accept this License in order to receive or
426 | run a copy of the Program.  Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance.  However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work.  These actions infringe copyright if you do
431 | not accept this License.  Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 | 
434 |   10. Automatic Licensing of Downstream Recipients.
435 | 
436 |   Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License.  You are not responsible
439 | for enforcing compliance by third parties with this License.
440 | 
441 |   An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations.  If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 | 
451 |   You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License.  For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 | 
459 |   11. Patents.
460 | 
461 |   A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based.  The
463 | work thus licensed is called the contributor's "contributor version".
464 | 
465 |   A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version.  For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 | 
475 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 | 
480 |   In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement).  To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 | 
487 |   If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients.  "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 | 
501 |   If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 | 
509 |   A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License.  You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 | 
524 |   Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 | 
528 |   12. No Surrender of Others' Freedom.
529 | 
530 |   If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License.  If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all.  For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 | 
540 |   13. Remote Network Interaction; Use with the GNU General Public License.
541 | 
542 |   Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software.  This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 | 
553 |   Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work.  The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 | 
561 |   14. Revised Versions of this License.
562 | 
563 |   The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time.  Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 | 
568 |   Each version is given a distinguishing version number.  If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation.  If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 | 
577 |   If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 | 
582 |   Later license versions may give you additional or different
583 | permissions.  However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 | 
587 |   15. Disclaimer of Warranty.
588 | 
589 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 | 
598 |   16. Limitation of Liability.
599 | 
600 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 | 
610 |   17. Interpretation of Sections 15 and 16.
611 | 
612 |   If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 | 
619 |                      END OF TERMS AND CONDITIONS
620 | 
621 |             How to Apply These Terms to Your New Programs
622 | 
623 |   If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 | 
627 |   To do so, attach the following notices to the program.  It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 | 
632 |     <one line to give the program's name and a brief idea of what it does.>
633 |     Copyright (C) <year>  <name of author>
634 | 
635 |     This program is free software: you can redistribute it and/or modify
636 |     it under the terms of the GNU Affero General Public License as published by
637 |     the Free Software Foundation, either version 3 of the License, or
638 |     (at your option) any later version.
639 | 
640 |     This program is distributed in the hope that it will be useful,
641 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
642 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
643 |     GNU Affero General Public License for more details.
644 | 
645 |     You should have received a copy of the GNU Affero General Public License
646 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
647 | 
648 | Also add information on how to contact you by electronic and paper mail.
649 | 
650 |   If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source.  For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code.  There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 | 
658 |   You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 | 


--------------------------------------------------------------------------------