├── inst
│   ├── extdata
│   │   └── indrops
│   │       ├── 2018-01-01_bcbio
│   │       │   ├── bcbio-nextgen.log
│   │       │   ├── bcbio-nextgen-commands.log
│   │       │   ├── project-summary.yaml
│   │       │   └── programs.txt
│   │       ├── metadata.csv
│   │       └── multiplexed-AAAAAAAA
│   │           ├── multiplexed-AAAAAAAA.mtx.rownames
│   │           ├── multiplexed-AAAAAAAA.mtx.colnames
│   │           └── multiplexed-AAAAAAAA-barcodes.tsv
│   └── rmarkdown
│       └── templates
│           └── quality-control
│               ├── template.yaml
│               └── skeleton
│                   └── skeleton.Rmd
├── tests
│   ├── testthat
│   │   ├── .gitignore
│   │   ├── test-show.R
│   │   ├── helper-globals.R
│   │   ├── helper-cache.R
│   │   ├── test-updateObject.R
│   │   ├── test-plotReadsPerCell.R
│   │   ├── test-bcbioSingleCell.R
│   │   └── test-filterCells.R
│   └── testthat.R
├── .gitignore
├── pkgdown
│   └── extra.css
├── data
│   └── bcb.rda
├── .Rbuildignore
├── R
│   ├── data.R
│   ├── AllGenerics.R
│   ├── AllGlobals.R
│   ├── reexports.R
│   ├── show-methods.R
│   ├── internal-barcodes.R
│   ├── extract-methods.R
│   ├── package.R
│   ├── AllClasses.R
│   ├── internal-import.R
│   ├── updateObject-methods.R
│   ├── AllGenerators.R
│   └── plotReadsPerCell-methods.R
├── man
│   ├── bcbioSingleCellTestsUrl.Rd
│   ├── bcb.Rd
│   ├── show.Rd
│   ├── bcbioSingleCell-class.Rd
│   ├── updateObject.Rd
│   ├── reexports.Rd
│   ├── bcbioSingleCell-package.Rd
│   ├── plotReadsPerCell.Rd
│   ├── extract.Rd
│   └── bcbioSingleCell.Rd
├── package.Rproj
├── _pkgdown.yml
├── data-raw
│   └── bcb.R
├── .lintr
├── DESCRIPTION
├── todo.org
├── README.md
├── NAMESPACE
└── LICENSE
/inst/extdata/indrops/2018-01-01_bcbio/bcbio-nextgen.log:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/testthat/.gitignore:
--------------------------------------------------------------------------------
1 | *.rda
2 | subsetPerSample
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .RData
2 | .Rcheck
3 | .Rhistory
4 | .Rproj.user
5 | docs/
6 |
--------------------------------------------------------------------------------
/pkgdown/extra.css:
--------------------------------------------------------------------------------
1 | @import url("https://steinbaugh.com/css/pkgdown.css");
2 |
--------------------------------------------------------------------------------
/data/bcb.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hbc/bcbioSingleCell/HEAD/data/bcb.rda
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | ## nolint start
2 | library(testthat)
3 | library(bcbioSingleCell)
4 | ## nolint end
5 |
6 | test_check("bcbioSingleCell")
7 |
--------------------------------------------------------------------------------
/inst/extdata/indrops/metadata.csv:
--------------------------------------------------------------------------------
1 | fileName,description,index,sequence,sampleName,aggregate
2 | multiplexed.fastq.gz,multiplexed,1,TTTTTTTT,rep_1,sample
3 |
--------------------------------------------------------------------------------
/tests/testthat/test-show.R:
--------------------------------------------------------------------------------
1 | test_that("bcbioSingleCell", {
2 | output <- capture.output(show(bcb))
3 | expect_true(grepl("^bcbioSingleCell", output[[1L]]))
4 | })
5 |
--------------------------------------------------------------------------------
/tests/testthat/helper-globals.R:
--------------------------------------------------------------------------------
1 | ## nolint start
2 | data <- utils::data
3 | hasInternet <- goalie::hasInternet
4 | ## nolint end
5 |
6 | data(bcb, envir = environment())
7 |
--------------------------------------------------------------------------------
/inst/rmarkdown/templates/quality-control/template.yaml:
--------------------------------------------------------------------------------
1 | name: Quality Control
2 | description: >
3 | Template for single-cell RNA-seq quality control report.
4 | create_dir: false
5 |
--------------------------------------------------------------------------------
/tests/testthat/helper-cache.R:
--------------------------------------------------------------------------------
1 | lst <- AcidDevTools::cacheTestFiles(
2 | pkg = .pkgName,
3 | files = "bcbioSingleCell_0.1.0.rds"
4 | )
5 | cacheDir <- lst[["cacheDir"]]
6 | rm(lst)
7 |
--------------------------------------------------------------------------------
/inst/extdata/indrops/2018-01-01_bcbio/bcbio-nextgen-commands.log:
--------------------------------------------------------------------------------
1 | cellularBarcodeCutoff: --cb_cutoff 1000
2 | level: --genemap Homo_sapiens.GRCh38.90-tx2gene.tsv
3 | umiType: umis fastqtransform --separate_cb /XXX/umis/harvard-indrop-v3-transform.json
4 |
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^CONTRIBUTING\.md$
3 | ^LICENSE$
4 | ^Meta$
5 | ^[-._a-zA-Z0-9]+\.(R|Rcheck|Rproj|gz|html|org|pdf|png|sh|tar|txt|xz|yaml|yml|zip)$
6 | ^[._][-._a-zA-Z0-9]+$
7 | ^\.Rproj\.user$
8 | ^cran-comments\.md$
9 | ^data-raw$
10 | ^doc$
11 | ^docs$
12 | ^pkgdown$
13 | ^todo\.org$
14 |
--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
1 | #' bcbio single-cell RNA-seq example data set
2 | #'
3 | #' Harvard inDrops v3 example data
4 | #'
5 | #' @author Michael Steinbaugh
6 | #' @note Updated 2019-08-12.
7 | #' @usage data(bcb)
8 | #' @return `bcbioSingleCell`.
9 | #'
10 | #' @examples
11 | #' data(bcb)
12 | #' class(bcb)
13 | "bcb"
14 |
--------------------------------------------------------------------------------
/tests/testthat/test-updateObject.R:
--------------------------------------------------------------------------------
1 | test_that("bcbioSingleCell", {
2 | x <- updateObject(bcb)
3 | expect_s4_class(x, "bcbioSingleCell")
4 | })
5 |
6 | test_that("v0.1 update", {
7 | invalid <- import(file.path(cacheDir, "bcbioSingleCell_0.1.0.rds"))
8 | valid <- updateObject(invalid)
9 | expect_s4_class(valid, "bcbioSingleCell")
10 | })
11 |
--------------------------------------------------------------------------------
/R/AllGenerics.R:
--------------------------------------------------------------------------------
1 | #' @export
2 | #' @name plotReadsPerCell
3 | #' @rdname plotReadsPerCell
4 | #' @usage plotReadsPerCell(object, ...)
5 | NULL
6 |
7 | #' @export
8 | #' @name show
9 | #' @rdname show
10 | #' @usage show(object)
11 | NULL
12 |
13 | #' @export
14 | #' @name updateObject
15 | #' @rdname updateObject
16 | #' @usage updateObject(object, ..., verbose = FALSE)
17 | NULL
18 |
--------------------------------------------------------------------------------
/tests/testthat/test-plotReadsPerCell.R:
--------------------------------------------------------------------------------
1 | ## Example dataset doesn't have a cellular barcode cutoff because we removed the
2 | ## bcbio commands log file (which conflicts with Travis CI).
3 | test_that("geom", {
4 | for (geom in eval(formals(`plotReadsPerCell,bcbioSingleCell`)[["geom"]])) {
5 | x <- plotReadsPerCell(bcb, geom = geom)
6 | expect_s3_class(x, "ggplot")
7 | }
8 | })
9 |
--------------------------------------------------------------------------------
/man/bcbioSingleCellTestsUrl.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGlobals.R
3 | \docType{data}
4 | \name{bcbioSingleCellTestsUrl}
5 | \alias{bcbioSingleCellTestsUrl}
6 | \title{Cache URL}
7 | \format{
8 | An object of class \code{character} of length 1.
9 | }
10 | \usage{
11 | bcbioSingleCellTestsUrl
12 | }
13 | \description{
14 | Cache URL
15 | }
16 | \examples{
17 | bcbioSingleCellTestsUrl
18 | }
19 | \keyword{internal}
20 |
--------------------------------------------------------------------------------
/package.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 4
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 |
--------------------------------------------------------------------------------
/R/AllGlobals.R:
--------------------------------------------------------------------------------
1 | .pkgName <- packageName()
2 | .pkgVersion <- packageVersion(.pkgName)
3 |
4 | ## This is also defined in AcidPlots.
5 | .geom <- c("histogram", "ecdf", "violin", "ridgeline", "boxplot")
6 |
7 | ## We're adding an additional raw reads column (pre-UMI disambiguation).
8 | .metricsCols <- c("nRead", metricsCols)
9 |
10 | .requiredAssays <- "counts"
11 |
12 | #' Cache URL
13 | #' @keywords internal
14 | #' @export
15 | #' @examples
16 | #' bcbioSingleCellTestsUrl
17 | bcbioSingleCellTestsUrl <- "https://r.acidgenomics.com/testdata/bcbiosinglecell"
18 |
--------------------------------------------------------------------------------
/man/bcb.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{bcb}
5 | \alias{bcb}
6 | \title{bcbio single-cell RNA-seq example data set}
7 | \format{
8 | An object of class \code{bcbioSingleCell} with 50 rows and 100 columns.
9 | }
10 | \usage{
11 | data(bcb)
12 | }
13 | \value{
14 | \code{bcbioSingleCell}.
15 | }
16 | \description{
17 | Harvard inDrops v3 example data
18 | }
19 | \note{
20 | Updated 2019-08-12.
21 | }
22 | \examples{
23 | data(bcb)
24 | class(bcb)
25 | }
26 | \author{
27 | Michael Steinbaugh
28 | }
29 | \keyword{datasets}
30 |
--------------------------------------------------------------------------------
/man/show.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGenerics.R, R/show-methods.R
3 | \name{show}
4 | \alias{show}
5 | \alias{show,bcbioSingleCell-method}
6 | \title{Show an object}
7 | \usage{
8 | show(object)
9 |
10 | \S4method{show}{bcbioSingleCell}(object)
11 | }
12 | \arguments{
13 | \item{object}{Object.}
14 | }
15 | \value{
16 | Console output.
17 | }
18 | \description{
19 | Show an object
20 | }
21 | \note{
22 | Updated 2022-05-09.
23 | }
24 | \examples{
25 | data(bcb)
26 |
27 | ## bcbioSingleCell ====
28 | show(bcb)
29 | }
30 | \author{
31 | Michael Steinbaugh
32 | }
33 |
--------------------------------------------------------------------------------
/inst/extdata/indrops/2018-01-01_bcbio/project-summary.yaml:
--------------------------------------------------------------------------------
1 | date: '2018-01-01 00:00:00.000000'
2 | upload: /n/data1/XXX/final
3 | bcbio_system: /n/app/bcbio/dev/galaxy/bcbio_system.yaml
4 | samples:
5 | - description: multiplexed-AAAAAAAA
6 | dirs:
7 | config: /n/app/bcbio/dev/galaxy
8 | fastq: null
9 | flowcell: null
10 | galaxy: /n/app/bcbio/dev/galaxy
11 | work: /n/scratch2/XXX/data/bcbio
12 | genome_build: hg38
13 | genome_resources:
14 | rnaseq:
15 | transcripts: /n/app/bcbio/dev/genomes/Hsapiens/hg38/rnaseq/ref-transcripts.gtf
16 | metadata:
17 | batch: null
18 | phenotype: ''
19 |
--------------------------------------------------------------------------------
/man/bcbioSingleCell-class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllClasses.R
3 | \docType{class}
4 | \name{bcbioSingleCell-class}
5 | \alias{bcbioSingleCell-class}
6 | \title{bcbio single-cell RNA-seq data set}
7 | \description{
8 | \code{bcbioSingleCell} is an S4 class that extends \code{SingleCellExperiment}, and is
9 | designed to store a bcbio single-cell RNA-seq analysis. This class contains
10 | read counts saved as a sparse matrix (\code{sparseMatrix}), sample metadata, and
11 | cell quality control metrics.
12 | }
13 | \note{
14 | Updated 2022-05-09.
15 | }
16 | \author{
17 | Michael Steinbaugh, Rory Kirchner
18 | }
19 |
--------------------------------------------------------------------------------
/tests/testthat/test-bcbioSingleCell.R:
--------------------------------------------------------------------------------
1 | uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell")
2 |
3 | ## Minimal mode, with no metadata or annotations.
4 | ## This is fast but doesn't slot a lot of useful info.
5 | test_that("Minimal mode", {
6 | x <- bcbioSingleCell(uploadDir = uploadDir)
7 | expect_s4_class(x, "bcbioSingleCell")
8 | })
9 |
10 | test_that("User-defined metadata", {
11 |     ## Bind the metadata CSV by name (`=`). Writing `<-` inside the call
12 |     ## passes the path positionally and assigns a stray local variable.
13 |     csvFile <- file.path(uploadDir, "metadata.csv")
14 |     x <- bcbioSingleCell(uploadDir = uploadDir, sampleMetadataFile = csvFile)
15 |     expect_s4_class(x, "bcbioSingleCell")
16 | })
17 |
18 | ## Automatic organism annotations from AnnotationHub.
19 | test_that("AnnotationHub", {
20 | x <- bcbioSingleCell(
21 | uploadDir = uploadDir,
22 | organism = "Homo sapiens"
23 | )
24 | expect_s4_class(x, "bcbioSingleCell")
25 | })
26 |
--------------------------------------------------------------------------------
/R/reexports.R:
--------------------------------------------------------------------------------
1 | #' @export
2 | #' @importFrom AcidGenerics calculateMetrics
3 | AcidGenerics::calculateMetrics
4 |
5 | #' @export
6 | #' @importFrom AcidGenerics filterCells
7 | AcidGenerics::filterCells
8 |
9 | #' @export
10 | #' @importFrom AcidGenerics plotBarcodeRanks
11 | AcidGenerics::plotBarcodeRanks
12 |
13 | #' @export
14 | #' @importFrom AcidGenerics plotCellCounts
15 | AcidGenerics::plotCellCounts
16 |
17 | #' @export
18 | #' @importFrom AcidGenerics plotCountsPerCell
19 | AcidGenerics::plotCountsPerCell
20 |
21 | #' @export
22 | #' @importFrom AcidGenerics plotCountsVsFeatures
23 | AcidGenerics::plotCountsVsFeatures
24 |
25 | #' @export
26 | #' @importFrom AcidGenerics plotFeaturesPerCell
27 | AcidGenerics::plotFeaturesPerCell
28 |
29 | #' @export
30 | #' @importFrom AcidGenerics plotMitoRatio
31 | AcidGenerics::plotMitoRatio
32 |
33 | #' @export
34 | #' @importFrom AcidGenerics plotNovelty
35 | AcidGenerics::plotNovelty
36 |
37 | #' @export
38 | #' @importFrom AcidGenerics plotQc
39 | AcidGenerics::plotQc
40 |
--------------------------------------------------------------------------------
/inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA.mtx.rownames:
--------------------------------------------------------------------------------
1 | ENSG00000071082
2 | ENSG00000100316
3 | ENSG00000106631
4 | ENSG00000108821
5 | ENSG00000112306
6 | ENSG00000115414
7 | ENSG00000125691
8 | ENSG00000133112
9 | ENSG00000137818
10 | ENSG00000138326
11 | ENSG00000140988
12 | ENSG00000142534
13 | ENSG00000142541
14 | ENSG00000142937
15 | ENSG00000143947
16 | ENSG00000147403
17 | ENSG00000147604
18 | ENSG00000156508
19 | ENSG00000159251
20 | ENSG00000164692
21 | ENSG00000167244
22 | ENSG00000167526
23 | ENSG00000168542
24 | ENSG00000174748
25 | ENSG00000177600
26 | ENSG00000184009
27 | ENSG00000186468
28 | ENSG00000197756
29 | ENSG00000198034
30 | ENSG00000198695
31 | ENSG00000198712
32 | ENSG00000198727
33 | ENSG00000198763
34 | ENSG00000198786
35 | ENSG00000198804
36 | ENSG00000198840
37 | ENSG00000198886
38 | ENSG00000198888
39 | ENSG00000198899
40 | ENSG00000198938
41 | ENSG00000205542
42 | ENSG00000225972
43 | ENSG00000229117
44 | ENSG00000229344
45 | ENSG00000233927
46 | ENSG00000237973
47 | ENSG00000248527
48 | ENSG00000256618
49 | ENSG00000269028
50 | ENSG00000282105
51 |
--------------------------------------------------------------------------------
/man/updateObject.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGenerics.R, R/updateObject-methods.R
3 | \name{updateObject}
4 | \alias{updateObject}
5 | \alias{updateObject,bcbioSingleCell-method}
6 | \title{Update object}
7 | \usage{
8 | updateObject(object, ..., verbose = FALSE)
9 |
10 | \S4method{updateObject}{bcbioSingleCell}(object, ..., verbose = FALSE)
11 | }
12 | \arguments{
13 | \item{object}{Object.}
14 |
15 | \item{...}{Additional arguments.}
16 |
17 | \item{verbose}{\code{logical(1)}.
18 | Run the function with verbose output.}
19 | }
20 | \value{
21 | Modified object.
22 | }
23 | \description{
24 | Update object
25 | }
26 | \note{
27 | Updated 2023-12-04.
28 | }
29 | \examples{
30 | data(bcb)
31 |
32 | ## bcbioSingleCell ====
33 | updateObject(bcb)
34 |
35 | ## Example that depends on remote file.
36 | ## > x <- import(
37 | ## > con = file.path(
38 | ## > bcbioSingleCellTestsUrl,
39 | ## > "bcbioSingleCell_0.1.0.rds"
40 | ## > )
41 | ## > )
42 | ## > x <- updateObject(x)
43 | ## > x
44 | }
45 | \author{
46 | Michael Steinbaugh
47 | }
48 |
--------------------------------------------------------------------------------
/inst/extdata/indrops/2018-01-01_bcbio/programs.txt:
--------------------------------------------------------------------------------
1 | bamtools,2.4.0
2 | bcbio-nextgen,1.0.6a0-d2b5b522
3 | bcbio-variation,0.2.6
4 | bcftools,1.6
5 | bedtools,2.26.0
6 | biobambam,2.0.79
7 | bioconductor-bubbletree,2.6.0
8 | bowtie2,2.2.8
9 | bwa,0.7.16
10 | chanjo,
11 | cnvkit,0.9.0
12 | cufflinks,2.2.1
13 | cutadapt,1.14
14 | fastqc,0.11.5
15 | featurecounts,1.4.4
16 | freebayes,1.1.0.46
17 | gatk,3.8
18 | gatk-framework,3.6.24
19 | gatk4,4.0b6
20 | gemini,0.20.1
21 | grabix,0.1.8
22 | hisat2,2.1.0
23 | htseq,0.9.1
24 | lumpy-sv,0.2.13
25 | manta,1.1.0
26 | metasv,0.4.0
27 | mirdeep2,2.0.0.7
28 | mutect,1.1.5
29 | novoalign,3.07.00
30 | novosort,V3.00.02
31 | oncofuse,1.1.1
32 | phylowgs,20150714
33 | picard,2.13
34 | platypus-variant,0.8.1.1
35 | preseq,2.0.2
36 | qualimap,2.2.2a
37 | rna-star,
38 | rtg-tools,3.8.4
39 | sailfish,0.10.1
40 | salmon,0.8.2
41 | sambamba,0.6.6
42 | samblaster,0.1.24
43 | samtools,1.6
44 | scalpel,0.5.3
45 | seqbuster,3.1
46 | snpeff,4.3i
47 | vardict,2017.04.18
48 | vardict-java,1.5.1
49 | variant-effect-predictor,87
50 | varscan,2.4.3
51 | vcflib,1.0.0_rc1
52 | vt,2015.11.10
53 | wham,1.7.0.311
54 |
--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reexports.R
3 | \docType{import}
4 | \name{reexports}
5 | \alias{reexports}
6 | \alias{calculateMetrics}
7 | \alias{filterCells}
8 | \alias{plotBarcodeRanks}
9 | \alias{plotCellCounts}
10 | \alias{plotCountsPerCell}
11 | \alias{plotCountsVsFeatures}
12 | \alias{plotFeaturesPerCell}
13 | \alias{plotMitoRatio}
14 | \alias{plotNovelty}
15 | \alias{plotQc}
16 | \title{Objects exported from other packages}
17 | \keyword{internal}
18 | \description{
19 | These objects are imported from other packages. Follow the links
20 | below to see their documentation.
21 |
22 | \describe{
23 | \item{AcidGenerics}{\code{\link[AcidGenerics]{calculateMetrics}}, \code{\link[AcidGenerics]{filterCells}}, \code{\link[AcidGenerics]{plotBarcodeRanks}}, \code{\link[AcidGenerics]{plotCellCounts}}, \code{\link[AcidGenerics]{plotCountsPerCell}}, \code{\link[AcidGenerics]{plotCountsVsFeatures}}, \code{\link[AcidGenerics]{plotFeaturesPerCell}}, \code{\link[AcidGenerics]{plotMitoRatio}}, \code{\link[AcidGenerics]{plotNovelty}}, \code{\link[AcidGenerics]{plotQc}}}
24 | }}
25 |
26 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | authors:
2 | Michael Steinbaugh:
3 | href: https://mike.steinbaugh.com/
4 | Rory Kirchner:
5 | href: https://github.com/roryk/
6 | Mary Piper:
7 | href: https://github.com/marypiper/
8 | Victor Barrera:
9 | href: https://github.com/vbarrera/
10 | Shannan Ho Sui:
11 | href: https://github.com/sjhosui/
12 | Harvard Chan Bioinformatics Core:
13 | href: https://bioinformatics.sph.harvard.edu/
14 | Acid Genomics:
15 | href: https://acidgenomics.com/
16 | navbar:
17 | components:
18 | acid:
19 | icon: "fas fa-vial fa-lg"
20 | href: https://acidgenomics.com/
21 | structure:
22 | right: [acid, github]
23 | news:
24 | - one_page: false
25 | reference:
26 | - title: S4 classes
27 | contents:
28 | - starts_with("bcbioSingleCell")
29 | - extract
30 | - show
31 | - updateObject
32 | - title: Plots
33 | contents:
34 | - starts_with("plot")
35 | - title: Example data
36 | contents:
37 | - bcb
38 | - title: Reexports
39 | contents:
40 | - reexports
41 | template:
42 | bootstrap: 5
43 | url: https://r.acidgenomics.com/packages/bcbiosinglecell
44 |
--------------------------------------------------------------------------------
/man/bcbioSingleCell-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/package.R
3 | \docType{package}
4 | \name{bcbioSingleCell-package}
5 | \title{bcbioSingleCell}
6 | \description{
7 | Import and analyze \href{https://bcbio-nextgen.readthedocs.io/}{bcbio} single-cell
8 | RNA-seq data.
9 | }
10 | \seealso{
11 | Useful links:
12 | \itemize{
13 | \item \url{https://r.acidgenomics.com/packages/bcbiosinglecell/}
14 | \item \url{https://github.com/hbc/bcbioSingleCell/}
15 | \item Report bugs at \url{https://github.com/hbc/bcbioSingleCell/issues/}
16 | }
17 |
18 | }
19 | \author{
20 | \strong{Maintainer}: Michael Steinbaugh \email{mike@steinbaugh.com} (\href{https://orcid.org/0000-0002-2403-2221}{ORCID})
21 |
22 | Authors:
23 | \itemize{
24 | \item Rory Kirchner \email{roryk@alum.mit.edu} (\href{https://orcid.org/0000-0003-4814-5885}{ORCID})
25 | \item Mary Piper \email{mary.piper@gmail.com} (\href{https://orcid.org/0000-0003-2699-3840}{ORCID})
26 | \item Victor Barrera \email{barrera.vic@gmail.com} (\href{https://orcid.org/0000-0003-0590-4634}{ORCID})
27 | \item Shannan Ho Sui \email{shosui@hsph.harvard.edu} (\href{https://orcid.org/0000-0002-6191-4709}{ORCID})
28 | }
29 |
30 | Other contributors:
31 | \itemize{
32 | \item Harvard Chan Bioinformatics Core \email{bioinformatics@hsph.harvard.edu} [copyright holder, funder]
33 | \item Acid Genomics [copyright holder, funder]
34 | }
35 |
36 | }
37 | \keyword{internal}
38 |
--------------------------------------------------------------------------------
/man/plotReadsPerCell.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGenerics.R, R/plotReadsPerCell-methods.R
3 | \name{plotReadsPerCell}
4 | \alias{plotReadsPerCell}
5 | \alias{plotReadsPerCell,bcbioSingleCell-method}
6 | \title{Plot read counts per cell}
7 | \usage{
8 | plotReadsPerCell(object, ...)
9 |
10 | \S4method{plotReadsPerCell}{bcbioSingleCell}(
11 | object,
12 | interestingGroups = NULL,
13 | geom = c("histogram", "ecdf", "violin", "ridgeline", "boxplot"),
14 | cutoffLine = FALSE,
15 | title = "Reads per cell"
16 | )
17 | }
18 | \arguments{
19 | \item{object}{Object.}
20 |
21 | \item{interestingGroups}{\code{character}.
22 | Groups of interest to use for visualization.
23 | Corresponds to factors describing the columns of the object.}
24 |
25 | \item{geom}{\code{character(1)}.
26 | Plot type. Uses \code{\link[base:match.arg]{match.arg()}} internally and defaults to
27 | the first argument in the \code{character} vector.}
28 |
29 | \item{cutoffLine}{\code{logical(1)}.
30 | Include a line marking the cutoff.}
31 |
32 | \item{title}{\code{character(1)}.
33 | Title.}
34 |
35 | \item{...}{Additional arguments.}
36 | }
37 | \value{
38 | \code{ggplot}.
39 | }
40 | \description{
41 | Plot the distribution of read counts for all unfiltered cellular barcodes.
42 | }
43 | \note{
44 | Updated 2023-12-04.
45 | }
46 | \examples{
47 | data(bcb)
48 |
49 | ## bcbioSingleCell ====
50 | plotReadsPerCell(bcb, geom = "histogram")
51 | plotReadsPerCell(bcb, geom = "ecdf")
52 | }
53 | \author{
54 | Michael Steinbaugh, Rory Kirchner
55 | }
56 |
--------------------------------------------------------------------------------
/tests/testthat/test-filterCells.R:
--------------------------------------------------------------------------------
1 | bcb <- calculateMetrics(bcb)
2 |
3 | test_that("sampleNames", {
4 | expect_identical(
5 | object = sampleNames(bcb),
6 | expected = c("multiplexed_AAAAAAAA" = "rep_1")
7 | )
8 | })
9 |
10 | ## Expecting an object with the same dimensions by default.
11 | test_that("No filtering", {
12 | x <- filterCells(bcb)
13 | expect_s4_class(x, "bcbioSingleCell")
14 | expect_identical(dim(x), dim(bcb))
15 | })
16 |
17 | ## Refer to the quality control R Markdown for actual recommended cutoffs.
18 | ## These are skewed, and designed to work with our minimal dataset.
19 | test_that("Parameterized cutoff tests", {
20 | Map(
21 | args = list(
22 | list("minCounts" = 2000L),
23 | list("maxCounts" = 2500L),
24 | list("minFeatures" = 45L),
25 | list("maxFeatures" = 49L),
26 | list("maxMitoRatio" = 0.1),
27 | list("minNovelty" = 0.5),
28 | list("minCellsPerFeature" = 95L)
29 | ),
30 | dim = list(
31 | c(50L, 35L),
32 | c(50L, 88L),
33 | c(50L, 95L),
34 | c(50L, 81L),
35 | c(50L, 22L),
36 | c(50L, 81L),
37 | c(45L, 100L)
38 | ),
39 | f = function(args, dim) {
40 | args[["object"]] <- bcb
41 | x <- do.call(what = filterCells, args = args)
42 | expect_s4_class(x, "bcbioSingleCell")
43 | expect_s4_class(metadata(x)[["filterCells"]], "SimpleList")
44 | expect_true(metadata(x)[["subset"]])
45 | expect_identical(dim(x), dim)
46 | }
47 | )
48 | })
49 |
50 | test_that("Expected cutoff failure", {
51 | expect_error(
52 | object = filterCells(bcb, minCounts = Inf),
53 | regexp = "No cells passed"
54 | )
55 | })
56 |
--------------------------------------------------------------------------------
/data-raw/bcb.R:
--------------------------------------------------------------------------------
1 | ## inDrops example data
2 | ## Using harvard-indrop-v3 barcodes.
3 | ## Updated 2022-06-09.
4 | ## nolint start
5 | suppressPackageStartupMessages({
6 | library(devtools)
7 | library(usethis)
8 | library(pipette)
9 | })
10 | ## nolint end
11 | load_all()
12 | limit <- structure(2e6L, class = "object_size") # nolint
13 | ## Minimal example bcbio upload directory.
14 | ## Include the top 50 genes (rows) and cells from the barcodes file (columns).
15 | uploadDir <- file.path("..", "inst", "extdata", "indrops")
16 | sample <- "multiplexed-AAAAAAAA"
17 | countsFile <- file.path(
18 | uploadDir,
19 | sample,
20 | paste0(sample, ".mtx")
21 | )
22 | rownamesFile <- file.path(
23 | uploadDir,
24 | sample,
25 | paste0(sample, ".mtx.rownames")
26 | )
27 | colnamesFile <- file.path(
28 | uploadDir,
29 | sample,
30 | paste0(sample, ".mtx.colnames")
31 | )
32 | barcodesFile <- file.path(
33 | uploadDir,
34 | sample,
35 | paste0(sample, "-barcodes.tsv")
36 | )
37 | stopifnot(all(file.exists(
38 | c(countsFile, rownamesFile, colnamesFile, barcodesFile)
39 | )))
40 | barcodes <- import(barcodesFile, colnames = FALSE)
41 | export(object = barcodes, con = barcodesFile, colnames = FALSE)
42 | counts <- import(countsFile)
43 | topGenes <-
44 | counts |>
45 | Matrix::rowSums() |>
46 | sort(decreasing = TRUE) |>
47 | head(n = 50L)
48 | genes <- sort(names(topGenes))
49 | cells <- barcodes[[1L]]
50 | counts <- counts[genes, cells]
51 | export(object = counts, con = countsFile)
52 | ## Create bcbioSingleCell object.
53 | bcb <- bcbioSingleCell(
54 | uploadDir = uploadDir,
55 | sampleMetadataFile = file.path(uploadDir, "metadata.csv"),
56 | organism = "Homo sapiens",
57 | ensemblRelease = 90L
58 | )
59 | stopifnot(
60 | object.size(bcb) < limit,
61 | validObject(bcb)
62 | )
63 | use_data(bcb, compress = "xz", overwrite = TRUE)
64 |
--------------------------------------------------------------------------------
/R/show-methods.R:
--------------------------------------------------------------------------------
1 | #' Show an object
2 | #'
3 | #' @name show
4 | #' @author Michael Steinbaugh
5 | #' @note Updated 2022-05-09.
6 | #'
7 | #' @inheritParams AcidRoxygen::params
8 | #'
9 | #' @return Console output.
10 | #'
11 | #' @examples
12 | #' data(bcb)
13 | #'
14 | #' ## bcbioSingleCell ====
15 | #' show(bcb)
16 | NULL
17 |
18 |
19 |
20 | ## Print the class and optional version as a header. Updated 2019-07-24.
21 | .showHeader <- function(object, version = NULL) {
22 | cat(paste(class(object), version), sep = "\n")
23 | }
24 |
25 |
26 |
27 | ## Internal show method, shared between bcbioSingleCell and CellRanger.
28 | ## Updated 2019-08-08.
29 | `show,bcbioSingleCell` <- # nolint
30 |     function(object) {
31 |         validObject(object)
32 |         ## Object-level metadata, then row ranges metadata.
33 |         meta <- metadata(object)
34 |         rowMeta <- metadata(rowRanges(object))
35 |         .showHeader(object, version = meta[["version"]])
36 |         ## Named values handed to `showSlotInfo()` for printing.
37 |         info <- list(
38 |             uploadDir = meta[["uploadDir"]],
39 |             dates = as.character(c(
40 |                 bcbio = meta[["runDate"]],
41 |                 R = meta[["date"]]
42 |             )),
43 |             level = meta[["level"]],
44 |             sampleMetadataFile = meta[["sampleMetadataFile"]],
45 |             organism = meta[["organism"]],
46 |             gffFile = meta[["gffFile"]],
47 |             annotationHub = rowMeta[["annotationHub"]],
48 |             ensemblRelease = rowMeta[["release"]],
49 |             genomeBuild = rowMeta[["build"]],
50 |             interestingGroups = meta[["interestingGroups"]],
51 |             filtered = isSubset("filterCells", names(meta))
52 |         )
53 |         showSlotInfo(info)
54 |         ## Append the inherited SingleCellExperiment show output.
55 |         sce <- as(object, "SingleCellExperiment")
56 |         cat(capture.output(show(sce)), sep = "\n")
57 |     }
58 |
59 |
60 |
61 | #' @rdname show
62 | #' @export
63 | setMethod(
64 | f = "show",
65 | signature = signature(object = "bcbioSingleCell"),
66 | definition = `show,bcbioSingleCell`
67 | )
68 |
--------------------------------------------------------------------------------
/inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA.mtx.colnames:
--------------------------------------------------------------------------------
1 | AAACACTA-CTTCGATT
2 | AAACTACA-CCACATTA
3 | AACTGCCT-GCAAGGAC
4 | AAGAAGGT-TCTGTGGT
5 | AAGCCTTC-TAAATAGG
6 | AATAAGGA-CCACATTA
7 | AATCGAAG-CCCAAGCA
8 | AATCGTTC-CCCTAACC
9 | ACCCTCAA-CTGCGTTG
10 | ACCTGAAG-GAGCGGTA
11 | ACTAATTG-CTTTAATC
12 | ACTAGAGC-TCGACACC
13 | AGAAACCA-ATACTCTT
14 | AGCTCCAC-CCTGACAC
15 | AGGTAAGC-TCCCAATC
16 | ATATGCAA-GGCGGTTT
17 | ATCAATCG-GTTGTCAT
18 | ATCGCGCT-AGAGGTGG
19 | CAACGCAG-CTCGCGTA
20 | CACAACCT-GGAGAAGC
21 | CCATGCAT-TTCCGCTC
22 | CCCGTTCT-AAAGCCTA
23 | CCGAGATC-ATGGGCAC
24 | CCGATACG-CAAGAGGG
25 | CCGGAAAT-GTTGTCAT
26 | CCTACGCT-AGCAGAAC
27 | CGTGTGTT-AGGAAGAC
28 | CGTTTCGT-GACAGATA
29 | CTAGCACG-AATCGGGT
30 | CTAGTAGG-TTGAGGGT
31 | CTCACATC-ACCCACGA
32 | CTCCTCCA-CGTATTTC
33 | CTCCTCCA-TGTACACG
34 | CTCTATAG-GCAAAGCC
35 | CTGTCGCA-TTTAACAG
36 | CTGTTAAA-AATGAATG
37 | CTGTTAAA-GACAGATA
38 | CTTAGGCC-AGAAGTCC
39 | CTTAGTGT-TCCAGGGA
40 | CTTCTACG-CTTCTTCG
41 | CTTTATCC-CTACCGTT
42 | GACACCTG-CTCAGAAT
43 | GACTAGCG-GAAGTGCC
44 | GAGAAACC-ACAGCGGA
45 | GAGTGTAC-ACGCAGAG
46 | GATTAAAG-ACTAGCCA
47 | GATTACTT-GAGAATTG
48 | GATTTCCC-ATGTTGGC
49 | GCAAACTG-CTTCAGGT
50 | GCCAACAT-CGTGGATA
51 | GCCTGGTA-CGCTCTCA
52 | GCGCTGAT-GACAAAGG
53 | GCGTGCAA-TCTGTGGT
54 | GGAACGAA-TTGCACGC
55 | GGCGACAA-TTCCGAGT
56 | GGCTTTGC-AACCCTTG
57 | GGCTTTGC-AGCGAAGT
58 | GGCTTTGC-TTAGGACC
59 | GGGATTAC-AAATGTCG
60 | GGTTGAGA-CTGTCTGG
61 | GGTTGAGA-GAGAGTAT
62 | GTAAGCCG-CGATTGAT
63 | GTAATCTG-CGCTAATA
64 | GTACGCTT-CCCAAGCA
65 | GTACGGAC-CAACAAAT
66 | GTCCACTA-CTTCTGGA
67 | GTCCACTA-GACAAAGG
68 | GTCCGTCA-ATACTCTT
69 | GTCTAATC-GGCCCTTA
70 | GTGAACTC-CAAGAGGG
71 | GTGAGGCA-CAGTTTGC
72 | GTGATAAA-CGCTCTCA
73 | GTGCCCAT-GTGTCGGA
74 | GTGGTGCT-GGAGAAGC
75 | GTTACTAG-AGAAGTTA
76 | GTTACTAG-CCCTTGGT
77 | GTTCTGCT-TGGCTACC
78 | TAATCCAT-CGGAATTT
79 | TACCGCTC-CCCATAGC
80 | TACGCGAG-TGTAGTTT
81 | TACGTTCG-TTGATCTA
82 | TAGGCTTT-CGGACAAC
83 | TAGTAGCC-TAGTGTTT
84 | TATTAGCG-CCCTAACC
85 | TCAGCCTC-TGCAAGGG
86 | TCCGACAC-GGGAGGTA
87 | TCGCAATC-CGAACGTA
88 | TCGGTCAT-AGCACCAC
89 | TCTAAACT-CTCTTGAC
90 | TCTTTGAC-CGCTCTCA
91 | TGAGAGCG-CCTATTCA
92 | TGAGAGCG-GAAGTGCC
93 | TGAGCACA-TGCTATTT
94 | TGCGACTA-CCGTGTTT
95 | TGCGACTA-TTCACATA
96 | TGCTTCAT-GCAGGGTA
97 | TGCTTGGG-CAACAAAT
98 | TGGACGGA-TTGTTTAC
99 | TGGGAATT-ATATAGGA
100 | TGTTATCA-ACGCAGAG
101 |
--------------------------------------------------------------------------------
/R/internal-barcodes.R:
--------------------------------------------------------------------------------
1 | #' Raw reads per cellular barcode
2 | #'
3 | #' Read counts prior to UMI disambiguation.
4 | #'
5 | #' @author Michael Steinbaugh
6 | #' @keywords internal
7 | #' @note Updated 2019-08-08.
8 | #' @noRd
9 | #'
10 | #' @param list `list`.
11 | #' Cellular barcodes per sample.
12 | #'
13 | #' @return `integer`.
14 | #' Cell identifiers are the names and raw reads are the values.
.nRead <- function(list) {
    ## Note that only the first list element is inspected for type and names.
    assert(
        is.list(list),
        hasNames(list),
        is.integer(list[[1L]]),
        hasNames(list[[1L]])
    )
    ## Single sample: return its barcode vector untouched.
    if (hasLength(list, n = 1L)) {
        return(list[[1L]])
    }
    ## Multiple samples: `unlist()` joins the list name and the element name
    ## with a "." separator. The combined names are then passed through
    ## `makeNames()` (renaming "." to "_").
    reads <- unlist(list, use.names = TRUE)
    names(reads) <- makeNames(names(reads))
    reads
}
32 |
33 |
34 |
35 | #' Obtain the raw, unfiltered cellular barcode read counts
36 | #'
37 | #' @note Updated 2023-12-04.
38 | #' @noRd
39 | #'
40 | #' @return `DataFrame`.
.rawMetrics <- function(object) {
    assert(is(object, "bcbioSingleCell"))
    barcodes <- metadata(object)[["cellularBarcodes"]]
    ## Message shown when the per-sample barcode list is not stashed in the
    ## object metadata.
    missingMsg <- sprintf(
        fmt = paste(
            "Object does not contain unfiltered cellular barcodes.",
            "Has {.fun %s} been applied?",
            "This step drops them."
        ),
        "filterCells"
    )
    assert(is.list(barcodes), msg = missingMsg)
    assert(hasNames(barcodes))
    ## Build one DataFrame per sample. The sample identifiers are passed first
    ## so that the resulting list is named by sample.
    perSample <- Map(
        f = function(sampleId, reads) {
            DataFrame(
                "sampleId" = as.factor(sampleId),
                "cellId" = as.factor(names(reads)),
                "nRead" = reads,
                row.names = NULL
            )
        },
        sampleId = names(barcodes),
        reads = barcodes
    )
    ## Stack the per-sample frames into a single DataFrame.
    data <- unlist(DataFrameList(perSample), use.names = FALSE)
    ## Attach sample-level metadata, keyed on `sampleId`.
    samples <- sampleData(object)
    samples[["sampleId"]] <- as.factor(rownames(samples))
    data <- leftJoin(data, samples, by = "sampleId")
    assert(
        is(data, "DataFrame"),
        !hasRownames(data),
        isSubset(c("sampleId", "cellId", "nRead"), colnames(data)),
        is.integer(data[["nRead"]])
    )
    data
}
83 |
--------------------------------------------------------------------------------
/man/extract.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/extract-methods.R
3 | \name{extract}
4 | \alias{extract}
5 | \alias{[,bcbioSingleCell,ANY,ANY,ANY-method}
6 | \title{Extract or replace parts of an object}
7 | \usage{
8 | \S4method{[}{bcbioSingleCell,ANY,ANY,ANY}(x, i, j, ..., drop = FALSE)
9 | }
10 | \arguments{
11 | \item{x}{Object.}
12 |
13 | \item{i}{Indices specifying elements to extract or replace. Indices are \code{numeric} or
14 | \code{character} vectors, empty (\code{missing}), or \code{NULL}.
15 |
16 | For more information:
17 |
18 | \if{html}{\out{<div class="sourceCode">}}\preformatted{help(topic = "Extract", package = "base")
19 | }\if{html}{\out{</div>}}}
20 |
21 | \item{j}{Indices specifying elements to extract or replace. Indices are \code{numeric} or
22 | \code{character} vectors, empty (\code{missing}), or \code{NULL}.
23 |
24 | For more information:
25 |
26 | \if{html}{\out{<div class="sourceCode">}}\preformatted{help(topic = "Extract", package = "base")
27 | }\if{html}{\out{</div>}}}
28 |
29 | \item{...}{Additional arguments.}
30 |
31 | \item{drop}{For matrices and arrays. If \code{TRUE} the result is
32 | coerced to the lowest possible dimension (see the examples). This
33 | only works for extracting elements, not for the replacement. See
34 | \code{\link[base]{drop}} for further details.
35 | }
36 | }
37 | \value{
38 | \code{bcbioSingleCell}.
39 | }
40 | \description{
41 | Extract genes by row and cells by column.
42 | }
43 | \details{
44 | Refer to \code{cellToSample()} and \code{selectSamples()} if sample-level extraction is
45 | desired. Note that \code{sampleId} is slotted into \code{colData} and defines the
46 | cell-to-sample mappings.
47 |
48 | Unfiltered cellular barcode distributions for the entire dataset, including
49 | cells not kept in the matrix, will be dropped in favor of the \code{nCount} column
50 | of \code{colData()}.
51 | }
52 | \note{
53 | Updated 2021-09-10.
54 | }
55 | \examples{
56 | ## bcbioSingleCell ====
57 | data(bcb)
58 |
59 | cells <- head(colnames(bcb))
60 | head(cells)
61 | genes <- head(rownames(bcb))
62 | head(genes)
63 |
64 | ## Subset by cell identifiers.
65 | bcb[, cells]
66 |
67 | ## Subset by genes.
68 | bcb[genes, ]
69 |
70 | ## Subset by both genes and cells.
71 | bcb[genes, cells]
72 | }
73 | \references{
74 | Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
75 | \emph{The New S Language}.
76 | Wadsworth & Brooks/Cole.
77 | }
78 | \author{
79 | Michael Steinbaugh
80 | }
81 |
--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: linters_with_defaults(
2 | # Updated 2023-07-24.
3 | # > help(topic = "linters", package = "lintr")
4 | T_and_F_symbol_linter(),
5 | absolute_path_linter(),
6 | any_duplicated_linter(),
7 | any_is_na_linter(),
8 | assignment_linter(),
9 | backport_linter(),
10 | brace_linter(),
11 | class_equals_linter(),
12 | commas_linter(),
13 | commented_code_linter(),
14 | condition_message_linter(),
15 | conjunct_test_linter(),
16 | consecutive_assertion_linter(),
17 | cyclocomp_linter(complexity_limit = 30L),
18 | duplicate_argument_linter(),
19 | equals_na_linter(),
20 | expect_comparison_linter(),
21 | expect_identical_linter(),
22 | expect_length_linter(),
23 | expect_named_linter(),
24 | expect_not_linter(),
25 | expect_null_linter(),
26 | expect_s3_class_linter(),
27 | expect_s4_class_linter(),
28 | expect_true_false_linter(),
29 | extraction_operator_linter(),
30 | function_left_parentheses_linter(),
31 | ifelse_censor_linter(),
32 | implicit_integer_linter(),
33 | indentation_linter(indent = 4L, hanging_indent_style = "tidy"),
34 | infix_spaces_linter(),
35 | inner_combine_linter(),
36 | line_length_linter(length = 80L),
37 | literal_coercion_linter(),
38 | missing_argument_linter(),
39 | missing_package_linter(),
40 | namespace_linter(),
41 | nested_ifelse_linter(),
42 | # This is currently returning too many false positives.
43 | # > nonportable_path_linter = NULL,
44 | numeric_leading_zero_linter(),
45 | object_length_linter(length = 40L),
46 | object_name_linter(styles = "camelCase"),
47 | object_usage_linter(),
48 | outer_negation_linter(),
49 | package_hooks_linter(),
50 | paren_body_linter(),
51 | paste_linter(),
52 | pipe_call_linter(),
53 | pipe_continuation_linter(),
54 | quotes_linter(),
55 | redundant_ifelse_linter(),
56 | regex_subset_linter(),
57 | semicolon_linter(),
58 | seq_linter(),
59 | spaces_inside_linter(),
60 | spaces_left_parentheses_linter(),
61 | sprintf_linter(),
62 | system_file_linter(),
63 | todo_comment_linter(),
64 | trailing_blank_lines_linter(),
65 | trailing_whitespace_linter(),
66 | undesirable_function_linter(),
67 | undesirable_operator_linter(),
68 | unnecessary_concatenation_linter(),
69 | unreachable_code_linter(),
70 | vector_logic_linter(),
71 | whitespace_linter(),
72 | yoda_test_linter())
73 | exclude: "# nolint"
74 | exclude_start: "# nolint start"
75 | exclude_end: "# nolint end"
76 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: bcbioSingleCell
2 | Title: Bcbio Single-Cell RNA-Seq
3 | Description: R package for bcbio single-cell RNA-seq analysis.
4 | Version: 0.7.1
5 | Date: 2023-12-04
6 | Authors@R: c(
7 | person(
8 | given = "Michael",
9 | family = "Steinbaugh",
10 | email = "mike@steinbaugh.com",
11 | role = c("aut", "cre"),
12 | comment = c(ORCID = "0000-0002-2403-2221")
13 | ),
14 | person(
15 | given = "Rory",
16 | family = "Kirchner",
17 | email = "roryk@alum.mit.edu",
18 | role = "aut",
19 | comment = c(ORCID = "0000-0003-4814-5885")
20 | ),
21 | person(
22 | given = "Mary",
23 | family = "Piper",
24 | email = "mary.piper@gmail.com",
25 | role = "aut",
26 | comment = c(ORCID = "0000-0003-2699-3840")
27 | ),
28 | person(
29 | given = "Victor",
30 | family = "Barrera",
31 | email = "barrera.vic@gmail.com",
32 | role = "aut",
33 | comment = c(ORCID = "0000-0003-0590-4634")
34 | ),
35 | person(
36 | given = "Shannan",
37 | family = "Ho Sui",
38 | email = "shosui@hsph.harvard.edu",
39 | role = "aut",
40 | comment = c(ORCID = "0000-0002-6191-4709")
41 | ),
42 | person(
43 | given = "Harvard Chan Bioinformatics Core",
44 | email = "bioinformatics@hsph.harvard.edu",
45 | role = c("cph", "fnd")
46 | ),
47 | person(
48 | given = "Acid Genomics",
49 | role = c("cph", "fnd")
50 | ))
51 | URL:
52 | https://r.acidgenomics.com/packages/bcbiosinglecell/,
53 | https://github.com/hbc/bcbioSingleCell/
54 | BugReports: https://github.com/hbc/bcbioSingleCell/issues/
55 | License: AGPL-3
56 | Encoding: UTF-8
57 | LazyData: false
58 | Depends: R (>= 4.3)
59 | Imports:
60 | AcidBase (>= 0.7.0),
61 | AcidCLI (>= 0.3.0),
62 | AcidExperiment (>= 0.5.0),
63 | AcidGenerics (>= 0.7.1),
64 | AcidGenomes (>= 0.6.0),
65 | AcidMarkdown (>= 0.3.0),
66 | AcidPlots (>= 0.7.0),
67 | AcidPlyr (>= 0.5.0),
68 | AcidSingleCell (>= 0.4.0),
69 | BiocGenerics (>= 0.46.0),
70 | IRanges (>= 2.34.0),
71 | S4Vectors (>= 0.38.0),
72 | SingleCellExperiment (>= 1.22.0),
73 | SummarizedExperiment (>= 1.30.0),
74 | bcbioBase (>= 0.9.0),
75 | ggplot2 (>= 3.4.3),
76 | ggridges (>= 0.5.4),
77 | goalie (>= 0.7.1),
78 | pipette (>= 0.14.0),
79 | syntactic (>= 0.7.0),
80 | methods,
81 | parallel,
82 | utils
83 | Suggests:
84 | AcidDevTools (>= 0.7.1),
85 | Biostrings (>= 2.68.0),
86 | basejump (>= 0.18.0),
87 | rmarkdown (>= 2.25),
88 | testthat (>= 3.1.10),
89 | graphics
90 | Additional_repositories: https://r.acidgenomics.com
91 | Config/testthat/edition: 3
92 | Config/testthat/parallel: true
93 | Roxygen: list(markdown = TRUE)
94 | RoxygenNote: 7.2.3
95 |
--------------------------------------------------------------------------------
/R/extract-methods.R:
--------------------------------------------------------------------------------
1 | #' Extract or replace parts of an object
2 | #'
3 | #' Extract genes by row and cells by column.
4 | #'
5 | #' Refer to `cellToSample()` and `selectSamples()` if sample-level extraction is
6 | #' desired. Note that `sampleId` is slotted into `colData` and defines the
7 | #' cell-to-sample mappings.
8 | #'
9 | #' Unfiltered cellular barcode distributions for the entire dataset, including
10 | #' cells not kept in the matrix, will be dropped in favor of the `nCount`
11 | #' column of `colData()`.
12 | #'
13 | #' @name extract
14 | #' @author Michael Steinbaugh
15 | #' @inherit base::Extract params references
16 | #' @note Updated 2021-09-10.
17 | #'
18 | #' @inheritParams AcidRoxygen::params
19 | #'
20 | #' @return `bcbioSingleCell`.
21 | #'
22 | #' @examples
23 | #' ## bcbioSingleCell ====
24 | #' data(bcb)
25 | #'
26 | #' cells <- head(colnames(bcb))
27 | #' head(cells)
28 | #' genes <- head(rownames(bcb))
29 | #' head(genes)
30 | #'
31 | #' ## Subset by cell identifiers.
32 | #' bcb[, cells]
33 | #'
34 | #' ## Subset by genes.
35 | #' bcb[genes, ]
36 | #'
37 | #' ## Subset by both genes and cells.
38 | #' bcb[genes, cells]
39 | NULL
40 |
41 |
42 |
43 | ## Updated 2019-08-20.
## Subset a `bcbioSingleCell` object by genes (rows) and/or cells (columns).
##
## Arguments:
## - `x`: `bcbioSingleCell` object. Must pass `validObject()`.
## - `i`: Row (gene) index. Defaults to all rows when missing.
## - `j`: Column (cell) index. Defaults to all columns when missing.
## - `...`: Unused here; kept for compatibility with the `[` generic.
## - `drop`: Must be `FALSE`; any other value fails the assert check.
##
## Returns a `bcbioSingleCell`. The input object is returned as-is when the
## extracted primary assay is identical to the original.
`extract,bcbioSingleCell` <- # nolint
    function(x, i, j, ..., drop = FALSE) {
        validObject(x)
        assert(identical(drop, FALSE))
        ## Genes (rows).
        if (missing(i)) {
            i <- seq_len(nrow(x))
        }
        ## Cells (columns).
        if (missing(j)) {
            j <- seq_len(ncol(x))
        }
        ## Subset using SCE method.
        sce <- as(x, "SingleCellExperiment")
        sce <- sce[i, j, drop = drop]
        ## Early return original object, if unmodified.
        if (identical(assay(sce), assay(x))) {
            return(x)
        }
        ## Determine whether we should stash subset in metadata.
        ## This compares the dimensions of the result instead of the lengths
        ## of the index vectors. A logical index has the same length as the
        ## dimension it filters, so a length-based check reports "no subset"
        ## even when rows or columns were removed.
        subset <- !identical(dim(sce), dim(x))
        ## Metadata ------------------------------------------------------------
        metadata <- metadata(sce)
        if (isTRUE(subset)) {
            ## These elements describe the full, unsubset dataset and are
            ## dropped once the object no longer matches it.
            metadata[["cellularBarcodes"]] <- NULL
            metadata[["filterCells"]] <- NULL
            metadata[["filterGenes"]] <- NULL
            metadata[["subset"]] <- TRUE
        }
        metadata <- Filter(f = Negate(is.null), x = metadata)
        metadata(sce) <- metadata
        ## Return --------------------------------------------------------------
        sce <- droplevels2(sce)
        new(Class = "bcbioSingleCell", sce)
    }
83 |
84 |
85 |
86 | #' @rdname extract
87 | #' @export
setMethod(
    ## Register the internal extraction function as the S4 `[` method.
    f = "[",
    signature = c(
        x = "bcbioSingleCell",
        i = "ANY",
        j = "ANY",
        drop = "ANY"
    ),
    definition = `extract,bcbioSingleCell`
)
98 |
--------------------------------------------------------------------------------
/man/bcbioSingleCell.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGenerators.R
3 | \name{bcbioSingleCell}
4 | \alias{bcbioSingleCell}
5 | \title{bcbio single-cell RNA-seq data set}
6 | \usage{
7 | bcbioSingleCell(
8 | uploadDir,
9 | sampleMetadataFile = NULL,
10 | organism = NULL,
11 | ensemblRelease = NULL,
12 | genomeBuild = NULL,
13 | gffFile = NULL,
14 | transgeneNames = NULL,
15 | interestingGroups = "sampleName"
16 | )
17 | }
18 | \arguments{
19 | \item{uploadDir}{\code{character(1)}.
20 | Final upload directory path.}
21 |
22 | \item{sampleMetadataFile}{\code{character(1)}.
23 | Sample metadata file path. CSV or TSV is preferred, but Excel worksheets
24 | are also supported. Check the documentation for conventions and required
25 | columns.}
26 |
27 | \item{organism}{\code{character(1)}.
28 | Full Latin organism name (e.g. \code{"Homo sapiens"}).}
29 |
30 | \item{ensemblRelease}{\code{integer(1)}.
31 | Ensembl release version (e.g. \code{100}). We recommend setting this value if
32 | possible, for improved reproducibility. When left unset, the latest release
33 | available via AnnotationHub/ensembldb is used. Note that the latest version
34 | available can vary, depending on the versions of AnnotationHub and
35 | ensembldb in use.}
36 |
37 | \item{genomeBuild}{\code{character(1)}.
38 | Ensembl genome build assembly name (e.g. \code{"GRCh38"}). If set \code{NULL},
39 | defaults to the most recent build available.
40 | \emph{Note:} don't pass in UCSC build IDs (e.g. \code{"hg38"}).}
41 |
42 | \item{gffFile}{\code{character(1)}.
43 | GFF/GTF (General Feature Format) file.
44 | Generally, we recommend using GTF (GFFv2) instead of GFFv3.}
45 |
46 | \item{transgeneNames}{\code{character}.
47 | Vector indicating which assay rows denote transgenes (e.g. EGFP, TDTOMATO).}
48 |
49 | \item{interestingGroups}{\code{character}.
50 | Groups of interest to use for visualization.
51 | Corresponds to factors describing the columns of the object.}
52 | }
53 | \value{
54 | \code{bcbioSingleCell}.
55 | }
56 | \description{
57 | \code{bcbioSingleCell} is an S4 class that extends \code{SingleCellExperiment}, and is
58 | designed to store a bcbio single-cell RNA-seq analysis. This class contains
59 | read counts saved as a sparse matrix (\code{sparseMatrix}), sample metadata, and
60 | cell quality control metrics.
61 | }
62 | \note{
63 | Updated 2023-09-21.
64 | }
65 | \section{Remote data}{
66 |
67 |
68 | When working in RStudio, we recommend connecting to the bcbio-nextgen run
69 | directory as a remote connection over
70 | \href{https://github.com/osxfuse/osxfuse/wiki/SSHFS}{sshfs}.
71 | }
72 |
73 | \examples{
74 | uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell")
75 |
76 | x <- bcbioSingleCell(uploadDir)
77 | print(x)
78 |
79 | x <- bcbioSingleCell(
80 | uploadDir = uploadDir,
81 | sampleMetadataFile = file.path(uploadDir, "metadata.csv")
82 | )
83 | print(x)
84 | }
85 | \seealso{
86 | \itemize{
87 | \item \code{SingleCellExperiment::SingleCellExperiment()}.
88 | \item \code{.S4methods(class = "bcbioSingleCell")}.
89 | }
90 | }
91 | \author{
92 | Michael Steinbaugh
93 | }
94 |
--------------------------------------------------------------------------------
/inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA-barcodes.tsv:
--------------------------------------------------------------------------------
1 | "AAACACTA-CTTCGATT" 71802
2 | "AAACTACA-CCACATTA" 76590
3 | "AACTGCCT-GCAAGGAC" 106100
4 | "AAGAAGGT-TCTGTGGT" 74449
5 | "AAGCCTTC-TAAATAGG" 105540
6 | "AATAAGGA-CCACATTA" 35645
7 | "AATCGAAG-CCCAAGCA" 68099
8 | "AATCGTTC-CCCTAACC" 84804
9 | "ACCCTCAA-CTGCGTTG" 66194
10 | "ACCTGAAG-GAGCGGTA" 57162
11 | "ACTAATTG-CTTTAATC" 63990
12 | "ACTAGAGC-TCGACACC" 82100
13 | "AGAAACCA-ATACTCTT" 56283
14 | "AGCTCCAC-CCTGACAC" 58527
15 | "AGGTAAGC-TCCCAATC" 82639
16 | "ATATGCAA-GGCGGTTT" 68647
17 | "ATCAATCG-GTTGTCAT" 72148
18 | "ATCGCGCT-AGAGGTGG" 77010
19 | "CAACGCAG-CTCGCGTA" 78838
20 | "CACAACCT-GGAGAAGC" 51457
21 | "CCATGCAT-TTCCGCTC" 68447
22 | "CCCGTTCT-AAAGCCTA" 106986
23 | "CCGAGATC-ATGGGCAC" 90386
24 | "CCGATACG-CAAGAGGG" 131259
25 | "CCGGAAAT-GTTGTCAT" 61199
26 | "CCTACGCT-AGCAGAAC" 86060
27 | "CGTGTGTT-AGGAAGAC" 124799
28 | "CGTTTCGT-GACAGATA" 76444
29 | "CTAGCACG-AATCGGGT" 90582
30 | "CTAGTAGG-TTGAGGGT" 66021
31 | "CTCACATC-ACCCACGA" 57006
32 | "CTCCTCCA-CGTATTTC" 94587
33 | "CTCCTCCA-TGTACACG" 58778
34 | "CTCTATAG-GCAAAGCC" 82448
35 | "CTGTCGCA-TTTAACAG" 111111
36 | "CTGTTAAA-AATGAATG" 51878
37 | "CTGTTAAA-GACAGATA" 93666
38 | "CTTAGGCC-AGAAGTCC" 111183
39 | "CTTAGTGT-TCCAGGGA" 57623
40 | "CTTCTACG-CTTCTTCG" 67106
41 | "CTTTATCC-CTACCGTT" 92126
42 | "GACACCTG-CTCAGAAT" 49189
43 | "GACTAGCG-GAAGTGCC" 91684
44 | "GAGAAACC-ACAGCGGA" 50844
45 | "GAGTGTAC-ACGCAGAG" 64944
46 | "GATTAAAG-ACTAGCCA" 71098
47 | "GATTACTT-GAGAATTG" 65073
48 | "GATTTCCC-ATGTTGGC" 84194
49 | "GCAAACTG-CTTCAGGT" 52167
50 | "GCCAACAT-CGTGGATA" 38063
51 | "GCCTGGTA-CGCTCTCA" 51643
52 | "GCGCTGAT-GACAAAGG" 47118
53 | "GCGTGCAA-TCTGTGGT" 66552
54 | "GGAACGAA-TTGCACGC" 52399
55 | "GGCGACAA-TTCCGAGT" 61851
56 | "GGCTTTGC-AACCCTTG" 54214
57 | "GGCTTTGC-AGCGAAGT" 70085
58 | "GGCTTTGC-TTAGGACC" 92908
59 | "GGGATTAC-AAATGTCG" 60698
60 | "GGTTGAGA-CTGTCTGG" 101810
61 | "GGTTGAGA-GAGAGTAT" 126962
62 | "GTAAGCCG-CGATTGAT" 92347
63 | "GTAATCTG-CGCTAATA" 79897
64 | "GTACGCTT-CCCAAGCA" 77318
65 | "GTACGGAC-CAACAAAT" 70943
66 | "GTCCACTA-CTTCTGGA" 65666
67 | "GTCCACTA-GACAAAGG" 59151
68 | "GTCCGTCA-ATACTCTT" 74739
69 | "GTCTAATC-GGCCCTTA" 77431
70 | "GTGAACTC-CAAGAGGG" 118550
71 | "GTGAGGCA-CAGTTTGC" 58023
72 | "GTGATAAA-CGCTCTCA" 47077
73 | "GTGCCCAT-GTGTCGGA" 61911
74 | "GTGGTGCT-GGAGAAGC" 84629
75 | "GTTACTAG-AGAAGTTA" 57592
76 | "GTTACTAG-CCCTTGGT" 94832
77 | "GTTCTGCT-TGGCTACC" 99905
78 | "TAATCCAT-CGGAATTT" 64021
79 | "TACCGCTC-CCCATAGC" 62870
80 | "TACGCGAG-TGTAGTTT" 73071
81 | "TACGTTCG-TTGATCTA" 80071
82 | "TAGGCTTT-CGGACAAC" 110058
83 | "TAGTAGCC-TAGTGTTT" 79818
84 | "TATTAGCG-CCCTAACC" 52979
85 | "TCAGCCTC-TGCAAGGG" 125515
86 | "TCCGACAC-GGGAGGTA" 120160
87 | "TCGCAATC-CGAACGTA" 113926
88 | "TCGGTCAT-AGCACCAC" 104007
89 | "TCTAAACT-CTCTTGAC" 70414
90 | "TCTTTGAC-CGCTCTCA" 96402
91 | "TGAGAGCG-CCTATTCA" 68229
92 | "TGAGAGCG-GAAGTGCC" 87673
93 | "TGAGCACA-TGCTATTT" 84576
94 | "TGCGACTA-CCGTGTTT" 34262
95 | "TGCGACTA-TTCACATA" 101690
96 | "TGCTTCAT-GCAGGGTA" 128690
97 | "TGCTTGGG-CAACAAAT" 69455
98 | "TGGACGGA-TTGTTTAC" 118717
99 | "TGGGAATT-ATATAGGA" 94737
100 | "TGTTATCA-ACGCAGAG" 50886
101 |
--------------------------------------------------------------------------------
/R/package.R:
--------------------------------------------------------------------------------
1 | #' bcbioSingleCell
2 | #'
3 | #' Import and analyze [bcbio](https://bcbio-nextgen.readthedocs.io/) single-cell
4 | #' RNA-seq data.
5 | #'
6 | #' @aliases NULL
7 | #' @keywords internal
8 | "_PACKAGE"
9 |
10 |
11 |
12 | ## S4 classes ==================================================================
13 |
14 | #' @importClassesFrom SingleCellExperiment SingleCellExperiment
15 | NULL
16 |
17 |
18 |
19 | ## S4 generics and methods =====================================================
20 |
21 | #' @importFrom AcidExperiment sampleNames
22 | #' @importFrom AcidGenerics calculateMetrics camelCase droplevels2
23 | #' interestingGroups interestingGroups<- leftJoin makeDimnames makeLabel
24 | #' makeNames metrics plotReadsPerCell sampleData
25 | #' @importFrom BiocGenerics counts updateObject
26 | #' @importFrom S4Vectors cbind do.call droplevels lapply mcols mcols<-
27 | #' metadata metadata<-
28 | #' @importFrom SummarizedExperiment assayNames assay assays assays<- colData
29 | #' colData<- rowData rowData<- rowRanges rowRanges<-
30 | #' @importFrom methods coerce show
31 | #' @importFrom pipette import
32 | NULL
33 |
34 | #' @importMethodsFrom AcidExperiment calculateMetrics interestingGroups
35 | #' interestingGroups<- metrics sampleData sampleNames
36 | #' @importMethodsFrom AcidPlyr leftJoin
37 | #' @importMethodsFrom AcidSingleCell sampleData
38 | #' @importMethodsFrom pipette import
39 | #' @importMethodsFrom syntactic camelCase makeDimnames makeLabel makeNames
40 | NULL
41 |
42 |
43 |
44 | ## Standard functions ==========================================================
45 |
46 | #' @importFrom AcidBase metricsCols printString realpath showSlotInfo
47 | #' standardizeCall strMatch
48 | #' @importFrom AcidCLI abort alert alertSuccess alertWarning h1 h2
49 | #' separator toInlineString
50 | #' @importFrom AcidExperiment detectLanes droplevels2 importSampleData
51 | #' matchInterestingGroups minimalSampleData
52 | #' @importFrom AcidGenomes emptyRanges makeGRangesFromEnsembl makeGRangesFromGff
53 | #' @importFrom AcidMarkdown markdownPlots
54 | #' @importFrom AcidPlots !!! .data acid_geom_abline acid_geom_label
55 | #' acid_geom_label_average acid_geom_label_repel acid_scale_color_discrete
56 | #' acid_scale_fill_discrete syms
57 | #' @importFrom AcidSingleCell makeSingleCellExperiment mapCellsToSamples
58 | #' @importFrom IRanges DataFrameList
59 | #' @importFrom S4Vectors DataFrame SimpleList
60 | #' @importFrom bcbioBase getBarcodeCutoffFromCommands getGtfFileFromYaml
61 | #' getLevelFromCommands getSampleDataFromYaml getUmiTypeFromCommands
62 | #' importDataVersions importProgramVersions projectDir runDate sampleDirs
63 | #' @importFrom ggplot2 aes facet_wrap geom_boxplot geom_histogram geom_step
64 | #' geom_violin ggplot labs scale_x_continuous scale_y_continuous stat_ecdf vars
65 | #' @importFrom ggridges geom_density_ridges
66 | #' @importFrom goalie allAreDirectories allAreFiles areDisjointSets areSetEqual
67 | #' assert hasLength hasNames hasRownames hasValidDimnames isADirectory isAFile
68 | #' isAUrl isAny isCharacter isDirectory isFile isFlag isInt isString isSubset
69 | #' requireNamespaces validate validateClasses
70 | #' @importFrom methods .hasSlot as as<- is new setClass slot slot<- validObject
71 | #' @importFrom parallel mcMap mclapply
72 | #' @importFrom utils capture.output packageName packageVersion
73 | NULL
74 |
--------------------------------------------------------------------------------
/todo.org:
--------------------------------------------------------------------------------
1 | #+TITLE: bcbioSingleCell
2 | #+STARTUP: content
3 | * Development
4 | ** TODO Need to address this note now popping up in build checks.
5 | Note: found 88 marked UTF-8 strings
6 | ** TODO Need to ensure R Markdown renders correctly without goalie issues.
7 | ** TODO Require valid names in all slots.
8 | ** TODO Add monocle celldataset coercion method.
9 | ** TODO Need to run BFG on the repo and remove old example R data.
10 | ** TODO `bcbioSingleCell()`: Explain genome annotation priority in better detail, following the logic defined in `bcbioRNASeq()`.
11 | ** TODO Improve support for transcript-level counts?
12 | Should we even allow this at this point?
13 | ** TODO Vignette using example bcbio data and 10X pbmc data.
14 | ** TODO Look into BarcodeInflectionsPlot, now in Seurat 3.
15 | * pointillism
16 | ** TODO Move markers out of Google Sheets and into the package, as simple CSV files.
17 | ** TODO Use pseudobulk approach for DE with sample replicates
18 | ** TODO `plotFeature()`: Add `pointsAsNumbers` argument support.
19 | ** TODO `diffExp()`: Add internal support for accessing design with `design()` generic.
20 | ** TODO `KnownMarkers` S4: switch from DataFrame to SplitDataFrameList inheritance?
21 | ** TODO Improve SeuratMarkers class.
22 | This needs to error if the input data.frame contains `cluster` column.
23 | Consider only using `SeuratMarkers` as a single generator but returning `SeuratMarkers` or `SeuratMarkersPerCluster` automatically.
24 | Allow generator to work with empty ranges?
25 | ** TODO Improve plotFeature.
26 | Add pointsAsNumbers support. Is there a way to facet wrap these instead of using plot grid? Then we can easily support a title. We're using continuous color here, so the formal won't match… argument "color" is missing, with no default.
27 | ** TODO `findMarkers()`: Consider adding `progress` option or BPPARAM support.
28 | ** TODO Switch to `Misc()` to access the `@misc` slot?
29 | ** TODO Consider splitting `SeuratMarkers` class into a `DataFrameList`, per cluster.
30 | ** TODO Stacked bar plot for relative cell abundances per cluster or sample type.
31 | ** TODO Put the resolution in the plot title for t-SNE.
32 | ** TODO Add `write = TRUE` argument support for marking looping functions, to write CSVs automatically to disk.
33 | ** TODO Improve assert checks for `findMarkers()`
34 | * syntactic
35 | ** TODO saveData: Need to harden against accidental pipe to this function.
36 | ** TODO Check that renaming mode renames symlinks themselves, not the resolved file.
37 | I ran into this attempting to name album artist symlinks in iTunes.
38 | ** TODO VIGNETTE. Seriously, work on it.
39 | ** TODO Functions need to convert accent marks if possible. Particularly useful for file names.
40 | ** TODO Ensure nM returns as nm instead of n_m.
41 | ** TODO Rename mode: Need to look for and strip ' - ' out automatically.
42 | Otherwise this will return '-minus-' in the file name, which is annoying. Need to improve the internal sanitization in R for this.
43 | ** TODO Rename mode needs to also make extension lowercase.
44 | ** TODO Need to add recursive rename mode support (koopa only?).
45 | Maybe this is easiest to implement on the R side of things. Need to rename files first, then directories, sorted. Need to work from lowest levels up.
46 | ** TODO nMito, nCoding looks weird with makeLabel plural.
47 | See bcbioSingleCell example.
48 |
--------------------------------------------------------------------------------
/R/AllClasses.R:
--------------------------------------------------------------------------------
1 | #' bcbio single-cell RNA-seq data set
2 | #'
3 | #' `bcbioSingleCell` is an S4 class that extends `SingleCellExperiment`, and is
4 | #' designed to store a bcbio single-cell RNA-seq analysis. This class contains
5 | #' read counts saved as a sparse matrix (`sparseMatrix`), sample metadata, and
6 | #' cell quality control metrics.
7 | #'
8 | #' @author Michael Steinbaugh, Rory Kirchner
9 | #' @note Updated 2022-05-09.
10 | #' @export
setClass(
    Class = "bcbioSingleCell",
    contains = "SingleCellExperiment"
)
## Validity method for `bcbioSingleCell`.
## Checks run in sequence and return early with the first failure reported by
## `validate()` / `validateClasses()`. Returns `TRUE` when all checks pass.
setValidity(
    Class = "bcbioSingleCell",
    method = function(object) {
        colData <- colData(object)
        metadata <- metadata(object)
        ## NOTE(review): The return value is not used by any check below. The
        ## call is retained in case it is relied on to fail early for objects
        ## without usable sample data -- confirm whether it is still needed.
        sampleData(object)
        ## Return invalid for all objects older than v0.1.
        ## Compare against a version built from a character string. Comparing
        ## a `package_version` to a bare numeric relies on implicit coercion
        ## of non-character version input.
        version <- metadata[["version"]]
        ok <- validate(
            is(version, "package_version"),
            version >= package_version("0.1")
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Check for legacy bcbio slot.
        ok <- validate(!.hasSlot(object, "bcbio"))
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Assays --------------------------------------------------------------
        ok <- validate(isSubset("counts", names(assays(object))))
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Row data ------------------------------------------------------------
        ok <- validate(
            is(rowRanges(object), "GenomicRanges"),
            is(rowData(object), "DataFrame")
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Column data ---------------------------------------------------------
        ok <- validate(
            ## Require that metrics columns are defined.
            isSubset(.metricsCols, colnames(colData)),
            ## Ensure that `interestingGroups` isn't slotted in colData.
            areDisjointSets("interestingGroups", colnames(colData))
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Metadata ------------------------------------------------------------
        ## Accept either class name for DataFrame-typed metadata elements.
        df <- c("DFrame", "DataFrame")
        ok <- validateClasses(
            object = metadata,
            expected = list(
                allSamples = "logical",
                bcbioCommandsLog = "character",
                bcbioLog = "character",
                dataVersions = df,
                date = "Date",
                ensemblRelease = "integer",
                genomeBuild = "character",
                gffFile = "character",
                interestingGroups = "character",
                lanes = "integer",
                level = "character",
                organism = "character",
                pipeline = "character",
                programVersions = df,
                projectDir = "character",
                runDate = "Date",
                sampleDirs = "character",
                sampleMetadataFile = "character",
                sessionInfo = c("sessionInfo", "session_info"),
                umiType = "character",
                uploadDir = "character",
                version = "package_version",
                wd = "character",
                yaml = "list"
            ),
            subset = TRUE
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Check that level is defined, and that `sampleName` is not stored
        ## as a metadata element.
        ok <- validate(
            !isSubset("sampleName", names(metadata)),
            isSubset(metadata[["level"]], c("genes", "transcripts"))
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        TRUE
    }
)
104 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # bcbioSingleCell
2 |
3 | [Install with Bioconda](http://bioconda.github.io/recipes/r-bcbiosinglecell/README.html)
4 | 
5 |
6 | **NOTE: [bcbio-nextgen][bcbio] is no longer under active development.**
7 | Refer to the [notice of discontinuation][] for additional details.
8 |
9 | [R][] package for [bcbio][] single-cell RNA-seq analysis.
10 |
11 | ## Installation
12 |
13 | This is an R package.
14 |
15 | ```r
16 | if (!requireNamespace("BiocManager", quietly = TRUE)) {
17 | install.packages("BiocManager")
18 | }
19 | install.packages(
20 | pkgs = "bcbioSingleCell",
21 | repos = c(
22 | "https://r.acidgenomics.com",
23 | BiocManager::repositories()
24 | ),
25 | dependencies = TRUE
26 | )
27 | ```
28 |
29 | ### [Conda][] method
30 |
31 | Configure [Conda][] to use the [Bioconda][] channels.
32 |
33 | ```sh
34 | # Don't install recipe into base environment.
35 | conda create --name='r-bcbiosinglecell' 'r-bcbiosinglecell'
36 | conda activate 'r-bcbiosinglecell'
37 | R
38 | ```
39 |
40 | ## Load bcbio single-cell RNA-seq data
41 |
42 | ```r
43 | library(bcbioSingleCell)
44 | object <- bcbioSingleCell(
45 | uploadDir = file.path("indrops", "final"),
46 | interestingGroups = c("genotype", "treatment"),
47 | sampleMetadataFile = "sample_metadata.csv",
48 | organism = "Homo sapiens",
49 | ensemblRelease = 90L
50 | )
51 | ```
52 |
53 | This will return a `bcbioSingleCell` object, which is an extension of the
54 | [Bioconductor][] [SingleCellExperiment][sce] container class. Consult the
55 | `bcbioSingleCell()` constructor function documentation for detailed information
56 | on the supported parameters:
57 |
58 | ```r
59 | help(topic = "bcbioSingleCell", package = "bcbioSingleCell")
60 | ```
61 |
62 | ## Sample metadata examples
63 |
64 | ### FASTQ files with samples multiplexed by index barcode
65 |
66 | This is our current recommended method for analyzing an inDrops dataset.
67 | The sample index barcodes are multiplexed per FASTQ set. For Illumina
68 | sequencing data, the raw binary base call (BCL) data must be converted into
69 | FASTQs (split into `R1`-`R4` files) using [bcl2fastq][].
70 |
71 | The inDrops library version is automatically detected by bcbio, but ensure that
72 | the sample index sequences provided match the library version when attempting to
73 | create a `bcbioSingleCell` object.
74 |
75 | Consult the bcbio documentation for more information on how to configure an
76 | inDrops run prior to loading into R with the `bcbioSingleCell()` function.
77 |
78 | | description | index | sequence | sampleName | aggregate | genotype |
79 | | ----------- | ----- | -------- | ---------- | --------- | -------- |
80 | | indrops1 | 1 | CTCTCTAT | sample1_1 | sample1 | wildtype |
81 | | indrops1 | 2 | TATCCTCT | sample2_1 | sample2 | knockout |
82 | | indrops1 | 3 | GTAAGGAG | sample3_1 | sample3 | wildtype |
83 | | indrops1 | 4 | ACTGCATA | sample4_1 | sample4 | knockout |
84 | | indrops2 | 1 | CTCTCTAT | sample1_2 | sample1 | wildtype |
85 | | indrops2 | 2 | TATCCTCT | sample2_2 | sample2 | knockout |
86 | | indrops2 | 3 | GTAAGGAG | sample3_2 | sample3 | wildtype |
87 | | indrops2 | 4 | ACTGCATA | sample4_2 | sample4 | knockout |
88 |
89 | Note that bcbio currently outputs the reverse complement index sequence in the
90 | sample directory names (e.g. `sample-ATAGAGAG`). Define the forward index
91 | barcode in the `sequence` column here, not the reverse complement. The reverse
92 | complement will be calculated automatically and added as the `revcomp` column
93 | in the sample metadata.
94 |
95 | ### FASTQ files demultiplexed per sample
96 |
97 | This is our current method for handling 10X Genomics Chromium and Illumina
98 | SureCell cell barcodes.
99 |
100 | | description | genotype |
101 | | ----------- | -------- |
102 | | sample1 | wildtype |
103 | | sample2 | knockout |
104 | | sample3 | wildtype |
105 | | sample4 | knockout |
106 |
107 | ### Invalid object
108 |
109 | If you encounter a `validObject` error when attempting to load a
110 | `bcbioSingleCell` object from a previous analysis, run this step to update the
111 | object to the current version of the package:
112 |
113 | ```r
114 | object <- updateObject(object)
115 | validObject(object)
116 | ## [1] TRUE
117 | ```
118 |
119 | ## References
120 |
121 | The papers and software cited in our workflows are available as a [shared
122 | library](https://paperpile.com/shared/C8EMxl) on [Paperpile][].
123 |
124 | [bcbio]: https://bcbio-nextgen.readthedocs.io/
125 | [bcl2fastq]: https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html
126 | [bioconda]: https://bioconda.github.io/
127 | [bioconductor]: https://bioconductor.org/
128 | [conda]: https://conda.io/
129 | [notice of discontinuation]: https://github.com/bcbio/bcbio-nextgen/issues/3749
130 | [paperpile]: https://paperpile.com/
131 | [r]: https://www.r-project.org/
132 | [sce]: https://bioconductor.org/packages/SingleCellExperiment/
133 |
--------------------------------------------------------------------------------
/R/internal-import.R:
--------------------------------------------------------------------------------
1 | #' Import bcbio counts from sample directories
2 | #'
3 | #' @author Michael Steinbaugh
4 | #' @keywords internal
5 | #' @note Updated 2023-08-17.
6 | #' @noRd
7 | #'
8 | #' @param sampleDirs `character`.
9 | #' Named sample directory paths.
10 | #'
11 | #' @return `Matrix` / `matrix`. Empty (skipped) samples are left out.
12 | .importCounts <-
13 |     function(sampleDirs) {
14 |         assert(
15 |             allAreDirectories(sampleDirs),
16 |             hasNames(sampleDirs)
17 |         )
18 |         alert("Importing counts.")
19 |         list <- mcMap(
20 |             sampleId = names(sampleDirs),
21 |             dir = sampleDirs,
22 |             f = function(sampleId, dir) {
23 |                 counts <- .importCountsPerSample(dir)
24 |                 ## Skipped samples are `NULL`; `colnames<-` on `NULL` errors.
25 |                 if (is.null(counts)) {
26 |                     return(NULL)
27 |                 }
28 |                 ## Prefix cell barcodes with sample identifier when we're
29 |                 ## loading counts from multiple samples.
30 |                 if (length(sampleDirs) > 1L) {
31 |                     colnames(counts) <-
32 |                         paste(sampleId, colnames(counts), sep = "_")
33 |                 }
34 |                 ## Ensure names are valid.
35 |                 makeDimnames(counts)
36 |             }
37 |         )
38 |         ## Remove any empty items in list, which can result from low quality
39 |         ## samples with empty matrices in bcbio pipeline.
40 |         list <- Filter(f = Negate(is.null), x = list)
41 |         assert(
42 |             hasLength(list),
43 |             msg = sprintf(
44 |                 "bcbio didn't return any cells.\nCheck your '%s' setting.",
45 |                 "minimum_barcode_depth"
46 |             )
47 |         )
48 |         ## Bind the remaining per-sample matrices column-wise.
49 |         do.call(cbind, list)
50 |     }
51 |
52 |
53 |
54 | #' Import the sparse counts matrix for a single sample
55 | #'
56 | #' Counts are always stored in Matrix Market Exchange (MEX/MTX) format.
57 | #'
58 | #' Importing per sample may be advantageous compared to loading one giant
59 | #' combined matrix, because the per-sample imports can run in parallel.
60 | #'
61 | #' The row and column name files are read first. When either one is empty,
62 | #' the MatrixMarket file is not read (which would error otherwise) and `NULL`
63 | #' is returned instead. The bcbio pipeline will output empty files for very
64 | #' low quality samples with no cells that pass filtering.
65 | #'
66 | #' @author Michael Steinbaugh
67 | #' @keywords internal
68 | #' @note Updated 2020-01-20.
69 | #' @noRd
70 | #'
71 | #' @param dir `character(1)`.
72 | #' Sample directory path.
73 | #'
74 | #' @return `sparseMatrix`, or `NULL` when the sample has no features or cells.
75 | .importCountsPerSample <- # nolint
76 |     function(dir) {
77 |         assert(isADirectory(dir))
78 |         sampleName <- basename(dir)
79 |         ## All three files must exist, even when they are empty.
80 |         mtxFile <- file.path(dir, paste0(sampleName, ".mtx"))
81 |         featuresFile <- paste0(mtxFile, ".rownames")
82 |         barcodesFile <- paste0(mtxFile, ".colnames")
83 |         assert(allAreFiles(c(mtxFile, featuresFile, barcodesFile)))
84 |         ## Genes/transcripts (features) first, then cellular barcodes.
85 |         features <- import(featuresFile, format = "lines")
86 |         barcodes <- import(barcodesFile, format = "lines")
87 |         if (length(features) == 0L || length(barcodes) == 0L) {
88 |             alertWarning(sprintf("Skipped {.path %s}.", sampleName))
89 |             return(NULL)
90 |         }
91 |         ## Counts matrix; its dimensions must agree with the name files.
92 |         counts <- import(mtxFile)
93 |         assert(
94 |             identical(length(features), nrow(counts)),
95 |             identical(length(barcodes), ncol(counts))
96 |         )
97 |         rownames(counts) <- features
98 |         colnames(counts) <- barcodes
99 |         alert(sprintf("Imported {.path %s}.", sampleName))
100 |         counts
101 |     }
102 |
103 |
104 |
105 | #' Import unfiltered read counts per cellular barcode
106 | #'
107 | #' Get the number of pre-UMI disambiguated reads per cellular barcode, as
108 | #' recorded in the `<sample>-barcodes.tsv` file of each sample directory.
109 | #'
110 | #' @author Michael Steinbaugh
111 | #' @keywords internal
112 | #' @note Updated 2023-08-17.
113 | #' @noRd
114 | #'
115 | #' @param sampleDirs `character`.
116 | #' Named sample directories.
117 | #'
118 | #' @return `list`.
119 | #' One integer vector of pre-filtered read counts per sample. Elements are
120 | #' named by cellular barcode (after `makeNames()`), and the list itself is
121 | #' named by `names(sampleDirs)`.
122 | .importReads <-
123 |     function(sampleDirs) {
124 |         assert(
125 |             allAreDirectories(sampleDirs),
126 |             hasNames(sampleDirs)
127 |         )
128 |         alert("Importing unfiltered cellular barcode distributions.")
129 |         ## Reader for one file, labeling its columns as barcode and count.
130 |         importBarcodes <- function(file) {
131 |             tbl <- import(
132 |                 con = file,
133 |                 format = "tsv",
134 |                 colnames = c("barcode", "n")
135 |             )
136 |             nRead <- as.integer(tbl[["n"]])
137 |             names(nRead) <- makeNames(tbl[["barcode"]])
138 |             nRead
139 |         }
140 |         ## Each file is named after its sample directory.
141 |         tsvNames <- paste(basename(sampleDirs), "barcodes.tsv", sep = "-")
142 |         tsvFiles <- realpath(file.path(sampleDirs, tsvNames))
143 |         names(tsvFiles) <- names(sampleDirs)
144 |         reads <- mclapply(X = tsvFiles, FUN = importBarcodes)
145 |         ## Key the result by sample identifier.
146 |         names(reads) <- names(sampleDirs)
147 |         reads
148 |     }
149 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(bcbioSingleCell)
4 | export(bcbioSingleCellTestsUrl)
5 | export(calculateMetrics)
6 | export(filterCells)
7 | export(plotBarcodeRanks)
8 | export(plotCellCounts)
9 | export(plotCountsPerCell)
10 | export(plotCountsVsFeatures)
11 | export(plotFeaturesPerCell)
12 | export(plotMitoRatio)
13 | export(plotNovelty)
14 | export(plotQc)
15 | export(plotReadsPerCell)
16 | export(show)
17 | export(updateObject)
18 | exportClasses(bcbioSingleCell)
19 | exportMethods("[")
20 | exportMethods(plotReadsPerCell)
21 | exportMethods(show)
22 | exportMethods(updateObject)
23 | importClassesFrom(SingleCellExperiment,SingleCellExperiment)
24 | importFrom(AcidBase,metricsCols)
25 | importFrom(AcidBase,printString)
26 | importFrom(AcidBase,realpath)
27 | importFrom(AcidBase,showSlotInfo)
28 | importFrom(AcidBase,standardizeCall)
29 | importFrom(AcidBase,strMatch)
30 | importFrom(AcidCLI,abort)
31 | importFrom(AcidCLI,alert)
32 | importFrom(AcidCLI,alertSuccess)
33 | importFrom(AcidCLI,alertWarning)
34 | importFrom(AcidCLI,h1)
35 | importFrom(AcidCLI,h2)
36 | importFrom(AcidCLI,separator)
37 | importFrom(AcidCLI,toInlineString)
38 | importFrom(AcidExperiment,detectLanes)
39 | importFrom(AcidExperiment,droplevels2)
40 | importFrom(AcidExperiment,importSampleData)
41 | importFrom(AcidExperiment,matchInterestingGroups)
42 | importFrom(AcidExperiment,minimalSampleData)
43 | importFrom(AcidExperiment,sampleNames)
44 | importFrom(AcidGenerics,"interestingGroups<-")
45 | importFrom(AcidGenerics,calculateMetrics)
46 | importFrom(AcidGenerics,camelCase)
47 | importFrom(AcidGenerics,droplevels2)
48 | importFrom(AcidGenerics,filterCells)
49 | importFrom(AcidGenerics,interestingGroups)
50 | importFrom(AcidGenerics,leftJoin)
51 | importFrom(AcidGenerics,makeDimnames)
52 | importFrom(AcidGenerics,makeLabel)
53 | importFrom(AcidGenerics,makeNames)
54 | importFrom(AcidGenerics,metrics)
55 | importFrom(AcidGenerics,plotBarcodeRanks)
56 | importFrom(AcidGenerics,plotCellCounts)
57 | importFrom(AcidGenerics,plotCountsPerCell)
58 | importFrom(AcidGenerics,plotCountsVsFeatures)
59 | importFrom(AcidGenerics,plotFeaturesPerCell)
60 | importFrom(AcidGenerics,plotMitoRatio)
61 | importFrom(AcidGenerics,plotNovelty)
62 | importFrom(AcidGenerics,plotQc)
63 | importFrom(AcidGenerics,plotReadsPerCell)
64 | importFrom(AcidGenerics,sampleData)
65 | importFrom(AcidGenomes,emptyRanges)
66 | importFrom(AcidGenomes,makeGRangesFromEnsembl)
67 | importFrom(AcidGenomes,makeGRangesFromGff)
68 | importFrom(AcidMarkdown,markdownPlots)
69 | importFrom(AcidPlots,"!!!")
70 | importFrom(AcidPlots,.data)
71 | importFrom(AcidPlots,acid_geom_abline)
72 | importFrom(AcidPlots,acid_geom_label)
73 | importFrom(AcidPlots,acid_geom_label_average)
74 | importFrom(AcidPlots,acid_geom_label_repel)
75 | importFrom(AcidPlots,acid_scale_color_discrete)
76 | importFrom(AcidPlots,acid_scale_fill_discrete)
77 | importFrom(AcidPlots,syms)
78 | importFrom(AcidSingleCell,makeSingleCellExperiment)
79 | importFrom(AcidSingleCell,mapCellsToSamples)
80 | importFrom(BiocGenerics,counts)
81 | importFrom(BiocGenerics,updateObject)
82 | importFrom(IRanges,DataFrameList)
83 | importFrom(S4Vectors,"mcols<-")
84 | importFrom(S4Vectors,"metadata<-")
85 | importFrom(S4Vectors,DataFrame)
86 | importFrom(S4Vectors,SimpleList)
87 | importFrom(S4Vectors,cbind)
88 | importFrom(S4Vectors,do.call)
89 | importFrom(S4Vectors,droplevels)
90 | importFrom(S4Vectors,lapply)
91 | importFrom(S4Vectors,mcols)
92 | importFrom(S4Vectors,metadata)
93 | importFrom(SummarizedExperiment,"assays<-")
94 | importFrom(SummarizedExperiment,"colData<-")
95 | importFrom(SummarizedExperiment,"rowData<-")
96 | importFrom(SummarizedExperiment,"rowRanges<-")
97 | importFrom(SummarizedExperiment,assay)
98 | importFrom(SummarizedExperiment,assayNames)
99 | importFrom(SummarizedExperiment,assays)
100 | importFrom(SummarizedExperiment,colData)
101 | importFrom(SummarizedExperiment,rowData)
102 | importFrom(SummarizedExperiment,rowRanges)
103 | importFrom(bcbioBase,getBarcodeCutoffFromCommands)
104 | importFrom(bcbioBase,getGtfFileFromYaml)
105 | importFrom(bcbioBase,getLevelFromCommands)
106 | importFrom(bcbioBase,getSampleDataFromYaml)
107 | importFrom(bcbioBase,getUmiTypeFromCommands)
108 | importFrom(bcbioBase,importDataVersions)
109 | importFrom(bcbioBase,importProgramVersions)
110 | importFrom(bcbioBase,projectDir)
111 | importFrom(bcbioBase,runDate)
112 | importFrom(bcbioBase,sampleDirs)
113 | importFrom(ggplot2,aes)
114 | importFrom(ggplot2,facet_wrap)
115 | importFrom(ggplot2,geom_boxplot)
116 | importFrom(ggplot2,geom_histogram)
117 | importFrom(ggplot2,geom_step)
118 | importFrom(ggplot2,geom_violin)
119 | importFrom(ggplot2,ggplot)
120 | importFrom(ggplot2,labs)
121 | importFrom(ggplot2,scale_x_continuous)
122 | importFrom(ggplot2,scale_y_continuous)
123 | importFrom(ggplot2,stat_ecdf)
124 | importFrom(ggplot2,vars)
125 | importFrom(ggridges,geom_density_ridges)
126 | importFrom(goalie,allAreDirectories)
127 | importFrom(goalie,allAreFiles)
128 | importFrom(goalie,areDisjointSets)
129 | importFrom(goalie,areSetEqual)
130 | importFrom(goalie,assert)
131 | importFrom(goalie,hasLength)
132 | importFrom(goalie,hasNames)
133 | importFrom(goalie,hasRownames)
134 | importFrom(goalie,hasValidDimnames)
135 | importFrom(goalie,isADirectory)
136 | importFrom(goalie,isAFile)
137 | importFrom(goalie,isAUrl)
138 | importFrom(goalie,isAny)
139 | importFrom(goalie,isCharacter)
140 | importFrom(goalie,isDirectory)
141 | importFrom(goalie,isFile)
142 | importFrom(goalie,isFlag)
143 | importFrom(goalie,isInt)
144 | importFrom(goalie,isString)
145 | importFrom(goalie,isSubset)
146 | importFrom(goalie,requireNamespaces)
147 | importFrom(goalie,validate)
148 | importFrom(goalie,validateClasses)
149 | importFrom(methods,"as<-")
150 | importFrom(methods,"slot<-")
151 | importFrom(methods,.hasSlot)
152 | importFrom(methods,as)
153 | importFrom(methods,coerce)
154 | importFrom(methods,is)
155 | importFrom(methods,new)
156 | importFrom(methods,setClass)
157 | importFrom(methods,show)
158 | importFrom(methods,slot)
159 | importFrom(methods,validObject)
160 | importFrom(parallel,mcMap)
161 | importFrom(parallel,mclapply)
162 | importFrom(pipette,import)
163 | importFrom(utils,capture.output)
164 | importFrom(utils,packageName)
165 | importFrom(utils,packageVersion)
166 | importMethodsFrom(AcidExperiment,"interestingGroups<-")
167 | importMethodsFrom(AcidExperiment,calculateMetrics)
168 | importMethodsFrom(AcidExperiment,interestingGroups)
169 | importMethodsFrom(AcidExperiment,metrics)
170 | importMethodsFrom(AcidExperiment,sampleData)
171 | importMethodsFrom(AcidExperiment,sampleNames)
172 | importMethodsFrom(AcidPlyr,leftJoin)
173 | importMethodsFrom(AcidSingleCell,sampleData)
174 | importMethodsFrom(pipette,import)
175 | importMethodsFrom(syntactic,camelCase)
176 | importMethodsFrom(syntactic,makeDimnames)
177 | importMethodsFrom(syntactic,makeLabel)
178 | importMethodsFrom(syntactic,makeNames)
179 |
--------------------------------------------------------------------------------
/inst/rmarkdown/templates/quality-control/skeleton/skeleton.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | ## Updated 2023-10-05.
3 | params:
4 | title: "Quality control and filtering"
5 |
6 | ## bcbioSingleCell object.
7 | bcb_file: !r file.path("data", "bcb.rds")
8 |
9 | ## Counts: Expected number of UMI-disambiguated counts per cell.
10 | min_counts: 1000
11 | max_counts: !r Inf
12 |
13 | ## Features: Expected number of features (i.e. genes) per cell.
14 | min_features: 500
15 | max_features: !r Inf
16 |
17 | ## Novelty score: log10 features per count.
18 | min_novelty: 0.85
19 |
20 | ## Mitochondrial abundance.
21 | ## Low quality / stressed cells tend to be above 20%.
22 | max_mito_ratio: 0.1
23 |
24 | ## Minimum number of cells per feature.
25 | ## This will remove features with very low expression.
26 | min_cells_per_feature: 10
27 |
28 | ## Expected number of cells per sample.
29 | ## Not recommended by default.
30 | n_cells: !r Inf
31 |
32 | ## Where to save the filtered output.
33 | data_dir: !r file.path("data")
34 |
35 | ## Where to export results of the analysis.
36 | output_dir: !r file.path("results", Sys.Date(), "quality-control")
37 |
38 | title: "`r params[['title']]`"
39 | author: "`r getOption('author')`"
40 | date: "`r Sys.Date()`"
41 |
42 | ## This file is generated by the `prepareTemplate()` step in setup chunk.
43 | bibliography: "bibliography.bib"
44 | ---
45 |
46 | ```{r setup, cache=FALSE, message=FALSE}
47 | ## nolint start
48 | suppressPackageStartupMessages({
49 | library(goalie)
50 | library(basejump)
51 | library(ggplot2)
52 | library(bcbioSingleCell)
53 | })
54 | prepareTemplate()
55 | source("_setup.R")
56 | ## nolint end
57 | ```
58 |
59 | ```{r header, child="_header.Rmd"}
60 | ```
61 |
62 | # Load `bcbioSingleCell` object
63 |
64 | ```{r load-object}
65 | object <- import(params[["bcb_file"]])
66 | assert(
67 | is(object, "bcbioSingleCell"),
68 | validObject(object)
69 | )
70 | print(object)
71 | ```
72 |
73 | [bcbio][] run data was imported from:
74 | **`r metadata(object)[["uploadDir"]]`**.
75 |
76 | # Sample metadata
77 |
78 | ```{r sample-data}
79 | sampleData(object)
80 | ```
81 |
82 | # Reads per cell {.tabset}
83 |
84 | These are counts of how many reads are assigned to a given cellular barcode. It
85 | is normal for single cell RNA-seq data to contain a large number of low
86 | complexity barcodes. The bcbio pipeline filters out most of these barcodes, and
87 | here we have applied a threshold cutoff of a minimum of
88 | `r metadata(object)[["cellularBarcodeCutoff"]]` reads per cell. The unfiltered
89 | read count distributions are shown here.
90 |
91 | ## Histogram
92 |
93 | For high quality data, the proportional histogram should contain a single large
94 | peak that represents cells that were encapsulated. If we see a strong shoulder,
95 | or a bimodal distribution of the cells, that can indicate a couple problems. It
96 | might be that there is free floating RNA, which happens when cells are dying.
97 | It could also be that there are a set of cells that failed for some reason.
98 | Finally, it could also be that there are biologically different types of cells,
99 | and one type is much smaller than the other. If this is the case we would
100 | expect to see less RNA being sequenced from the smaller cells.
101 |
102 | ```{r plot-reads-per-cell-histogram}
103 | plotReadsPerCell(
104 | object = object,
105 | geom = "histogram",
106 | interestingGroups = "sampleName"
107 | )
108 | ```
109 |
110 | ## ECDF
111 |
112 | An empirical cumulative distribution function (ECDF) plot will show the
113 | frequency distribution of the reads per cell. You can see that the vast majority
114 | of low complexity barcodes plateau at a read depth below 1000 reads per cell.
115 |
116 | ```{r plot-reads-per-cell-ecdf}
117 | plotReadsPerCell(object = object, geom = "ecdf")
118 | ```
119 |
120 | # UMI counts per cell {.tabset}
121 |
122 | Now let's assess the distribution of unique molecular identifier
123 | (UMI)-deconvoluted counts per cell. In general, the distributions should be
124 | relatively uniform per sample. Here we are also including violin and ridgeline
125 | plots, with the average number of genes per cell labeled.
126 |
127 | ```{r plot-counts-per-cell-prefilter}
128 | markdownHeader("Violin", level = 2L)
129 | plotCountsPerCell(
130 | object = object,
131 | geom = "violin",
132 | min = params[["min_counts"]],
133 | max = params[["max_counts"]]
134 | )
135 |
136 | markdownHeader("Ridgeline", level = 2L)
137 | plotCountsPerCell(
138 | object = object,
139 | geom = "ridgeline",
140 | min = params[["min_counts"]],
141 | max = params[["max_counts"]]
142 | )
143 |
144 | markdownHeader("Histogram", level = 2L)
145 | plotCountsPerCell(
146 | object = object,
147 | geom = "histogram",
148 | min = params[["min_counts"]],
149 | max = params[["max_counts"]]
150 | )
151 |
152 | markdownHeader("ECDF", level = 2L)
153 | plotCountsPerCell(
154 | object = object,
155 | geom = "ecdf",
156 | interestingGroups = "sampleName",
157 | min = params[["min_counts"]],
158 | max = params[["max_counts"]]
159 | )
160 | ```
161 |
162 | # Filter cells by UMI count
163 |
164 | Let's apply this step first and then proceed to evaluating gene detection,
165 | mitochondrial transcript abundance, and novelty scores.
166 |
167 | ```{r filter-cells-by-count}
168 | object <- filterCells(
169 | object = object,
170 | minCounts = params[["min_counts"]],
171 | maxCounts = params[["max_counts"]]
172 | )
173 | ```
174 |
175 | Let's take a look at the UMI per cell distributions after this filtering step.
176 | Note that we haven't applied very strict filtering here -- we're going to cut
177 | off the "low quality" cells based on the gene detection rate, novelty score,
178 | and mitochondrial abundance.
179 |
180 | ```{r plot-counts-per-cell-postfilter}
181 | plotCountsPerCell(
182 | object = object,
183 | geom = "histogram",
184 | min = params[["min_counts"]],
185 | max = params[["max_counts"]]
186 | )
187 | ```
188 |
189 | # Genes detected per cell {.tabset}
190 |
191 | Here by "detected", we mean genes with a non-zero count measurement per cell.
192 | Seeing gene detection in the range of `500`-`5000` is normal for most
193 | single-cell experiments.
194 |
195 | ```{r plot-features-per-cell}
196 | markdownHeader("Violin", level = 2L)
197 | plotFeaturesPerCell(
198 | object = object,
199 | geom = "violin",
200 | min = min(params[["min_features"]]),
201 | max = max(params[["max_features"]])
202 | )
203 |
204 | markdownHeader("Ridgeline", level = 2L)
205 | plotFeaturesPerCell(
206 | object = object,
207 | geom = "ridgeline",
208 | min = min(params[["min_features"]]),
209 | max = max(params[["max_features"]])
210 | )
211 |
212 | markdownHeader("Histogram", level = 2L)
213 | plotFeaturesPerCell(
214 | object = object,
215 | geom = "histogram",
216 | min = min(params[["min_features"]]),
217 | max = max(params[["max_features"]])
218 | )
219 |
220 | markdownHeader("ECDF", level = 2L)
221 | plotFeaturesPerCell(
222 | object = object,
223 | geom = "ecdf",
224 | min = min(params[["min_features"]]),
225 | max = max(params[["max_features"]])
226 | )
227 | ```
228 |
229 | # UMIs vs. features detected
230 |
231 | If we graph out the total number of UMI counts per cell vs. the genes detected
232 | per cell, we can assess whether there is a large population of low quality
233 | cells with low counts and/or gene detection.
234 |
235 | ```{r plot-counts-vs-features}
236 | plotCountsVsFeatures(object)
237 | ```
238 |
239 | # Novelty score {.tabset}
240 |
241 | Another way to QC the data is to look for less novelty, that is cells that have
242 | fewer genes detected per count than other cells. We can see the samples where
243 | we sequenced each cell less have a higher overall novelty; that is because we
244 | have not started saturating the sequencing for any given gene in these samples.
245 | Outlier cells in these samples might be cells that have a less complex RNA
246 | species than other cells. Sometimes we can detect contamination with low
247 | complexity cell types like red blood cells via this metric.
248 |
249 | ```{r plot-novelty}
250 | markdownHeader("Violin", level = 2L)
251 | plotNovelty(
252 | object = object,
253 | geom = "violin",
254 | min = min(params[["min_novelty"]])
255 | )
256 |
257 | markdownHeader("Ridgeline", level = 2L)
258 | plotNovelty(
259 | object = object,
260 | geom = "ridgeline",
261 | min = min(params[["min_novelty"]])
262 | )
263 |
264 | markdownHeader("Histogram", level = 2L)
265 | plotNovelty(
266 | object = object,
267 | geom = "histogram",
268 | min = min(params[["min_novelty"]])
269 | )
270 |
271 | markdownHeader("ECDF", level = 2L)
272 | plotNovelty(
273 | object = object,
274 | geom = "ecdf",
275 | min = min(params[["min_novelty"]])
276 | )
277 | ```
278 |
279 | # Mitochondrial abundance {.tabset}
280 |
281 | We evaluate overall mitochondrial gene expression as a biomarker of cellular
282 | stress during sample preparation.
283 |
284 | ```{r plot-mito-ratio}
285 | markdownHeader("Violin", level = 2L)
286 | plotMitoRatio(
287 | object = object,
288 | geom = "violin",
289 | max = max(params[["max_mito_ratio"]])
290 | )
291 |
292 | markdownHeader("Ridgeline", level = 2L)
293 | plotMitoRatio(
294 | object = object,
295 | geom = "ridgeline",
296 | max = max(params[["max_mito_ratio"]])
297 | )
298 |
299 | markdownHeader("Histogram", level = 2L)
300 | plotMitoRatio(
301 | object = object,
302 | geom = "histogram",
303 | max = max(params[["max_mito_ratio"]])
304 | )
305 |
306 | markdownHeader("ECDF", level = 2L)
307 | plotMitoRatio(
308 | object = object,
309 | geom = "ecdf",
310 | max = max(params[["max_mito_ratio"]])
311 | )
312 | ```
313 |
314 | # Filter cells
315 |
316 | ```{r filter-cells}
317 | object <- filterCells(
318 | object = object,
319 | minCounts = params[["min_counts"]],
320 | maxCounts = params[["max_counts"]],
321 | minFeatures = params[["min_features"]],
322 | maxFeatures = params[["max_features"]],
323 | maxMitoRatio = params[["max_mito_ratio"]],
324 | minNovelty = params[["min_novelty"]],
325 | nCells = params[["n_cells"]],
326 | minCellsPerFeature = params[["min_cells_per_feature"]]
327 | )
328 | ```
329 |
330 | ```{r plot-filtered-qc}
331 | plotQc(object, geom = "violin")
332 | ```
333 |
334 | # Save filtered data
335 |
336 | ```{r save-filtered}
337 | name <- basenameSansExt(params[["bcb_file"]])
338 | assignAndSaveData(
339 | name = paste(name, "filtered", sep = "_"),
340 | object = object,
341 | dir = params[["data_dir"]]
342 | )
343 | ```
344 |
345 | ```{r export}
346 | export(
347 | object = object,
348 | con = params[["output_dir"]],
349 | compress = TRUE
350 | )
351 | ```
352 |
353 | ```{r footer, child="_footer.Rmd"}
354 | ```
355 |
356 | ```{r links, child="_links.Rmd"}
357 | ```
358 |
--------------------------------------------------------------------------------
/R/updateObject-methods.R:
--------------------------------------------------------------------------------
1 | #' Update object
2 | #'
3 | #' @name updateObject
4 | #' @author Michael Steinbaugh
5 | #' @note Updated 2023-12-04.
6 | #'
7 | #' @inheritParams AcidRoxygen::params
8 | #'
9 | #' @return Modified object.
10 | #'
11 | #' @examples
12 | #' data(bcb)
13 | #'
14 | #' ## bcbioSingleCell ====
15 | #' updateObject(bcb)
16 | #'
17 | #' ## Example that depends on remote file.
18 | #' ## > x <- import(
19 | #' ## > con = file.path(
20 | #' ## > bcbioSingleCellTestsUrl,
21 | #' ## > "bcbioSingleCell_0.1.0.rds"
22 | #' ## > )
23 | #' ## > )
24 | #' ## > x <- updateObject(x)
25 | #' ## > x
26 | NULL
27 |
28 |
29 |
30 | ## Updated 2022-05-09.
31 | `updateObject,bcbioSingleCell` <- # nolint
32 | function(object, ..., verbose = FALSE) {
33 | assert(isFlag(verbose))
34 | if (isTRUE(verbose)) {
35 | h1("Update object")
36 | }
37 | sce <- as(object, "SingleCellExperiment")
38 | cells <- colnames(sce)
39 | assays <- assays(sce)
40 | rowRanges <- rowRanges(sce)
41 | colData <- colData(sce)
42 | metadata <- metadata(sce)
43 | version <- metadata[["version"]]
44 | assert(is(version, "package_version"))
45 | if (isTRUE(verbose)) {
46 | alert(sprintf(
47 | fmt = "Upgrading {.var %s} from version %s to %s.",
48 | "bcbioSingleCell",
49 | as.character(version),
50 | as.character(.pkgVersion)
51 | ))
52 | }
53 | ## Assays --------------------------------------------------------------
54 | if (isTRUE(verbose)) {
55 | h2("Assays")
56 | }
57 | ## Ensure raw counts are always named "counts".
58 | if (isSubset("assay", names(assays))) {
59 | ## Versions < 0.1 (e.g. 0.0.21).
60 | if (isTRUE(verbose)) {
61 | alert(sprintf(
62 | "Renaming {.var %s} to {.var %s}.",
63 | "assay", "counts"
64 | ))
65 | }
66 | names(assays)[names(assays) == "assay"] <- "counts"
67 | } else if (isSubset("raw", names(assays))) {
68 | if (isTRUE(verbose)) {
69 | alert(sprintf(
70 | "Renaming {.var %s} assay to {.var %s}.",
71 | "raw", "counts"
72 | ))
73 | }
74 | names(assays)[names(assays) == "raw"] <- "counts"
75 | }
76 | assays <- Filter(Negate(is.null), assays)
77 | ## Put the required assays first, in order.
78 | assays <- assays[unique(c(.requiredAssays, names(assays)))]
79 | assert(isSubset(.requiredAssays, names(assays)))
80 | ## Row data ------------------------------------------------------------
81 | if (hasNames(mcols(rowRanges))) {
82 | mcols(rowRanges) <-
83 | camelCase(mcols(rowRanges), strict = TRUE)
84 | }
85 | ## Column data ---------------------------------------------------------
86 | if (isTRUE(verbose)) {
87 | h2("Column data")
88 | }
89 | colnames(colData) <- camelCase(colnames(colData), strict = TRUE)
90 | if (isSubset(c("nCount", "nUmi"), colnames(colData))) {
91 | if (isTRUE(verbose)) {
92 | alert(sprintf(
93 | "Renaming {.var %s} to {.var %s}.",
94 | "nCount", "nRead"
95 | ))
96 | }
97 | colnames(colData)[colnames(colData) == "nCount"] <- "nRead"
98 | if (isTRUE(verbose)) {
99 | alert(sprintf(
100 | "Renaming {.var %s} to {.var %s}.",
101 | "nUmi", "nCount"
102 | ))
103 | }
104 | colnames(colData)[colnames(colData) == "nUmi"] <- "nCount"
105 | }
106 | if (isSubset("nGene", colnames(colData))) {
107 | if (isTRUE(verbose)) {
108 | alert(sprintf(
109 | "Renaming {.var %s} to {.var %s}.",
110 | "nGene", "nFeature"
111 | ))
112 | }
113 | colnames(colData)[colnames(colData) == "nGene"] <- "nFeature"
114 | if (isTRUE(verbose)) {
115 | alert(sprintf(
116 | "Renaming {.var %s} to {.var %s}.",
117 | "log10GenesPerUmi", "log10FeaturesPerCount"
118 | ))
119 | }
120 | colnames(colData)[colnames(colData) == "log10GenesPerUmi"] <-
121 | "log10FeaturesPerCount"
122 | }
123 | ## Move sampleData into colData.
124 | if (isSubset("sampleData", names(metadata))) {
125 | sampleData <- metadata[["sampleData"]]
126 | } else if (isSubset("sampleMetadata", names(metadata))) {
127 | sampleData <- metadata[["sampleMetadata"]]
128 | } else {
129 | sampleData <- NULL
130 | }
131 | if (!is.null(sampleData)) {
132 | colnames(sampleData) <-
133 | camelCase(colnames(sampleData), strict = TRUE)
134 | if (isTRUE(verbose)) {
135 | alert(sprintf(
136 | "Moving {.var %s} from {.fun %s} into {.fun %s}.",
137 | "sampleData", "metadata", "colData"
138 | ))
139 | }
140 | assert(isSubset("sampleId", colnames(sampleData)))
141 | sampleData <- as(sampleData, "DataFrame")
142 | colData <- colData[
143 | ,
144 | setdiff(colnames(colData), colnames(sampleData)),
145 | drop = FALSE
146 | ]
147 | if (isTRUE(verbose)) {
148 | alert("Mapping cells to samples.")
149 | }
150 | c2s <- mapCellsToSamples(
151 | cells = cells,
152 | samples = as.character(sampleData[["sampleId"]])
153 | )
154 | assert(is.factor(c2s))
155 | colData[["sampleId"]] <- c2s
156 | sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
157 | colData <- leftJoin(x = colData, y = sampleData, by = "sampleId")
158 | assert(
159 | is(colData, "DataFrame"),
160 | identical(rownames(colData), colnames(object))
161 | )
162 | ## Ensure rows are ordered to match the object.
163 | colData <- colData[cells, , drop = FALSE]
164 | }
165 | ## Metadata ------------------------------------------------------------
166 | if (isTRUE(verbose)) {
167 | h2("Metadata")
168 | }
169 | ## dataVersions.
170 | dataVersions <- metadata[["dataVersions"]]
171 | if (is(dataVersions, "data.frame")) {
172 | if (isTRUE(verbose)) {
173 | alert(sprintf(
174 | "Setting {.var %s} as {.cls %s}.",
175 | "dataVersions", "DataFrame"
176 | ))
177 | }
178 | metadata[["dataVersions"]] <- as(dataVersions, "DataFrame")
179 | }
180 | ## ensemblRelease.
181 | if (isSubset("ensemblVersion", names(metadata))) {
182 | if (isTRUE(verbose)) {
183 | alert(sprintf(
184 | "Renaming {.var %s} to {.var %s}.",
185 | "ensemblVersion", "ensemblRelease"
186 | ))
187 | }
188 | names(metadata)[
189 | names(metadata) == "ensemblVersion"
190 | ] <- "ensemblRelease"
191 | }
192 | if (
193 | is.numeric(metadata[["ensemblRelease"]]) &&
194 | !is.integer(metadata[["ensemblRelease"]])
195 | ) {
196 | if (isTRUE(verbose)) {
197 | alert(sprintf(
198 | "Setting {.var %s} as integer.",
199 | "ensemblRelease"
200 | ))
201 | }
202 | metadata[["ensemblRelease"]] <-
203 | as.integer(metadata[["ensemblRelease"]])
204 | }
205 | ## Update the version, if necessary.
206 | if (!identical(metadata[["version"]], .pkgVersion)) {
207 | metadata[["originalVersion"]] <- metadata[["version"]]
208 | metadata[["version"]] <- .pkgVersion
209 | }
210 | ## gffFile.
211 | if (isSubset("gtfFile", names(metadata))) {
212 | if (isTRUE(verbose)) {
213 | alert(sprintf(
214 | "Renaming {.var %s} to {.var %s}.",
215 | "gtfFile", "gffFile"
216 | ))
217 | }
218 | names(metadata)[names(metadata) == "gtfFile"] <- "gffFile"
219 | }
220 | if (!isSubset("gffFile", names(metadata))) {
221 | if (isTRUE(verbose)) {
222 | alert(sprintf(
223 | "Setting {.var %s} as {.val %s}.",
224 | "gffFile", "empty character"
225 | ))
226 | }
227 | metadata[["gffFile"]] <- character()
228 | }
229 | ## lanes.
230 | if (!is.integer(metadata[["lanes"]])) {
231 | if (isTRUE(verbose)) {
232 | alert(sprintf(
233 | "Setting {.var %s} as {.val %s}.",
234 | "lanes", "integer"
235 | ))
236 | }
237 | metadata[["lanes"]] <- as.integer(metadata[["lanes"]])
238 | }
239 | ## level.
240 | if (!isSubset("level", names(metadata))) {
241 | if (isTRUE(verbose)) {
242 | alert(sprintf(
243 | "Setting {.var %s} as {.val %s}.",
244 | "level", "genes"
245 | ))
246 | }
247 | metadata[["level"]] <- "genes"
248 | }
249 | ## programVersions.
250 | if (
251 | !isSubset("programVersions", names(metadata)) &&
252 | isSubset("programs", names(metadata))
253 | ) {
254 | if (isTRUE(verbose)) {
255 | alert(sprintf(
256 | "Renaming {.var %s} to {.var %s}.",
257 | "programs", "programVersions"
258 | ))
259 | }
260 | names(metadata)[names(metadata) == "programs"] <- "programVersions"
261 | }
262 | programVersions <- metadata[["programVersions"]]
263 | if (is(programVersions, "data.frame")) {
264 | metadata[["programVersions"]] <- as(programVersions, "DataFrame")
265 | }
266 | ## sampleMetadataFile.
267 | if (!is.character(metadata[["sampleMetadataFile"]])) {
268 | if (isTRUE(verbose)) {
269 | alert(sprintf(
270 | "Setting {.var %s} as {.val %s}.",
271 | "sampleMetadataFile", "empty character"
272 | ))
273 | }
274 | metadata[["sampleMetadataFile"]] <- character()
275 | }
276 | ## sessionInfo.
277 | if (isSubset("utilsSessionInfo", names(metadata))) {
278 | if (isTRUE(verbose)) {
279 | alert(sprintf("Simplifying stashed {.var %s}.", "sessionInfo"))
280 | }
281 | names(metadata)[
282 | names(metadata) == "utilsSessionInfo"
283 | ] <- "sessionInfo"
284 | metadata[["devtoolsSessionInfo"]] <- NULL
285 | }
286 | ## Drop legacy slots.
287 | keep <- setdiff(
288 | x = names(metadata),
289 | y = c("cellToSample", "sampleData", "sampleMetadata")
290 | )
291 | metadata <- metadata[keep]
292 | ## Return --------------------------------------------------------------
293 | assays(sce) <- assays
294 | rowRanges(sce) <- rowRanges
295 | colData(sce) <- colData
296 | metadata(sce) <- metadata
297 | bcb <- new(Class = "bcbioSingleCell", sce)
298 | validObject(bcb)
299 | if (isTRUE(verbose)) {
300 | alertSuccess(sprintf(
301 | "Update of {.var %s} object was successful.",
302 | "bcbioSingleCell"
303 | ))
304 | }
305 | bcb
306 | }
307 |
308 |
309 |
## Register `updateObject,bcbioSingleCell` as the S4 method that the
## `updateObject` generic dispatches to for `bcbioSingleCell` objects.
#' @rdname updateObject
#' @export
setMethod(
    f = "updateObject",
    definition = `updateObject,bcbioSingleCell`,
    signature = signature(object = "bcbioSingleCell")
)
317 |
--------------------------------------------------------------------------------
/R/AllGenerators.R:
--------------------------------------------------------------------------------
#' @inherit bcbioSingleCell-class title description
#' @author Michael Steinbaugh
#' @note Updated 2023-09-21.
#' @export
#'
#' @inheritParams AcidSingleCell::makeSingleCellExperiment
#' @inheritParams AcidRoxygen::params
#'
#' @section Remote data:
#'
#' When working in RStudio, we recommend connecting to the bcbio-nextgen run
#' directory as a remote connection over
#' [sshfs](https://github.com/osxfuse/osxfuse/wiki/SSHFS).
#'
#' @section Feature annotations:
#'
#' Row ranges are resolved in this order: (1) Ensembl annotations, when both
#' `organism` and `ensemblRelease` are defined; (2) a GTF/GFF file, either
#' user-defined via `gffFile` or detected from the bcbio YAML; (3) empty
#' ranges, as a fallback.
#'
#' @return `bcbioSingleCell`.
#'
#' @seealso
#' - `SingleCellExperiment::SingleCellExperiment()`.
#' - `.S4methods(class = "bcbioSingleCell")`.
#'
#' @examples
#' uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell")
#'
#' x <- bcbioSingleCell(uploadDir)
#' print(x)
#'
#' x <- bcbioSingleCell(
#'     uploadDir = uploadDir,
#'     sampleMetadataFile = file.path(uploadDir, "metadata.csv")
#' )
#' print(x)
bcbioSingleCell <-
    function(uploadDir,
             sampleMetadataFile = NULL,
             organism = NULL,
             ensemblRelease = NULL,
             genomeBuild = NULL,
             gffFile = NULL,
             transgeneNames = NULL,
             interestingGroups = "sampleName") {
        assert(
            isADirectory(uploadDir),
            isString(sampleMetadataFile, nullOk = TRUE),
            isString(organism, nullOk = TRUE),
            isInt(ensemblRelease, nullOk = TRUE),
            isString(genomeBuild, nullOk = TRUE),
            isString(gffFile, nullOk = TRUE),
            isCharacter(transgeneNames, nullOk = TRUE),
            isCharacter(interestingGroups)
        )
        ## A user-defined GFF must resolve to an existing file or a URL.
        ## The check has to be wrapped in `assert()`, otherwise its result is
        ## silently discarded.
        if (isString(gffFile)) {
            assert(
                isAFile(gffFile) || isAUrl(gffFile),
                msg = sprintf(
                    "'%s' must be an existing file or a URL: '%s'.",
                    "gffFile", gffFile
                )
            )
        }
        h1("bcbioSingleCell")
        alert("Importing bcbio-nextgen single-cell RNA-seq run")
        ## Run info ------------------------------------------------------------
        uploadDir <- realpath(uploadDir)
        projectDir <- projectDir(uploadDir)
        sampleDirs <- sampleDirs(uploadDir)
        lanes <- detectLanes(sampleDirs)
        yaml <- import(file.path(projectDir, "project-summary.yaml"))
        dataVersions <-
            importDataVersions(file.path(projectDir, "data_versions.csv"))
        assert(is(dataVersions, "DataFrame"))
        programVersions <-
            importProgramVersions(file.path(projectDir, "programs.txt"))
        assert(is(programVersions, "DataFrame"))
        log <- import(file.path(projectDir, "bcbio-nextgen.log"))
        ## This step enables our minimal dataset to pass checks. An empty log
        ## is downgraded from an error to a warning.
        tryCatch(
            expr = assert(isCharacter(log)),
            error = function(e) {
                alertWarning(sprintf(
                    "{.file %s} file is empty.",
                    "bcbio-nextgen.log"
                ))
            }
        )
        commandsLog <-
            import(file.path(projectDir, "bcbio-nextgen-commands.log"))
        ## This step enables our minimal dataset to pass checks.
        tryCatch(
            expr = assert(isCharacter(commandsLog)),
            error = function(e) {
                alertWarning(
                    "{.file bcbio-nextgen-commands.log} file is empty."
                )
            }
        )
        cutoff <- getBarcodeCutoffFromCommands(commandsLog)
        level <- getLevelFromCommands(commandsLog)
        umiType <- getUmiTypeFromCommands(commandsLog)
        ## Check to see if we're dealing with a multiplexed platform.
        multiplexed <- any(vapply(
            X = c("dropseq", "indrop"),
            FUN = function(pattern) {
                grepl(pattern = pattern, x = umiType)
            },
            FUN.VALUE = logical(1L)
        ))
        ## Sample metadata -----------------------------------------------------
        h2("Sample metadata")
        allSamples <- TRUE
        sampleData <- NULL
        if (isString(sampleMetadataFile)) {
            sampleData <- importSampleData(
                file = sampleMetadataFile,
                lanes = lanes,
                pipeline = "bcbio"
            )
            ## Error on incorrect reverse complement input. The sample
            ## directory names carry the revcomp sequence, so an exact match
            ## against the user-defined sequences indicates bad input.
            if (isSubset("sequence", colnames(sampleData))) {
                sampleDirSequence <- strMatch(
                    x = names(sampleDirs),
                    pattern = "^.+_([ACGT]+)$"
                )[, 2L]
                assert(
                    !identical(
                        sort(sampleDirSequence),
                        sort(as.character(sampleData[["sequence"]]))
                    ),
                    msg = paste(
                        "It appears that the reverse complement sequence of",
                        "the i5 index barcodes were input into the sample",
                        "metadata 'sequence' column. bcbio outputs the revcomp",
                        "into the sample directories, but the forward sequence",
                        "should be used in the R package."
                    )
                )
            }
            ## Allow sample selection with this file.
            if (nrow(sampleData) < length(sampleDirs)) {
                sampleDirs <- sampleDirs[rownames(sampleData)]
                alert(sprintf(
                    fmt = "Loading a subset of samples: %s.",
                    toInlineString(basename(sampleDirs), n = 5L)
                ))
                allSamples <- FALSE
            }
        }
        ## Assays (counts) -----------------------------------------------------
        h2("Counts")
        ## Note that we're now allowing transcript-level counts.
        counts <- .importCounts(sampleDirs = sampleDirs)
        assert(hasValidDimnames(counts))
        ## Row data (genes/transcripts) ----------------------------------------
        h2("Feature metadata")
        ## Annotation priority:
        ## 1. AnnotationHub.
        ##    - Requires `organism` to be declared.
        ##    - Ensure that Ensembl release and genome build match.
        ## 2. GTF/GFF file. Use the bcbio GTF if possible.
        ## 3. Fall back to slotting empty ranges. This is offered as support for
        ##    complex datasets (e.g. multiple organisms).
        if (isString(organism) && is.numeric(ensemblRelease)) {
            ## AnnotationHub (ensembldb).
            alert("{.fun makeGRangesFromEnsembl}")
            rowRanges <- makeGRangesFromEnsembl(
                organism = organism,
                level = level,
                genomeBuild = genomeBuild,
                release = ensemblRelease
            )
        } else {
            ## GTF/GFF file.
            if (is.null(gffFile)) {
                ## Attempt to use bcbio GTF automatically.
                gffFile <- getGtfFileFromYaml(yaml)
            }
            if (!is.null(gffFile)) {
                alert("{.fun makeGRangesFromGff}")
                ## Only local paths can be resolved on disk. A URL is passed
                ## through untouched.
                ## NOTE(review): assumes `makeGRangesFromGff()` accepts a URL,
                ## as implied by the `isAUrl()` input check above -- confirm.
                if (!isAUrl(gffFile)) {
                    gffFile <- realpath(gffFile)
                }
                rowRanges <- makeGRangesFromGff(file = gffFile, level = level)
            } else {
                alertWarning("Slotting empty ranges into {.fun rowRanges}.")
                rowRanges <- emptyRanges(rownames(counts))
            }
        }
        assert(is(rowRanges, "GenomicRanges"))
        ## Attempt to get genome build and Ensembl release if not declared.
        ## Note that these will remain NULL when using GTF file (see above).
        if (is.null(genomeBuild)) {
            genomeBuild <- metadata(rowRanges)[["genomeBuild"]]
        }
        if (is.null(ensemblRelease)) {
            ensemblRelease <- metadata(rowRanges)[["ensemblRelease"]]
        }
        ## Column data ---------------------------------------------------------
        h2("Column data")
        colData <- DataFrame(row.names = colnames(counts))
        ## Generate automatic sample metadata, if necessary.
        if (is.null(sampleData)) {
            if (isTRUE(multiplexed)) {
                ## Multiplexed samples without user-defined metadata.
                alertWarning(sprintf(
                    fmt = paste(
                        "{.var %s} is recommended for",
                        "multiplexed samples (e.g. {.val %s})."
                    ),
                    "sampleMetadataFile", umiType
                ))
                sampleData <- minimalSampleData(basename(sampleDirs))
            } else {
                sampleData <- getSampleDataFromYaml(yaml)
            }
        }
        assert(isSubset(rownames(sampleData), names(sampleDirs)))
        ## Join `sampleData` into cell-level `colData`.
        if (identical(nrow(sampleData), 1L)) {
            colData[["sampleId"]] <- as.factor(rownames(sampleData))
        } else {
            colData[["sampleId"]] <- mapCellsToSamples(
                cells = rownames(colData),
                samples = rownames(sampleData)
            )
        }
        sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
        ## Need to ensure the `sampleId` factor levels match up, otherwise we'll
        ## get a warning during the `leftJoin()` call below.
        assert(areSetEqual(
            x = levels(colData[["sampleId"]]),
            y = levels(sampleData[["sampleId"]])
        ))
        levels(sampleData[["sampleId"]]) <- levels(colData[["sampleId"]])
        colData <- leftJoin(colData, sampleData, by = "sampleId")
        assert(
            is(colData, "DataFrame"),
            hasRownames(colData)
        )
        ## Metadata ------------------------------------------------------------
        h2("Metadata")
        cbList <- .importReads(sampleDirs = sampleDirs)
        runDate <- runDate(projectDir)
        interestingGroups <- camelCase(interestingGroups, strict = TRUE)
        assert(isSubset(interestingGroups, colnames(sampleData)))
        ## `as.integer()` / `as.character()` coerce NULL inputs to zero-length
        ## vectors, so these slots always have a consistent type.
        metadata <- list(
            "allSamples" = allSamples,
            "bcbioCommandsLog" = commandsLog,
            "bcbioLog" = log,
            "call" = standardizeCall(),
            "cellularBarcodeCutoff" = cutoff,
            "cellularBarcodes" = cbList,
            "dataVersions" = dataVersions,
            "ensemblRelease" = as.integer(ensemblRelease),
            "genomeBuild" = as.character(genomeBuild),
            "gffFile" = as.character(gffFile),
            "interestingGroups" = interestingGroups,
            "lanes" = lanes,
            "level" = level,
            "organism" = as.character(organism),
            "pipeline" = "bcbio",
            "programVersions" = programVersions,
            "projectDir" = projectDir,
            "runDate" = runDate,
            "sampleDirs" = sampleDirs,
            "sampleMetadataFile" = as.character(sampleMetadataFile),
            "umiType" = umiType,
            "uploadDir" = uploadDir,
            "version" = .pkgVersion,
            "yaml" = yaml
        )
        ## SingleCellExperiment ------------------------------------------------
        object <- makeSingleCellExperiment(
            assays = SimpleList("counts" = counts),
            rowRanges = rowRanges,
            colData = colData,
            metadata = metadata,
            transgeneNames = transgeneNames
        )
        ## Return --------------------------------------------------------------
        ## Always prefilter, removing very low quality cells and/or genes.
        object <- calculateMetrics(object = object, prefilter = TRUE)
        ## Bind the `nRead` column into the cell metrics. These are the number
        ## of raw read counts prior to UMI disambiguation that bcbio uses for
        ## initial filtering (`minimum_barcode_depth` in YAML).
        colData <- colData(object)
        nRead <- .nRead(cbList)
        assert(
            is.integer(nRead),
            isSubset(rownames(colData), names(nRead)),
            areDisjointSets("nRead", colnames(colData))
        )
        colData[["nRead"]] <- unname(nRead[rownames(colData)])
        colData <- colData[, sort(colnames(colData)), drop = FALSE]
        colData(object) <- colData
        bcb <- new(Class = "bcbioSingleCell", object)
        alertSuccess("bcbio single-cell RNA-seq run imported successfully.")
        bcb
    }
291 |
--------------------------------------------------------------------------------
/R/plotReadsPerCell-methods.R:
--------------------------------------------------------------------------------
1 | #' @name plotReadsPerCell
2 | #' @author Michael Steinbaugh, Rory Kirchner
3 | #' @inherit AcidGenerics::plotReadsPerCell
4 | #' @note Updated 2023-12-04.
5 | #'
6 | #' @inheritParams AcidRoxygen::params
7 | #' @param ... Additional arguments.
8 | #'
9 | #' @param cutoffLine `logical(1)`.
10 | #' Include a line marking the cutoff.
11 | #'
12 | #' @examples
13 | #' data(bcb)
14 | #'
15 | #' ## bcbioSingleCell ====
16 | #' plotReadsPerCell(bcb, geom = "histogram")
17 | #' plotReadsPerCell(bcb, geom = "ecdf")
18 | NULL
19 |
20 |
21 |
#' Proportional cellular barcodes data
#'
#' Modified version of Allon Klein Lab MATLAB code.
#'
#' For each sample, the log10-transformed read counts are binned into a
#' histogram, and each bin is weighted by `counts * 10^mids`, normalized so the
#' weights sum to 1 within the sample.
#'
#' @author Michael Steinbaugh, Rory Kirchner
#' @keywords internal
#' @note Updated 2022-05-07.
#' @noRd
#'
#' @param data `DataFrame`.
#' Raw read counts per cellular barcode.
#' Return from `.rawMetrics()` function.
#' Must contain an integer `nRead` column and a factor `sampleId` column.
#' @param sampleData `DataFrame`.
#' Sample-level metadata, with sample identifiers defined as row names.
#' Joined into the return by `sampleId`.
#' @param breaks `integer(1)`.
#' Number of histogram cells requested from `graphics::hist()`.
#'
#' @return `DataFrame`.
#' One row per histogram bin per sample, containing `sampleId`, `log10Read`
#' (bin midpoint), `proportion`, and the joined `sampleData` columns.
.proportionalReadsPerCell <-
    function(data,
             sampleData,
             breaks = 100L) {
        assert(
            requireNamespaces("graphics"),
            is(data, "DataFrame"),
            isSubset(c("nRead", "sampleId"), colnames(data)),
            is.integer(data[["nRead"]]),
            is.factor(data[["sampleId"]]),
            is(sampleData, "DataFrame"),
            isInt(breaks)
        )
        sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
        samples <- levels(data[["sampleId"]])
        perSample <- DataFrameList(lapply(
            X = samples,
            FUN = function(sampleId) {
                keep <- which(data[["sampleId"]] == sampleId)
                nRead <- data[["nRead"]][keep]
                ## Histogram of log10-transformed counts. The argument is
                ## spelled out in full here, rather than relying on partial
                ## matching of `n` to `nclass`.
                h <- graphics::hist(
                    x = log10(nRead),
                    breaks = breaks,
                    plot = FALSE
                )
                ## Klein Lab MATLAB code reference.
                ## counts: fLog; mids: xLog
                weighted <- h[["counts"]] * (10L^h[["mids"]])
                proportion <- weighted / sum(weighted)
                DataFrame(
                    "sampleId" = factor(sampleId),
                    "log10Read" = h[["mids"]],
                    "proportion" = proportion
                )
            }
        ))
        ## Collapse the per-sample list back into a single `DataFrame`.
        out <- unlist(perSample, recursive = FALSE, use.names = FALSE)
        out <- leftJoin(out, sampleData, by = "sampleId")
        out
    }
78 |
79 |
80 |
#' Plot proportional reads per cell histogram
#'
#' Step plot of `proportion` against `log10Read`, colored by
#' `interestingGroups`. Facets by `aggregate` when that column is defined.
#'
#' @note Updated 2023-08-16.
#' @noRd
#'
#' @param data Return from `.proportionalReadsPerCell()` function.
#' @param min Read count cutoff. When greater than zero, a cutoff line is
#' drawn at `log10(min)` on the x-axis.
#'
#' @return `ggplot`.
.plotReadsPerCellHistogram <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        aesMap <- aes(
            x = .data[["log10Read"]],
            y = .data[["proportion"]],
            color = .data[["interestingGroups"]]
        )
        p <- ggplot(data = as.data.frame(data), mapping = aesMap)
        p <- p + geom_step(alpha = 0.75, linewidth = 1L)
        p <- p + labs(
            x = "log10 reads per cell",
            y = "proportion of reads"
        )
        ## Cutoff line, on the same log10 scale as the x-axis.
        if (min > 0L) {
            p <- p + acid_geom_abline(xintercept = log10(min))
        }
        ## Color palette.
        p <- p + acid_scale_color_discrete()
        ## Facets (only applicable to aggregated samples).
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
129 |
130 |
131 |
## Box plot of `nRead` per `sampleName`, filled by `interestingGroups`, on a
## log10 y-axis with outlier points hidden. Draws a horizontal cutoff line when
## `min` is greater than zero, and facets by `aggregate` when that column is
## defined. Returns `ggplot`.
## Updated 2023-08-16.
.plotReadsPerCellBoxplot <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        plotData <- as.data.frame(data)
        aesMap <- aes(
            x = .data[["sampleName"]],
            y = .data[["nRead"]],
            fill = .data[["interestingGroups"]]
        )
        p <- ggplot(data = plotData, mapping = aesMap)
        p <- p + geom_boxplot(color = "black", outlier.shape = NA)
        p <- p + scale_y_continuous(trans = "log10")
        p <- p + acid_geom_label_average(
            data = plotData,
            col = "nRead",
            digits = 0L
        )
        p <- p + labs(x = NULL, y = "reads per cell")
        ## Cutoff line.
        if (min > 0L) {
            p <- p + acid_geom_abline(yintercept = min)
        }
        ## Color palette.
        p <- p + acid_scale_fill_discrete()
        ## Facets (only applicable to aggregated samples).
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
176 |
177 |
178 |
## Empirical cumulative distribution (step) plot of `nRead`, colored by
## `interestingGroups`, on a log10 x-axis. Draws a vertical cutoff line when
## `min` is greater than zero, and facets by `aggregate` when that column is
## defined. Returns `ggplot`.
## Updated 2023-08-16.
.plotReadsPerCellEcdf <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        aesMap <- aes(
            x = .data[["nRead"]],
            color = .data[["interestingGroups"]]
        )
        p <- ggplot(data = as.data.frame(data), mapping = aesMap)
        p <- p + stat_ecdf(geom = "step", linewidth = 1L)
        p <- p + labs(x = "reads per cell", y = "frequency")
        p <- p + scale_x_continuous(trans = "log10")
        ## Cutoff line.
        if (min > 0L) {
            p <- p + acid_geom_abline(xintercept = min)
        }
        ## Color palette.
        p <- p + acid_scale_color_discrete()
        ## Facets (only applicable to aggregated samples).
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
217 |
218 |
219 |
## Ridgeline density plot of `nRead` per `sampleName`, filled by
## `interestingGroups`, on a log10 x-axis. Draws a vertical cutoff line when
## `min` is greater than zero, and facets by `aggregate` when that column is
## defined. Returns `ggplot`.
## Updated 2023-08-16.
.plotReadsPerCellRidgeline <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        plotData <- as.data.frame(data)
        aesMap <- aes(
            x = .data[["nRead"]],
            y = .data[["sampleName"]],
            fill = .data[["interestingGroups"]]
        )
        p <- ggplot(data = plotData, mapping = aesMap)
        p <- p + geom_density_ridges(
            alpha = 0.75,
            color = "black",
            panel_scaling = TRUE,
            scale = 10L
        )
        p <- p + scale_x_continuous(trans = "log10")
        p <- p + acid_geom_label_average(
            data = plotData,
            col = "nRead",
            digits = 0L
        )
        p <- p + labs(x = "reads per cell", y = NULL)
        ## Cutoff line.
        if (min > 0L) {
            p <- p + acid_geom_abline(xintercept = min)
        }
        ## Color palette.
        p <- p + acid_scale_fill_discrete()
        ## Facets (only applicable to aggregated samples).
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
268 |
269 |
270 |
## Violin plot of `nRead` per `sampleName`, filled by `interestingGroups`, on a
## log10 y-axis, with violins scaled by count. Draws a horizontal cutoff line
## when `min` is greater than zero, and facets by `aggregate` when that column
## is defined. Returns `ggplot`.
## Updated 2023-08-16.
.plotReadsPerCellViolin <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        plotData <- as.data.frame(data)
        aesMap <- aes(
            x = .data[["sampleName"]],
            y = .data[["nRead"]],
            fill = .data[["interestingGroups"]]
        )
        p <- ggplot(data = plotData, mapping = aesMap)
        p <- p + geom_violin(color = "black", scale = "count")
        p <- p + scale_y_continuous(trans = "log10")
        p <- p + acid_geom_label_average(
            data = plotData,
            col = "nRead",
            digits = 0L
        )
        p <- p + labs(x = NULL, y = "reads per cell")
        ## Cutoff line.
        if (min > 0L) {
            p <- p + acid_geom_abline(yintercept = min)
        }
        ## Color palette.
        p <- p + acid_scale_fill_discrete()
        ## Facets (only applicable to aggregated samples).
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }
318 |
319 |
320 |
## S4 method body for `plotReadsPerCell()` on `bcbioSingleCell` objects.
##
## Validates the object, resolves `interestingGroups`, picks the geom-specific
## plotting function, and labels the plot. For objects without `filterCells`
## metadata, the stashed `cellularBarcodeCutoff` is reported in the subtitle,
## and is also drawn as a line when `cutoffLine = TRUE`.
##
## Updated 2023-08-16.
`plotReadsPerCell,bcbioSingleCell` <- # nolint
    function(object,
             interestingGroups = NULL,
             geom,
             cutoffLine = FALSE,
             title = "Reads per cell") {
        validObject(object)
        assert(isString(title, nullOk = TRUE))
        interestingGroups(object) <-
            matchInterestingGroups(object, interestingGroups)
        interestingGroups <- interestingGroups(object)
        geom <- match.arg(geom)
        ## Minimum reads per barcode cutoff (for unfiltered data).
        ## Defaults apply to filtered objects: no cutoff line, no subtitle.
        minReads <- 0L
        subtitle <- NULL
        if (is.null(metadata(object)[["filterCells"]])) {
            cutoff <- metadata(object)[["cellularBarcodeCutoff"]]
            subtitle <- paste("cutoff", cutoff, sep = " = ")
            if (isTRUE(cutoffLine)) {
                minReads <- cutoff
            }
        }
        assert(isInt(minReads))
        ## This step will intentionally error for filtered objects.
        data <- .rawMetrics(object)
        ## The histogram geom plots binned proportions rather than raw reads.
        if (identical(geom, "histogram")) {
            data <- .proportionalReadsPerCell(
                data = data,
                sampleData = sampleData(object)
            )
        }
        plotFun <- switch(
            EXPR = geom,
            boxplot = .plotReadsPerCellBoxplot,
            ecdf = .plotReadsPerCellEcdf,
            histogram = .plotReadsPerCellHistogram,
            ridgeline = .plotReadsPerCellRidgeline,
            violin = .plotReadsPerCellViolin
        )
        p <- plotFun(data = data, min = minReads)
        ## Add title and subtitle containing cutoff information.
        legendTitle <- paste(interestingGroups, collapse = ":\n")
        p <- p + labs(
            title = title,
            subtitle = subtitle,
            color = legendTitle,
            fill = legendTitle
        )
        p
    }

## The supported `geom` choices are defined by the `.geom` global, which
## `match.arg()` reads from the formals.
formals(`plotReadsPerCell,bcbioSingleCell`)[["geom"]] <- # nolint
    .geom
411 |
412 |
413 |
## Register `plotReadsPerCell,bcbioSingleCell` as the S4 method that the
## `plotReadsPerCell` generic dispatches to for `bcbioSingleCell` objects.
#' @rdname plotReadsPerCell
#' @export
setMethod(
    f = "plotReadsPerCell",
    definition = `plotReadsPerCell,bcbioSingleCell`,
    signature = signature(object = "bcbioSingleCell")
)
421 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 | <one line to give the program's name and a brief idea of what it does.>
633 | Copyright (C) <year>  <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published by
637 | the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program. If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
--------------------------------------------------------------------------------