├── inst ├── extdata │ └── indrops │ │ ├── 2018-01-01_bcbio │ │ ├── bcbio-nextgen.log │ │ ├── bcbio-nextgen-commands.log │ │ ├── project-summary.yaml │ │ └── programs.txt │ │ ├── metadata.csv │ │ └── multiplexed-AAAAAAAA │ │ ├── multiplexed-AAAAAAAA.mtx.rownames │ │ ├── multiplexed-AAAAAAAA.mtx.colnames │ │ └── multiplexed-AAAAAAAA-barcodes.tsv └── rmarkdown │ └── templates │ └── quality-control │ ├── template.yaml │ └── skeleton │ └── skeleton.Rmd ├── tests ├── testthat │ ├── .gitignore │ ├── test-show.R │ ├── helper-globals.R │ ├── helper-cache.R │ ├── test-updateObject.R │ ├── test-plotReadsPerCell.R │ ├── test-bcbioSingleCell.R │ └── test-filterCells.R └── testthat.R ├── .gitignore ├── pkgdown └── extra.css ├── data └── bcb.rda ├── .Rbuildignore ├── R ├── data.R ├── AllGenerics.R ├── AllGlobals.R ├── reexports.R ├── show-methods.R ├── internal-barcodes.R ├── extract-methods.R ├── package.R ├── AllClasses.R ├── internal-import.R ├── updateObject-methods.R ├── AllGenerators.R └── plotReadsPerCell-methods.R ├── man ├── bcbioSingleCellTestsUrl.Rd ├── bcb.Rd ├── show.Rd ├── bcbioSingleCell-class.Rd ├── updateObject.Rd ├── reexports.Rd ├── bcbioSingleCell-package.Rd ├── plotReadsPerCell.Rd ├── extract.Rd └── bcbioSingleCell.Rd ├── package.Rproj ├── _pkgdown.yml ├── data-raw └── bcb.R ├── .lintr ├── DESCRIPTION ├── todo.org ├── README.md ├── NAMESPACE └── LICENSE /inst/extdata/indrops/2018-01-01_bcbio/bcbio-nextgen.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/testthat/.gitignore: -------------------------------------------------------------------------------- 1 | *.rda 2 | subsetPerSample 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .RData 2 | .Rcheck 3 | .Rhistory 4 | .Rproj.user 5 | docs/ 6 | 
-------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | @import url("https://steinbaugh.com/css/pkgdown.css"); 2 | -------------------------------------------------------------------------------- /data/bcb.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hbc/bcbioSingleCell/HEAD/data/bcb.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | ## nolint start 2 | library(testthat) 3 | library(bcbioSingleCell) 4 | ## nolint end 5 | 6 | test_check("bcbioSingleCell") 7 | -------------------------------------------------------------------------------- /inst/extdata/indrops/metadata.csv: -------------------------------------------------------------------------------- 1 | fileName,description,index,sequence,sampleName,aggregate 2 | multiplexed.fastq.gz,multiplexed,1,TTTTTTTT,rep_1,sample 3 | -------------------------------------------------------------------------------- /tests/testthat/test-show.R: -------------------------------------------------------------------------------- 1 | test_that("bcbioSingleCell", { 2 | output <- capture.output(show(bcb)) 3 | expect_true(grepl("^bcbioSingleCell", output[[1L]])) 4 | }) 5 | -------------------------------------------------------------------------------- /tests/testthat/helper-globals.R: -------------------------------------------------------------------------------- 1 | ## nolint start 2 | data <- utils::data 3 | hasInternet <- goalie::hasInternet 4 | ## nolint end 5 | 6 | data(bcb, envir = environment()) 7 | -------------------------------------------------------------------------------- /inst/rmarkdown/templates/quality-control/template.yaml: 
-------------------------------------------------------------------------------- 1 | name: Quality Control 2 | description: > 3 | Template for single-cell RNA-seq quality control report. 4 | create_dir: false 5 | -------------------------------------------------------------------------------- /tests/testthat/helper-cache.R: -------------------------------------------------------------------------------- 1 | lst <- AcidDevTools::cacheTestFiles( 2 | pkg = .pkgName, 3 | files = "bcbioSingleCell_0.1.0.rds" 4 | ) 5 | cacheDir <- lst[["cacheDir"]] 6 | rm(lst) 7 | -------------------------------------------------------------------------------- /inst/extdata/indrops/2018-01-01_bcbio/bcbio-nextgen-commands.log: -------------------------------------------------------------------------------- 1 | cellularBarcodeCutoff: --cb_cutoff 1000 2 | level: --genemap Homo_sapiens.GRCh38.90-tx2gene.tsv 3 | umiType: umis fastqtransform --separate_cb /XXX/umis/harvard-indrop-v3-transform.json 4 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^CONTRIBUTING\.md$ 3 | ^LICENSE$ 4 | ^Meta$ 5 | ^[-._a-zA-Z0-9]+\.(R|Rcheck|Rproj|gz|html|org|pdf|png|sh|tar|txt|xz|yaml|yml|zip)$ 6 | ^[._][-._a-zA-Z0-9]+$ 7 | ^\.Rproj\.user$ 8 | ^cran-comments\.md$ 9 | ^data-raw$ 10 | ^doc$ 11 | ^docs$ 12 | ^pkgdown$ 13 | ^todo\.org$ 14 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' bcbio single-cell RNA-seq example data set 2 | #' 3 | #' Harvard inDrops v3 example data 4 | #' 5 | #' @author Michael Steinbaugh 6 | #' @note Updated 2019-08-12. 7 | #' @usage data(bcb) 8 | #' @return `bcbioSingleCell`. 
9 | #' 10 | #' @examples 11 | #' data(bcb) 12 | #' class(bcb) 13 | "bcb" 14 | -------------------------------------------------------------------------------- /tests/testthat/test-updateObject.R: -------------------------------------------------------------------------------- 1 | test_that("bcbioSingleCell", { 2 | x <- updateObject(bcb) 3 | expect_s4_class(x, "bcbioSingleCell") 4 | }) 5 | 6 | test_that("v0.1 update", { 7 | invalid <- import(file.path(cacheDir, "bcbioSingleCell_0.1.0.rds")) 8 | valid <- updateObject(invalid) 9 | expect_s4_class(valid, "bcbioSingleCell") 10 | }) 11 | -------------------------------------------------------------------------------- /R/AllGenerics.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @name plotReadsPerCell 3 | #' @rdname plotReadsPerCell 4 | #' @usage plotReadsPerCell(object, ...) 5 | NULL 6 | 7 | #' @export 8 | #' @name show 9 | #' @rdname show 10 | #' @usage show(object) 11 | NULL 12 | 13 | #' @export 14 | #' @name updateObject 15 | #' @rdname updateObject 16 | #' @usage updateObject(object, ..., verbose = FALSE) 17 | NULL 18 | -------------------------------------------------------------------------------- /tests/testthat/test-plotReadsPerCell.R: -------------------------------------------------------------------------------- 1 | ## Example dataset doesn't have a cellular barcode cutoff because we removed the 2 | ## bcbio commands log file (which conflicts with Travis CI). 
3 | test_that("geom", { 4 | for (geom in eval(formals(`plotReadsPerCell,bcbioSingleCell`)[["geom"]])) { 5 | x <- plotReadsPerCell(bcb, geom = geom) 6 | expect_s3_class(x, "ggplot") 7 | } 8 | }) 9 | -------------------------------------------------------------------------------- /man/bcbioSingleCellTestsUrl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGlobals.R 3 | \docType{data} 4 | \name{bcbioSingleCellTestsUrl} 5 | \alias{bcbioSingleCellTestsUrl} 6 | \title{Cache URL} 7 | \format{ 8 | An object of class \code{character} of length 1. 9 | } 10 | \usage{ 11 | bcbioSingleCellTestsUrl 12 | } 13 | \description{ 14 | Cache URL 15 | } 16 | \examples{ 17 | bcbioSingleCellTestsUrl 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /package.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /R/AllGlobals.R: -------------------------------------------------------------------------------- 1 | .pkgName <- packageName() 2 | .pkgVersion <- packageVersion(.pkgName) 3 | 4 | ## This is also defined in AcidPlots. 5 | .geom <- c("histogram", "ecdf", "violin", "ridgeline", "boxplot") 6 | 7 | ## We're adding an additional raw reads column (pre-UMI disambiguation). 
8 | .metricsCols <- c("nRead", metricsCols) 9 | 10 | .requiredAssays <- "counts" 11 | 12 | #' Cache URL 13 | #' @keywords internal 14 | #' @export 15 | #' @examples 16 | #' bcbioSingleCellTestsUrl 17 | bcbioSingleCellTestsUrl <- "https://r.acidgenomics.com/testdata/bcbiosinglecell" 18 | -------------------------------------------------------------------------------- /man/bcb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{bcb} 5 | \alias{bcb} 6 | \title{bcbio single-cell RNA-seq example data set} 7 | \format{ 8 | An object of class \code{bcbioSingleCell} with 50 rows and 100 columns. 9 | } 10 | \usage{ 11 | data(bcb) 12 | } 13 | \value{ 14 | \code{bcbioSingleCell}. 15 | } 16 | \description{ 17 | Harvard inDrops v3 example data 18 | } 19 | \note{ 20 | Updated 2019-08-12. 21 | } 22 | \examples{ 23 | data(bcb) 24 | class(bcb) 25 | } 26 | \author{ 27 | Michael Steinbaugh 28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/show.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/show-methods.R 3 | \name{show} 4 | \alias{show} 5 | \alias{show,bcbioSingleCell-method} 6 | \title{Show an object} 7 | \usage{ 8 | show(object) 9 | 10 | \S4method{show}{bcbioSingleCell}(object) 11 | } 12 | \arguments{ 13 | \item{object}{Object.} 14 | } 15 | \value{ 16 | Console output. 17 | } 18 | \description{ 19 | Show an object 20 | } 21 | \note{ 22 | Updated 2022-05-09. 
23 | } 24 | \examples{ 25 | data(bcb) 26 | 27 | ## bcbioSingleCell ==== 28 | show(bcb) 29 | } 30 | \author{ 31 | Michael Steinbaugh 32 | } 33 | -------------------------------------------------------------------------------- /inst/extdata/indrops/2018-01-01_bcbio/project-summary.yaml: -------------------------------------------------------------------------------- 1 | date: '2018-01-01 00:00:00.000000' 2 | upload: /n/data1/XXX/final 3 | bcbio_system: /n/app/bcbio/dev/galaxy/bcbio_system.yaml 4 | samples: 5 | - description: multiplexed-AAAAAAAA 6 | dirs: 7 | config: /n/app/bcbio/dev/galaxy 8 | fastq: null 9 | flowcell: null 10 | galaxy: /n/app/bcbio/dev/galaxy 11 | work: /n/scratch2/XXX/data/bcbio 12 | genome_build: hg38 13 | genome_resources: 14 | rnaseq: 15 | transcripts: /n/app/bcbio/dev/genomes/Hsapiens/hg38/rnaseq/ref-transcripts.gtf 16 | metadata: 17 | batch: null 18 | phenotype: '' 19 | -------------------------------------------------------------------------------- /man/bcbioSingleCell-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllClasses.R 3 | \docType{class} 4 | \name{bcbioSingleCell-class} 5 | \alias{bcbioSingleCell-class} 6 | \title{bcbio single-cell RNA-seq data set} 7 | \description{ 8 | \code{bcbioSingleCell} is an S4 class that extends \code{SingleCellExperiment}, and is 9 | designed to store a bcbio single-cell RNA-seq analysis. This class contains 10 | read counts saved as a sparse matrix (\code{sparseMatrix}), sample metadata, and 11 | cell quality control metrics. 12 | } 13 | \note{ 14 | Updated 2022-05-09. 
15 | } 16 | \author{ 17 | Michael Steinbaugh, Rory Kirchner 18 | } 19 | -------------------------------------------------------------------------------- /tests/testthat/test-bcbioSingleCell.R: -------------------------------------------------------------------------------- 1 | uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell") 2 | 3 | ## Minimal mode, with no metadata or annotations. 4 | ## This is fast but doesn't slot a lot of useful info. 5 | test_that("Minimal mode", { 6 | x <- bcbioSingleCell(uploadDir = uploadDir) 7 | expect_s4_class(x, "bcbioSingleCell") 8 | }) 9 | 10 | test_that("User-defined metadata", { 11 | x <- bcbioSingleCell( 12 | uploadDir = uploadDir, 13 | sampleMetadataFile = file.path(uploadDir, "metadata.csv") 14 | ) 15 | expect_s4_class(x, "bcbioSingleCell") 16 | }) 17 | 18 | ## Automatic organism annotations from AnnotationHub. 19 | test_that("AnnotationHub", { 20 | x <- bcbioSingleCell( 21 | uploadDir = uploadDir, 22 | organism = "Homo sapiens" 23 | ) 24 | expect_s4_class(x, "bcbioSingleCell") 25 | }) 26 | -------------------------------------------------------------------------------- /R/reexports.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @importFrom AcidGenerics calculateMetrics 3 | AcidGenerics::calculateMetrics 4 | 5 | #' @export 6 | #' @importFrom AcidGenerics filterCells 7 | AcidGenerics::filterCells 8 | 9 | #' @export 10 | #' @importFrom AcidGenerics plotBarcodeRanks 11 | AcidGenerics::plotBarcodeRanks 12 | 13 | #' @export 14 | #' @importFrom AcidGenerics plotCellCounts 15 | AcidGenerics::plotCellCounts 16 | 17 | #' @export 18 | #' @importFrom AcidGenerics plotCountsPerCell 19 | AcidGenerics::plotCountsPerCell 20 | 21 | #' @export 22 | #' @importFrom AcidGenerics plotCountsVsFeatures 23 | AcidGenerics::plotCountsVsFeatures 24 | 25 | #' @export 26 | #' @importFrom AcidGenerics plotFeaturesPerCell 27 | AcidGenerics::plotFeaturesPerCell 28 | 29 | #' 
@export 30 | #' @importFrom AcidGenerics plotMitoRatio 31 | AcidGenerics::plotMitoRatio 32 | 33 | #' @export 34 | #' @importFrom AcidGenerics plotNovelty 35 | AcidGenerics::plotNovelty 36 | 37 | #' @export 38 | #' @importFrom AcidGenerics plotQc 39 | AcidGenerics::plotQc 40 | -------------------------------------------------------------------------------- /inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA.mtx.rownames: -------------------------------------------------------------------------------- 1 | ENSG00000071082 2 | ENSG00000100316 3 | ENSG00000106631 4 | ENSG00000108821 5 | ENSG00000112306 6 | ENSG00000115414 7 | ENSG00000125691 8 | ENSG00000133112 9 | ENSG00000137818 10 | ENSG00000138326 11 | ENSG00000140988 12 | ENSG00000142534 13 | ENSG00000142541 14 | ENSG00000142937 15 | ENSG00000143947 16 | ENSG00000147403 17 | ENSG00000147604 18 | ENSG00000156508 19 | ENSG00000159251 20 | ENSG00000164692 21 | ENSG00000167244 22 | ENSG00000167526 23 | ENSG00000168542 24 | ENSG00000174748 25 | ENSG00000177600 26 | ENSG00000184009 27 | ENSG00000186468 28 | ENSG00000197756 29 | ENSG00000198034 30 | ENSG00000198695 31 | ENSG00000198712 32 | ENSG00000198727 33 | ENSG00000198763 34 | ENSG00000198786 35 | ENSG00000198804 36 | ENSG00000198840 37 | ENSG00000198886 38 | ENSG00000198888 39 | ENSG00000198899 40 | ENSG00000198938 41 | ENSG00000205542 42 | ENSG00000225972 43 | ENSG00000229117 44 | ENSG00000229344 45 | ENSG00000233927 46 | ENSG00000237973 47 | ENSG00000248527 48 | ENSG00000256618 49 | ENSG00000269028 50 | ENSG00000282105 51 | -------------------------------------------------------------------------------- /man/updateObject.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/updateObject-methods.R 3 | \name{updateObject} 4 | \alias{updateObject} 5 | \alias{updateObject,bcbioSingleCell-method} 6 | \title{Update object} 7 | 
\usage{ 8 | updateObject(object, ..., verbose = FALSE) 9 | 10 | \S4method{updateObject}{bcbioSingleCell}(object, ..., verbose = FALSE) 11 | } 12 | \arguments{ 13 | \item{object}{Object.} 14 | 15 | \item{...}{Additional arguments.} 16 | 17 | \item{verbose}{\code{logical(1)}. 18 | Run the function with verbose output.} 19 | } 20 | \value{ 21 | Modified object. 22 | } 23 | \description{ 24 | Update object 25 | } 26 | \note{ 27 | Updated 2023-12-04. 28 | } 29 | \examples{ 30 | data(bcb) 31 | 32 | ## bcbioSingleCell ==== 33 | updateObject(bcb) 34 | 35 | ## Example that depends on remote file. 36 | ## > x <- import( 37 | ## > con = file.path( 38 | ## > bcbioSingleCellTestsUrl, 39 | ## > "bcbioSingleCell_0.1.0.rds" 40 | ## > ) 41 | ## > ) 42 | ## > x <- updateObject(x) 43 | ## > x 44 | } 45 | \author{ 46 | Michael Steinbaugh 47 | } 48 | -------------------------------------------------------------------------------- /inst/extdata/indrops/2018-01-01_bcbio/programs.txt: -------------------------------------------------------------------------------- 1 | bamtools,2.4.0 2 | bcbio-nextgen,1.0.6a0-d2b5b522 3 | bcbio-variation,0.2.6 4 | bcftools,1.6 5 | bedtools,2.26.0 6 | biobambam,2.0.79 7 | bioconductor-bubbletree,2.6.0 8 | bowtie2,2.2.8 9 | bwa,0.7.16 10 | chanjo, 11 | cnvkit,0.9.0 12 | cufflinks,2.2.1 13 | cutadapt,1.14 14 | fastqc,0.11.5 15 | featurecounts,1.4.4 16 | freebayes,1.1.0.46 17 | gatk,3.8 18 | gatk-framework,3.6.24 19 | gatk4,4.0b6 20 | gemini,0.20.1 21 | grabix,0.1.8 22 | hisat2,2.1.0 23 | htseq,0.9.1 24 | lumpy-sv,0.2.13 25 | manta,1.1.0 26 | metasv,0.4.0 27 | mirdeep2,2.0.0.7 28 | mutect,1.1.5 29 | novoalign,3.07.00 30 | novosort,V3.00.02 31 | oncofuse,1.1.1 32 | phylowgs,20150714 33 | picard,2.13 34 | platypus-variant,0.8.1.1 35 | preseq,2.0.2 36 | qualimap,2.2.2a 37 | rna-star, 38 | rtg-tools,3.8.4 39 | sailfish,0.10.1 40 | salmon,0.8.2 41 | sambamba,0.6.6 42 | samblaster,0.1.24 43 | samtools,1.6 44 | scalpel,0.5.3 45 | seqbuster,3.1 46 | snpeff,4.3i 47 | 
vardict,2017.04.18 48 | vardict-java,1.5.1 49 | variant-effect-predictor,87 50 | varscan,2.4.3 51 | vcflib,1.0.0_rc1 52 | vt,2015.11.10 53 | wham,1.7.0.311 54 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reexports.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{calculateMetrics} 7 | \alias{filterCells} 8 | \alias{plotBarcodeRanks} 9 | \alias{plotCellCounts} 10 | \alias{plotCountsPerCell} 11 | \alias{plotCountsVsFeatures} 12 | \alias{plotFeaturesPerCell} 13 | \alias{plotMitoRatio} 14 | \alias{plotNovelty} 15 | \alias{plotQc} 16 | \title{Objects exported from other packages} 17 | \keyword{internal} 18 | \description{ 19 | These objects are imported from other packages. Follow the links 20 | below to see their documentation. 21 | 22 | \describe{ 23 | \item{AcidGenerics}{\code{\link[AcidGenerics]{calculateMetrics}}, \code{\link[AcidGenerics]{filterCells}}, \code{\link[AcidGenerics]{plotBarcodeRanks}}, \code{\link[AcidGenerics]{plotCellCounts}}, \code{\link[AcidGenerics]{plotCountsPerCell}}, \code{\link[AcidGenerics]{plotCountsVsFeatures}}, \code{\link[AcidGenerics]{plotFeaturesPerCell}}, \code{\link[AcidGenerics]{plotMitoRatio}}, \code{\link[AcidGenerics]{plotNovelty}}, \code{\link[AcidGenerics]{plotQc}}} 24 | }} 25 | 26 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | authors: 2 | Michael Steinbaugh: 3 | href: https://mike.steinbaugh.com/ 4 | Rory Kirchner: 5 | href: https://github.com/roryk/ 6 | Mary Piper: 7 | href: https://github.com/marypiper/ 8 | Victor Barrera: 9 | href: https://github.com/vbarrera/ 10 | Shannan Ho Sui: 11 | href: https://github.com/sjhosui/ 12 | Harvard Chan 
Bioinformatics Core: 13 | href: https://bioinformatics.sph.harvard.edu/ 14 | Acid Genomics: 15 | href: https://acidgenomics.com/ 16 | navbar: 17 | components: 18 | acid: 19 | icon: "fas fa-vial fa-lg" 20 | href: https://acidgenomics.com/ 21 | structure: 22 | right: [acid, github] 23 | news: 24 | - one_page: false 25 | reference: 26 | - title: S4 classes 27 | contents: 28 | - starts_with("bcbioSingleCell") 29 | - extract 30 | - show 31 | - updateObject 32 | - title: Plots 33 | contents: 34 | - starts_with("plot") 35 | - title: Example data 36 | contents: 37 | - bcb 38 | - title: Reexports 39 | contents: 40 | - reexports 41 | template: 42 | bootstrap: 5 43 | url: https://r.acidgenomics.com/packages/bcbiosinglecell 44 | -------------------------------------------------------------------------------- /man/bcbioSingleCell-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \docType{package} 4 | \name{bcbioSingleCell-package} 5 | \title{bcbioSingleCell} 6 | \description{ 7 | Import and analyze \href{https://bcbio-nextgen.readthedocs.io/}{bcbio} single-cell 8 | RNA-seq data. 
9 | } 10 | \seealso{ 11 | Useful links: 12 | \itemize{ 13 | \item \url{https://r.acidgenomics.com/packages/bcbiosinglecell/} 14 | \item \url{https://github.com/hbc/bcbioSingleCell/} 15 | \item Report bugs at \url{https://github.com/hbc/bcbioSingleCell/issues/} 16 | } 17 | 18 | } 19 | \author{ 20 | \strong{Maintainer}: Michael Steinbaugh \email{mike@steinbaugh.com} (\href{https://orcid.org/0000-0002-2403-2221}{ORCID}) 21 | 22 | Authors: 23 | \itemize{ 24 | \item Rory Kirchner \email{roryk@alum.mit.edu} (\href{https://orcid.org/0000-0003-4814-5885}{ORCID}) 25 | \item Mary Piper \email{mary.piper@gmail.com} (\href{https://orcid.org/0000-0003-2699-3840}{ORCID}) 26 | \item Victor Barrera \email{barrera.vic@gmail.com} (\href{https://orcid.org/0000-0003-0590-4634}{ORCID}) 27 | \item Shannan Ho Sui \email{shosui@hsph.harvard.edu} (\href{https://orcid.org/0000-0002-6191-4709}{ORCID}) 28 | } 29 | 30 | Other contributors: 31 | \itemize{ 32 | \item Harvard Chan Bioinformatics Core \email{bioinformatics@hsph.harvard.edu} [copyright holder, funder] 33 | \item Acid Genomics [copyright holder, funder] 34 | } 35 | 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/plotReadsPerCell.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/plotReadsPerCell-methods.R 3 | \name{plotReadsPerCell} 4 | \alias{plotReadsPerCell} 5 | \alias{plotReadsPerCell,bcbioSingleCell-method} 6 | \title{Plot read counts per cell} 7 | \usage{ 8 | plotReadsPerCell(object, ...) 9 | 10 | \S4method{plotReadsPerCell}{bcbioSingleCell}( 11 | object, 12 | interestingGroups = NULL, 13 | geom = c("histogram", "ecdf", "violin", "ridgeline", "boxplot"), 14 | cutoffLine = FALSE, 15 | title = "Reads per cell" 16 | ) 17 | } 18 | \arguments{ 19 | \item{object}{Object.} 20 | 21 | \item{interestingGroups}{\code{character}. 
22 | Groups of interest to use for visualization. 23 | Corresponds to factors describing the columns of the object.} 24 | 25 | \item{geom}{\code{character(1)}. 26 | Plot type. Uses \code{\link[base:match.arg]{match.arg()}} internally and defaults to 27 | the first argument in the \code{character} vector.} 28 | 29 | \item{cutoffLine}{\code{logical(1)}. 30 | Include a line marking the cutoff.} 31 | 32 | \item{title}{\code{character(1)}. 33 | Title.} 34 | 35 | \item{...}{Additional arguments.} 36 | } 37 | \value{ 38 | \code{ggplot}. 39 | } 40 | \description{ 41 | Plot the distribution of read counts for all unfiltered cellular barcodes. 42 | } 43 | \note{ 44 | Updated 2023-12-04. 45 | } 46 | \examples{ 47 | data(bcb) 48 | 49 | ## bcbioSingleCell ==== 50 | plotReadsPerCell(bcb, geom = "histogram") 51 | plotReadsPerCell(bcb, geom = "ecdf") 52 | } 53 | \author{ 54 | Michael Steinbaugh, Rory Kirchner 55 | } 56 | -------------------------------------------------------------------------------- /tests/testthat/test-filterCells.R: -------------------------------------------------------------------------------- 1 | bcb <- calculateMetrics(bcb) 2 | 3 | test_that("sampleNames", { 4 | expect_identical( 5 | object = sampleNames(bcb), 6 | expected = c("multiplexed_AAAAAAAA" = "rep_1") 7 | ) 8 | }) 9 | 10 | ## Expecting an object with the same dimensions by default. 11 | test_that("No filtering", { 12 | x <- filterCells(bcb) 13 | expect_s4_class(x, "bcbioSingleCell") 14 | expect_identical(dim(x), dim(bcb)) 15 | }) 16 | 17 | ## Refer to the quality control R Markdown for actual recommended cutoffs. 18 | ## These are skewed, and designed to work with our minimal dataset. 
19 | test_that("Parameterized cutoff tests", { 20 | Map( 21 | args = list( 22 | list("minCounts" = 2000L), 23 | list("maxCounts" = 2500L), 24 | list("minFeatures" = 45L), 25 | list("maxFeatures" = 49L), 26 | list("maxMitoRatio" = 0.1), 27 | list("minNovelty" = 0.5), 28 | list("minCellsPerFeature" = 95L) 29 | ), 30 | dim = list( 31 | c(50L, 35L), 32 | c(50L, 88L), 33 | c(50L, 95L), 34 | c(50L, 81L), 35 | c(50L, 22L), 36 | c(50L, 81L), 37 | c(45L, 100L) 38 | ), 39 | f = function(args, dim) { 40 | args[["object"]] <- bcb 41 | x <- do.call(what = filterCells, args = args) 42 | expect_s4_class(x, "bcbioSingleCell") 43 | expect_s4_class(metadata(x)[["filterCells"]], "SimpleList") 44 | expect_true(metadata(x)[["subset"]]) 45 | expect_identical(dim(x), dim) 46 | } 47 | ) 48 | }) 49 | 50 | test_that("Expected cutoff failure", { 51 | expect_error( 52 | object = filterCells(bcb, minCounts = Inf), 53 | regexp = "No cells passed" 54 | ) 55 | }) 56 | -------------------------------------------------------------------------------- /data-raw/bcb.R: -------------------------------------------------------------------------------- 1 | ## inDrops example data 2 | ## Using harvard-indrop-v3 barcodes. 3 | ## Updated 2022-06-09. 4 | ## nolint start 5 | suppressPackageStartupMessages({ 6 | library(devtools) 7 | library(usethis) 8 | library(pipette) 9 | }) 10 | ## nolint end 11 | load_all() 12 | limit <- structure(2e6L, class = "object_size") # nolint 13 | ## Minimal example bcbio upload directory. 14 | ## Include the top 50 genes (rows) and the barcoded cells (columns). 
15 | uploadDir <- file.path("..", "inst", "extdata", "indrops") 16 | sample <- "multiplexed-AAAAAAAA" 17 | countsFile <- file.path( 18 | uploadDir, 19 | sample, 20 | paste0(sample, ".mtx") 21 | ) 22 | rownamesFile <- file.path( 23 | uploadDir, 24 | sample, 25 | paste0(sample, ".mtx.rownames") 26 | ) 27 | colnamesFile <- file.path( 28 | uploadDir, 29 | sample, 30 | paste0(sample, ".mtx.colnames") 31 | ) 32 | barcodesFile <- file.path( 33 | uploadDir, 34 | sample, 35 | paste0(sample, "-barcodes.tsv") 36 | ) 37 | stopifnot(all(file.exists( 38 | c(countsFile, rownamesFile, colnamesFile, barcodesFile) 39 | ))) 40 | barcodes <- import(barcodesFile, colnames = FALSE) 41 | export(object = barcodes, con = barcodesFile, colnames = FALSE) 42 | counts <- import(countsFile) 43 | topGenes <- 44 | counts |> 45 | Matrix::rowSums() |> 46 | sort(decreasing = TRUE) |> 47 | head(n = 50L) 48 | genes <- sort(names(topGenes)) 49 | cells <- barcodes[[1L]] 50 | counts <- counts[genes, cells] 51 | export(object = counts, con = countsFile) 52 | ## Create bcbioSingleCell object. 53 | bcb <- bcbioSingleCell( 54 | uploadDir = uploadDir, 55 | sampleMetadataFile = file.path(uploadDir, "metadata.csv"), 56 | organism = "Homo sapiens", 57 | ensemblRelease = 90L 58 | ) 59 | stopifnot( 60 | object.size(bcb) < limit, 61 | validObject(bcb) 62 | ) 63 | use_data(bcb, compress = "xz", overwrite = TRUE) 64 | -------------------------------------------------------------------------------- /R/show-methods.R: -------------------------------------------------------------------------------- 1 | #' Show an object 2 | #' 3 | #' @name show 4 | #' @author Michael Steinbaugh 5 | #' @note Updated 2022-05-09. 6 | #' 7 | #' @inheritParams AcidRoxygen::params 8 | #' 9 | #' @return Console output. 10 | #' 11 | #' @examples 12 | #' data(bcb) 13 | #' 14 | #' ## bcbioSingleCell ==== 15 | #' show(bcb) 16 | NULL 17 | 18 | 19 | 20 | ## Updated 2019-07-24. 
21 | .showHeader <- function(object, version = NULL) { 22 | cat(paste(class(object), version), sep = "\n") 23 | } 24 | 25 | 26 | 27 | ## Using the same internal method for bcbioSingleCell and CellRanger. 28 | ## Updated 2019-08-08. 29 | `show,bcbioSingleCell` <- # nolint 30 | function(object) { 31 | validObject(object) 32 | ## Metadata. 33 | m <- metadata(object) 34 | ## Row ranges metadata. 35 | rrm <- metadata(rowRanges(object)) 36 | .showHeader(object, version = m[["version"]]) 37 | filtered <- isSubset("filterCells", names(m)) 38 | showSlotInfo(list( 39 | uploadDir = m[["uploadDir"]], 40 | dates = as.character(c( 41 | bcbio = m[["runDate"]], 42 | R = m[["date"]] 43 | )), 44 | level = m[["level"]], 45 | sampleMetadataFile = m[["sampleMetadataFile"]], 46 | organism = m[["organism"]], 47 | gffFile = m[["gffFile"]], 48 | annotationHub = rrm[["annotationHub"]], 49 | ensemblRelease = rrm[["release"]], 50 | genomeBuild = rrm[["build"]], 51 | interestingGroups = m[["interestingGroups"]], 52 | filtered = filtered 53 | )) 54 | ## Extend the SingleCellExperiment method. 
55 | sce <- as(object, "SingleCellExperiment") 56 | cat(capture.output(show(sce)), sep = "\n") 57 | } 58 | 59 | 60 | 61 | #' @rdname show 62 | #' @export 63 | setMethod( 64 | f = "show", 65 | signature = signature(object = "bcbioSingleCell"), 66 | definition = `show,bcbioSingleCell` 67 | ) 68 | -------------------------------------------------------------------------------- /inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA.mtx.colnames: -------------------------------------------------------------------------------- 1 | AAACACTA-CTTCGATT 2 | AAACTACA-CCACATTA 3 | AACTGCCT-GCAAGGAC 4 | AAGAAGGT-TCTGTGGT 5 | AAGCCTTC-TAAATAGG 6 | AATAAGGA-CCACATTA 7 | AATCGAAG-CCCAAGCA 8 | AATCGTTC-CCCTAACC 9 | ACCCTCAA-CTGCGTTG 10 | ACCTGAAG-GAGCGGTA 11 | ACTAATTG-CTTTAATC 12 | ACTAGAGC-TCGACACC 13 | AGAAACCA-ATACTCTT 14 | AGCTCCAC-CCTGACAC 15 | AGGTAAGC-TCCCAATC 16 | ATATGCAA-GGCGGTTT 17 | ATCAATCG-GTTGTCAT 18 | ATCGCGCT-AGAGGTGG 19 | CAACGCAG-CTCGCGTA 20 | CACAACCT-GGAGAAGC 21 | CCATGCAT-TTCCGCTC 22 | CCCGTTCT-AAAGCCTA 23 | CCGAGATC-ATGGGCAC 24 | CCGATACG-CAAGAGGG 25 | CCGGAAAT-GTTGTCAT 26 | CCTACGCT-AGCAGAAC 27 | CGTGTGTT-AGGAAGAC 28 | CGTTTCGT-GACAGATA 29 | CTAGCACG-AATCGGGT 30 | CTAGTAGG-TTGAGGGT 31 | CTCACATC-ACCCACGA 32 | CTCCTCCA-CGTATTTC 33 | CTCCTCCA-TGTACACG 34 | CTCTATAG-GCAAAGCC 35 | CTGTCGCA-TTTAACAG 36 | CTGTTAAA-AATGAATG 37 | CTGTTAAA-GACAGATA 38 | CTTAGGCC-AGAAGTCC 39 | CTTAGTGT-TCCAGGGA 40 | CTTCTACG-CTTCTTCG 41 | CTTTATCC-CTACCGTT 42 | GACACCTG-CTCAGAAT 43 | GACTAGCG-GAAGTGCC 44 | GAGAAACC-ACAGCGGA 45 | GAGTGTAC-ACGCAGAG 46 | GATTAAAG-ACTAGCCA 47 | GATTACTT-GAGAATTG 48 | GATTTCCC-ATGTTGGC 49 | GCAAACTG-CTTCAGGT 50 | GCCAACAT-CGTGGATA 51 | GCCTGGTA-CGCTCTCA 52 | GCGCTGAT-GACAAAGG 53 | GCGTGCAA-TCTGTGGT 54 | GGAACGAA-TTGCACGC 55 | GGCGACAA-TTCCGAGT 56 | GGCTTTGC-AACCCTTG 57 | GGCTTTGC-AGCGAAGT 58 | GGCTTTGC-TTAGGACC 59 | GGGATTAC-AAATGTCG 60 | GGTTGAGA-CTGTCTGG 61 | GGTTGAGA-GAGAGTAT 62 | GTAAGCCG-CGATTGAT 63 | GTAATCTG-CGCTAATA 64 | GTACGCTT-CCCAAGCA 
65 | GTACGGAC-CAACAAAT 66 | GTCCACTA-CTTCTGGA 67 | GTCCACTA-GACAAAGG 68 | GTCCGTCA-ATACTCTT 69 | GTCTAATC-GGCCCTTA 70 | GTGAACTC-CAAGAGGG 71 | GTGAGGCA-CAGTTTGC 72 | GTGATAAA-CGCTCTCA 73 | GTGCCCAT-GTGTCGGA 74 | GTGGTGCT-GGAGAAGC 75 | GTTACTAG-AGAAGTTA 76 | GTTACTAG-CCCTTGGT 77 | GTTCTGCT-TGGCTACC 78 | TAATCCAT-CGGAATTT 79 | TACCGCTC-CCCATAGC 80 | TACGCGAG-TGTAGTTT 81 | TACGTTCG-TTGATCTA 82 | TAGGCTTT-CGGACAAC 83 | TAGTAGCC-TAGTGTTT 84 | TATTAGCG-CCCTAACC 85 | TCAGCCTC-TGCAAGGG 86 | TCCGACAC-GGGAGGTA 87 | TCGCAATC-CGAACGTA 88 | TCGGTCAT-AGCACCAC 89 | TCTAAACT-CTCTTGAC 90 | TCTTTGAC-CGCTCTCA 91 | TGAGAGCG-CCTATTCA 92 | TGAGAGCG-GAAGTGCC 93 | TGAGCACA-TGCTATTT 94 | TGCGACTA-CCGTGTTT 95 | TGCGACTA-TTCACATA 96 | TGCTTCAT-GCAGGGTA 97 | TGCTTGGG-CAACAAAT 98 | TGGACGGA-TTGTTTAC 99 | TGGGAATT-ATATAGGA 100 | TGTTATCA-ACGCAGAG 101 | -------------------------------------------------------------------------------- /R/internal-barcodes.R: --------------------------------------------------------------------------------
#' Raw reads per cellular barcode
#'
#' Collapse the per-sample cellular barcode list into a single named vector of
#' raw read counts. These are read counts prior to UMI disambiguation.
#'
#' @author Michael Steinbaugh
#' @keywords internal
#' @note Updated 2019-08-08.
#' @noRd
#'
#' @param list `list`.
#' Cellular barcodes per sample. Must be named, and the first element must be
#' a named `integer` vector.
#'
#' @return `integer`.
#' Cell identifiers are the names and raw reads are the values.
.nRead <- function(list) {
    assert(
        is.list(list),
        hasNames(list),
        is.integer(list[[1L]]),
        hasNames(list[[1L]])
    )
    ## Single sample: the barcode vector can be returned untouched.
    if (hasLength(list, n = 1L)) {
        return(list[[1L]])
    }
    ## Multiple samples: `unlist()` joins the sample and barcode names using a
    ## "." separator. The names are then sanitized with `makeNames()`, which
    ## renames "." to "_".
    reads <- unlist(list, use.names = TRUE)
    names(reads) <- makeNames(names(reads))
    reads
}



#' Obtain the raw, unfiltered cellular barcode read counts
#'
#' @note Updated 2023-12-04.
#' @noRd
#'
#' @return `DataFrame`.
.rawMetrics <- function(object) {
    assert(is(object, "bcbioSingleCell"))
    list <- metadata(object)[["cellularBarcodes"]]
    ## Fail early with an actionable message when the unfiltered barcodes are
    ## not stashed in the object metadata.
    msg <- sprintf(
        fmt = paste(
            "Object does not contain unfiltered cellular barcodes.",
            "Has {.fun %s} been applied?",
            "This step drops them."
        ),
        "filterCells"
    )
    assert(is.list(list), msg = msg)
    ## List class is already guaranteed by the check above.
    assert(hasNames(list))
    ## Build one DataFrame per sample: sample identifier, cell identifier, and
    ## raw read count.
    perSample <- function(sampleId, reads) {
        DataFrame(
            "sampleId" = as.factor(sampleId),
            "cellId" = as.factor(names(reads)),
            "nRead" = reads,
            row.names = NULL
        )
    }
    dfl <- Map(f = perSample, sampleId = names(list), reads = list)
    ## Stack the per-sample frames into a single DataFrame.
    data <- unlist(DataFrameList(dfl), use.names = FALSE)
    ## Append the sample-level metadata, keyed on the sample identifier.
    samples <- sampleData(object)
    samples[["sampleId"]] <- as.factor(rownames(samples))
    data <- leftJoin(data, samples, by = "sampleId")
    assert(
        is(data, "DataFrame"),
        !hasRownames(data),
        isSubset(c("sampleId", "cellId", "nRead"), colnames(data)),
        is.integer(data[["nRead"]])
    )
    data
}
-------------------------------------------------------------------------------- /man/extract.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract-methods.R 3 | \name{extract} 4 | \alias{extract} 5 | \alias{[,bcbioSingleCell,ANY,ANY,ANY-method} 6 | \title{Extract or replace parts of an object} 7 | \usage{ 8 | \S4method{[}{bcbioSingleCell,ANY,ANY,ANY}(x, i, j, ..., drop = FALSE) 9 | } 10 | \arguments{ 11 | \item{x}{Object.} 12 | 13 | \item{i}{Indices specifying elements to extract or replace. Indices are \code{numeric} or 14 | \code{character} vectors, empty (\code{missing}), or \code{NULL}. 15 | 16 | For more information: 17 | 18 | \if{html}{\out{
}}\preformatted{help(topic = "Extract", package = "base") 19 | }\if{html}{\out{
}}} 20 | 21 | \item{j}{Indices specifying elements to extract or replace. Indices are \code{numeric} or 22 | \code{character} vectors, empty (\code{missing}), or \code{NULL}. 23 | 24 | For more information: 25 | 26 | \if{html}{\out{
}}\preformatted{help(topic = "Extract", package = "base") 27 | }\if{html}{\out{
}}} 28 | 29 | \item{...}{Additional arguments.} 30 | 31 | \item{drop}{For matrices and arrays. If \code{TRUE} the result is 32 | coerced to the lowest possible dimension (see the examples). This 33 | only works for extracting elements, not for the replacement. See 34 | \code{\link[base]{drop}} for further details. 35 | } 36 | } 37 | \value{ 38 | \code{bcbioSingleCell}. 39 | } 40 | \description{ 41 | Extract genes by row and cells by column. 42 | } 43 | \details{ 44 | Refer to \code{cellToSample()} and \code{selectSamples()} if sample-level extraction is 45 | desired. Note that \code{sampleId} is slotted into \code{colData} and defines the 46 | cell-to-sample mappings. 47 | 48 | Unfiltered cellular barcode distributions for the entire dataset, including 49 | cells not kept in the matrix will be dropped in favor of the \code{nCount} column 50 | of \code{colData()}. 51 | } 52 | \note{ 53 | Updated 2021-09-10. 54 | } 55 | \examples{ 56 | ## bcbioSingleCell ==== 57 | data(bcb) 58 | 59 | cells <- head(colnames(bcb)) 60 | head(cells) 61 | genes <- head(rownames(bcb)) 62 | head(genes) 63 | 64 | ## Subset by cell identifiers. 65 | bcb[, cells] 66 | 67 | ## Subset by genes. 68 | bcb[genes, ] 69 | 70 | ## Subset by both genes and cells. 71 | bcb[genes, cells] 72 | } 73 | \references{ 74 | Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) 75 | \emph{The New S Language}. 76 | Wadsworth & Brooks/Cole. 77 | } 78 | \author{ 79 | Michael Steinbaugh 80 | } 81 | -------------------------------------------------------------------------------- /.lintr: -------------------------------------------------------------------------------- 1 | linters: linters_with_defaults( 2 | # Updated 2023-07-24. 
3 | # > help(topic = "linters", package = "lintr") 4 | T_and_F_symbol_linter(), 5 | absolute_path_linter(), 6 | any_duplicated_linter(), 7 | any_is_na_linter(), 8 | assignment_linter(), 9 | backport_linter(), 10 | brace_linter(), 11 | class_equals_linter(), 12 | commas_linter(), 13 | commented_code_linter(), 14 | condition_message_linter(), 15 | conjunct_test_linter(), 16 | consecutive_assertion_linter(), 17 | cyclocomp_linter(complexity_limit = 30L), 18 | duplicate_argument_linter(), 19 | equals_na_linter(), 20 | expect_comparison_linter(), 21 | expect_identical_linter(), 22 | expect_length_linter(), 23 | expect_named_linter(), 24 | expect_not_linter(), 25 | expect_null_linter(), 26 | expect_s3_class_linter(), 27 | expect_s4_class_linter(), 28 | expect_true_false_linter(), 29 | extraction_operator_linter(), 30 | function_left_parentheses_linter(), 31 | ifelse_censor_linter(), 32 | implicit_integer_linter(), 33 | indentation_linter(indent = 4L, hanging_indent_style = "tidy"), 34 | infix_spaces_linter(), 35 | inner_combine_linter(), 36 | line_length_linter(length = 80L), 37 | literal_coercion_linter(), 38 | missing_argument_linter(), 39 | missing_package_linter(), 40 | namespace_linter(), 41 | nested_ifelse_linter(), 42 | # This is currently returning too many false positives. 
43 | # > nonportable_path_linter = NULL, 44 | numeric_leading_zero_linter(), 45 | object_length_linter(length = 40L), 46 | object_name_linter(styles = "camelCase"), 47 | object_usage_linter(), 48 | outer_negation_linter(), 49 | package_hooks_linter(), 50 | paren_body_linter(), 51 | paste_linter(), 52 | pipe_call_linter(), 53 | pipe_continuation_linter(), 54 | quotes_linter(), 55 | redundant_ifelse_linter(), 56 | regex_subset_linter(), 57 | semicolon_linter(), 58 | seq_linter(), 59 | spaces_inside_linter(), 60 | spaces_left_parentheses_linter(), 61 | sprintf_linter(), 62 | system_file_linter(), 63 | todo_comment_linter(), 64 | trailing_blank_lines_linter(), 65 | trailing_whitespace_linter(), 66 | undesirable_function_linter(), 67 | undesirable_operator_linter(), 68 | unnecessary_concatenation_linter(), 69 | unreachable_code_linter(), 70 | vector_logic_linter(), 71 | whitespace_linter(), 72 | yoda_test_linter()) 73 | exclude: "# nolint" 74 | exclude_start: "# nolint start" 75 | exclude_end: "# nolint end" 76 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bcbioSingleCell 2 | Title: Bcbio Single-Cell RNA-Seq 3 | Description: R package for bcbio single-cell RNA-seq analysis. 
4 | Version: 0.7.1 5 | Date: 2023-12-04 6 | Authors@R: c( 7 | person( 8 | given = "Michael", 9 | family = "Steinbaugh", 10 | email = "mike@steinbaugh.com", 11 | role = c("aut", "cre"), 12 | comment = c(ORCID = "0000-0002-2403-2221") 13 | ), 14 | person( 15 | given = "Rory", 16 | family = "Kirchner", 17 | email = "roryk@alum.mit.edu", 18 | role = "aut", 19 | comment = c(ORCID = "0000-0003-4814-5885") 20 | ), 21 | person( 22 | given = "Mary", 23 | family = "Piper", 24 | email = "mary.piper@gmail.com", 25 | role = "aut", 26 | comment = c(ORCID = "0000-0003-2699-3840") 27 | ), 28 | person( 29 | given = "Victor", 30 | family = "Barrera", 31 | email = "barrera.vic@gmail.com", 32 | role = "aut", 33 | comment = c(ORCID = "0000-0003-0590-4634") 34 | ), 35 | person( 36 | given = "Shannan", 37 | family = "Ho Sui", 38 | email = "shosui@hsph.harvard.edu", 39 | role = "aut", 40 | comment = c(ORCID = "0000-0002-6191-4709") 41 | ), 42 | person( 43 | given = "Harvard Chan Bioinformatics Core", 44 | email = "bioinformatics@hsph.harvard.edu", 45 | role = c("cph", "fnd") 46 | ), 47 | person( 48 | given = "Acid Genomics", 49 | role = c("cph", "fnd") 50 | )) 51 | URL: 52 | https://r.acidgenomics.com/packages/bcbiosinglecell/, 53 | https://github.com/hbc/bcbioSingleCell/ 54 | BugReports: https://github.com/hbc/bcbioSingleCell/issues/ 55 | License: AGPL-3 56 | Encoding: UTF-8 57 | LazyData: false 58 | Depends: R (>= 4.3) 59 | Imports: 60 | AcidBase (>= 0.7.0), 61 | AcidCLI (>= 0.3.0), 62 | AcidExperiment (>= 0.5.0), 63 | AcidGenerics (>= 0.7.1), 64 | AcidGenomes (>= 0.6.0), 65 | AcidMarkdown (>= 0.3.0), 66 | AcidPlots (>= 0.7.0), 67 | AcidPlyr (>= 0.5.0), 68 | AcidSingleCell (>= 0.4.0), 69 | BiocGenerics (>= 0.46.0), 70 | IRanges (>= 2.34.0), 71 | S4Vectors (>= 0.38.0), 72 | SingleCellExperiment (>= 1.22.0), 73 | SummarizedExperiment (>= 1.30.0), 74 | bcbioBase (>= 0.9.0), 75 | ggplot2 (>= 3.4.3), 76 | ggridges (>= 0.5.4), 77 | goalie (>= 0.7.1), 78 | pipette (>= 0.14.0), 79 | syntactic 
(>= 0.7.0), 80 | methods, 81 | parallel, 82 | utils 83 | Suggests: 84 | AcidDevTools (>= 0.7.1), 85 | Biostrings (>= 2.68.0), 86 | basejump (>= 0.18.0), 87 | rmarkdown (>= 2.25), 88 | testthat (>= 3.1.10), 89 | graphics 90 | Additional_repositories: https://r.acidgenomics.com 91 | Config/testthat/edition: 3 92 | Config/testthat/parallel: true 93 | Roxygen: list(markdown = TRUE) 94 | RoxygenNote: 7.2.3 95 | -------------------------------------------------------------------------------- /R/extract-methods.R: --------------------------------------------------------------------------------
#' Extract or replace parts of an object
#'
#' Extract genes by row and cells by column.
#'
#' Refer to `cellToSample()` and `selectSamples()` if sample-level extraction is
#' desired. Note that `sampleId` is slotted into `colData` and defines the
#' cell-to-sample mappings.
#'
#' Unfiltered cellular barcode distributions for the entire dataset, including
#' cells not kept in the matrix, will be dropped in favor of the `nCount` column
#' of `colData()`.
#'
#' @name extract
#' @author Michael Steinbaugh
#' @inherit base::Extract params references
#' @note Updated 2021-09-10.
#'
#' @inheritParams AcidRoxygen::params
#'
#' @return `bcbioSingleCell`.
#'
#' @examples
#' ## bcbioSingleCell ====
#' data(bcb)
#'
#' cells <- head(colnames(bcb))
#' head(cells)
#' genes <- head(rownames(bcb))
#' head(genes)
#'
#' ## Subset by cell identifiers.
#' bcb[, cells]
#'
#' ## Subset by genes.
#' bcb[genes, ]
#'
#' ## Subset by both genes and cells.
#' bcb[genes, cells]
NULL



## Updated 2019-08-20.
`extract,bcbioSingleCell` <- # nolint
    function(x, i, j, ..., drop = FALSE) {
        validObject(x)
        ## Dimension dropping is not supported for this class.
        assert(identical(drop, FALSE))
        ## Genes (rows).
        if (missing(i)) {
            i <- seq_len(nrow(x))
        }
        ## Cells (columns).
        if (missing(j)) {
            j <- seq_len(ncol(x))
        }
        ## Subset using SCE method.
        sce <- as(x, "SingleCellExperiment")
        sce <- sce[i, j, drop = drop]
        ## Early return original object, if unmodified.
        if (identical(assay(sce), assay(x))) {
            return(x)
        }
        ## Determine whether we should stash subset in metadata.
        ## This compares the dimensions of the result against the input,
        ## rather than the lengths of the index vectors. A logical index has
        ## the same length as the dimension it filters, so checking
        ## `length(i)` and `length(j)` misclassifies logical subsetting as
        ## "not a subset" and leaves stale unfiltered barcodes in metadata.
        subset <- !identical(dim(sce), dim(x))
        ## Metadata ------------------------------------------------------------
        metadata <- metadata(sce)
        if (isTRUE(subset)) {
            ## These elements describe the full dataset and no longer apply
            ## once cells or genes have been removed.
            metadata[["cellularBarcodes"]] <- NULL
            metadata[["filterCells"]] <- NULL
            metadata[["filterGenes"]] <- NULL
            metadata[["subset"]] <- TRUE
        }
        metadata <- Filter(f = Negate(is.null), x = metadata)
        metadata(sce) <- metadata
        ## Return --------------------------------------------------------------
        sce <- droplevels2(sce)
        new(Class = "bcbioSingleCell", sce)
    }



#' @rdname extract
#' @export
setMethod(
    "[",
    signature(
        x = "bcbioSingleCell",
        i = "ANY",
        j = "ANY",
        drop = "ANY"
    ),
    definition = `extract,bcbioSingleCell`
)
-------------------------------------------------------------------------------- /man/bcbioSingleCell.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGenerators.R 3 | \name{bcbioSingleCell} 4 | \alias{bcbioSingleCell} 5 | \title{bcbio single-cell RNA-seq data set} 6 | \usage{ 7 | bcbioSingleCell( 8 | uploadDir, 9 | sampleMetadataFile = NULL, 10 | organism = NULL, 11 | ensemblRelease = NULL, 12 | genomeBuild = NULL, 13 | gffFile = NULL, 14 | transgeneNames = NULL, 15 | interestingGroups = "sampleName" 16 | ) 17 | } 18 |
\arguments{ 19 | \item{uploadDir}{\code{character(1)}. 20 | Final upload directory path.} 21 | 22 | \item{sampleMetadataFile}{\code{character(1)}. 23 | Sample metadata file path. CSV or TSV is preferred, but Excel worksheets 24 | are also supported. Check the documentation for conventions and required 25 | columns.} 26 | 27 | \item{organism}{\code{character(1)}. 28 | Full Latin organism name (e.g. \code{"Homo sapiens"}).} 29 | 30 | \item{ensemblRelease}{\code{integer(1)}. 31 | Ensembl release version (e.g. \code{100}). We recommend setting this value if 32 | possible, for improved reproducibility. When left unset, the latest release 33 | available via AnnotationHub/ensembldb is used. Note that the latest version 34 | available can vary, depending on the versions of AnnotationHub and 35 | ensembldb in use.} 36 | 37 | \item{genomeBuild}{\code{character(1)}. 38 | Ensembl genome build assembly name (e.g. \code{"GRCh38"}). If set \code{NULL}, 39 | defaults to the most recent build available. 40 | \emph{Note:} don't pass in UCSC build IDs (e.g. \code{"hg38"}).} 41 | 42 | \item{gffFile}{\code{character(1)}. 43 | GFF/GTF (General Feature Format) file. 44 | Generally, we recommend using GTF (GFFv2) instead of GFFv3.} 45 | 46 | \item{transgeneNames}{\code{character}. 47 | Vector indicating which assay rows denote transgenes (e.g. EGFP, TDTOMATO).} 48 | 49 | \item{interestingGroups}{\code{character}. 50 | Groups of interest to use for visualization. 51 | Corresponds to factors describing the columns of the object.} 52 | } 53 | \value{ 54 | \code{bcbioSingleCell}. 55 | } 56 | \description{ 57 | \code{bcbioSingleCell} is an S4 class that extends \code{SingleCellExperiment}, and is 58 | designed to store a bcbio single-cell RNA-seq analysis. This class contains 59 | read counts saved as a sparse matrix (\code{sparseMatrix}), sample metadata, and 60 | cell quality control metrics. 61 | } 62 | \note{ 63 | Updated 2023-09-21. 
64 | } 65 | \section{Remote data}{ 66 | 67 | 68 | When working in RStudio, we recommend connecting to the bcbio-nextgen run 69 | directory as a remote connection over 70 | \href{https://github.com/osxfuse/osxfuse/wiki/SSHFS}{sshfs}. 71 | } 72 | 73 | \examples{ 74 | uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell") 75 | 76 | x <- bcbioSingleCell(uploadDir) 77 | print(x) 78 | 79 | x <- bcbioSingleCell( 80 | uploadDir = uploadDir, 81 | sampleMetadataFile = file.path(uploadDir, "metadata.csv") 82 | ) 83 | print(x) 84 | } 85 | \seealso{ 86 | \itemize{ 87 | \item \code{SingleCellExperiment::SingleCellExperiment()}. 88 | \item \code{.S4methods(class = "bcbioSingleCell")}. 89 | } 90 | } 91 | \author{ 92 | Michael Steinbaugh 93 | } 94 | -------------------------------------------------------------------------------- /inst/extdata/indrops/multiplexed-AAAAAAAA/multiplexed-AAAAAAAA-barcodes.tsv: -------------------------------------------------------------------------------- 1 | "AAACACTA-CTTCGATT" 71802 2 | "AAACTACA-CCACATTA" 76590 3 | "AACTGCCT-GCAAGGAC" 106100 4 | "AAGAAGGT-TCTGTGGT" 74449 5 | "AAGCCTTC-TAAATAGG" 105540 6 | "AATAAGGA-CCACATTA" 35645 7 | "AATCGAAG-CCCAAGCA" 68099 8 | "AATCGTTC-CCCTAACC" 84804 9 | "ACCCTCAA-CTGCGTTG" 66194 10 | "ACCTGAAG-GAGCGGTA" 57162 11 | "ACTAATTG-CTTTAATC" 63990 12 | "ACTAGAGC-TCGACACC" 82100 13 | "AGAAACCA-ATACTCTT" 56283 14 | "AGCTCCAC-CCTGACAC" 58527 15 | "AGGTAAGC-TCCCAATC" 82639 16 | "ATATGCAA-GGCGGTTT" 68647 17 | "ATCAATCG-GTTGTCAT" 72148 18 | "ATCGCGCT-AGAGGTGG" 77010 19 | "CAACGCAG-CTCGCGTA" 78838 20 | "CACAACCT-GGAGAAGC" 51457 21 | "CCATGCAT-TTCCGCTC" 68447 22 | "CCCGTTCT-AAAGCCTA" 106986 23 | "CCGAGATC-ATGGGCAC" 90386 24 | "CCGATACG-CAAGAGGG" 131259 25 | "CCGGAAAT-GTTGTCAT" 61199 26 | "CCTACGCT-AGCAGAAC" 86060 27 | "CGTGTGTT-AGGAAGAC" 124799 28 | "CGTTTCGT-GACAGATA" 76444 29 | "CTAGCACG-AATCGGGT" 90582 30 | "CTAGTAGG-TTGAGGGT" 66021 31 | "CTCACATC-ACCCACGA" 57006 32 | "CTCCTCCA-CGTATTTC" 94587 33 | 
"CTCCTCCA-TGTACACG" 58778 34 | "CTCTATAG-GCAAAGCC" 82448 35 | "CTGTCGCA-TTTAACAG" 111111 36 | "CTGTTAAA-AATGAATG" 51878 37 | "CTGTTAAA-GACAGATA" 93666 38 | "CTTAGGCC-AGAAGTCC" 111183 39 | "CTTAGTGT-TCCAGGGA" 57623 40 | "CTTCTACG-CTTCTTCG" 67106 41 | "CTTTATCC-CTACCGTT" 92126 42 | "GACACCTG-CTCAGAAT" 49189 43 | "GACTAGCG-GAAGTGCC" 91684 44 | "GAGAAACC-ACAGCGGA" 50844 45 | "GAGTGTAC-ACGCAGAG" 64944 46 | "GATTAAAG-ACTAGCCA" 71098 47 | "GATTACTT-GAGAATTG" 65073 48 | "GATTTCCC-ATGTTGGC" 84194 49 | "GCAAACTG-CTTCAGGT" 52167 50 | "GCCAACAT-CGTGGATA" 38063 51 | "GCCTGGTA-CGCTCTCA" 51643 52 | "GCGCTGAT-GACAAAGG" 47118 53 | "GCGTGCAA-TCTGTGGT" 66552 54 | "GGAACGAA-TTGCACGC" 52399 55 | "GGCGACAA-TTCCGAGT" 61851 56 | "GGCTTTGC-AACCCTTG" 54214 57 | "GGCTTTGC-AGCGAAGT" 70085 58 | "GGCTTTGC-TTAGGACC" 92908 59 | "GGGATTAC-AAATGTCG" 60698 60 | "GGTTGAGA-CTGTCTGG" 101810 61 | "GGTTGAGA-GAGAGTAT" 126962 62 | "GTAAGCCG-CGATTGAT" 92347 63 | "GTAATCTG-CGCTAATA" 79897 64 | "GTACGCTT-CCCAAGCA" 77318 65 | "GTACGGAC-CAACAAAT" 70943 66 | "GTCCACTA-CTTCTGGA" 65666 67 | "GTCCACTA-GACAAAGG" 59151 68 | "GTCCGTCA-ATACTCTT" 74739 69 | "GTCTAATC-GGCCCTTA" 77431 70 | "GTGAACTC-CAAGAGGG" 118550 71 | "GTGAGGCA-CAGTTTGC" 58023 72 | "GTGATAAA-CGCTCTCA" 47077 73 | "GTGCCCAT-GTGTCGGA" 61911 74 | "GTGGTGCT-GGAGAAGC" 84629 75 | "GTTACTAG-AGAAGTTA" 57592 76 | "GTTACTAG-CCCTTGGT" 94832 77 | "GTTCTGCT-TGGCTACC" 99905 78 | "TAATCCAT-CGGAATTT" 64021 79 | "TACCGCTC-CCCATAGC" 62870 80 | "TACGCGAG-TGTAGTTT" 73071 81 | "TACGTTCG-TTGATCTA" 80071 82 | "TAGGCTTT-CGGACAAC" 110058 83 | "TAGTAGCC-TAGTGTTT" 79818 84 | "TATTAGCG-CCCTAACC" 52979 85 | "TCAGCCTC-TGCAAGGG" 125515 86 | "TCCGACAC-GGGAGGTA" 120160 87 | "TCGCAATC-CGAACGTA" 113926 88 | "TCGGTCAT-AGCACCAC" 104007 89 | "TCTAAACT-CTCTTGAC" 70414 90 | "TCTTTGAC-CGCTCTCA" 96402 91 | "TGAGAGCG-CCTATTCA" 68229 92 | "TGAGAGCG-GAAGTGCC" 87673 93 | "TGAGCACA-TGCTATTT" 84576 94 | "TGCGACTA-CCGTGTTT" 34262 95 | "TGCGACTA-TTCACATA" 101690 96 | "TGCTTCAT-GCAGGGTA" 128690 97 | 
"TGCTTGGG-CAACAAAT" 69455 98 | "TGGACGGA-TTGTTTAC" 118717 99 | "TGGGAATT-ATATAGGA" 94737 100 | "TGTTATCA-ACGCAGAG" 50886 101 | -------------------------------------------------------------------------------- /R/package.R: -------------------------------------------------------------------------------- 1 | #' bcbioSingleCell 2 | #' 3 | #' Import and analyze [bcbio](https://bcbio-nextgen.readthedocs.io/) single-cell 4 | #' RNA-seq data. 5 | #' 6 | #' @aliases NULL 7 | #' @keywords internal 8 | "_PACKAGE" 9 | 10 | 11 | 12 | ## S4 classes ================================================================== 13 | 14 | #' @importClassesFrom SingleCellExperiment SingleCellExperiment 15 | NULL 16 | 17 | 18 | 19 | ## S4 generics and methods ===================================================== 20 | 21 | #' @importFrom AcidExperiment sampleNames 22 | #' @importFrom AcidGenerics calculateMetrics camelCase droplevels2 23 | #' interestingGroups interestingGroups<- leftJoin makeDimnames makeLabel 24 | #' makeNames metrics plotReadsPerCell sampleData 25 | #' @importFrom BiocGenerics counts updateObject 26 | #' @importFrom S4Vectors cbind do.call droplevels lapply mcols mcols<- 27 | #' metadata metadata<- 28 | #' @importFrom SummarizedExperiment assayNames assay assays assays<- colData 29 | #' colData<- rowData rowData<- rowRanges rowRanges<- 30 | #' @importFrom methods coerce show 31 | #' @importFrom pipette import 32 | NULL 33 | 34 | #' @importMethodsFrom AcidExperiment calculateMetrics interestingGroups 35 | #' interestingGroups<- metrics sampleData sampleNames 36 | #' @importMethodsFrom AcidPlyr leftJoin 37 | #' @importMethodsFrom AcidSingleCell sampleData 38 | #' @importMethodsFrom pipette import 39 | #' @importMethodsFrom syntactic camelCase makeDimnames makeLabel makeNames 40 | NULL 41 | 42 | 43 | 44 | ## Standard functions ========================================================== 45 | 46 | #' @importFrom AcidBase metricsCols printString realpath showSlotInfo 47 | #' 
standardizeCall strMatch 48 | #' @importFrom AcidCLI abort alert alertSuccess alertWarning h1 h2 49 | #' separator toInlineString 50 | #' @importFrom AcidExperiment detectLanes droplevels2 importSampleData 51 | #' matchInterestingGroups minimalSampleData 52 | #' @importFrom AcidGenomes emptyRanges makeGRangesFromEnsembl makeGRangesFromGff 53 | #' @importFrom AcidMarkdown markdownPlots 54 | #' @importFrom AcidPlots !!! .data acid_geom_abline acid_geom_label 55 | #' acid_geom_label_average acid_geom_label_repel acid_scale_color_discrete 56 | #' acid_scale_fill_discrete syms 57 | #' @importFrom AcidSingleCell makeSingleCellExperiment mapCellsToSamples 58 | #' @importFrom IRanges DataFrameList 59 | #' @importFrom S4Vectors DataFrame SimpleList 60 | #' @importFrom bcbioBase getBarcodeCutoffFromCommands getGtfFileFromYaml 61 | #' getLevelFromCommands getSampleDataFromYaml getUmiTypeFromCommands 62 | #' importDataVersions importProgramVersions projectDir runDate sampleDirs 63 | #' @importFrom ggplot2 aes facet_wrap geom_boxplot geom_histogram geom_step 64 | #' geom_violin ggplot labs scale_x_continuous scale_y_continuous stat_ecdf vars 65 | #' @importFrom ggridges geom_density_ridges 66 | #' @importFrom goalie allAreDirectories allAreFiles areDisjointSets areSetEqual 67 | #' assert hasLength hasNames hasRownames hasValidDimnames isADirectory isAFile 68 | #' isAUrl isAny isCharacter isDirectory isFile isFlag isInt isString isSubset 69 | #' requireNamespaces validate validateClasses 70 | #' @importFrom methods .hasSlot as as<- is new setClass slot slot<- validObject 71 | #' @importFrom parallel mcMap mclapply 72 | #' @importFrom utils capture.output packageName packageVersion 73 | NULL 74 | -------------------------------------------------------------------------------- /todo.org: -------------------------------------------------------------------------------- 1 | #+TITLE: bcbioSingleCell 2 | #+STARTUP: content 3 | * Development 4 | ** TODO Need to address this note now 
popping up in build checks. 5 | Note: found 88 marked UTF-8 strings 6 | ** TODO Need to ensure R Markdown renders correctly without goalie issues. 7 | ** TODO Require valid names in all slots. 8 | ** TODO Add monocle celldataset coercion method. 9 | ** TODO Need to run BFG on the repo and remove old example R data. 10 | ** TODO `bcbioSingleCell()`: Explain genome annotation priority in better detail, following the logic defined in `bcbioRNASeq()`. 11 | ** TODO Improve support for transcript-level counts? 12 | Should we even allow this at this point? 13 | ** TODO Vignette using example bcbio data and 10X pbmc data. 14 | ** TODO Look into BarcodeInflectionsPlot, now in Seurat 3. 15 | * pointillism 16 | ** TODO Move markers out of Google Sheets and into the package, as simple CSV files. 17 | ** TODO Use pseudobulk approach for DE with sample replicates 18 | ** TODO `plotFeature()`: Add `pointsAsNumbers` argument support. 19 | ** TODO `diffExp()`: Add internal support for accessing design with `design()` generic. 20 | ** TODO `KnownMarkers` S4: switch from DataFrame to SplitDataFrameList inheritance? 21 | ** TODO Improve SeuratMarkers class. 22 | This needs to error if the input data.frame contains `cluster` column. 23 | Consider only using `SeuratMarkers` as a single generator but returning `SeuratMarkers` or `SeuratMarkersPerCluster` automatically. 24 | Allow generator to work with empty ranges? 25 | ** TODO Improve plotFeature. 26 | Add pointsAsNumbers support. Is there a way to facet wrap these instead of using plot grid? Then we can easily support a title. We're using continuous color here, so the formal won't match… argument "color" is missing, with no default. 27 | ** TODO `findMarkers()`: Consider adding `progress` option or BPPARAM support. 28 | ** TODO Switch to `Misc()` to access the `@misc` slot? 29 | ** TODO Consider splitting `SeuratMarkers` class into a `DataFrameList`, per cluster. 
30 | ** TODO Stacked bar plot for relative cell abundances per cluster or sample type. 31 | ** TODO Put the resolution in the plot title for t-SNE. 32 | ** TODO Add `write = TRUE` argument support for marking looping functions, to write CSVs automatically to disk. 33 | ** TODO Improve assert checks for `findMarkers()` 34 | * syntactic 35 | ** TODO saveData: Need to harden against accidental pipe to this function. 36 | ** TODO Check that renaming mode renames symlinks themselves, not the resolved file. 37 | I ran into this attempting to name album artist symlinks in iTunes. 38 | ** TODO VIGNETTE. Seriously, work on it. 39 | ** TODO Functions need to convert accent marks if possible. Particularly useful for file names. 40 | ** TODO Ensure nM returns as nm instead of n_m. 41 | ** TODO Rename mode: Need to look for and strip ' - ' out automatically. 42 | Otherwise this will return '-minus-' in the file name, which is annoying. Need to improve the internal sanitization in R for this. 43 | ** TODO Rename mode needs to also make extension lowercase. 44 | ** TODO Need to add recursive rename mode support (koopa only?). 45 | Maybe this is easiest to implement on the R side of things. Need to rename files first, then directories, sorted. Need to work from lowest levels up. 46 | ** TODO nMito, nCoding looks weird with makeLabel plural. 47 | See bcbioSingleCell example. 48 | -------------------------------------------------------------------------------- /R/AllClasses.R: -------------------------------------------------------------------------------- 1 | #' bcbio single-cell RNA-seq data set 2 | #' 3 | #' `bcbioSingleCell` is an S4 class that extends `SingleCellExperiment`, and is 4 | #' designed to store a bcbio single-cell RNA-seq analysis. This class contains 5 | #' read counts saved as a sparse matrix (`sparseMatrix`), sample metadata, and 6 | #' cell quality control metrics. 7 | #' 8 | #' @author Michael Steinbaugh, Rory Kirchner 9 | #' @note Updated 2022-05-09.
#' @export
setClass(
    Class = "bcbioSingleCell",
    contains = "SingleCellExperiment"
)
## Validity method for the class. Checks run in a fixed order, and the first
## failing check is returned immediately instead of `TRUE`, so later checks can
## rely on earlier ones having passed.
setValidity(
    Class = "bcbioSingleCell",
    method = function(object) {
        colData <- colData(object)
        metadata <- metadata(object)
        sampleData <- sampleData(object)
        ## Return invalid for all objects older than v0.1.
        version <- metadata[["version"]]
        ok <- validate(
            is(version, "package_version"),
            version >= 0.1
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Check for legacy bcbio slot.
        ok <- validate(!.hasSlot(object, "bcbio"))
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Assays --------------------------------------------------------------
        ## A `counts` assay is required; additional assays are allowed.
        ok <- validate(isSubset("counts", names(assays(object))))
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Row data ------------------------------------------------------------
        ok <- validate(
            is(rowRanges(object), "GenomicRanges"),
            is(rowData(object), "DataFrame")
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Column data ---------------------------------------------------------
        ok <- validate(
            ## Require that metrics columns are defined.
            isSubset(.metricsCols, colnames(colData)),
            ## Ensure that `interestingGroups` isn't slotted in colData.
            areDisjointSets("interestingGroups", colnames(colData))
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Metadata ------------------------------------------------------------
        ## Both class names are listed for the DataFrame-typed elements;
        ## presumably either one is accepted -- confirm against
        ## `validateClasses()`.
        df <- c("DFrame", "DataFrame")
        ok <- validateClasses(
            object = metadata,
            expected = list(
                allSamples = "logical",
                bcbioCommandsLog = "character",
                bcbioLog = "character",
                dataVersions = df,
                date = "Date",
                ensemblRelease = "integer",
                genomeBuild = "character",
                gffFile = "character",
                interestingGroups = "character",
                lanes = "integer",
                level = "character",
                organism = "character",
                pipeline = "character",
                programVersions = df,
                projectDir = "character",
                runDate = "Date",
                sampleDirs = "character",
                sampleMetadataFile = "character",
                sessionInfo = c("sessionInfo", "session_info"),
                umiType = "character",
                uploadDir = "character",
                version = "package_version",
                wd = "character",
                yaml = "list"
            ),
            ## NOTE(review): presumably restricts the check to the elements
            ## listed above, ignoring any extra metadata -- confirm against
            ## `validateClasses()`.
            subset = TRUE
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        ## Check that level is defined.
        ## Also require that `sampleName` is not stashed in metadata.
        ok <- validate(
            !isSubset("sampleName", names(metadata)),
            isSubset(metadata[["level"]], c("genes", "transcripts"))
        )
        if (!isTRUE(ok)) {
            return(ok)
        }
        TRUE
    }
)
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bcbioSingleCell 2 | 3 | [![Install with Bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/recipes/r-bcbiosinglecell/README.html) 4 | ![Lifecycle: retired](https://img.shields.io/badge/lifecycle-retired-red.svg) 5 | 6 | **NOTE: [bcbio-nextgen][bcbio] is no longer under active development.** 7 | Refer to the [notice of discontinuation][] for additional details.
8 | 9 | [R][] package for [bcbio][] single-cell RNA-seq analysis. 10 | 11 | ## Installation 12 | 13 | This is an R package. 14 | 15 | ```r 16 | if (!requireNamespace("BiocManager", quietly = TRUE)) { 17 | install.packages("BiocManager") 18 | } 19 | install.packages( 20 | pkgs = "bcbioSingleCell", 21 | repos = c( 22 | "https://r.acidgenomics.com", 23 | BiocManager::repositories() 24 | ), 25 | dependencies = TRUE 26 | ) 27 | ``` 28 | 29 | ### [Conda][] method 30 | 31 | Configure [Conda][] to use the [Bioconda][] channels. 32 | 33 | ```sh 34 | # Don't install recipe into base environment. 35 | conda create --name='r-bcbiosinglecell' 'r-bcbiosinglecell' 36 | conda activate 'r-bcbiosinglecell' 37 | R 38 | ``` 39 | 40 | ## Load bcbio single-cell RNA-seq data 41 | 42 | ```r 43 | library(bcbioSingleCell) 44 | object <- bcbioSingleCell( 45 | uploadDir = file.path("indrops", "final"), 46 | interestingGroups = c("genotype", "treatment"), 47 | sampleMetadataFile = "sample_metadata.csv", 48 | organism = "Homo sapiens", 49 | ensemblRelease = 90L 50 | ) 51 | ``` 52 | 53 | This will return a `bcbioSingleCell` object, which is an extension of the 54 | [Bioconductor][] [SingleCellExperiment][sce] container class. Consult the 55 | `bcbioSingleCell()` constructor function documentation for detailed information 56 | on the supported parameters: 57 | 58 | ```r 59 | help(topic = "bcbioSingleCell", package = "bcbioSingleCell") 60 | ``` 61 | 62 | ## Sample metadata examples 63 | 64 | ### FASTQ files with samples multiplexed by index barcode 65 | 66 | This is our current recommended method for analyzing an inDrops dataset. 67 | The sample index barcodes are multiplexed per FASTQ set. For Illumina 68 | sequencing data, the raw binary base call (BCL) data must be converted into 69 | FASTQs (split into `R1`-`R4` files) using [bcl2fastq][]. 
70 | 71 | The inDrops library version is automatically detected by bcbio, but ensure that 72 | the sample index sequences provided match the library version when attempting to 73 | create a `bcbioSingleCell` object. 74 | 75 | Consult the bcbio documentation for more information on how to configure an 76 | inDrops run prior to loading into R with the `bcbioSingleCell()` function. 77 | 78 | | description | index | sequence | sampleName | aggregate | genotype | 79 | | ----------- | ----- | -------- | ---------- | --------- | -------- | 80 | | indrops1 | 1 | CTCTCTAT | sample1_1 | sample1 | wildtype | 81 | | indrops1 | 2 | TATCCTCT | sample2_1 | sample2 | knockout | 82 | | indrops1 | 3 | GTAAGGAG | sample3_1 | sample3 | wildtype | 83 | | indrops1 | 4 | ACTGCATA | sample4_1 | sample4 | knockout | 84 | | indrops2 | 1 | CTCTCTAT | sample1_2 | sample1 | wildtype | 85 | | indrops2 | 2 | TATCCTCT | sample2_2 | sample2 | knockout | 86 | | indrops2 | 3 | GTAAGGAG | sample3_2 | sample3 | wildtype | 87 | | indrops2 | 4 | ACTGCATA | sample4_2 | sample4 | knockout | 88 | 89 | Note that bcbio currently outputs the reverse complement index sequence in the 90 | sample directory names (e.g. `sample-ATAGAGAG`). Define the forward index 91 | barcode in the `sequence` column here, not the reverse complement. The reverse 92 | complement will be calculated automatically and added as the `revcomp` column 93 | in the sample metadata. 94 | 95 | ### FASTQ files demultiplexed per sample 96 | 97 | This is our current method for handling 10X Genomics Chromium and Illumina 98 | SureCell cell barcodes. 
99 | 100 | | description | genotype | 101 | | ----------- | -------- | 102 | | sample1 | wildtype | 103 | | sample2 | knockout | 104 | | sample3 | wildtype | 105 | | sample4 | knockout | 106 | 107 | ### Invalid object 108 | 109 | If you encounter a `validObject` error when attempting to load a 110 | `bcbioSingleCell` object from a previous analysis, run this step to update the 111 | object to the current version of the package: 112 | 113 | ```r 114 | object <- updateObject(object) 115 | validObject(object) 116 | ## [1] TRUE 117 | ``` 118 | 119 | ## References 120 | 121 | The papers and software cited in our workflows are available as a [shared 122 | library](https://paperpile.com/shared/C8EMxl) on [Paperpile][]. 123 | 124 | [bcbio]: https://bcbio-nextgen.readthedocs.io/ 125 | [bcl2fastq]: https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html 126 | [bioconda]: https://bioconda.github.io/ 127 | [bioconductor]: https://bioconductor.org/ 128 | [conda]: https://conda.io/ 129 | [notice of discontinuation]: https://github.com/bcbio/bcbio-nextgen/issues/3749 130 | [paperpile]: https://paperpile.com/ 131 | [r]: https://www.r-project.org/ 132 | [sce]: https://bioconductor.org/packages/SingleCellExperiment/ 133 | -------------------------------------------------------------------------------- /R/internal-import.R: -------------------------------------------------------------------------------- 1 | #' Import bcbio counts from sample directories 2 | #' 3 | #' @author Michael Steinbaugh 4 | #' @keywords internal 5 | #' @note Updated 2023-08-17. 6 | #' @noRd 7 | #' 8 | #' @param sampleDirs `character`. 9 | #' Sample directory paths. 10 | #' 11 | #' @return `Matrix` / `matrix`. 
.importCounts <-
    function(sampleDirs) {
        ## Import the count matrix of every sample directory and column-bind
        ## the results. `sampleDirs` must be a named character vector of
        ## existing directories; the names are used as sample identifiers.
        assert(
            allAreDirectories(sampleDirs),
            hasNames(sampleDirs)
        )
        alert("Importing counts.")
        ## Cell barcodes are only prefixed with the sample identifier when
        ## counts from more than one sample are combined.
        prefixCells <- length(sampleDirs) > 1L
        list <- mcMap(
            sampleId = names(sampleDirs),
            dir = sampleDirs,
            f = function(sampleId, dir) {
                counts <- .importCountsPerSample(dir)
                ## `.importCountsPerSample()` returns `NULL` when a sample
                ## has no features or no cells. Pass that through untouched
                ## so the sample is dropped by the filter below; calling
                ## `colnames<-` on `NULL` would error instead.
                if (is.null(counts)) {
                    return(NULL)
                }
                if (isTRUE(prefixCells)) {
                    colnames(counts) <-
                        paste(sampleId, colnames(counts), sep = "_")
                }
                ## Ensure names are valid.
                makeDimnames(counts)
            }
        )
        ## Remove any empty items in list, which can result from low quality
        ## samples with empty matrices in bcbio pipeline.
        list <- Filter(f = Negate(is.null), x = list)
        assert(
            hasLength(list),
            msg = sprintf(
                fmt = paste0(
                    "bcbio didn't return any cells.\n",
                    "Check your '%s' setting."
                ),
                "minimum_barcode_depth"
            )
        )
        ## Bind the matrices.
        do.call(cbind, list)
    }



#' Import counts per sample from sparse matrix
#'
#' Always in Matrix Market Exchange (MEX/MTX) format.
#'
#' This may be advantageous over loading the giant combined matrix because we
#' can parallelize the import per sample with `parallel::mcMap()`.
#'
#' Attempt to load the column and rowname files first. If they're empty, skip
#' loading the MatrixMarket file, which will error otherwise. The bcbio pipeline
#' will output empty files for very low quality samples with no cells that pass
#' filtering.
#'
#' @author Michael Steinbaugh
#' @keywords internal
#' @note Updated 2020-01-20.
#' @noRd
#'
#' @param dir `character(1)`.
#' Sample directory path.
#'
#' @return `sparseMatrix`.
.importCountsPerSample <- # nolint
    function(dir) {
        ## Read one sample's MatrixMarket counts together with its feature
        ## (row) and cellular barcode (column) name sidecar files. Returns
        ## `NULL` when either sidecar file is empty.
        assert(isADirectory(dir))
        sampleName <- basename(dir)
        ## All three files must exist, even if they are empty.
        mtxFile <- file.path(dir, paste0(sampleName, ".mtx"))
        featuresFile <- paste0(mtxFile, ".rownames")
        barcodesFile <- paste0(mtxFile, ".colnames")
        assert(allAreFiles(c(mtxFile, featuresFile, barcodesFile)))
        ## Genes/transcripts (features), then cellular barcodes.
        features <- import(featuresFile, format = "lines")
        barcodes <- import(barcodesFile, format = "lines")
        ## Nothing to load for this sample; skip the matrix import entirely.
        if (length(features) == 0L || length(barcodes) == 0L) {
            alertWarning(sprintf("Skipped {.path %s}.", sampleName))
            return(NULL)
        }
        counts <- import(mtxFile)
        ## The sidecar files must describe the matrix dimensions exactly.
        assert(
            identical(length(features), nrow(counts)),
            identical(length(barcodes), ncol(counts))
        )
        rownames(counts) <- features
        colnames(counts) <- barcodes
        alert(sprintf("Imported {.path %s}.", sampleName))
        counts
    }



#' Import raw cellular barcode read list
#'
#' Get the number of pre-UMI disambiguated reads per cellular barcode.
#'
#' @author Michael Steinbaugh
#' @keywords internal
#' @note Updated 2023-08-17.
#' @noRd
#'
#' @param sampleDirs `character`.
#' Sample directories.
#'
#' @return `list`.
#' List of integer vectors per sample containing the pre-filtered cellular
#' barcode counts (`nCount`).
.importReads <-
    function(sampleDirs) {
        ## Read the `<sample>-barcodes.tsv` file of every sample directory
        ## and return one named integer vector (reads per barcode) per sample.
        assert(
            allAreDirectories(sampleDirs),
            hasNames(sampleDirs)
        )
        alert("Importing unfiltered cellular barcode distributions.")
        sampleIds <- names(sampleDirs)
        barcodeFiles <- realpath(file.path(
            sampleDirs,
            paste0(basename(sampleDirs), "-barcodes.tsv")
        ))
        names(barcodeFiles) <- sampleIds
        ## Per-file reader: two-column TSV of barcode and read count.
        readBarcodes <- function(file) {
            tbl <- import(
                con = file,
                format = "tsv",
                colnames = c("barcode", "n")
            )
            nReads <- as.integer(tbl[["n"]])
            names(nReads) <- makeNames(tbl[["barcode"]])
            nReads
        }
        out <- mclapply(X = barcodeFiles, FUN = readBarcodes)
        names(out) <- sampleIds
        out
    }
-------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(bcbioSingleCell) 4 | export(bcbioSingleCellTestsUrl) 5 | export(calculateMetrics) 6 | export(filterCells) 7 | export(plotBarcodeRanks) 8 | export(plotCellCounts) 9 | export(plotCountsPerCell) 10 | export(plotCountsVsFeatures) 11 | export(plotFeaturesPerCell) 12 | export(plotMitoRatio) 13 | export(plotNovelty) 14 | export(plotQc) 15 | export(plotReadsPerCell) 16 | export(show) 17 | export(updateObject) 18 | exportClasses(bcbioSingleCell) 19 | exportMethods("[") 20 | exportMethods(plotReadsPerCell) 21 | exportMethods(show) 22 | exportMethods(updateObject) 23 | importClassesFrom(SingleCellExperiment,SingleCellExperiment) 24 | importFrom(AcidBase,metricsCols) 25 | importFrom(AcidBase,printString) 26 | importFrom(AcidBase,realpath) 27 | importFrom(AcidBase,showSlotInfo) 28 | importFrom(AcidBase,standardizeCall) 29 | importFrom(AcidBase,strMatch) 30 | importFrom(AcidCLI,abort) 31 | importFrom(AcidCLI,alert) 32 | importFrom(AcidCLI,alertSuccess) 33 | importFrom(AcidCLI,alertWarning) 34 | importFrom(AcidCLI,h1) 35 | 
importFrom(AcidCLI,h2) 36 | importFrom(AcidCLI,separator) 37 | importFrom(AcidCLI,toInlineString) 38 | importFrom(AcidExperiment,detectLanes) 39 | importFrom(AcidExperiment,droplevels2) 40 | importFrom(AcidExperiment,importSampleData) 41 | importFrom(AcidExperiment,matchInterestingGroups) 42 | importFrom(AcidExperiment,minimalSampleData) 43 | importFrom(AcidExperiment,sampleNames) 44 | importFrom(AcidGenerics,"interestingGroups<-") 45 | importFrom(AcidGenerics,calculateMetrics) 46 | importFrom(AcidGenerics,camelCase) 47 | importFrom(AcidGenerics,droplevels2) 48 | importFrom(AcidGenerics,filterCells) 49 | importFrom(AcidGenerics,interestingGroups) 50 | importFrom(AcidGenerics,leftJoin) 51 | importFrom(AcidGenerics,makeDimnames) 52 | importFrom(AcidGenerics,makeLabel) 53 | importFrom(AcidGenerics,makeNames) 54 | importFrom(AcidGenerics,metrics) 55 | importFrom(AcidGenerics,plotBarcodeRanks) 56 | importFrom(AcidGenerics,plotCellCounts) 57 | importFrom(AcidGenerics,plotCountsPerCell) 58 | importFrom(AcidGenerics,plotCountsVsFeatures) 59 | importFrom(AcidGenerics,plotFeaturesPerCell) 60 | importFrom(AcidGenerics,plotMitoRatio) 61 | importFrom(AcidGenerics,plotNovelty) 62 | importFrom(AcidGenerics,plotQc) 63 | importFrom(AcidGenerics,plotReadsPerCell) 64 | importFrom(AcidGenerics,sampleData) 65 | importFrom(AcidGenomes,emptyRanges) 66 | importFrom(AcidGenomes,makeGRangesFromEnsembl) 67 | importFrom(AcidGenomes,makeGRangesFromGff) 68 | importFrom(AcidMarkdown,markdownPlots) 69 | importFrom(AcidPlots,"!!!") 70 | importFrom(AcidPlots,.data) 71 | importFrom(AcidPlots,acid_geom_abline) 72 | importFrom(AcidPlots,acid_geom_label) 73 | importFrom(AcidPlots,acid_geom_label_average) 74 | importFrom(AcidPlots,acid_geom_label_repel) 75 | importFrom(AcidPlots,acid_scale_color_discrete) 76 | importFrom(AcidPlots,acid_scale_fill_discrete) 77 | importFrom(AcidPlots,syms) 78 | importFrom(AcidSingleCell,makeSingleCellExperiment) 79 | importFrom(AcidSingleCell,mapCellsToSamples) 80 | 
importFrom(BiocGenerics,counts) 81 | importFrom(BiocGenerics,updateObject) 82 | importFrom(IRanges,DataFrameList) 83 | importFrom(S4Vectors,"mcols<-") 84 | importFrom(S4Vectors,"metadata<-") 85 | importFrom(S4Vectors,DataFrame) 86 | importFrom(S4Vectors,SimpleList) 87 | importFrom(S4Vectors,cbind) 88 | importFrom(S4Vectors,do.call) 89 | importFrom(S4Vectors,droplevels) 90 | importFrom(S4Vectors,lapply) 91 | importFrom(S4Vectors,mcols) 92 | importFrom(S4Vectors,metadata) 93 | importFrom(SummarizedExperiment,"assays<-") 94 | importFrom(SummarizedExperiment,"colData<-") 95 | importFrom(SummarizedExperiment,"rowData<-") 96 | importFrom(SummarizedExperiment,"rowRanges<-") 97 | importFrom(SummarizedExperiment,assay) 98 | importFrom(SummarizedExperiment,assayNames) 99 | importFrom(SummarizedExperiment,assays) 100 | importFrom(SummarizedExperiment,colData) 101 | importFrom(SummarizedExperiment,rowData) 102 | importFrom(SummarizedExperiment,rowRanges) 103 | importFrom(bcbioBase,getBarcodeCutoffFromCommands) 104 | importFrom(bcbioBase,getGtfFileFromYaml) 105 | importFrom(bcbioBase,getLevelFromCommands) 106 | importFrom(bcbioBase,getSampleDataFromYaml) 107 | importFrom(bcbioBase,getUmiTypeFromCommands) 108 | importFrom(bcbioBase,importDataVersions) 109 | importFrom(bcbioBase,importProgramVersions) 110 | importFrom(bcbioBase,projectDir) 111 | importFrom(bcbioBase,runDate) 112 | importFrom(bcbioBase,sampleDirs) 113 | importFrom(ggplot2,aes) 114 | importFrom(ggplot2,facet_wrap) 115 | importFrom(ggplot2,geom_boxplot) 116 | importFrom(ggplot2,geom_histogram) 117 | importFrom(ggplot2,geom_step) 118 | importFrom(ggplot2,geom_violin) 119 | importFrom(ggplot2,ggplot) 120 | importFrom(ggplot2,labs) 121 | importFrom(ggplot2,scale_x_continuous) 122 | importFrom(ggplot2,scale_y_continuous) 123 | importFrom(ggplot2,stat_ecdf) 124 | importFrom(ggplot2,vars) 125 | importFrom(ggridges,geom_density_ridges) 126 | importFrom(goalie,allAreDirectories) 127 | importFrom(goalie,allAreFiles) 128 | 
importFrom(goalie,areDisjointSets) 129 | importFrom(goalie,areSetEqual) 130 | importFrom(goalie,assert) 131 | importFrom(goalie,hasLength) 132 | importFrom(goalie,hasNames) 133 | importFrom(goalie,hasRownames) 134 | importFrom(goalie,hasValidDimnames) 135 | importFrom(goalie,isADirectory) 136 | importFrom(goalie,isAFile) 137 | importFrom(goalie,isAUrl) 138 | importFrom(goalie,isAny) 139 | importFrom(goalie,isCharacter) 140 | importFrom(goalie,isDirectory) 141 | importFrom(goalie,isFile) 142 | importFrom(goalie,isFlag) 143 | importFrom(goalie,isInt) 144 | importFrom(goalie,isString) 145 | importFrom(goalie,isSubset) 146 | importFrom(goalie,requireNamespaces) 147 | importFrom(goalie,validate) 148 | importFrom(goalie,validateClasses) 149 | importFrom(methods,"as<-") 150 | importFrom(methods,"slot<-") 151 | importFrom(methods,.hasSlot) 152 | importFrom(methods,as) 153 | importFrom(methods,coerce) 154 | importFrom(methods,is) 155 | importFrom(methods,new) 156 | importFrom(methods,setClass) 157 | importFrom(methods,show) 158 | importFrom(methods,slot) 159 | importFrom(methods,validObject) 160 | importFrom(parallel,mcMap) 161 | importFrom(parallel,mclapply) 162 | importFrom(pipette,import) 163 | importFrom(utils,capture.output) 164 | importFrom(utils,packageName) 165 | importFrom(utils,packageVersion) 166 | importMethodsFrom(AcidExperiment,"interestingGroups<-") 167 | importMethodsFrom(AcidExperiment,calculateMetrics) 168 | importMethodsFrom(AcidExperiment,interestingGroups) 169 | importMethodsFrom(AcidExperiment,metrics) 170 | importMethodsFrom(AcidExperiment,sampleData) 171 | importMethodsFrom(AcidExperiment,sampleNames) 172 | importMethodsFrom(AcidPlyr,leftJoin) 173 | importMethodsFrom(AcidSingleCell,sampleData) 174 | importMethodsFrom(pipette,import) 175 | importMethodsFrom(syntactic,camelCase) 176 | importMethodsFrom(syntactic,makeDimnames) 177 | importMethodsFrom(syntactic,makeLabel) 178 | importMethodsFrom(syntactic,makeNames) 179 | 
-------------------------------------------------------------------------------- /inst/rmarkdown/templates/quality-control/skeleton/skeleton.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | ## Updated 2023-10-05. 3 | params: 4 | title: "Quality control and filtering" 5 | 6 | ## bcbioSingleCell object. 7 | bcb_file: !r file.path("data", "bcb.rds") 8 | 9 | ## Counts: Expected number of UMI-disambiguated counts per cell. 10 | min_counts: 1000 11 | max_counts: !r Inf 12 | 13 | ## Features: Expected number of features (i.e. genes) per cell. 14 | min_features: 500 15 | max_features: !r Inf 16 | 17 | ## Novelty score: log10 features per count. 18 | min_novelty: 0.85 19 | 20 | ## Mitochondrial abundance. 21 | ## Low quality / stressed cells tend to be above 20%. 22 | max_mito_ratio: 0.1 23 | 24 | ## Minimum number of cells per feature. 25 | ## This will remove features with very low expression. 26 | min_cells_per_feature: 10 27 | 28 | ## Expected number of cells per sample. 29 | ## Not recommended by default. 30 | n_cells: !r Inf 31 | 32 | ## Where to save the filtered output. 33 | data_dir: !r file.path("data") 34 | 35 | ## Where to export results of the analysis. 36 | output_dir: !r file.path("results", Sys.Date(), "quality-control") 37 | 38 | title: "`r params[['title']]`" 39 | author: "`r getOption('author')`" 40 | date: "`r Sys.Date()`" 41 | 42 | ## This file is generated by the `prepareTemplate()` step in setup chunk. 
43 | bibliography: "bibliography.bib" 44 | --- 45 | 46 | ```{r setup, cache=FALSE, message=FALSE} 47 | ## nolint start 48 | suppressPackageStartupMessages({ 49 | library(goalie) 50 | library(basejump) 51 | library(ggplot2) 52 | library(bcbioSingleCell) 53 | }) 54 | prepareTemplate() 55 | source("_setup.R") 56 | ## nolint end 57 | ``` 58 | 59 | ```{r header, child="_header.Rmd"} 60 | ``` 61 | 62 | # Load `bcbioSingleCell` object 63 | 64 | ```{r load-object} 65 | object <- import(params[["bcb_file"]]) 66 | assert( 67 | is(object, "bcbioSingleCell"), 68 | validObject(object) 69 | ) 70 | print(object) 71 | ``` 72 | 73 | [bcbio][] run data was imported from: 74 | **`r metadata(object)[["uploadDir"]]`**. 75 | 76 | # Sample metadata 77 | 78 | ```{r sample-data} 79 | sampleData(object) 80 | ``` 81 | 82 | # Reads per cell {.tabset} 83 | 84 | These are counts of how many reads are assigned to a given cellular barcode. It 85 | is normal for single cell RNA-seq data to contain a large number of low 86 | complexity barcodes. The bcbio pipeline filters out most of these barcodes, and 87 | here we have applied a threshold cutoff of a minimum of 88 | `r metadata(object)[["cellularBarcodeCutoff"]]` reads per cell. The unfiltered 89 | read count distributions are shown here. 90 | 91 | ## Histogram 92 | 93 | For high quality data, the proportional histogram should contain a single large 94 | peak that represents cells that were encapsulated. If we see a strong shoulder, 95 | or a bimodal distribution of the cells, that can indicate a couple problems. It 96 | might be that there is free floating RNA, which happens when cells are dying. 97 | It could also be that there are a set of cells that failed for some reason. 98 | Finally, it could also be that there are biologically different types of cells, 99 | and one type is much smaller than the other. If this is the case we would 100 | expect to see less RNA being sequenced from the smaller cells. 
101 | 102 | ```{r plot-reads-per-cell-histogram} 103 | plotReadsPerCell( 104 | object = object, 105 | geom = "histogram", 106 | interestingGroups = "sampleName" 107 | ) 108 | ``` 109 | 110 | ## ECDF 111 | 112 | An empirical cumulative distribution function (ECDF) plot will show the frequency 113 | distribution of the reads per cell. You can see that the vast majority of low 114 | complexity barcodes plateau at a read depth below 1000 reads per cell. 115 | 116 | ```{r plot-reads-per-cell-ecdf} 117 | plotReadsPerCell(object = object, geom = "ecdf") 118 | ``` 119 | 120 | # UMI counts per cell {.tabset} 121 | 122 | Now let's assess the distribution of unique molecular identifier 123 | (UMI)-deconvoluted counts per cell. In general, the distributions should be 124 | relatively uniform per sample. Here we are also including violin and ridgeline 125 | plots, with the average number of genes per cell labeled. 126 | 127 | ```{r plot-counts-per-cell-prefilter} 128 | markdownHeader("Violin", level = 2L) 129 | plotCountsPerCell( 130 | object = object, 131 | geom = "violin", 132 | min = params[["min_counts"]], 133 | max = params[["max_counts"]] 134 | ) 135 | 136 | markdownHeader("Ridgeline", level = 2L) 137 | plotCountsPerCell( 138 | object = object, 139 | geom = "ridgeline", 140 | min = params[["min_counts"]], 141 | max = params[["max_counts"]] 142 | ) 143 | 144 | markdownHeader("Histogram", level = 2L) 145 | plotCountsPerCell( 146 | object = object, 147 | geom = "histogram", 148 | min = params[["min_counts"]], 149 | max = params[["max_counts"]] 150 | ) 151 | 152 | markdownHeader("ECDF", level = 2L) 153 | plotCountsPerCell( 154 | object = object, 155 | geom = "ecdf", 156 | interestingGroups = "sampleName", 157 | min = params[["min_counts"]], 158 | max = params[["max_counts"]] 159 | ) 160 | ``` 161 | 162 | # Filter cells by UMI count 163 | 164 | Let's apply this step first and then proceed to evaluating gene detection, 165 | mitochondrial transcript abundance, and novelty scores. 
166 | 167 | ```{r filter-cells-by-count} 168 | object <- filterCells( 169 | object = object, 170 | minCounts = params[["min_counts"]], 171 | maxCounts = params[["max_counts"]] 172 | ) 173 | ``` 174 | 175 | Let's take a look at the UMI per cell distributions after this filtering step. 176 | Note that we haven't applied very strict filtering here -- we're going to cut 177 | off the "low quality" cells based on the gene detection rate, novelty score, 178 | and mitochondrial abundance. 179 | 180 | ```{r plot-counts-per-cell-postfilter} 181 | plotCountsPerCell( 182 | object = object, 183 | geom = "histogram", 184 | min = params[["min_counts"]], 185 | max = params[["max_counts"]] 186 | ) 187 | ``` 188 | 189 | # Genes detected per cell {.tabset} 190 | 191 | Here by "detected", we mean genes with a non-zero count measurement per cell. 192 | Seeing gene detection in the range of `500`-`5000` is normal for most 193 | single-cell experiments. 194 | 195 | ```{r plot-features-per-cell} 196 | markdownHeader("Violin", level = 2L) 197 | plotFeaturesPerCell( 198 | object = object, 199 | geom = "violin", 200 | min = min(params[["min_features"]]), 201 | max = max(params[["max_features"]]) 202 | ) 203 | 204 | markdownHeader("Ridgeline", level = 2L) 205 | plotFeaturesPerCell( 206 | object = object, 207 | geom = "ridgeline", 208 | min = min(params[["min_features"]]), 209 | max = max(params[["max_features"]]) 210 | ) 211 | 212 | markdownHeader("Histogram", level = 2L) 213 | plotFeaturesPerCell( 214 | object = object, 215 | geom = "histogram", 216 | min = min(params[["min_features"]]), 217 | max = max(params[["max_features"]]) 218 | ) 219 | 220 | markdownHeader("ECDF", level = 2L) 221 | plotFeaturesPerCell( 222 | object = object, 223 | geom = "ecdf", 224 | min = min(params[["min_features"]]), 225 | max = max(params[["max_features"]]) 226 | ) 227 | ``` 228 | 229 | # UMIs vs. features detected 230 | 231 | If we graph out the total number of UMI counts per cell vs. 
the genes detected 232 | per cell, we can assess whether there is a large population of low quality 233 | cells with low counts and/or gene detection. 234 | 235 | ```{r plot-counts-vs-features} 236 | plotCountsVsFeatures(object) 237 | ``` 238 | 239 | # Novelty score {.tabset} 240 | 241 | Another way to QC the data is to look for less novelty, that is cells that have 242 | fewer genes detected per count than other cells. We can see the samples where we 243 | sequenced each cell less have a higher overall novelty; that is because we have 244 | not started saturating the sequencing for any given gene for these samples. 245 | Outlier cells in these samples might be cells that have a less complex RNA 246 | species than other cells. Sometimes we can detect contamination with low 247 | complexity cell types like red blood cells via this metric. 248 | 249 | ```{r plot-novelty} 250 | markdownHeader("Violin", level = 2L) 251 | plotNovelty( 252 | object = object, 253 | geom = "violin", 254 | min = min(params[["min_novelty"]]) 255 | ) 256 | 257 | markdownHeader("Ridgeline", level = 2L) 258 | plotNovelty( 259 | object = object, 260 | geom = "ridgeline", 261 | min = min(params[["min_novelty"]]) 262 | ) 263 | 264 | markdownHeader("Histogram", level = 2L) 265 | plotNovelty( 266 | object = object, 267 | geom = "histogram", 268 | min = min(params[["min_novelty"]]) 269 | ) 270 | 271 | markdownHeader("ECDF", level = 2L) 272 | plotNovelty( 273 | object = object, 274 | geom = "ecdf", 275 | min = min(params[["min_novelty"]]) 276 | ) 277 | ``` 278 | 279 | # Mitochondrial abundance {.tabset} 280 | 281 | We evaluate overall mitochondrial gene expression as a biomarker of cellular 282 | stress during sample preparation. 
283 | 284 | ```{r plot-mito-ratio} 285 | markdownHeader("Violin", level = 2L) 286 | plotMitoRatio( 287 | object = object, 288 | geom = "violin", 289 | max = max(params[["max_mito_ratio"]]) 290 | ) 291 | 292 | markdownHeader("Ridgeline", level = 2L) 293 | plotMitoRatio( 294 | object = object, 295 | geom = "ridgeline", 296 | max = max(params[["max_mito_ratio"]]) 297 | ) 298 | 299 | markdownHeader("Histogram", level = 2L) 300 | plotMitoRatio( 301 | object = object, 302 | geom = "histogram", 303 | max = max(params[["max_mito_ratio"]]) 304 | ) 305 | 306 | markdownHeader("ECDF", level = 2L) 307 | plotMitoRatio( 308 | object = object, 309 | geom = "ecdf", 310 | max = max(params[["max_mito_ratio"]]) 311 | ) 312 | ``` 313 | 314 | # Filter cells 315 | 316 | ```{r filter-cells} 317 | object <- filterCells( 318 | object = object, 319 | minCounts = params[["min_counts"]], 320 | maxCounts = params[["max_counts"]], 321 | minFeatures = params[["min_features"]], 322 | maxFeatures = params[["max_features"]], 323 | maxMitoRatio = params[["max_mito_ratio"]], 324 | minNovelty = params[["min_novelty"]], 325 | nCells = params[["n_cells"]], 326 | minCellsPerFeature = params[["min_cells_per_feature"]] 327 | ) 328 | ``` 329 | 330 | ```{r plot-filtered-qc} 331 | plotQc(object, geom = "violin") 332 | ``` 333 | 334 | # Save filtered data 335 | 336 | ```{r save-filtered} 337 | name <- basenameSansExt(params[["bcb_file"]]) 338 | assignAndSaveData( 339 | name = paste(name, "filtered", sep = "_"), 340 | object = object, 341 | dir = params[["data_dir"]] 342 | ) 343 | ``` 344 | 345 | ```{r export} 346 | export( 347 | object = object, 348 | con = params[["output_dir"]], 349 | compress = TRUE 350 | ) 351 | ``` 352 | 353 | ```{r footer, child="_footer.Rmd"} 354 | ``` 355 | 356 | ```{r links, child="_links.Rmd"} 357 | ``` 358 | -------------------------------------------------------------------------------- /R/updateObject-methods.R: 
--------------------------------------------------------------------------------
#' Update object
#'
#' @name updateObject
#' @author Michael Steinbaugh
#' @note Updated 2023-12-04.
#'
#' @details
#' The object is coerced to `SingleCellExperiment`, its assays, row ranges
#' metadata columns, column data and metadata are brought in line with the
#' current naming conventions, and a new `bcbioSingleCell` object is created
#' and checked with `validObject()`. When the stored version differs from the
#' current package version, the previous value is kept in `metadata()` as
#' `originalVersion`.
#'
#' @inheritParams AcidRoxygen::params
#'
#' @return Modified object.
#'
#' @examples
#' data(bcb)
#'
#' ## bcbioSingleCell ====
#' updateObject(bcb)
#'
#' ## Example that depends on remote file.
#' ## > x <- import(
#' ## >     con = file.path(
#' ## >         bcbioSingleCellTestsUrl,
#' ## >         "bcbioSingleCell_0.1.0.rds"
#' ## >     )
#' ## > )
#' ## > x <- updateObject(x)
#' ## > x
NULL



## Updated 2022-05-09.
`updateObject,bcbioSingleCell` <- # nolint
    function(object, ..., verbose = FALSE) {
        ## `verbose` only toggles the progress messages (`h1()`, `h2()`,
        ## `alert()`); the update steps themselves always run.
        assert(isFlag(verbose))
        if (isTRUE(verbose)) {
            h1("Update object")
        }
        ## Pull every component out of the object up front. Each one is
        ## modified as a local copy and written back in the "Return" section.
        sce <- as(object, "SingleCellExperiment")
        cells <- colnames(sce)
        assays <- assays(sce)
        rowRanges <- rowRanges(sce)
        colData <- colData(sce)
        metadata <- metadata(sce)
        ## The stored version must already be a `package_version`.
        version <- metadata[["version"]]
        assert(is(version, "package_version"))
        if (isTRUE(verbose)) {
            alert(sprintf(
                fmt = "Upgrading {.var %s} from version %s to %s.",
                "bcbioSingleCell",
                as.character(version),
                as.character(.pkgVersion)
            ))
        }
        ## Assays --------------------------------------------------------------
        if (isTRUE(verbose)) {
            h2("Assays")
        }
        ## Ensure raw counts are always named "counts".
        if (isSubset("assay", names(assays))) {
            ## Versions < 0.1 (e.g. 0.0.21).
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} to {.var %s}.",
                    "assay", "counts"
                ))
            }
            names(assays)[names(assays) == "assay"] <- "counts"
        } else if (isSubset("raw", names(assays))) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} assay to {.var %s}.",
                    "raw", "counts"
                ))
            }
            names(assays)[names(assays) == "raw"] <- "counts"
        }
        ## Drop assays that are `NULL`.
        assays <- Filter(Negate(is.null), assays)
        ## Put the required assays first, in order.
        assays <- assays[unique(c(.requiredAssays, names(assays)))]
        assert(isSubset(.requiredAssays, names(assays)))
        ## Row data ------------------------------------------------------------
        ## Only rename the metadata columns when there are any.
        if (hasNames(mcols(rowRanges))) {
            mcols(rowRanges) <-
                camelCase(mcols(rowRanges), strict = TRUE)
        }
        ## Column data ---------------------------------------------------------
        if (isTRUE(verbose)) {
            h2("Column data")
        }
        colnames(colData) <- camelCase(colnames(colData), strict = TRUE)
        ## This rename only runs when both columns are present. The order
        ## matters: `nCount` has to be moved to `nRead` before `nUmi` can
        ## take over the `nCount` name.
        if (isSubset(c("nCount", "nUmi"), colnames(colData))) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} to {.var %s}.",
                    "nCount", "nRead"
                ))
            }
            colnames(colData)[colnames(colData) == "nCount"] <- "nRead"
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} to {.var %s}.",
                    "nUmi", "nCount"
                ))
            }
            colnames(colData)[colnames(colData) == "nUmi"] <- "nCount"
        }
        ## The `log10GenesPerUmi` rename is only attempted when `nGene` is
        ## present.
        if (isSubset("nGene", colnames(colData))) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} to {.var %s}.",
                    "nGene", "nFeature"
                ))
            }
            colnames(colData)[colnames(colData) == "nGene"] <- "nFeature"
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} to {.var %s}.",
                    "log10GenesPerUmi", "log10FeaturesPerCount"
                ))
            }
            colnames(colData)[colnames(colData) == "log10GenesPerUmi"] <-
                "log10FeaturesPerCount"
        }
        ## Move sampleData into colData.
        ## `sampleData` takes precedence over `sampleMetadata` when both are
        ## stashed in the metadata.
        if (isSubset("sampleData", names(metadata))) {
            sampleData <- metadata[["sampleData"]]
        } else if (isSubset("sampleMetadata", names(metadata))) {
            sampleData <- metadata[["sampleMetadata"]]
        } else {
            sampleData <- NULL
        }
        if (!is.null(sampleData)) {
            colnames(sampleData) <-
                camelCase(colnames(sampleData), strict = TRUE)
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Moving {.var %s} from {.fun %s} into {.fun %s}.",
                    "sampleData", "metadata", "colData"
                ))
            }
            assert(isSubset("sampleId", colnames(sampleData)))
            sampleData <- as(sampleData, "DataFrame")
            ## Drop the colData columns that are also defined in sampleData,
            ## so the values joined in below come from sampleData.
            colData <- colData[
                ,
                setdiff(colnames(colData), colnames(sampleData)),
                drop = FALSE
            ]
            if (isTRUE(verbose)) {
                alert("Mapping cells to samples.")
            }
            c2s <- mapCellsToSamples(
                cells = cells,
                samples = as.character(sampleData[["sampleId"]])
            )
            assert(is.factor(c2s))
            ## `sampleId` is the join key on both sides; on the sampleData
            ## side it is reset from the row names.
            colData[["sampleId"]] <- c2s
            sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
            colData <- leftJoin(x = colData, y = sampleData, by = "sampleId")
            assert(
                is(colData, "DataFrame"),
                identical(rownames(colData), colnames(object))
            )
            ## Ensure rows are ordered to match the object.
            colData <- colData[cells, , drop = FALSE]
        }
        ## Metadata ------------------------------------------------------------
        if (isTRUE(verbose)) {
            h2("Metadata")
        }
        ## dataVersions.
        dataVersions <- metadata[["dataVersions"]]
        if (is(dataVersions, "data.frame")) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Setting {.var %s} as {.cls %s}.",
                    "dataVersions", "DataFrame"
                ))
            }
            metadata[["dataVersions"]] <- as(dataVersions, "DataFrame")
        }
        ## ensemblRelease.
        if (isSubset("ensemblVersion", names(metadata))) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} to {.var %s}.",
                    "ensemblVersion", "ensemblRelease"
                ))
            }
            names(metadata)[
                names(metadata) == "ensemblVersion"
            ] <- "ensemblRelease"
        }
        ## Coerce a non-integer numeric release (e.g. `90`) to integer.
        if (
            is.numeric(metadata[["ensemblRelease"]]) &&
                !is.integer(metadata[["ensemblRelease"]])
        ) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Setting {.var %s} as integer.",
                    "ensemblRelease"
                ))
            }
            metadata[["ensemblRelease"]] <-
                as.integer(metadata[["ensemblRelease"]])
        }
        ## Update the version, if necessary.
        ## The previous value is kept as `originalVersion`.
        if (!identical(metadata[["version"]], .pkgVersion)) {
            metadata[["originalVersion"]] <- metadata[["version"]]
            metadata[["version"]] <- .pkgVersion
        }
        ## gffFile.
        if (isSubset("gtfFile", names(metadata))) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} to {.var %s}.",
                    "gtfFile", "gffFile"
                ))
            }
            names(metadata)[names(metadata) == "gtfFile"] <- "gffFile"
        }
        if (!isSubset("gffFile", names(metadata))) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Setting {.var %s} as {.val %s}.",
                    "gffFile", "empty character"
                ))
            }
            metadata[["gffFile"]] <- character()
        }
        ## lanes.
        if (!is.integer(metadata[["lanes"]])) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Setting {.var %s} as {.val %s}.",
                    "lanes", "integer"
                ))
            }
            metadata[["lanes"]] <- as.integer(metadata[["lanes"]])
        }
        ## level.
        ## Defaults to "genes" when not defined.
        if (!isSubset("level", names(metadata))) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Setting {.var %s} as {.val %s}.",
                    "level", "genes"
                ))
            }
            metadata[["level"]] <- "genes"
        }
        ## programVersions.
        ## Only rename `programs` when `programVersions` isn't already
        ## defined.
        if (
            !isSubset("programVersions", names(metadata)) &&
                isSubset("programs", names(metadata))
        ) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Renaming {.var %s} to {.var %s}.",
                    "programs", "programVersions"
                ))
            }
            names(metadata)[names(metadata) == "programs"] <- "programVersions"
        }
        programVersions <- metadata[["programVersions"]]
        if (is(programVersions, "data.frame")) {
            metadata[["programVersions"]] <- as(programVersions, "DataFrame")
        }
        ## sampleMetadataFile.
        if (!is.character(metadata[["sampleMetadataFile"]])) {
            if (isTRUE(verbose)) {
                alert(sprintf(
                    "Setting {.var %s} as {.val %s}.",
                    "sampleMetadataFile", "empty character"
                ))
            }
            metadata[["sampleMetadataFile"]] <- character()
        }
        ## sessionInfo.
        ## Keep the `utilsSessionInfo` entry under the `sessionInfo` name and
        ## remove the `devtoolsSessionInfo` entry.
        if (isSubset("utilsSessionInfo", names(metadata))) {
            if (isTRUE(verbose)) {
                alert(sprintf("Simplifying stashed {.var %s}.", "sessionInfo"))
            }
            names(metadata)[
                names(metadata) == "utilsSessionInfo"
            ] <- "sessionInfo"
            metadata[["devtoolsSessionInfo"]] <- NULL
        }
        ## Drop legacy slots.
        keep <- setdiff(
            x = names(metadata),
            y = c("cellToSample", "sampleData", "sampleMetadata")
        )
        metadata <- metadata[keep]
        ## Return --------------------------------------------------------------
        ## Write the updated components back and rebuild the object;
        ## `validObject()` is called on the result before returning it.
        assays(sce) <- assays
        rowRanges(sce) <- rowRanges
        colData(sce) <- colData
        metadata(sce) <- metadata
        bcb <- new(Class = "bcbioSingleCell", sce)
        validObject(bcb)
        if (isTRUE(verbose)) {
            alertSuccess(sprintf(
                "Update of {.var %s} object was successful.",
                "bcbioSingleCell"
            ))
        }
        bcb
    }



#' @rdname updateObject
#' @export
setMethod(
    f = "updateObject",
    signature = signature(object = "bcbioSingleCell"),
    definition = `updateObject,bcbioSingleCell`
)
-------------------------------------------------------------------------------- /R/AllGenerators.R: -------------------------------------------------------------------------------- 1 | #' @inherit bcbioSingleCell-class title description 2 | #' @author Michael Steinbaugh 3 | #' @note Updated 2023-09-21. 4 | #' @export 5 | #' 6 | #' @inheritParams AcidSingleCell::makeSingleCellExperiment 7 | #' @inheritParams AcidRoxygen::params 8 | #' 9 | #' @section Remote data: 10 | #' 11 | #' When working in RStudio, we recommend connecting to the bcbio-nextgen run 12 | #' directory as a remote connection over 13 | #' [sshfs](https://github.com/osxfuse/osxfuse/wiki/SSHFS). 14 | #' 15 | #' @return `bcbioSingleCell`. 16 | #' 17 | #' @seealso 18 | #' - `SingleCellExperiment::SingleCellExperiment()`. 19 | #' - `.S4methods(class = "bcbioSingleCell")`.
#'
#' @details
#' The import proceeds in these stages:
#'
#' 1. Run info is read from the bcbio project directory
#'    (`project-summary.yaml`, `data_versions.csv`, `programs.txt` and the two
#'    bcbio log files).
#' 2. Sample metadata is taken from `sampleMetadataFile` when supplied;
#'    otherwise it is generated from the sample directory names (multiplexed
#'    platforms) or from the bcbio YAML.
#' 3. Feature annotations are resolved with this priority: Ensembl (requires
#'    `organism` and `ensemblRelease`), then a GTF/GFF file (user-supplied
#'    `gffFile`, or the one referenced in the bcbio YAML), then empty ranges.
#' 4. The object is prefiltered via `calculateMetrics(prefilter = TRUE)` and
#'    the raw `nRead` per-barcode counts are bound into `colData`.
#'
#' A user-supplied `gffFile` must be an existing local file or a URL.
#'
#' @examples
#' uploadDir <- system.file("extdata/indrops", package = "bcbioSingleCell")
#'
#' x <- bcbioSingleCell(uploadDir)
#' print(x)
#'
#' x <- bcbioSingleCell(
#'     uploadDir = uploadDir,
#'     sampleMetadataFile = file.path(uploadDir, "metadata.csv")
#' )
#' print(x)
bcbioSingleCell <-
    function(uploadDir,
             sampleMetadataFile = NULL,
             organism = NULL,
             ensemblRelease = NULL,
             genomeBuild = NULL,
             gffFile = NULL,
             transgeneNames = NULL,
             interestingGroups = "sampleName") {
        assert(
            isADirectory(uploadDir),
            isString(sampleMetadataFile, nullOk = TRUE),
            isString(organism, nullOk = TRUE),
            isInt(ensemblRelease, nullOk = TRUE),
            isString(genomeBuild, nullOk = TRUE),
            isString(gffFile, nullOk = TRUE),
            isCharacter(transgeneNames, nullOk = TRUE),
            isCharacter(interestingGroups)
        )
        ## The file-or-URL check must be wrapped in `assert()`; evaluated on
        ## its own the result is discarded and an invalid path slips through.
        if (isString(gffFile)) {
            assert(
                isAFile(gffFile) || isAUrl(gffFile),
                msg = "'gffFile' must be an existing file or a URL."
            )
        }
        h1("bcbioSingleCell")
        alert("Importing bcbio-nextgen single-cell RNA-seq run")
        ## Run info ------------------------------------------------------------
        uploadDir <- realpath(uploadDir)
        projectDir <- projectDir(uploadDir)
        sampleDirs <- sampleDirs(uploadDir)
        lanes <- detectLanes(sampleDirs)
        yaml <- import(file.path(projectDir, "project-summary.yaml"))
        dataVersions <-
            importDataVersions(file.path(projectDir, "data_versions.csv"))
        assert(is(dataVersions, "DataFrame"))
        programVersions <-
            importProgramVersions(file.path(projectDir, "programs.txt"))
        ## Check the object that was just imported (not `dataVersions` again).
        assert(is(programVersions, "DataFrame"))
        log <- import(file.path(projectDir, "bcbio-nextgen.log"))
        ## This step enables our minimal dataset to pass checks: an empty log
        ## is downgraded from an assertion error to a warning.
        tryCatch(
            expr = assert(isCharacter(log)),
            error = function(e) {
                alertWarning(sprintf(
                    "{.file %s} file is empty.",
                    "bcbio-nextgen.log"
                ))
            }
        )
        commandsLog <-
            import(file.path(projectDir, "bcbio-nextgen-commands.log"))
        ## This step enables our minimal dataset to pass checks.
        tryCatch(
            expr = assert(isCharacter(commandsLog)),
            error = function(e) {
                alertWarning(
                    "{.file bcbio-nextgen-commands.log} file is empty."
                )
            }
        )
        cutoff <- getBarcodeCutoffFromCommands(commandsLog)
        level <- getLevelFromCommands(commandsLog)
        umiType <- getUmiTypeFromCommands(commandsLog)
        ## Check to see if we're dealing with a multiplexed platform.
        multiplexed <- any(vapply(
            X = c("dropseq", "indrop"),
            FUN = function(pattern) {
                grepl(pattern = pattern, x = umiType)
            },
            FUN.VALUE = logical(1L)
        ))
        ## Sample metadata -----------------------------------------------------
        h2("Sample metadata")
        allSamples <- TRUE
        sampleData <- NULL
        if (isString(sampleMetadataFile)) {
            sampleData <- importSampleData(
                file = sampleMetadataFile,
                lanes = lanes,
                pipeline = "bcbio"
            )
            ## Error on incorrect reverse complement input: the user-supplied
            ## sequences must not be identical to the ones embedded in the
            ## sample directory names.
            if (isSubset("sequence", colnames(sampleData))) {
                sampleDirSequence <- strMatch(
                    x = names(sampleDirs),
                    pattern = "^.+_([ACGT]+)$"
                )[, 2L]
                assert(
                    !identical(
                        sort(sampleDirSequence),
                        sort(as.character(sampleData[["sequence"]]))
                    ),
                    msg = paste(
                        "It appears that the reverse complement sequence of",
                        "the i5 index barcodes were input into the sample",
                        "metadata 'sequence' column. bcbio outputs the revcomp",
                        "into the sample directories, but the forward sequence",
                        "should be used in the R package."
                    )
                )
            }
            ## Allow sample selection with this file.
            if (nrow(sampleData) < length(sampleDirs)) {
                sampleDirs <- sampleDirs[rownames(sampleData)]
                alert(sprintf(
                    fmt = "Loading a subset of samples: %s.",
                    toInlineString(basename(sampleDirs), n = 5L)
                ))
                allSamples <- FALSE
            }
        }
        ## Assays (counts) -----------------------------------------------------
        h2("Counts")
        ## Note that we're now allowing transcript-level counts.
        counts <- .importCounts(sampleDirs = sampleDirs)
        assert(hasValidDimnames(counts))
        ## Row data (genes/transcripts) ----------------------------------------
        h2("Feature metadata")
        ## Annotation priority:
        ## 1. AnnotationHub.
        ##    - Requires `organism` to be declared.
        ##    - Ensure that Ensembl release and genome build match.
        ## 2. GTF/GFF file. Use the bcbio GTF if possible.
        ## 3. Fall back to slotting empty ranges. This is offered as support for
        ##    complex datasets (e.g. multiple organisms).
        if (isString(organism) && is.numeric(ensemblRelease)) {
            ## AnnotationHub (ensembldb).
            alert("{.fun makeGRangesFromEnsembl}")
            rowRanges <- makeGRangesFromEnsembl(
                organism = organism,
                level = level,
                genomeBuild = genomeBuild,
                release = ensemblRelease
            )
        } else {
            ## GTF/GFF file.
            if (is.null(gffFile)) {
                ## Attempt to use bcbio GTF automatically.
                gffFile <- getGtfFileFromYaml(yaml)
            }
            if (!is.null(gffFile)) {
                alert("{.fun makeGRangesFromGff}")
                ## Only local paths are resolved; a URL (accepted by the
                ## argument check above) is passed through untouched.
                if (isAFile(gffFile)) {
                    gffFile <- realpath(gffFile)
                }
                rowRanges <- makeGRangesFromGff(file = gffFile, level = level)
            } else {
                alertWarning("Slotting empty ranges into {.fun rowRanges}.")
                rowRanges <- emptyRanges(rownames(counts))
            }
        }
        assert(is(rowRanges, "GenomicRanges"))
        ## Attempt to get genome build and Ensembl release if not declared.
        ## Note that these will remain NULL when using GTF file (see above).
        if (is.null(genomeBuild)) {
            genomeBuild <- metadata(rowRanges)[["genomeBuild"]]
        }
        if (is.null(ensemblRelease)) {
            ensemblRelease <- metadata(rowRanges)[["ensemblRelease"]]
        }
        ## Column data ---------------------------------------------------------
        h2("Column data")
        colData <- DataFrame(row.names = colnames(counts))
        ## Generate automatic sample metadata, if necessary.
        if (is.null(sampleData)) {
            if (isTRUE(multiplexed)) {
                ## Multiplexed samples without user-defined metadata.
                alertWarning(sprintf(
                    fmt = paste(
                        "{.var %s} is recommended for",
                        "multiplexed samples (e.g. {.val %s})."
                    ),
                    "sampleMetadataFile", umiType
                ))
                sampleData <- minimalSampleData(basename(sampleDirs))
            } else {
                sampleData <- getSampleDataFromYaml(yaml)
            }
        }
        assert(isSubset(rownames(sampleData), names(sampleDirs)))
        ## Join `sampleData` into cell-level `colData`.
        if (identical(nrow(sampleData), 1L)) {
            colData[["sampleId"]] <- as.factor(rownames(sampleData))
        } else {
            colData[["sampleId"]] <- mapCellsToSamples(
                cells = rownames(colData),
                samples = rownames(sampleData)
            )
        }
        sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
        ## Need to ensure the `sampleId` factor levels match up, otherwise we'll
        ## get a warning during the `leftJoin()` call below.
        assert(areSetEqual(
            x = levels(colData[["sampleId"]]),
            y = levels(sampleData[["sampleId"]])
        ))
        levels(sampleData[["sampleId"]]) <- levels(colData[["sampleId"]])
        colData <- leftJoin(colData, sampleData, by = "sampleId")
        assert(
            is(colData, "DataFrame"),
            hasRownames(colData)
        )
        ## Metadata ------------------------------------------------------------
        h2("Metadata")
        cbList <- .importReads(sampleDirs = sampleDirs)
        runDate <- runDate(projectDir)
        interestingGroups <- camelCase(interestingGroups, strict = TRUE)
        assert(isSubset(interestingGroups, colnames(sampleData)))
        metadata <- list(
            "allSamples" = allSamples,
            "bcbioCommandsLog" = commandsLog,
            "bcbioLog" = log,
            "call" = standardizeCall(),
            "cellularBarcodeCutoff" = cutoff,
            "cellularBarcodes" = cbList,
            "dataVersions" = dataVersions,
            "ensemblRelease" = as.integer(ensemblRelease),
            "genomeBuild" = as.character(genomeBuild),
            "gffFile" = as.character(gffFile),
            "interestingGroups" = interestingGroups,
            "lanes" = lanes,
            "level" = level,
            "organism" = as.character(organism),
            "pipeline" = "bcbio",
            "programVersions" = programVersions,
            "projectDir" = projectDir,
            "runDate" = runDate,
            "sampleDirs" = sampleDirs,
            "sampleMetadataFile" = as.character(sampleMetadataFile),
            "umiType" = umiType,
            "uploadDir" = uploadDir,
            "version" = .pkgVersion,
            "yaml" = yaml
        )
        ## SingleCellExperiment ------------------------------------------------
        object <- makeSingleCellExperiment(
            assays = SimpleList("counts" = counts),
            rowRanges = rowRanges,
            colData = colData,
            metadata = metadata,
            transgeneNames = transgeneNames
        )
        ## Return --------------------------------------------------------------
        ## Always prefilter, removing very low quality cells and/or genes.
        object <- calculateMetrics(object = object, prefilter = TRUE)
        ## Bind the `nRead` column into the cell metrics. These are the number
        ## of raw read counts prior to UMI disambiguation that bcbio uses for
        ## initial filtering (`minimum_barcode_depth` in YAML).
        colData <- colData(object)
        nRead <- .nRead(cbList)
        assert(
            is.integer(nRead),
            isSubset(rownames(colData), names(nRead)),
            areDisjointSets("nRead", colnames(colData))
        )
        colData[["nRead"]] <- unname(nRead[rownames(colData)])
        colData <- colData[, sort(colnames(colData)), drop = FALSE]
        colData(object) <- colData
        bcb <- new(Class = "bcbioSingleCell", object)
        alertSuccess("bcbio single-cell RNA-seq run imported successfully.")
        bcb
    }

--------------------------------------------------------------------------------
/R/plotReadsPerCell-methods.R:
--------------------------------------------------------------------------------
#' @name plotReadsPerCell
#' @author Michael Steinbaugh, Rory Kirchner
#' @inherit AcidGenerics::plotReadsPerCell
#' @note Updated 2023-12-04.
#'
#' @inheritParams AcidRoxygen::params
#' @param ... Additional arguments.
#'
#' @param cutoffLine `logical(1)`.
#' Include a line marking the cutoff.
#'
#' @examples
#' data(bcb)
#'
#' ## bcbioSingleCell ====
#' plotReadsPerCell(bcb, geom = "histogram")
#' plotReadsPerCell(bcb, geom = "ecdf")
NULL



#' Proportional cellular barcodes data
#'
#' Modified version of Allon Klein Lab MATLAB code.
#'
#' @author Michael Steinbaugh, Rory Kirchner
#' @keywords internal
#' @note Updated 2022-05-07.
#' @noRd
#'
#' @param data `DataFrame`.
#' Raw read counts per cellular barcode.
#' Return from `.rawMetrics()` function.
#'
#' @return `DataFrame`.
.proportionalReadsPerCell <-
    function(data,
             sampleData,
             breaks = 100L) {
        ## Arguments:
        ## - data: `DataFrame` with integer `nRead` and factor `sampleId`.
        ## - sampleData: `DataFrame` of sample metadata, keyed by row names.
        ## - breaks: `integer(1)` number of histogram bins requested.
        ## Returns a `DataFrame` with one row per histogram bin per sample
        ## (`sampleId`, `log10Read`, `proportion`), joined with `sampleData`.
        assert(
            requireNamespaces("graphics"),
            is(data, "DataFrame"),
            isSubset(c("nRead", "sampleId"), colnames(data)),
            is.integer(data[["nRead"]]),
            is.factor(data[["sampleId"]]),
            is(sampleData, "DataFrame"),
            isInt(breaks)
        )
        sampleData[["sampleId"]] <- as.factor(rownames(sampleData))
        sampleIds <- levels(data[["sampleId"]])
        perSample <- DataFrameList(lapply(
            X = sampleIds,
            FUN = function(sampleId) {
                keep <- which(data[["sampleId"]] == sampleId)
                cells <- data[keep, , drop = FALSE]
                ## Histogram of log10-transformed counts. The bin count is
                ## passed by its full argument name rather than relying on
                ## partial matching of `n` to `nclass`.
                h <- graphics::hist(
                    x = log10(cells[["nRead"]]),
                    breaks = breaks,
                    plot = FALSE
                )
                ## Klein Lab MATLAB code reference.
                ## counts: fLog; mids: xLog
                ## Weight each bin by its mid-point read depth, then
                ## normalize so the proportions sum to 1 within the sample.
                weighted <- h[["counts"]] * (10L^h[["mids"]])
                DataFrame(
                    "sampleId" = factor(sampleId),
                    "log10Read" = h[["mids"]],
                    "proportion" = weighted / sum(weighted)
                )
            }
        ))
        out <- unlist(perSample, recursive = FALSE, use.names = FALSE)
        out <- leftJoin(out, sampleData, by = "sampleId")
        out
    }



#' Plot proportional reads per cell histogram
#'
#' @note Updated 2023-08-16.
#' @noRd
#'
#' @param data Return from `.proportionalReadsPerCell()` function.
#' @param min Cutoff value. A vertical line is drawn at `log10(min)` when
#' `min` is greater than zero.
#'
#' @return `ggplot`.
.plotReadsPerCellHistogram <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        p <- ggplot(
            data = as.data.frame(data),
            mapping = aes(
                x = .data[["log10Read"]],
                y = .data[["proportion"]],
                color = .data[["interestingGroups"]]
            )
        ) +
            geom_step(alpha = 0.75, linewidth = 1L) +
            labs(x = "log10 reads per cell", y = "proportion of reads")
        ## Cutoff line (x-axis is already on the log10 scale).
        if (min > 0L) {
            p <- p + acid_geom_abline(xintercept = log10(min))
        }
        ## Color palette.
        p <- p + acid_scale_color_discrete()
        ## Facet by `aggregate` only when that column is present.
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }



## Boxplot of raw reads per cell, one box per sample, on a log10 y-axis.
## `min` > 0 adds a horizontal cutoff line at `min`.
## Updated 2023-08-16.
.plotReadsPerCellBoxplot <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        df <- as.data.frame(data)
        p <- ggplot(
            data = df,
            mapping = aes(
                x = .data[["sampleName"]],
                y = .data[["nRead"]],
                fill = .data[["interestingGroups"]]
            )
        ) +
            geom_boxplot(color = "black", outlier.shape = NA) +
            scale_y_continuous(trans = "log10") +
            acid_geom_label_average(
                data = df,
                col = "nRead",
                digits = 0L
            ) +
            labs(x = NULL, y = "reads per cell")
        ## Cutoff line.
        if (min > 0L) {
            p <- p + acid_geom_abline(yintercept = min)
        }
        ## Color palette.
        p <- p + acid_scale_fill_discrete()
        ## Facet by `aggregate` only when that column is present.
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }



## Updated 2023-08-16.
## Empirical cumulative distribution of raw reads per cell (log10 x-axis).
## `min` > 0 adds a vertical cutoff line at `min`.
.plotReadsPerCellEcdf <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        p <- ggplot(
            data = as.data.frame(data),
            mapping = aes(
                x = .data[["nRead"]],
                color = .data[["interestingGroups"]]
            )
        ) +
            stat_ecdf(geom = "step", linewidth = 1L) +
            labs(x = "reads per cell", y = "frequency") +
            scale_x_continuous(trans = "log10")
        ## Cutoff line.
        if (min > 0L) {
            p <- p + acid_geom_abline(xintercept = min)
        }
        ## Color palette.
        p <- p + acid_scale_color_discrete()
        ## Facet by `aggregate` only when that column is present.
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }



## Ridgeline density of raw reads per cell, one ridge per sample (log10
## x-axis). `min` > 0 adds a vertical cutoff line at `min`.
## Updated 2023-08-16.
.plotReadsPerCellRidgeline <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        df <- as.data.frame(data)
        p <- ggplot(
            data = df,
            mapping = aes(
                x = .data[["nRead"]],
                y = .data[["sampleName"]],
                fill = .data[["interestingGroups"]]
            )
        ) +
            geom_density_ridges(
                alpha = 0.75,
                color = "black",
                panel_scaling = TRUE,
                scale = 10L
            ) +
            scale_x_continuous(trans = "log10") +
            acid_geom_label_average(
                data = df,
                col = "nRead",
                digits = 0L
            ) +
            labs(x = "reads per cell", y = NULL)
        ## Cutoff line.
        if (min > 0L) {
            p <- p + acid_geom_abline(xintercept = min)
        }
        ## Color palette.
        p <- p + acid_scale_fill_discrete()
        ## Facet by `aggregate` only when that column is present.
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }



## Violin plot of raw reads per cell, one violin per sample (log10 y-axis).
## `min` > 0 adds a horizontal cutoff line at `min`.
## Updated 2023-08-16.
.plotReadsPerCellViolin <-
    function(data,
             min = 0L) {
        assert(is(data, "DataFrame"))
        df <- as.data.frame(data)
        p <- ggplot(
            data = df,
            mapping = aes(
                x = .data[["sampleName"]],
                y = .data[["nRead"]],
                fill = .data[["interestingGroups"]]
            )
        ) +
            geom_violin(color = "black", scale = "count") +
            scale_y_continuous(trans = "log10") +
            acid_geom_label_average(
                data = df,
                col = "nRead",
                digits = 0L
            ) +
            labs(x = NULL, y = "reads per cell")
        ## Cutoff line.
        if (min > 0L) {
            p <- p + acid_geom_abline(yintercept = min)
        }
        ## Color palette.
        p <- p + acid_scale_fill_discrete()
        ## Facet by `aggregate` only when that column is present.
        if (isSubset("aggregate", colnames(data))) {
            p <- p + facet_wrap(
                facets = vars(!!!syms("aggregate")),
                scales = "free"
            )
        }
        p
    }



## S4 method body: validates the object, resolves the interesting groups,
## derives the optional cutoff line from the stashed barcode cutoff, and
## dispatches to the geom-specific plotting helper. The `geom` default is
## injected below from `.geom`.
## Updated 2023-08-16.
`plotReadsPerCell,bcbioSingleCell` <- # nolint
    function(object,
             interestingGroups = NULL,
             geom,
             cutoffLine = FALSE,
             title = "Reads per cell") {
        validObject(object)
        assert(isString(title, nullOk = TRUE))
        interestingGroups(object) <-
            matchInterestingGroups(object, interestingGroups)
        interestingGroups <- interestingGroups(object)
        geom <- match.arg(geom)
        ## Minimum reads per barcode cutoff (for unfiltered data). Objects
        ## that carry `filterCells` metadata get no cutoff line or subtitle.
        min <- 0L
        subtitle <- NULL
        if (is.null(metadata(object)[["filterCells"]])) {
            cutoff <- metadata(object)[["cellularBarcodeCutoff"]]
            subtitle <- paste("cutoff", cutoff, sep = " = ")
            if (isTRUE(cutoffLine)) {
                min <- cutoff
            }
        }
        assert(isInt(min))
        ## This step will intentionally error for filtered objects.
        data <- .rawMetrics(object)
        p <- switch(
            EXPR = geom,
            boxplot = .plotReadsPerCellBoxplot(data = data, min = min),
            ecdf = .plotReadsPerCellEcdf(data = data, min = min),
            histogram = {
                ## The histogram is drawn from per-sample binned proportions
                ## rather than from the raw per-cell metrics.
                data <- .proportionalReadsPerCell(
                    data = data,
                    sampleData = sampleData(object)
                )
                .plotReadsPerCellHistogram(data = data, min = min)
            },
            ridgeline = .plotReadsPerCellRidgeline(data = data, min = min),
            violin = .plotReadsPerCellViolin(data = data, min = min)
        )
        ## Add title and subtitle containing cutoff information. The same
        ## legend title is applied to both the color and fill aesthetics.
        legendTitle <- paste(interestingGroups, collapse = ":\n")
        p <- p +
            labs(
                title = title,
                subtitle = subtitle,
                color = legendTitle,
                fill = legendTitle
            )
        p
    }

formals(`plotReadsPerCell,bcbioSingleCell`)[["geom"]] <- # nolint
    .geom



#' @rdname plotReadsPerCell
#' @export
setMethod(
    f = "plotReadsPerCell",
    signature = signature(object = "bcbioSingleCell"),
    definition = `plotReadsPerCell,bcbioSingleCell`
)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 
101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. 
For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 
162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 
197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. 
Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. 
If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 
297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 
324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 
348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. 
If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 
416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year> <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published by 637 | the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>. 662 | --------------------------------------------------------------------------------