├── data
├── example_sce.rda
├── holik_data.rda
├── example_marker_mat.rda
├── example_TME_markers.rda
└── example_cellassign_fit.rda
├── docs
├── pkgdown.yml
├── articles
│ ├── cellassign_overview.png
│ ├── introduction-to-cellassign_files
│ │ └── figure-html
│ │ │ └── unnamed-chunk-16-1.png
│ ├── constructing-markers-from-purified-data_files
│ │ └── figure-html
│ │ │ └── unnamed-chunk-17-1.png
│ └── index.html
├── link.svg
├── docsearch.js
├── pkgdown.js
├── 404.html
├── authors.html
├── pkgdown.css
├── reference
│ ├── dot-onLoad.html
│ ├── initialize_X.html
│ ├── get_mle_cell_type.html
│ ├── extract_expression_matrix.html
│ ├── holik_data.html
│ ├── example_TME_markers.html
│ ├── example_sce.html
│ ├── example_cellassign_fit.html
│ ├── example_marker_mat.html
│ ├── print.cellassign.html
│ ├── inference_tensorflow.html
│ ├── index.html
│ ├── simulate_cellassign.html
│ └── marker_list_to_mat.html
└── docsearch.css
├── tests
├── testthat.R
└── testthat
│ └── test_cellassign.R
├── inst
└── cellassign_schematic.png
├── .Rbuildignore
├── vignettes
├── cellassign_overview.png
├── constructing-markers-from-purified-data.Rmd
└── introduction-to-cellassign.Rmd
├── .gitignore
├── man
├── dot-onLoad.Rd
├── initialize_X.Rd
├── get_mle_cell_type.Rd
├── holik_data.Rd
├── extract_expression_matrix.Rd
├── example_TME_markers.Rd
├── example_sce.Rd
├── example_marker_mat.Rd
├── example_cellassign_fit.Rd
├── print.cellassign.Rd
├── mleparams.Rd
├── cellprobs.Rd
├── celltypes.Rd
├── inference_tensorflow.Rd
├── marker_list_to_mat.Rd
├── simulate_cellassign.Rd
└── cellassign.Rd
├── NAMESPACE
├── .travis.yml
├── DESCRIPTION
├── CODE_OF_CONDUCT.md
├── R
├── simulate.R
├── utils.R
└── inference-tensorflow.R
├── README.md
└── LICENSE.md
/data/example_sce.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/example_sce.rda
--------------------------------------------------------------------------------
/data/holik_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/holik_data.rda
--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
1 | pandoc: 2.3.1
2 | pkgdown: 1.4.1
3 | pkgdown_sha: ~
4 | articles: []
5 |
6 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)    # provides test_check()
2 | library(cellassign)  # the package under test
3 | # Run the package's tests (the test files live under tests/testthat/).
4 | test_check("cellassign")
5 |
--------------------------------------------------------------------------------
/data/example_marker_mat.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/example_marker_mat.rda
--------------------------------------------------------------------------------
/data/example_TME_markers.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/example_TME_markers.rda
--------------------------------------------------------------------------------
/inst/cellassign_schematic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/inst/cellassign_schematic.png
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^Meta$
2 | ^doc$
3 | ^cellassign\.Rproj$
4 | ^\.Rproj\.user$
5 | ^LICENSE\.md$
6 | ^CODE_OF_CONDUCT\.md$
7 |
--------------------------------------------------------------------------------
/data/example_cellassign_fit.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/example_cellassign_fit.rda
--------------------------------------------------------------------------------
/vignettes/cellassign_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/vignettes/cellassign_overview.png
--------------------------------------------------------------------------------
/docs/articles/cellassign_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/docs/articles/cellassign_overview.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Meta
2 | doc
3 | .Rproj.user
4 | .Rhistory
5 | .RData
6 | Rmd/
7 | inst/doc
8 | data/hodgkin_500.rds
9 | inst/*.ai
10 | inst/*.pdf
11 |
--------------------------------------------------------------------------------
/docs/articles/introduction-to-cellassign_files/figure-html/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/docs/articles/introduction-to-cellassign_files/figure-html/unnamed-chunk-16-1.png
--------------------------------------------------------------------------------
/docs/articles/constructing-markers-from-purified-data_files/figure-html/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/docs/articles/constructing-markers-from-purified-data_files/figure-html/unnamed-chunk-17-1.png
--------------------------------------------------------------------------------
/man/dot-onLoad.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{.onLoad}
4 | \alias{.onLoad}
5 | \title{Check for tensorflow}
6 | \usage{
7 | .onLoad(libname, pkgname)
8 | }
9 | \value{
10 | Installs tensorflow if not already installed
11 | }
12 | \description{
13 | Check for tensorflow
14 | }
15 | \keyword{internal}
16 |
--------------------------------------------------------------------------------
/man/initialize_X.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{initialize_X}
4 | \alias{initialize_X}
5 | \title{Create X matrix}
6 | \usage{
7 | initialize_X(X, N, verbose = FALSE)
8 | }
9 | \value{
10 | A cleaned covariate matrix given the input provided by the user
11 | }
12 | \description{
13 | Create X matrix
14 | }
15 | \keyword{internal}
16 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(cellprobs,cellassign)
4 | S3method(celltypes,cellassign)
5 | S3method(mleparams,cellassign)
6 | S3method(print,cellassign)
7 | export(cellassign)
8 | export(cellprobs)
9 | export(celltypes)
10 | export(marker_list_to_mat)
11 | export(mleparams)
12 | import(tensorflow)
13 | importFrom(SummarizedExperiment,assays)
14 | importFrom(methods,is)
15 | importFrom(stats,rnbinom)
16 | importFrom(stats,var)
17 |
--------------------------------------------------------------------------------
/man/get_mle_cell_type.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{get_mle_cell_type}
4 | \alias{get_mle_cell_type}
5 | \title{Get MLE estimates of type of each cell}
6 | \usage{
7 | get_mle_cell_type(gamma)
8 | }
9 | \value{
10 | A vector of MLE cell types, where the names are
11 | taken from the column names of the input matrix
12 | }
13 | \description{
14 | Get MLE estimates of type of each cell
15 | }
16 | \keyword{internal}
17 |
--------------------------------------------------------------------------------
/man/holik_data.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \docType{data}
4 | \name{holik_data}
5 | \alias{holik_data}
6 | \title{Example bulk RNA-seq data}
7 | \format{An object of class \code{list} of length 2.}
8 | \usage{
9 | holik_data
10 | }
11 | \description{
12 | An example bulk RNA-seq dataset from Holik et al. Nucleic Acids Research 2017 to
13 | demonstrate deriving marker genes
14 | }
15 | \examples{
16 | data(holik_data)
17 | }
18 | \keyword{datasets}
19 |
--------------------------------------------------------------------------------
/man/extract_expression_matrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{extract_expression_matrix}
4 | \alias{extract_expression_matrix}
5 | \title{Extract expression matrix from expression object}
6 | \usage{
7 | extract_expression_matrix(exprs_obj, sce_assay = "counts")
8 | }
9 | \value{
10 | The cleaned expression matrix (of counts) from whatever input to \code{cellassign}
11 | }
12 | \description{
13 | Extract expression matrix from expression object
14 | }
15 | \keyword{internal}
16 |
--------------------------------------------------------------------------------
/man/example_TME_markers.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \docType{data}
4 | \name{example_TME_markers}
5 | \alias{example_TME_markers}
6 | \title{Example tumour microenvironment markers}
7 | \format{An object of class \code{list} of length 2.}
8 | \usage{
9 | example_TME_markers
10 | }
11 | \description{
12 | A set of example marker genes commonly used for profiling the
13 | human tumour microenvironment
14 | }
15 | \examples{
16 | data(example_TME_markers)
17 | }
18 | \keyword{datasets}
19 |
--------------------------------------------------------------------------------
/man/example_sce.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \docType{data}
4 | \name{example_sce}
5 | \alias{example_sce}
6 | \title{Example SingleCellExperiment}
7 | \format{An object of class \code{SingleCellExperiment} with 200 rows and 500 columns.}
8 | \usage{
9 | example_sce
10 | }
11 | \description{
12 | An example \code{SingleCellExperiment} with 200 genes (among them the 10 marker genes of \code{example_marker_mat}) and 500 cells.
13 | }
14 | \examples{
15 | data(example_sce)
16 | }
17 | \seealso{
18 | example_cellassign_fit
19 | }
20 | \keyword{datasets}
21 |
--------------------------------------------------------------------------------
/man/example_marker_mat.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \docType{data}
4 | \name{example_marker_mat}
5 | \alias{example_marker_mat}
6 | \title{Example cell marker matrix}
7 | \format{An object of class \code{matrix} with 10 rows and 2 columns.}
8 | \usage{
9 | example_marker_mat
10 | }
11 | \description{
12 | An example matrix for 10 genes and 2 cell types showing the membership
13 | of marker genes to cell types
14 | }
15 | \examples{
16 | data(example_marker_mat)
17 | }
18 | \seealso{
19 | example_cellassign_fit
20 | }
21 | \keyword{datasets}
22 |
--------------------------------------------------------------------------------
/man/example_cellassign_fit.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \docType{data}
4 | \name{example_cellassign_fit}
5 | \alias{example_cellassign_fit}
6 | \title{Example cellassign fit}
7 | \format{An object of class \code{cellassign} of length 3.}
8 | \usage{
9 | example_cellassign_fit
10 | }
11 | \description{
12 | An example fit of calling \code{cellassign} on both
13 | \code{example_marker_mat} and \code{example_sce}
14 | }
15 | \examples{
16 | data(example_cellassign_fit)
17 | }
18 | \seealso{
19 | example_sce, example_marker_mat
20 | }
21 | \keyword{datasets}
22 |
--------------------------------------------------------------------------------
/man/print.cellassign.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \name{print.cellassign}
4 | \alias{print.cellassign}
5 | \title{Print a \code{cellassign} fit}
6 | \usage{
7 | \method{print}{cellassign}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{An object of class \code{cellassign}}
11 |
12 | \item{...}{Additional arguments (unused)}
13 | }
14 | \value{
15 | Prints a structured representation of the \code{cellassign}
16 | }
17 | \description{
18 | Print a \code{cellassign} fit
19 | }
20 | \examples{
21 | data(example_cellassign_fit)
22 | print(example_cellassign_fit)
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/man/mleparams.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \name{mleparams}
4 | \alias{mleparams}
5 | \alias{mleparams.cellassign}
6 | \title{Get the MLE parameter list of a \code{cellassign} fit}
7 | \usage{
8 | mleparams(x)
9 |
10 | \method{mleparams}{cellassign}(x)
11 | }
12 | \arguments{
13 | \item{x}{An object of class \code{cellassign} returned
14 | by a call to \code{cellassign(...)}}
15 | }
16 | \value{
17 | A list of MLE parameter estimates from cellassign
18 | }
19 | \description{
20 | Get the MLE parameter list of a \code{cellassign} fit
21 | }
22 | \examples{
23 | data(example_cellassign_fit)
24 | mleparams(example_cellassign_fit)
25 | }
26 |
--------------------------------------------------------------------------------
/man/cellprobs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \name{cellprobs}
4 | \alias{cellprobs}
5 | \alias{cellprobs.cellassign}
6 | \title{Get the cell assignment probabilities of a \code{cellassign} fit}
7 | \usage{
8 | cellprobs(x)
9 |
10 | \method{cellprobs}{cellassign}(x)
11 | }
12 | \arguments{
13 | \item{x}{An object of class \code{cellassign}
14 | returned by a call to \code{cellassign(...)}}
15 | }
16 | \value{
17 | A cell by celltype matrix with assignment probabilities
18 | }
19 | \description{
20 | Get the MLE cell type assignment probabilities for each cell
21 | }
22 | \examples{
23 | data(example_cellassign_fit)
24 | cellprobs(example_cellassign_fit)
25 | }
26 |
--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 | language: r
3 | warnings_are_errors: false
4 | dist: xenial
5 | r: bioc-devel
6 |
7 | branches:
8 | only:
9 | - bioc
10 | - master
11 | - basilisk
12 |
13 | r_packages:
14 | - devtools
15 | - rmarkdown
16 |
17 | biocpackages:
18 | - SingleCellExperiment
19 |
20 |
21 | cache:
22 | packages: true
23 | directories:
24 | - $HOME/.cache/pip
25 |
26 | addons:
27 | apt:
28 | sources:
29 | ubuntu-toolchain-r-test
30 | packages:
31 | - imagemagick
32 | - libmagick++-dev
33 | - wget
34 | - libatlas3-base
35 | - libatlas-dev
36 | - python-joblib
37 | - python-dev
38 | - libv8-dev
39 |
40 | before_install:
41 | - R -e 'Sys.setenv(PIP_QUIET=1); install.packages("tensorflow"); tensorflow::install_tensorflow(extra_packages="tensorflow-probability", version = "2.1.0")'
42 | - R -e 'tensorflow::tf_config()'
43 |
44 |
--------------------------------------------------------------------------------
/man/celltypes.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \name{celltypes}
4 | \alias{celltypes}
5 | \alias{celltypes.cellassign}
6 | \title{Get the cell type assignments of a \code{cellassign} fit}
7 | \usage{
8 | celltypes(x, assign_prob = 0.95)
9 |
10 | \method{celltypes}{cellassign}(x, assign_prob = 0.95)
11 | }
12 | \arguments{
13 | \item{x}{An object of class \code{cellassign} returned by a call to \code{cellassign(...)}}
14 |
15 | \item{assign_prob}{The probability threshold above which a cell is assigned to a given cell type,
16 | otherwise "unassigned"}
17 | }
18 | \value{
19 | A character vector with the MLE cell type for each cell if its assignment
20 | probability is greater than \code{assign_prob}, and "unassigned" otherwise.
21 | }
22 | \description{
23 | Get the MLE cell type estimates for each cell
24 | }
25 | \examples{
26 | data(example_cellassign_fit)
27 | celltypes(example_cellassign_fit)
28 | }
29 |
--------------------------------------------------------------------------------
/man/inference_tensorflow.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/inference-tensorflow.R
3 | \name{inference_tensorflow}
4 | \alias{inference_tensorflow}
5 | \title{cellassign inference in tensorflow, semi-supervised version}
6 | \usage{
7 | inference_tensorflow(
8 | Y,
9 | rho,
10 | s,
11 | X,
12 | G,
13 | C,
14 | N,
15 | P,
16 | B = 10,
17 | shrinkage,
18 | verbose = FALSE,
19 | n_batches = 1,
20 | rel_tol_adam = 1e-04,
21 | rel_tol_em = 1e-04,
22 | max_iter_adam = 1e+05,
23 | max_iter_em = 20,
24 | learning_rate = 1e-04,
25 | random_seed = NULL,
26 | min_delta = 2,
27 | dirichlet_concentration = rep(0.01, C),
28 | threads = 0
29 | )
30 | }
31 | \value{
32 | A list of MLE cell type calls, MLE parameter estimates,
33 | and log likelihoods during optimization.
34 | }
35 | \description{
36 | cellassign inference in tensorflow, semi-supervised version
37 | }
38 | \keyword{internal}
39 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: cellassign
2 | Version: 0.99.21
3 | Title: Automated, probabilistic assignment of scRNA-seq to cell types
4 | Description: CellAssign assigns cells measured with scRNA-seq to both known and de novo cell types based on the declaration of certain genes as markers for different cell types.
5 | Authors@R: c(
6 | person("Allen", "Zhang", email = "alzhang@bccrc.ca", role = c("aut")),
7 | person("Kieran", "Campbell", email = "kieranrcampbell@gmail.com", role = c("aut", "cre"))
8 | )
9 | License: Apache License (>= 2.0)
10 | Encoding: UTF-8
11 | Depends: R (>= 3.6)
12 | Imports:
13 | methods,
14 | stats,
15 | tensorflow,
16 | SummarizedExperiment,
17 | scran
18 | Suggests:
19 | knitr,
20 | SingleCellExperiment,
21 | rmarkdown,
22 | BiocStyle,
23 | dplyr,
24 | pheatmap,
25 | testthat,
26 | limma,
27 | org.Hs.eg.db,
28 | edgeR,
29 | matrixStats,
30 | plyr,
31 | magrittr,
32 | reticulate,
33 | magick
34 | biocViews:
35 | Software,
36 | Transcriptomics,
37 | GeneExpression,
38 | RNASeq,
39 | SingleCell
40 | LazyData: true
41 | ByteCompile: true
42 | Roxygen: list(markdown = TRUE)
43 | RoxygenNote: 7.0.2
44 | VignetteBuilder: knitr
45 | BugReports: https://github.com/irrationone/cellassign/issues
46 |
--------------------------------------------------------------------------------
/man/marker_list_to_mat.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{marker_list_to_mat}
4 | \alias{marker_list_to_mat}
5 | \title{Convert a list of marker genes to a binary matrix}
6 | \usage{
7 | marker_list_to_mat(marker_list, include_other = TRUE)
8 | }
9 | \arguments{
10 | \item{marker_list}{A list where each entry is named by a cell type and
11 | contains a character vector of gene names belonging to that cell type}
12 |
13 | \item{include_other}{If \code{TRUE} adds a column of zeros for cells that do not
14 | exhibit high expression of any marker gene to be binned into}
15 | }
16 | \value{
17 | A gene by cell type binary matrix with 1 if a gene is a marker for
18 | a cell type and 0 otherwise
19 | }
20 | \description{
21 | Given a list of cell types and marker genes, convert to a binary
22 | gene by cell type matrix required by cellassign.
23 | }
24 | \details{
25 | This function takes a list of marker genes and converts it to a binary
26 | gene by cell type matrix. The input list should be the same
27 | length as the number of cell types with names corresponding to cell types.
28 | Each element of the list should be a character vector of the genes corresponding
29 | to that cell type. There is no requirement for mutually-exclusive marker genes.
30 | }
31 | \examples{
32 | marker_list <- list(
33 | `cell_type_1` = c("geneA", "geneB"),
34 | `cell_type_2` = c("geneB", "geneC")
35 | )
36 | marker_list_to_mat(marker_list)
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Code of Conduct
2 |
3 | As contributors and maintainers of this project, we pledge to respect all people who
4 | contribute through reporting issues, posting feature requests, updating documentation,
5 | submitting pull requests or patches, and other activities.
6 |
7 | We are committed to making participation in this project a harassment-free experience for
8 | everyone, regardless of level of experience, gender, gender identity and expression,
9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
10 |
11 | Examples of unacceptable behavior by participants include the use of sexual language or
12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment,
13 | insults, or other unprofessional conduct.
14 |
15 | Project maintainers have the right and responsibility to remove, edit, or reject comments,
16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this
17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed
18 | from the project team.
19 |
20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
21 | opening an issue or contacting one or more of the project maintainers.
22 |
23 | This Code of Conduct is adapted from the Contributor Covenant
24 | (https://www.contributor-covenant.org), version 1.0.0, available at
25 | https://contributor-covenant.org/version/1/0/0/.
26 |
--------------------------------------------------------------------------------
/man/simulate_cellassign.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/simulate.R
3 | \name{simulate_cellassign}
4 | \alias{simulate_cellassign}
5 | \title{Simulate from the cellassign model}
6 | \usage{
7 | simulate_cellassign(
8 | rho,
9 | s,
10 | pi,
11 | delta,
12 | B = 20,
13 | a,
14 | beta,
15 | X = NULL,
16 | min_Y = 0,
17 | max_Y = 1000
18 | )
19 | }
20 | \arguments{
21 | \item{rho}{A gene by cell type binary matrix relating markers to cell types}
22 |
23 | \item{s}{A vector of cell-specific size factors}
24 |
25 | \item{pi}{An ordinal vector relating each cell to its true marker type assignment}
26 |
27 | \item{delta}{Gene by cell type matrix delta (all entries with corresponding zeros
28 | in rho will be ignored)}
29 |
30 | \item{B}{Granularity of spline-based fitting of dispersions}
31 |
32 | \item{a}{Alpha parameters for spline inference of dispersions}
33 |
34 | \item{beta}{A gene by covariate matrix of coefficients - the first column
35 | should correspond to the intercept (baseline expression) values}
36 |
37 | \item{X}{A cell by covariate matrix of covariates - the intercept column will
38 | always be added.}
39 |
40 | \item{b}{Beta parameters for spline inference of dispersions}
41 | }
42 | \value{
43 | An N by G matrix of simulated counts
44 | }
45 | \description{
46 | Simulate RNA-seq counts from the cell-assign model
47 | }
48 | \details{
49 | The number of genes, cells, and cell types is automatically
50 | inferred from the dimensions of rho (gene by cell-type) and
51 | s (vector of length number of cells). The specification of X
52 | is optional - a column of ones will always be added as an intercept.
53 | }
54 | \keyword{internal}
55 |
--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
1 | $(function() {
2 |
3 | // register a handler to move the focus to the search bar
4 | // upon pressing shift + "/" (i.e. "?")
5 | $(document).on('keydown', function(e) {
6 | if (e.shiftKey && e.keyCode == 191) {
7 | e.preventDefault();
8 | $("#search-input").focus();
9 | }
10 | });
11 |
12 | $(document).ready(function() {
13 | // do keyword highlighting
14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
15 | var mark = function() {
16 |
17 | var referrer = document.URL ;
18 | var paramKey = "q" ;
19 |
20 | if (referrer.indexOf("?") !== -1) {
21 | var qs = referrer.substr(referrer.indexOf('?') + 1);
22 | var qs_noanchor = qs.split('#')[0];
23 | var qsa = qs_noanchor.split('&');
24 | var keyword = "";
25 |
26 | for (var i = 0; i < qsa.length; i++) {
27 | var currentParam = qsa[i].split('=');
28 |
29 | if (currentParam.length !== 2) {
30 | continue;
31 | }
32 |
33 | if (currentParam[0] == paramKey) {
34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
35 | }
36 | }
37 |
38 | if (keyword !== "") {
39 | $(".contents").unmark({
40 | done: function() {
41 | $(".contents").mark(keyword);
42 | }
43 | });
44 | }
45 | }
46 | };
47 |
48 | mark();
49 | });
50 | });
51 |
52 | /* Search term highlighting ------------------------------*/
53 |
54 | function matchedWords(hit) {
55 | var words = [];
56 |
57 | var hierarchy = hit._highlightResult.hierarchy;
58 | // loop to fetch from lvl0, lvl1, etc.
59 | for (var idx in hierarchy) {
60 | words = words.concat(hierarchy[idx].matchedWords);
61 | }
62 |
63 | var content = hit._highlightResult.content;
64 | if (content) {
65 | words = words.concat(content.matchedWords);
66 | }
67 |
68 | // return unique words
69 | var words_uniq = [...new Set(words)];
70 | return words_uniq;
71 | }
72 |
73 | function updateHitURL(hit) {
74 |
75 | var words = matchedWords(hit);
76 | var url = "";
77 |
78 | if (hit.anchor) {
79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor;
80 | } else {
81 | url = hit.url + '?q=' + escape(words.join(" "));
82 | }
83 |
84 | return url;
85 | }
86 |
--------------------------------------------------------------------------------
/tests/testthat/test_cellassign.R:
--------------------------------------------------------------------------------
1 | context("Basic operations")
2 |
3 | # Smoke test: a short cellassign() run on the bundled example data should
4 | # return a "cellassign" fit whose sizes agree with the inputs.
5 | test_that("cellassign(...) returns a valid object", {
6 |   library(SummarizedExperiment)
7 |   data(example_sce)
8 |   data(example_marker_mat)
9 |
10 |   N <- ncol(example_sce)
11 |   C <- ncol(example_marker_mat)
12 |
13 |   # max_iter_adam / max_iter_em are capped at 2: structure is checked, not accuracy.
14 |   fit <- cellassign(example_sce[rownames(example_marker_mat), ],
15 |                     example_marker_mat,
16 |                     s = sizeFactors(example_sce),
17 |                     max_iter_adam = 2,
18 |                     max_iter_em = 2)
19 |
20 |   expect_is(fit, "cellassign")
21 |
22 |   # One label per column of example_sce, using the marker matrix column names.
23 |   cell_types <- fit$cell_type
24 |   expect_equal(length(cell_types), N)
25 |   expect_equal(sort(unique(cell_types)), sort(colnames(example_marker_mat)))
26 |
27 |   # gamma is expected to have one row per column of example_sce and one
28 |   # column per column of the marker matrix.
29 |   gamma <- fit$mle_params$gamma
30 |   expect_equal(C, ncol(gamma))
31 |   expect_equal(N, nrow(gamma))
32 |
33 | })
34 |
35 | # With return_SCE = TRUE the call should hand back a SingleCellExperiment
36 | # with a cellassign_celltype column in colData and a cellassign metadata entry.
37 | test_that("cellassign(...) returns a valid SingleCellExperiment", {
38 |   library(SummarizedExperiment)
39 |   data(example_sce)
40 |   data(example_marker_mat)
41 |
42 |   sce <- cellassign(example_sce[rownames(example_marker_mat), ],
43 |                     example_marker_mat,
44 |                     s = sizeFactors(example_sce),
45 |                     max_iter_adam = 2,
46 |                     max_iter_em = 2,
47 |                     return_SCE = TRUE)
48 |
49 |   expect_is(sce, "SingleCellExperiment")
50 |
51 |   # Results are expected under these exact names.
52 |   expect_true("cellassign_celltype" %in% names(colData(sce)))
53 |   expect_true("cellassign" %in% names(sce@metadata))
54 |
55 | })
56 |
57 |
58 | # marker_list_to_mat() should turn a named list of marker genes into a gene
59 | # by cell type matrix, and cellassign() should run when given the list itself.
60 | test_that("marker_list_to_mat() works as required", {
61 |   # Attach SummarizedExperiment as the other tests do before sizeFactors() is
62 |   # called, so this test does not rely on an earlier test having attached it.
63 |   library(SummarizedExperiment)
64 |   data(example_sce)
65 |
66 |   marker_gene_list <- list(
67 |     Group1 = c("Gene1", "Gene3", "Gene4", "Gene5", "Gene10"),
68 |     Group2 = c("Gene2", "Gene6", "Gene7", "Gene8", "Gene9")
69 |   )
70 |
71 |   mat <- marker_list_to_mat(marker_gene_list, include_other = FALSE)
72 |
73 |   # 10 distinct genes by 2 groups, each gene listed under exactly one group.
74 |   expect_equal(nrow(mat), 10)
75 |   expect_equal(ncol(mat), 2)
76 |   expect_equal(length(setdiff(unlist(marker_gene_list), rownames(mat))), 0)
77 |   expect_equal(sum(mat), length(unique(unlist(marker_gene_list))))
78 |
79 |   fit <- cellassign(example_sce[rownames(mat), ],
80 |                     marker_gene_list,
81 |                     s = sizeFactors(example_sce),
82 |                     max_iter_adam = 2, max_iter_em = 2)
83 |   expect_is(fit, "cellassign")
84 | })
85 |
--------------------------------------------------------------------------------
/R/simulate.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | #' Simulate from the cellassign model
4 | #'
5 | #' Simulate RNA-seq counts from the cell-assign model
6 | #'
7 | #' The number of genes, cells, and cell types is automatically
8 | #' inferred from the dimensions of rho (gene by cell-type) and
9 | #' s (vector of length number of cells). The specification of X
10 | #' is optional - a column of ones will always be added as an intercept.
11 | #' The spline bandwidths (b) are not an argument: they are derived from
12 | #' the spacing of the B basis means placed between min_Y and max_Y.
13 | #'
14 | #' @param rho A gene by cell type binary matrix relating markers to cell types
15 | #' @param s A vector of cell-specific size factors
16 | #' @param pi An ordinal vector relating each cell to its true marker type assignment
17 | #' @param delta Gene by cell type matrix delta (all entries with corresponding zeros
18 | #' in rho will be ignored)
19 | #' @param B Granularity of spline-based fitting of dispersions
20 | #' @param a Alpha parameters for spline inference of dispersions (a single
21 | #' value, or one value per basis function)
22 | #' @param beta A gene by covariate matrix of coefficients - the first column
23 | #' should correspond to the intercept (baseline expression) values
24 | #' @param X A cell by covariate matrix of covariates - the intercept column will
25 | #' always be added.
26 | #' @param min_Y Location of the first spline basis mean
27 | #' @param max_Y Location of the last spline basis mean
28 | #'
29 | #' @return An N by G matrix of simulated counts
30 | #'
31 | #' @importFrom stats rnbinom
32 | #'
33 | #' @keywords internal
34 | simulate_cellassign <- function(rho, s, pi, delta, B = 20, a, beta,
35 |                                 X = NULL, min_Y = 0, max_Y = 1000) {
36 |   C <- ncol(rho)
37 |   N <- length(s)
38 |   G <- nrow(rho)
39 |   P <- ncol(beta)
40 |   B <- as.integer(B)
41 |
42 |   stopifnot(length(pi) == N)
43 |   stopifnot(nrow(beta) == G)
44 |   stopifnot(ncol(delta) == C)
45 |   stopifnot(nrow(delta) == G)
46 |
47 |   X <- initialize_X(X, N)
48 |
49 |   # B basis functions with means evenly spaced over [min_Y, max_Y]; the shared
50 |   # bandwidth b is derived from the spacing between the first two means.
51 |   basis_means <- seq(from = min_Y, to = max_Y, length.out = B)
52 |   b_init <- 2 * (basis_means[2] - basis_means[1])^2
53 |   b <- exp(rep(-log(b_init), B))
54 |   LOWER_BOUND <- 1e-10  # small constant added to every dispersion below
55 |
56 |   stopifnot(ncol(X) == P)
57 |
58 |   # N x G matrix of expected counts, replicated B times along a third axis.
59 |   mean_mat <- exp(log(s) + X %*% t(beta) + t((rho * delta)[, pi]))
60 |   mean_mat_tiled <- replicate(B, mean_mat)
61 |
62 |   # phi[n, g] = sum_k a[k] * exp(-b[k] * (mean_mat[n, g] - basis_means[k])^2).
63 |   # a is swept along the basis axis: a bare `a * array` would recycle a
64 |   # length-B vector down the cell axis instead.
65 |   kernel <- exp(sweep((sweep(mean_mat_tiled, 3, basis_means))^2, 3, -b, '*'))
66 |   phi <- apply(sweep(kernel, 3, a, '*'), c(1:2), sum) + LOWER_BOUND
67 |
68 |   # phi is N x G like mean_mat, so gene g's dispersions are column g, not row g.
69 |   sapply(seq_len(G), function(g) {
70 |     rnbinom(N, mu = mean_mat[, g], size = phi[, g])
71 |   })
72 | }
73 |
--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
1 | /* http://gregfranko.com/blog/jquery-best-practices/ */
2 | (function($) {
3 | $(function() {
4 |
5 | $('.navbar-fixed-top').headroom();
6 |
7 | $('body').css('padding-top', $('.navbar').height() + 10);
8 | $(window).resize(function(){
9 | $('body').css('padding-top', $('.navbar').height() + 10);
10 | });
11 |
12 | $('body').scrollspy({
13 | target: '#sidebar',
14 | offset: 60
15 | });
16 |
17 | $('[data-toggle="tooltip"]').tooltip();
18 |
19 | var cur_path = paths(location.pathname);
20 | var links = $("#navbar ul li a");
21 | var max_length = -1;
22 | var pos = -1;
23 | for (var i = 0; i < links.length; i++) {
24 | if (links[i].getAttribute("href") === "#")
25 | continue;
26 | // Ignore external links
27 | if (links[i].host !== location.host)
28 | continue;
29 |
30 | var nav_path = paths(links[i].pathname);
31 |
32 | var length = prefix_length(nav_path, cur_path);
33 | if (length > max_length) {
34 | max_length = length;
35 | pos = i;
36 | }
37 | }
38 |
39 | // Add class to parent
, and enclosing
if in dropdown
40 | if (pos >= 0) {
41 | var menu_anchor = $(links[pos]);
42 | menu_anchor.parent().addClass("active");
43 | menu_anchor.closest("li.dropdown").addClass("active");
44 | }
45 | });
46 |
47 | function paths(pathname) {
48 | var pieces = pathname.split("/");
49 | pieces.shift(); // always starts with /
50 |
51 | var end = pieces[pieces.length - 1];
52 | if (end === "index.html" || end === "")
53 | pieces.pop();
54 | return(pieces);
55 | }
56 |
57 | // Returns -1 if not found
58 | function prefix_length(needle, haystack) {
59 | if (needle.length > haystack.length)
60 | return(-1);
61 |
62 | // Special case for length-0 haystack, since for loop won't run
63 | if (haystack.length === 0) {
64 | return(needle.length === 0 ? 0 : -1);
65 | }
66 |
67 | for (var i = 0; i < haystack.length; i++) {
68 | if (needle[i] != haystack[i])
69 | return(i);
70 | }
71 |
72 | return(haystack.length);
73 | }
74 |
75 | /* Clipboard --------------------------*/
76 | // Show msg in the element's tooltip, then restore the original title attribute
77 | function changeTooltipMessage(element, msg) {
78 | var tooltipOriginalTitle=element.getAttribute('data-original-title');
79 | element.setAttribute('data-original-title', msg);
80 | $(element).tooltip('show');
81 | element.setAttribute('data-original-title', tooltipOriginalTitle);
82 | }
83 |
84 | if(ClipboardJS.isSupported()) {
85 | $(document).ready(function() {
86 | var copyButton = "";
87 |
88 | $(".examples, div.sourceCode").addClass("hasCopyButton");
89 |
90 | // Insert copy buttons:
91 | $(copyButton).prependTo(".hasCopyButton");
92 |
93 | // Initialize tooltips:
94 | $('.btn-copy-ex').tooltip({container: 'body'});
95 |
96 | // Initialize clipboard:
97 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
98 | text: function(trigger) {
99 | return trigger.parentNode.textContent;
100 | }
101 | });
102 |
103 | clipboardBtnCopies.on('success', function(e) {
104 | changeTooltipMessage(e.trigger, 'Copied!');
105 | e.clearSelection();
106 | });
107 |
108 | clipboardBtnCopies.on('error', function() {
109 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
110 | });
111 | });
112 | }
113 | })(window.jQuery || window.$)
114 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cellassign
2 |
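3 | [![Build Status](https://travis-ci.com/Irrationone/cellassign.svg?branch=master)](https://travis-ci.com/Irrationone/cellassign) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/r-cellassign/README.html)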
4 |
5 |
6 | `cellassign` automatically assigns single-cell RNA-seq data to known cell types across thousands of cells accounting for patient and batch specific effects. Information about *a priori* known marker genes for each cell type is provided as input to the model in the form of a (binary) marker gene by cell-type matrix. `cellassign` then probabilistically assigns each cell to a cell type, removing subjective biases from typical unsupervised clustering workflows.
7 |
8 |
9 |
10 |
11 |
12 | # Getting started
13 |
14 | ## Installation
15 |
16 |
17 | ### Installing from GitHub
18 |
19 | `cellassign` is built using Google's Tensorflow, and as such requires installation of the R package `tensorflow`:
20 |
21 | ``` r
22 | install.packages("tensorflow")
23 | tensorflow::install_tensorflow(extra_packages='tensorflow-probability', version = "2.1.0")
24 | ```
25 |
26 | Please ensure this installs version 2 of tensorflow. You can check this by calling
27 |
28 | ```r
29 | tensorflow::tf_config()
30 | ```
31 |
32 | ```
33 | TensorFlow v2.1.0 (/usr/local/lib/python3.7/site-packages/tensorflow)
34 | ```
35 |
36 | `cellassign` can then be installed from github:
37 |
38 | ``` r
39 | install.packages("devtools") # If not already installed
40 | devtools::install_github("Irrationone/cellassign")
41 | ```
42 |
43 |
44 | ### Installing from conda
45 |
46 | With [conda](https://conda.io/miniconda.html), install the current release version of `cellassign` as follows:
47 |
48 | ``` bash
49 | conda install -c conda-forge -c bioconda r-cellassign
50 | ```
51 |
52 | ## Documentation
53 |
54 | Package documentation can be found [here](https://irrationone.github.io/cellassign/index.html). This includes the following vignettes:
55 |
56 | - [Assigning single-cells to known cell types with CellAssign](https://irrationone.github.io/cellassign/articles/introduction-to-cellassign.html)
57 |
58 | - [Constructing marker genes from purified bulk/scRNA-seq data](https://irrationone.github.io/cellassign/articles/constructing-markers-from-purified-data.html)
59 |
60 | ## Basic usage
61 |
62 | `cellassign` requires the following inputs:
63 |
64 | * `exprs_obj`: Cell-by-gene matrix of raw counts (or SingleCellExperiment with `counts` assay)
65 | * `marker_gene_info`: Binary gene-by-celltype marker gene matrix or list relating cell types to marker genes
66 | * `s`: Size factors
67 | * `X`: Design matrix for any patient/batch specific effects
68 |
69 | The model can be run as follows:
70 |
71 | ``` r
72 | cas <- cellassign(exprs_obj = gene_expression_data,
73 | marker_gene_info = marker_gene_info,
74 | s = s,
75 | X = X)
76 | ```
77 |
78 | An example set of markers for the human tumour microenvironment can be loaded by calling
79 |
80 | ``` r
81 | data(example_TME_markers)
82 |
83 | ```
84 |
85 | Please see the package vignette for details and caveats.
86 |
87 | # Paper
88 |
89 | [Probabilistic cell-type assignment of single-cell RNA-seq for tumor microenvironment profiling, _Nature Methods 2019_](https://www.nature.com/articles/s41592-019-0529-1)
90 |
91 | # Code of Conduct
92 |
93 | Please note that the 'cellassign' project is released with a
94 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md).
95 | By contributing to this project, you agree to abide by its terms.
96 |
97 | # Authors
98 |
99 | Allen W Zhang, University of British Columbia
100 |
101 | Kieran R Campbell, University of British Columbia
102 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 |
2 | #' Convert a list of marker genes to a binary matrix
3 | #'
4 | #' Given a list of cell types and marker genes, convert to a binary
5 | #' gene by cell type matrix required by cellassign.
6 | #'
7 | #' This function takes a list of marker genes and converts it to a binary
8 | #' gene by cell type matrix. The input list should be the same
9 | #' length as the number of cell types with names corresponding to cell types.
10 | #' Each element of the list should be a character vector of the genes corresponding
11 | #' to that cell type. There is no requirement for mutually-exclusive marker genes.
12 | #'
13 | #' @param marker_list A list where each entry is named by a cell type and
14 | #' contains a character vector of gene names belonging to that cell type
15 | #' @param include_other If \code{TRUE} adds an all-zero \code{other} column for cells that do not
16 | #' exhibit high expression of any marker gene to be binned into
17 | #'
18 | #' @return A gene by cell type binary matrix with 1 if a gene is a marker for
19 | #' a cell type and 0 otherwise
20 | #'
21 | #' @examples
22 | #' marker_list <- list(
23 | #'  `cell_type_1` = c("geneA", "geneB"),
24 | #'  `cell_type_2` = c("geneB", "geneC")
25 | #' )
26 | #' marker_list_to_mat(marker_list)
27 | #'
28 | #' @export
29 | marker_list_to_mat <- function(marker_list, include_other = TRUE) {
30 |   cell_types <- names(marker_list)
31 |
32 |   if(is.null(cell_types)) {
33 |     warning("Marker list has no cell type names - replacing with generics")
34 |     cell_types <- paste0("cell_type_", seq_along(marker_list))
35 |     names(marker_list) <- cell_types
36 |   }
37 |
38 |   genes <- sort(unique(unlist(marker_list)))
39 |   genes <- genes[nchar(genes) > 0]
40 |
41 |   n_cell_types <- length(cell_types)
42 |   n_genes <- length(genes)
43 |
44 |   mat <- matrix(0, nrow = n_cell_types, ncol = n_genes)
45 |   colnames(mat) <- genes
46 |   rownames(mat) <- cell_types
47 |   # Fill by position: looking rows up by name mis-fills duplicated names and fails on empty ones
48 |   for(i in seq_along(marker_list)) {
49 |     mat[i,] <- genes %in% marker_list[[i]]
50 |   }
51 |
52 |   if(include_other) {
53 |     mat <- rbind(mat, `other` = 0)
54 |   }
55 |
56 |   mat <- t(mat) # Make it gene by cell type
57 |
58 |   mat
59 | }
60 |
61 | #' Get MLE estimates of type of each cell
62 | #'
63 | #' @param gamma A matrix with column names; each row is searched for its maximum
64 | #' @return A vector of MLE cell types: for every row of \code{gamma}, the name
65 | #' of the column holding that row's largest value
66 | #'
67 | #' @keywords internal
68 | get_mle_cell_type <- function(gamma) {
69 |   colnames(gamma)[apply(gamma, 1, which.max)]
70 | }
71 |
72 | #' Extract expression matrix from expression object
73 | #'
74 | #' @param exprs_obj A \code{SummarizedExperiment} or a numeric matrix
75 | #' @param sce_assay Name of the assay read from a \code{SummarizedExperiment}
76 | #' @return The transposed assay matrix, or \code{exprs_obj} itself if it is a numeric matrix
77 | #' @keywords internal
78 | extract_expression_matrix <- function(exprs_obj, sce_assay = "counts") {
79 |   if(is(exprs_obj, "SummarizedExperiment")) {
80 |     return(t(as.matrix(SummarizedExperiment::assay(exprs_obj, sce_assay))))
81 |   }
82 |   if(!(is.matrix(exprs_obj) && is.numeric(exprs_obj))) {
83 |     stop("Input exprs_obj must either be a SummarizedExperiment or numeric matrix of gene expression")
84 |   }
85 |   exprs_obj
86 | }
87 |
88 | #' Create X matrix
89 | #'
90 | #' @param X Covariate matrix (cells in rows), or \code{NULL} for intercept only
91 | #' @param N Number of cells; only used when \code{X} is \code{NULL}
92 | #' @param verbose Logical - report whether an intercept column was added?
93 | #' @importFrom stats var
94 | #' @return \code{X} with a column of ones prepended unless it already has a constant column
95 | #' @keywords internal
96 | initialize_X <- function(X, N, verbose = FALSE) {
97 |   if(is.null(X)) {
98 |     if (N > 0) {
99 |       X <- matrix(1, nrow = N)
100 |     } else {
101 |       X <- matrix(nrow = 0, ncol = 1)
102 |     }
103 |   } else {
104 |     # A zero-variance column is constant and already acts as the intercept.
105 |     # var() is NA for a single row; na.rm keeps the test below from failing on NA.
106 |     col_vars <- apply(X, 2, var)
107 |     if(any(col_vars == 0, na.rm = TRUE)) {
108 |       if(verbose) {
109 |         message("Intercept column detected in X")
110 |       }
111 |     } else {
112 |       X <- cbind(1, X)
113 |       if(verbose) {
114 |         message("No intercept column detected in X - adding")
115 |       }
116 |     }
117 |   }
118 |   return(X)
119 | }
120 |
121 |
122 | #' Check for tensorflow
123 | #'
124 | #' @keywords internal
125 | #' @return Nothing; stops with an error when \code{tensorflow::tf_version()} is \code{NULL}
126 | .onLoad <- function(libname, pkgname) {
127 |   tf_missing <- is.null(tensorflow::tf_version())
128 |   if(tf_missing) {
129 |     stop("Tensorflow installation not detected. Please run 'tensorflow::install_tensorflow()' to continue...")
130 |   }
131 | }
132 |
133 |
--------------------------------------------------------------------------------
/man/cellassign.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cellassign.R
3 | \name{cellassign}
4 | \alias{cellassign}
5 | \title{Annotate cells to cell types using cellassign}
6 | \usage{
7 | cellassign(
8 | exprs_obj,
9 | marker_gene_info,
10 | s = NULL,
11 | min_delta = 2,
12 | X = NULL,
13 | B = 10,
14 | shrinkage = TRUE,
15 | n_batches = 1,
16 | dirichlet_concentration = 0.01,
17 | rel_tol_adam = 1e-04,
18 | rel_tol_em = 1e-04,
19 | max_iter_adam = 1e+05,
20 | max_iter_em = 20,
21 | learning_rate = 0.1,
22 | verbose = TRUE,
23 | sce_assay = "counts",
24 | return_SCE = FALSE,
25 | num_runs = 1,
26 | threads = 0
27 | )
28 | }
29 | \arguments{
30 | \item{exprs_obj}{Either a matrix representing gene
31 | expression counts or a \code{SummarizedExperiment}.
32 | See details.}
33 |
34 | \item{marker_gene_info}{Information relating marker genes to cell types.
35 | See details.}
36 |
37 | \item{s}{Numeric vector of cell size factors}
38 |
39 | \item{min_delta}{The minimum log fold change a marker gene must
40 | be over-expressed by in its cell type}
41 |
42 | \item{X}{Numeric matrix of external covariates. See details.}
43 |
44 | \item{B}{Number of bases to use for RBF dispersion function}
45 |
46 | \item{shrinkage}{Logical - should the delta parameters
47 | have hierarchical shrinkage?}
48 |
49 | \item{n_batches}{Number of data subsample batches to use in inference}
50 |
51 | \item{dirichlet_concentration}{Dirichlet concentration parameter for cell
52 | type abundances}
53 |
54 | \item{rel_tol_adam}{The change in Q function value (in pct) below which
55 | each optimization round is considered converged}
56 |
57 | \item{rel_tol_em}{The change in log marginal likelihood value (in pct)
58 | below which the EM algorithm is considered converged}
59 |
60 | \item{max_iter_adam}{Maximum number of ADAM iterations
61 | to perform in each M-step}
62 |
63 | \item{max_iter_em}{Maximum number of EM iterations to perform}
64 |
65 | \item{learning_rate}{Learning rate of ADAM optimization}
66 |
67 | \item{verbose}{Logical - should running info be printed?}
68 |
69 | \item{sce_assay}{The \code{assay} from the input \code{SingleCellExperiment} to use: this assay
70 | should always represent raw counts.}
71 |
72 | \item{return_SCE}{Logical - should a SingleCellExperiment be returned
73 | with the cell
74 | type annotations added? See details.}
75 |
76 | \item{num_runs}{Number of EM optimizations to perform (the one with the maximum
77 | log-marginal likelihood value will be used as the final).}
78 |
79 | \item{threads}{Maximum number of threads used by the algorithm
80 | (defaults to the number of cores available on the machine)}
81 | }
82 | \value{
83 | An object of class \code{cellassign}. See \code{details}
84 | }
85 | \description{
86 | Automatically annotate cells to known types based
87 | on the expression patterns of
88 | a priori known marker genes.
89 | }
90 | \details{
91 | \strong{Input format}
92 | \code{exprs_obj} should be either a
93 | \code{SummarizedExperiment} (we recommend the
94 | \code{SingleCellExperiment} package) or a
95 | cell (row) by gene (column) matrix of
96 | \emph{raw} RNA-seq counts (do \strong{not}
97 | log-transform or otherwise normalize).
98 |
99 | \code{marker_gene_info} should either be
100 | \itemize{
101 | \item A gene by cell type binary matrix, where a 1 indicates that a gene is a
102 | marker for a cell type, and 0 otherwise
103 | \item A list with names corresponding to cell types, where each entry is a
104 | vector of marker gene names. These are converted to the above matrix using
105 | the \code{marker_list_to_mat} function.
106 | }
107 |
108 | \strong{Cell size factors}
109 | If the cell size factors \code{s} are
110 | not provided they are computed using the
111 | \code{computeSumFactors} function from
112 | the \code{scran} package.
113 |
114 | \strong{Covariates}
115 | If \code{X} is not \code{NULL} then it should be
116 | an \code{N} by \code{P} matrix
117 | of covariates for \code{N} cells and \code{P} covariates.
118 | Such a matrix would typically
119 | be returned by a call to \code{model.matrix}
120 | \strong{with no intercept}. It is also highly
121 | recommended that any numerical (ie non-factor or one-hot-encoded)
122 | covariates be standardized
123 | to have mean 0 and standard deviation 1.
124 |
125 | \strong{cellassign}
126 | A call to \code{cellassign} returns an object
127 | of class \code{cellassign}. To access the
128 | MLE estimates of cell types, call \code{fit$cell_type}.
129 | To access all MLE parameter
130 | estimates, call \code{fit$mle_params}.
131 |
132 | \strong{Returning a SingleCellExperiment}
133 |
134 | If \code{return_SCE} is true, a call to \code{cellassign} will return
135 | the input SingleCellExperiment, with the following added:
136 | \itemize{
137 | \item A column \code{cellassign_celltype} to \code{colData(sce)} with the MAP
138 | estimate of the cell type
139 | \item A slot \code{sce@metadata$cellassign} containing the cellassign fit.
140 | Note that a \code{SingleCellExperiment} must be provided as \code{exprs_obj}
141 | for this option to be valid.
142 | }
143 | }
144 | \examples{
145 | data(example_sce)
146 | data(example_marker_mat)
147 |
148 | fit <- em_result <- cellassign(example_sce[rownames(example_marker_mat),],
149 | marker_gene_info = example_marker_mat,
150 | s = colSums(SummarizedExperiment::assay(example_sce, "counts")),
151 | learning_rate = 1e-2,
152 | shrinkage = TRUE,
153 | verbose = FALSE)
154 |
155 |
156 | }
157 |
--------------------------------------------------------------------------------
/docs/404.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Page not found (404) • cellassign
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
A gene by cell type binary matrix relating markers to cell types
136 |
137 |
138 |
s
139 |
A vector of cell-specific size factors
140 |
141 |
142 |
pi
143 |
An ordinal vector relating each cell to its true marker type assignment
144 |
145 |
146 |
delta
147 |
Gene by cell type matrix delta (all entries with corresponding zeros
148 | in rho will be ignored)
149 |
150 |
151 |
B
152 |
Granularity of spline-based fitting of dispersions
153 |
154 |
155 |
a
156 |
Alpha parameters for spline inference of dispersions
157 |
158 |
159 |
beta
160 |
A gene by covariate vector of coefficients - the first column
161 | should correspond to the intercept (baseline expression) values
162 |
163 |
164 |
X
165 |
A cell by covariate matrix of covariates - the intercept column will
166 | always be added.
167 |
168 |
169 |
b
170 |
Beta parameters for spline inference of dispersions
171 |
172 |
173 |
174 |
Value
175 |
176 |
An N by G matrix of simulated counts
177 |
Details
178 |
179 |
The number of genes, cells, and cell types is automatically
180 | inferred from the dimensions of rho (gene by cell-type) and
181 | s (vector of length number of cells). The specification of X
182 | is optional - a column of ones will always be added as an intercept.
A list where each entry is named by a cell type and
137 | contains a character vector of gene names belonging to that cell type
138 |
139 |
140 |
include_other
141 |
If TRUE adds a column of zeros for cells that do not
142 | exhibit high expression of any marker gene to be binned into
143 |
144 |
145 |
146 |
Value
147 |
148 |
A cell type by gene binary matrix with 1 if a gene is a marker for
149 | a cell type and 0 otherwise
150 |
Details
151 |
152 |
This function takes a list of marker genes and converts it to a binary
153 | gene by cell type matrix. The input list should be the same
154 | length as the number of cell types with names corresponding to cell types.
155 | Each element of the list should be a character vector of the genes corresponding
156 | to that cell type. There is no requirement for mutually-exclusive marker genes.
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
--------------------------------------------------------------------------------
/vignettes/introduction-to-cellassign.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Assigning single-cells to known cell types with CellAssign"
3 | author: "Allen W Zhang and Kieran R Campbell"
4 | date: "October 2019"
5 | package: "cellassign"
6 | output: BiocStyle::html_document
7 | vignette: >
8 | %\VignetteIndexEntry{Introduction to CellAssign}
9 | %\VignetteEngine{knitr::rmarkdown}
10 | %\VignetteEncoding{UTF-8}
11 | ---
12 |
13 | ```{r setup, include = FALSE}
14 | knitr::opts_chunk$set(
15 |   collapse = TRUE,
16 |   warning = FALSE,  # knitr's option names are singular: warning / message
17 |   message = FALSE,
18 |   comment = "#>"
19 | )
20 | ```
21 |
22 | # Overview
23 |
24 | `cellassign` assigns cells measured using single cell RNA sequencing
25 | to known cell types based on marker gene information. Unlike other
26 | methods for assigning cell types from single cell RNA-seq data,
27 | `cellassign` does not require labeled single cell or purified bulk
28 | expression data -- `cellassign` only needs to know whether or not
29 | each given gene is a marker of each cell type:
30 |
31 | ```{r, echo = FALSE}
32 | knitr::include_graphics("cellassign_overview.png")
33 | ```
34 |
35 | Inference is performed using [Tensorflow](http://tensorflow.org/). For more
36 | details please see the
37 | [manuscript](https://www.biorxiv.org/content/10.1101/521914v1).
38 |
39 | # Installation
40 |
41 | `cellassign` depends on `tensorflow`, which can be installed as follows:
42 |
43 | ```{r, eval=FALSE}
44 | install.packages("tensorflow")
45 | library(tensorflow)
46 | install_tensorflow(extra_packages = "tensorflow-probability")
47 | ```
48 |
49 | Please ensure this installs version 2 of tensorflow. You can check this by calling
50 |
51 | ```{r}
52 | tensorflow::tf_config()
53 | ```
54 |
55 | You can confirm that the installation succeeded by running:
56 |
57 | ```{r, eval=FALSE}
58 | library(tensorflow)
59 | hello <- tf$constant('Hello, TensorFlow!')
60 | print(hello)  # TensorFlow 2 runs eagerly, so no tf$Session() is needed
61 | ```
62 |
63 | Note that the `tf` object is created automatically when the `tensorflow` library is loaded to provide access to the Tensorflow interface.
64 |
65 | For more details see the [Rstudio page on tensorflow installation](https://tensorflow.rstudio.com/tensorflow/articles/installation.html).
66 |
67 | `cellassign` can then be installed through Bioconductor via
68 |
69 | ```{r, eval=FALSE}
70 | BiocManager::install('cellassign')
71 | ```
72 |
73 | or the development version through github using the `devtools` package :
74 |
75 | ```{r, eval=FALSE}
76 | devtools::install_github("Irrationone/cellassign")
77 | ```
78 |
79 |
80 | # Basic usage
81 |
82 | We begin by illustrating basic usage of `cellassign` on some
83 | example data bundled with the package. First, load the relevant libraries:
84 |
85 | ```{r, results='hide', message=FALSE, warning=FALSE}
86 | library(SingleCellExperiment)
87 | library(cellassign)
88 | ```
89 |
90 | We use an example `SingleCellExperiment` consisting of 200 genes
91 | and 500 cells:
92 |
93 | ```{r}
94 | data(example_sce)
95 | print(example_sce)
96 | ```
97 |
98 | The true cell types are annotated for convenience in the `Group`
99 | slot of the `SingleCellExperiment`:
100 |
101 | ```{r}
102 | print(head(example_sce$Group))
103 | ```
104 |
105 |
106 | Also provided is an example gene-by-cell-type binary matrix, whose
107 | entries are 1 if a gene is a marker for a given cell type and 0 otherwise:
108 |
109 | ```{r}
110 | data(example_marker_mat)
111 | print(example_marker_mat)
112 | ```
113 |
114 | We further require size factors for each cell. These are stored
115 | in `sizeFactors(example_sce)` - for your data we recommend computing
116 | them using the `computeSumFactors` function from the `scran` package. Note: **it is highly recommended to compute size factors using the full set of genes, before subsetting to markers for input to cellassign**.
117 |
118 | ```{r}
119 | s <- sizeFactors(example_sce)
120 | ```
121 |
122 | We then call `cellassign` using the `cellassign()` function, passing
123 | in the above information. **It is critical that gene expression data containing only marker genes is used as input to cellassign**. We do this here by subsetting the input `SingleCellExperiment` using the row names (gene names) of the marker matrix. This also ensures that the order of the genes in the gene expression data matches the order of the genes in the marker matrix.
124 |
125 |
126 | ```{r}
127 | fit <- cellassign(exprs_obj = example_sce[rownames(example_marker_mat),],
128 | marker_gene_info = example_marker_mat,
129 | s = s,
130 | learning_rate = 1e-2,
131 | shrinkage = TRUE,
132 | verbose = FALSE)
133 | ```
134 |
135 | This returns a `cellassign` object:
136 |
137 | ```{r}
138 | print(fit)
139 | ```
140 |
141 | We can access the maximum likelihood estimates (MLE) of cell type using the `celltypes` function:
142 |
143 | ```{r}
144 | print(head(celltypes(fit)))
145 | ```
146 |
147 | By default, this assigns a cell to a type if the probability of assignment is greater than 0.95, and "unassigned" otherwise. This can be changed with the `assign_prob` parameter.
148 |
149 | It is also possible to get all MLE parameters using `mleparams`:
150 |
151 | ```{r}
152 | print(str(mleparams(fit)))
153 | ```
154 |
155 | We can also visualize the probabilities of assignment using the `cellprobs` function that returns a probability matrix for each cell and cell type:
156 |
157 | ```{r}
158 | pheatmap::pheatmap(cellprobs(fit))
159 | ```
160 |
161 |
162 | Finally, since this is simulated data we can check the concordance
163 | with the true group values:
164 |
165 | ```{r}
166 | print(table(example_sce$Group, celltypes(fit)))
167 | ```
168 |
169 | # Example set of markers for tumour microenvironment
170 |
171 | A set of example markers are included with the `cellassign` package
172 | for common cell types in the human tumour microenvironment. Users
173 | should be aware that
174 |
175 | 1. This set is provided as an _example_ only and we recommend
176 | researchers derive marker gene sets for their own use
177 | 2. The `cellassign` workflow is typically iterative, including
178 | ensuring all markers are expressed in your expression data, and
179 | removing cell types from the input marker matrix that do not appear
180 | to be present
181 |
182 | The marker genes are available for the following cell types:
183 |
184 | * B cells
185 | * T cells
186 | * Cytotoxic T cells
187 | * Monocyte/Macrophage
188 | * Epithelial cells
189 | * Myofibroblasts
190 | * Vascular smooth muscle cells
191 | * Endothelial cells
192 |
193 | These can be accessed by calling
194 |
195 | ```{r}
196 | data(example_TME_markers)
197 | ```
198 |
199 | Note that this is a list of two marker lists:
200 |
201 | ```{r}
202 | names(example_TME_markers)
203 | ```
204 |
205 | Where `symbol` contains gene symbols:
206 |
207 | ```{r}
208 | lapply(head(example_TME_markers$symbol, n = 4), head, n = 4)
209 | ```
210 |
211 | and `ensembl` contains the equivalent ensembl gene ids:
212 |
213 | ```{r}
214 | lapply(head(example_TME_markers$ensembl, n = 4), head, n = 4)
215 | ```
216 |
217 | To use these with `cellassign` we can turn them into the binary
218 | marker by cell type matrix:
219 |
220 | ```{r}
221 | marker_mat <- marker_list_to_mat(example_TME_markers$ensembl)
222 |
223 | marker_mat[1:3, 1:3]
224 | ```
225 |
226 | *Important*: the single cell experiment or input gene expression
227 | matrix should be subset accordingly to match the rows of the marker
228 | input matrix, e.g. if `sce` is a `SingleCellExperiment` with ensembl
229 | IDs as rownames then call
230 |
231 | ```{r, eval = FALSE}
232 | sce_marker <- sce[intersect(rownames(marker_mat), rownames(sce)),]
233 | ```
234 |
235 | Note that the rows in the single cell experiment or gene expression
236 | matrix should be ordered identically to those in the marker input
237 | matrix.
238 |
239 | You can then proceed using `cellassign` as before.
240 |
241 |
242 | # Advanced usage
243 |
244 | ## Options for a `cellassign()` call
245 |
246 | There are several options to a call to `cellassign` that can alter
247 | the results:
248 |
249 | * `min_delta`: the minimum log-fold change in expression above which a
250 | gene must be over-expressed in the cells of which it is a marker compared to
251 | all others
252 | * `X`: a covariate matrix, see section below
253 | * `shrinkage`: whether to impose a hierarchical prior on the values of `delta`
254 | (cell type specific increase in expression of marker genes)
255 |
256 |
257 | ## Constructing a marker gene matrix
258 |
259 | Here we demonstrate a method of constructing the binary marker gene
260 | matrix that encodes our *a priori* knowledge of cell types.
261 |
262 | For two types of cells (`Group1` and `Group2`) we know *a priori* several good
263 | marker genes, e.g.:
264 |
265 | | Cell type | Genes |
266 | | --------- | ----- |
267 | | Group1 | Gene186, Gene269, Gene526, Gene536, Gene994 |
268 | | Group2 | Gene205, Gene575, Gene754, Gene773, Gene949 |
269 |
270 | To use this in `cellassign`, we must turn this into a *named list*, where
271 | the names are the cell types and the entries are marker genes
272 | (not necessarily mutually exclusive) for each cell type:
273 |
274 | ```{r}
275 | marker_gene_list <- list(
276 | Group1 = c("Gene186", "Gene269", "Gene526", "Gene536", "Gene994"),
277 | Group2 = c("Gene205", "Gene575", "Gene754", "Gene773", "Gene949")
278 | )
279 |
280 | print(str(marker_gene_list))
281 | ```
282 |
283 | We can then directly provide this to `cellassign` or turn it into a binary
284 | marker gene matrix first using the `marker_list_to_mat` function:
285 |
286 | ```{r}
287 | print(marker_list_to_mat(marker_gene_list))
288 | ```
289 |
290 | This has automatically included an `other` group for cells that do not fall
291 | into either type - this can be excluded by setting `include_other = FALSE`.
292 |
293 | ## Adding covariates
294 |
295 | Covariates corresponding to batch, sample, or patient-specific effects can
296 | be included in the `cellassign` model. For example, if we have two covariates
297 | `x1` and `x2`:
298 |
299 | ```{r}
300 | N <- ncol(example_sce)
301 | x1 <- rnorm(N)
302 | x2 <- rnorm(N)
303 | ```
304 |
305 | We can construct a design matrix using the `model.matrix` function in R:
306 |
307 | ```{r}
308 | X <- model.matrix(~ 0 + x1 + x2)
309 | ```
310 |
311 | Note we explicitly set no intercept by passing in `0` in the beginning.
312 | We can then perform an equivalent cell assignment passing this in also:
313 |
314 | ```{r, eval = FALSE}
315 | # Pass marker genes only: subset the rows to those of the marker matrix
316 | fit <- cellassign(exprs_obj = example_sce[rownames(example_marker_mat),],
317 |                   marker_gene_info = example_marker_mat,
318 |                   X = X,
319 |                   s = s,
320 |                   learning_rate = 1e-2, shrinkage = TRUE,
321 |                   verbose = FALSE)
322 | ```
323 |
324 |
325 | # Technical
326 |
327 | ```{r}
328 | sessionInfo()
329 | ```
330 |
331 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Apache License
2 | ==============
3 |
4 | _Version 2.0, January 2004_
_<http://www.apache.org/licenses/>_
6 |
7 | ### Terms and Conditions for use, reproduction, and distribution
8 |
9 | #### 1. Definitions
10 |
11 | “License” shall mean the terms and conditions for use, reproduction, and
12 | distribution as defined by Sections 1 through 9 of this document.
13 |
14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright
15 | owner that is granting the License.
16 |
17 | “Legal Entity” shall mean the union of the acting entity and all other entities
18 | that control, are controlled by, or are under common control with that entity.
19 | For the purposes of this definition, “control” means **(i)** the power, direct or
20 | indirect, to cause the direction or management of such entity, whether by
21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the
22 | outstanding shares, or **(iii)** beneficial ownership of such entity.
23 |
24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising
25 | permissions granted by this License.
26 |
27 | “Source” form shall mean the preferred form for making modifications, including
28 | but not limited to software source code, documentation source, and configuration
29 | files.
30 |
31 | “Object” form shall mean any form resulting from mechanical transformation or
32 | translation of a Source form, including but not limited to compiled object code,
33 | generated documentation, and conversions to other media types.
34 |
35 | “Work” shall mean the work of authorship, whether in Source or Object form, made
36 | available under the License, as indicated by a copyright notice that is included
37 | in or attached to the work (an example is provided in the Appendix below).
38 |
39 | “Derivative Works” shall mean any work, whether in Source or Object form, that
40 | is based on (or derived from) the Work and for which the editorial revisions,
41 | annotations, elaborations, or other modifications represent, as a whole, an
42 | original work of authorship. For the purposes of this License, Derivative Works
43 | shall not include works that remain separable from, or merely link (or bind by
44 | name) to the interfaces of, the Work and Derivative Works thereof.
45 |
46 | “Contribution” shall mean any work of authorship, including the original version
47 | of the Work and any modifications or additions to that Work or Derivative Works
48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work
49 | by the copyright owner or by an individual or Legal Entity authorized to submit
50 | on behalf of the copyright owner. For the purposes of this definition,
51 | “submitted” means any form of electronic, verbal, or written communication sent
52 | to the Licensor or its representatives, including but not limited to
53 | communication on electronic mailing lists, source code control systems, and
54 | issue tracking systems that are managed by, or on behalf of, the Licensor for
55 | the purpose of discussing and improving the Work, but excluding communication
56 | that is conspicuously marked or otherwise designated in writing by the copyright
57 | owner as “Not a Contribution.”
58 |
59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf
60 | of whom a Contribution has been received by Licensor and subsequently
61 | incorporated within the Work.
62 |
63 | #### 2. Grant of Copyright License
64 |
65 | Subject to the terms and conditions of this License, each Contributor hereby
66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
67 | irrevocable copyright license to reproduce, prepare Derivative Works of,
68 | publicly display, publicly perform, sublicense, and distribute the Work and such
69 | Derivative Works in Source or Object form.
70 |
71 | #### 3. Grant of Patent License
72 |
73 | Subject to the terms and conditions of this License, each Contributor hereby
74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
75 | irrevocable (except as stated in this section) patent license to make, have
76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where
77 | such license applies only to those patent claims licensable by such Contributor
78 | that are necessarily infringed by their Contribution(s) alone or by combination
79 | of their Contribution(s) with the Work to which such Contribution(s) was
80 | submitted. If You institute patent litigation against any entity (including a
81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a
82 | Contribution incorporated within the Work constitutes direct or contributory
83 | patent infringement, then any patent licenses granted to You under this License
84 | for that Work shall terminate as of the date such litigation is filed.
85 |
86 | #### 4. Redistribution
87 |
88 | You may reproduce and distribute copies of the Work or Derivative Works thereof
89 | in any medium, with or without modifications, and in Source or Object form,
90 | provided that You meet the following conditions:
91 |
92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of
93 | this License; and
94 | * **(b)** You must cause any modified files to carry prominent notices stating that You
95 | changed the files; and
96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute,
97 | all copyright, patent, trademark, and attribution notices from the Source form
98 | of the Work, excluding those notices that do not pertain to any part of the
99 | Derivative Works; and
100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any
101 | Derivative Works that You distribute must include a readable copy of the
102 | attribution notices contained within such NOTICE file, excluding those notices
103 | that do not pertain to any part of the Derivative Works, in at least one of the
104 | following places: within a NOTICE text file distributed as part of the
105 | Derivative Works; within the Source form or documentation, if provided along
106 | with the Derivative Works; or, within a display generated by the Derivative
107 | Works, if and wherever such third-party notices normally appear. The contents of
108 | the NOTICE file are for informational purposes only and do not modify the
109 | License. You may add Your own attribution notices within Derivative Works that
110 | You distribute, alongside or as an addendum to the NOTICE text from the Work,
111 | provided that such additional attribution notices cannot be construed as
112 | modifying the License.
113 |
114 | You may add Your own copyright statement to Your modifications and may provide
115 | additional or different license terms and conditions for use, reproduction, or
116 | distribution of Your modifications, or for any such Derivative Works as a whole,
117 | provided Your use, reproduction, and distribution of the Work otherwise complies
118 | with the conditions stated in this License.
119 |
120 | #### 5. Submission of Contributions
121 |
122 | Unless You explicitly state otherwise, any Contribution intentionally submitted
123 | for inclusion in the Work by You to the Licensor shall be under the terms and
124 | conditions of this License, without any additional terms or conditions.
125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of
126 | any separate license agreement you may have executed with Licensor regarding
127 | such Contributions.
128 |
129 | #### 6. Trademarks
130 |
131 | This License does not grant permission to use the trade names, trademarks,
132 | service marks, or product names of the Licensor, except as required for
133 | reasonable and customary use in describing the origin of the Work and
134 | reproducing the content of the NOTICE file.
135 |
136 | #### 7. Disclaimer of Warranty
137 |
138 | Unless required by applicable law or agreed to in writing, Licensor provides the
139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS,
140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
141 | including, without limitation, any warranties or conditions of TITLE,
142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
143 | solely responsible for determining the appropriateness of using or
144 | redistributing the Work and assume any risks associated with Your exercise of
145 | permissions under this License.
146 |
147 | #### 8. Limitation of Liability
148 |
149 | In no event and under no legal theory, whether in tort (including negligence),
150 | contract, or otherwise, unless required by applicable law (such as deliberate
151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be
152 | liable to You for damages, including any direct, indirect, special, incidental,
153 | or consequential damages of any character arising as a result of this License or
154 | out of the use or inability to use the Work (including but not limited to
155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or
156 | any and all other commercial damages or losses), even if such Contributor has
157 | been advised of the possibility of such damages.
158 |
159 | #### 9. Accepting Warranty or Additional Liability
160 |
161 | While redistributing the Work or Derivative Works thereof, You may choose to
162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or
163 | other liability obligations and/or rights consistent with this License. However,
164 | in accepting such obligations, You may act only on Your own behalf and on Your
165 | sole responsibility, not on behalf of any other Contributor, and only if You
166 | agree to indemnify, defend, and hold each Contributor harmless for any liability
167 | incurred by, or claims asserted against, such Contributor by reason of your
168 | accepting any such warranty or additional liability.
169 |
170 | _END OF TERMS AND CONDITIONS_
171 |
172 | ### APPENDIX: How to apply the Apache License to your work
173 |
174 | To apply the Apache License to your work, attach the following boilerplate
175 | notice, with the fields enclosed by brackets `[]` replaced with your own
176 | identifying information. (Don't include the brackets!) The text should be
177 | enclosed in the appropriate comment syntax for the file format. We also
178 | recommend that a file or class name and description of purpose be included on
179 | the same “printed page” as the copyright notice for easier identification within
180 | third-party archives.
181 |
182 | Copyright 2018 Apache 2.0
183 |
184 | Licensed under the Apache License, Version 2.0 (the "License");
185 | you may not use this file except in compliance with the License.
186 | You may obtain a copy of the License at
187 |
188 | http://www.apache.org/licenses/LICENSE-2.0
189 |
190 | Unless required by applicable law or agreed to in writing, software
191 | distributed under the License is distributed on an "AS IS" BASIS,
192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
193 | See the License for the specific language governing permissions and
194 | limitations under the License.
195 |
--------------------------------------------------------------------------------
/R/inference-tensorflow.R:
--------------------------------------------------------------------------------
1 |
2 |
#' Block gradients for selected entries of a tensor
#'
#' Returns a tensor with the same values as \code{target}. Entries where
#' \code{mask} is \code{FALSE} are routed through \code{tf$stop_gradient};
#' entries where \code{mask} is \code{TRUE} are passed through unchanged.
#' Taken from https://github.com/tensorflow/tensorflow/issues/9162
#'
#' @param target Tensor whose entries should be selectively frozen.
#' @param mask Boolean tensor; \code{TRUE} marks entries that keep their
#' gradient.
#'
#' @return The sum of the gradient-stopped part and the pass-through part of
#' \code{target}.
#'
#' @keywords internal
entry_stop_gradients <- function(target, mask) {
  value_dtype <- target$dtype

  # Complementary 0/1 masks in the dtype of `target`
  frozen_mask <- tf$cast(tf$logical_not(mask), dtype = value_dtype)
  live_mask <- tf$cast(mask, dtype = value_dtype)

  frozen_part <- tf$stop_gradient(tf$multiply(frozen_mask, target))
  live_part <- tf$multiply(live_mask, target)

  tf$add(frozen_part, live_part)
}
12 |
13 |
14 |
#' cellassign inference in tensorflow, semi-supervised version
#'
#' Builds a TensorFlow (v1 compatibility mode) graph for a negative binomial
#' mixture model and fits it with an EM scheme: the E-step evaluates the
#' responsibilities \code{gamma}, the M-step minimises the objective \code{Q}
#' with Adam while \code{gamma} is held fixed.
#'
#' @param Y Numeric matrix of observed counts with \code{N} rows and \code{G}
#' columns.
#' @param rho \code{G} by \code{C} matrix. Non-zero entries mark the entries
#' of \code{delta} that are trained; estimates of \code{delta} where
#' \code{rho == 0} are set to 0 in the output. Its row and column names are
#' used to label the returned parameters.
#' @param s Numeric vector of length \code{N}; its log is added to the linear
#' predictor.
#' @param X \code{N} by \code{P} design matrix. The coefficients of its first
#' column are initialised from the scaled column means of \code{Y}, all other
#' coefficients at 0.
#' @param G Number of columns of \code{Y} (rows of \code{rho}).
#' @param C Number of columns of \code{rho} (mixture components).
#' @param N Number of rows of \code{Y}.
#' @param P Number of columns of \code{X}.
#' @param B Number of basis functions used for the dispersion \code{phi};
#' their centres are spaced evenly between \code{min(Y)} and \code{max(Y)}.
#' @param shrinkage Logical; if \code{TRUE} a Normal prior with trainable
#' location and scale is placed on \code{delta_log}.
#' @param verbose Logical; emit progress messages.
#' @param n_batches Number of mini-batches the rows of \code{Y} are randomly
#' split into.
#' @param rel_tol_adam Relative decrease of \code{Q} (checked every 20 Adam
#' steps) below which the M-step stops.
#' @param rel_tol_em Relative change of the log likelihood below which EM
#' stops.
#' @param max_iter_adam Maximum number of Adam steps per M-step.
#' @param max_iter_em Maximum number of EM iterations.
#' @param learning_rate Learning rate passed to the Adam optimizer.
#' @param random_seed Seed passed to the random initialisers of
#' \code{delta_log} and \code{theta_logit}.
#' @param min_delta Lower bound on \code{delta}; \code{delta_log} is clipped
#' at \code{log(min_delta)}.
#' @param dirichlet_concentration Concentration of the Dirichlet prior on the
#' mixture proportions \code{theta}.
#' @param threads Value used for both the intra-op and inter-op parallelism
#' thread settings of the session.
#'
#' @import tensorflow
#'
#' @return A list of MLE cell type calls (\code{cell_type}), MLE parameter
#' estimates (\code{mle_params}), and log likelihoods during optimization
#' (\code{lls}: the initial value followed by one value per EM iteration).
#'
#' @keywords internal
inference_tensorflow <- function(Y,
                                 rho,
                                 s,
                                 X,
                                 G,
                                 C,
                                 N,
                                 P,
                                 B = 10,
                                 shrinkage,
                                 verbose = FALSE,
                                 n_batches = 1,
                                 rel_tol_adam = 1e-4,
                                 rel_tol_em = 1e-4,
                                 max_iter_adam = 1e5,
                                 max_iter_em = 20,
                                 learning_rate = 1e-4,
                                 random_seed = NULL,
                                 min_delta = 2,
                                 dirichlet_concentration = rep(1e-2, C),
                                 threads = 0) {

  # Work against the TF1 graph/session API
  tf <- tf$compat$v1
  tf$disable_v2_behavior()

  tfp <- reticulate::import('tensorflow_probability')
  tfd <- tfp$distributions

  tf$reset_default_graph()

  # Data placeholders
  Y_ <- tf$placeholder(tf$float64, shape = shape(NULL, G), name = "Y_")
  X_ <- tf$placeholder(tf$float64, shape = shape(NULL, P), name = "X_")
  s_ <- tf$placeholder(tf$float64, shape = shape(NULL), name = "s_")
  rho_ <- tf$placeholder(tf$float64, shape = shape(G,C), name = "rho_")

  # Added for splines
  B <- as.integer(B)

  basis_means_fixed <- seq(from = min(Y), to = max(Y), length.out = B)
  basis_means <- tf$constant(basis_means_fixed, dtype = tf$float64)

  b_init <- 2 * (basis_means_fixed[2] - basis_means_fixed[1])^2

  LOWER_BOUND <- 1e-10

  # Variables

  ## Shrinkage prior on delta
  if (shrinkage) {
    delta_log_mean <- tf$Variable(0, dtype = tf$float64)
    delta_log_variance <- tf$Variable(1, dtype = tf$float64) # May need to bound this or put a prior over this
  }

  ## Regular variables
  # delta_log is clipped from below at log(min_delta) through the variable
  # constraint.
  delta_log <- tf$Variable(tf$random_uniform(shape(G,C),
                                             minval = -2,
                                             maxval = 2,
                                             seed = random_seed,
                                             dtype = tf$float64),
                           dtype = tf$float64,
                           constraint = function(x) {
                             tf$clip_by_value(x,
                                              tf$constant(log(min_delta),
                                                          dtype = tf$float64),
                                              tf$constant(Inf, dtype = tf$float64))
                           })

  # First column of beta starts at the scaled column means of Y, the
  # remaining P - 1 columns start at 0.
  beta_0_init <- scale(colMeans(Y))
  beta_init <- cbind(beta_0_init,
                     matrix(0, nrow = G, ncol = P-1))
  beta <- tf$Variable(tf$constant(beta_init, dtype = tf$float64),
                      dtype = tf$float64)

  theta_logit <- tf$Variable(tf$random_normal(shape(C),
                                              mean = 0,
                                              stddev = 1,
                                              seed = random_seed,
                                              dtype = tf$float64),
                             dtype = tf$float64)

  ## Spline variables
  a <- tf$exp(tf$Variable(tf$zeros(shape = B, dtype = tf$float64)))
  b <- tf$exp(tf$constant(rep(-log(b_init), B), dtype = tf$float64))

  # Stop gradient for irrelevant entries of delta_log
  delta_log <- entry_stop_gradients(delta_log, tf$cast(rho_, tf$bool))

  # Transformed variables
  delta <- tf$exp(delta_log)
  theta_log <- tf$nn$log_softmax(theta_logit)

  # Model likelihood
  base_mean <- tf$transpose(tf$einsum('np,gp->gn', X_, beta) +
                              tf$log(s_))

  # Replicate the base mean once per component before adding delta * rho
  base_mean_list <- list()
  for (k in seq_len(C)) base_mean_list[[k]] <- base_mean
  mu_ngc <- tf$add(tf$stack(base_mean_list, 2),
                   tf$multiply(delta, rho_),
                   name = "adding_base_mean_to_delta_rho")

  mu_cng <- tf$transpose(mu_ngc, shape(2,0,1))

  mu_cngb <- tf$tile(tf$expand_dims(mu_cng, axis = 3L), c(1L, 1L, 1L, B))

  # Dispersion: sum of B basis functions of the log mean, kept above LOWER_BOUND
  phi_cng <- tf$reduce_sum(a * tf$exp(-b * tf$square(mu_cngb - basis_means)), 3L) +
    LOWER_BOUND
  phi <- tf$transpose(phi_cng, shape(1,2,0))

  mu_ngc <- tf$transpose(mu_cng, shape(1,2,0))

  mu_ngc <- tf$exp(mu_ngc)

  p <- mu_ngc / (mu_ngc + phi)

  nb_pdf <- tfd$NegativeBinomial(probs = p, total_count = phi)


  Y_tensor_list <- list()
  for (k in seq_len(C)) Y_tensor_list[[k]] <- Y_
  Y__ <- tf$stack(Y_tensor_list, axis = 2)

  y_log_prob_raw <- nb_pdf$log_prob(Y__)
  y_log_prob <- tf$transpose(y_log_prob_raw, shape(0,2,1))
  y_log_prob_sum <- tf$reduce_sum(y_log_prob, 2L) + theta_log
  p_y_on_c_unorm <- tf$transpose(y_log_prob_sum, shape(1,0))

  # Responsibilities held fixed during the M-step
  gamma_fixed <- tf$placeholder(dtype = tf$float64, shape = shape(NULL,C))

  Q <- -tf$einsum('nc,cn->', gamma_fixed, p_y_on_c_unorm)

  p_y_on_c_norm <- tf$reshape(tf$reduce_logsumexp(p_y_on_c_unorm, 0L), shape(1,-1))

  gamma <- tf$transpose(tf$exp(p_y_on_c_unorm - p_y_on_c_norm))

  ## Priors
  if (shrinkage) {
    delta_log_prior <- tfd$Normal(loc = delta_log_mean * rho_,
                                  scale = delta_log_variance)
    delta_log_prob <- -tf$reduce_sum(delta_log_prior$log_prob(delta_log))
  }

  THETA_LOWER_BOUND <- 1e-20

  theta_log_prior <- tfd$Dirichlet(concentration = tf$constant(dirichlet_concentration,
                                                               dtype = tf$float64))
  theta_log_prob <- -theta_log_prior$log_prob(tf$exp(theta_log) + THETA_LOWER_BOUND)

  ## End priors
  Q <- Q + theta_log_prob
  if (shrinkage) {
    Q <- Q + delta_log_prob
  }


  optimizer <- tf$train$AdamOptimizer(learning_rate=learning_rate)
  train <- optimizer$minimize(Q)

  # Marginal log likelihood for monitoring convergence
  L_y <- tf$reduce_sum(tf$reduce_logsumexp(p_y_on_c_unorm, 0L))

  L_y <- L_y - theta_log_prob
  if (shrinkage) {
    L_y <- L_y - delta_log_prob
  }


  # Split the data
  splits <- split(sample(seq_len(N), size = N, replace = FALSE), seq_len(n_batches))

  # Start the graph and inference
  session_conf <- tf$ConfigProto(intra_op_parallelism_threads = threads,
                                 inter_op_parallelism_threads = threads)
  sess <- tf$Session(config = session_conf)
  # Release the session even when a run below fails
  on.exit(sess$close(), add = TRUE)
  init <- tf$global_variables_initializer()
  sess$run(init)


  fd_full <- dict(Y_ = Y, X_ = X, s_ = s, rho_ = rho)

  log_liks <- ll_old <- sess$run(L_y, feed_dict = fd_full)

  for (i in seq_len(max_iter_em)) {
    ll <- 0 # log likelihood for this "epoch"
    for (batch in seq_len(n_batches)) {
      idx <- splits[[batch]]

      # drop = FALSE / as.array keep the rank expected by the placeholders
      # even when a batch contains a single row.
      Y_batch <- Y[idx, , drop = FALSE]
      X_batch <- X[idx, , drop = FALSE]
      s_batch <- as.array(s[idx])

      # E-step
      fd <- dict(Y_ = Y_batch,
                 X_ = X_batch,
                 s_ = s_batch,
                 rho_ = rho)

      g <- sess$run(gamma, feed_dict = fd)

      # M-step
      gfd <- dict(Y_ = Y_batch,
                  X_ = X_batch,
                  s_ = s_batch,
                  rho_ = rho,
                  gamma_fixed = g)

      Q_old <- sess$run(Q, feed_dict = gfd)
      Q_diff <- rel_tol_adam + 1
      mi <- 0

      while (mi < max_iter_adam && Q_diff > rel_tol_adam) {
        mi <- mi + 1

        sess$run(train, feed_dict = gfd)

        # Convergence is only checked every 20 Adam steps
        if (mi %% 20 == 0) {
          Q_new <- sess$run(Q, feed_dict = gfd)
          if (verbose) {
            message(paste(mi, Q_new))
          }
          Q_diff <- -(Q_new - Q_old) / abs(Q_old)
          Q_old <- Q_new
        }
      } # End gradient descent

      l_new <- sess$run(L_y, feed_dict = gfd) # Log likelihood for this "epoch"
      ll <- ll + l_new
    }

    ll_diff <- (ll - ll_old) / abs(ll_old)

    if (verbose) {
      message(sprintf("%i\tL old: %f; L new: %f; Difference (%%): %f",
                      mi, ll_old, ll, ll_diff))
    }
    ll_old <- ll
    log_liks <- c(log_liks, ll)

    if (ll_diff < rel_tol_em) {
      break
    }
  }

  # Finished EM - peel off final values
  variable_list <- list(delta, beta, phi, gamma, mu_ngc, a, tf$exp(theta_log))
  variable_names <- c("delta", "beta", "phi", "gamma", "mu", "a", "theta")


  if (shrinkage) {
    variable_list <- c(variable_list, list(delta_log_mean, delta_log_variance))
    variable_names <- c(variable_names, "ld_mean", "ld_var")
  }

  mle_params <- sess$run(variable_list, feed_dict = fd_full)
  names(mle_params) <- variable_names

  # Entries of delta not enabled by rho are reported as 0
  mle_params$delta[rho == 0] <- 0

  if (is.null(colnames(rho))) {
    colnames(rho) <- paste0("cell_type_", seq_len(ncol(rho)))
  }
  colnames(mle_params$gamma) <- colnames(rho)
  rownames(mle_params$delta) <- rownames(rho)
  colnames(mle_params$delta) <- colnames(rho)
  rownames(mle_params$beta) <- rownames(rho)
  names(mle_params$theta) <- colnames(rho)


  cell_type <- get_mle_cell_type(mle_params$gamma)

  rlist <- list(
    cell_type = cell_type,
    mle_params = mle_params,
    lls=log_liks
  )

  return(rlist)

}
308 |
309 |
--------------------------------------------------------------------------------
/docs/docsearch.css:
--------------------------------------------------------------------------------
1 | /* Docsearch -------------------------------------------------------------- */
2 | /*
3 | Source: https://github.com/algolia/docsearch/
4 | License: MIT
5 | */
6 |
7 | .algolia-autocomplete {
8 | display: block;
9 | -webkit-box-flex: 1;
10 | -ms-flex: 1;
11 | flex: 1
12 | }
13 |
/* Dropdown panel: full width of its container, no minimum or maximum width. */
.algolia-autocomplete .ds-dropdown-menu {
  width: 100%;
  /* `none` is not a valid min-width value and the declaration was dropped;
     0 is the valid way to express "no minimum". */
  min-width: 0;
  max-width: none;
  padding: .75rem 0;
  background-color: #fff;
  background-clip: padding-box;
  border: 1px solid rgba(0, 0, 0, .1);
  box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175);
}
24 |
25 | @media (min-width:768px) {
26 | .algolia-autocomplete .ds-dropdown-menu {
27 | width: 175%
28 | }
29 | }
30 |
31 | .algolia-autocomplete .ds-dropdown-menu::before {
32 | display: none
33 | }
34 |
35 | .algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] {
36 | padding: 0;
37 | background-color: rgb(255,255,255);
38 | border: 0;
39 | max-height: 80vh;
40 | }
41 |
42 | .algolia-autocomplete .ds-dropdown-menu .ds-suggestions {
43 | margin-top: 0
44 | }
45 |
46 | .algolia-autocomplete .algolia-docsearch-suggestion {
47 | padding: 0;
48 | overflow: visible
49 | }
50 |
51 | .algolia-autocomplete .algolia-docsearch-suggestion--category-header {
52 | padding: .125rem 1rem;
53 | margin-top: 0;
54 | font-size: 1.3em;
55 | font-weight: 500;
56 | color: #00008B;
57 | border-bottom: 0
58 | }
59 |
60 | .algolia-autocomplete .algolia-docsearch-suggestion--wrapper {
61 | float: none;
62 | padding-top: 0
63 | }
64 |
65 | .algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column {
66 | float: none;
67 | width: auto;
68 | padding: 0;
69 | text-align: left
70 | }
71 |
72 | .algolia-autocomplete .algolia-docsearch-suggestion--content {
73 | float: none;
74 | width: auto;
75 | padding: 0
76 | }
77 |
78 | .algolia-autocomplete .algolia-docsearch-suggestion--content::before {
79 | display: none
80 | }
81 |
82 | .algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header {
83 | padding-top: .75rem;
84 | margin-top: .75rem;
85 | border-top: 1px solid rgba(0, 0, 0, .1)
86 | }
87 |
/* Subcategory column inside a suggestion row. */
.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column {
  display: block;
  padding: .1rem 1rem;
  /* A non-zero length needs a unit; the bare `0.1` was an invalid declaration. */
  margin-bottom: 0.1rem;
  font-size: 1.0em;
  font-weight: 400
  /* display: none */
}
96 |
97 | .algolia-autocomplete .algolia-docsearch-suggestion--title {
98 | display: block;
99 | padding: .25rem 1rem;
100 | margin-bottom: 0;
101 | font-size: 0.9em;
102 | font-weight: 400
103 | }
104 |
105 | .algolia-autocomplete .algolia-docsearch-suggestion--text {
106 | padding: 0 1rem .5rem;
107 | margin-top: -.25rem;
108 | font-size: 0.8em;
109 | font-weight: 400;
110 | line-height: 1.25
111 | }
112 |
113 | .algolia-autocomplete .algolia-docsearch-footer {
114 | width: 110px;
115 | height: 20px;
116 | z-index: 3;
117 | margin-top: 10.66667px;
118 | float: right;
119 | font-size: 0;
120 | line-height: 0;
121 | }
122 |
123 | .algolia-autocomplete .algolia-docsearch-footer--logo {
124 | background-image: url("data:image/svg+xml;utf8,");
125 | background-repeat: no-repeat;
126 | background-position: 50%;
127 | background-size: 100%;
128 | overflow: hidden;
129 | text-indent: -9000px;
130 | width: 100%;
131 | height: 100%;
132 | display: block;
133 | transform: translate(-8px);
134 | }
135 |
136 | .algolia-autocomplete .algolia-docsearch-suggestion--highlight {
137 | color: #FF8C00;
138 | background: rgba(232, 189, 54, 0.1)
139 | }
140 |
141 |
142 | .algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight {
143 | box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5)
144 | }
145 |
146 | .algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content {
147 | background-color: rgba(192, 192, 192, .15)
148 | }
149 |
--------------------------------------------------------------------------------