├── data ├── example_sce.rda ├── holik_data.rda ├── example_marker_mat.rda ├── example_TME_markers.rda └── example_cellassign_fit.rda ├── docs ├── pkgdown.yml ├── articles │ ├── cellassign_overview.png │ ├── introduction-to-cellassign_files │ │ └── figure-html │ │ │ └── unnamed-chunk-16-1.png │ ├── constructing-markers-from-purified-data_files │ │ └── figure-html │ │ │ └── unnamed-chunk-17-1.png │ └── index.html ├── link.svg ├── docsearch.js ├── pkgdown.js ├── 404.html ├── authors.html ├── pkgdown.css ├── reference │ ├── dot-onLoad.html │ ├── initialize_X.html │ ├── get_mle_cell_type.html │ ├── extract_expression_matrix.html │ ├── holik_data.html │ ├── example_TME_markers.html │ ├── example_sce.html │ ├── example_cellassign_fit.html │ ├── example_marker_mat.html │ ├── print.cellassign.html │ ├── inference_tensorflow.html │ ├── index.html │ ├── simulate_cellassign.html │ └── marker_list_to_mat.html └── docsearch.css ├── tests ├── testthat.R └── testthat │ └── test_cellassign.R ├── inst └── cellassign_schematic.png ├── .Rbuildignore ├── vignettes ├── cellassign_overview.png ├── constructing-markers-from-purified-data.Rmd └── introduction-to-cellassign.Rmd ├── .gitignore ├── man ├── dot-onLoad.Rd ├── initialize_X.Rd ├── get_mle_cell_type.Rd ├── holik_data.Rd ├── extract_expression_matrix.Rd ├── example_TME_markers.Rd ├── example_sce.Rd ├── example_marker_mat.Rd ├── example_cellassign_fit.Rd ├── print.cellassign.Rd ├── mleparams.Rd ├── cellprobs.Rd ├── celltypes.Rd ├── inference_tensorflow.Rd ├── marker_list_to_mat.Rd ├── simulate_cellassign.Rd └── cellassign.Rd ├── NAMESPACE ├── .travis.yml ├── DESCRIPTION ├── CODE_OF_CONDUCT.md ├── R ├── simulate.R ├── utils.R └── inference-tensorflow.R ├── README.md └── LICENSE.md /data/example_sce.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/example_sce.rda 
-------------------------------------------------------------------------------- /data/holik_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/holik_data.rda -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.3.1 2 | pkgdown: 1.4.1 3 | pkgdown_sha: ~ 4 | articles: [] 5 | 6 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(cellassign) 3 | 4 | test_check("cellassign") 5 | -------------------------------------------------------------------------------- /data/example_marker_mat.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/example_marker_mat.rda -------------------------------------------------------------------------------- /data/example_TME_markers.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/example_TME_markers.rda -------------------------------------------------------------------------------- /inst/cellassign_schematic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/inst/cellassign_schematic.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^doc$ 3 | ^cellassign\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^LICENSE\.md$ 6 | ^CODE_OF_CONDUCT\.md$ 7 | 
-------------------------------------------------------------------------------- /data/example_cellassign_fit.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/data/example_cellassign_fit.rda -------------------------------------------------------------------------------- /vignettes/cellassign_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/vignettes/cellassign_overview.png -------------------------------------------------------------------------------- /docs/articles/cellassign_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/docs/articles/cellassign_overview.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | .Rproj.user 4 | .Rhistory 5 | .RData 6 | Rmd/ 7 | inst/doc 8 | data/hodgkin_500.rds 9 | inst/*.ai 10 | inst/*.pdf 11 | -------------------------------------------------------------------------------- /docs/articles/introduction-to-cellassign_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/docs/articles/introduction-to-cellassign_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/articles/constructing-markers-from-purified-data_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Irrationone/cellassign/HEAD/docs/articles/constructing-markers-from-purified-data_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /man/dot-onLoad.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{.onLoad} 4 | \alias{.onLoad} 5 | \title{Check for tensorflow} 6 | \usage{ 7 | .onLoad(libname, pkgname) 8 | } 9 | \value{ 10 | Installs tensorflow if not already installed 11 | } 12 | \description{ 13 | Check for tensorflow 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/initialize_X.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{initialize_X} 4 | \alias{initialize_X} 5 | \title{Create X matrix} 6 | \usage{ 7 | initialize_X(X, N, verbose = FALSE) 8 | } 9 | \value{ 10 | A cleaned covariate matrix given the input provided by the user 11 | } 12 | \description{ 13 | Create X matrix 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(cellprobs,cellassign) 4 | S3method(celltypes,cellassign) 5 | S3method(mleparams,cellassign) 6 | S3method(print,cellassign) 7 | export(cellassign) 8 | export(cellprobs) 9 | export(celltypes) 10 | export(marker_list_to_mat) 11 | export(mleparams) 12 | import(tensorflow) 13 | importFrom(SummarizedExperiment,assays) 14 | importFrom(methods,is) 15 | importFrom(stats,rnbinom) 16 | importFrom(stats,var) 17 | 
-------------------------------------------------------------------------------- /man/get_mle_cell_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get_mle_cell_type} 4 | \alias{get_mle_cell_type} 5 | \title{Get MLE estimates of type of each cell} 6 | \usage{ 7 | get_mle_cell_type(gamma) 8 | } 9 | \value{ 10 | A vector of MLE cell types, where the names are 11 | taken from the column names of the input matrix 12 | } 13 | \description{ 14 | Get MLE estimates of type of each cell 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/holik_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \docType{data} 4 | \name{holik_data} 5 | \alias{holik_data} 6 | \title{Example bulk RNA-seq data} 7 | \format{An object of class \code{list} of length 2.} 8 | \usage{ 9 | holik_data 10 | } 11 | \description{ 12 | An example bulk RNA-seq dataset from Holik et al. 
Nucleic Acids Research 2017 to 13 | demonstrate deriving marker genes 14 | } 15 | \examples{ 16 | data(holik_data) 17 | } 18 | \keyword{datasets} 19 | -------------------------------------------------------------------------------- /man/extract_expression_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{extract_expression_matrix} 4 | \alias{extract_expression_matrix} 5 | \title{Extract expression matrix from expression object} 6 | \usage{ 7 | extract_expression_matrix(exprs_obj, sce_assay = "counts") 8 | } 9 | \value{ 10 | The cleaned expression matrix (of counts) from whatever input to \code{cellassign} 11 | } 12 | \description{ 13 | Extract expression matrix from expression object 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/example_TME_markers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \docType{data} 4 | \name{example_TME_markers} 5 | \alias{example_TME_markers} 6 | \title{Example tumour microenvironment markers} 7 | \format{An object of class \code{list} of length 2.} 8 | \usage{ 9 | example_TME_markers 10 | } 11 | \description{ 12 | A set of example marker genes commonly used for profiling the 13 | human tumour microenvironment 14 | } 15 | \examples{ 16 | data(example_TME_markers) 17 | } 18 | \keyword{datasets} 19 | -------------------------------------------------------------------------------- /man/example_sce.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \docType{data} 4 | \name{example_sce} 5 | \alias{example_sce} 6 | \title{Example SingleCellExperiment} 7 | 
\format{An object of class \code{SingleCellExperiment} with 200 rows and 500 columns.} 8 | \usage{ 9 | example_sce 10 | } 11 | \description{ 12 | An example \code{SingleCellExperiment} for 10 marker genes and 500 cells. 13 | } 14 | \examples{ 15 | data(example_sce) 16 | } 17 | \seealso{ 18 | example_cellassign_fit 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/example_marker_mat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \docType{data} 4 | \name{example_marker_mat} 5 | \alias{example_marker_mat} 6 | \title{Example cell marker matrix} 7 | \format{An object of class \code{matrix} with 10 rows and 2 columns.} 8 | \usage{ 9 | example_marker_mat 10 | } 11 | \description{ 12 | An example matrix for 10 genes and 2 cell types showing the membership 13 | of marker genes to cell types 14 | } 15 | \examples{ 16 | data(example_marker_mat) 17 | } 18 | \seealso{ 19 | example_cellassign_fit 20 | } 21 | \keyword{datasets} 22 | -------------------------------------------------------------------------------- /man/example_cellassign_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \docType{data} 4 | \name{example_cellassign_fit} 5 | \alias{example_cellassign_fit} 6 | \title{Example cellassign fit} 7 | \format{An object of class \code{cellassign} of length 3.} 8 | \usage{ 9 | example_cellassign_fit 10 | } 11 | \description{ 12 | An example fit of calling \code{cellassign} on both 13 | \code{example_marker_mat} and \code{example_sce} 14 | } 15 | \examples{ 16 | data(example_cellassign_fit) 17 | } 18 | \seealso{ 19 | example_cellassign_fit 20 | } 21 | \keyword{datasets} 22 | 
-------------------------------------------------------------------------------- /man/print.cellassign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \name{print.cellassign} 4 | \alias{print.cellassign} 5 | \title{Print a \code{cellassign} fit} 6 | \usage{ 7 | \method{print}{cellassign}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{cellassign}} 11 | 12 | \item{...}{Additional arguments (unused)} 13 | } 14 | \value{ 15 | Prints a structured representation of the \code{cellassign} 16 | } 17 | \description{ 18 | Print a \code{cellassign} fit 19 | } 20 | \examples{ 21 | data(example_cellassign_fit) 22 | print(example_cellassign_fit) 23 | 24 | } 25 | -------------------------------------------------------------------------------- /man/mleparams.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \name{mleparams} 4 | \alias{mleparams} 5 | \alias{mleparams.cellassign} 6 | \title{Get the MLE parameter list of a \code{cellassign} fit} 7 | \usage{ 8 | mleparams(x) 9 | 10 | \method{mleparams}{cellassign}(x) 11 | } 12 | \arguments{ 13 | \item{x}{An object of class \code{cellassign} returned 14 | by a call to \code{cellassign(...)}} 15 | } 16 | \value{ 17 | A list of MLE parameter estimates from cellassign 18 | } 19 | \description{ 20 | Get the MLE parameter list of a \code{cellassign} fit 21 | } 22 | \examples{ 23 | data(example_cellassign_fit) 24 | mleparams(example_cellassign_fit) 25 | } 26 | -------------------------------------------------------------------------------- /man/cellprobs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | 
\name{cellprobs} 4 | \alias{cellprobs} 5 | \alias{cellprobs.cellassign} 6 | \title{Get the cell assignment probabilities of a \code{cellassign} fit} 7 | \usage{ 8 | cellprobs(x) 9 | 10 | \method{cellprobs}{cellassign}(x) 11 | } 12 | \arguments{ 13 | \item{x}{An object of class \code{cellassign} 14 | returned by a call to \code{cellassign(...)}} 15 | } 16 | \value{ 17 | A cell by celltype matrix with assignment probabilities 18 | } 19 | \description{ 20 | Get the MLE cell type assignment probabilities for each cell 21 | } 22 | \examples{ 23 | data(example_cellassign_fit) 24 | cellprobs(example_cellassign_fit) 25 | } 26 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | language: r 3 | warnings_are_errors: false 4 | dist: xenial 5 | r: bioc-devel 6 | 7 | branches: 8 | only: 9 | - bioc 10 | - master 11 | - basilisk 12 | 13 | r_packages: 14 | - devtools 15 | - rmarkdown 16 | 17 | biocpackages: 18 | - SingleCellExperiment 19 | 20 | 21 | cache: 22 | packages: true 23 | directories: 24 | - $HOME/.cache/pip 25 | 26 | addons: 27 | apt: 28 | sources: 29 | ubuntu-toolchain-r-test 30 | packages: 31 | - imagemagick 32 | - libmagick++-dev 33 | - wget 34 | - libatlas3-base 35 | - libatlas-dev 36 | - python-joblib 37 | - python-dev 38 | - libv8-dev 39 | 40 | before_install: 41 | - R -e 'Sys.setenv(PIP_QUIET=1); install.packages("tensorflow"); tensorflow::install_tensorflow(extra_packages="tensorflow-probability", version = "2.1.0")' 42 | - R -e 'tensorflow::tf_config()' 43 | 44 | -------------------------------------------------------------------------------- /man/celltypes.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \name{celltypes} 4 | \alias{celltypes} 5 | \alias{celltypes.cellassign} 6 | \title{Get the cell type assignments of a \code{cellassign} fit} 7 | \usage{ 8 | celltypes(x, assign_prob = 0.95) 9 | 10 | \method{celltypes}{cellassign}(x, assign_prob = 0.95) 11 | } 12 | \arguments{ 13 | \item{x}{An object of class \code{cellassign} returned by a call to \code{cellassign(...)}} 14 | 15 | \item{assign_prob}{The probability threshold above which a cell is assigned to a given cell type, 16 | otherwise "unassigned"} 17 | } 18 | \value{ 19 | A character vector with the MLE cell type for each cell, if the probability 20 | is greater than \code{assign_prob}. 21 | } 22 | \description{ 23 | Get the MLE cell type estimates for each cell 24 | } 25 | \examples{ 26 | data(example_cellassign_fit) 27 | celltypes(example_cellassign_fit) 28 | } 29 | -------------------------------------------------------------------------------- /man/inference_tensorflow.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/inference-tensorflow.R 3 | \name{inference_tensorflow} 4 | \alias{inference_tensorflow} 5 | \title{cellassign inference in tensorflow, semi-supervised version} 6 | \usage{ 7 | inference_tensorflow( 8 | Y, 9 | rho, 10 | s, 11 | X, 12 | G, 13 | C, 14 | N, 15 | P, 16 | B = 10, 17 | shrinkage, 18 | verbose = FALSE, 19 | n_batches = 1, 20 | rel_tol_adam = 1e-04, 21 | rel_tol_em = 1e-04, 22 | max_iter_adam = 1e+05, 23 | max_iter_em = 20, 24 | learning_rate = 1e-04, 25 | random_seed = NULL, 26 | min_delta = 2, 27 | dirichlet_concentration = rep(0.01, C), 28 | threads = 0 29 | ) 30 | } 31 | \value{ 32 | A list of MLE cell type calls, MLE parameter estimates, 33 | and log likelihoods during 
optimization. 34 | } 35 | \description{ 36 | cellassign inference in tensorflow, semi-supervised version 37 | } 38 | \keyword{internal} 39 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: cellassign 2 | Version: 0.99.21 3 | Title: Automated, probabilistic assignment of scRNA-seq to cell types 4 | Description: CellAssign assigns cells measured with scRNA-seq to both known and de novo cell types based on declaring certain genes as markers for different cell types. 5 | Authors@R: c( 6 | person("Allen", "Zhang", email = "alzhang@bccrc.ca", role = c("aut")), 7 | person("Kieran", "Campbell", email = "kieranrcampbell@gmail.com", role = c("aut", "cre")) 8 | ) 9 | License: Apache License (>= 2.0) 10 | Encoding: UTF-8 11 | Depends: R (>= 3.6) 12 | Imports: 13 | methods, 14 | stats, 15 | tensorflow, 16 | SummarizedExperiment, 17 | scran 18 | Suggests: 19 | knitr, 20 | SingleCellExperiment, 21 | rmarkdown, 22 | BiocStyle, 23 | dplyr, 24 | pheatmap, 25 | testthat, 26 | limma, 27 | org.Hs.eg.db, 28 | edgeR, 29 | matrixStats, 30 | plyr, 31 | magrittr, 32 | reticulate, 33 | magick 34 | biocViews: 35 | Software, 36 | Transcriptomics, 37 | GeneExpression, 38 | RNASeq, 39 | SingleCell 40 | LazyData: true 41 | ByteCompile: true 42 | Roxygen: list(markdown = TRUE) 43 | RoxygenNote: 7.0.2 44 | VignetteBuilder: knitr 45 | BugReports: https://github.com/irrationone/cellassign/issues 46 | -------------------------------------------------------------------------------- /man/marker_list_to_mat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{marker_list_to_mat} 4 | \alias{marker_list_to_mat} 5 | \title{Convert a list of marker genes to a binary matrix} 6 | \usage{ 7 | marker_list_to_mat(marker_list, 
include_other = TRUE) 8 | } 9 | \arguments{ 10 | \item{marker_list}{A list where each entry is named by a cell type and 11 | contains a character vector of gene names belonging to that cell type} 12 | 13 | \item{include_other}{If \code{TRUE} adds a column of zeros for cells that do not 14 | exhibit high expression of any marker gene to be binned into} 15 | } 16 | \value{ 17 | A gene by cell type binary matrix with 1 if a gene is a marker for 18 | a cell type and 0 otherwise 19 | } 20 | \description{ 21 | Given a list of cell types and marker genes, convert to a binary 22 | gene by cell type matrix required by cellassign. 23 | } 24 | \details{ 25 | This function takes a list of marker genes and converts it to a binary 26 | gene by cell type matrix. The input list should be the same 27 | length as the number of cell types with names corresponding to cell types. 28 | Each element of the list should be a character vector of the genes corresponding 29 | to that cell type. There is no requirement for mutually-exclusive marker genes. 30 | } 31 | \examples{ 32 | marker_list <- list( 33 | `cell_type_1` = c("geneA", "geneB"), 34 | `cell_type_2` = c("geneB", "geneC") 35 | ) 36 | marker_list_to_mat(marker_list) 37 | 38 | } 39 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 
10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (https://www.contributor-covenant.org), version 1.0.0, available at 25 | https://contributor-covenant.org/version/1/0/0/. 26 | -------------------------------------------------------------------------------- /man/simulate_cellassign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate.R 3 | \name{simulate_cellassign} 4 | \alias{simulate_cellassign} 5 | \title{Simulate from the cellassign model} 6 | \usage{ 7 | simulate_cellassign( 8 | rho, 9 | s, 10 | pi, 11 | delta, 12 | B = 20, 13 | a, 14 | beta, 15 | X = NULL, 16 | min_Y = 0, 17 | max_Y = 1000 18 | ) 19 | } 20 | \arguments{ 21 | \item{rho}{A gene by cell type binary matrix relating markers to cell types} 22 | 23 | \item{s}{A vector of cell-specific size factors} 24 | 25 | \item{pi}{An ordinal vector relating each cell to its true marker type assignment} 26 | 27 | \item{delta}{Gene by cell type matrix delta (all entries with corresponding zeros 28 | in rho will be ignored)} 29 | 30 | \item{B}{Granularity of spline-based fitting of dispersions} 31 | 32 | \item{a}{Alpha parameters for spline 
inference of dispersions} 33 | 34 | \item{beta}{A gene by covariate matrix of coefficients - the first column 35 | should correspond to the intercept (baseline expression) values} 36 | 37 | \item{X}{A cell by covariate matrix of covariates - the intercept column will 38 | always be added.} 39 | 40 | \item{min_Y, max_Y}{Lower and upper bounds of the evenly spaced grid of \code{B} basis means used for the dispersions} 41 | } 42 | \value{ 43 | An N by G matrix of simulated counts 44 | } 45 | \description{ 46 | Simulate RNA-seq counts from the cell-assign model 47 | } 48 | \details{ 49 | The number of genes, cells, and cell types is automatically 50 | inferred from the dimensions of rho (gene by cell-type) and 51 | s (vector of length number of cells). The specification of X 52 | is optional - a column of ones will always be added as an intercept. 53 | } 54 | \keyword{internal} 55 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. 
"?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /tests/testthat/test_cellassign.R: -------------------------------------------------------------------------------- 1 | context("Basic operations") 2 | 3 | test_that("cellassign(...) returns a valid object", { 4 | library(SummarizedExperiment) 5 | data(example_sce) 6 | data(example_marker_mat) 7 | N <- ncol(example_sce) 8 | G <- nrow(example_marker_mat) 9 | C <- ncol(example_marker_mat) 10 | 11 | fit <- cellassign(example_sce[rownames(example_marker_mat),], 12 | example_marker_mat, 13 | s = sizeFactors(example_sce), 14 | max_iter_adam = 2, 15 | max_iter_em = 2) 16 | 17 | expect_is(fit, "cellassign") 18 | 19 | cell_types <- fit$cell_type 20 | 21 | expect_equal(length(cell_types), N) 22 | 23 | cell_type_names <- sort(unique(cell_types)) 24 | 25 | expect_equal(cell_type_names, sort(colnames(example_marker_mat))) 26 | 27 | print(dim(fit$mle_params$gamma)) 28 | 29 | expect_equal(C, ncol(fit$mle_params$gamma)) 30 | 31 | expect_equal(N, nrow(fit$mle_params$gamma)) 32 | 33 | }) 34 | 35 | test_that("cellassign(...) 
returns a valid SingleCellExperiment", { 36 | library(SummarizedExperiment) 37 | data(example_sce) 38 | data(example_marker_mat) 39 | N <- ncol(example_sce) 40 | G <- nrow(example_marker_mat) 41 | C <- ncol(example_marker_mat) 42 | 43 | sce <- cellassign(example_sce[rownames(example_marker_mat),], 44 | example_marker_mat, 45 | s = sizeFactors(example_sce), 46 | max_iter_adam = 2, 47 | max_iter_em = 2, 48 | return_SCE = TRUE) 49 | 50 | expect_is(sce, "SingleCellExperiment") 51 | 52 | expect_true("cellassign_celltype" %in% names(colData(sce))) 53 | expect_true("cellassign" %in% names(sce@metadata)) 54 | 55 | }) 56 | 57 | 58 | test_that("marker_gene_list() works as required", { 59 | 60 | data(example_sce) 61 | data(example_marker_mat) 62 | 63 | marker_gene_list <- list( 64 | Group1 = c("Gene1", "Gene3", "Gene4", "Gene5", "Gene10"), 65 | Group2 = c("Gene2", "Gene6", "Gene7", "Gene8", "Gene9") 66 | ) 67 | 68 | mat <- marker_list_to_mat(marker_gene_list, include_other = FALSE) 69 | 70 | expect_equal(nrow(mat), 10) 71 | 72 | expect_equal(ncol(mat), 2) 73 | 74 | expect_equal(length(setdiff(unlist(marker_gene_list), rownames(mat))), 0) 75 | 76 | expect_equal(sum(mat), length(unique(unlist(marker_gene_list)))) 77 | 78 | fit <- cellassign(example_sce[rownames(mat),], 79 | marker_gene_list, 80 | s = sizeFactors(example_sce), 81 | max_iter_adam = 2, 82 | max_iter_em = 2) 83 | 84 | }) 85 | -------------------------------------------------------------------------------- /R/simulate.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Simulate from the cellassign model 4 | #' 5 | #' Simulate RNA-seq counts from the cell-assign model 6 | #' 7 | #' The number of genes, cells, and cell types is automatically 8 | #' inferred from the dimensions of rho (gene by cell-type) and 9 | #' s (vector of length number of cells). The specification of X 10 | #' is optional - a column of ones will always be added as an intercept. 
11 | #' 12 | #' @param rho A gene by cell type binary matrix relating markers to cell types 13 | #' @param s A vector of cell-specific size factors 14 | #' @param pi An ordinal vector relating each cell to its true marker type assignment 15 | #' @param delta Gene by cell type matrix delta (all entries with corresponding zeros 16 | #' in rho will be ignored) 17 | #' @param B Granularity of spline-based fitting of dispersions 18 | #' @param a Alpha parameters for spline inference of dispersions 19 | #' @param min_Y,max_Y Lower and upper bounds of the evenly spaced grid of B basis means used for the dispersions 20 | #' @param beta A gene by covariate matrix of coefficients - the first column 21 | #' should correspond to the intercept (baseline expression) values 22 | #' @param X A cell by covariate matrix of covariates - the intercept column will 23 | #' always be added. 24 | #' 25 | #' @return An N by G matrix of simulated counts 26 | #' 27 | #' @importFrom stats rnbinom 28 | #' 29 | #' @keywords internal 30 | simulate_cellassign <- function(rho, 31 | s, 32 | pi, 33 | delta, 34 | B = 20, 35 | a, 36 | beta, 37 | X = NULL, 38 | min_Y = 0, 39 | max_Y = 1000) { 40 | 41 | C <- ncol(rho) 42 | N <- length(s) 43 | G <- nrow(rho) 44 | P <- ncol(beta) 45 | B <- as.integer(B) 46 | 47 | stopifnot(length(pi) == N) 48 | stopifnot(nrow(beta) == G) 49 | stopifnot(ncol(delta) == C) 50 | stopifnot(nrow(delta) == G) 51 | 52 | X <- initialize_X(X, N) 53 | 54 | basis_means <- seq(from = min_Y, to = max_Y, length.out = B) 55 | b_init <- 2 * (basis_means[2] - basis_means[1])^2 56 | b <- exp(rep(-log(b_init), B)) 57 | LOWER_BOUND <- 1e-10 58 | 59 | stopifnot(ncol(X) == P) 60 | 61 | mean_mat <- exp(log(s) + X %*% t(beta) + t((rho * delta)[,pi])) 62 | 63 | mean_mat_tiled <- replicate(B, mean_mat) 64 | 65 | phi <- apply(a * exp(sweep((sweep(mean_mat_tiled, 3, basis_means))^2, 3, -b, '*')), c(1:2), sum) + LOWER_BOUND # NOTE(review): if `a` is a length-B vector, R recycles it along the first (cell) dimension, not the basis axis - confirm the intended shape of `a` 66 | 67 | counts <- sapply(seq_len(G), function(g) { 68 | rnbinom(N, mu = mean_mat[,g], size = phi[,g]) # phi is N x G like mean_mat, so gene g is column g (was phi[g,], a row of length G) 69 | }) 70 | 71 | 
counts 72 | } 73 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('body').scrollspy({ 13 | target: '#sidebar', 14 | offset: 60 15 | }); 16 | 17 | $('[data-toggle="tooltip"]').tooltip(); 18 | 19 | var cur_path = paths(location.pathname); 20 | var links = $("#navbar ul li a"); 21 | var max_length = -1; 22 | var pos = -1; 23 | for (var i = 0; i < links.length; i++) { 24 | if (links[i].getAttribute("href") === "#") 25 | continue; 26 | // Ignore external links 27 | if (links[i].host !== location.host) 28 | continue; 29 | 30 | var nav_path = paths(links[i].pathname); 31 | 32 | var length = prefix_length(nav_path, cur_path); 33 | if (length > max_length) { 34 | max_length = length; 35 | pos = i; 36 | } 37 | } 38 | 39 | // Add class to parent
  • , and enclosing
  • if in dropdown 40 | if (pos >= 0) { 41 | var menu_anchor = $(links[pos]); 42 | menu_anchor.parent().addClass("active"); 43 | menu_anchor.closest("li.dropdown").addClass("active"); 44 | } 45 | }); 46 | 47 | function paths(pathname) { 48 | var pieces = pathname.split("/"); 49 | pieces.shift(); // always starts with / 50 | 51 | var end = pieces[pieces.length - 1]; 52 | if (end === "index.html" || end === "") 53 | pieces.pop(); 54 | return(pieces); 55 | } 56 | 57 | // Returns -1 if not found 58 | function prefix_length(needle, haystack) { 59 | if (needle.length > haystack.length) 60 | return(-1); 61 | 62 | // Special case for length-0 haystack, since for loop won't run 63 | if (haystack.length === 0) { 64 | return(needle.length === 0 ? 0 : -1); 65 | } 66 | 67 | for (var i = 0; i < haystack.length; i++) { 68 | if (needle[i] != haystack[i]) 69 | return(i); 70 | } 71 | 72 | return(haystack.length); 73 | } 74 | 75 | /* Clipboard --------------------------*/ 76 | 77 | function changeTooltipMessage(element, msg) { 78 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 79 | element.setAttribute('data-original-title', msg); 80 | $(element).tooltip('show'); 81 | element.setAttribute('data-original-title', tooltipOriginalTitle); 82 | } 83 | 84 | if(ClipboardJS.isSupported()) { 85 | $(document).ready(function() { 86 | var copyButton = ""; 87 | 88 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 89 | 90 | // Insert copy buttons: 91 | $(copyButton).prependTo(".hasCopyButton"); 92 | 93 | // Initialize tooltips: 94 | $('.btn-copy-ex').tooltip({container: 'body'}); 95 | 96 | // Initialize clipboard: 97 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 98 | text: function(trigger) { 99 | return trigger.parentNode.textContent; 100 | } 101 | }); 102 | 103 | clipboardBtnCopies.on('success', function(e) { 104 | changeTooltipMessage(e.trigger, 'Copied!'); 105 | e.clearSelection(); 106 | }); 107 | 108 | clipboardBtnCopies.on('error', 
function() { 109 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 110 | }); 111 | }); 112 | } 113 | })(window.jQuery || window.$) 114 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cellassign 2 | 3 | [![Build Status](https://travis-ci.com/Irrationone/cellassign.svg?token=HqeTkKNZ9uXDwGpFxagC&branch=master)](https://travis-ci.com/Irrationone/cellassign) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/recipes/r-cellassign/README.html) 4 | 5 | 6 | `cellassign` automatically assigns single-cell RNA-seq data to known cell types across thousands of cells accounting for patient and batch specific effects. Information about *a priori* known markers cell types is provided as input to the model in the form of a (binary) marker gene by cell-type matrix. `cellassign` then probabilistically assigns each cell to a cell type, removing subjective biases from typical unsupervised clustering workflows. 7 | 8 |
    9 | 10 |
    11 | 12 | # Getting started 13 | 14 | ## Installation 15 | 16 | 17 | ### Installing from GitHub 18 | 19 | `cellassign` is built using Google's Tensorflow, and as such requires installation of the R package `tensorflow`: 20 | 21 | ``` r 22 | install.packages("tensorflow") 23 | tensorflow::install_tensorflow(extra_packages='tensorflow-probability', version = "2.1.0") 24 | ``` 25 | 26 | Please ensure this installs version 2 of tensorflow. You can check this by calling 27 | 28 | ```r 29 | tensorflow::tf_config() 30 | ``` 31 | 32 | ``` 33 | TensorFlow v2.1.0 (/usr/local/lib/python3.7/site-packages/tensorflow) 34 | ``` 35 | 36 | `cellassign` can then be installed from github: 37 | 38 | ``` r 39 | install.packages("devtools") # If not already installed 40 | devtools::install_github("Irrationone/cellassign") 41 | ``` 42 | 43 | 44 | ### Installing from conda 45 | 46 | With [conda](https://conda.io/miniconda.html), install the current release version of `cellassign` as follows: 47 | 48 | ``` r 49 | conda install -c conda-forge -c bioconda r-cellassign 50 | ``` 51 | 52 | ## Documentation 53 | 54 | Package documentation can be found [here](https://irrationone.github.io/cellassign/index.html). 
This includes the following vignettes: 55 | 56 | - [Assigning single-cells to known cell types with CellAssign](https://irrationone.github.io/cellassign/articles/introduction-to-cellassign.html) 57 | 58 | - [Constructing marker genes from purified bulk/scRNA-seq data](https://irrationone.github.io/cellassign/articles/constructing-markers-from-purified-data.html) 59 | 60 | ## Basic usage 61 | 62 | `cellassign` requires the following inputs: 63 | 64 | * `exprs_obj`: Cell-by-gene matrix of raw counts (or SingleCellExperiment with `counts` assay) 65 | * `marker_gene_info`: Binary gene-by-celltype marker gene matrix or list relating cell types to marker genes 66 | * `s`: Size factors 67 | * `X`: Design matrix for any patient/batch specific effects 68 | 69 | The model can be run as follows: 70 | 71 | ``` r 72 | cas <- cellassign(exprs_obj = gene_expression_data, 73 | marker_gene_info = marker_gene_info, 74 | s = s, 75 | X = X) 76 | ``` 77 | 78 | An example set of markers for the human tumour microenvironment can be loaded by calling 79 | 80 | ``` r 81 | data(example_TME_markers) 82 | 83 | ``` 84 | 85 | Please see the package vignette for details and caveats. 86 | 87 | # Paper 88 | 89 | [Probabilistic cell-type assignment of single-cell RNA-seq for tumor microenvironment profiling, _Nature Methods 2019_](https://www.nature.com/articles/s41592-019-0529-1) 90 | 91 | # Code of Conduct 92 | 93 | Please note that the 'cellassign' project is released with a 94 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). 95 | By contributing to this project, you agree to abide by its terms. 
96 | 97 | # Authors 98 | 99 | Allen W Zhang, University of British Columbia 100 | 101 | Kieran R Campbell, University of British Columbia 102 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | #' Convert a list of marker genes to a binary matrix 3 | #' 4 | #' Given a list of cell types and marker genes, convert to a binary 5 | #' cell type by gene matrix required by cellassign. 6 | #' 7 | #' This function takes a list of marker genes and converts it to a binary 8 | #' gene by cell type matrix. The input list should be the same 9 | #' length as the number of cell types with names corresponding to cell types. 10 | #' Each element of the list should be a character vector of the genes corresponding 11 | #' to that cell type. There is no requirement for mutually-exclusive marker genes. 12 | #' 13 | #' @param marker_list A list where each entry is named by a cell type and 14 | #' contains a character vector of gene names belonging to that cell type 15 | #' @param include_other If \code{TRUE} adds a column of zeros for cells that do not 16 | #' exhibit high expression of any marker gene to be binned into 17 | #' 18 | #' @return A cell type by gene binary matrix with 1 if a gene is a marker for 19 | #' a cell type and 0 otherwise 20 | #' 21 | #' @examples 22 | #' marker_list <- list( 23 | #' `cell_type_1` = c("geneA", "geneB"), 24 | #' `cell_type_2` = c("geneB", "geneC") 25 | #' ) 26 | #' marker_list_to_mat(marker_list) 27 | #' 28 | #' @export 29 | marker_list_to_mat <- function(marker_list, include_other = TRUE) { 30 | cell_types <- names(marker_list) 31 | 32 | if(is.null(cell_types)) { 33 | warning("Marker list has no cell type names - replacing with generics") 34 | cell_types <- paste0("cell_type_", seq_along(marker_list)) 35 | names(marker_list) <- cell_types 36 | } 37 | 38 | genes <- sort(unique(unlist(marker_list))) 39 | genes <- 
genes[nchar(genes) > 0] 40 | 41 | n_cell_types <- length(cell_types) 42 | n_genes <- length(genes) 43 | 44 | mat <- matrix(0, nrow = n_cell_types, ncol = n_genes) 45 | colnames(mat) <- genes 46 | rownames(mat) <- cell_types 47 | 48 | for(cell_type in names(marker_list)) { 49 | mat[cell_type,] <- genes %in% marker_list[[cell_type]] 50 | } 51 | 52 | if(include_other) { 53 | mat <- rbind(mat, `other` = 0) 54 | } 55 | 56 | mat <- t(mat) # Make it gene type by cell 57 | 58 | mat 59 | } 60 | 61 | #' Get MLE estimates of type of each cell 62 | #' 63 | #' @return A vector of MLE cell types, where the names are 64 | #' taken from the column names of the input matrix 65 | #' 66 | #' @keywords internal 67 | get_mle_cell_type <- function(gamma) { 68 | which_max <- apply(gamma, 1, which.max) 69 | colnames(gamma)[which_max] 70 | } 71 | 72 | #' Extract expression matrix from expression object 73 | #' 74 | #' @return The cleaned expression matrix (of counts) from whatever input to \code{cellassign} 75 | #' 76 | #' @keywords internal 77 | extract_expression_matrix <- function(exprs_obj, sce_assay = "counts") { 78 | if(is(exprs_obj, "SummarizedExperiment")) { 79 | Y <- t(as.matrix(SummarizedExperiment::assay(exprs_obj, sce_assay))) 80 | } else if(is.matrix(exprs_obj) && is.numeric(exprs_obj)) { 81 | Y <- exprs_obj 82 | } else { 83 | stop("Input exprs_obj must either be a SummarizedExperiment or numeric matrix of gene expression") 84 | } 85 | return(Y) 86 | } 87 | 88 | #' Create X matrix 89 | #' 90 | #' @importFrom stats var 91 | #' 92 | #' @return A cleaned covariate matrix given the input provided by the user 93 | #' 94 | #' @keywords internal 95 | initialize_X <- function(X, N, verbose = FALSE) { 96 | if(is.null(X)) { 97 | if (N > 0) { 98 | X <- matrix(1, nrow = N) 99 | } else { 100 | X <- matrix(nrow = 0, ncol = 1) 101 | } 102 | } else { 103 | # We can be a little intelligent about whether or not to add an intercept - 104 | # if any column variance of X is 0 then the associated 
covariate is constant 105 | # so we don't need to add an intercept 106 | col_vars <- apply(X, 2, var) 107 | if(any(col_vars == 0)) { 108 | if(verbose) { 109 | message("Intecept column detected in X") 110 | } 111 | } else { 112 | X <- cbind(1, X) 113 | if(verbose) { 114 | message("No intercept column detected in X - adding") 115 | } 116 | } 117 | } 118 | return(X) 119 | } 120 | 121 | 122 | #' Check for tensorflow 123 | #' 124 | #' @keywords internal 125 | #' 126 | #' @return Installs tensorflow if not already installed 127 | .onLoad <- function(libname, pkgname) { 128 | if(is.null(tensorflow::tf_version())) { 129 | stop("Tensorflow installation not detected. Please run 'tensorflow::install_tensorflow()' to continue...") 130 | } 131 | } 132 | 133 | -------------------------------------------------------------------------------- /man/cellassign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellassign.R 3 | \name{cellassign} 4 | \alias{cellassign} 5 | \title{Annotate cells to cell types using cellassign} 6 | \usage{ 7 | cellassign( 8 | exprs_obj, 9 | marker_gene_info, 10 | s = NULL, 11 | min_delta = 2, 12 | X = NULL, 13 | B = 10, 14 | shrinkage = TRUE, 15 | n_batches = 1, 16 | dirichlet_concentration = 0.01, 17 | rel_tol_adam = 1e-04, 18 | rel_tol_em = 1e-04, 19 | max_iter_adam = 1e+05, 20 | max_iter_em = 20, 21 | learning_rate = 0.1, 22 | verbose = TRUE, 23 | sce_assay = "counts", 24 | return_SCE = FALSE, 25 | num_runs = 1, 26 | threads = 0 27 | ) 28 | } 29 | \arguments{ 30 | \item{exprs_obj}{Either a matrix representing gene 31 | expression counts or a \code{SummarizedExperiment}. 32 | See details.} 33 | 34 | \item{marker_gene_info}{Information relating marker genes to cell types. 
35 | See details.} 36 | 37 | \item{s}{Numeric vector of cell size factors} 38 | 39 | \item{min_delta}{The minimum log fold change a marker gene must 40 | be over-expressed by in its cell type} 41 | 42 | \item{X}{Numeric matrix of external covariates. See details.} 43 | 44 | \item{B}{Number of bases to use for RBF dispersion function} 45 | 46 | \item{shrinkage}{Logical - should the delta parameters 47 | have hierarchical shrinkage?} 48 | 49 | \item{n_batches}{Number of data subsample batches to use in inference} 50 | 51 | \item{dirichlet_concentration}{Dirichlet concentration parameter for cell 52 | type abundances} 53 | 54 | \item{rel_tol_adam}{The change in Q function value (in pct) below which 55 | each optimization round is considered converged} 56 | 57 | \item{rel_tol_em}{The change in log marginal likelihood value (in pct) 58 | below which the EM algorithm is considered converged} 59 | 60 | \item{max_iter_adam}{Maximum number of ADAM iterations 61 | to perform in each M-step} 62 | 63 | \item{max_iter_em}{Maximum number of EM iterations to perform} 64 | 65 | \item{learning_rate}{Learning rate of ADAM optimization} 66 | 67 | \item{verbose}{Logical - should running info be printed?} 68 | 69 | \item{sce_assay}{The \code{assay} from the input \code{SingleCellExperiment} to use: this assay 70 | should always represent raw counts.} 71 | 72 | \item{return_SCE}{Logical - should a SingleCellExperiment be returned 73 | with the cell 74 | type annotations added? See details.} 75 | 76 | \item{num_runs}{Number of EM optimizations to perform (the one with the maximum 77 | log-marginal likelihood value will be used as the final).} 78 | 79 | \item{threads}{Maximum number of threads used by the algorithm 80 | (defaults to the number of cores available on the machine)} 81 | } 82 | \value{ 83 | An object of class \code{cellassign}. 
See \code{details} 84 | } 85 | \description{ 86 | Automatically annotate cells to known types based 87 | on the expression patterns of 88 | a priori known marker genes. 89 | } 90 | \details{ 91 | \strong{Input format} 92 | \code{exprs_obj} should be either a 93 | \code{SummarizedExperiment} (we recommend the 94 | \code{SingleCellExperiment} package) or a 95 | cell (row) by gene (column) matrix of 96 | \emph{raw} RNA-seq counts (do \strong{not} 97 | log-transform or otherwise normalize). 98 | 99 | \code{marker_gene_info} should either be 100 | \itemize{ 101 | \item A gene by cell type binary matrix, where a 1 indicates that a gene is a 102 | marker for a cell type, and 0 otherwise 103 | \item A list with names corresponding to cell types, where each entry is a 104 | vector of marker gene names. These are converted to the above matrix using 105 | the \code{marker_list_to_mat} function. 106 | } 107 | 108 | \strong{Cell size factors} 109 | If the cell size factors \code{s} are 110 | not provided they are computed using the 111 | \code{computeSumFactors} function from 112 | the \code{scran} package. 113 | 114 | \strong{Covariates} 115 | If \code{X} is not \code{NULL} then it should be 116 | an \code{N} by \code{P} matrix 117 | of covariates for \code{N} cells and \code{P} covariates. 118 | Such a matrix would typically 119 | be returned by a call to \code{model.matrix} 120 | \strong{with no intercept}. It is also highly 121 | recommended that any numerical (ie non-factor or one-hot-encoded) 122 | covariates be standardized 123 | to have mean 0 and standard deviation 1. 124 | 125 | \strong{cellassign} 126 | A call to \code{cellassign} returns an object 127 | of class \code{cellassign}. To access the 128 | MLE estimates of cell types, call \code{fit$cell_type}. 129 | To access all MLE parameter 130 | estimates, call \code{fit$mle_params}. 
131 | 132 | \strong{Returning a SingleCellExperiment} 133 | 134 | If \code{return_SCE} is true, a call to \code{cellassign} will return 135 | the input SingleCellExperiment, with the following added: 136 | \itemize{ 137 | \item A column \code{cellassign_celltype} to \code{colData(sce)} with the MAP 138 | estimate of the cell type 139 | \item A slot \code{sce@metadata$cellassign} containing the cellassign fit. 140 | Note that a \code{SingleCellExperiment} must be provided as \code{exprs_obj} 141 | for this option to be valid. 142 | } 143 | } 144 | \examples{ 145 | data(example_sce) 146 | data(example_marker_mat) 147 | 148 | fit <- em_result <- cellassign(example_sce[rownames(example_marker_mat),], 149 | marker_gene_info = example_marker_mat, 150 | s = colSums(SummarizedExperiment::assay(example_sce, "counts")), 151 | learning_rate = 1e-2, 152 | shrinkage = TRUE, 153 | verbose = FALSE) 154 | 155 | 156 | } 157 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Page not found (404) • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
    58 |
    59 | 108 | 109 | 110 | 111 |
    112 | 113 |
    114 |
    115 | 118 | 119 | Content not found. Please use links in the navbar. 120 | 121 |
    122 | 123 |
    124 | 125 | 126 | 127 |
    128 | 131 | 132 |
    133 |

    Site built with pkgdown 1.4.1.

    134 |
    135 | 136 |
    137 |
    138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
    58 |
    59 | 108 | 109 | 110 | 111 |
    112 | 113 |
    114 |
    115 | 118 | 119 |
      120 |
    • 121 |

      Allen Zhang. Author. 122 |

      123 |
    • 124 |
    • 125 |

      Kieran Campbell. Author, maintainer. 126 |

      127 |
    • 128 |
    129 | 130 |
    131 | 132 |
    133 | 134 | 135 | 136 |
    137 | 140 | 141 |
    142 |

    Site built with pkgdown 1.4.1.

    143 |
    144 | 145 |
    146 |
    147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
    58 |
    59 | 108 | 109 | 110 | 111 |
    112 | 113 |
    114 |
    115 | 118 | 119 | 128 |
    129 |
    130 | 131 | 132 |
    133 | 136 | 137 |
    138 |

    Site built with pkgdown 1.4.1.

    139 |
    140 | 141 |
    142 |
    143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | } 25 | 26 | body > .container .row { 27 | flex: 1 0 auto; 28 | } 29 | 30 | footer { 31 | margin-top: 45px; 32 | padding: 35px 0 36px; 33 | border-top: 1px solid #e5e5e5; 34 | color: #666; 35 | display: flex; 36 | flex-shrink: 0; 37 | } 38 | footer p { 39 | margin-bottom: 0; 40 | } 41 | footer div { 42 | flex: 1; 43 | } 44 | footer .pkgdown { 45 | text-align: right; 46 | } 47 | footer p { 48 | margin-bottom: 0; 49 | } 50 | 51 | img.icon { 52 | float: right; 53 | } 54 | 55 | img { 56 | max-width: 100%; 57 | } 58 | 59 | /* Fix bug in bootstrap (only seen in firefox) */ 60 | summary { 61 | display: list-item; 62 | } 63 | 64 | /* Typographic tweaking ---------------------------------*/ 65 | 66 | .contents .page-header { 67 | margin-top: calc(-60px + 1em); 68 | } 69 | 70 | /* Section anchors ---------------------------------*/ 71 | 72 | a.anchor { 73 | margin-left: -30px; 74 | display:inline-block; 75 | width: 30px; 76 | height: 30px; 77 | visibility: hidden; 78 | 79 | background-image: url(./link.svg); 80 | background-repeat: no-repeat; 81 | background-size: 20px 20px; 82 | background-position: center center; 83 | } 84 | 85 
| .hasAnchor:hover a.anchor { 86 | visibility: visible; 87 | } 88 | 89 | @media (max-width: 767px) { 90 | .hasAnchor:hover a.anchor { 91 | visibility: hidden; 92 | } 93 | } 94 | 95 | 96 | /* Fixes for fixed navbar --------------------------*/ 97 | 98 | .contents h1, .contents h2, .contents h3, .contents h4 { 99 | padding-top: 60px; 100 | margin-top: -40px; 101 | } 102 | 103 | /* Sidebar --------------------------*/ 104 | 105 | #sidebar { 106 | margin-top: 30px; 107 | position: -webkit-sticky; 108 | position: sticky; 109 | top: 70px; 110 | } 111 | #sidebar h2 { 112 | font-size: 1.5em; 113 | margin-top: 1em; 114 | } 115 | 116 | #sidebar h2:first-child { 117 | margin-top: 0; 118 | } 119 | 120 | #sidebar .list-unstyled li { 121 | margin-bottom: 0.5em; 122 | } 123 | 124 | .orcid { 125 | height: 16px; 126 | /* margins are required by official ORCID trademark and display guidelines */ 127 | margin-left:4px; 128 | margin-right:4px; 129 | vertical-align: middle; 130 | } 131 | 132 | /* Reference index & topics ----------------------------------------------- */ 133 | 134 | .ref-index th {font-weight: normal;} 135 | 136 | .ref-index td {vertical-align: top;} 137 | .ref-index .icon {width: 40px;} 138 | .ref-index .alias {width: 40%;} 139 | .ref-index-icons .alias {width: calc(40% - 40px);} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-arguments th {text-align: right; padding-right: 10px;} 143 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 144 | .ref-arguments .name {width: 20%;} 145 | .ref-arguments .desc {width: 80%;} 146 | 147 | /* Nice scrolling for wide elements --------------------------------------- */ 148 | 149 | table { 150 | display: block; 151 | overflow: auto; 152 | } 153 | 154 | /* Syntax highlighting ---------------------------------------------------- */ 155 | 156 | pre { 157 | word-wrap: normal; 158 | word-break: normal; 159 | border: 1px solid #eee; 160 | } 161 | 162 | pre, code { 163 | background-color: #f8f8f8; 164 | color: #333; 165 | 
} 166 | 167 | pre code { 168 | overflow: auto; 169 | word-wrap: normal; 170 | white-space: pre; 171 | } 172 | 173 | pre .img { 174 | margin: 5px 0; 175 | } 176 | 177 | pre .img img { 178 | background-color: #fff; 179 | display: block; 180 | height: auto; 181 | } 182 | 183 | code a, pre a { 184 | color: #375f84; 185 | } 186 | 187 | a.sourceLine:hover { 188 | text-decoration: none; 189 | } 190 | 191 | .fl {color: #1514b5;} 192 | .fu {color: #000000;} /* function */ 193 | .ch,.st {color: #036a07;} /* string */ 194 | .kw {color: #264D66;} /* keyword */ 195 | .co {color: #888888;} /* comment */ 196 | 197 | .message { color: black; font-weight: bolder;} 198 | .error { color: orange; font-weight: bolder;} 199 | .warning { color: #6A0366; font-weight: bolder;} 200 | 201 | /* Clipboard --------------------------*/ 202 | 203 | .hasCopyButton { 204 | position: relative; 205 | } 206 | 207 | .btn-copy-ex { 208 | position: absolute; 209 | right: 0; 210 | top: 0; 211 | visibility: hidden; 212 | } 213 | 214 | .hasCopyButton:hover button.btn-copy-ex { 215 | visibility: visible; 216 | } 217 | 218 | /* headroom.js ------------------------ */ 219 | 220 | .headroom { 221 | will-change: transform; 222 | transition: transform 200ms linear; 223 | } 224 | .headroom--pinned { 225 | transform: translateY(0%); 226 | } 227 | .headroom--unpinned { 228 | transform: translateY(-100%); 229 | } 230 | 231 | /* mark.js ----------------------------*/ 232 | 233 | mark { 234 | background-color: rgba(255, 255, 51, 0.5); 235 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 236 | padding: 1px; 237 | } 238 | 239 | /* vertical spacing after htmlwidgets */ 240 | .html-widget { 241 | margin-bottom: 10px; 242 | } 243 | 244 | /* fontawesome ------------------------ */ 245 | 246 | .fab { 247 | font-family: "Font Awesome 5 Brands" !important; 248 | } 249 | 250 | /* don't display links in code chunks when printing */ 251 | /* source: https://stackoverflow.com/a/10781533 */ 252 | @media print { 253 | code 
a:link:after, code a:visited:after { 254 | content: ""; 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /docs/reference/dot-onLoad.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Check for tensorflow — .onLoad • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 110 | 111 | 112 | 113 |
    114 | 115 |
    116 |
    117 | 122 | 123 |
    124 |

    Check for tensorflow

    125 |
    126 | 127 |
    .onLoad(libname, pkgname)
    128 | 129 | 130 |

    Value

    131 | 132 |

    Stops with an error if no TensorFlow installation is detected; it does not install TensorFlow itself

    133 | 134 |
    135 | 142 |
    143 | 144 | 145 |
    146 | 149 | 150 |
    151 |

    Site built with pkgdown 1.4.1.

    152 |
    153 | 154 |
    155 |
    156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /docs/reference/initialize_X.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Create X matrix — initialize_X • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 110 | 111 | 112 | 113 |
    114 | 115 |
    116 |
    117 | 122 | 123 |
    124 |

    Create X matrix

    125 |
    126 | 127 |
    initialize_X(X, N, verbose = FALSE)
    128 | 129 | 130 |

    Value

    131 | 132 |

    A cleaned covariate matrix given the input provided by the user

    133 | 134 |
    135 | 142 |
    143 | 144 | 145 |
    146 | 149 | 150 |
    151 |

    Site built with pkgdown 1.4.1.

    152 |
    153 | 154 |
    155 |
    156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /docs/reference/get_mle_cell_type.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Get MLE estimates of type of each cell — get_mle_cell_type • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 110 | 111 | 112 | 113 |
    114 | 115 |
    116 |
    117 | 122 | 123 |
    124 |

    Get MLE estimates of type of each cell

    125 |
    126 | 127 |
    get_mle_cell_type(gamma)
    128 | 129 | 130 |

    Value

    131 | 132 |

    A vector of MLE cell types, where the names are 133 | taken from the column names of the input matrix

    134 | 135 |
    136 | 143 |
    144 | 145 | 146 |
    147 | 150 | 151 |
    152 |

    Site built with pkgdown 1.4.1.

    153 |
    154 | 155 |
    156 |
    157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /docs/reference/extract_expression_matrix.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Extract expression matrix from expression object — extract_expression_matrix • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 110 | 111 | 112 | 113 |
    114 | 115 |
    116 |
    117 | 122 | 123 |
    124 |

    Extract expression matrix from expression object

    125 |
    126 | 127 |
    extract_expression_matrix(exprs_obj, sce_assay = "counts")
    128 | 129 | 130 |

    Value

    131 | 132 |

    The cleaned expression matrix (of counts) from whatever input to cellassign

    133 | 134 |
    135 | 142 |
    143 | 144 | 145 |
    146 | 149 | 150 |
    151 |

    Site built with pkgdown 1.4.1.

    152 |
    153 | 154 |
    155 |
    156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /docs/reference/holik_data.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example bulk RNA-seq data — holik_data • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
    61 |
    62 | 111 | 112 | 113 | 114 |
    115 | 116 |
    117 |
    118 | 123 | 124 |
    125 |

    An example bulk RNA-seq dataset from Holik et al. Nucleic Acids Research 2017 to 126 | demonstrate deriving marker genes

    127 |
    128 | 129 |
    holik_data
    130 | 131 | 132 |

    Format

    133 | 134 |

    An object of class list of length 2.

    135 | 136 |

    Examples

    137 |
    data(holik_data)
    138 |
    139 | 147 |
    148 | 149 | 150 |
    151 | 154 | 155 |
    156 |

    Site built with pkgdown 1.4.1.

    157 |
    158 | 159 |
    160 |
    161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /docs/reference/example_TME_markers.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example tumour microenvironment markers — example_TME_markers • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
    61 |
    62 | 111 | 112 | 113 | 114 |
    115 | 116 |
    117 |
    118 | 123 | 124 |
    125 |

    A set of example marker genes commonly used for profiling the 126 | human tumour microenvironment

    127 |
    128 | 129 |
    example_TME_markers
    130 | 131 | 132 |

    Format

    133 | 134 |

    An object of class list of length 2.

    135 | 136 |

    Examples

    137 |
    data(example_TME_markers)
    138 |
    139 | 147 |
    148 | 149 | 150 |
    151 | 154 | 155 |
    156 |

    Site built with pkgdown 1.4.1.

    157 |
    158 | 159 |
    160 |
    161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /docs/reference/example_sce.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example SingleCellExperiment — example_sce • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 110 | 111 | 112 | 113 |
    114 | 115 |
    116 |
    117 | 122 | 123 |
    124 |

    An example SingleCellExperiment for 200 genes (including the 10 marker genes) and 500 cells.

    125 |
    126 | 127 |
    example_sce
    128 | 129 | 130 |

    Format

    131 | 132 |

    An object of class SingleCellExperiment with 200 rows and 500 columns.

    133 |

    See also

    134 | 135 |

    example_cellassign_fit

    136 | 137 |

    Examples

    138 |
    data(example_sce)
    139 |
    140 | 149 |
    150 | 151 | 152 |
    153 | 156 | 157 |
    158 |

    Site built with pkgdown 1.4.1.

    159 |
    160 | 161 |
    162 |
    163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/example_cellassign_fit.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example cellassign fit — example_cellassign_fit • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
    61 |
    62 | 111 | 112 | 113 | 114 |
    115 | 116 |
    117 |
    118 | 123 | 124 |
    125 |

    An example fit of calling cellassign on both 126 | example_marker_mat and example_sce

    127 |
    128 | 129 |
    example_cellassign_fit
    130 | 131 | 132 |

    Format

    133 | 134 |

    An object of class cellassign of length 3.

    135 |

    See also

    136 | 137 |

    example_cellassign_fit

    138 | 139 |

    Examples

    140 |
    data(example_cellassign_fit)
    141 |
    142 | 151 |
    152 | 153 | 154 |
    155 | 158 | 159 |
    160 |

    Site built with pkgdown 1.4.1.

    161 |
    162 | 163 |
    164 |
    165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /docs/reference/example_marker_mat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example cell marker matrix — example_marker_mat • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
    61 |
    62 | 111 | 112 | 113 | 114 |
    115 | 116 |
    117 |
    118 | 123 | 124 |
    125 |

    An example matrix for 10 genes and 2 cell types showing the membership 126 | of marker genes to cell types

    127 |
    128 | 129 |
    example_marker_mat
    130 | 131 | 132 |

    Format

    133 | 134 |

    An object of class matrix with 10 rows and 2 columns.

    135 |

    See also

    136 | 137 |

    example_cellassign_fit

    138 | 139 |

    Examples

    140 |
    data(example_marker_mat)
    141 |
    142 | 151 |
    152 | 153 | 154 |
    155 | 158 | 159 |
    160 |

    Site built with pkgdown 1.4.1.

    161 |
    162 | 163 |
    164 |
    165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /vignettes/constructing-markers-from-purified-data.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Constructing marker genes from purified scRNA-seq data" 3 | author: "Allen W Zhang and Kieran R Campbell" 4 | date: "April 2019" 5 | package: "cellassign" 6 | output: BiocStyle::html_document 7 | vignette: > 8 | %\VignetteIndexEntry{Constructing marker genes from purified scRNA-seq data} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r setup, include = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | warnings = FALSE, 17 | messages = FALSE, 18 | comment = "#>" 19 | ) 20 | ``` 21 | 22 | ```{r, include = FALSE} 23 | suppressPackageStartupMessages({ 24 | library(magrittr) 25 | library(limma) 26 | library(org.Hs.eg.db) 27 | library(edgeR) 28 | library(matrixStats) 29 | library(pheatmap) 30 | library(cellassign) 31 | }) 32 | ``` 33 | 34 | # Overview 35 | 36 | In many situations, marker genes for cell types are either known _a priori_ 37 | as expert knowledge, or can be curated through databases such as the [Cellmark](http://biocc.hrbmu.edu.cn/CellMarker/) database. Alternatively, 38 | if purified expression data exists (either in bulk or single-cell form), it 39 | is possible to quickly derive marker genes using the `findMarkers` function 40 | in the [scran](http://bioconductor.org/packages/release/bioc/html/scran.html) 41 | R package. 42 | 43 | Below we detail a case study in deriving marker genes through a differential 44 | expression approach. 45 | 46 | # Data 47 | 48 | ## Overview 49 | 50 | We take bulk RNA-seq data from [Holik et al. Nucleic Acids Research 2017](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5389713/) to derive 51 | marker genes for 3 different cell lines. 
This is packaged with `cellassign` 52 | as `holik_data`: 53 | 54 | ```{r} 55 | data(holik_data) 56 | ``` 57 | 58 | which contains a matrix of counts, where each row is a gene 59 | (indexed by entrez ID) and each column is a sample: 60 | 61 | ```{r} 62 | head(holik_data$counts[,1:2]) 63 | ``` 64 | 65 | as well as a vector with the cell line of origin for each sample: 66 | 67 | ```{r} 68 | head(holik_data$cell_line) 69 | ``` 70 | 71 | 72 | ## Preparation 73 | 74 | We first provide a map from entrez IDs to gene symbols: 75 | 76 | ```{r} 77 | entrez_map <- select(org.Hs.eg.db, 78 | as.character(rownames(holik_data$counts)), 79 | c("SYMBOL"), "ENTREZID") 80 | gene_annotations <- entrez_map %>% 81 | dplyr::rename(GeneID=ENTREZID, 82 | Symbol=SYMBOL) 83 | ``` 84 | 85 | Then construct the `DGEList` object for input to `limma voom`, 86 | filtering out lowly expressed genes: 87 | 88 | ```{r} 89 | dge <- DGEList(counts = holik_data$counts, 90 | group = holik_data$cell_line, 91 | genes = gene_annotations, 92 | remove.zeros = TRUE) 93 | genes_to_keep <- rowSums(cpm(dge$counts) > 0.5) >= 2 94 | dge_filt <- dge[genes_to_keep,] 95 | ``` 96 | 97 | and finally calculate the normalization factors: 98 | 99 | ```{r} 100 | dge_filt <- calcNormFactors(dge_filt, method="TMM") 101 | ``` 102 | 103 | # Differential expression 104 | 105 | We next perform differential expression using Limma Voom on a 106 | subset of 3 cell lines: HCC827, H2228, H1975: 107 | 108 | ```{r} 109 | dge_subset <- dge_filt[,dge_filt$samples$group %in% c("HCC827", "H2228", "H1975")] 110 | design <- model.matrix(~ 0+dge_subset$samples$group) 111 | colnames(design) <- levels(dge_subset$samples$group) 112 | v <- voom(dge_subset, design) 113 | fit <- lmFit(v, design) 114 | ``` 115 | 116 | Next, fit contrasts to find differentially expressed genes between 117 | cell types: 118 | 119 | ```{r} 120 | contrast.matrix <- makeContrasts(H2228 - H1975, 121 | HCC827 - H1975, 122 | HCC827 - H2228, 123 | levels = design) 124 | fit2 <- 
contrasts.fit(fit, contrast.matrix) 125 | fit2 <- eBayes(fit2) 126 | ``` 127 | 128 | Finally, compute gene summary statistics and filter to only 129 | significantly differentially expressed genes (FDR < 0.05): 130 | 131 | ```{r} 132 | tt <- topTable(fit2, n=Inf) 133 | tt_sig <- tt %>% 134 | dplyr::filter(adj.P.Val < 0.05) 135 | 136 | head(tt_sig) 137 | ``` 138 | 139 | # Marker gene derivation 140 | 141 | To derive marker genes, we first create a log fold change 142 | matrix using H1975 as the baseline expression: 143 | 144 | ```{r} 145 | lfc_table <- tt_sig[,c("H2228...H1975", "HCC827...H1975")] 146 | lfc_table <- lfc_table %>% 147 | dplyr::mutate(H1975=0, 148 | H2228=H2228...H1975, 149 | HCC827=HCC827...H1975) %>% 150 | dplyr::select(H1975, H2228, HCC827) 151 | rownames(lfc_table) <- tt_sig$GeneID 152 | ``` 153 | 154 | 155 | Then, for each gene, we subtract the minimum log fold change, as 156 | we care about overexpression of genes relative to some minimum 157 | expression level, as this defines a marker gene: 158 | 159 | ```{r} 160 | lfc_table <- as.matrix(lfc_table) 161 | lfc_table <- lfc_table - rowMins(lfc_table) 162 | lfc_table <- as.data.frame(lfc_table) 163 | ``` 164 | 165 | We now define a helper function for turning log fold changes into 166 | a binary matrix. This takes a matrix and a threshold, and any values 167 | less than or equal to the threshold are set to 0, and all others to 1: 168 | 169 | ```{r} 170 | binarize <- function(x, threshold) { 171 | x[x <= threshold] <- -Inf 172 | x[x > -Inf] <- 1 173 | x[x == -Inf] <- 0 174 | return(x) 175 | } 176 | ``` 177 | 178 | Next, we implement a basic procedure for binarizing this matrix. 
179 | Essentially, we look for the largest 'gap' in expression for each gene, 180 | and the cell types with expression above this gap are designated as 181 | having that gene as a marker: 182 | 183 | ```{r} 184 | # Find the biggest difference 185 | maxdiffs <- apply(lfc_table, 1, function(x) max(diff(sort(x)))) 186 | 187 | # 188 | thres_vals <- apply(lfc_table, 1, function(x) sort(x)[which.max(diff(sort(x)))]) 189 | expr_mat_thres <- plyr::rbind.fill(lapply(1:nrow(lfc_table), function(i) { 190 | binarize(lfc_table[i,], thres_vals[i]) 191 | })) 192 | rownames(expr_mat_thres) <- rownames(lfc_table) 193 | marker_gene_mat <- expr_mat_thres[(maxdiffs >= quantile(maxdiffs, c(.99))) 194 | & (thres_vals <= log(2)),] %>% 195 | as.matrix 196 | ``` 197 | 198 | Finally, we add back in gene symbols rather than entrez ids: 199 | 200 | ```{r, warning=FALSE} 201 | suppressMessages({ 202 | symbols <- plyr::mapvalues( 203 | rownames(marker_gene_mat), 204 | from = gene_annotations$GeneID, 205 | to = gene_annotations$Symbol 206 | ) 207 | }) 208 | 209 | is_na <- is.na(symbols) 210 | 211 | marker_gene_mat <- marker_gene_mat[!is_na,] 212 | rownames(marker_gene_mat) <- symbols[!is_na] 213 | ``` 214 | 215 | And there we have a marker gene matrix for our cell types: 216 | 217 | ```{r} 218 | head(marker_gene_mat) 219 | ``` 220 | 221 | ```{r, fig.width = 10, fig.height = 3} 222 | pheatmap(t(marker_gene_mat)) 223 | ``` 224 | 225 | Note that the expression data used as input to `CellAssign` should 226 | contain only these marker genes. 
227 | 228 | # Technical 229 | 230 | ```{r} 231 | sessionInfo() 232 | ``` 233 | 234 | -------------------------------------------------------------------------------- /docs/reference/print.cellassign.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Print a <code>cellassign</code> fit — print.cellassign • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 110 | 111 | 112 | 113 |
    114 | 115 |
    116 |
    117 | 122 | 123 |
    124 |

    Print a cellassign fit

    125 |
    126 | 127 |
    # S3 method for cellassign
    128 | print(x, ...)
    129 | 130 |

    Arguments

    131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 |
    x

    An object of class cellassign

    ...

    Additional arguments (unused)

    142 | 143 |

    Value

    144 | 145 |

    Prints a structured representation of the cellassign fit

    146 | 147 |

    Examples

    148 |
    data(example_cellassign_fit) 149 | print(example_cellassign_fit)
    #> A cellassign fit for 500 cells, 10 genes, 2 cell types with 0 covariates 150 | #> To access cell types, call celltypes(x) 151 | #> To access cell type probabilities, call cellprobs(x) 152 | #>
    153 |
    154 |
    155 | 164 |
    165 | 166 | 167 |
    168 | 171 | 172 |
    173 |

    Site built with pkgdown 1.4.1.

    174 |
    175 | 176 |
    177 |
    178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /docs/reference/inference_tensorflow.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | cellassign inference in tensorflow, semi-supervised version — inference_tensorflow • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 110 | 111 | 112 | 113 |
    114 | 115 |
    116 |
    117 | 122 | 123 |
    124 |

    cellassign inference in tensorflow, semi-supervised version

    125 |
    126 | 127 |
    inference_tensorflow(Y, rho, s, X, G, C, N, P, B = 10, shrinkage,
    128 |   verbose = FALSE, n_batches = 1, rel_tol_adam = 1e-04,
    129 |   rel_tol_em = 1e-04, max_iter_adam = 1e+05, max_iter_em = 20,
    130 |   learning_rate = 1e-04, random_seed = NULL, min_delta = 2,
    131 |   dirichlet_concentration = rep(0.01, C))
    132 | 133 | 134 |

    Value

    135 | 136 |

    A list of MLE cell type calls, MLE parameter estimates, 137 | and log likelihoods during optimization.

    138 | 139 |
    140 | 147 |
    148 | 149 | 150 |
    151 | 154 | 155 |
    156 |

    Site built with pkgdown 1.4.1.

    157 |
    158 | 159 |
    160 |
    161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /docs/reference/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Function reference • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
    58 |
    59 | 108 | 109 | 110 | 111 |
    112 | 113 |
    114 |
    115 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 133 | 134 | 135 | 136 | 139 | 140 | 141 | 142 | 145 | 146 | 147 | 148 | 151 | 152 | 153 | 154 | 157 | 158 | 159 | 160 | 163 | 164 | 165 | 166 | 169 | 170 | 171 | 172 | 175 | 176 | 177 | 178 | 181 | 182 | 183 | 184 | 187 | 188 | 189 | 190 | 193 | 194 | 195 | 196 | 199 | 200 | 201 | 202 |
    130 |

    All functions

    131 |

    132 |
    137 |

    cellassign()

    138 |

    Annotate cells to cell types using cellassign

    143 |

    cellprobs()

    144 |

    Get the cell assignment probabilities of a cellassign fit

    149 |

    celltypes()

    150 |

    Get the cell type assignments of a cellassign fit

    155 |

    example_TME_markers

    156 |

    Example tumour microenvironment markers

    161 |

    example_cellassign_fit

    162 |

    Example cellassign fit

    167 |

    example_marker_mat

    168 |

    Example cell marker matrix

    173 |

    example_sce

    174 |

    Example SingleCellExperiment

    179 |

    holik_data

    180 |

    Example bulk RNA-seq data

    185 |

    marker_list_to_mat()

    186 |

    Convert a list of marker genes to a binary matrix

    191 |

    mleparams()

    192 |

    Get the MLE parameter list of a cellassign fit

    197 |

    print(<cellassign>)

    198 |

    Print a cellassign fit

    203 |
    204 | 205 | 211 |
    212 | 213 | 214 |
    215 | 218 | 219 |
    220 |

    Site built with pkgdown 1.4.1.

    221 |
    222 | 223 |
    224 |
    225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /docs/reference/simulate_cellassign.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Simulate from the cellassign model — simulate_cellassign • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 110 | 111 | 112 | 113 |
    114 | 115 |
    116 |
    117 | 122 | 123 |
    124 |

    Simulate RNA-seq counts from the cellassign model

    125 |
    126 | 127 |
    simulate_cellassign(rho, s, pi, delta, B = 20, a, beta, X = NULL,
    128 |   min_Y = 0, max_Y = 1000)
    129 | 130 |

    Arguments

    131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 162 | 163 | 164 | 165 | 167 | 168 | 169 | 170 | 171 | 172 |
    rho

    A gene by cell type binary matrix relating markers to cell types

    s

    A vector of cell-specific size factors

    pi

    An ordinal vector relating each cell to its true marker type assignment

    delta

    Gene by cell type matrix delta (all entries with corresponding zeros 148 | in rho will be ignored)

    B

    Granularity of spline-based fitting of dispersions

    a

    Alpha parameters for spline inference of dispersions

    beta

    A gene by covariate matrix of coefficients - the first column 161 | should correspond to the intercept (baseline expression) values

    X

    A cell by covariate matrix of covariates - the intercept column will 166 | always be added.

    b

    Beta parameters for spline inference of dispersions

    173 | 174 |

    Value

    175 | 176 |

    An N by G matrix of simulated counts

    177 |

    Details

    178 | 179 |

    The number of genes, cells, and cell types is automatically 180 | inferred from the dimensions of rho (gene by cell-type) and 181 | s (vector of length number of cells). The specification of X 182 | is optional - a column of ones will always be added as an intercept.

    183 | 184 |
    185 | 194 |
    195 | 196 | 197 |
    198 | 201 | 202 |
    203 |

    Site built with pkgdown 1.4.1.

    204 |
    205 | 206 |
    207 |
    208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | -------------------------------------------------------------------------------- /docs/reference/marker_list_to_mat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Convert a list of marker genes to a binary matrix — marker_list_to_mat • cellassign 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
    61 |
    62 | 111 | 112 | 113 | 114 |
    115 | 116 |
    117 |
    118 | 123 | 124 |
    125 |

    Given a list of cell types and marker genes, convert to a binary 126 | gene by cell type matrix required by cellassign.

    127 |
    128 | 129 |
    marker_list_to_mat(marker_list, include_other = TRUE)
    130 | 131 |

    Arguments

    132 | 133 | 134 | 135 | 136 | 138 | 139 | 140 | 141 | 143 | 144 |
    marker_list

    A list where each entry is named by a cell type and 137 | contains a character vector of gene names belonging to that cell type

    include_other

    If TRUE, adds a column of zeros into which cells that do not 142 | exhibit high expression of any marker gene are binned

    145 | 146 |

    Value

    147 | 148 |

    A gene by cell type binary matrix with 1 if a gene is a marker for 149 | a cell type and 0 otherwise

    150 |

    Details

    151 | 152 |

    This function takes a list of marker genes and converts it to a binary 153 | gene by cell type matrix. The input list should be the same 154 | length as the number of cell types with names corresponding to cell types. 155 | Each element of the list should be a character vector of the genes corresponding 156 | to that cell type. There is no requirement for mutually-exclusive marker genes.

    157 | 158 |

    Examples

    159 |
    marker_list <- list( 160 | `cell_type_1` = c("geneA", "geneB"), 161 | `cell_type_2` = c("geneB", "geneC") 162 | ) 163 | marker_list_to_mat(marker_list)
    #> cell_type_1 cell_type_2 other 164 | #> geneA 1 0 0 165 | #> geneB 1 1 0 166 | #> geneC 0 1 0
    167 |
    168 |
    169 | 179 |
    180 | 181 | 182 |
    183 | 186 | 187 |
    188 |

    Site built with pkgdown 1.4.1.

    189 |
    190 | 191 |
    192 |
    193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | -------------------------------------------------------------------------------- /vignettes/introduction-to-cellassign.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Assigning single-cells to known cell types with CellAssign" 3 | author: "Allen W Zhang and Kieran R Campbell" 4 | date: "October 2019" 5 | package: "cellassign" 6 | output: BiocStyle::html_document 7 | vignette: > 8 | %\VignetteIndexEntry{Introduction to CellAssign} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r setup, include = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | warnings = FALSE, 17 | messages = FALSE, 18 | comment = "#>" 19 | ) 20 | ``` 21 | 22 | # Overview 23 | 24 | `cellassign` assigns cells measured using single cell RNA sequencing 25 | to known cell types based on marker gene information. Unlike other 26 | methods for assigning cell types from single cell RNA-seq data, 27 | `cellassign` does not require labeled single cell or purified bulk 28 | expression data -- `cellassign` only needs to know whether or not 29 | each given gene is a marker of each cell type: 30 | 31 | ```{r, echo = FALSE} 32 | knitr::include_graphics("cellassign_overview.png") 33 | ``` 34 | 35 | Inference is performed using [Tensorflow](http://tensorflow.org/). For more 36 | details please see the 37 | [manuscript](https://www.biorxiv.org/content/10.1101/521914v1). 38 | 39 | # Installation 40 | 41 | `cellassign` depends on `tensorflow`, which can be installed as follows: 42 | 43 | ```{r, eval=FALSE} 44 | install.packages("tensorflow") 45 | library(tensorflow) 46 | install_tensorflow(extra_packages = "tensorflow-probability") 47 | ``` 48 | 49 | Please ensure this installs version 2 of tensorflow. 
You can check this by calling 50 | 51 | ```{r} 52 | tensorflow::tf_config() 53 | ``` 54 | 55 | You can confirm that the installation succeeded by running: 56 | 57 | ```{r, eval=FALSE} 58 | sess = tf$Session() 59 | hello <- tf$constant('Hello, TensorFlow!') 60 | sess$run(hello) 61 | ``` 62 | 63 | Note that the `tf` object is created automatically when the `tensorflow` library is loaded to provide access to the Tensorflow interface. 64 | 65 | For more details see the [Rstudio page on tensorflow installation](https://tensorflow.rstudio.com/tensorflow/articles/installation.html). 66 | 67 | `cellassign` can then be installed through Bioconductor via 68 | 69 | ```{r, eval=FALSE} 70 | BiocManager::install('cellassign') 71 | ``` 72 | 73 | or the development version through github using the `devtools` package : 74 | 75 | ```{r, eval=FALSE} 76 | devtools::install_github("Irrationone/cellassign") 77 | ``` 78 | 79 | 80 | # Basic usage 81 | 82 | We begin by illustrating basic usage of `cellassign` on some 83 | example data bundled with the package. First, load the relevant libraries: 84 | 85 | ```{r, results='hide', message=FALSE, warning=FALSE} 86 | library(SingleCellExperiment) 87 | library(cellassign) 88 | ``` 89 | 90 | We use an example `SingleCellExperiment` consisting of 200 genes 91 | and 500 cells: 92 | 93 | ```{r} 94 | data(example_sce) 95 | print(example_sce) 96 | ``` 97 | 98 | The true cell types are annotated for convenience in the `Group` 99 | slot of the `SingleCellExperiment`: 100 | 101 | ```{r} 102 | print(head(example_sce$Group)) 103 | ``` 104 | 105 | 106 | Also provided is an example gene-by-cell-type binary matrix, whose 107 | entries are 1 if a gene is a marker for a given cell type and 0 otherwise: 108 | 109 | ```{r} 110 | data(example_marker_mat) 111 | print(example_marker_mat) 112 | ``` 113 | 114 | We further require size factors for each cell. 
These are stored 115 | in `sizeFactors(example_sce)` - for your data we recommend computing 116 | them using the `computeSumFactors` function from the `scran` package. Note: **it is highly recommended to compute size factors using the full set of genes, before subsetting to markers for input to cellassign**. 117 | 118 | ```{r} 119 | s <- sizeFactors(example_sce) 120 | ``` 121 | 122 | We then call `cellassign` using the `cellassign()` function, passing 123 | in the above information. **It is critical that gene expression data containing only marker genes is used as input to cellassign**. We do this here by subsetting the input `SingleCellExperiment` using the row names (gene names) of the marker matrix. This also ensures that the order of the genes in the gene expression data matches the order of the genes in the marker matrix. 124 | 125 | 126 | ```{r} 127 | fit <- cellassign(exprs_obj = example_sce[rownames(example_marker_mat),], 128 | marker_gene_info = example_marker_mat, 129 | s = s, 130 | learning_rate = 1e-2, 131 | shrinkage = TRUE, 132 | verbose = FALSE) 133 | ``` 134 | 135 | This returns a `cellassign` object: 136 | 137 | ```{r} 138 | print(fit) 139 | ``` 140 | 141 | We can access the maximum likelihood estimates (MLE) of cell type using the `celltypes` function: 142 | 143 | ```{r} 144 | print(head(celltypes(fit))) 145 | ``` 146 | 147 | By default, this assigns a cell to a type if the probability of assignment is greater than 0.95, and "unassigned" otherwise. This can be changed with the `assign_prob` parameter. 
148 | 149 | It is also possible to get all MLE parameters using `mleparams`: 150 | 151 | ```{r} 152 | print(str(mleparams(fit))) 153 | ``` 154 | 155 | We can also visualize the probabilities of assignment using the `cellprobs` function that returns a probability matrix for each cell and cell type: 156 | 157 | ```{r} 158 | pheatmap::pheatmap(cellprobs(fit)) 159 | ``` 160 | 161 | 162 | Finally, since this is simulated data we can check the concordance 163 | with the true group values: 164 | 165 | ```{r} 166 | print(table(example_sce$Group, celltypes(fit))) 167 | ``` 168 | 169 | # Example set of markers for tumour microenvironment 170 | 171 | A set of example markers are included with the `cellassign` package 172 | for common cell types in the human tumour microenvironment. Users 173 | should be aware that 174 | 175 | 1. This set is provided as an _example_ only and we recommend 176 | researchers derive marker gene sets for their own use 177 | 2. The `cellassign` workflow is typically iterative, including 178 | ensuring all markers are expressed in your expression data, and 179 | removing cell types from the input marker matrix that do not appear 180 | to be present 181 | 182 | The marker genes are available for the following cell types: 183 | 184 | * B cells 185 | * T cells 186 | * Cytotoxic T cells 187 | * Monocyte/Macrophage 188 | * Epithelial cells 189 | * Myofibroblasts 190 | * Vascular smooth muscle cells 191 | * Endothelial cells 192 | 193 | These can be accessed by calling 194 | 195 | ```{r} 196 | data(example_TME_markers) 197 | ``` 198 | 199 | Note that this is a list of two marker lists: 200 | 201 | ```{r} 202 | names(example_TME_markers) 203 | ``` 204 | 205 | Where `symbol` contains gene symbols: 206 | 207 | ```{r} 208 | lapply(head(example_TME_markers$symbol, n = 4), head, n = 4) 209 | ``` 210 | 211 | and `ensembl` contains the equivalent ensembl gene ids: 212 | 213 | ```{r} 214 | lapply(head(example_TME_markers$ensembl, n = 4), head, n = 4) 215 | ``` 216 
| 217 | To use these with `cellassign` we can turn them into the binary 218 | marker by cell type matrix: 219 | 220 | ```{r} 221 | marker_mat <- marker_list_to_mat(example_TME_markers$ensembl) 222 | 223 | marker_mat[1:3, 1:3] 224 | ``` 225 | 226 | *Important*: the single cell experiment or input gene expression 227 | matrix should be subset accordingly to match the rows of the marker 228 | input matrix, e.g. if `sce` is a `SingleCellExperiment` with ensembl 229 | IDs as rownames then call 230 | 231 | ```{r, eval = FALSE} 232 | sce_marker <- sce[intersect(rownames(marker_mat), rownames(sce)),] 233 | ``` 234 | 235 | Note that the rows in the single cell experiment or gene expression 236 | matrix should be ordered identically to those in the marker input 237 | matrix. 238 | 239 | You can then proceed using `cellassign` as before. 240 | 241 | 242 | # Advanced usage 243 | 244 | ## Options for a `cellassign()` call 245 | 246 | There are several options to a call to `cellassign` that can alter 247 | the results: 248 | 249 | * `min_delta`: the minimum log-fold change in expression above which a 250 | gene must be over-expressed in the cells of which it is a marker compared to 251 | all others 252 | * `X`: a covariate matrix, see section below 253 | * `shrinkage`: whether to impose a hierarchical prior on the values of `delta` 254 | (cell type specific increase in expression of marker genes) 255 | 256 | 257 | ## Constructing a marker gene matrix 258 | 259 | Here we demonstrate a method of constructing the binary marker gene 260 | matrix that encodes our *a priori* knowledge of cell types. 
261 | 262 | For two types of cells (`Group1` and `Group2`) we know *a priori* several good 263 | marker genes, e.g.: 264 | 265 | | Cell type | Genes | 266 | | --------- | ----- | 267 | | Group1 | Gene186, Gene269, Gene526, Gene536, Gene994 | 268 | | Group2 | Gene205, Gene575, Gene754, Gene773, Gene949 | 269 | 270 | To use this in `cellassign`, we must turn this into a *named list*, where 271 | the names are the cell types and the entries are marker genes 272 | (not necessarily mutually exclusive) for each cell type: 273 | 274 | ```{r} 275 | marker_gene_list <- list( 276 | Group1 = c("Gene186", "Gene269", "Gene526", "Gene536", "Gene994"), 277 | Group2 = c("Gene205", "Gene575", "Gene754", "Gene773", "Gene949") 278 | ) 279 | 280 | print(str(marker_gene_list)) 281 | ``` 282 | 283 | We can then directly provide this to `cellassign` or turn it into a binary 284 | marker gene matrix first using the `marker_list_to_mat` function: 285 | 286 | ```{r} 287 | print(marker_list_to_mat(marker_gene_list)) 288 | ``` 289 | 290 | This has automatically included an `other` group for cells that do not fall 291 | into either type - this can be excluded by setting `include_other = FALSE`. 292 | 293 | ## Adding covariates 294 | 295 | Covariates corresponding to batch, sample, or patient-specific effects can 296 | be included in the `cellassign` model. For example, if we have two covariates 297 | `x1` and `x2`: 298 | 299 | ```{r} 300 | N <- ncol(example_sce) 301 | x1 <- rnorm(N) 302 | x2 <- rnorm(N) 303 | ``` 304 | 305 | We can construct a design matrix using the `model.matrix` function in R: 306 | 307 | ```{r} 308 | X <- model.matrix(~ 0 + x1 + x2) 309 | ``` 310 | 311 | Note we explicitly set no intercept by passing in `0` in the beginning. 
312 | We can then perform an equivalent cell assignment passing this in also: 313 | 314 | ```{r, eval = FALSE} 315 | fit <- cellassign(exprs_obj = example_sce, 316 | marker_gene_info = example_marker_mat, 317 | X = X, 318 | s = s, 319 | learning_rate = 1e-2, 320 | shrinkage = TRUE, 321 | verbose = FALSE) 322 | ``` 323 | 324 | 325 | # Technical 326 | 327 | ```{r} 328 | sessionInfo() 329 | ``` 330 | 331 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | ============== 3 | 4 | _Version 2.0, January 2004_ 5 | _<http://www.apache.org/licenses/>_ 6 | 7 | ### Terms and Conditions for use, reproduction, and distribution 8 | 9 | #### 1. Definitions 10 | 11 | “License” shall mean the terms and conditions for use, reproduction, and 12 | distribution as defined by Sections 1 through 9 of this document. 13 | 14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright 15 | owner that is granting the License. 16 | 17 | “Legal Entity” shall mean the union of the acting entity and all other entities 18 | that control, are controlled by, or are under common control with that entity. 19 | For the purposes of this definition, “control” means **(i)** the power, direct or 20 | indirect, to cause the direction or management of such entity, whether by 21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the 22 | outstanding shares, or **(iii)** beneficial ownership of such entity. 23 | 24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising 25 | permissions granted by this License. 26 | 27 | “Source” form shall mean the preferred form for making modifications, including 28 | but not limited to software source code, documentation source, and configuration 29 | files.
30 | 31 | “Object” form shall mean any form resulting from mechanical transformation or 32 | translation of a Source form, including but not limited to compiled object code, 33 | generated documentation, and conversions to other media types. 34 | 35 | “Work” shall mean the work of authorship, whether in Source or Object form, made 36 | available under the License, as indicated by a copyright notice that is included 37 | in or attached to the work (an example is provided in the Appendix below). 38 | 39 | “Derivative Works” shall mean any work, whether in Source or Object form, that 40 | is based on (or derived from) the Work and for which the editorial revisions, 41 | annotations, elaborations, or other modifications represent, as a whole, an 42 | original work of authorship. For the purposes of this License, Derivative Works 43 | shall not include works that remain separable from, or merely link (or bind by 44 | name) to the interfaces of, the Work and Derivative Works thereof. 45 | 46 | “Contribution” shall mean any work of authorship, including the original version 47 | of the Work and any modifications or additions to that Work or Derivative Works 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 49 | by the copyright owner or by an individual or Legal Entity authorized to submit 50 | on behalf of the copyright owner. 
For the purposes of this definition, 51 | “submitted” means any form of electronic, verbal, or written communication sent 52 | to the Licensor or its representatives, including but not limited to 53 | communication on electronic mailing lists, source code control systems, and 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for 55 | the purpose of discussing and improving the Work, but excluding communication 56 | that is conspicuously marked or otherwise designated in writing by the copyright 57 | owner as “Not a Contribution.” 58 | 59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf 60 | of whom a Contribution has been received by Licensor and subsequently 61 | incorporated within the Work. 62 | 63 | #### 2. Grant of Copyright License 64 | 65 | Subject to the terms and conditions of this License, each Contributor hereby 66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 67 | irrevocable copyright license to reproduce, prepare Derivative Works of, 68 | publicly display, publicly perform, sublicense, and distribute the Work and such 69 | Derivative Works in Source or Object form. 70 | 71 | #### 3. Grant of Patent License 72 | 73 | Subject to the terms and conditions of this License, each Contributor hereby 74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 75 | irrevocable (except as stated in this section) patent license to make, have 76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 77 | such license applies only to those patent claims licensable by such Contributor 78 | that are necessarily infringed by their Contribution(s) alone or by combination 79 | of their Contribution(s) with the Work to which such Contribution(s) was 80 | submitted. 
If You institute patent litigation against any entity (including a 81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 82 | Contribution incorporated within the Work constitutes direct or contributory 83 | patent infringement, then any patent licenses granted to You under this License 84 | for that Work shall terminate as of the date such litigation is filed. 85 | 86 | #### 4. Redistribution 87 | 88 | You may reproduce and distribute copies of the Work or Derivative Works thereof 89 | in any medium, with or without modifications, and in Source or Object form, 90 | provided that You meet the following conditions: 91 | 92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of 93 | this License; and 94 | * **(b)** You must cause any modified files to carry prominent notices stating that You 95 | changed the files; and 96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute, 97 | all copyright, patent, trademark, and attribution notices from the Source form 98 | of the Work, excluding those notices that do not pertain to any part of the 99 | Derivative Works; and 100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any 101 | Derivative Works that You distribute must include a readable copy of the 102 | attribution notices contained within such NOTICE file, excluding those notices 103 | that do not pertain to any part of the Derivative Works, in at least one of the 104 | following places: within a NOTICE text file distributed as part of the 105 | Derivative Works; within the Source form or documentation, if provided along 106 | with the Derivative Works; or, within a display generated by the Derivative 107 | Works, if and wherever such third-party notices normally appear. The contents of 108 | the NOTICE file are for informational purposes only and do not modify the 109 | License. 
You may add Your own attribution notices within Derivative Works that 110 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 111 | provided that such additional attribution notices cannot be construed as 112 | modifying the License. 113 | 114 | You may add Your own copyright statement to Your modifications and may provide 115 | additional or different license terms and conditions for use, reproduction, or 116 | distribution of Your modifications, or for any such Derivative Works as a whole, 117 | provided Your use, reproduction, and distribution of the Work otherwise complies 118 | with the conditions stated in this License. 119 | 120 | #### 5. Submission of Contributions 121 | 122 | Unless You explicitly state otherwise, any Contribution intentionally submitted 123 | for inclusion in the Work by You to the Licensor shall be under the terms and 124 | conditions of this License, without any additional terms or conditions. 125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 126 | any separate license agreement you may have executed with Licensor regarding 127 | such Contributions. 128 | 129 | #### 6. Trademarks 130 | 131 | This License does not grant permission to use the trade names, trademarks, 132 | service marks, or product names of the Licensor, except as required for 133 | reasonable and customary use in describing the origin of the Work and 134 | reproducing the content of the NOTICE file. 135 | 136 | #### 7. Disclaimer of Warranty 137 | 138 | Unless required by applicable law or agreed to in writing, Licensor provides the 139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, 140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 141 | including, without limitation, any warranties or conditions of TITLE, 142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are 143 | solely responsible for determining the appropriateness of using or 144 | redistributing the Work and assume any risks associated with Your exercise of 145 | permissions under this License. 146 | 147 | #### 8. Limitation of Liability 148 | 149 | In no event and under no legal theory, whether in tort (including negligence), 150 | contract, or otherwise, unless required by applicable law (such as deliberate 151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 152 | liable to You for damages, including any direct, indirect, special, incidental, 153 | or consequential damages of any character arising as a result of this License or 154 | out of the use or inability to use the Work (including but not limited to 155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 156 | any and all other commercial damages or losses), even if such Contributor has 157 | been advised of the possibility of such damages. 158 | 159 | #### 9. Accepting Warranty or Additional Liability 160 | 161 | While redistributing the Work or Derivative Works thereof, You may choose to 162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 163 | other liability obligations and/or rights consistent with this License. However, 164 | in accepting such obligations, You may act only on Your own behalf and on Your 165 | sole responsibility, not on behalf of any other Contributor, and only if You 166 | agree to indemnify, defend, and hold each Contributor harmless for any liability 167 | incurred by, or claims asserted against, such Contributor by reason of your 168 | accepting any such warranty or additional liability. 
169 | 170 | _END OF TERMS AND CONDITIONS_ 171 | 172 | ### APPENDIX: How to apply the Apache License to your work 173 | 174 | To apply the Apache License to your work, attach the following boilerplate 175 | notice, with the fields enclosed by brackets `[]` replaced with your own 176 | identifying information. (Don't include the brackets!) The text should be 177 | enclosed in the appropriate comment syntax for the file format. We also 178 | recommend that a file or class name and description of purpose be included on 179 | the same “printed page” as the copyright notice for easier identification within 180 | third-party archives. 181 | 182 | Copyright 2018 Apache 2.0 183 | 184 | Licensed under the Apache License, Version 2.0 (the "License"); 185 | you may not use this file except in compliance with the License. 186 | You may obtain a copy of the License at 187 | 188 | http://www.apache.org/licenses/LICENSE-2.0 189 | 190 | Unless required by applicable law or agreed to in writing, software 191 | distributed under the License is distributed on an "AS IS" BASIS, 192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 193 | See the License for the specific language governing permissions and 194 | limitations under the License. 
#' Stop gradients for selected entries of a tensor
#'
#' Builds a tensor with the same values as \code{target} in which only the
#' entries where \code{mask} is \code{TRUE} contribute gradients; all other
#' entries are routed through \code{tf$stop_gradient}.
#' Taken from https://github.com/tensorflow/tensorflow/issues/9162
#'
#' @param target Tensor whose gradient should be partially blocked.
#' @param mask Boolean tensor that is multiplied element-wise with
#'   \code{target} after being cast to \code{target$dtype}; \code{TRUE}
#'   marks entries that stay trainable.
#'
#' @return The sum of the gradient-stopped part (\code{!mask * target}) and
#'   the trainable part (\code{mask * target}).
#'
#' @keywords internal
entry_stop_gradients <- function(target, mask) {
  mask_h <- tf$logical_not(mask)
  mask <- tf$cast(mask, dtype = target$dtype)
  mask_h <- tf$cast(mask_h, dtype = target$dtype)

  tf$add(tf$stop_gradient(tf$multiply(mask_h, target)), tf$multiply(mask, target))
}



#' cellassign inference in tensorflow, semi-supervised version
#'
#' Builds a TensorFlow (v1 compatibility mode) graph for a negative binomial
#' mixture over \code{C} cell types and fits it with an EM loop: the E-step
#' evaluates the cell type responsibilities \code{gamma}, the M-step
#' minimises the expected negative log joint \code{Q} with Adam.
#'
#' @param Y Expression matrix fed to a float64 placeholder of shape
#'   (NULL, G); rows are subset per mini-batch and \code{colMeans(Y)} is used
#'   to initialise the first column of \code{beta}.
#' @param rho Matrix fed to a placeholder of shape (G, C). It is cast to
#'   boolean to decide which entries of \code{delta} are trainable, and its
#'   dimnames are used to label the returned parameters.
#' @param s Vector fed to a 1-d placeholder; \code{log(s)} is added to the
#'   linear predictor (presumably per-cell size factors -- confirm with the
#'   caller).
#' @param X Design matrix fed to a placeholder of shape (NULL, P).
#' @param G,C,N,P Dimensions used to shape the graph: columns of \code{Y},
#'   columns of \code{rho}, number of rows to split into batches, and columns
#'   of \code{X}.
#' @param B Number of basis functions used for the dispersion \code{phi};
#'   their centres are equally spaced between \code{min(Y)} and \code{max(Y)}.
#' @param shrinkage If \code{TRUE}, a Normal prior with trainable location
#'   and scale is placed on \code{delta_log} and added to the objective.
#' @param verbose If \code{TRUE}, progress is reported with \code{message()}.
#' @param n_batches Number of mini-batches the \code{N} rows are randomly
#'   split into.
#' @param rel_tol_adam Adam stops once the relative decrease of \code{Q},
#'   checked every 20 iterations, is no larger than this value.
#' @param rel_tol_em EM stops once the relative change of the monitored log
#'   likelihood falls below this value.
#' @param max_iter_adam Maximum number of Adam steps per M-step.
#' @param max_iter_em Maximum number of EM iterations.
#' @param learning_rate Learning rate passed to \code{AdamOptimizer}.
#' @param random_seed Seed passed to the random initialisers of
#'   \code{delta_log} and \code{theta_logit}.
#' @param min_delta \code{delta_log} is constrained to be at least
#'   \code{log(min_delta)}.
#' @param dirichlet_concentration Concentration of the Dirichlet prior on the
#'   cell type proportions \code{theta}.
#' @param threads Value used for both \code{intra_op_parallelism_threads} and
#'   \code{inter_op_parallelism_threads}; coerced to integer.
#'
#' @import tensorflow
#'
#' @return A list of MLE cell type calls, MLE parameter estimates,
#' and log likelihoods during optimization.
#'
#' @keywords internal
inference_tensorflow <- function(Y,
                                 rho,
                                 s,
                                 X,
                                 G,
                                 C,
                                 N,
                                 P,
                                 B = 10,
                                 shrinkage,
                                 verbose = FALSE,
                                 n_batches = 1,
                                 rel_tol_adam = 1e-4,
                                 rel_tol_em = 1e-4,
                                 max_iter_adam = 1e5,
                                 max_iter_em = 20,
                                 learning_rate = 1e-4,
                                 random_seed = NULL,
                                 min_delta = 2,
                                 dirichlet_concentration = rep(1e-2, C),
                                 threads = 0) {

  # Work against the TF1 graph/session API for the rest of this function.
  tf <- tf$compat$v1
  tf$disable_v2_behavior()

  tfp <- reticulate::import('tensorflow_probability')
  tfd <- tfp$distributions


  tf$reset_default_graph()

  # Data placeholders
  Y_ <- tf$placeholder(tf$float64, shape = shape(NULL, G), name = "Y_")
  X_ <- tf$placeholder(tf$float64, shape = shape(NULL, P), name = "X_")
  s_ <- tf$placeholder(tf$float64, shape = shape(NULL), name = "s_")
  rho_ <- tf$placeholder(tf$float64, shape = shape(G,C), name = "rho_")

  # Not fed or read anywhere below; kept so the graph definition is unchanged.
  sample_idx <- tf$placeholder(tf$int32, shape = shape(NULL), name = "sample_idx")

  # Added for splines
  B <- as.integer(B)

  basis_means_fixed <- seq(from = min(Y), to = max(Y), length.out = B)
  basis_means <- tf$constant(basis_means_fixed, dtype = tf$float64)

  # Basis width derived from the spacing between neighbouring centres.
  b_init <- 2 * (basis_means_fixed[2] - basis_means_fixed[1])^2

  LOWER_BOUND <- 1e-10

  # Variables

  ## Shrinkage prior on delta
  if (shrinkage) {
    delta_log_mean <- tf$Variable(0, dtype = tf$float64)
    delta_log_variance <- tf$Variable(1, dtype = tf$float64) # May need to bound this or put a prior over this
  }

  ## Regular variables
  # delta_log is clipped from below at log(min_delta) after each update.
  delta_log <- tf$Variable(tf$random_uniform(shape(G,C),
                                             minval = -2,
                                             maxval = 2,
                                             seed = random_seed,
                                             dtype = tf$float64),
                           dtype = tf$float64,
                           constraint = function(x) {
                             tf$clip_by_value(x,
                                              tf$constant(log(min_delta),
                                                          dtype = tf$float64),
                                              tf$constant(Inf, dtype = tf$float64))
                           })

  # beta <- tf$Variable(tf$random_normal(shape(G,P),
  #                                      mean = 0,
  #                                      stddev = 1,
  #                                      seed = random_seed,
  #                                      dtype = tf$float64),
  #                     dtype = tf$float64)

  # First column of beta starts at the standardised column means of Y,
  # all remaining covariate coefficients start at zero.
  beta_0_init <- scale(colMeans(Y))
  beta_init <- cbind(beta_0_init,
                     matrix(0, nrow = G, ncol = P-1))
  beta <- tf$Variable(tf$constant(beta_init, dtype = tf$float64),
                      dtype = tf$float64)

  theta_logit <- tf$Variable(tf$random_normal(shape(C),
                                              mean = 0,
                                              stddev = 1,
                                              seed = random_seed,
                                              dtype = tf$float64),
                             dtype = tf$float64)

  ## Spline variables
  # `a` (basis weights) is trainable, `b` (inverse widths) is a constant.
  a <- tf$exp(tf$Variable(tf$zeros(shape = B, dtype = tf$float64)))
  b <- tf$exp(tf$constant(rep(-log(b_init), B), dtype = tf$float64))

  # Stop gradient for irrelevant entries of delta_log
  delta_log <- entry_stop_gradients(delta_log, tf$cast(rho_, tf$bool))

  # Transformed variables
  delta <- tf$exp(delta_log)
  theta_log <- tf$nn$log_softmax(theta_logit)

  # Model likelihood
  base_mean <- tf$transpose(tf$einsum('np,gp->gn', X_, beta) +
                              tf$log(s_))

  # Replicate the base mean once per cell type (lapply avoids shadowing `c`).
  base_mean_list <- lapply(seq_len(C), function(k) base_mean)
  mu_ngc <- tf$add(tf$stack(base_mean_list, 2),
                   tf$multiply(delta, rho_),
                   name = "adding_base_mean_to_delta_rho")

  mu_cng <- tf$transpose(mu_ngc, shape(2,0,1))

  mu_cngb <- tf$tile(tf$expand_dims(mu_cng, axis = 3L), c(1L, 1L, 1L, B))

  # Dispersion as a sum of radial basis functions of the log mean.
  phi_cng <- tf$reduce_sum(a * tf$exp(-b * tf$square(mu_cngb - basis_means)), 3L) +
    LOWER_BOUND
  phi <- tf$transpose(phi_cng, shape(1,2,0))

  mu_ngc <- tf$transpose(mu_cng, shape(1,2,0))

  mu_ngc <- tf$exp(mu_ngc)

  p <- mu_ngc / (mu_ngc + phi)

  nb_pdf <- tfd$NegativeBinomial(probs = p, total_count = phi)


  Y_tensor_list <- lapply(seq_len(C), function(k) Y_)
  Y__ <- tf$stack(Y_tensor_list, axis = 2)

  y_log_prob_raw <- nb_pdf$log_prob(Y__)
  y_log_prob <- tf$transpose(y_log_prob_raw, shape(0,2,1))
  y_log_prob_sum <- tf$reduce_sum(y_log_prob, 2L) + theta_log
  p_y_on_c_unorm <- tf$transpose(y_log_prob_sum, shape(1,0))

  # Responsibilities from the E-step are fed back in as constants.
  gamma_fixed <- tf$placeholder(dtype = tf$float64, shape = shape(NULL,C))

  Q <- -tf$einsum('nc,cn->', gamma_fixed, p_y_on_c_unorm)

  p_y_on_c_norm <- tf$reshape(tf$reduce_logsumexp(p_y_on_c_unorm, 0L), shape(1,-1))

  gamma <- tf$transpose(tf$exp(p_y_on_c_unorm - p_y_on_c_norm))

  ## Priors
  if (shrinkage) {
    # NOTE(review): `delta_log_variance` is passed as the Normal *scale*.
    delta_log_prior <- tfd$Normal(loc = delta_log_mean * rho_,
                                  scale = delta_log_variance)
    delta_log_prob <- -tf$reduce_sum(delta_log_prior$log_prob(delta_log))
  }

  THETA_LOWER_BOUND <- 1e-20

  theta_log_prior <- tfd$Dirichlet(concentration = tf$constant(dirichlet_concentration,
                                                               dtype = tf$float64))
  theta_log_prob <- -theta_log_prior$log_prob(tf$exp(theta_log) + THETA_LOWER_BOUND)

  ## End priors
  Q <- Q + theta_log_prob
  if (shrinkage) {
    Q <- Q + delta_log_prob
  }


  optimizer <- tf$train$AdamOptimizer(learning_rate=learning_rate)
  train <- optimizer$minimize(Q)

  # Marginal log likelihood for monitoring convergence
  L_y <- tf$reduce_sum(tf$reduce_logsumexp(p_y_on_c_unorm, 0L))

  L_y <- L_y - theta_log_prob
  if (shrinkage) {
    L_y <- L_y - delta_log_prob
  }


  # Split the data
  # Same round-robin assignment that recycling seq_len(n_batches) produced,
  # but spelled out so split() does not warn when N is not a multiple of
  # n_batches; explicit levels keep one list entry per batch.
  batch_id <- factor(rep_len(seq_len(n_batches), N),
                     levels = seq_len(n_batches))
  splits <- split(sample(seq_len(N), size = N, replace = FALSE), batch_id)

  # Start the graph and inference
  # The parallelism settings are integer-valued, so do not hand over a double.
  threads <- as.integer(threads)
  session_conf <- tf$ConfigProto(intra_op_parallelism_threads = threads,
                                 inter_op_parallelism_threads = threads)
  sess <- tf$Session(config = session_conf)
  init <- tf$global_variables_initializer()
  sess$run(init)


  fd_full <- dict(Y_ = Y, X_ = X, s_ = s, rho_ = rho)

  log_liks <- ll_old <- sess$run(L_y, feed_dict = fd_full)

  for(i in seq_len(max_iter_em)) {
    ll <- 0 # log likelihood for this "epoch"
    for(batch in seq_len(n_batches)) {
      idx <- splits[[batch]]

      # drop = FALSE keeps Y two-dimensional even for a single row or a
      # single gene, matching the (NULL, G) placeholder (as already done
      # for X).
      Y_batch <- Y[idx, , drop = FALSE]
      X_batch <- X[idx, , drop = FALSE]
      s_batch <- s[idx]

      # E-step
      fd <- dict(Y_ = Y_batch,
                 X_ = X_batch,
                 s_ = s_batch,
                 rho_ = rho)

      g <- sess$run(gamma, feed_dict = fd)

      # M-step
      gfd <- dict(Y_ = Y_batch,
                  X_ = X_batch,
                  s_ = s_batch,
                  rho_ = rho,
                  gamma_fixed = g)

      Q_old <- sess$run(Q, feed_dict = gfd)
      Q_diff <- rel_tol_adam + 1
      mi <- 0

      while(mi < max_iter_adam && Q_diff > rel_tol_adam) {
        mi <- mi + 1

        sess$run(train, feed_dict = gfd)

        # Convergence is only checked every 20 Adam steps.
        if(mi %% 20 == 0) {
          Q_new <- sess$run(Q, feed_dict = gfd)
          if (verbose) {
            message(paste(mi, Q_new))
          }
          Q_diff <- -(Q_new - Q_old) / abs(Q_old)
          Q_old <- Q_new
        }
      } # End gradient descent

      l_new <- sess$run(L_y, feed_dict = gfd) # Log likelihood for this "epoch"
      ll <- ll + l_new
    }

    ll_diff <- (ll - ll_old) / abs(ll_old)

    if(verbose) {
      message(sprintf("%i\tL old: %f; L new: %f; Difference (%%): %f",
                      mi, ll_old, ll, ll_diff))
    }
    ll_old <- ll
    log_liks <- c(log_liks, ll)

    if (ll_diff < rel_tol_em) {
      break
    }
  }

  # Finished EM - peel off final values
  variable_list <- list(delta, beta, phi, gamma, mu_ngc, a, tf$exp(theta_log))
  variable_names <- c("delta", "beta", "phi", "gamma", "mu", "a", "theta")


  if (shrinkage) {
    variable_list <- c(variable_list, list(delta_log_mean, delta_log_variance))
    variable_names <- c(variable_names, "ld_mean", "ld_var")
  }

  mle_params <- sess$run(variable_list, feed_dict = fd_full)
  names(mle_params) <- variable_names
  sess$close()

  # Entries that are not markers carry no effect.
  mle_params$delta[rho == 0] <- 0

  if(is.null(colnames(rho))) {
    colnames(rho) <- paste0("cell_type_", seq_len(ncol(rho)))
  }
  colnames(mle_params$gamma) <- colnames(rho)
  rownames(mle_params$delta) <- rownames(rho)
  colnames(mle_params$delta) <- colnames(rho)
  rownames(mle_params$beta) <- rownames(rho)
  names(mle_params$theta) <- colnames(rho)


  cell_type <- get_mle_cell_type(mle_params$gamma)

  rlist <- list(
    cell_type = cell_type,
    mle_params = mle_params,
    lls=log_liks
  )

  return(rlist)

}
min-width: none; 17 | max-width: none; 18 | padding: .75rem 0; 19 | background-color: #fff; 20 | background-clip: padding-box; 21 | border: 1px solid rgba(0, 0, 0, .1); 22 | box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); 23 | } 24 | 25 | @media (min-width:768px) { 26 | .algolia-autocomplete .ds-dropdown-menu { 27 | width: 175% 28 | } 29 | } 30 | 31 | .algolia-autocomplete .ds-dropdown-menu::before { 32 | display: none 33 | } 34 | 35 | .algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { 36 | padding: 0; 37 | background-color: rgb(255,255,255); 38 | border: 0; 39 | max-height: 80vh; 40 | } 41 | 42 | .algolia-autocomplete .ds-dropdown-menu .ds-suggestions { 43 | margin-top: 0 44 | } 45 | 46 | .algolia-autocomplete .algolia-docsearch-suggestion { 47 | padding: 0; 48 | overflow: visible 49 | } 50 | 51 | .algolia-autocomplete .algolia-docsearch-suggestion--category-header { 52 | padding: .125rem 1rem; 53 | margin-top: 0; 54 | font-size: 1.3em; 55 | font-weight: 500; 56 | color: #00008B; 57 | border-bottom: 0 58 | } 59 | 60 | .algolia-autocomplete .algolia-docsearch-suggestion--wrapper { 61 | float: none; 62 | padding-top: 0 63 | } 64 | 65 | .algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { 66 | float: none; 67 | width: auto; 68 | padding: 0; 69 | text-align: left 70 | } 71 | 72 | .algolia-autocomplete .algolia-docsearch-suggestion--content { 73 | float: none; 74 | width: auto; 75 | padding: 0 76 | } 77 | 78 | .algolia-autocomplete .algolia-docsearch-suggestion--content::before { 79 | display: none 80 | } 81 | 82 | .algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { 83 | padding-top: .75rem; 84 | margin-top: .75rem; 85 | border-top: 1px solid rgba(0, 0, 0, .1) 86 | } 87 | 88 | .algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { 89 | display: block; 90 | padding: .1rem 1rem; 91 | margin-bottom: 0.1; 92 | font-size: 1.0em; 93 | font-weight: 400 94 | /* 
display: none */ 95 | } 96 | 97 | .algolia-autocomplete .algolia-docsearch-suggestion--title { 98 | display: block; 99 | padding: .25rem 1rem; 100 | margin-bottom: 0; 101 | font-size: 0.9em; 102 | font-weight: 400 103 | } 104 | 105 | .algolia-autocomplete .algolia-docsearch-suggestion--text { 106 | padding: 0 1rem .5rem; 107 | margin-top: -.25rem; 108 | font-size: 0.8em; 109 | font-weight: 400; 110 | line-height: 1.25 111 | } 112 | 113 | .algolia-autocomplete .algolia-docsearch-footer { 114 | width: 110px; 115 | height: 20px; 116 | z-index: 3; 117 | margin-top: 10.66667px; 118 | float: right; 119 | font-size: 0; 120 | line-height: 0; 121 | } 122 | 123 | .algolia-autocomplete .algolia-docsearch-footer--logo { 124 | background-image: url("data:image/svg+xml;utf8,"); 125 | background-repeat: no-repeat; 126 | background-position: 50%; 127 | background-size: 100%; 128 | overflow: hidden; 129 | text-indent: -9000px; 130 | width: 100%; 131 | height: 100%; 132 | display: block; 133 | transform: translate(-8px); 134 | } 135 | 136 | .algolia-autocomplete .algolia-docsearch-suggestion--highlight { 137 | color: #FF8C00; 138 | background: rgba(232, 189, 54, 0.1) 139 | } 140 | 141 | 142 | .algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { 143 | box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) 144 | } 145 | 146 | .algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { 147 | background-color: rgba(192, 192, 192, .15) 148 | } 149 | --------------------------------------------------------------------------------