├── .Rbuildignore ├── .gitignore ├── .vscode └── settings.json ├── DESCRIPTION ├── NAMESPACE ├── R ├── AnnotateNMF.R ├── AnnotationPlot.R ├── FindLocalNeighbors.R ├── GSEAHeatmap.R ├── GetBestRank.R ├── GetSharedFactors.R ├── GetUniqueFactors.R ├── MetadataHeatmap.R ├── MetadataPlot.R ├── MetadataSummary.R ├── PreprocessData.R ├── ProjectData.R ├── RankPlot.R ├── RcppExports.R ├── RescaleSpatial.R ├── RunGCNMF.R ├── RunGSEA.R ├── RunLNMF.R ├── RunNMF.R ├── ard_nmf.R ├── cellxgene_pipeline.R ├── checkColumns.R ├── checkDesigns.R ├── coercions.R ├── cross_validate_nmf.R ├── getDesigns.R ├── getModelFit.R ├── getModelMatrix.R ├── getModelResults.R ├── get_pbmc3k_data.R ├── methods.R ├── pbmc3k.R ├── plot.cross_validate_nmf_data.R ├── plot.nmf_metadata_summary.R ├── plotFactorWeights.R ├── rasterize_rowwise.R ├── run_nmf.R └── singlet.R ├── README.md ├── _pkgdown.yml ├── data └── pbmc3k.RData ├── docs ├── 404.html ├── articles │ ├── Batch_Integration_with_Linked_NMF.html │ ├── Batch_Integration_with_Linked_NMF_files │ │ └── figure-html │ │ │ ├── plot-lnmf-metadata-1.png │ │ │ ├── plot-metadata-1.png │ │ │ ├── plot-umap-1.png │ │ │ ├── run-nmf-1.png │ │ │ ├── unnamed-chunk-1-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-6-1.png │ │ │ └── unnamed-chunk-8-1.png │ ├── Guided_Clustering_with_NMF.html │ ├── Guided_Clustering_with_NMF_files │ │ └── figure-html │ │ │ ├── dim-plot-1.png │ │ │ ├── feature-plot-1.png │ │ │ ├── gsea-heatmap-1.png │ │ │ ├── map-cluster-ids-1.png │ │ │ ├── plot-metadata-1.png │ │ │ ├── unnamed-chunk-1-1.png │ │ │ └── viz-dim-loadings-1.png │ └── index.html ├── authors.html ├── deps │ ├── bootstrap-5.1.3 │ │ ├── bootstrap.bundle.min.js │ │ ├── bootstrap.bundle.min.js.map │ │ └── bootstrap.min.css │ ├── data-deps.txt │ └── jquery-3.6.0 │ │ ├── jquery-3.6.0.js │ │ ├── jquery-3.6.0.min.js │ │ └── jquery-3.6.0.min.map ├── index.html ├── link.svg ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── GSEAHeatmap.html │ ├── MetadataSummary.html │ ├── RankPlot.html │ ├── RunGSEA.html │ ├── RunLNMF.html │ ├── RunNMF.html │ ├── ard_nmf.html │ ├── cross_validate_nmf.html │ ├── get_pbmc3k_data.html │ ├── index.html │ ├── pbmc3k.html │ ├── run_linked_nmf.html │ ├── run_nmf.html │ └── singlet.html ├── search.json └── sitemap.xml ├── inst ├── CITATION └── include │ ├── IVSparse.h │ ├── singlet.h │ └── src │ ├── CSC │ ├── CSC_BLAS.hpp │ ├── CSC_Constructors.hpp │ ├── CSC_Methods.hpp │ ├── CSC_Operators.hpp │ ├── CSC_Private_Methods.hpp │ └── CSC_SparseMatrix.hpp │ ├── IVCSC │ ├── IVCSC_BLAS.hpp │ ├── IVCSC_Constructors.hpp │ ├── IVCSC_Methods.hpp │ ├── IVCSC_Operators.hpp │ ├── IVCSC_Private_Methods.hpp │ └── IVCSC_SparseMatrix.hpp │ ├── IVSparse_Base_Methods.hpp │ ├── IVSparse_SparseMatrixBase.hpp │ ├── InnerIterators │ ├── CSC_Iterator.hpp │ ├── CSC_Iterator_Methods.hpp │ ├── IVCSC_Iterator.hpp │ ├── IVCSC_Iterator_Methods.hpp │ ├── VCSC_Iterator.hpp │ └── VCSC_Iterator_Methods.hpp │ ├── VCSC │ ├── VCSC_BLAS.hpp │ ├── VCSC_Constructors.hpp │ ├── VCSC_Methods.hpp │ ├── VCSC_Operators.hpp │ ├── VCSC_Private_Methods.hpp │ └── VCSC_SparseMatrix.hpp │ └── Vectors │ ├── CSC_Vector.hpp │ ├── CSC_Vector_Methods.hpp │ ├── IVCSC_Vector.hpp │ ├── IVCSC_Vector_Methods.hpp │ ├── VCSC_Vector.hpp │ └── VCSC_Vector_Methods.hpp ├── man ├── AnnotateNMF.Rd ├── AnnotationPlot.Rd ├── FindLocalNeighbors.Rd ├── GSEAHeatmap.Rd ├── GetBestRank.Rd ├── MetadataSummary.Rd ├── PreprocessData.Rd ├── ProjectData.Rd ├── RankPlot.Rd ├── RasterizeRowwise.Rd ├── 
RescaleSpatial.Rd ├── RunGCNMF.Rd ├── RunGSEA.Rd ├── RunLNMF.Rd ├── RunNMF.Rd ├── ard_nmf.Rd ├── cellxgene_pipeline.Rd ├── checkColumns.Rd ├── checkDesigns.Rd ├── cross_validate_nmf.Rd ├── getDesigns.Rd ├── getModelFit.Rd ├── getModelMatrix.Rd ├── getModelResults.Rd ├── get_pbmc3k_data.Rd ├── pbmc3k.Rd ├── plotFactorWeights.Rd ├── project_model.Rd ├── run_linked_nmf.Rd ├── run_nmf.Rd ├── singlet.Rd └── write_IVCSC.Rd ├── singlet.Rproj ├── src ├── Makevars ├── Makevars.win ├── RcppExports.cpp └── singlet.cpp ├── tests ├── testthat.R └── testthat │ ├── helper.R │ └── test-pbmc3k.R └── vignettes ├── Batch_Integration_with_Linked_NMF.Rmd └── Guided_Clustering_with_NMF.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | Makefile 9 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "*.rmd": "markdown", 4 | "array": "cpp", 5 | "initializer_list": "cpp", 6 | "atomic": "cpp", 7 | "*.tcc": "cpp", 8 | "cctype": "cpp", 9 | "chrono": "cpp", 10 | "clocale": "cpp", 11 | "cmath": "cpp", 12 | "complex": "cpp", 13 | "cstdarg": "cpp", 14 | "cstdint": "cpp", 15 | "cstdio": "cpp", 16 | "cstdlib": "cpp", 17 | "cstring": "cpp", 18 | "ctime": "cpp", 19 | "cwchar": "cpp", 20 | "cwctype": "cpp", 21 | "deque": "cpp", 22 | "unordered_map": "cpp", 23 | "vector": "cpp", 24 | "exception": "cpp", 25 | "fstream": "cpp", 26 | "functional": "cpp", 27 | "iosfwd": "cpp", 28 | "iostream": "cpp", 29 | "istream": "cpp", 30 | "limits": "cpp", 31 | "new": "cpp", 32 | "ostream": "cpp", 33 | "numeric": "cpp", 34 | "ratio": "cpp", 35 | "sstream": "cpp", 36 | "stdexcept": "cpp", 37 | "streambuf": "cpp", 38 | "thread": "cpp", 39 | "tuple": "cpp", 40 | "type_traits": "cpp", 41 | "utility": "cpp", 42 | "typeinfo": "cpp", 43 | "core": "cpp", 44 | "random": "cpp", 45 | "eigencore": "cpp", 46 | "string_view": "cpp", 47 | "bit": "cpp", 48 | "compare": "cpp", 49 | "concepts": "cpp", 50 | "cstddef": "cpp", 51 | "map": "cpp", 52 | "set": "cpp", 53 | "string": "cpp", 54 | "algorithm": "cpp", 55 | "any": "cpp", 56 | "iterator": "cpp", 57 | "memory": "cpp", 58 | "memory_resource": "cpp", 59 | "system_error": "cpp", 60 | "iomanip": "cpp", 61 | "numbers": "cpp", 62 | "semaphore": "cpp", 63 | "stop_token": "cpp" 64 | } 65 | } -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: singlet 2 | Type: Package 3 | Title: Non-negative Matrix Factorization for single-cell analysis 4 | Version: 0.99.8 5 | Date: 2024-10-15 6 | Author: Zach Debruine 7 | Maintainer: Zach Debruine 8 | Authors@R: c(person("Zach", "DeBruine", 9 | email = "debruinz@gvsu.edu", 10 | role = c("aut", "cre"), 11 | comment = c(ORCID = "0000-0003-2234-4827")), 12 | person("Tim", "Triche", 13 | email = "trichelab@gmail.com", 14 | role = c("aut"), 15 | comment = c(ORCID = "0000-0001-5665-946X")), 16 | person("Chan Zuckerberg Initiative", 17 | role = c("fnd"), 18 | comment = 
"https://chanzuckerberg.com/science/programs-resources/single-cell-biology/data-insights/efficient-data-structures-for-single-cell-data-integration/")) 19 | Description: Fast NMF with automatic rank-determination for dimension reduction of single-cell data using Seurat, RcppML nmf, SingleCellExperiments and similar. 20 | License: GPL (>= 2) 21 | Depends: 22 | Seurat, 23 | RcppML, 24 | dplyr, 25 | RcppEigen 26 | Imports: 27 | Matrix, 28 | methods, 29 | stats, 30 | knitr, 31 | ggplot2, 32 | limma, 33 | reshape2, 34 | utils, 35 | fgsea, 36 | msigdbr, 37 | RcppML, 38 | statmod 39 | LinkingTo: 40 | Rcpp, 41 | RcppEigen 42 | Suggests: 43 | rmarkdown, 44 | devtools, 45 | cowplot, 46 | viridis, 47 | testthat (>= 3.0.0), 48 | SingleCellExperiment, 49 | rWikiPathways, 50 | plotly, 51 | igvR 52 | VignetteBuilder: knitr 53 | RoxygenNote: 7.3.2 54 | Config/testthat/edition: 3 55 | URL: https://github.com/zdebruine/singlet 56 | LazyData: true 57 | BugReports: https://github.com/zdebruine/singlet/issues 58 | Encoding: UTF-8 59 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AnnotateNMF,DimReduc) 4 | S3method(AnnotateNMF,Seurat) 5 | S3method(AnnotateNMF,nmf) 6 | S3method(AnnotationPlot,DimReduc) 7 | S3method(AnnotationPlot,Seurat) 8 | S3method(AnnotationPlot,data.frame) 9 | S3method(AnnotationPlot,list) 10 | S3method(AnnotationPlot,nmf) 11 | S3method(FindLocalNeighbors,Seurat) 12 | S3method(MetadataPlot,Seurat) 13 | S3method(PreprocessData,Assay) 14 | S3method(PreprocessData,Seurat) 15 | S3method(PreprocessData,dgCMatrix) 16 | S3method(ProjectData,Seurat) 17 | S3method(ProjectData,SingleCellExperiment) 18 | S3method(RankPlot,Seurat) 19 | S3method(RescaleSpatial,Seurat) 20 | S3method(RunGCNMF,Seurat) 21 | S3method(RunLNMF,Seurat) 22 | S3method(RunNMF,Seurat) 23 | S3method(RunNMF,SingleCellExperiment) 24 | S3method(plot,cross_validate_nmf_data) 25 | S3method(plot,nmf_metadata_summary) 26 | export(AnnotateNMF) 27 | export(AnnotationPlot) 28 | export(FindLocalNeighbors) 29 | export(GSEAHeatmap) 30 | export(GetBestRank) 31 | export(GetSharedFactors) 32 | export(GetUniqueFactors) 33 | export(MetadataHeatmap) 34 | export(MetadataPlot) 35 | export(MetadataSummary) 36 | export(PreprocessData) 37 | export(ProjectData) 38 | export(RankPlot) 39 | export(RasterizeRowwise) 40 | export(RescaleSpatial) 41 | export(RunGCNMF) 42 | export(RunGSEA) 43 | export(RunLNMF) 44 | export(RunNMF) 45 | export(ard_nmf) 46 | export(cellxgene_pipeline) 47 | export(checkColumns) 48 | export(checkDesigns) 49 | export(cross_validate_nmf) 50 | export(getDesigns) 51 | export(getModelFit) 52 | export(getModelMatrix) 53 | export(getModelResults) 54 | export(get_pbmc3k_data) 55 | export(plotFactorWeights) 56 | export(project_model) 57 | export(run_nmf) 58 | export(write_IVCSC) 59 | exportMethods("$") 60 | exportMethods("$<-") 61 | exportMethods("[") 62 | exportMethods(coerce) 63 | import(Matrix) 64 | import(RcppML) 65 | import(Seurat) 66 | import(dplyr) 67 | import(fgsea) 68 | import(ggplot2) 69 | import(limma) 70 | import(msigdbr) 71 | importClassesFrom(RcppML,nmf) 72 | importFrom(methods,as) 73 | importFrom(methods,is) 74 | importFrom(methods,new) 75 | importFrom(reshape2,acast) 76 | importFrom(reshape2,melt) 77 | importFrom(stats,dist) 78 | importFrom(stats,hclust) 79 | importFrom(stats,model.matrix) 80 | importFrom(stats,p.adjust) 81 | 
importFrom(stats,reshape) 82 | importFrom(stats,runif) 83 | importFrom(utils,data) 84 | importFrom(utils,setTxtProgressBar) 85 | importFrom(utils,txtProgressBar) 86 | useDynLib(singlet, .registration = TRUE) 87 | -------------------------------------------------------------------------------- /R/AnnotateNMF.R: -------------------------------------------------------------------------------- 1 | #' annotate an NMF model 2 | #' 3 | #' @param object an object suitable for annotation (Seurat, DimReduc, or nmf) 4 | #' @param columns factor columns of meta.data (see below) to annotate against 5 | #' @param meta.data a data.frame, if one is not already part of the object 6 | #' @param designs named list of design matrices (supersedes meta.data/columns) 7 | #' @param center center the factor matrix for testing? (TRUE) 8 | #' @param scale scale the factor matrix for testing? (FALSE) 9 | #' @param max.levels maximum number of levels a factor may have in order to be included in analysis 10 | #' @param ... not implemented 11 | #' @export 12 | #' 13 | AnnotateNMF <- function(object, ...) { 14 | UseMethod("AnnotateNMF") 15 | } 16 | 17 | 18 | #' Annotate NMF model with cell or sample metadata 19 | #' 20 | #' @rdname AnnotateNMF 21 | #' @aliases AnnotateNMF 22 | #' 23 | #' @import limma 24 | #' 25 | #' @export 26 | #' 27 | AnnotateNMF.DimReduc <- function(object, meta.data = NULL, columns = NULL, designs = NULL, center = TRUE, scale = FALSE, max.levels = 200, ...) { 28 | designs <- getDesigns(columns = columns, 29 | meta.data = meta.data, 30 | designs = designs, 31 | max.levels) 32 | fits <- lapply(designs, 33 | getModelFit, 34 | object = object, 35 | center = center, 36 | scale = scale) 37 | object@misc$annotations <- lapply(fits, 38 | getModelResults) 39 | return(object) 40 | } 41 | 42 | 43 | #' @rdname AnnotateNMF 44 | #' @name AnnotateNMF 45 | #' 46 | #' @export 47 | #' 48 | .S3method("AnnotateNMF", "DimReduc", AnnotateNMF.DimReduc) 49 | 50 | 51 | #' @rdname AnnotateNMF 52 | #' 53 | #' @param reduction the reductions slot in the Seurat object containing the model to annotate 54 | #' 55 | #' @examples 56 | #' \dontrun{ 57 | #' get_pbmc3k_data() %>% 58 | #' NormalizeData() %>% 59 | #' RunNMF() -> pbmc3k 60 | #' AnnotateNMF(pbmc3k) 61 | #' } 62 | #' @aliases AnnotateNMF 63 | #' 64 | #' @export 65 | #' 66 | AnnotateNMF.Seurat <- function(object, columns = NULL, reduction = "nmf", ...) { 67 | if (is.null(columns)) columns <- colnames(object@meta.data) 68 | object@reductions[[reduction]] <- 69 | AnnotateNMF.DimReduc( 70 | object = object@reductions[[reduction]], 71 | meta.data = object@meta.data[, columns], 72 | columns = columns, ... 73 | ) 74 | return(object) 75 | } 76 | 77 | 78 | #' @rdname AnnotateNMF 79 | #' @name AnnotateNMF 80 | #' 81 | #' @export 82 | #' 83 | .S3method("AnnotateNMF", "Seurat", AnnotateNMF.Seurat) 84 | 85 | 86 | #' Annotate NMF model with cell metadata 87 | #' 88 | #' @details Maps factor information in an RcppML::nmf object against meta.data 89 | #' 90 | #' @rdname AnnotateNMF 91 | #' @aliases AnnotateNMF 92 | #' 93 | #' @import limma 94 | #' 95 | #' @export 96 | #' 97 | AnnotateNMF.nmf <- function(object, meta.data, columns = NULL, designs = NULL, center = TRUE, scale = FALSE, max.levels = 200, ...) { 98 | designs <- getDesigns(columns = columns, meta.data = meta.data, designs = designs, max.levels, ...) 
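  # each design matrix is fit against the NMF factor matrix with limma (via
  # getModelFit) and summarized per factor (getModelResults); the results are
  # stored in object@misc$annotations, mirroring AnnotateNMF.DimReduc above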
99 | fits <- lapply(designs, getModelFit, object = object, center = center, scale = scale) 100 | object@misc$annotations <- lapply(fits, getModelResults) 101 | return(object) 102 | } 103 | 104 | 105 | #' @rdname AnnotateNMF 106 | #' @name AnnotateNMF 107 | #' 108 | #' @export 109 | #' 110 | .S3method("AnnotateNMF", "nmf", AnnotateNMF.nmf) 111 | -------------------------------------------------------------------------------- /R/GSEAHeatmap.R: -------------------------------------------------------------------------------- 1 | #' Plot GSEA results on a heatmap 2 | #' 3 | #' Plot top GSEA terms for each NMF factor on a heatmap 4 | #' 5 | #' @param object Seurat or RcppML::nmf object 6 | #' @param reduction a dimensional reduction for which GSEA analysis has been performed 7 | #' @param max.terms.per.factor show this number of top terms for each factor 8 | #' @param dropcommon drop broadly enriched terms across factors? (TRUE) 9 | #' 10 | #' @return ggplot2 object 11 | #' 12 | #' @export 13 | #' 14 | GSEAHeatmap <- function(object, reduction = "nmf", max.terms.per.factor = 3, dropcommon = TRUE) { 15 | 16 | if (is(object, "Seurat")) { 17 | df <- object@reductions[[reduction]]@misc$gsea$padj 18 | } else if (is(object, "nmf")) { 19 | df <- object@misc$gsea$padj 20 | } 21 | 22 | # markers for each factor based on the proportion of signal in that factor 23 | df2 <- as.matrix(Diagonal(x = 1 / rowSums(df)) %*% df) 24 | 25 | # see https://github.com/zdebruine/singlet/issues/26 26 | # thanks to @earbebarnes 27 | rownames(df2) <- rownames(df) #add row names to df2 28 | 29 | terms <- c() 30 | for (i in 1:ncol(df2)) { 31 | terms_i <- df[, i] 32 | idx <- terms_i > -log10(0.05) 33 | terms_i <- terms_i[idx] 34 | terms_j <- df2[idx, i] 35 | v <- sort(terms_j, decreasing = TRUE) 36 | if (length(v) > max.terms.per.factor) { 37 | terms <- c(terms, names(v)[1:max.terms.per.factor]) 38 | } else { 39 | terms <- c(terms, names(v)) 40 | } 41 | } 42 | terms <- unique(terms) 43 | df <- df[terms, ] 44 | 45 | rownames(df) <- sapply(rownames(df), function(x) { 46 | ifelse(nchar(x) > 48, paste0(substr(x, 1, 45), "..."), x) 47 | }) 48 | 49 | if (dropcommon) { 50 | # remove terms that are broadly significant 51 | v <- which((rowSums(df > -log10(0.05)) > (ncol(df) / 2))) 52 | if (length(v) > 0) df <- df[-v, ] 53 | } 54 | df <- reshape2::melt(df) 55 | p <- ggplot(df, aes(Var2, Var1, fill = value)) + 56 | geom_tile() + 57 | scale_fill_viridis_c(option = "B") + 58 | theme_classic() + 59 | scale_x_discrete(expand = c(0, 0)) + 60 | scale_y_discrete(expand = c(0, 0)) + 61 | labs( 62 | x = "NMF factor", 63 | y = "GO Term", 64 | fill = "FDR\n(-log10)" 65 | ) + 66 | theme( 67 | axis.text.y = element_text(size = 6), 68 | axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1) 69 | ) + 70 | NULL 71 | 72 | return(p) 73 | 74 | } 75 | -------------------------------------------------------------------------------- /R/GetBestRank.R: -------------------------------------------------------------------------------- 1 | #' determine the appropriate rank for an AutoNMF decomposition 2 | #' 3 | #' @param df a data.frame of output from crossvalidation: rep, rank, error 4 | #' @inheritParams RunNMF 5 | #' @return the lowest rank that minimizes the reconstruction error 6 | #' @export 7 | #' 8 | GetBestRank <- function(df, tol.overfit = 1e-4, ...) 
{ 9 | df$rep <- factor(df$rep) 10 | best_ranks <- c() 11 | for (replicate in levels(df$rep)) { 12 | df_rep <- subset(df, rep == replicate) 13 | # calculate overfitting tolerance 14 | max_rank <- max(df_rep$k) + 1 15 | for (rank in unique(df_rep$k)) { 16 | if (rank < max_rank) { 17 | df_rank <- subset(df_rep, k == rank) 18 | if (nrow(df_rank) > 1) { 19 | v2 <- df_rank$test_error[2:nrow(df_rank)] 20 | v1 <- df_rank$test_error[1:(nrow(df_rank) - 1)] 21 | if(length(v1) >=2 ){ 22 | for (pos in 2:length(v1)) { 23 | if (v1[[pos]] > v1[[pos - 1]]) v1[[pos]] <- v1[[pos - 1]] 24 | } 25 | } 26 | if (max(c(0, (v2 - v1) / (v2 + v1))) > tol.overfit) { 27 | max_rank <- rank 28 | } 29 | } 30 | } 31 | } 32 | df_rep <- subset(df_rep, k < max_rank) 33 | if (nrow(df_rep) == 0) { 34 | best_ranks <- c(best_ranks, 2) 35 | } else if (nrow(df) == 1) { 36 | best_ranks <- c(best_ranks, df_rep$k[[1]]) 37 | } else { 38 | # condense to simple format by taking the last iteration in each model 39 | df_rep <- as.data.frame(group_by(df_rep, rep, k) %>% slice(which.max(iter))) 40 | best_ranks <- c(best_ranks, df_rep$k[which.min(df_rep$test_error)]) 41 | } 42 | } 43 | 44 | # get the lowest rank for each replicate, take the mean and floor it 45 | floor(mean(best_ranks)) 46 | } 47 | -------------------------------------------------------------------------------- /R/GetSharedFactors.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @rdname RunLNMF 3 | #' 4 | GetSharedFactors <- function(object, split.by, reduction = "lnmf") { 5 | if (!(reduction %in% names(object@reductions))) { 6 | stop("this Seurat object does not contain the requested reductions slot") 7 | } 8 | # which(rowSums(object@reductions[[reduction]]@misc$link_matrix == 0) == 0) 9 | which(!(colnames(object@reductions[[reduction]]@cell.embeddings) %in% names(which(apply(MetadataSummary(t(object@reductions[[reduction]]@cell.embeddings), object@meta.data[[split.by]]), 2, function(x) min(x) == 0))))) 10 | } 11 | -------------------------------------------------------------------------------- /R/GetUniqueFactors.R: -------------------------------------------------------------------------------- 1 | #' @rdname RunLNMF 2 | #' @export 3 | #' 4 | GetUniqueFactors <- function(object, split.by, reduction = "lnmf") { 5 | if (!(reduction %in% names(object@reductions))) { 6 | stop("this Seurat object does not contain the requested reductions slot") 7 | } 8 | # which(rowSums(object@reductions[[reduction]]@misc$link_matrix == 0) > 0) 9 | which((colnames(object@reductions[[reduction]]@cell.embeddings) %in% names(which(apply(MetadataSummary(t(object@reductions[[reduction]]@cell.embeddings), object@meta.data[[split.by]]), 2, function(x) min(x) == 0))))) 10 | } 11 | -------------------------------------------------------------------------------- /R/MetadataHeatmap.R: -------------------------------------------------------------------------------- 1 | #' @rdname MetadataSummary 2 | #' 3 | #' @param x result of \code{MetadataSummary} 4 | #' 5 | #' @importFrom reshape2 melt 6 | #' 7 | #' @export 8 | #' 9 | MetadataHeatmap <- function(x) { 10 | m <- reshape2::melt(as.matrix(x)) 11 | colnames(m) <- c("factor", "group", "frac") 12 | ggplot(m, aes(x = factor(factor, levels = unique(factor)), y = group, fill = frac)) + 13 | geom_tile() + 14 | theme_classic() + 15 | theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), axis.line = element_blank(), axis.ticks = element_blank()) + 16 | labs(x = "factor", y = "group", fill = 
"relative\ntotal weight") + 17 | scale_y_discrete(expand = c(0, 0)) + 18 | scale_x_discrete(expand = c(0, 0)) + 19 | scale_fill_gradient2(low = "white", high = "red") 20 | } 21 | -------------------------------------------------------------------------------- /R/MetadataPlot.R: -------------------------------------------------------------------------------- 1 | #' @rdname RunLNMF 2 | #' 3 | #' @export 4 | #' 5 | MetadataPlot <- function(object, ...) { 6 | UseMethod("MetadataPlot") 7 | } 8 | 9 | 10 | #' @rdname RunLNMF 11 | #' 12 | #' @name MetadataPlot 13 | #' 14 | #' @export 15 | #' 16 | MetadataPlot.Seurat <- function(object, split.by, reduction = "lnmf", ...) { 17 | if (!(reduction %in% names(object@reductions))) { 18 | stop("this Seurat object does not contain the requested reductions slot") 19 | } 20 | plot(MetadataSummary(t(object@reductions[[reduction]]@cell.embeddings), object@meta.data[[split.by]])) 21 | } 22 | 23 | 24 | #' @rdname RunLNMF 25 | #' 26 | #' @name MetadataPlot 27 | #' 28 | #' @export 29 | #' 30 | .S3method("MetadataPlot", "Seurat", MetadataPlot.Seurat) 31 | -------------------------------------------------------------------------------- /R/MetadataSummary.R: -------------------------------------------------------------------------------- 1 | #' Summarize contribution of sample groups to NMF factors 2 | #' 3 | #' Calculate the mean weight of samples in discrete and unique groups to each factor 4 | #' 5 | #' @rdname MetadataSummary 6 | #' 7 | #' @param h matrix giving factors as rows and samples as columns 8 | #' @param factor_data a factor of the same length as the number of columns in \code{h} 9 | #' @param reorder sort results by proportion in each group (uses \code{hclust} if >2 groups) 10 | #' 11 | #' @return \code{data.frame} of mean weights for each sample group within each factor of class \code{nmf_metadata_summary}. Use the \code{plot} method to visualize. 12 | #' 13 | #' @export 14 | #' 15 | MetadataSummary <- function(h, factor_data, reorder = TRUE) { 16 | factor_data <- as.factor(factor_data) 17 | if (is.null(rownames(h))) rownames(h) <- paste0("factor", 1:nrow(h)) 18 | m <- matrix(0, nrow(h), length(levels(factor_data))) 19 | rownames(m) <- rownames(h) 20 | colnames(m) <- levels(factor_data) 21 | for (j in 1:length(levels(factor_data))) { 22 | for (i in 1:nrow(h)) { 23 | m[i, j] <- mean(h[i, which(factor_data == levels(factor_data)[[j]])]) 24 | } 25 | } 26 | m <- apply(m, 1, function(x) x / sum(x)) 27 | if (length(levels(factor_data)) == 2) { 28 | m <- m[order(m[, 1], decreasing = TRUE), ] 29 | } else if (reorder) { 30 | m <- m[hclust(dist(m), method = "ward.D2")$order, hclust(dist(t(m)), method = "ward.D2")$order] 31 | } 32 | t(m) 33 | m <- as.data.frame(m) 34 | class(m) <- c("nmf_metadata_summary", "data.frame") 35 | m 36 | } 37 | -------------------------------------------------------------------------------- /R/PreprocessData.R: -------------------------------------------------------------------------------- 1 | #' Normalize count data 2 | #' 3 | #' Standard log-normalization equivalent to \code{Seurat::LogNormalize} 4 | #' 5 | #' @param object Seurat object 6 | #' @param assay assay in which the counts matrix resides 7 | #' @param scale.factor value by which to multiply all columns after unit normalization and before \code{log1p} transformation 8 | #' @param ... arguments to \code{Seurat::LogNormalize} 9 | #' @export 10 | #' @rdname PreprocessData 11 | #' 12 | PreprocessData.Seurat <- function(object, scale.factor = 10000, assay = NULL, ...) 
{ 13 | if (is.null(assay)) assay <- names(object@assays)[[1]] 14 | if (is.null(object@assays[[assay]]@key)) { 15 | object@assays[[assay]]@key <- paste0(assay, "_") 16 | } 17 | object@assays[[assay]] <- PreprocessData(object@assays[[assay]], ...) 18 | object 19 | } 20 | 21 | #' @rdname PreprocessData 22 | #' @export 23 | PreprocessData.Assay <- function(object, scale.factor = 10000, ...) { 24 | if (ncol(object@counts) == 0) { 25 | object@data <- PreprocessData(object@data, ...) 26 | } else { 27 | object@data <- PreprocessData(object@counts, ...) 28 | } 29 | object 30 | } 31 | 32 | #' @rdname PreprocessData 33 | #' @export 34 | PreprocessData.dgCMatrix <- function(object, scale.factor = 10000, ...) { 35 | m <- Seurat::LogNormalize(object, scale.factor, ...) 36 | rownames(m) <- rownames(object) 37 | colnames(m) <- colnames(object) 38 | m 39 | } 40 | 41 | #' @export 42 | #' @rdname PreprocessData 43 | #' 44 | PreprocessData <- function(object, scale.factor, ...) { 45 | UseMethod("PreprocessData") 46 | } 47 | 48 | #' @export 49 | #' @rdname PreprocessData 50 | #' @name PreprocessData 51 | #' 52 | .S3method("PreprocessData", "dgCMatrix", PreprocessData.dgCMatrix) 53 | 54 | #' @export 55 | #' @rdname PreprocessData 56 | #' @name PreprocessData 57 | #' 58 | .S3method("PreprocessData", "Assay", PreprocessData.Assay) 59 | 60 | 61 | #' @export 62 | #' @rdname PreprocessData 63 | #' @name PreprocessData 64 | #' 65 | .S3method("PreprocessData", "Seurat", PreprocessData.Seurat) 66 | -------------------------------------------------------------------------------- /R/RankPlot.R: -------------------------------------------------------------------------------- 1 | #' Plot NMF cross-validation results 2 | #' 3 | #' Given a NMF reduction at multiple ranks, plot rank vs. test set reconstruction error to determine the optimal rank. 4 | #' 5 | #' @param object a Seurat object or a \code{data.frame} that is the result of \code{RunNMF} 6 | #' @param reduction name of the NMF reduction in the Seurat object (result of \code{RunNMF}) for which multiple \code{ranks} were computed. 7 | #' @param ... not implemented 8 | #' 9 | #' @return A ggplot2 object 10 | #' 11 | #' @aliases RankPlot 12 | #' 13 | #' @export 14 | #' 15 | RankPlot <- function(object, reduction = "nmf", ...) { 16 | UseMethod("RankPlot") 17 | } 18 | 19 | 20 | #' @rdname RankPlot 21 | #' 22 | #' @param reduction the NMF reduction slot name (result of \code{RunNMF} where \code{k} was an array) 23 | #' @param detail.level of detail to plot, \code{1} for test set reconstruction error at convergence of each factorization, \code{2} for test set reconstruction error at each fitting iteration of each factorization 24 | #' 25 | #' @export 26 | #' 27 | RankPlot.Seurat <- function(object, reduction = "nmf", detail.level = 1, ...) { 28 | if (detail.level == 2) { 29 | plot(subset(object@reductions[[reduction]]@misc$cv_data, iter >= 5), detail.level) 30 | } else { 31 | plot(object@reductions[[reduction]]@misc$cv_data, detail.level) 32 | } 33 | } 34 | 35 | 36 | #' Plot NMF cross-validation results given a Seurat object 37 | #' 38 | #' S3 method for Seurat that runs the \code{singlet::RunNMF} function. 
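#'
#' A minimal usage sketch (a sketch only, assuming \code{RunNMF} was run with a
#' vector of ranks \code{k}, so that cross-validation results are present in the
#' reduction's \code{misc$cv_data} slot, which is what this method reads):
#'
#' @examples
#' \dontrun{
#' pbmc3k <- get_pbmc3k_data() %>% PreprocessData()
#' pbmc3k <- RunNMF(pbmc3k, k = c(5, 10, 15, 20))
#' RankPlot(pbmc3k, reduction = "nmf", detail.level = 1)
#' GetBestRank(pbmc3k@reductions$nmf@misc$cv_data)
#' }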
39 | #' 40 | #' @method RankPlot Seurat 41 | #' @rdname RankPlot 42 | #' @name RankPlot 43 | #' 44 | #' @export 45 | #' 46 | .S3method("RankPlot", "Seurat", RankPlot.Seurat) 47 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | weight_by_split <- function(A_, split_by, n_groups) { 5 | .Call(`_singlet_weight_by_split`, A_, split_by, n_groups) 6 | } 7 | 8 | rowwise_compress_sparse <- function(A, n = 10L, threads = 0L) { 9 | .Call(`_singlet_rowwise_compress_sparse`, A, n, threads) 10 | } 11 | 12 | rowwise_compress_dense <- function(A, n = 10L, threads = 0L) { 13 | .Call(`_singlet_rowwise_compress_dense`, A, n, threads) 14 | } 15 | 16 | calc_L1_matrix <- function(h, batch_id) { 17 | .Call(`_singlet_calc_L1_matrix`, h, batch_id) 18 | } 19 | 20 | Rcpp_predict <- function(A, w, L1, L2, threads) { 21 | .Call(`_singlet_Rcpp_predict`, A, w, L1, L2, threads) 22 | } 23 | 24 | c_project_model <- function(A, w, L1, L2, threads) { 25 | .Call(`_singlet_c_project_model`, A, w, L1, L2, threads) 26 | } 27 | 28 | c_nmf <- function(A, At, tol, maxit, verbose, L1_w, L1_h, L2_w, L2_h, threads, w) { 29 | .Call(`_singlet_c_nmf`, A, At, tol, maxit, verbose, L1_w, L1_h, L2_w, L2_h, threads, w) 30 | } 31 | 32 | c_nmf_batch <- function(A, At, tol, maxit, verbose, L1, L2, threads, w, batch_id) { 33 | .Call(`_singlet_c_nmf_batch`, A, At, tol, maxit, verbose, L1, L2, threads, w, batch_id) 34 | } 35 | 36 | c_nmf_sparse_list <- function(A_, At_, tol, maxit, verbose, L1, L2, threads, w) { 37 | .Call(`_singlet_c_nmf_sparse_list`, A_, At_, tol, maxit, verbose, L1, L2, threads, w) 38 | } 39 | 40 | #' Write an IVCSC matrix 41 | #' 42 | #' @param L input dgCMatrix list 43 | #' @param verbose print outputs 44 | #' @export 45 | #' 46 | write_IVCSC <- function(L, verbose = TRUE) { 47 | .Call(`_singlet_write_IVCSC`, L, verbose) 48 | } 49 | 50 | save_IVSparse <- function(A_, verbose = TRUE) { 51 | .Call(`_singlet_save_IVSparse`, A_, verbose) 52 | } 53 | 54 | build_IVCSC2 <- function(L, verbose = TRUE) { 55 | .Call(`_singlet_build_IVCSC2`, L, verbose) 56 | } 57 | 58 | read_IVSparse <- function() { 59 | .Call(`_singlet_read_IVSparse`) 60 | } 61 | 62 | run_nmf_on_sparsematrix_list <- function(A_, tol, maxit, verbose, threads, w, use_vcsc = FALSE, L1 = 0, L2 = 0) { 63 | .Call(`_singlet_run_nmf_on_sparsematrix_list`, A_, tol, maxit, verbose, threads, w, use_vcsc, L1, L2) 64 | } 65 | 66 | c_mu_nmf <- function(A, At, tol, maxit, verbose, L1, L2, threads, w) { 67 | .Call(`_singlet_c_mu_nmf`, A, At, tol, maxit, verbose, L1, L2, threads, w) 68 | } 69 | 70 | c_nmf_dense <- function(A, At, tol, maxit, verbose, L1_w, L1_h, L2_w, L2_h, threads, w) { 71 | .Call(`_singlet_c_nmf_dense`, A, At, tol, maxit, verbose, L1_w, L1_h, L2_w, L2_h, threads, w) 72 | } 73 | 74 | c_linked_nmf <- function(A, At, tol, maxit, verbose, L1, L2, threads, w, link_h, link_w) { 75 | .Call(`_singlet_c_linked_nmf`, A, At, tol, maxit, verbose, L1, L2, threads, w, link_h, link_w) 76 | } 77 | 78 | c_ard_nmf <- function(A, At, tol, maxit, verbose, L1, L2, threads, w, seed, inv_density, overfit_threshold, trace_test_mse) { 79 | .Call(`_singlet_c_ard_nmf`, A, At, tol, maxit, verbose, L1, L2, threads, w, seed, inv_density, overfit_threshold, trace_test_mse) 80 | } 81 | 82 | c_ard_nmf_sparse_list <- function(A_, 
At_, tol, maxit, verbose, L1, L2, threads, w, rng_seed, inv_density, overfit_threshold, trace_test_mse) { 83 | .Call(`_singlet_c_ard_nmf_sparse_list`, A_, At_, tol, maxit, verbose, L1, L2, threads, w, rng_seed, inv_density, overfit_threshold, trace_test_mse) 84 | } 85 | 86 | c_ard_nmf_dense <- function(A, At, tol, maxit, verbose, L1, L2, threads, w, seed, inv_density, overfit_threshold, trace_test_mse) { 87 | .Call(`_singlet_c_ard_nmf_dense`, A, At, tol, maxit, verbose, L1, L2, threads, w, seed, inv_density, overfit_threshold, trace_test_mse) 88 | } 89 | 90 | spatial_graph <- function(c1, c2, max_dist, max_k = 100L, threads = 0L) { 91 | .Call(`_singlet_spatial_graph`, c1, c2, max_dist, max_k, threads) 92 | } 93 | 94 | c_LKNN <- function(m, coord_x, coord_y, k, radius, metric, similarity, max_dist, verbose, threads) { 95 | .Call(`_singlet_c_LKNN`, m, coord_x, coord_y, k, radius, metric, similarity, max_dist, verbose, threads) 96 | } 97 | 98 | c_SNN <- function(G, min_similarity, threads) { 99 | .Call(`_singlet_c_SNN`, G, min_similarity, threads) 100 | } 101 | 102 | c_gcnmf <- function(A, At, G, tol, maxit, verbose, L1, L2, threads, w) { 103 | .Call(`_singlet_c_gcnmf`, A, At, G, tol, maxit, verbose, L1, L2, threads, w) 104 | } 105 | 106 | c_differentiate_model <- function(h, G) { 107 | .Call(`_singlet_c_differentiate_model`, h, G) 108 | } 109 | 110 | c_assign_cells_to_edge_clusters <- function(G, h_diff_clusters) { 111 | .Call(`_singlet_c_assign_cells_to_edge_clusters`, G, h_diff_clusters) 112 | } 113 | 114 | -------------------------------------------------------------------------------- /R/RescaleSpatial.R: -------------------------------------------------------------------------------- 1 | #' Rescale spatial coordinates 2 | #' 3 | #' Convert coordinates in the "spatial" reduction to natural numbers rather than values between 0 and 1. This allows for intuitive graph construction based on the radius surrounding any given cell (i.e. a radius of one corresponds to all cells next to the cell of interest) 4 | #' 5 | #' @param object Seurat object 6 | #' @param reduction the name of the spatial reduction to use 7 | #' @export 8 | #' @return Seurat object with rescaled spatial coordinates 9 | #' @aliases RescaleSpatial.Seurat 10 | #' @rdname RescaleSpatial 11 | RescaleSpatial.Seurat <- function(object, reduction = "spatial"){ 12 | df <- object@reductions[[reduction]]@cell.embeddings 13 | df[,1] <- df[,1] - min(df[,1]) 14 | df[,2] <- df[,2] - min(df[,2]) 15 | df[,1] <- df[,1] / max(df[,1]) 16 | df[,2] <- df[,2] / max(df[,2]) 17 | df[,1] <- df[,1] * 1 / median(diff(sort(unique(df[,1])))) 18 | df[,2] <- df[,2] * 1 / median(diff(sort(unique(df[,2])))) 19 | df <- round(df) 20 | object@reductions[[reduction]]@cell.embeddings <- df 21 | object 22 | } 23 | 24 | 25 | #' @rdname RunGCNMF 26 | #' @name RunGCNMF 27 | #' @export 28 | #' 29 | RescaleSpatial <- function(object, ...) { 30 | UseMethod("RescaleSpatial") 31 | } 32 | 33 | #' @rdname RescaleSpatial 34 | #' @name RescaleSpatial 35 | #' @export 36 | #' 37 | .S3method("RescaleSpatial", "Seurat", RescaleSpatial.Seurat) 38 | -------------------------------------------------------------------------------- /R/RunGCNMF.R: -------------------------------------------------------------------------------- 1 | #' Run Graph-Convolutional Non-negative Matrix Factorization 2 | #' 3 | #' @description Run NMF with weighted convolution determined by edges in a graph of dimensions \code{n x n}, where \code{n} is the number of columns in the matrix. 
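#'
#' A usage sketch (illustrative only; \code{g} below stands for any
#' user-constructed \code{n x n} cell-cell graph, e.g. one derived from
#' \code{\link{FindLocalNeighbors}} on spatial data):
#' \dontrun{
#' obj <- PreprocessData(obj)
#' obj <- RunGCNMF(obj, graph = g, k = 15)
#' obj@reductions$gcnmf
#' }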
4 | #' 5 | #' @inheritParams RunNMF 6 | #' @param graph A graph to use, either directed or undirected 7 | #' @param verbose print updates to console 8 | #' @param k rank of the factorization (no automatic rank determination for GCNMF. Use \code{\link{RunNMF}}). Alternatively, specify an initial \code{w} matrix of dimensions \code{m x k}, where \code{m} is the number of rows in the matrix to be factorized. 9 | #' 10 | #' @return Returns a Seurat object with the GCNMF model stored in the reductions slot 11 | #' 12 | #' @details Use \code{set.seed()} to guarantee reproducibility! 13 | #' @rdname RunGCNMF 14 | #' @aliases RunGCNMF.Seurat 15 | #' @name RunGCNMF.Seurat 16 | #' 17 | #' @seealso \code{\link{RunNMF}} 18 | #' 19 | #' @export 20 | #' 21 | RunGCNMF.Seurat <- function(object, 22 | graph, 23 | k, 24 | split.by = NULL, 25 | assay = NULL, 26 | tol = 1e-5, 27 | L1 = 0.01, 28 | L2 = 0, 29 | verbose = 2, 30 | reduction.name = "gcnmf", 31 | reduction.key = "GCNMF_", 32 | maxit = 100, 33 | threads = 0, 34 | features = NULL, 35 | ...) { 36 | if (is.null(assay)) { 37 | assay <- names(object@assays)[[1]] 38 | } 39 | 40 | # check if data has been normalized 41 | v <- object@assays[[assay]]@data@x 42 | if (sum(as.integer(v)) == sum(v)) { 43 | object <- PreprocessData(object, assay = assay) 44 | } 45 | A <- object@assays[[assay]]@data 46 | 47 | if (!is.null(features)) { 48 | if (features[[1]] == "var.features") { 49 | A <- A[object@assays[[assay]]@var.features, ] 50 | } else if (is.integer(features) || is.character(features)) { 51 | # array of indices or rownames 52 | A <- A[features, ] 53 | } else { 54 | stop("'features' vector was invalid.") 55 | } 56 | } 57 | 58 | rnames <- rownames(A) 59 | cnames <- colnames(A) 60 | 61 | if (!is.null(split.by)) { 62 | split.by <- as.integer(as.numeric(as.factor(object@meta.data[[split.by]]))) - 1 63 | if (any(sapply(split.by, is.na))) { 64 | stop("'split.by' cannot contain NA values") 65 | } 66 | A <- weight_by_split(A, split.by, length(unique(split.by))) 67 | } 68 | At <- Matrix::t(A) 69 | seed.use <- abs(.Random.seed[[3]]) 70 | set.seed(seed.use) 71 | if(is.matrix(k)){ 72 | if(!(nrow(A) %in% dim(k))) stop("dimensions of matrix specified for 'k' are not compatible with number of rows in 'A'") 73 | } else { 74 | w_init <- matrix(runif(k * nrow(A)), k, nrow(A)) 75 | } 76 | 77 | nmf_model <- c_gcnmf(A, At, graph, tol, maxit, verbose, L1, L2, threads, w_init) 78 | rownames(nmf_model$h) <- colnames(nmf_model$w) <- paste0(reduction.key, 1:nrow(nmf_model$h)) 79 | rownames(nmf_model$w) <- rnames 80 | colnames(nmf_model$h) <- cnames 81 | object@reductions[[reduction.name]] <- new("DimReduc", 82 | cell.embeddings = t(nmf_model$h), 83 | feature.loadings = nmf_model$w, 84 | assay.used = assay, 85 | stdev = nmf_model$d, 86 | global = FALSE, 87 | key = reduction.key) 88 | 89 | object 90 | } 91 | 92 | #' @rdname RunGCNMF 93 | #' 94 | #' @name RunGCNMF 95 | #' 96 | #' @export 97 | #' 98 | RunGCNMF <- function(object, ...) { 99 | UseMethod("RunGCNMF") 100 | } 101 | 102 | #' @rdname RunGCNMF 103 | #' 104 | #' @name RunGCNMF 105 | #' 106 | #' @export 107 | #' 108 | .S3method("RunGCNMF", "Seurat", RunGCNMF.Seurat) 109 | -------------------------------------------------------------------------------- /R/RunGSEA.R: -------------------------------------------------------------------------------- 1 | #' Run Gene Set Enrichment Analysis on a Reduction 2 | #' 3 | #' Run GSEA to identify gene sets that are enriched within NMF factors. 
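#'
#' A short usage sketch (assumes an existing "nmf" reduction produced by
#' \code{RunNMF}; the species and category shown are the documented defaults):
#' \dontrun{
#' pbmc3k <- RunGSEA(pbmc3k, reduction = "nmf", species = "Homo sapiens", category = "C5")
#' GSEAHeatmap(pbmc3k, reduction = "nmf", max.terms.per.factor = 3)
#' }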
4 | #' 5 | #' @param object a Seurat or RcppML::nmf object 6 | #' @param reduction dimensional reduction to use (if Seurat) 7 | #' @param species species for which to load gene sets 8 | #' @param category msigdbr gene set category (i.e. "H", "C5", etc.) 9 | #' @param min.size minimum number of terms in a gene set 10 | #' @param max.size maximum number of terms in a gene set 11 | #' @param dims factors in the reduction to use, default \code{NULL} for all factors 12 | #' @param verbose print progress to console 13 | #' @param padj.sig significance cutoff for BH-adjusted p-values (default 0.01) 14 | #' @param ... additional params to pass to msigdbr 15 | #' 16 | #' @return a Seurat or nmf object, with GSEA information in the misc slot. BH-adj p-values are on a -log10 scale. 17 | #' 18 | #' @import fgsea 19 | #' @import msigdbr 20 | #' 21 | #' @export 22 | #' 23 | RunGSEA <- function(object, reduction = "nmf", species = "Homo sapiens", category = "C5", 24 | min.size = 10, max.size = 500, dims = NULL, 25 | verbose = TRUE, padj.sig = 0.01, ...) { 26 | 27 | if (verbose) cat("fetching gene sets\n") 28 | gene_sets <- msigdbr(species = species, category = category, ...) 29 | 30 | if (verbose) cat("filtering pathways\n") 31 | pathways <- split(x = gene_sets$gene_symbol, f = gene_sets$gs_name) 32 | pathways <- pathways[lapply(pathways, length) > min.size] 33 | 34 | if (verbose) cat("filtering genes in pathways to those in reduction\n") 35 | genes_in_pathways <- unique(unlist(pathways)) 36 | 37 | # work on RcppML nmf objects too: 38 | if (is(object, "Seurat")) { 39 | w <- object@reductions[[reduction]]@feature.loadings 40 | } else if (is(object, "nmf")) { 41 | w <- object@w 42 | } 43 | if (!is.null(dims)) w <- w[, dims] 44 | 45 | if (verbose) cat("filtering genes in reduction to those in pathways\n") 46 | w <- w[which(rownames(w) %in% genes_in_pathways), ] 47 | pathways <- lapply(pathways, function(x) x[x %in% rownames(w)]) 48 | v <- lapply(pathways, length) 49 | pathways <- pathways[which(v > min.size & v < max.size)] 50 | 51 | cat("running GSEA on", ncol(w), "factors...\n") 52 | pb <- utils::txtProgressBar(min = 0, max = ncol(w), style = 3) 53 | results <- list() 54 | for (i in 1:ncol(w)) { 55 | ranks <- sort(w[, i]) 56 | results[[i]] <- suppressWarnings(fgseaMultilevel( 57 | pathways, ranks, 58 | minSize = min.size, maxSize = max.size, scoreType = "pos" 59 | )) 60 | utils::setTxtProgressBar(pb, i) 61 | } 62 | close(pb) 63 | 64 | pval <- do.call(cbind, lapply(results, function(x) x$pval)) 65 | padj <- do.call(cbind, lapply(results, function(x) x$padj)) 66 | es <- do.call(cbind, lapply(results, function(x) x$ES)) 67 | nes <- do.call(cbind, lapply(results, function(x) x$NES)) 68 | rownames(pval) <- rownames(padj) <- rownames(es) <- rownames(nes) <- results[[1]]$pathway 69 | 70 | idx <- which(apply(padj, 1, function(x) min(x) < padj.sig)) 71 | 72 | if (!is.null(dims)) { 73 | dims <- paste0(reduction, dims) 74 | } else if (is(object, "Seurat")) { 75 | dims <- paste0(reduction, 1:ncol(object@reductions[[reduction]])) 76 | } else if (is(object, "nmf")) { 77 | dims <- paste0("nmf", 1:ncol(w)) 78 | } 79 | colnames(pval) <- colnames(padj) <- colnames(es) <- colnames(nes) <- dims 80 | 81 | # reorder with hclust 82 | padj <- -log10(padj) 83 | pval <- -log10(pval) 84 | row_order <- hclust(dist(padj), method = "ward.D2")$order 85 | col_order <- hclust(dist(t(padj)), method = "ward.D2")$order 86 | pval <- pval[row_order, col_order] 87 | padj <- padj[row_order, col_order] 88 | es <- es[row_order, col_order] 89 | 
nes <- nes[row_order, col_order] 90 | 91 | if (is(object, "Seurat")) { 92 | object@reductions[[reduction]]@misc$gsea <- 93 | list("pval" = pval, "padj" = padj, "es" = es, "nes" = nes) 94 | } else if (is(object, "nmf")) { 95 | object@misc$gsea <- 96 | list("pval" = pval, "padj" = padj, "es" = es, "nes" = nes) 97 | } 98 | 99 | object 100 | } 101 | -------------------------------------------------------------------------------- /R/cellxgene_pipeline.R: -------------------------------------------------------------------------------- 1 | #' Learn an NMF model from a cellxgene Seurat object 2 | #' 3 | #' @description Provide a link to download a cellxgene Seurat object, and this pipeline will return a standardized annotated NMF object at the optimal rank 4 | #' 5 | #' @details 6 | #' This pipeline runs the following steps: 7 | #' 1. Download a Seurat v4 object from the provided URL 8 | #' 2. Preprocess the data and run NMF using parameters specified in the \code{...} argument 9 | #' 3. Annotate the NMF model against existing multi-level factor information 10 | #' 4. Extract the model and annotations and save to an RDS file 11 | #' 12 | #' @param url download url for a Seurat v4 object 13 | #' @param ... arguments to \code{RunNMF} 14 | #' @export 15 | #' @md 16 | #' 17 | cellxgene_pipeline <- function(filename, reps = 1, verbose = 3, L1 = 0.05, ...) { 18 | cat("reading ", filename, "\n") 19 | A <- readRDS(filename) 20 | if ("RNA" %in% names(A@assays)) { 21 | A@assays$RNA@key <- "RNA_" 22 | # keep only RNA assay 23 | A@assays <- list("RNA" = A@assays$RNA) 24 | cat(" normalizing...\n") 25 | A <- PreprocessData(A) 26 | cat(" running NMF...\n") 27 | t1 <- system.time({ 28 | A <- RunNMF(A, reps = reps, verbose = 3, L1 = L1, ...) 29 | })[[3]] 30 | cat(" annotating NMF model...\n") 31 | A <- AnnotateNMF(A) 32 | 33 | model <- list( 34 | "w" = as(A@reductions$nmf@feature.loadings, "dgCMatrix"), 35 | "d" = A@reductions$nmf@stdev, 36 | "h" = as(A@reductions$nmf@cell.embeddings, "dgCMatrix"), 37 | "misc" = A@reductions$nmf@misc, 38 | "metadata" = A@meta.data, 39 | "dataset" = A@misc$title, 40 | "runtime" = t1 41 | ) 42 | 43 | filename <- paste0(gsub("[^a-zA-Z]", "", A@misc$title), ".rds") 44 | cat(" saving model...\n") 45 | saveRDS(model, filename) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /R/checkColumns.R: -------------------------------------------------------------------------------- 1 | #' verify that columns for auto-annotation are factors with > 1 level 2 | #' 3 | #' @param meta.data the meta.data (or a Seurat object if needs be) 4 | #' @param columns the columns (optional; if NULL, will check all columns) 5 | #' @param max.levels maximum number of levels permitted for a factor to be kept 6 | #' @return a vector of suitable columns (may be length 0) 7 | #' 8 | #' @export 9 | checkColumns <- function(meta.data, columns = NULL, max.levels = 200) { 10 | verbose <- !is.null(columns) 11 | if (is(meta.data, "Seurat")) meta.data <- meta.data@meta.data 12 | if (is.null(columns)) columns <- colnames(meta.data) 13 | names(columns) <- columns 14 | keep <- names(which(sapply(columns, .keepColumn, meta.data = meta.data, max.levels = max.levels))) 15 | discard <- setdiff(columns, keep) 16 | if (verbose & length(discard) > 0) { 17 | message("Some columns are not factors, or have only one level, or have more than max.levels levels.") 18 | message("Skipping `", paste(discard, collapse = "`, `"), "`.") 19 | } 20 | names(keep) <- keep 21 | return(keep) 22 | } 23 
| 24 | 25 | # helper fn 26 | .keepColumn <- function(x, meta.data, max.levels) { 27 | if (!x %in% names(meta.data)) { 28 | return(FALSE) 29 | } 30 | if (!is(meta.data[[x]], "factor")) { 31 | return(FALSE) 32 | } 33 | if (nlevels(meta.data[[x]]) < 2) { 34 | return(FALSE) 35 | } 36 | if (nlevels(meta.data[[x]]) > max.levels) { 37 | return(FALSE) 38 | } 39 | return(TRUE) 40 | } 41 | -------------------------------------------------------------------------------- /R/checkDesigns.R: -------------------------------------------------------------------------------- 1 | #' verify that a list of matrices is in fact a named list of model matrices 2 | #' 3 | #' @param designs an alleged list of model matrices 4 | #' 5 | #' @return the list of model matrices, assuming it passes 6 | #' 7 | #' @details this function will squawk and stop if the list is no good 8 | #' 9 | #' @export 10 | checkDesigns <- function(designs) { 11 | 12 | if (is.null(names(designs)) | 13 | !all(sapply(designs, function(x) !is.null(attr(x, "assign"))))) { 14 | stop("`designs` must be a named list of model.matrix outputs.") 15 | } else { 16 | return(designs) 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /R/coercions.R: -------------------------------------------------------------------------------- 1 | #' @exportMethod coerce 2 | #' @importClassesFrom RcppML nmf 3 | setAs("list", "nmf", 4 | function(from) { 5 | 6 | msg <- NULL 7 | required <- c("w", "d", "h") 8 | if (!all(required %in% names(from))) { 9 | msg <- c(msg, 10 | "Cannot find $w, $d, and $h to create nmf object from list.") 11 | } else { 12 | if (ncol(from$w) != nrow(from$h)) { 13 | msg <- c(msg, 14 | "The $w and $h matrices are of unequal rank. Cannot coerce.") 15 | } 16 | if (ncol(from$w) != length(from$d)) { 17 | msg <- c(msg, 18 | "The scaling diagonal $d is the wrong length. Cannot coerce.") 19 | } 20 | } 21 | 22 | if (!is.null(msg)) { 23 | stop(msg) 24 | } else { 25 | new("nmf", 26 | w = from$w, 27 | d = from$d, 28 | h = from$h, 29 | misc = from[setdiff(names(from), required)]) 30 | } 31 | 32 | }) 33 | 34 | 35 | #' @exportMethod coerce 36 | #' @importClassesFrom RcppML nmf 37 | if (requireNamespace("SingleCellExperiment", quietly=TRUE)) { 38 | setAs("nmf", "LinearEmbeddingMatrix", function(from) { 39 | factorNames <- colnames(from@w) 40 | sampleNames <- colnames(from@h) 41 | lem <- LinearEmbeddingMatrix(sampleFactors=t(from@h), 42 | featureLoadings=from@w, 43 | factorData=DataFrame(d=from@d, 44 | row.names=factorNames), 45 | metadata=from@misc) 46 | rownames(lem) <- sampleNames 47 | return(lem) 48 | }) 49 | } 50 | 51 | 52 | #' @exportMethod coerce 53 | #' @importClassesFrom RcppML nmf 54 | if (requireNamespace("SingleCellExperiment", quietly=TRUE)) { 55 | setAs("LinearEmbeddingMatrix", "nmf", function(from) { 56 | d <- factorData(from)$d 57 | names(d) <- rownames(factorData(from)) 58 | new("nmf", 59 | w = featureLoadings(from), 60 | d = d, 61 | h = t(sampleFactors(from)), 62 | misc = metadata(from)) 63 | }) 64 | } 65 | -------------------------------------------------------------------------------- /R/cross_validate_nmf.R: -------------------------------------------------------------------------------- 1 | #' Determine best rank for NMF using cross-validation 2 | #' 3 | #' @description Find the rank that minimizes the mean squared error of test set reconstruction using cross-validation. 
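#'
#' A minimal sketch (the rank grid and replicate count below are illustrative
#' only; \code{A} may be a sparse \code{dgCMatrix}, a dense matrix, or a list of
#' \code{dgCMatrix} blocks with identical rownames):
#' \dontrun{
#' cv <- cross_validate_nmf(A, ranks = c(5, 10, 20, 40), n_replicates = 3)
#' plot(cv)        # test-set reconstruction error vs. rank
#' GetBestRank(cv) # lowest rank that minimizes test-set error
#' }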
4 | #' 5 | #' @inheritParams run_nmf 6 | #' @param ranks a vector of ranks at which to fit a model and compute test set reconstruction error 7 | #' @param n_replicates number of random test sets 8 | #' @param test_density fraction of values to include in the test set 9 | #' @param tol_overfit stopping criterion, maximum increase in test set reconstruction error at any iteration compared to test set reconstruction error at \code{trace_test_mse} 10 | #' @param trace_test_mse first iteration at which to calculate test set reconstruction error, and the error to compare all later iterations to when determining whether overfitting has occurred. 11 | #' @return a \code{data.frame} of test set reconstruction error vs. rank of class \code{nmf_cross_validate_data}. Use \code{plot} method to visualize or \code{min} to compute optimal rank. 12 | #' @rdname cross_validate_nmf 13 | #' @param ... additional arguments (not implemented) 14 | #' @export 15 | #' @importFrom utils txtProgressBar setTxtProgressBar 16 | #' @importFrom stats runif 17 | #' 18 | cross_validate_nmf <- function(A, ranks, n_replicates = 3, tol = 1e-4, maxit = 100, verbose = 1, L1 = 0.01, L2 = 0, threads = 0, test_density = 0.05, tol_overfit = 1e-4, trace_test_mse = 5) { 19 | if (L1 >= 1) { 20 | stop("L1 penalty must be strictly in the range (0, 1]") 21 | } 22 | 23 | if (test_density > 0.2 | test_density < 0.01) { 24 | warning("'test_density' should not be greater than 0.2 or less than 0.01, as a general rule of thumb") 25 | } 26 | 27 | if("list" %in% class(A)){ 28 | # check that number of rows is identical 29 | if(var(sapply(A, nrow)) != 0) 30 | stop("number of rows in all provided 'A' matrices are not identical") 31 | if(!all(sapply(A, function(x) class(x) == "dgCMatrix"))) 32 | stop("if providing a list, you must provide a list of all 'dgCMatrix' objects") 33 | if(!is.null(rownames(A[[1]]))){ 34 | if(!all(sapply(A, function(x) all.equal(rownames(x), rownames(A[[1]]))))) stop("rownames of all dgCMatrix objects in list must be identical") 35 | } 36 | 37 | # generate a distributed transpose 38 | if(verbose > 0) cat("generating a distributed transpose of input matrix list\n") 39 | block_sizes <- floor(c(seq(1, nrow(A[[1]]), nrow(A[[1]]) /(length(A))), nrow(A[[1]]) + 1)) 40 | At <- list() 41 | if(verbose > 0) pb <- txtProgressBar(min = 0, max = length(A)) 42 | for(i in 1:length(A)){ 43 | At[[i]] <- list() 44 | for(j in 1:length(A)){ 45 | At[[i]][[j]] <- t(A[[j]][block_sizes[i]:(block_sizes[i+1] - 1), ]) 46 | } 47 | At[[i]] <- do.call(rbind, At[[i]]) 48 | if(verbose > 0) setTxtProgressBar(pb, i) 49 | } 50 | if(verbose > 0) close(pb) 51 | if (verbose > 0) cat("running with sparse optimization\n") 52 | w_init <- lapply(1:n_replicates, function(x) matrix(stats::runif(nrow(A[[1]]) * max(ranks)), max(ranks), nrow(A[[1]]))) 53 | sparse_list <- TRUE 54 | } else { 55 | if (class(A)[[1]] != "matrix") { 56 | if (verbose > 0) cat("running with sparse optimization\n") 57 | A <- as(as(as(A, "dMatrix"), "generalMatrix"), "CsparseMatrix") 58 | At <- Matrix::t(A) 59 | dense_mode <- FALSE 60 | } else { 61 | if (verbose > 0) cat("running with dense optimization\n") 62 | At <- t(A) 63 | dense_mode <- TRUE 64 | } 65 | w_init <- lapply(1:n_replicates, function(x) matrix(stats::runif(nrow(A) * max(ranks)), max(ranks), nrow(A))) 66 | sparse_list <- FALSE 67 | } 68 | 69 | df <- expand.grid("k" = ranks, "rep" = 1:n_replicates) 70 | df2 <- list() 71 | df$test_error <- 0 72 | if (verbose == 1) { 73 | pb <- utils::txtProgressBar(min = 0, max = nrow(df), style = 3) 
74 | } 75 | for (i in 1:nrow(df)) { 76 | rep <- df$rep[[i]] 77 | if (verbose > 1) { 78 | cat(paste0("k = ", df$k[[i]], ", rep = ", rep, " (", i, "/", nrow(df), "):\n")) 79 | } 80 | if(!sparse_list){ 81 | if (dense_mode) { 82 | model <- c_ard_nmf_dense(A, At, tol, maxit, verbose > 1, L1, L2, threads, w_init[[rep]][1:df$k[[i]], ], abs(.Random.seed[[3 + rep]]), round(1 / test_density), tol_overfit, trace_test_mse) 83 | } else { 84 | model <- c_ard_nmf(A, At, tol, maxit, verbose > 1, L1, L2, threads, w_init[[rep]][1:df$k[[i]], ], abs(.Random.seed[[3 + rep]]), round(1 / test_density), tol_overfit, trace_test_mse) 85 | } 86 | } else { 87 | model <- c_ard_nmf_sparse_list(A, At, tol, maxit, verbose > 1, L1, L2, threads, w_init[[rep]][1:df$k[[i]], ], abs(.Random.seed[[3 + rep]]), round(1 / test_density), tol_overfit, trace_test_mse) 88 | } 89 | df$test_error[[i]] <- model$test_mse[[length(model$test_mse)]] 90 | df2[[length(df2) + 1]] <- data.frame("k" = df$k[[i]], "rep" = df$rep[[i]], "test_error" = model$test_mse, "iter" = model$iter, "tol" = model$tol) 91 | if (verbose == 1) utils::setTxtProgressBar(pb, i) 92 | if (verbose > 1) cat(paste0("test set error: ", sprintf(df$test_error[[i]], fmt = "%#.4e"), "\n\n")) 93 | 94 | if (model$test_mse[[length(model$test_mse)]] / model$test_mse[[1]] > (1 + tol_overfit)) { 95 | if (verbose > 1) cat(paste0("overfitting detected, lower rank recommended\n")) 96 | } 97 | } 98 | if (verbose == 1) close(pb) 99 | 100 | df$rep <- factor(df$rep) 101 | class(df) <- c("cross_validate_nmf_data", "data.frame") 102 | df2 <- do.call(rbind, df2) 103 | class(df2) <- c("cross_validate_nmf_data", "data.frame") 104 | df2 105 | } 106 | -------------------------------------------------------------------------------- /R/getDesigns.R: -------------------------------------------------------------------------------- 1 | #' Refactored out from AnnotateNMF to ease argument handling 2 | #' 3 | #' @param columns factor columns of meta.data, optional if !is.null(designs) 4 | #' @param meta.data a data.frame of annotations, optional if !is.null(designs) 5 | #' @param designs named list of design matrices (supersedes meta.data/columns) 6 | #' @param max.levels maximum number of levels permitted for a factor to be kept 7 | #' 8 | #' @return a named list of design matrices, if one was not provided 9 | #' @export 10 | getDesigns <- function(columns = NULL, meta.data = NULL, designs = NULL, max.levels = 200) { 11 | 12 | if (is.null(designs)) { 13 | stopifnot(any(!is.null(c(columns, meta.data)))) 14 | columns <- checkColumns(meta.data = meta.data, 15 | columns = columns, 16 | max.levels = max.levels) 17 | designs <- lapply(columns, getModelMatrix, meta.data = meta.data) 18 | } 19 | 20 | checkDesigns(designs) 21 | } 22 | -------------------------------------------------------------------------------- /R/getModelFit.R: -------------------------------------------------------------------------------- 1 | #' get linear all-pairs comparisons fits for a design matrix and data matrix 2 | #' 3 | #' Continuing along with the theme of "stupid limma tricks", this function 4 | #' fits and shrinks a means model for a factor. The proportion of factors 5 | #' assumed to have a fold-change > 0 is 1%, and a robust fit is applied. 6 | #' 7 | #' @param design a model.matrix (or a sparse.model.matrix, perhaps) 8 | #' @param object a data.matrix, Seurat DimReduc, or RcppML nmf object 9 | #' @param center center the factor matrix for testing? (TRUE) 10 | #' @param ... 
additional arguments, passed to base::scale 11 | #' 12 | #' 13 | #' @examples 14 | #' if (FALSE) { 15 | #' get_pbmc3k_data() %>% NormalizeData() -> pbmc3k 16 | #' design <- model.matrix(~ 0 + cell_type, data=pbmc3k@meta.data) 17 | #' fit <- getModelFit(design, pbmc3k) # toy fit on lognormcounts 18 | #' # Subsetting data to non-NA observations to match design matrix. 19 | #' limma::topTable(fit) 20 | #' } 21 | #' 22 | #' @export 23 | getModelFit <- function(design, object, center=TRUE, ...) { 24 | 25 | dat <- object 26 | if (is(object, "nmf")) dat <- object@h # RcppML nmf 27 | if (is(object, "Seurat")) dat <- object@assays$RNA$data 28 | if (is(object, "DimReduc")) dat <- t(object@cell.embeddings) 29 | if (is(object, "SingleCellExperiment")) dat <- logcounts(object) 30 | # SingleCellExperiment::reducedDim(object, dimname) just returns a data.matrix 31 | if (ncol(dat) < nrow(design)) dat <- t(dat) # transpose reduced dims if needed 32 | 33 | # janky, but should be foolproof 34 | if (nrow(design) != ncol(dat)) { 35 | if (!all(rownames(design) %in% colnames(dat))) { 36 | message("Rows of the design matrix do not match columns of the object.") 37 | message("This usually means that there are NAs in the sample metadata.") 38 | message("Ensure rownames of your design matrix match data column names.") 39 | message("Alternatively, provide object[, !is.na(object$predictor)]") 40 | message("so that the dimensions of the data and design matrices match.") 41 | stop("Cannot proceed as called.") 42 | } else { 43 | message("Subsetting data to non-NA observations to match design matrix.") 44 | tofit <- dat[, rownames(design)] 45 | } 46 | } else { 47 | tofit <- dat 48 | if (is.null(rownames(design))) { 49 | warning("Design matrix has appropriate rank, but no row names. Beware!") 50 | } else if (!identical(rownames(design), colnames(tofit))) { 51 | warning("Design matrix row names do not match data observation names!") 52 | warning("This is usually a VERY BAD THING. You MUST check your data.") 53 | warning("If this warning message persists, file a bug with a reprex.") 54 | } 55 | } 56 | 57 | if (center) tofit <- t(scale(t(tofit), ...)) 58 | fit <- eBayes(lmFit(tofit, design), proportion=0.01, robust=TRUE) 59 | fit$centered <- center 60 | return(fit) 61 | 62 | } 63 | -------------------------------------------------------------------------------- /R/getModelMatrix.R: -------------------------------------------------------------------------------- 1 | #' automatically generate a means model (one-vs-all group associations) 2 | #' 3 | #' A little-known trick in limma is to fit ~ 0 + group for a means model. 4 | #' This function automates that for a data.frame and a factor column of it. 5 | #' 6 | #' @param field the name of a column in the data.frame, or the column 7 | #' @param meta.data a data.frame with one or more factor columns, or NULL 8 | #' @param sparse fit a sparse model.matrix? (FALSE) 9 | #' @param ova fit a One-Vs-All model matrix (no referent)? (TRUE) 10 | #' @param ... any additional params to pass to model.matrix 11 | #' 12 | #' @details 13 | #' If a factor (and no meta.data) is supplied (usually by with(meta.data, ...)), 14 | #' getModelMatrix will attempt to figure out the text to remove from the matrix 15 | #' column names by using deparse() and match.call() on the arguments (voodoo!). 16 | #' In order to fit one-vs-all comparisons, a means model is the default. If you 17 | #' have a referent group (e.g. normal bone marrow vs. 
a bunch of leukemia cells) 18 | #' or simply don't want a means model, set `ova` (one vs all) to FALSE. 19 | #' 20 | #' @return a model.matrix or sparse.model.matrix (if sparse==TRUE) 21 | #' 22 | #' @examples 23 | #' 24 | #' covs <- get_pbmc3k_data()@meta.data 25 | #' design <- getModelMatrix("cell_type", covs) 26 | #' head(design) 27 | #' sparsedesign <- getModelMatrix("cell_type", covs, sparse=TRUE) 28 | #' head(sparsedesign) 29 | #' 30 | #' if (FALSE) { 31 | #' # test Seurat and SCE support too 32 | #' mm1 <- getModelMatrix("cell_type", pbmc3k) 33 | #' mm2 <- getModelMatrix("cell_type", pbmc) 34 | #' identical(mm1, mm2) 35 | #' # [1] TRUE 36 | #' fit1 <- getModelFit(mm2, pbmc3k) 37 | #' fit2 <- getModelFit(mm1, pbmc) 38 | #' identical(fit1, fit2) 39 | #' # [1] TRUE 40 | #' limma::topTable(fit1) 41 | #' } 42 | #' 43 | #' @import Matrix 44 | #' 45 | #' @export 46 | getModelMatrix <- function(field, meta.data=NULL, sparse=FALSE, ova=TRUE, ...) { 47 | 48 | if (is.null(meta.data)) { 49 | if (is.factor(field) & nlevels(field) > 1) { 50 | fieldname <- as.character(sapply(match.call()[-1], deparse)[1]) # voodoo 51 | meta.data <- data.frame(field) 52 | names(meta.data) <- fieldname 53 | field <- fieldname 54 | } else { 55 | stop("If meta.data is NULL, `field` must be a factor with > 1 levels.") 56 | } 57 | } else if (is(meta.data, "Seurat")) { 58 | meta.data <- meta.data@meta.data 59 | } else if (is(meta.data, "SingleCellExperiment")) { 60 | meta.data <- colData(meta.data) 61 | } else { 62 | stopifnot(field %in% names(meta.data)) 63 | } 64 | 65 | notNA <- which(is.na(meta.data[[field]])) 66 | if (!sparse) { 67 | if (ova) { 68 | mat <- model.matrix(~ 0 + ., data=meta.data[, field, drop=FALSE], ...) 69 | } else { 70 | message("Fitting a model with a referent group. Be sure you want this!") 71 | mat <- model.matrix(~ ., data=meta.data[, field, drop=FALSE], ...) 72 | } 73 | } else { 74 | if (ova) { 75 | mat <- sparse.model.matrix(~ 0 + ., data=meta.data[, field, drop=FALSE]) 76 | } else { 77 | message("Fitting a model with a referent group. Be sure you want this!") 78 | mat <- sparse.model.matrix(~ ., data=meta.data[, field, drop=FALSE], ...) 79 | } 80 | } 81 | colnames(mat) <- gsub(field, "", colnames(mat)) 82 | return(mat) 83 | 84 | } 85 | -------------------------------------------------------------------------------- /R/getModelResults.R: -------------------------------------------------------------------------------- 1 | #' extract data.frame of lods and pvalues for differential factor representation 2 | #' 3 | #' log-odds of non-null differences for a response by a factor are in fit$lods 4 | #' (which will usually be a matrix), and one-sided p-values for the moderated t 5 | #' test are computed from fit$t and fit$df.total using pt(t, df, lower=FALSE), 6 | #' then adjusted using the step-up procedure of Benjamini & Hochberg. 7 | #' 8 | #' @param fit an lmFit result from limma, shrunken with eBayes() 9 | #' @param noneg drop results with negative lods scores? (TRUE) 10 | #' @param noint drop any results for '(Intercept)'? (TRUE) 11 | #' 12 | #' @return a data.frame with columns 'factor', 'group', 'fc', and 'p' 13 | #' 14 | #' @details If an (Intercept) term is found, it will be dropped, and if 15 | #' negative LODS scores are encountered, they will be dropped, 16 | #' unless `noneg` and/or `noint` are FALSE. 
17 | #' 18 | #' @importFrom reshape2 melt 19 | #' @import limma 20 | #' 21 | #' @export 22 | getModelResults <- function(fit, noneg=TRUE, noint=TRUE) { 23 | 24 | # fits are centered, so use signed lods for evidence 25 | fcl <- with(fit, melt(lods)) 26 | names(fcl)[3] <- "lods" 27 | fct <- with(fit, melt(t)) 28 | names(fct)[3] <- "t" 29 | fcp <- merge(fcl, fct) 30 | names(fcp)[1:2] <- c("factor", "group") 31 | fcp$df <- fit$df.total[fcp$factor] 32 | fcp$p_raw <- with(fcp, pt(t, df, lower=FALSE)) 33 | fcp$p <- p.adjust(fcp$p_raw, method="fdr") 34 | 35 | # better might be to fit without an intercept term 36 | if (noneg) fcp <- subset(fcp, sign(lods) > 0) 37 | if (noint) fcp <- subset(fcp, group != "(Intercept)") 38 | if (length(fcp) == 0) message("No associations after filtering.") 39 | names(fcp) <- sub("^lods$", "fc", names(fcp)) 40 | return(fcp[, c("group", "factor", "fc", "p")]) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /R/get_pbmc3k_data.R: -------------------------------------------------------------------------------- 1 | #' Load the pbmc3k dataset 2 | #' 3 | #' 2,700 peripheral blood mononuclear cells (PBMC) from 10x genomics taken from the "SeuratData" package 4 | #' 5 | #' @description 6 | #' This dataset is adapted directly from the Satija lab "pbmc3k" dataset used in their popular tutorial on guided clustering. It is provided in this package for convenience since "SeuratData" is not available on CRAN. 7 | #' 8 | #' For more information, please see their documentation. 9 | #' 10 | #' @returns Seurat object with \code{$cell_type} info in the \code{meta.data} slot. 11 | #' 12 | #' @export 13 | #' 14 | get_pbmc3k_data <- function() { 15 | data(pbmc3k) 16 | pbmc3k 17 | A <- CreateSeuratObject(counts = new("dgCMatrix", i = pbmc3k$i, p = pbmc3k$p, Dim = pbmc3k$Dim, Dimnames = pbmc3k$Dimnames, x = as.numeric(inverse.rle(pbmc3k$x)))) 18 | A@meta.data$cell_type <- pbmc3k$cell_type 19 | A 20 | } 21 | -------------------------------------------------------------------------------- /R/methods.R: -------------------------------------------------------------------------------- 1 | #' @exportMethod coerce 2 | #' @importClassesFrom RcppML nmf 3 | setAs("list", "nmf", 4 | function(from) { 5 | 6 | msg <- NULL 7 | required <- c("w", "d", "h") 8 | if (!all(required %in% names(from))) { 9 | msg <- c(msg, 10 | "Cannot find $w, $d, and $h to create nmf object from list.") 11 | } else { 12 | if (ncol(from$w) != nrow(from$h)) { 13 | msg <- c(msg, 14 | "The $w and $h matrices are of unequal rank. Cannot coerce.") 15 | } 16 | if (ncol(from$w) != length(from$d)) { 17 | msg <- c(msg, 18 | "The scaling diagonal $d is the wrong length. 
Cannot coerce.") 19 | } 20 | } 21 | 22 | if (!is.null(msg)) { 23 | stop(msg) 24 | } else { 25 | new("nmf", 26 | w = from$w, 27 | d = from$d, 28 | h = from$h, 29 | misc = from[setdiff(names(from), required)]) 30 | } 31 | 32 | }) 33 | 34 | 35 | #' @exportMethod [ 36 | #' @importClassesFrom RcppML nmf 37 | setMethod("[", "nmf", 38 | function(x, i, j, ..., drop=TRUE) { 39 | if (missing(i) & missing(j)) return(x) 40 | if (missing(i)) i <- seq_along(x@d) 41 | if (missing(j)) j <- colnames(x@h) 42 | if ("covs" %in% names(x@misc)) x@misc$covs <- x@misc$covs[j, ] 43 | new("nmf", w = x@w[, i], d = x@d[i], h = x@h[i, j], misc = x@misc) 44 | }) 45 | 46 | 47 | #' @exportMethod $ 48 | #' @importClassesFrom RcppML nmf 49 | setMethod("$", "nmf", 50 | function(x, name) { 51 | if ("covs" %in% names(x@misc)) { 52 | x@misc$covs[[name]] 53 | } else { 54 | NULL 55 | } 56 | }) 57 | 58 | 59 | #' @exportMethod $<- 60 | #' @importClassesFrom RcppML nmf 61 | setReplaceMethod("$", "nmf", 62 | function(x, name, value) { 63 | if (is.null(x@misc$covs)) { 64 | x@misc$covs <- data.frame(row.names = colnames(x@h)) 65 | } 66 | x@misc$covs[[name]] <- value 67 | return(x) 68 | }) 69 | 70 | 71 | # make seurat objects behave vaguely like reasonable data structures 72 | # if SingleCellExperiment has been loaded, since otherwise who knows 73 | if (requireNamespace("Seurat") & requireNamespace("SingleCellExperiment")) { 74 | require("Seurat") 75 | require("SingleCellExperiment") 76 | setMethod("assay", "Seurat", function(x, i, withDimnames = TRUE, ...) x@assays[[i]]) 77 | setMethod("assays", "Seurat", function(x, withDimnames = TRUE, ...) x@assays) 78 | setMethod("assayNames", "Seurat", function(x, ...) names(x@assays)) 79 | setMethod("metadata", "Seurat", function(x, withDimnames = TRUE, ...) x@assays[[i]]) 80 | setMethod("reducedDim", "Seurat", function(x, i, ...) x@reductions[[i]]) 81 | setMethod("reducedDims", "Seurat", function(x, ...) x@reductions) 82 | setMethod("reducedDimNames", "Seurat", function(x) names(x@reductions)) 83 | } 84 | 85 | -------------------------------------------------------------------------------- /R/pbmc3k.R: -------------------------------------------------------------------------------- 1 | #' Compressed form of pbmc3k dataset 2 | #' 3 | #' @description See \code{\link{get_pbmc3k_data}} 4 | #' 5 | #' @md 6 | #' @docType data 7 | #' @usage data(pbmc3k) 8 | #' @format compressed version of the \code{dgCMatrix}, use \code{\link{get_pbmc3k_data}} to use this dataset. 9 | "pbmc3k" 10 | -------------------------------------------------------------------------------- /R/plot.cross_validate_nmf_data.R: -------------------------------------------------------------------------------- 1 | #' plot the result of cross-validating rank selection in NMF 2 | #' 3 | #' @param x the result of \code{cross_validate_nmf} (a data.frame) 4 | #' @param detail level of detail to plot 5 | #' @inheritParams ard_nmf 6 | #' @rdname cross_validate_nmf 7 | #' 8 | #' @import ggplot2 9 | #' 10 | #' @export 11 | #' 12 | plot.cross_validate_nmf_data <- function(x, detail = 2, tol.overfit = 1e-4, ...) 
{ 13 | if (ncol(x) == 5 & detail == 1) { 14 | x <- as.data.frame(group_by(x, rep, k) %>% slice(which.max(iter))) 15 | x$iter <- NULL 16 | } 17 | if (ncol(x) < 5) { 18 | x$rep <- factor(x$rep) 19 | # simple format (detail_level = 1) 20 | # normalize each replicate to the same minimum 21 | for (rep in levels(x$rep)) { 22 | idx <- which(x$rep == rep) 23 | x$test_error[idx] <- x$test_error[idx] / min(x$test_error[idx]) 24 | } 25 | best_rank <- GetBestRank(x, tol.overfit) 26 | ggplot(x, aes(k, test_error, color = factor(rep))) + 27 | geom_point() + 28 | geom_line() + 29 | theme_classic() + 30 | labs(x = "factorization rank", y = "relative test set error", color = "replicate", caption = paste0("(best rank is k = ", best_rank, ")")) + 31 | theme(aspect.ratio = 1, plot.caption = element_text(hjust = 0.5)) + 32 | geom_vline(xintercept = best_rank, linetype = "dashed", color = "red") + 33 | scale_y_continuous(trans = "log10") 34 | } else { 35 | # detail_level = 2 format 36 | best_rank <- GetBestRank(x, tol.overfit) 37 | if (length(unique(x$rep)) == 1) { 38 | ggplot(x, aes(k, test_error, color = iter, group = iter)) + 39 | geom_line() + 40 | scale_color_viridis_c(option = "B") + 41 | theme_classic() + 42 | theme(aspect.ratio = 1, plot.caption = element_text(hjust = 0.5)) + 43 | geom_vline(xintercept = best_rank, linetype = "dashed", color = "red") + 44 | scale_y_continuous(trans = "log10") + 45 | labs(x = "factorization rank", y = "test set error", color = "model iteration", caption = paste0("(best rank is k = ", best_rank, ")")) 46 | } else { 47 | ggplot(x, aes(k, test_error, color = iter, group = iter)) + 48 | geom_line() + 49 | scale_color_viridis_c(option = "B") + 50 | theme_classic() + 51 | theme(aspect.ratio = 1, plot.caption = element_text(hjust = 0.5)) + 52 | geom_vline(xintercept = best_rank, linetype = "dashed", color = "red") + 53 | scale_y_continuous(trans = "log10") + 54 | labs(x = "factorization rank", y = "test set error", color = "model iteration", caption = paste0("(best rank is k = ", best_rank, ")")) + 55 | facet_grid(cols = vars(rep)) 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /R/plot.nmf_metadata_summary.R: -------------------------------------------------------------------------------- 1 | #' @rdname MetadataSummary 2 | #' 3 | #' @param x a data.frame 4 | #' @param ... not implemented 5 | #' 6 | #' @importFrom reshape2 melt 7 | #' 8 | #' @export 9 | #' 10 | plot.nmf_metadata_summary <- function(x, ...) 
{ 11 | m <- reshape2::melt(as.matrix(x)) 12 | colnames(m) <- c("group", "factor", "frac") 13 | ggplot(m, aes(x = factor(factor, levels = unique(factor)), y = frac, fill = group)) + 14 | geom_bar(position = "fill", stat = "identity") + 15 | theme_classic() + 16 | theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + 17 | labs(x = "factor", y = "Representation in group") + 18 | scale_y_continuous(expand = c(0, 0)) 19 | } 20 | 21 | 22 | #' @rdname MetadataSummary 23 | #' 24 | #' @name MetadataSummary 25 | #' 26 | #' @export 27 | #' 28 | .S3method("plot", "nmf_metadata_summary", plot.nmf_metadata_summary) 29 | -------------------------------------------------------------------------------- /R/plotFactorWeights.R: -------------------------------------------------------------------------------- 1 | #' convenience function to map one or more factors along a genome using igvR 2 | #' 3 | #' @param object an nmf object or something with a @w weights matrix 4 | #' @param gr a GRanges object with coordinates for the features 5 | #' @param factors which factors to plot weights for (default: 1, 2, 3) 6 | #' @param plot use igvR to plot the factors? (TRUE, if igvR detected) 7 | #' 8 | #' @return the GRanges gr, but with factor weights added as mcols 9 | #' 10 | #' @details 11 | #' This function presumes a GRanges object will be supplied, which in turn 12 | #' presumes that the GenomicRanges package is installed from Bioconductor. 13 | #' Further, if plot == TRUE, the igvR package is presumed to be installed. 14 | #' If either of these presumptions are false, or if factor weights cannot 15 | #' be mapped to identifiers in the GRanges, this function will fail. 16 | #' 17 | #' @export 18 | #' 19 | plotFactorWeights <- function(object, gr, factors=1:3, plot=FALSE) { 20 | 21 | requireNamespace("GenomicRanges") 22 | stopifnot(is(gr, "GRanges")) 23 | stopifnot(all(rownames(object@w) %in% names(gr))) 24 | gr <- gr[rownames(object@w)] 25 | 26 | for (fact in factors) { 27 | if (is.numeric(fact) | is.integer(fact)) fact <- colnames(object@w)[fact] 28 | mcols(gr)[, fact] <- object@w[, fact] 29 | } 30 | 31 | if (plot) { 32 | requireNamespace("igvR") 33 | message("igvR support is in process") 34 | } 35 | 36 | return(gr) 37 | 38 | } 39 | -------------------------------------------------------------------------------- /R/rasterize_rowwise.R: -------------------------------------------------------------------------------- 1 | # Row-wise rasterization of a sparse matrix 2 | #' 3 | #' Bin together values from every block of \code{n} rows and calculate mean value, with a sparse \code{dgCMatrix} as input and a dense \code{matrix} as output. This technique is useful in some genomics applications. 
4 | #' 5 | #' @param A matrix to be rasterized 6 | #' @param n row-wise binning size 7 | #' @param threads number of threads to use (0 to let OpenMP decide how many are available and use them all) 8 | #' @export 9 | #' 10 | RasterizeRowwise <- function(A, n = 10, threads = 0){ 11 | if(class(A)[[1]] == "dgCMatrix"){ 12 | B <- rowwise_compress_sparse(A, n, threads) 13 | } else { 14 | A <- as.matrix(A) 15 | B <- rowwise_compress_dense(A, n, threads) 16 | } 17 | rownames(B) <- rownames(A)[seq(1, floor(nrow(A) / n) * n, n)] 18 | colnames(B) <- colnames(A) 19 | B 20 | } -------------------------------------------------------------------------------- /R/run_nmf.R: -------------------------------------------------------------------------------- 1 | #' @title Run Non-negative Matrix Factorization 2 | #' 3 | #' @description Run NMF on a sparse matrix with automatic rank determination by cross-validation 4 | #' 5 | #' @param A sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames 6 | #' @param rank factorization rank 7 | #' @param tol tolerance of the fit (1e-5 for publication quality, 1e-4 for cross-validation) 8 | #' @param maxit maximum number of iterations 9 | #' @param verbose verbosity level 10 | #' @param L1 L1/LASSO penalty to increase sparsity of model 11 | #' @param L2 L2/Ridge penalty to increase angles between factors 12 | #' @param threads number of threads for parallelization across CPUs, 0 = use all available threads 13 | #' @param compression_level either 2 or 3, for VCSC or IVCSC, respectively. For development purposes. 14 | #' @rdname run_nmf 15 | #' @importFrom stats runif 16 | #' @export 17 | #' 18 | run_nmf <- function(A, rank, tol = 1e-4, maxit = 100, verbose = TRUE, L1 = 0.01, L2 = 0, threads = 0, compression_level = 3) { 19 | use_vcsc <- compression_level == 2 20 | 21 | if ("list" %in% class(A)) { 22 | # check that number of rows is identical 23 | if (var(sapply(A, nrow)) != 0) { 24 | stop("number of rows in all provided 'A' matrices are not identical") 25 | } 26 | if (!all(sapply(A, function(x) class(x) == "dgCMatrix"))) { 27 | stop("if providing a list, you must provide a list of all 'dgCMatrix' objects") 28 | } 29 | if (!is.null(rownames(A[[1]]))) { 30 | if (!all(sapply(A, function(x) all.equal(rownames(x), rownames(A[[1]]))))) stop("rownames of all dgCMatrix objects in list must be identical") 31 | } 32 | w_init <- matrix(stats::runif(nrow(A[[1]]) * rank), rank, nrow(A[[1]])) 33 | model <- run_nmf_on_sparsematrix_list(A, tol, maxit, verbose, threads, w_init, use_vcsc) 34 | rn <- rownames(A[[1]]) 35 | cn <- do.call(c, lapply(A, colnames)) 36 | } else { 37 | if (class(A)[[1]] != "matrix") { 38 | if (verbose > 0) cat("running with sparse optimization\n") 39 | A <- as(as(as(A, "dMatrix"), "generalMatrix"), "CsparseMatrix") 40 | At <- Matrix::t(A) 41 | dense_mode <- FALSE 42 | } else { 43 | if (verbose > 0) cat("running with dense optimization\n") 44 | At <- t(A) 45 | dense_mode <- TRUE 46 | } 47 | 48 | if(length(L1) != 2){ 49 | L1 <- c(L1[[1]], L1[[1]]) 50 | } 51 | if(length(L2) != 2){ 52 | L2 <- c(L2[[1]], L2[[1]]) 53 | } 54 | 55 | w_init <- matrix(stats::runif(nrow(A) * rank), rank, nrow(A)) 56 | if (dense_mode) { 57 | model <- c_nmf_dense(A, At, tol, maxit, verbose, L1[[1]], L1[[2]], L2[[1]], L2[[2]], threads, w_init) 58 | } else { 59 | model <- c_nmf(A, At, tol, maxit, verbose, L1[[1]], L1[[2]], L2[[1]], L2[[2]], threads, w_init) 60 | } 61 | rn <- rownames(A) 62 | cn <- 
colnames(A) 63 | } 64 | 65 | sort_index <- order(model$d, decreasing = TRUE) 66 | model$d <- model$d[sort_index] 67 | model$w <- t(model$w)[, sort_index] 68 | model$h <- model$h[sort_index, ] 69 | if (rank == 1) { 70 | model$w <- matrix(model$w, ncol=1) 71 | model$h <- matrix(model$h, nrow=1) 72 | } 73 | rownames(model$w) <- rn 74 | colnames(model$h) <- cn 75 | colnames(model$w) <- rownames(model$h) <- paste0("NMF_", 1:ncol(model$w)) 76 | model 77 | } 78 | 79 | distributed_transpose <- function(A){ 80 | library(Matrix) 81 | setwd("/active/debruinz_project/debruinz/CellCensusNMF") 82 | A <- lapply(paste0("../../CellCensus/R/chunk", 1:100, "_counts.rds"), readRDS) 83 | block_sizes <- floor(c(seq(1, nrow(A[[1]]), nrow(A[[1]]) / (length(A))), nrow(A[[1]]) + 1)) 84 | for (i in 1:length(block_sizes)) { 85 | cat("CHUNK", i, "/100\n") 86 | At <- list() 87 | pb <- txtProgressBar(min = 0, max = length(A), style = 3) 88 | for (j in 1:length(A)) { 89 | At[[j]] <- t(A[[j]][block_sizes[i]:(block_sizes[i + 1] - 1), ]) 90 | setTxtProgressBar(pb, j) 91 | } 92 | cat(" rbinding\n") 93 | At <- do.call(rbind, At) 94 | cat(" saving\n") 95 | saveRDS(At, paste0("chunk", i, "_transpose_counts.rds")) 96 | } 97 | } 98 | 99 | split_into_chunks <- function(A, n_chunks){ 100 | breakpoints <- seq(1, ncol(A), floor(ncol(A) / n_chunks)) 101 | breakpoints[length(breakpoints) + 1] <- ncol(A) 102 | result <- list() 103 | for(i in 1:n_chunks){ 104 | result[[i]] <- A[,breakpoints[i]:breakpoints[i + 1]] 105 | } 106 | result 107 | } 108 | -------------------------------------------------------------------------------- /R/singlet.R: -------------------------------------------------------------------------------- 1 | #' Singlet 2 | #' 3 | #' Fast single-cell analysis with non-negative dimensional reductions 4 | #' 5 | #' @details 6 | #' There are reasons to not use PCA. 7 | #' * PCA fits to missing signal, 8 | #' * considers only highly variable features, 9 | #' * is almost useless without further graph-based analysis, 10 | #' * requires centering and scaling of your data, 11 | #' * and is robust only within experiments. 12 | #' 13 | #' Instead, you should use Non-negative Matrix Factorization (NMF). 14 | #' * NMF imputes missing signal, 15 | #' * learns models using all features, 16 | #' * does everything PCA does and provides useful information itself, 17 | #' * requires only variance stabilization, 18 | #' * and is robust across experiments. 19 | #' 20 | #' Singlet is all about extremely fast NMF for single-cell dimensional reduction and integration. 21 | #' 22 | #' See the vignettes to get started. 23 | #' 24 | #' @useDynLib singlet, .registration = TRUE 25 | #' 26 | #' @name singlet 27 | #' 28 | #' @author Zach DeBruine 29 | #' 30 | #' @aliases singlet-package 31 | #' 32 | #' @import ggplot2 33 | #' @import Seurat 34 | #' @import dplyr 35 | #' @import Matrix 36 | #' @import msigdbr 37 | #' @import fgsea 38 | #' 39 | #' @importFrom methods as new is 40 | #' @importFrom stats dist hclust model.matrix p.adjust 41 | #' @importFrom utils data 42 | #' 43 | #' @md 44 | #' 45 | "_PACKAGE" 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # singlet v.0.0.99 2 | 3 | See the [pkgdown website](https://zdebruine.github.io/singlet/)! 4 | 5 | Singlet is in active development right now. Do not expect stable functionality yet. Coming soon! 
6 | 7 | Singlet brings fast Non-negative Matrix Factorization (NMF) with automatic rank determination to the Seurat package for single-cell analysis. 8 | 9 | ## Install 10 | 11 | First install the development version of `RcppML`, note that the CRAN RcppML version will not work: 12 | 13 | ```{R} 14 | devtools::install_github("zdebruine/RcppML") 15 | ``` 16 | 17 | Then install required dependencies, including `limma` and `fgsea`: 18 | 19 | ```{R} 20 | BiocManager::install("fgsea") 21 | BiocManager::install("limma") 22 | ``` 23 | 24 | Now install `singlet`: 25 | 26 | ```{R} 27 | devtools::install_github("zdebruine/singlet") 28 | ``` 29 | 30 | ## Introductory Vignette 31 | 32 | [Guided clustering tutorial](https://zdebruine.github.io/singlet/articles/Guided_Clustering_with_NMF.html) 33 | 34 | ## Dimension Reduction with NMF 35 | 36 | Analyze your single-cell assay with NMF: 37 | 38 | ```{R} 39 | library(singlet) 40 | library(Seurat) 41 | library(dplyr) 42 | library(cowplot) 43 | set.seed(123) # for reproducible NMF models 44 | get_pbmc3k_data() %>% NormalizeData %>% RunNMF -> pbmc3k 45 | pbmc3k <- RunUMAP(pbmc3k, reduction = "nmf", dims = 1:ncol(pbmc3k@reductions$nmf)) 46 | 47 | plot_grid( 48 | RankPlot(pbmc3k) + NoLegend(), 49 | DimPlot(pbmc3k) + NoLegend(), 50 | ncol = 2) 51 | ``` 52 | 53 | NMF can do almost anything that PCA can do, but also imputes missing signal, always has an optimal rank (for variance-stabilized data), uses all the information in your assay (incl. "non-variable" genes), is robust across experiments, learns signatures of transcriptional activity, and is colinear and non-negative (interpretable) rather than orthogonal and signed (not interpretable) 54 | 55 | Singlet internally provides the **fastest implementation of NMF**. Cross-validation can take a few minutes for datasets with a few ten thousand cells, but is extremely scalable and runs excellently on HPC nodes and average laptops alike. 56 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: ~ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /data/pbmc3k.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/data/pbmc3k.RData -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • singlet 9 | 10 | 11 | 12 | 13 | 14 | 18 | 19 | 20 | Skip to contents 21 | 22 | 23 |
70 | Content not found. Please use links in the navbar.
88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-lnmf-metadata-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-lnmf-metadata-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-metadata-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-metadata-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-umap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-umap-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/run-nmf-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/run-nmf-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-6-1.png 
-------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/dim-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/dim-plot-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/feature-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/feature-plot-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/gsea-heatmap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/gsea-heatmap-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/map-cluster-ids-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/map-cluster-ids-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/plot-metadata-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/plot-metadata-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/viz-dim-loadings-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/viz-dim-loadings-1.png -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • singlet 
All vignettes
Batch Integration with Linked NMF
Guided Clustering with NMF
71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | Authors and Citation • singlet 6 | Skip to contents 7 | 8 | 9 |
Authors
  • Zach DeBruine. Author, maintainer.
Citation
Source: DESCRIPTION
DeBruine Z (2022). singlet: Non-negative Matrix Factorization for single-cell analysis. R package version 0.0.99, https://github.com/zdebruine/singlet.
@Manual{,
  title = {singlet: Non-negative Matrix Factorization for single-cell analysis},
  author = {Zach DeBruine},
  year = {2022},
  note = {R package version 0.0.99},
  url = {https://github.com/zdebruine/singlet},
}
88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /docs/deps/data-deps.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('nav.navbar').headroom(); 6 | 7 | Toc.init({ 8 | $nav: $("#toc"), 9 | $scope: $("main h2, main h3, main h4, main h5, main h6") 10 | }); 11 | 12 | if ($('#toc').length) { 13 | $('body').scrollspy({ 14 | target: '#toc', 15 | offset: $("nav.navbar").outerHeight() + 1 16 | }); 17 | } 18 | 19 | // Activate popovers 20 | $('[data-bs-toggle="popover"]').popover({ 21 | container: 'body', 22 | html: true, 23 | trigger: 'focus', 24 | placement: "top", 25 | sanitize: false, 26 | }); 27 | 28 | $('[data-bs-toggle="tooltip"]').tooltip(); 29 | 30 | /* Clipboard --------------------------*/ 31 | 32 | function changeTooltipMessage(element, msg) { 33 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 34 | element.setAttribute('data-original-title', msg); 35 | $(element).tooltip('show'); 36 | element.setAttribute('data-original-title', tooltipOriginalTitle); 37 | } 38 | 39 | if(ClipboardJS.isSupported()) { 40 | $(document).ready(function() { 41 | var copyButton = ""; 42 | 43 | $("div.sourceCode").addClass("hasCopyButton"); 44 | 45 | // Insert copy buttons: 46 | $(copyButton).prependTo(".hasCopyButton"); 47 | 48 | // Initialize tooltips: 49 | $('.btn-copy-ex').tooltip({container: 'body'}); 50 | 51 | // Initialize clipboard: 52 | var clipboard = new ClipboardJS('[data-clipboard-copy]', { 53 | text: function(trigger) { 54 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 55 | } 56 | }); 57 | 58 | clipboard.on('success', function(e) { 59 | changeTooltipMessage(e.trigger, 'Copied!'); 60 | e.clearSelection(); 61 | }); 62 | 63 | clipboard.on('error', function() { 64 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 65 | }); 66 | 67 | }); 68 | } 69 | 70 | /* Search marking --------------------------*/ 71 | var url = new URL(window.location.href); 72 | var toMark = url.searchParams.get("q"); 73 | var mark = new Mark("main#main"); 74 | if (toMark) { 75 | mark.mark(toMark, { 76 | accuracy: { 77 | value: "complementary", 78 | limiters: [",", ".", ":", "/"], 79 | } 80 | }); 81 | } 82 | 83 | /* Search --------------------------*/ 84 | /* Adapted from https://github.com/rstudio/bookdown/blob/2d692ba4b61f1e466c92e78fd712b0ab08c11d31/inst/resources/bs4_book/bs4_book.js#L25 */ 85 | // Initialise search index on focus 86 | var fuse; 87 | $("#search-input").focus(async function(e) { 88 | if (fuse) { 89 | return; 90 | } 91 | 92 | $(e.target).addClass("loading"); 93 | var response = await fetch($("#search-input").data("search-index")); 94 | var data = await response.json(); 95 | 96 | var options = { 97 | keys: ["what", "text", "code"], 98 | ignoreLocation: true, 99 | threshold: 0.1, 100 | includeMatches: true, 101 | includeScore: true, 102 | }; 103 | fuse = new Fuse(data, options); 104 | 105 | $(e.target).removeClass("loading"); 106 | }); 
107 | 108 | // Use algolia autocomplete 109 | var options = { 110 | autoselect: true, 111 | debug: true, 112 | hint: false, 113 | minLength: 2, 114 | }; 115 | var q; 116 | async function searchFuse(query, callback) { 117 | await fuse; 118 | 119 | var items; 120 | if (!fuse) { 121 | items = []; 122 | } else { 123 | q = query; 124 | var results = fuse.search(query, { limit: 20 }); 125 | items = results 126 | .filter((x) => x.score <= 0.75) 127 | .map((x) => x.item); 128 | if (items.length === 0) { 129 | items = [{dir:"Sorry 😿",previous_headings:"",title:"No results found.",what:"No results found.",path:window.location.href}]; 130 | } 131 | } 132 | callback(items); 133 | } 134 | $("#search-input").autocomplete(options, [ 135 | { 136 | name: "content", 137 | source: searchFuse, 138 | templates: { 139 | suggestion: (s) => { 140 | if (s.title == s.what) { 141 | return `${s.dir} >
${s.title}
`; 142 | } else if (s.previous_headings == "") { 143 | return `${s.dir} >
${s.title}
> ${s.what}`; 144 | } else { 145 | return `${s.dir} >
${s.title}
> ${s.previous_headings} > ${s.what}`; 146 | } 147 | }, 148 | }, 149 | }, 150 | ]).on('autocomplete:selected', function(event, s) { 151 | window.location.href = s.path + "?q=" + q + "#" + s.id; 152 | }); 153 | }); 154 | })(window.jQuery || window.$) 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.17.1.1 2 | pkgdown: 2.0.6 3 | pkgdown_sha: ~ 4 | articles: 5 | Batch_Integration_with_Linked_NMF: Batch_Integration_with_Linked_NMF.html 6 | Guided_Clustering_with_NMF: Guided_Clustering_with_NMF.html 7 | last_built: 2022-09-09T14:57Z 8 | 9 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /articles/Batch_Integration_with_Linked_NMF.html 8 | 9 | 10 | /articles/Guided_Clustering_with_NMF.html 11 | 12 | 13 | /articles/index.html 14 | 15 | 16 | /authors.html 17 | 18 | 19 | /index.html 20 | 21 | 22 | /reference/GSEAHeatmap.html 23 | 24 | 25 | /reference/MetadataSummary.html 26 | 27 | 28 | /reference/RankPlot.html 29 | 30 | 31 | /reference/RunGSEA.html 32 | 33 | 34 | /reference/RunLNMF.html 35 | 36 | 37 | /reference/RunNMF.html 38 | 39 | 40 | /reference/ard_nmf.html 41 | 42 | 43 | /reference/cross_validate_nmf.html 44 | 45 | 46 | /reference/get_pbmc3k_data.html 47 | 48 | 49 | /reference/index.html 50 | 51 | 52 | /reference/pbmc3k.html 53 | 54 | 55 | /reference/run_linked_nmf.html 56 | 57 | 58 | /reference/run_nmf.html 59 | 60 | 61 | /reference/singlet.html 62 | 63 | 64 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | c(bibentry(bibtype = "Article", 2 | key = "fastnmf", 3 | title = "{Fast and robust non-negative matrix factorization for single-cell experiments}", 4 | author = c( 5 | person(c("Zachary", "J."), "Debruine"), 6 | person("Karsten", "Melcher"), 7 | person(c("Timothy", "J."), "Triche") 8 | ), 9 | journal = "bioRXiv", 10 | year = 2021, 11 | url = "https://doi.org/10.1101/2021.09.01.458620", 12 | doi = "10.1101/2021.09.01.458620", 13 | header = "The RcppML package is described in:")) 14 | -------------------------------------------------------------------------------- /inst/include/IVSparse.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SparseMatrix 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief IVSparse Sparse Matrix Library 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | // Library Constants 12 | #define DELIM 0 13 | #define NUM_META_DATA 6 14 | #define META_DATA_SIZE 24 15 | #define ONE_BYTE_MAX 255 16 | #define TWO_BYTE_MAX 65535 17 | #define FOUR_BYTE_MAX 4294967295 18 | 19 | // Library Preprocessor Directives 20 | 21 | // Parallel Processing Directives (On by default) 22 | #if (defined _OPENMP) && (!defined IVSPARSE_DONT_PARALLEL) 23 | #define IVSPARSE_HAS_OPENMP 24 | #endif 25 | #ifdef IVSPARSE_HAS_OPENMP 26 | #include 27 | #include 28 | #endif 29 | 30 | // Debugging Directives (Off by default) 31 | #ifndef IVSPARSE_DEBUG_OFF 32 | #define IVSPARSE_DEBUG 33 | #endif 34 | 35 | // Library Includes 36 | 37 | // Eigen is already pulled in by "singlet" 38 | //[[Rcpp::depends(RcppEigen)]] 39 | // #include 40 | 41 | 42 | #include 43 | 
#include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | // Library Namespaces 50 | 51 | // SparseMatrixBase Files 52 | // #include "src/IVSparse_SparseMatrixBase.hpp" 53 | // #include "src/IVSparse_Base_Methods.hpp" 54 | 55 | // SparseMatrix Level 3 Files 56 | #include "src/IVCSC/IVCSC_SparseMatrix.hpp" 57 | #include "src/IVCSC/IVCSC_Operators.hpp" 58 | #include "src/IVCSC/IVCSC_Private_Methods.hpp" 59 | #include "src/IVCSC/IVCSC_Methods.hpp" 60 | #include "src/IVCSC/IVCSC_Constructors.hpp" 61 | #include "src/IVCSC/IVCSC_BLAS.hpp" 62 | // Vector and Iterator Files 63 | #include "src/Vectors/IVCSC_Vector.hpp" 64 | #include "src/Vectors/IVCSC_Vector_Methods.hpp" 65 | #include "src/InnerIterators/IVCSC_Iterator.hpp" 66 | #include "src/InnerIterators/IVCSC_Iterator_Methods.hpp" 67 | 68 | // SparseMatrix Level 2 Files 69 | #include "src/VCSC/VCSC_SparseMatrix.hpp" 70 | #include "src/VCSC/VCSC_Operators.hpp" 71 | #include "src/VCSC/VCSC_Private_Methods.hpp" 72 | #include "src/VCSC/VCSC_Methods.hpp" 73 | #include "src/VCSC/VCSC_Constructors.hpp" 74 | #include "src/VCSC/VCSC_BLAS.hpp" 75 | // Vector and Iterator Files 76 | #include "src/Vectors/VCSC_Vector.hpp" 77 | #include "src/Vectors/VCSC_Vector_Methods.hpp" 78 | #include "src/InnerIterators/VCSC_Iterator.hpp" 79 | #include "src/InnerIterators/VCSC_Iterator_Methods.hpp" 80 | 81 | // SparseMatrix Level 1 Files 82 | #include "src/CSC/CSC_SparseMatrix.hpp" 83 | #include "src/CSC/CSC_Operators.hpp" 84 | #include "src/CSC/CSC_Private_Methods.hpp" 85 | #include "src/CSC/CSC_Methods.hpp" 86 | #include "src/CSC/CSC_Constructors.hpp" 87 | #include "src/CSC/CSC_BLAS.hpp" 88 | // Vector and Iterator Files 89 | #include "src/Vectors/CSC_Vector.hpp" 90 | #include "src/Vectors/CSC_Vector_Methods.hpp" 91 | #include "src/InnerIterators/CSC_Iterator.hpp" 92 | #include "src/InnerIterators/CSC_Iterator_Methods.hpp" -------------------------------------------------------------------------------- /inst/include/singlet.h: -------------------------------------------------------------------------------- 1 | #ifndef SINGLET_H 2 | #define SINGLET_H 3 | 4 | #include 5 | 6 | // forward declare Rcpp::as<> Exporter 7 | namespace Rcpp { 8 | class SparseMatrix; 9 | namespace traits { 10 | template <> 11 | class Exporter; 12 | } // namespace traits 13 | } // namespace Rcpp 14 | 15 | //[[Rcpp::plugins(openmp)]] 16 | #ifdef _OPENMP 17 | #include 18 | #endif 19 | 20 | //[[Rcpp::depends(RcppEigen)]] 21 | #include 22 | 23 | // now pull in IVSparse after loading Eigen 24 | #include 25 | 26 | // this class is provided for consistency with Eigen::SparseMatrix, but using 27 | // R objects (i.e. Rcpp::NumericVector, Rcpp::IntegerVector) that comprise Matrix::dgCMatrix in R. 28 | // R objects are pointers to underlying memory-mapped SEXP vectors, and are usable in C++ without any 29 | // affect on performance. Thus, this class achieves zero-copy access to R sparse matrix objects, with equal 30 | // performance for read-only column iteration (`InnerIterator`) like `Eigen::SparseMatrix`. 31 | // 32 | // The class is designed with an `InnerIterator` class that exactly mimics `Eigen::SparseMatrix::InnerIterator`, 33 | // and also contains `.rows()` and `.cols()` member functions. This allows it to substitute for `Eigen::SparseMatrix` 34 | // in all SLAM routines. 
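//
// A minimal usage sketch (illustration only, not part of the original header;
// the names `A`, `s`, and `col_sums` are hypothetical). Column iteration follows
// the same pattern as Eigen::SparseMatrix::InnerIterator, using only the members
// defined below (the S4 constructor, cols(), and InnerIterator):
//
//   Rcpp::SparseMatrix A(s);                        // s: a Matrix::dgCMatrix S4 object
//   std::vector<double> col_sums(A.cols(), 0.0);
//   for (unsigned int j = 0; j < A.cols(); ++j)
//     for (Rcpp::SparseMatrix::InnerIterator it(A, j); it; ++it)
//       col_sums[j] += it.value();                  // it.row() gives the row index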
35 | namespace Rcpp { 36 | class SparseMatrix { 37 | public: 38 | NumericVector x; 39 | IntegerVector i, p, Dim; 40 | 41 | // constructors 42 | SparseMatrix(NumericVector x, IntegerVector i, IntegerVector p, IntegerVector Dim) : x(x), i(i), p(p), Dim(Dim) {} 43 | SparseMatrix(const S4& s) { 44 | if (!s.hasSlot("x") || !s.hasSlot("p") || !s.hasSlot("i") || !s.hasSlot("Dim")) 45 | throw std::invalid_argument("Cannot construct SparseMatrix from this S4 object"); 46 | x = s.slot("x"); 47 | i = s.slot("i"); 48 | p = s.slot("p"); 49 | Dim = s.slot("Dim"); 50 | } 51 | SparseMatrix() {} 52 | 53 | unsigned int rows() { return Dim[0]; } 54 | unsigned int cols() { return Dim[1]; } 55 | 56 | // const column iterator 57 | class InnerIterator { 58 | public: 59 | InnerIterator(SparseMatrix& ptr, int col) : ptr(ptr), col_(col), index(ptr.p[col]), max_index(ptr.p[col + 1]) {} 60 | operator bool() const { return (index < max_index); } 61 | InnerIterator& operator++() { 62 | ++index; 63 | return *this; 64 | } 65 | double& value() const { return ptr.x[index]; } 66 | int row() const { return ptr.i[index]; } 67 | int col() const { return col_; } 68 | 69 | private: 70 | SparseMatrix& ptr; 71 | int col_, index, max_index; 72 | }; 73 | 74 | SparseMatrix clone() { 75 | NumericVector x_ = Rcpp::clone(x); 76 | IntegerVector i_ = Rcpp::clone(i); 77 | IntegerVector p_ = Rcpp::clone(p); 78 | IntegerVector Dim_ = Rcpp::clone(Dim); 79 | return SparseMatrix(x_, i_, p_, Dim_); 80 | } 81 | 82 | SparseMatrix transpose() { 83 | S4 s(std::string("dgCMatrix")); 84 | s.slot("i") = i; 85 | s.slot("p") = p; 86 | s.slot("x") = x; 87 | s.slot("Dim") = Dim; 88 | Environment base = Environment::namespace_env("Matrix"); 89 | Function t_r = base["t"]; 90 | S4 At = t_r(_["x"] = s); 91 | return SparseMatrix(At); 92 | }; 93 | 94 | S4 wrap() { 95 | S4 s(std::string("dgCMatrix")); 96 | s.slot("x") = x; 97 | s.slot("i") = i; 98 | s.slot("p") = p; 99 | s.slot("Dim") = Dim; 100 | return s; 101 | } 102 | }; 103 | 104 | namespace traits { 105 | /* support for Rcpp::as */ 106 | 107 | // export a sparse matrix 108 | template <> 109 | class Exporter { 110 | Rcpp::NumericVector x_; 111 | Rcpp::IntegerVector i, p, Dim; 112 | 113 | public: 114 | Exporter(SEXP x) { 115 | Rcpp::S4 s(x); 116 | if (!s.hasSlot("x") || !s.hasSlot("p") || !s.hasSlot("i") || !s.hasSlot("Dim")) 117 | throw std::invalid_argument("Cannot construct Rcpp::SparseMatrix from this S4 object"); 118 | x_ = s.slot("x"); 119 | i = s.slot("i"); 120 | p = s.slot("p"); 121 | Dim = s.slot("Dim"); 122 | } 123 | 124 | Rcpp::SparseMatrix get() { 125 | return Rcpp::SparseMatrix(x_, i, p, Dim); 126 | } 127 | }; 128 | 129 | } // namespace traits 130 | } // namespace Rcpp 131 | #endif -------------------------------------------------------------------------------- /inst/include/src/CSC/CSC_Operators.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Operators.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Operator Overloads for CSC Sparse Matrices 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | // Assignment Operator 14 | template 15 | SparseMatrix & 16 | SparseMatrix::operator=(const IVSparse::SparseMatrix &other) { 17 | // check for self assignment 18 | if (this != &other) { 19 | // free the old data 20 | if (vals != nullptr) { 21 | free(vals); 22 | } 23 | if (innerIdx != nullptr) { 24 | free(innerIdx); 25 | } 26 | if (outerPtr != nullptr) { 27 | free(outerPtr); 
28 | } 29 | if (metadata != nullptr) { 30 | delete[] metadata; 31 | } 32 | 33 | // Deep copy the matrix 34 | metadata = new uint32_t[NUM_META_DATA]; 35 | memcpy(metadata, other.metadata, NUM_META_DATA * sizeof(uint32_t)); 36 | 37 | // set the dimensions of the matrix 38 | numRows = other.numRows; 39 | numCols = other.numCols; 40 | outerDim = other.outerDim; 41 | innerDim = other.innerDim; 42 | nnz = other.nnz; 43 | compSize = other.compSize; 44 | 45 | // encode the value type and index type 46 | encodeValueType(); 47 | index_t = sizeof(indexT); 48 | 49 | // check for an empty matrix 50 | if (nnz == 0) { 51 | vals = nullptr; 52 | innerIdx = nullptr; 53 | outerPtr = nullptr; 54 | } 55 | 56 | // allocate the memory 57 | try { 58 | vals = (T *)malloc(nnz * sizeof(T)); 59 | innerIdx = (indexT *)malloc(nnz * sizeof(indexT)); 60 | outerPtr = (indexT *)malloc((outerDim + 1) * sizeof(indexT)); 61 | } catch (std::bad_alloc &e) { 62 | std::cerr << "Error: Failed to allocate memory for the matrix" 63 | << std::endl; 64 | exit(1); 65 | } 66 | 67 | // copy the data 68 | memcpy(vals, other.vals, nnz * sizeof(T)); 69 | memcpy(innerIdx, other.innerIdx, nnz * sizeof(indexT)); 70 | memcpy(outerPtr, other.outerPtr, (outerDim + 1) * sizeof(indexT)); 71 | } 72 | 73 | // return the matrix 74 | return *this; 75 | } 76 | 77 | // Equality Operator 78 | template 79 | bool SparseMatrix::operator==(const SparseMatrix &other) { 80 | // check if the dimensions are the same 81 | if (numRows != other.numRows || numCols != other.numCols) { 82 | return false; 83 | } 84 | 85 | // check if the number of nonzeros are the same 86 | if (nnz != other.nnz) { 87 | return false; 88 | } 89 | 90 | // check the matrix data against each other 91 | if (memcmp(vals, other.vals, nnz * sizeof(T)) != 0) { 92 | return false; 93 | } 94 | if (memcmp(innerIdx, other.innerIdx, nnz * sizeof(indexT)) != 0) { 95 | return false; 96 | } 97 | if (memcmp(outerPtr, other.outerPtr, (outerDim + 1) * sizeof(indexT)) != 0) { 98 | return false; 99 | } 100 | 101 | // if all the data is the same return true 102 | return true; 103 | } 104 | 105 | // Inequality Operator 106 | template 107 | bool SparseMatrix::operator!=(const SparseMatrix &other) { 108 | return !(*this == other); 109 | } 110 | 111 | // Coefficent Access Operator 112 | template 113 | T SparseMatrix::operator()(uint32_t row, uint32_t col) { 114 | 115 | #ifdef IVSPARSE_DEBUG 116 | // check if the row and column are in bounds 117 | if (row >= numRows || col >= numCols) { 118 | std::cerr << "Error: Index out of bounds" << std::endl; 119 | exit(1); 120 | } 121 | #endif 122 | 123 | // get the vector and index 124 | uint32_t vector = columnMajor ? col : row; 125 | uint32_t index = columnMajor ? 
row : col; 126 | 127 | // get an iterator for the desired vector 128 | for (typename SparseMatrix::InnerIterator it( 129 | *this, vector); 130 | it; ++it) { 131 | if (it.getIndex() == (indexT)index) { 132 | // if the index is found return the value 133 | return it.value(); 134 | } 135 | } 136 | 137 | // if the index is not found return 0 138 | return 0; 139 | } 140 | 141 | // Vector Access Operator 142 | template 143 | typename SparseMatrix::Vector 144 | SparseMatrix::operator[](uint32_t vec) { 145 | #ifdef IVSPARSE_DEBUG 146 | // check if the vector is out of bounds 147 | assert((vec < outerDim && vec >= 0) && "Vector index out of bounds"); 148 | #endif 149 | 150 | // return a IVSparse vector 151 | typename IVSparse::SparseMatrix::Vector newVector( 152 | *this, vec); 153 | return newVector; 154 | } 155 | 156 | //* BLAS Operators *// 157 | 158 | // Scalar Multiplication 159 | template 160 | IVSparse::SparseMatrix 161 | SparseMatrix::operator*(T scalar) { 162 | return scalarMultiply(scalar); 163 | } 164 | 165 | // In place scalar multiplication 166 | template 167 | void SparseMatrix::operator*=(T scalar) { 168 | return inPlaceScalarMultiply(scalar); 169 | } 170 | 171 | // IVSparse Matrix * IVSparse Vector Multiplication 172 | template 173 | Eigen::VectorXd SparseMatrix::operator*(SparseMatrix::Vector &vec) { 174 | return vectorMultiply(vec); 175 | } 176 | 177 | // Matrix Vector Multiplication (IVSparse Eigen -> Eigen) 178 | template 179 | Eigen::VectorXd SparseMatrix::operator*(Eigen::VectorXd &vec) { 180 | return vectorMultiply(vec); 181 | } 182 | 183 | // Matrix Matrix Multiplication (IVSparse Eigen -> Eigen) 184 | template 185 | Eigen::Matrix SparseMatrix::operator*(Eigen::Matrix mat) { 186 | return matrixMultiply(mat); 187 | } 188 | 189 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/CSC/CSC_Private_Methods.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Private_Methods.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Private Methods for CSC Sparse Matrices 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | // Calculates the number of bytes needed to store a value 14 | template 15 | inline uint8_t SparseMatrix::byteWidth(size_t size) { 16 | if (size <= 0xFF) { 17 | return 1; 18 | } else if (size <= 0xFFFF) { 19 | return 2; 20 | } else if (size <= 0xFFFFFF) { 21 | return 3; 22 | } else if (size <= 0xFFFFFFFF) { 23 | return 4; 24 | } else if (size <= 0xFFFFFFFFFF) { 25 | return 5; 26 | } else if (size <= 0xFFFFFFFFFFFF) { 27 | return 6; 28 | } else if (size <= 0xFFFFFFFFFFFFFF) { 29 | return 7; 30 | } else { 31 | return 8; 32 | } 33 | } 34 | 35 | // Encodes the value type of the matrix in a uint32_t 36 | template 37 | void SparseMatrix::encodeValueType() { 38 | uint8_t byte0 = sizeof(T); 39 | uint8_t byte1 = std::is_floating_point::value ? 1 : 0; 40 | uint8_t byte2 = std::is_signed::value ? 1 : 0; 41 | uint8_t byte3 = columnMajor ? 
1 : 0; 42 | 43 | val_t = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; 44 | } 45 | 46 | // Checks if the value type is correct for the matrix 47 | template 48 | void SparseMatrix::checkValueType() { 49 | uint8_t byte0 = val_t & 0xFF; 50 | uint8_t byte1 = (val_t >> 8) & 0xFF; 51 | uint8_t byte2 = (val_t >> 16) & 0xFF; 52 | uint8_t byte3 = (val_t >> 24) & 0xFF; 53 | assert(byte0 == sizeof(T) && "Value type size does not match"); 54 | assert(byte1 == std::is_floating_point::value && 55 | "Value type is not floating point"); 56 | assert(byte2 == std::is_signed::value && "Value type is not signed"); 57 | assert(byte3 == columnMajor && "Major direction does not match"); 58 | } 59 | 60 | // performs some simple user checks on the matrices metadata 61 | template 62 | void SparseMatrix::userChecks() { 63 | assert((innerDim > 1 || outerDim > 1 || nnz > 1) && 64 | "The matrix must have at least one row, column, and nonzero value"); 65 | assert(std::is_floating_point::value == false && 66 | "The index type must be a non-floating point type"); 67 | assert((std::is_arithmetic::value && std::is_arithmetic::value) && 68 | "The value and index types must be numeric types"); 69 | assert((std::is_same::value == false) && 70 | "The index type must not be bool"); 71 | assert((innerDim < std::numeric_limits::max() && 72 | outerDim < std::numeric_limits::max()) && 73 | "The number of rows and columns must be less than the maximum value " 74 | "of the index type"); 75 | checkValueType(); 76 | } 77 | 78 | // Calculates the current byte size of the matrix in memory 79 | template 80 | void SparseMatrix::calculateCompSize() { 81 | // set compSize to zero 82 | compSize = 0; 83 | 84 | // add the size of the metadata 85 | compSize += META_DATA_SIZE; 86 | 87 | // add the csc vectors 88 | compSize += sizeof(T) * nnz; // values 89 | compSize += sizeof(indexT) * nnz; // innerIdx 90 | compSize += sizeof(indexT) * (outerDim + 1); // outerPtr 91 | } 92 | 93 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/IVSparse_Base_Methods.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file IVSparse_Base_Methods.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief IVSparse Sparse Matrix Base Methods 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | // Calculates the number of bytes needed to store a value 14 | inline uint8_t SparseMatrix::byteWidth(size_t size) { 15 | if (size <= 0xFF){ 16 | return 1; 17 | } 18 | else if (size <= 0xFFFF){ 19 | return 2; 20 | } 21 | else if (size <= 0xFFFFFF){ 22 | return 3; 23 | } 24 | else if (size <= 0xFFFFFFFF){ 25 | return 4; 26 | } 27 | else if (size <= 0xFFFFFFFFFF){ 28 | return 5; 29 | } 30 | else if (size <= 0xFFFFFFFFFFFF){ 31 | return 6; 32 | } 33 | else if (size <= 0xFFFFFFFFFFFFFF){ 34 | return 7; 35 | } 36 | else{ 37 | return 8; 38 | } 39 | 40 | } 41 | 42 | // Gets the number of rows in the matrix 43 | uint32_t SparseMatrix::rows() const { return numRows; } 44 | 45 | // Gets the number of columns in the matrix 46 | uint32_t SparseMatrix::cols() const { return numCols; } 47 | 48 | // Gets the inner dimension of the matrix 49 | uint32_t SparseMatrix::innerSize() const { return innerDim; } 50 | 51 | // Gets the outer dimension of the matrix 52 | uint32_t SparseMatrix::outerSize() const { return outerDim; } 53 | 54 | // Gets the number of non-zero elements in the matrix 55 | uint32_t 
SparseMatrix::nonZeros() const { return nnz; } 56 | 57 | // Gets the number of bytes needed to store the matrix 58 | size_t SparseMatrix::byteSize() const { return compSize; } 59 | 60 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/IVSparse_SparseMatrixBase.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file IVSparse_SparseMatrixBase.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief IVSparse Sparse Matrix Base Class Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * IVSparse Sparse Matrix Base Class \n \n 15 | * This is the overarching base class for the IVSparse Sparse Matrix 16 | * Library. It contains methods and variables shared between all 17 | * compression levels of IVSparse Sparse Matrices and serves to reduce 18 | * code duplication. 19 | */ 20 | class SparseMatrixBase { 21 | private: 22 | //* The Matrix Info *// 23 | 24 | uint32_t innerDim = 0; // The inner dimension of the matrix 25 | uint32_t outerDim = 0; // The outer dimension of the matrix 26 | 27 | uint32_t numRows = 0; // The number of rows in the matrix 28 | uint32_t numCols = 0; // The number of columns in the matrix 29 | 30 | uint32_t nnz = 0; // The number of non-zero values in the matrix 31 | 32 | size_t compSize = 0; // The size of the compressed matrix in bytes 33 | 34 | //* The Value and Index Types *// 35 | 36 | uint32_t val_t; // Information about the value type (size, signededness, etc.) 37 | uint32_t index_t; // Information about the index type (size) 38 | 39 | uint32_t* metadata = nullptr; // The metadata of the matrix 40 | 41 | //* Private Methods *// 42 | 43 | // Calculates the number of bytes needed to store a value 44 | inline uint8_t byteWidth(size_t size); 45 | 46 | // Creates value type information 47 | virtual void encodeValueType() = 0; 48 | 49 | // Checks the value type information 50 | virtual void checkValueType() = 0; 51 | 52 | // User checks to confirm a valid matrix 53 | virtual void userChecks() = 0; 54 | 55 | // Calculates the size of the matrix in bytes 56 | virtual void calculateCompSize() = 0; 57 | 58 | public: 59 | //* Friends *// 60 | 61 | // IVSparse Sparse Matrix Class 62 | template 63 | friend class SparseMatrix; 64 | 65 | //* Constructors *// 66 | 67 | // Default Constructor 68 | SparseMatrixBase() {}; 69 | 70 | //* Getters *// 71 | 72 | /** 73 | * @returns The number of rows in the matrix. 74 | */ 75 | uint32_t rows() const; 76 | 77 | /** 78 | * @returns The number of columns in the matrix. 79 | */ 80 | uint32_t cols() const; 81 | 82 | /** 83 | * @returns The inner dimension of the matrix. 84 | */ 85 | uint32_t innerSize() const; 86 | 87 | /** 88 | * @returns The outer dimension of the matrix. 89 | */ 90 | uint32_t outerSize() const; 91 | 92 | /** 93 | * @returns The number of non-zero elements in the matrix. 94 | */ 95 | uint32_t nonZeros() const; 96 | 97 | /** 98 | * @returns The size of the matrix in bytes. 99 | */ 100 | uint64_t byteSize() const; 101 | 102 | //* Utility Methods *// 103 | 104 | /** 105 | * Writes the matrix to a file with the given filename. 106 | */ 107 | virtual void write(const char* filename) = 0; 108 | 109 | /** 110 | * Prints the matrix to the console. 
111 | */ 112 | virtual void print() = 0; 113 | 114 | }; // class SparseMatrixBase 115 | 116 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/InnerIterators/CSC_Iterator.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Iterator.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Inner Iterator for CSC Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * CSC Inner Iterator Class \n \n 15 | * The CSC Inner Iterator is a forward traversal iterator like the others in the 16 | * IVSparse library. It's very low overhead and is used to traverse over the 17 | * nonzeros of a single vector of a matrix or a vector on its own. 18 | */ 19 | template 20 | class SparseMatrix::InnerIterator { 21 | private: 22 | //* Private Class Variables *// 23 | 24 | T* val; // Current value 25 | indexT index; // Current index 26 | indexT outer; // Outer dimension 27 | 28 | T* vals; 29 | indexT* indices; 30 | indexT* endPtr; 31 | 32 | public: 33 | //* Constructors & Destructor *// 34 | /** @name Constructors 35 | */ 36 | ///@{ 37 | 38 | /** 39 | * Default Iterator Constructor \n \n 40 | * Creates an empty iterator that can't be used on its own. 41 | */ 42 | InnerIterator() {}; 43 | 44 | /** 45 | * CSC Matrix InnerIterator Constructor \n \n 46 | * The main constructor for the Inner Iterator. Given a matrix the iterator 47 | * will forward traverse over the given vector of the matrix. The traversal 48 | * is sorted by index. 49 | */ 50 | InnerIterator(SparseMatrix& mat, uint32_t vec); 51 | 52 | /** 53 | * CSC Vector InnerIterator Constructor \n \n 54 | * Same as the previous constructor but for a single standalone vector. 55 | * Can be used in the same way as the previous constructor. 56 | */ 57 | InnerIterator(SparseMatrix::Vector& vec); 58 | 59 | ///@} 60 | 61 | //* Getters *// 62 | /** @name Getters 63 | */ 64 | ///@{ 65 | 66 | /** 67 | * @returns The current index of the iterator. 68 | */ 69 | indexT getIndex(); 70 | 71 | /** 72 | * @returns The current outer dimension of the iterator. 73 | */ 74 | indexT outerDim(); 75 | 76 | /** 77 | * @returns The current row of the iterator. 78 | */ 79 | indexT row(); 80 | 81 | /** 82 | * @returns The current column of the iterator. 83 | */ 84 | indexT col(); 85 | 86 | /** 87 | * @returns The current value of the iterator. 88 | */ 89 | T value(); 90 | 91 | /** 92 | * Changes the value where the iterator is pointing. 93 | * 94 | * @note This is the only way to update elements in the IVSparse format. 95 | * 96 | * @warning This method may break things if used without care, IVSparse is not 97 | * meant to update values. 
98 | */ 99 | void coeff(T newValue); 100 | 101 | ///@} 102 | 103 | //* Operator Overloads *// 104 | 105 | // Prefix Increment Operator 106 | void __attribute__((hot)) operator++(); 107 | 108 | // Equality Operator 109 | bool operator==(const InnerIterator& other); 110 | 111 | // Inequality Operator 112 | bool operator!=(const InnerIterator& other); 113 | 114 | // Less Than Operator 115 | bool operator<(const InnerIterator& other); 116 | 117 | // Greater Than Operator 118 | bool operator>(const InnerIterator& other); 119 | 120 | // Dereference Operator 121 | T& operator*(); 122 | 123 | // Bool Operator 124 | inline __attribute__((hot)) operator bool() { return indices < endPtr; }; 125 | 126 | }; // class InnerIterator 127 | 128 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/InnerIterators/CSC_Iterator_Methods.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Iterator_Methods.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Iterator Methods for CSC Sparse Matrices 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | //* Constructors *// 14 | 15 | // CSC Matrix Constructor 16 | template 17 | inline SparseMatrix::InnerIterator::InnerIterator( 18 | SparseMatrix& mat, uint32_t vec) { 19 | 20 | this->outer = vec; 21 | 22 | // check if the vector is empty 23 | if (mat.getOuterPointers()[vec] == mat.getOuterPointers()[vec + 1]) { 24 | vals = nullptr; 25 | indices = nullptr; 26 | endPtr = nullptr; 27 | return; 28 | } 29 | 30 | // set the pointers to the correct locations 31 | vals = &mat.vals[mat.outerPtr[vec]]; 32 | indices = &mat.innerIdx[mat.outerPtr[vec]]; 33 | endPtr = &mat.innerIdx[mat.outerPtr[vec + 1]]; 34 | 35 | // set the values of the iterator 36 | val = vals; 37 | index = indices[0]; 38 | } 39 | 40 | // CSC Vector Constructor 41 | template 42 | SparseMatrix::InnerIterator::InnerIterator( 43 | SparseMatrix::Vector& vec) { 44 | 45 | this->outer = 0; 46 | 47 | // set the pointers to the correct locations 48 | vals = vec.values(); 49 | indices = vec.indexPtr(); 50 | endPtr = vec.indexPtr() + vec.nonZeros(); 51 | 52 | // set the values of the iterator 53 | val = vals; 54 | index = indices[0]; 55 | } 56 | 57 | //* Overloaded Operators *// 58 | 59 | // Increment Operator 60 | template 61 | inline void SparseMatrix::InnerIterator::operator++() { 62 | vals++; 63 | indices++; 64 | 65 | // check if the iterator is at the end of the vector 66 | if (indices == endPtr) { 67 | return; 68 | } 69 | 70 | // set the values of the iterator 71 | val = vals; 72 | index = *indices; 73 | } 74 | 75 | // Equality Operator 76 | template 77 | bool SparseMatrix::InnerIterator::operator==(const InnerIterator& other) { 78 | return (vals == other.vals && indices == other.index); 79 | } 80 | 81 | // Inequality Operator 82 | template 83 | bool SparseMatrix::InnerIterator::operator!=(const InnerIterator& other) { 84 | return (vals != other.vals || indices != other.index); 85 | } 86 | 87 | // Less Than Operator 88 | template 89 | bool SparseMatrix::InnerIterator::operator<(const InnerIterator& other) { 90 | return (vals < other.vals && indices < other.index); 91 | } 92 | 93 | // Greater Than Operator 94 | template 95 | bool SparseMatrix::InnerIterator::operator>(const InnerIterator& other) { 96 | return (vals > other.vals && indices > other.index); 97 | } 98 | 99 | // Dereference Operator 100 | template 101 | T& 
SparseMatrix::InnerIterator::operator*() { 102 | return val; 103 | } 104 | 105 | //* Getters & Setters *// 106 | 107 | // Get the current index of the iterator 108 | template 109 | indexT SparseMatrix::InnerIterator::getIndex() { 110 | return index; 111 | } 112 | 113 | // Get the current outer dimension of the iterator 114 | template 115 | indexT SparseMatrix::InnerIterator::outerDim() { 116 | return outer; 117 | } 118 | 119 | // Get the current row of the iterator 120 | template 121 | indexT SparseMatrix::InnerIterator::row() { 122 | if (columnMajor) { 123 | return index; 124 | } 125 | else { 126 | return outer; 127 | } 128 | } 129 | 130 | // Get the current column of the iterator 131 | template 132 | indexT SparseMatrix::InnerIterator::col() { 133 | if (columnMajor) { 134 | return outer; 135 | } 136 | else { 137 | return index; 138 | } 139 | } 140 | 141 | // Get the current value of the iterator 142 | template 143 | T SparseMatrix::InnerIterator::value() { 144 | return *val; 145 | } 146 | 147 | // coefficent access method 148 | template 149 | void SparseMatrix::InnerIterator::coeff(T value) { 150 | *val = value; 151 | } 152 | 153 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/InnerIterators/IVCSC_Iterator.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file IVCSC_Iterator.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Inner Iterator for IVCSC Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * @tparam T The type of the values in the matrix 15 | * @tparam indexT The type of the indices in the matrix 16 | * @tparam compressionLevel The level of compression used in the matrix 17 | * @tparam columnMajor Whether the matrix is column major or not 18 | * 19 | * IVCSC Inner Iterator Class \n \n 20 | * The IVCSC Inner Iterator is a forward traversal iterator like the others in 21 | * the IVSparse library. The IVCSC Iterator is slower than the others due to 22 | * needing to decode compressed data. 23 | */ 24 | template 25 | class SparseMatrix::InnerIterator { 26 | private: 27 | //* Private Class Variables *// 28 | 29 | indexT outer; // Outer dimension 30 | indexT index; // Current index 31 | T* val = nullptr; // Current value 32 | 33 | indexT newIndex = 0; // Next index 34 | 35 | uint8_t indexWidth = 1; // Width of the current run 36 | 37 | void* data; // Pointer to the current data 38 | void* endPtr; // Pointer to the end of the data 39 | 40 | bool firstIndex = true; // Is this the first index of the vector 41 | 42 | //* Private Class Methods *// 43 | 44 | // Decodes the index from the data pointer 45 | void __attribute__((hot)) decodeIndex(); 46 | 47 | public: 48 | //* Constructors & Destructor *// 49 | /** @name Constructors 50 | */ 51 | ///@{ 52 | 53 | /** 54 | * Default Iterator Constructor \n \n 55 | * Creates an empty iterator that can't be used on its own. 56 | */ 57 | InnerIterator() {}; 58 | 59 | /** 60 | * IVCSC Matrix InnerIterator Constructor \n \n 61 | * The main constructor for the Inner Iterator. Given a matrix the iterator 62 | * will forward traverse over the given vector of the matrix. The traversal 63 | * is sorted by value in ascending order. 64 | */ 65 | InnerIterator(SparseMatrix& mat, 66 | uint32_t col); 67 | 68 | /** 69 | * IVCSC Vector InnerIterator Constructor \n \n 70 | * Same as the previous constructor but for a single standalone vector. 
71 | * Can be used in the same way as the previous constructor. 72 | */ 73 | InnerIterator( 74 | SparseMatrix::Vector& vec); 75 | 76 | ///@} 77 | 78 | //* Getters *// 79 | /** @name Getters 80 | */ 81 | ///@{ 82 | 83 | /** 84 | * @returns The current index of the iterator. 85 | */ 86 | indexT getIndex(); 87 | 88 | /** 89 | * @returns The current outer dimension of the iterator. 90 | */ 91 | indexT outerDim(); 92 | 93 | /** 94 | * @returns The current row of the iterator. 95 | */ 96 | indexT row(); 97 | 98 | /** 99 | * @returns The current column of the iterator. 100 | */ 101 | indexT col(); 102 | 103 | /** 104 | * @returns The current value of the iterator. 105 | */ 106 | T value(); 107 | 108 | /** 109 | * Changes the value where the iterator is pointing. 110 | * 111 | * @note This is the only way to update elements in the IVSparse format. 112 | */ 113 | void coeff(T newValue); 114 | 115 | /** 116 | * @returns If the iterator is at the beginning of a new run. 117 | */ 118 | bool isNewRun(); 119 | 120 | ///@} 121 | 122 | //* Operator Overloads *// 123 | 124 | // Increment Operator 125 | void __attribute__((hot)) operator++(); 126 | 127 | // Equality Operators 128 | bool operator==(const InnerIterator& other); 129 | 130 | // Inequality Operators 131 | bool operator!=(const InnerIterator& other); 132 | 133 | // Less Than Operator 134 | bool operator<(const InnerIterator& other); 135 | 136 | // Greater Than Operator 137 | bool operator>(const InnerIterator& other); 138 | 139 | // Bool Operator 140 | inline __attribute__((hot)) operator bool() { 141 | return ((char*)endPtr - indexWidth > data); 142 | } 143 | 144 | // Dereference Operator 145 | T& operator*(); 146 | 147 | }; // End of InnerIterator Class 148 | 149 | } // namespace IVSparse 150 | -------------------------------------------------------------------------------- /inst/include/src/InnerIterators/VCSC_Iterator.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file VCSC_Iterator.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Inner Iterator for VCSC Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * VCSC Inner Iterator Class \n \n 15 | * The VCSC Inner Iterator is a forward traversal iterator like the others in 16 | * the IVSparse library. It's very low overhead and is used to traverse over the 17 | * nonzeros of a single vector of a matrix or a vector on its own. The VCSC 18 | * Inner Iterator differs from the CSC Iterator in that it travereses a counts 19 | * vector in the VCSC Matrix instead of a outer pointers vector. 
20 | */ 21 | template 22 | class SparseMatrix::InnerIterator { 23 | private: 24 | //* Private Class Variables *// 25 | 26 | indexT outer = 0; // Outer dimension 27 | indexT index = 0; // Current index 28 | indexT newIndex = 0; // Next index 29 | T* val = nullptr; // Current value 30 | 31 | T* vals = nullptr; // Pointer to values 32 | indexT* counts = nullptr; // Pointer to counts 33 | indexT* indices = nullptr; // Pointer to indices 34 | 35 | indexT valsSize = 0; // Number of unique values 36 | indexT indexSize = 0; // Number of indices 37 | 38 | indexT count = 0; // Current count 39 | indexT countIndex = 0; // Current count of indices 40 | 41 | //* Private Class Methods *// 42 | 43 | public: 44 | //* Constructors & Destructor *// 45 | /** @name Constructors 46 | */ 47 | ///@{ 48 | 49 | /** 50 | * Default Iterator Constructor \n \n 51 | * Creates an empty iterator that can't be used on its own. 52 | */ 53 | InnerIterator() {}; 54 | 55 | /** 56 | * VCSC Matrix InnerIterator Constructor \n \n 57 | * The main constructor for the Inner Iterator. Given a matrix the iterator 58 | * will forward traverse over the given vector of the matrix. The traversal 59 | * is sorted by value in ascending order. 60 | */ 61 | InnerIterator(SparseMatrix& mat, uint32_t col); 62 | 63 | /** 64 | * VCSC Vector InnerIterator Constructor \n \n 65 | * Same as the previous constructor but for a single standalone vector. 66 | * Can be used in the same way as the previous constructor. 67 | */ 68 | InnerIterator(SparseMatrix::Vector& vec); 69 | 70 | ///@} 71 | 72 | //* Getters *// 73 | /** @name Getters 74 | */ 75 | ///@{ 76 | 77 | /** 78 | * @returns The current index of the iterator. 79 | */ 80 | indexT getIndex(); 81 | 82 | /** 83 | * @returns The current outer dimension of the iterator. 84 | */ 85 | indexT outerDim(); 86 | 87 | /** 88 | * @returns The current row of the iterator. 89 | */ 90 | indexT row(); 91 | 92 | /** 93 | * @returns The current column of the iterator. 94 | */ 95 | indexT col(); 96 | 97 | /** 98 | * @returns The current value of the iterator. 99 | */ 100 | T value(); 101 | 102 | /** 103 | * Changes the value where the iterator is pointing. 104 | * 105 | * @note This is the only way to update elements in the IVSparse format. 106 | */ 107 | void coeff(T newValue); 108 | 109 | /** 110 | * @returns If the iterator is at the beginning of a new run. 
111 | */ 112 | 113 | ///@} 114 | 115 | //* Operator Overloads *// 116 | 117 | // Prefix increment operator 118 | void __attribute__((hot)) operator++(); 119 | 120 | // Equality operator 121 | bool operator==(const InnerIterator& other); 122 | 123 | // Inequality operator 124 | bool operator!=(const InnerIterator& other); 125 | 126 | // Less than operator 127 | bool operator<(const InnerIterator& other); 128 | 129 | // Greater than operator 130 | bool operator>(const InnerIterator& other); 131 | 132 | // Boolean operator 133 | inline __attribute__((hot)) operator bool() { return countIndex < indexSize; } 134 | 135 | // Dereference operator 136 | T& operator*(); 137 | 138 | }; // End of VCSC Inner Iterator Class 139 | 140 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/Vectors/CSC_Vector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Vector.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief CSC Vector Class Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * CSC Vector Class \n \n 15 | * The CSC Vector class is a vector class that is used to work with 16 | * CSC matrices. It works with the same logic as the corresponding 17 | * matrix compression level and is useful when working with these matrices. 18 | */ 19 | template 20 | class SparseMatrix::Vector { 21 | private: 22 | //* Private Class Variables *// 23 | 24 | size_t size = 0; // size of the vector in bytes 25 | 26 | T *vals = nullptr; // values of the vector 27 | indexT *innerIdx = nullptr; // inner indices of the vector 28 | 29 | uint32_t length = 0; // length of the vector 30 | uint32_t nnz = 0; // number of non-zero elements in the vector 31 | 32 | //* Private Class Methods *// 33 | 34 | // User checks to confirm a valid vector 35 | void userChecks(); 36 | 37 | // Calculates the size of the vector in bytes 38 | void calculateCompSize(); 39 | 40 | public: 41 | //* Constructors & Destructor *// 42 | /** @name Constructors 43 | */ 44 | ///@{ 45 | 46 | /** 47 | * Default Vector Constructor \n \n 48 | * Creates an empty vector with everything set to null/zero. 49 | */ 50 | Vector(){}; 51 | 52 | /** 53 | * IVSparse Matrix to Vector Constructor \n \n 54 | * Creates a vector from a CSC Matrix at the given vector index. 55 | * 56 | * @note Can only get a vector from a matrix in the storage order of the 57 | * matrix. 58 | */ 59 | Vector(IVSparse::SparseMatrix &mat, uint32_t vec); 60 | 61 | /** 62 | * Deep Copy Vector Constructor \n \n 63 | * Creates a deep copy of the given vector. 64 | */ 65 | Vector(IVSparse::SparseMatrix::Vector &vec); 66 | 67 | /** 68 | * Destroys the vector. 69 | */ 70 | ~Vector(); 71 | 72 | ///@} 73 | 74 | //* Getters *// 75 | /** @name Getters 76 | */ 77 | ///@{ 78 | 79 | /** 80 | * @returns The coefficient at the given index. 81 | */ 82 | T coeff(uint32_t index); 83 | 84 | /** 85 | * @returns The size of the vector in bytes. 86 | */ 87 | size_t byteSize(); 88 | 89 | /** 90 | * @returns The inner size of the vector. 91 | */ 92 | uint32_t innerSize(); 93 | 94 | /** 95 | * @returns The outer size of the vector. 96 | */ 97 | uint32_t outerSize(); 98 | 99 | /** 100 | * @returns The number of non-zero elements in the vector. 101 | */ 102 | uint32_t nonZeros(); 103 | 104 | /** 105 | * @returns The length of the vector. 
106 | */ 107 | uint32_t getLength(); 108 | 109 | /** 110 | * @returns A pointer to the values of the vector. 111 | */ 112 | T *getValues() const; 113 | 114 | /** 115 | * @returns A pointer to the inner indices of the vector. 116 | */ 117 | indexT *getInnerIndices() const; 118 | 119 | ///@} 120 | 121 | //* Utility Methods *// 122 | /** @name Utility Methods 123 | */ 124 | ///@{ 125 | 126 | /** 127 | * Prints the vector dense to the console. 128 | */ 129 | void print(); 130 | 131 | ///@} 132 | 133 | //* Operator Overloads *// 134 | 135 | // Coefficient Access Operator 136 | T operator[](uint32_t index); 137 | 138 | // Assignment Operator 139 | typename SparseMatrix::Vector operator=( 140 | typename SparseMatrix::Vector &vec); 141 | 142 | // Equality Operator 143 | bool operator==( 144 | typename SparseMatrix::Vector &vec); 145 | 146 | // Inequality Operator 147 | bool operator!=( 148 | typename SparseMatrix::Vector &vec); 149 | 150 | }; // class Vector 151 | 152 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/Vectors/IVCSC_Vector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file IVCSC_Vector.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief IVCSC Vector Class Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * @tparam T Type of the values in the matrix 15 | * @tparam indexT Type of the indices in the matrix 16 | * @tparam compressionLevel Compression level of the matrix 17 | * @tparam columnMajor Storage order of the matrix 18 | * 19 | * IVCSC Vector Class \n \n 20 | * The IVCSC Vector class is a vector class that is used to work with 21 | * IVCSC matrices. It works with the same logic as the corresponding 22 | * matrix compression level and is useful when working with these matrices. 23 | */ 24 | template 25 | class SparseMatrix::Vector { 26 | private: 27 | //* Private Class Variables *// 28 | 29 | size_t size = 0; // size of the vector in bytes 30 | 31 | void *data = nullptr; // data of the vector 32 | void *endPtr = nullptr; // pointer to the end of the vector 33 | 34 | uint32_t length = 0; // length of the vector 35 | 36 | uint8_t indexWidth = 1; // width of the indices 37 | 38 | uint32_t nnz = 0; // number of non-zero elements in the vector 39 | 40 | //* Private Class Methods *// 41 | 42 | // User checks to confirm a valid vector 43 | void userChecks(); 44 | 45 | // Calculates the size of the vector in bytes 46 | void calculateCompSize(); 47 | 48 | public: 49 | //* Constructors & Destructor *// 50 | /** @name Constructors 51 | */ 52 | ///@{ 53 | 54 | /** 55 | * Default Vector Constructor \n \n 56 | * Creates an empty vector with everything set to null/zero. 57 | */ 58 | Vector(){}; 59 | 60 | /** 61 | * Length Vector Constructor \n \n 62 | * Creates a vector of the given length with everything set to null/zero. 63 | */ 64 | Vector(uint32_t length); 65 | 66 | /** 67 | * IVSparse Matrix to Vector Constructor \n \n 68 | * Creates a vector from a IVCSC Matrix at the given vector index. 69 | * 70 | * @note Can only get a vector from a matrix in the storage order of the 71 | * matrix. 72 | */ 73 | Vector(IVSparse::SparseMatrix &mat, uint32_t vec); 74 | 75 | /** 76 | * Deep Copy Vector Constructor \n \n 77 | * Creates a deep copy of the given vector. 78 | */ 79 | Vector(IVSparse::SparseMatrix::Vector &vec); 80 | 81 | /** 82 | * Destroys the vector. 
83 | */ 84 | ~Vector(); 85 | 86 | ///@} 87 | 88 | //* Getters *// 89 | /** @name Getters 90 | */ 91 | ///@{ 92 | 93 | /** 94 | * @returns The coefficient at the given index. 95 | */ 96 | T coeff(uint32_t index); 97 | 98 | /** 99 | * @returns A pointer to the beginning of the vector. 100 | */ 101 | void *begin(); 102 | 103 | /** 104 | * @returns A pointer to the end of the vector. 105 | */ 106 | void *end(); 107 | 108 | /** 109 | * @returns The size of the vector in bytes. 110 | */ 111 | size_t byteSize(); 112 | 113 | /** 114 | * @returns The inner size of the vector. 115 | */ 116 | uint32_t innerSize(); 117 | 118 | /** 119 | * @returns The outer size of the vector. 120 | */ 121 | uint32_t outerSize(); 122 | 123 | /** 124 | * @returns The number of non-zero elements in the vector. 125 | */ 126 | uint32_t nonZeros(); 127 | 128 | /** 129 | * @returns The length of the vector. 130 | */ 131 | uint32_t getLength(); 132 | 133 | ///@} 134 | 135 | //* Utility Methods *// 136 | /** @name Utility Methods 137 | */ 138 | ///@{ 139 | 140 | /** 141 | * Prints the vector dense to the console. 142 | */ 143 | void print(); 144 | 145 | ///@} 146 | 147 | //* Calculations *// 148 | /** @name Calculation Methods 149 | */ 150 | ///@{ 151 | 152 | /** 153 | * @returns The norm of the vector. 154 | */ 155 | double norm(); 156 | 157 | /** 158 | * @returns The sum of the vector. 159 | */ 160 | T sum(); 161 | 162 | /** 163 | * @returns The dot product of the vector and an Eigen Dense Vector. 164 | */ 165 | double dot(Eigen::Matrix &other); 166 | 167 | /** 168 | * @returns The dot product of the vector and an Eigen Sparse Vector. 169 | */ 170 | double dot(Eigen::SparseVector &other); 171 | 172 | ///@} 173 | 174 | //* Operator Overloads *// 175 | 176 | // In place scalar multiplication 177 | void operator*=(T scalar); 178 | 179 | // scalar multiplication 180 | typename IVSparse::SparseMatrix::Vector operator*(T scalar); 181 | 182 | // equality operator 183 | bool operator==(typename SparseMatrix::Vector &vec); 185 | 186 | // inequality operator 187 | bool operator!=(typename SparseMatrix::Vector &vec); 189 | 190 | // coefficient access 191 | T operator[](uint32_t index); 192 | 193 | // boolean operator 194 | operator bool() { return (char *)endPtr - indexWidth > data; }; 195 | 196 | // assignment operator 197 | typename SparseMatrix::Vector 198 | operator=(typename SparseMatrix::Vector &vec); 199 | 200 | }; // class Vector 201 | 202 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/Vectors/VCSC_Vector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file VCSC_Vector.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief VCSC Vector Class Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * VCSC Vector Class \n \n 15 | * The VCSC Vector class is a vector class that is used to work with 16 | * VCSC matrices. It works with the same logic as the corresponding 17 | * matrix compression level and is useful when working with these matrices. 
18 | */ 19 | template 20 | class SparseMatrix::Vector { 21 | private: 22 | //* Private Class Variables *// 23 | 24 | size_t size = 0; // size of the vector in bytes 25 | 26 | std::map> data; // map of values to indices 27 | 28 | uint32_t length = 0; // length of the vector 29 | 30 | uint8_t indexWidth = 1; // width of the indices 31 | 32 | uint32_t nnz = 0; // number of non-zero elements in the vector 33 | 34 | //* Private Class Methods *// 35 | 36 | // User checks to confirm a valid vector 37 | void userChecks(); 38 | 39 | // Calculates the size of the vector in bytes 40 | void calculateCompSize(); 41 | 42 | public: 43 | //* Constructors & Destructor *// 44 | /** @name Constructors 45 | */ 46 | ///@{ 47 | 48 | /** 49 | * Default Vector Constructor \n \n 50 | * Creates an empty vector with everything set to null/zero. 51 | */ 52 | Vector(){}; 53 | 54 | /** 55 | * IVSparse Matrix to Vector Constructor \n \n 56 | * Creates a vector from a VCSC Matrix at the given vector index. 57 | * 58 | * @note Can only get a vector from a matrix in the storage order of the 59 | * matrix. 60 | */ 61 | Vector(IVSparse::SparseMatrix &mat, uint32_t vec); 62 | 63 | /** 64 | * Deep Copy Vector Constructor \n \n 65 | * Creates a deep copy of the given vector. 66 | */ 67 | Vector(IVSparse::SparseMatrix::Vector &vec); 68 | 69 | /** 70 | * Destroys the vector. 71 | */ 72 | ~Vector(); 73 | 74 | ///@} 75 | 76 | //* Getters *// 77 | /** @name Getters 78 | */ 79 | ///@{ 80 | 81 | /** 82 | * @returns The coefficient at the given index. 83 | */ 84 | T coeff(uint32_t index); 85 | 86 | /** 87 | * @returns The size of the vector in bytes. 88 | */ 89 | size_t byteSize(); 90 | 91 | /** 92 | * @returns The inner size of the vector. 93 | */ 94 | uint32_t innerSize(); 95 | 96 | /** 97 | * @returns The outer size of the vector. 98 | */ 99 | uint32_t outerSize(); 100 | 101 | /** 102 | * @returns The number of non-zero elements in the vector. 103 | */ 104 | uint32_t nonZeros(); 105 | 106 | /** 107 | * @returns The length of the vector. 108 | */ 109 | uint32_t getLength(); 110 | 111 | /** 112 | * @returns A pointer to the values of the vector. 113 | */ 114 | std::vector getValues(); 115 | 116 | /** 117 | * @returns A pointer to the counts of the vector. 118 | */ 119 | std::vector getCounts(); 120 | 121 | /** 122 | * @returns A pointer to the indices of the vector. 123 | */ 124 | std::vector getIndices(); 125 | 126 | /** 127 | * @returns The underlying data map 128 | */ 129 | std::map> getData(); 130 | 131 | /** 132 | * @returns The number of unique values in the vector. 133 | */ 134 | indexT uniqueVals(); 135 | 136 | ///@} 137 | 138 | //* Utility Methods *// 139 | /** @name Utility Methods 140 | */ 141 | ///@{ 142 | 143 | /** 144 | * Prints the vector dense to the console. 145 | */ 146 | void print(); 147 | 148 | ///@} 149 | 150 | //* Calculations *// 151 | /** @name Calculation Methods 152 | */ 153 | ///@{ 154 | 155 | /** 156 | * @returns The norm of the vector. 157 | */ 158 | double norm(); 159 | 160 | /** 161 | * @returns The sum of the vector. 162 | */ 163 | T sum(); 164 | 165 | /** 166 | * @returns The dot product of the vector and an Eigen Dense Vector. 167 | */ 168 | double dot(Eigen::Matrix &other); 169 | 170 | /** 171 | * @returns The dot product of the vector and an Eigen Sparse Vector. 
172 | */ 173 | double dot(Eigen::SparseVector &other); 174 | 175 | ///@} 176 | 177 | //* Operator Overloads *// 178 | 179 | // Coefficient Access Operator 180 | T operator[](uint32_t index); 181 | 182 | // Assignment Operator 183 | typename SparseMatrix::Vector operator=( 184 | typename SparseMatrix::Vector &vec); 185 | 186 | // Equality Operators 187 | bool operator==( 188 | typename SparseMatrix::Vector &vec); 189 | 190 | // Inequality Operators 191 | bool operator!=( 192 | typename SparseMatrix::Vector &vec); 193 | 194 | // Scalar Multiplication Operator (In Place) 195 | void operator*=(T scalar); 196 | 197 | // Scalar Multiplication Operator (Copy) 198 | typename IVSparse::SparseMatrix::Vector operator*( 199 | T scalar); 200 | 201 | }; // class Vector 202 | 203 | } // namespace IVSparse -------------------------------------------------------------------------------- /man/AnnotateNMF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotateNMF.R 3 | \name{AnnotateNMF} 4 | \alias{AnnotateNMF} 5 | \alias{AnnotateNMF.DimReduc} 6 | \alias{AnnotateNMF.Seurat} 7 | \alias{AnnotateNMF.nmf} 8 | \title{annotate an NMF model} 9 | \usage{ 10 | AnnotateNMF(object, ...) 11 | 12 | \method{AnnotateNMF}{DimReduc}( 13 | object, 14 | meta.data = NULL, 15 | columns = NULL, 16 | designs = NULL, 17 | center = TRUE, 18 | scale = FALSE, 19 | max.levels = 200, 20 | ... 21 | ) 22 | 23 | \method{AnnotateNMF}{Seurat}(object, columns = NULL, reduction = "nmf", ...) 24 | 25 | \method{AnnotateNMF}{nmf}( 26 | object, 27 | meta.data, 28 | columns = NULL, 29 | designs = NULL, 30 | center = TRUE, 31 | scale = FALSE, 32 | max.levels = 200, 33 | ... 34 | ) 35 | } 36 | \arguments{ 37 | \item{object}{an object suitable for annotation (Seurat, DimReduc, or nmf)} 38 | 39 | \item{...}{not implemented} 40 | 41 | \item{meta.data}{a data.frame, if one is not already part of the object} 42 | 43 | \item{columns}{factor columns of meta.data (see below) to annotate against} 44 | 45 | \item{designs}{named list of design matrices (supersedes meta.data/columns)} 46 | 47 | \item{center}{center the factor matrix for testing? (TRUE)} 48 | 49 | \item{scale}{scale the factor matrix for testing? 
(FALSE)} 50 | 51 | \item{max.levels}{maximum number of levels a factor may have in order to be included in analysis} 52 | 53 | \item{reduction}{the reductions slot in the Seurat object containing the model to annotate} 54 | } 55 | \description{ 56 | annotate an NMF model 57 | 58 | Annotate NMF model with cell or sample metadata 59 | 60 | Annotate NMF model with cell metadata 61 | } 62 | \details{ 63 | Maps factor information in an RcppML::nmf object against meta.data 64 | } 65 | \examples{ 66 | \dontrun{ 67 | get_pbmc3k_data() \%>\% 68 | NormalizeData() \%>\% 69 | RunNMF() -> pbmc3k 70 | AnnotateNMF(pbmc3k) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /man/AnnotationPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotationPlot.R 3 | \name{AnnotationPlot} 4 | \alias{AnnotationPlot} 5 | \alias{AnnotationPlot.Seurat} 6 | \alias{AnnotationPlot.DimReduc} 7 | \alias{AnnotationPlot.nmf} 8 | \alias{AnnotationPlot.list} 9 | \alias{AnnotationPlot.data.frame} 10 | \title{Plot annotations from an NMF model or other compatible objects.} 11 | \usage{ 12 | AnnotationPlot(object, ...) 13 | 14 | \method{AnnotationPlot}{Seurat}( 15 | object, 16 | plot.field = NULL, 17 | reduction = "nmf", 18 | dropEmpty = TRUE, 19 | ... 20 | ) 21 | 22 | \method{AnnotationPlot}{DimReduc}(object, plot.field = NULL, dropEmpty = TRUE, ...) 23 | 24 | \method{AnnotationPlot}{nmf}(object, plot.field = NULL, dropEmpty = TRUE, ...) 25 | 26 | \method{AnnotationPlot}{list}(object, plot.field, dropEmpty = TRUE, ...) 27 | 28 | \method{AnnotationPlot}{data.frame}(object, plot.field, dropEmpty = TRUE, ...) 29 | } 30 | \arguments{ 31 | \item{object}{a compatible object (Seurat, DimReduc, nmf, data.frame)} 32 | 33 | \item{...}{additional arguments passed to called functions} 34 | 35 | \item{plot.field}{metadata grouping to plot} 36 | 37 | \item{reduction}{the reduction to plot (default is 'nmf')} 38 | 39 | \item{dropEmpty}{drop factors without significant associations? (TRUE)} 40 | } 41 | \value{ 42 | a ggplot2 object 43 | } 44 | \description{ 45 | After running \code{AnnotateNMF}, this function returns 46 | a dot plot of the results 47 | 48 | After running \code{AnnotateNMF}, this function returns 49 | a dot plot of the results. Right now the code is the same as for DimReduc. 
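A compact sketch of the annotation workflow documented above, assuming the singlet and Seurat packages are installed; the pbmc3k object and the "cell_type" metadata column follow the examples already shown in these man pages, and all other arguments use the documented defaults.

library(Seurat)
library(singlet)

pbmc3k <- get_pbmc3k_data()                 # example dataset used throughout the man pages
pbmc3k <- NormalizeData(pbmc3k)
pbmc3k <- RunNMF(pbmc3k)                    # k = NULL -> automatic rank determination
pbmc3k <- AnnotateNMF(pbmc3k, columns = "cell_type", reduction = "nmf")
AnnotationPlot(pbmc3k, plot.field = "cell_type", dropEmpty = TRUE)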
50 | } 51 | \examples{ 52 | \dontrun{ 53 | get_pbmc3k_data() \%>\% NormalizeData \%>\% RunNMF \%>\% AnnotateNMF -> pbmc3k 54 | AnnotationPlot(pbmc3k, "cell_type") 55 | } 56 | \dontrun{ 57 | dat <- pbmc3k@reductions$nmf@misc$annotations$cell_type 58 | AnnotationPlot(dat, "cell_type") 59 | 60 | # if running interactively: 61 | library(plotly) 62 | ggplotly(AnnotationPlot(dat, "cell_type")) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /man/FindLocalNeighbors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FindLocalNeighbors.R 3 | \name{FindLocalNeighbors.Seurat} 4 | \alias{FindLocalNeighbors.Seurat} 5 | \alias{FindLocalNeighbors} 6 | \title{(Shared) Local Nearest-neighbor graph construction} 7 | \usage{ 8 | \method{FindLocalNeighbors}{Seurat}( 9 | object, 10 | k.param = 20, 11 | spatial.radius = 4, 12 | spatial.reduction = "spatial", 13 | reduction = "nmf", 14 | nn.metric = "jaccard", 15 | use.dist = FALSE, 16 | compute.SNN = TRUE, 17 | prune.SNN = 1/15, 18 | prune.KNN = 1/10, 19 | return.dist = FALSE, 20 | verbose = FALSE, 21 | dims = NULL, 22 | graph.name = NULL, 23 | threads = 0, 24 | ... 25 | ) 26 | 27 | FindLocalNeighbors(object, ...) 28 | } 29 | \arguments{ 30 | \item{object}{An object} 31 | 32 | \item{k.param}{Defines k for the k-nearest neighbor algorithm} 33 | 34 | \item{spatial.reduction}{Spatial coordinates to use as input for building the (S)NN. Ensure that radius is given in the same units as spatial coordinates, and that spatial coordinates are fixed on both axes (not scaled).} 35 | 36 | \item{reduction}{Reduction to use as input for building the (S)NN} 37 | 38 | \item{nn.metric}{Distance metric for nearest neighbors search. Options include: jaccard, cosine, euclidean, manhattan, hamming, and kl (kullback-leibler divergence).} 39 | 40 | \item{use.dist}{use distance instead of similarity (i.e. find k-furthest-neighbors). Useful for edge detection. Applies only to \code{metric = c("jaccard", "cosine")}.} 41 | 42 | \item{compute.SNN}{also compute the shared nearest neighbor graph} 43 | 44 | \item{prune.SNN}{Sets the cutoff for acceptable Jaccard index when computing the neighborhood overlap for the SNN construction. Any edges with values less than or equal to this will be set to 0 and removed from the SNN graph. Essentially sets the stringency of pruning (0 = no pruning, 1 = prune everything).} 45 | 46 | \item{prune.KNN}{Sets the cutoff for acceptable distance when computing the neighborhood for the Local KNN graph construction. Any edges with values less than or equal to this will be set to 0 and removed from the KNN graph. Essentially sets the stringency of pruning (0 = no pruning, 1 = prune everything when distance is "jaccard" or "cosine", otherwise whatever the equivalent is in the distance specified).} 47 | 48 | \item{return.dist}{return distances to nearest neighbors rather than a binary result} 49 | 50 | \item{verbose}{print output to the console} 51 | 52 | \item{dims}{Dimensions of the reduction to use as input (\code{NULL} = use all dimensions in reduction)} 53 | 54 | \item{graph.name}{Naming parameter for stored (S)NN graph. Default is \code{_local_(s)nn}. To store both the neighbor graph and the shared nearest neighbor graph, you must supply a vector containing two names to the \code{graph.name} parameter. 
The first element in the vector will be used to store the nearest neighbor graph, and the second element will be used to store the shared nearest neighbor graph. If only one name is supplied, only the nearest neighbor graph is stored.} 55 | 56 | \item{threads}{number of threads to use for parallelization} 57 | 58 | \item{...}{not implemented} 59 | } 60 | \value{ 61 | an object (Seurat object with graph, or just a graph) 62 | } 63 | \description{ 64 | Computes the \code{k.param} nearest neighbors within a spatial radius for a given dataset. Can also optionally (via \code{compute.SNN}), construct a shared nearest neighbor graph by calculating the neighborhood overlap (Jaccard index) between every cell and it's \code{k.param} nearest neighbors. Local KNN calculations are exact. 65 | } 66 | \details{ 67 | IMPORTANT: You must make sure that your \code{radius} is given in the same units as your 68 | \code{spatial.reduction} coordinates, and that your \code{spatial.reduction} gives fixed 69 | coordinates. This means distance on x-coordinates must be equal to distance on y-coordinates. 70 | Many spatial assays store distances in x and y scaled between 0 and 1, which is NOT going 71 | to work. You must use \code{\link{RescaleSpatial}} to convert back to a fixed coordinate 72 | system. If your radius is 5, this function will look for neighbors within a distance of 5 73 | from a given point as determined by your spatial coordinates. 74 | } 75 | -------------------------------------------------------------------------------- /man/GSEAHeatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GSEAHeatmap.R 3 | \name{GSEAHeatmap} 4 | \alias{GSEAHeatmap} 5 | \title{Plot GSEA results on a heatmap} 6 | \usage{ 7 | GSEAHeatmap( 8 | object, 9 | reduction = "nmf", 10 | max.terms.per.factor = 3, 11 | dropcommon = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{object}{Seurat or RcppML::nmf object} 16 | 17 | \item{reduction}{a dimensional reduction for which GSEA analysis has been performed} 18 | 19 | \item{max.terms.per.factor}{show this number of top terms for each factor} 20 | 21 | \item{dropcommon}{drop broadly enriched terms across factors? (TRUE)} 22 | } 23 | \value{ 24 | ggplot2 object 25 | } 26 | \description{ 27 | Plot top GSEA terms for each NMF factor on a heatmap 28 | } 29 | -------------------------------------------------------------------------------- /man/GetBestRank.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetBestRank.R 3 | \name{GetBestRank} 4 | \alias{GetBestRank} 5 | \title{determine the appropriate rank for an AutoNMF decomposition} 6 | \usage{ 7 | GetBestRank(df, tol.overfit = 1e-04, ...) 
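# A hedged sketch of the local neighbor search documented above, written as
# plain R rather than Rd markup. `spatial_obj` is a hypothetical Seurat object
# that already carries an "nmf" reduction and a "spatial" reduction holding
# x/y coordinates; argument values are the documented defaults.
spatial_obj <- RescaleSpatial(spatial_obj, reduction = "spatial")  # fixed-unit coordinates, as the Details require
spatial_obj <- FindLocalNeighbors(
  spatial_obj,
  k.param           = 20,
  spatial.radius    = 4,           # same units as the rescaled spatial coordinates
  spatial.reduction = "spatial",
  reduction         = "nmf",
  graph.name        = c("local_nn", "local_snn")  # store both the KNN and the SNN graph
)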
8 | } 9 | \arguments{ 10 | \item{df}{a data.frame of output from crossvalidation: rep, rank, error} 11 | 12 | \item{tol.overfit}{tolerance for increase in test set reconstruction error relative to minimum observed value during fitting} 13 | 14 | \item{...}{not implemented} 15 | } 16 | \value{ 17 | the lowest rank that minimizes the reconstruction error 18 | } 19 | \description{ 20 | determine the appropriate rank for an AutoNMF decomposition 21 | } 22 | -------------------------------------------------------------------------------- /man/MetadataSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MetadataHeatmap.R, R/MetadataSummary.R, 3 | % R/plot.nmf_metadata_summary.R 4 | \name{MetadataHeatmap} 5 | \alias{MetadataHeatmap} 6 | \alias{MetadataSummary} 7 | \alias{plot.nmf_metadata_summary} 8 | \title{Summarize contribution of sample groups to NMF factors} 9 | \usage{ 10 | MetadataHeatmap(x) 11 | 12 | MetadataSummary(h, factor_data, reorder = TRUE) 13 | 14 | \method{plot}{nmf_metadata_summary}(x, ...) 15 | } 16 | \arguments{ 17 | \item{x}{a data.frame} 18 | 19 | \item{h}{matrix giving factors as rows and samples as columns} 20 | 21 | \item{factor_data}{a factor of the same length as the number of columns in \code{h}} 22 | 23 | \item{reorder}{sort results by proportion in each group (uses \code{hclust} if >2 groups)} 24 | 25 | \item{...}{not implemented} 26 | } 27 | \value{ 28 | \code{data.frame} of mean weights for each sample group within each factor of class \code{nmf_metadata_summary}. Use the \code{plot} method to visualize. 29 | } 30 | \description{ 31 | Calculate the mean weight of samples in discrete and unique groups to each factor 32 | } 33 | -------------------------------------------------------------------------------- /man/PreprocessData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PreprocessData.R 3 | \name{PreprocessData.Seurat} 4 | \alias{PreprocessData.Seurat} 5 | \alias{PreprocessData.Assay} 6 | \alias{PreprocessData.dgCMatrix} 7 | \alias{PreprocessData} 8 | \title{Normalize count data} 9 | \usage{ 10 | \method{PreprocessData}{Seurat}(object, scale.factor = 10000, assay = NULL, ...) 11 | 12 | \method{PreprocessData}{Assay}(object, scale.factor = 10000, ...) 13 | 14 | \method{PreprocessData}{dgCMatrix}(object, scale.factor = 10000, ...) 15 | 16 | PreprocessData(object, scale.factor, ...) 
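# A minimal sketch of MetadataSummary() as documented above. The Seurat
# accessors and the "cell_type" column are assumptions borrowed from the
# package examples; MetadataSummary() itself only needs a factors-by-samples
# matrix and a grouping factor of matching length.
h <- t(pbmc3k@reductions$nmf@cell.embeddings)   # factors as rows, cells as columns
groups <- factor(pbmc3k$cell_type)              # one group label per cell
summ <- MetadataSummary(h, groups, reorder = TRUE)
plot(summ)                                      # dispatches to plot.nmf_metadata_summary()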
17 | } 18 | \arguments{ 19 | \item{object}{Seurat object} 20 | 21 | \item{scale.factor}{value by which to multiply all columns after unit normalization and before \code{log1p} transformation} 22 | 23 | \item{assay}{assay in which the counts matrix resides} 24 | 25 | \item{...}{arguments to \code{Seurat::LogNormalize}} 26 | } 27 | \description{ 28 | Standard log-normalization equivalent to \code{Seurat::LogNormalize} 29 | } 30 | -------------------------------------------------------------------------------- /man/ProjectData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ProjectData.R 3 | \name{ProjectData.Seurat} 4 | \alias{ProjectData.Seurat} 5 | \alias{ProjectData} 6 | \alias{ProjectData.SingleCellExperiment} 7 | \title{Project data onto a factor model} 8 | \usage{ 9 | \method{ProjectData}{Seurat}( 10 | object, 11 | w, 12 | split.by = NULL, 13 | assay = NULL, 14 | L1 = 0.01, 15 | L2 = 0, 16 | reduction.name = "nmf_projection", 17 | reduction.key = "NNLS_", 18 | threads = 0, 19 | reorder = FALSE, 20 | ... 21 | ) 22 | 23 | \method{ProjectData}{SingleCellExperiment}( 24 | object, 25 | w, 26 | split.by = NULL, 27 | assay = "logcounts", 28 | L1 = 0.01, 29 | L2 = 0, 30 | reduction.name = "NNLS", 31 | reduction.key = "NMF_", 32 | threads = 0, 33 | reorder = FALSE, 34 | ... 35 | ) 36 | 37 | ProjectData(object, ...) 38 | } 39 | \arguments{ 40 | \item{object}{A Seurat or SingleCellExperiment object} 41 | 42 | \item{w}{factor loadings with nrow(w) equal to nrow(object)} 43 | 44 | \item{split.by}{column name in \code{colData} giving a \code{factor} with multiple levels for splitting. Data will be weighted such that each level in the factor contributes equally to the NMF model.} 45 | 46 | \item{assay}{Assay to use, defaults to logcounts} 47 | 48 | \item{L1}{L1/LASSO penalty to increase sparsity of the model} 49 | 50 | \item{L2}{L2/Ridge-like penalty to increase angles between factors} 51 | 52 | \item{reduction.name}{Name to store resulting DimReduc object as ("NMF")} 53 | 54 | \item{reduction.key}{Key for resulting DimReduc ("NMF")} 55 | 56 | \item{threads}{number of threads to use (0 = let OpenMP use all available threads)} 57 | 58 | \item{reorder}{reorder the factors of the projection by d? (FALSE)} 59 | 60 | \item{...}{not implemented} 61 | } 62 | \value{ 63 | Returns a Seurat object with the projection stored in the reductions slot 64 | 65 | a SingleCellExperiment with projection stored in reducedDim(, "NNLS") 66 | } 67 | \description{ 68 | Non-negative Least Squares (NNLS) projection of assay data onto a factor model for transfer learning 69 | 70 | Non-negative Least Squares (NNLS) projection of assay data onto a factor model for transfer learning 71 | } 72 | \details{ 73 | Use \code{set.seed()} to guarantee reproducibility! 74 | 75 | Use \code{set.seed()} to guarantee reproducibility! 
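A hedged sketch of the projection workflow described above: reuse the gene-by-factor loadings of a fitted NMF model to embed a second dataset by NNLS. The `ref` and `query` objects and the reduction slot access are assumptions; argument values are the documented defaults.

set.seed(42)                                    # the Details above recommend set.seed() for reproducibility
w <- ref@reductions$nmf@feature.loadings        # gene x factor loadings from a reference fit (assumed slot layout)
query <- PreprocessData(query)                  # log-normalize the query counts
query <- ProjectData(query, w = w, reduction.name = "nmf_projection")
# the projected cell embeddings are then stored in
# query@reductions$nmf_projection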
76 | } 77 | \seealso{ 78 | \code{\link{RunLNMF}}, \code{\link{MetadataSummary}} 79 | 80 | \code{\link{RunLNMF}}, \code{\link{MetadataSummary}} 81 | } 82 | -------------------------------------------------------------------------------- /man/RankPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RankPlot.R 3 | \name{RankPlot} 4 | \alias{RankPlot} 5 | \alias{RankPlot.Seurat} 6 | \title{Plot NMF cross-validation results} 7 | \usage{ 8 | RankPlot(object, reduction = "nmf", ...) 9 | 10 | \method{RankPlot}{Seurat}(object, reduction = "nmf", detail.level = 1, ...) 11 | } 12 | \arguments{ 13 | \item{object}{a Seurat object or a \code{data.frame} that is the result of \code{RunNMF}} 14 | 15 | \item{reduction}{the NMF reduction slot name (result of \code{RunNMF} where \code{k} was an array)} 16 | 17 | \item{...}{not implemented} 18 | 19 | \item{detail.level}{of detail to plot, \code{1} for test set reconstruction error at convergence of each factorization, \code{2} for test set reconstruction error at each fitting iteration of each factorization} 20 | } 21 | \value{ 22 | A ggplot2 object 23 | } 24 | \description{ 25 | Given a NMF reduction at multiple ranks, plot rank vs. test set reconstruction error to determine the optimal rank. 26 | 27 | S3 method for Seurat that runs the \code{singlet::RunNMF} function. 28 | } 29 | -------------------------------------------------------------------------------- /man/RasterizeRowwise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rasterize_rowwise.R 3 | \name{RasterizeRowwise} 4 | \alias{RasterizeRowwise} 5 | \title{Bin together values from every block of \code{n} rows and calculate mean value, with a sparse \code{dgCMatrix} as input and a dense \code{matrix} as output. This technique is useful in some genomics applications.} 6 | \usage{ 7 | RasterizeRowwise(A, n = 10, threads = 0) 8 | } 9 | \arguments{ 10 | \item{A}{matrix to be rasterized} 11 | 12 | \item{n}{row-wise binning size} 13 | 14 | \item{threads}{number of threads to use (0 to let OpenMP decide how many are available and use them all)} 15 | } 16 | \description{ 17 | Bin together values from every block of \code{n} rows and calculate mean value, with a sparse \code{dgCMatrix} as input and a dense \code{matrix} as output. This technique is useful in some genomics applications. 18 | } 19 | -------------------------------------------------------------------------------- /man/RescaleSpatial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RescaleSpatial.R 3 | \name{RescaleSpatial.Seurat} 4 | \alias{RescaleSpatial.Seurat} 5 | \alias{RescaleSpatial} 6 | \title{Rescale spatial coordinates} 7 | \usage{ 8 | \method{RescaleSpatial}{Seurat}(object, reduction = "spatial") 9 | } 10 | \arguments{ 11 | \item{object}{Seurat object} 12 | 13 | \item{reduction}{the name of the spatial reduction to use} 14 | } 15 | \value{ 16 | Seurat object with rescaled spatial coordinates 17 | } 18 | \description{ 19 | Convert coordinates in the "spatial" reduction to natural numbers rather than values between 0 and 1. This allows for intuitive graph construction based on the radius surrounding any given cell (i.e. 
a radius of one corresponds to all cells next to the cell of interest) 20 | } 21 | -------------------------------------------------------------------------------- /man/RunGCNMF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RescaleSpatial.R, R/RunGCNMF.R 3 | \name{RunGCNMF} 4 | \alias{RunGCNMF} 5 | \alias{RescaleSpatial} 6 | \alias{RunGCNMF.Seurat} 7 | \title{Run Graph-Convolutional Non-negative Matrix Factorization} 8 | \usage{ 9 | RescaleSpatial(object, ...) 10 | 11 | \method{RunGCNMF}{Seurat}( 12 | object, 13 | graph, 14 | k, 15 | split.by = NULL, 16 | assay = NULL, 17 | tol = 1e-05, 18 | L1 = 0.01, 19 | L2 = 0, 20 | verbose = 2, 21 | reduction.name = "gcnmf", 22 | reduction.key = "GCNMF_", 23 | maxit = 100, 24 | threads = 0, 25 | features = NULL, 26 | ... 27 | ) 28 | 29 | RunGCNMF(object, ...) 30 | } 31 | \arguments{ 32 | \item{object}{A Seurat or SingleCellExperiment object} 33 | 34 | \item{...}{not implemented} 35 | 36 | \item{graph}{A graph to use, either directed or undirected} 37 | 38 | \item{k}{rank of the factorization (no automatic rank determination for GCNMF. Use \code{\link{RunNMF}}). Alternatively, specify an initial \code{w} matrix of dimensions \code{m x k}, where \code{m} is the number of rows in the matrix to be factorized.} 39 | 40 | \item{split.by}{column name in \code{colData} giving a \code{factor} with multiple levels for splitting. Data will be weighted such that each level in the factor contributes equally to the NMF model.} 41 | 42 | \item{assay}{Assay to use, defaults to logcounts} 43 | 44 | \item{tol}{tolerance of the fit (correlation distance of the model across consecutive iterations). Cross-validation fits are 10x coarser than this tolerance.} 45 | 46 | \item{L1}{L1/LASSO penalty to increase sparsity of the model} 47 | 48 | \item{L2}{L2/Ridge-like penalty to increase angles between factors} 49 | 50 | \item{verbose}{print updates to console} 51 | 52 | \item{reduction.name}{Name to store resulting DimReduc object as ("NMF")} 53 | 54 | \item{reduction.key}{Key for resulting DimReduc ("NMF")} 55 | 56 | \item{maxit}{maximum number of fitting iterations} 57 | 58 | \item{threads}{number of threads to use (0 = let OpenMP use all available threads)} 59 | 60 | \item{features}{unused for this method} 61 | } 62 | \value{ 63 | Returns a Seurat object with the GCNMF model stored in the reductions slot 64 | } 65 | \description{ 66 | Run NMF with weighted convolution determined by edges in a graph of dimensions \code{n x n}, where \code{n} is the number of columns in the matrix. 67 | } 68 | \details{ 69 | Use \code{set.seed()} to guarantee reproducibility! 70 | } 71 | \seealso{ 72 | \code{\link{RunNMF}} 73 | } 74 | -------------------------------------------------------------------------------- /man/RunGSEA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunGSEA.R 3 | \name{RunGSEA} 4 | \alias{RunGSEA} 5 | \title{Run Gene Set Enrichment Analysis on a Reduction} 6 | \usage{ 7 | RunGSEA( 8 | object, 9 | reduction = "nmf", 10 | species = "Homo sapiens", 11 | category = "C5", 12 | min.size = 10, 13 | max.size = 500, 14 | dims = NULL, 15 | verbose = TRUE, 16 | padj.sig = 0.01, 17 | ... 
18 | ) 19 | } 20 | \arguments{ 21 | \item{object}{a Seurat or RcppML::nmf object} 22 | 23 | \item{reduction}{dimensional reduction to use (if Seurat)} 24 | 25 | \item{species}{species for which to load gene sets} 26 | 27 | \item{category}{msigdbr gene set category (i.e. "H", "C5", etc.)} 28 | 29 | \item{min.size}{minimum number of terms in a gene set} 30 | 31 | \item{max.size}{maximum number of terms in a gene set} 32 | 33 | \item{dims}{factors in the reduction to use, default \code{NULL} for all factors} 34 | 35 | \item{verbose}{print progress to console} 36 | 37 | \item{padj.sig}{significance cutoff for BH-adjusted p-values (default 0.01)} 38 | 39 | \item{...}{additional params to pass to msigdbr} 40 | } 41 | \value{ 42 | a Seurat or nmf object, with GSEA information in the misc slot. BH-adj p-values are on a -log10 scale. 43 | } 44 | \description{ 45 | Run GSEA to identify gene sets that are enriched within NMF factors. 46 | } 47 | -------------------------------------------------------------------------------- /man/RunLNMF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetSharedFactors.R, R/GetUniqueFactors.R, 3 | % R/MetadataPlot.R, R/RunLNMF.R 4 | \name{GetSharedFactors} 5 | \alias{GetSharedFactors} 6 | \alias{GetUniqueFactors} 7 | \alias{MetadataPlot} 8 | \alias{MetadataPlot.Seurat} 9 | \alias{RunLNMF} 10 | \alias{RunLNMF.Seurat} 11 | \title{Run Linked NMF on a Seurat object} 12 | \usage{ 13 | GetSharedFactors(object, split.by, reduction = "lnmf") 14 | 15 | GetUniqueFactors(object, split.by, reduction = "lnmf") 16 | 17 | MetadataPlot(object, ...) 18 | 19 | \method{MetadataPlot}{Seurat}(object, split.by, reduction = "lnmf", ...) 20 | 21 | RunLNMF(object, ...) 22 | 23 | \method{RunLNMF}{Seurat}( 24 | object, 25 | split.by, 26 | reduction.use = "nmf", 27 | reduction.name = "lnmf", 28 | reduction.key = "LNMF_", 29 | verbose = TRUE, 30 | link.cutoff = 0.5, 31 | tol = 1e-05, 32 | maxit = 100, 33 | L1 = 0.01, 34 | L2 = 0, 35 | threads = 0, 36 | ... 37 | ) 38 | } 39 | \arguments{ 40 | \item{object}{A Seurat or SingleCellExperiment object} 41 | 42 | \item{split.by}{column name in \code{@meta.data} giving a \code{Factor} with multiple levels for splitting. Data will be weighted such that each group contributes equally to the LNMF model.} 43 | 44 | \item{reduction}{reduction to use for metadata analysis} 45 | 46 | \item{...}{not implemented} 47 | 48 | \item{reduction.use}{NMF reduction to use for initializing the linked factorization.} 49 | 50 | \item{reduction.name}{name to store resulting DimReduc object as} 51 | 52 | \item{reduction.key}{key for resulting DimReduc} 53 | 54 | \item{verbose}{print fitting progress to console} 55 | 56 | \item{link.cutoff}{if the relative contribution of samples in any given group to a factor falls below \code{link.cutoff}, unlink it from the factor. 
\code{link.cutoff = 1} means a factor must contribute exactly equally before being unlinked.} 57 | 58 | \item{tol}{tolerance of the fit (correlation distance of the model across consecutive iterations).} 59 | 60 | \item{maxit}{maximum number of fitting iterations} 61 | 62 | \item{L1}{L1/LASSO penalty to increase sparsity of the model} 63 | 64 | \item{L2}{L2/Ridge-like penalty to increase angles between factors} 65 | 66 | \item{threads}{number of threads to use (0 = let OpenMP use all available threads)} 67 | } 68 | \value{ 69 | a Seurat object with the NMF model stored in the reductions slot 70 | } 71 | \description{ 72 | Run a Linked Non-negative Matrix Factorization to separate shared and unique signals for integration or signature extraction. 73 | 74 | S3 method for Seurat that runs the \code{singlet::RunLNMF} function. 75 | } 76 | \details{ 77 | Use \code{set.seed()} to guarantee reproducibility! 78 | } 79 | \seealso{ 80 | \code{\link{RunNMF}}, \code{\link{RankPlot}}, \code{\link{MetadataSummary}} 81 | } 82 | -------------------------------------------------------------------------------- /man/RunNMF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunNMF.R 3 | \name{RunNMF.Seurat} 4 | \alias{RunNMF.Seurat} 5 | \alias{RunNMF} 6 | \alias{RunNMF.SingleCellExperiment} 7 | \title{Run NMF on a Seurat object} 8 | \usage{ 9 | \method{RunNMF}{Seurat}( 10 | object, 11 | split.by = NULL, 12 | k = NULL, 13 | assay = NULL, 14 | reps = 3, 15 | tol = 1e-05, 16 | L1 = 0.01, 17 | L2 = 0, 18 | verbose = 2, 19 | reduction.name = "nmf", 20 | reduction.key = "NMF_", 21 | maxit = 100, 22 | test.set.density = 0.05, 23 | learning.rate = 0.8, 24 | tol.overfit = 1e-04, 25 | trace.test.mse = 5, 26 | threads = 0, 27 | features = NULL, 28 | ... 29 | ) 30 | 31 | RunNMF(object, ...) 32 | 33 | \method{RunNMF}{SingleCellExperiment}( 34 | object, 35 | split.by = NULL, 36 | k = NULL, 37 | assay = NULL, 38 | reps = 3, 39 | tol = 1e-05, 40 | L1 = 0.01, 41 | L2 = 0, 42 | verbose = 2, 43 | reduction.name = "nmf", 44 | reduction.key = "NMF_", 45 | maxit = 100, 46 | test.set.density = 0.05, 47 | learning.rate = 0.8, 48 | tol.overfit = 1e-04, 49 | trace.test.mse = 5, 50 | threads = 0, 51 | features = NULL, 52 | ... 53 | ) 54 | } 55 | \arguments{ 56 | \item{object}{A Seurat or SingleCellExperiment object} 57 | 58 | \item{split.by}{column name in \code{colData} giving a \code{factor} with multiple levels for splitting. Data will be weighted such that each level in the factor contributes equally to the NMF model.} 59 | 60 | \item{k}{either \code{NULL} for automatic rank determination, a single integer giving the desired rank, or a vector of ranks to use for cross-validation.} 61 | 62 | \item{assay}{Assay to use, defaults to logcounts} 63 | 64 | \item{reps}{number of replicates for cross-validation} 65 | 66 | \item{tol}{tolerance of the fit (correlation distance of the model across consecutive iterations). 
Cross-validation fits are 10x coarser than this tolerance.} 67 | 68 | \item{L1}{L1/LASSO penalty to increase sparsity of the model} 69 | 70 | \item{L2}{L2/Ridge-like penalty to increase angles between factors} 71 | 72 | \item{verbose}{Level of console output (0/FALSE, 1/TRUE, 2)} 73 | 74 | \item{reduction.name}{Name to store resulting DimReduc object as ("NMF")} 75 | 76 | \item{reduction.key}{Key for resulting DimReduc ("NMF")} 77 | 78 | \item{maxit}{maximum number of fitting iterations} 79 | 80 | \item{test.set.density}{approximate density of the test set (default 0.05)} 81 | 82 | \item{learning.rate}{exponent on step size for automatic rank determination} 83 | 84 | \item{tol.overfit}{tolerance for increase in test set reconstruction error relative to minimum observed value during fitting} 85 | 86 | \item{trace.test.mse}{during automatic rank determination, calculate test set reconstruction error every trace iterations} 87 | 88 | \item{threads}{number of threads to use (0 = let OpenMP use all available threads)} 89 | 90 | \item{features}{unused for this method} 91 | 92 | \item{...}{not implemented} 93 | } 94 | \value{ 95 | Returns a Seurat object with the NMF model stored in the reductions slot 96 | 97 | Returns an SCE with the NMF model stored in reducedDim 98 | } 99 | \description{ 100 | Run Non-negative Matrix Factorization with rank determined by CV 101 | 102 | Run Non-negative Matrix Factorization with rank determined by CV 103 | } 104 | \details{ 105 | Use \code{set.seed()} to guarantee reproducibility! 106 | 107 | Use \code{set.seed()} to guarantee reproducibility! 108 | } 109 | \examples{ 110 | \dontrun{ 111 | get_pbmc3k_data() \%>\% 112 | NormalizeData() \%>\% 113 | RunNMF() -> pbmc3k 114 | } 115 | } 116 | \seealso{ 117 | \code{\link{RunLNMF}}, \code{\link{RankPlot}}, \code{\link{MetadataSummary}} 118 | } 119 | -------------------------------------------------------------------------------- /man/ard_nmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ard_nmf.R 3 | \name{ard_nmf} 4 | \alias{ard_nmf} 5 | \title{Automatic Rank Determination NMF} 6 | \usage{ 7 | ard_nmf( 8 | A, 9 | k_init = 2, 10 | k_max = 100, 11 | k_min = 2, 12 | n_replicates = 1, 13 | tol = 1e-05, 14 | cv_tol = 1e-04, 15 | maxit = 100, 16 | verbose = 1, 17 | L1 = 0.01, 18 | L2 = 0, 19 | threads = 0, 20 | test_density = 0.05, 21 | learning_rate = 1, 22 | tol_overfit = 0.001, 23 | trace_test_mse = 1 24 | ) 25 | } 26 | \arguments{ 27 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames} 28 | 29 | \item{k_init}{initial rank at which to begin search for local minimum. 
\code{k_init = 2} is a reasonable default, higher values can lead to swift convergence to a local minimum.} 30 | 31 | \item{k_max}{maximum rank to consider during automatic rank determination} 32 | 33 | \item{k_min}{minimum rank to consider during automatic rank determination (cannot be less than 2)} 34 | 35 | \item{n_replicates}{number of random test sets} 36 | 37 | \item{tol}{tolerance of the final fit} 38 | 39 | \item{cv_tol}{tolerance for cross-validation} 40 | 41 | \item{maxit}{maximum number of iterations} 42 | 43 | \item{verbose}{no output (0/FALSE), rank-level output (1/TRUE), step size info (2), or individual model fitting updates (3)} 44 | 45 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 46 | 47 | \item{L2}{L2/Ridge penalty to increase angles between factors} 48 | 49 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 50 | 51 | \item{test_density}{fraction of values to include in the test set} 52 | 53 | \item{learning_rate}{exponent on step size for automatic rank determination} 54 | 55 | \item{tol_overfit}{stopping criterion, maximum increase in test set reconstruction error at any iteration compared to test set reconstruction error at \code{trace_test_mse}} 56 | 57 | \item{trace_test_mse}{first iteration at which to calculate test set reconstruction error, and the error to compare all later iterations to when determining whether overfitting has occurred.} 58 | } 59 | \description{ 60 | ARD NMF quickly finds the optimal rank for an NMF model using an exponentially variable learning rate and basic coordinate descent. 61 | } 62 | \details{ 63 | If running ard_nmf() standalone, the following coercion can be useful: 64 | 65 | res <- ard_nmf(data_matrix, ...) 66 | plot(res$cv_data) # rank finding 67 | nmfres <- as(res, "nmf") # coerce to an nmf object for downstream use 68 | 69 | This coercion allows AnnotateNMF, AnnotationPlot, etc. to work on `nmfres` 70 | directly, rather than assuming a Seurat-like class structure is present. 71 | The coercion simply checks the dimensions of res$w, res$d, and res$h, 72 | then shoves all other list elements from res into nmfres@misc. 73 | } 74 | -------------------------------------------------------------------------------- /man/cellxgene_pipeline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellxgene_pipeline.R 3 | \name{cellxgene_pipeline} 4 | \alias{cellxgene_pipeline} 5 | \title{Learn an NMF model from a cellxgene Seurat object} 6 | \usage{ 7 | cellxgene_pipeline(filename, reps = 1, verbose = 3, L1 = 0.05, ...) 
8 | } 9 | \arguments{ 10 | \item{...}{arguments to \code{RunNMF}} 11 | 12 | \item{url}{download url for a Seurat v4 object} 13 | } 14 | \description{ 15 | Provide a link to download a cellxgene Seurat object, and this pipeline will return a standardized annotated NMF object at the optimal rank 16 | } 17 | \details{ 18 | This pipeline runs the following steps: 19 | \enumerate{ 20 | \item Download a Seurat v4 object from the provided URL 21 | \item Preprocess the data and run NMF using parameters specified in the \code{...} argument 22 | \item Annotate the NMF model against existing multi-level factor information 23 | \item Extract the model and annotations and save to an RDS file 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /man/checkColumns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/checkColumns.R 3 | \name{checkColumns} 4 | \alias{checkColumns} 5 | \title{verify that columns for auto-annotation are factors with > 1 level} 6 | \usage{ 7 | checkColumns(meta.data, columns = NULL, max.levels = 200) 8 | } 9 | \arguments{ 10 | \item{meta.data}{the meta.data (or a Seurat object if needs be)} 11 | 12 | \item{columns}{the columns (optional; if NULL, will check all columns)} 13 | 14 | \item{max.levels}{maximum number of levels permitted for a factor to be kept} 15 | } 16 | \value{ 17 | a vector of suitable columns (may be length 0) 18 | } 19 | \description{ 20 | verify that columns for auto-annotation are factors with > 1 level 21 | } 22 | -------------------------------------------------------------------------------- /man/checkDesigns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/checkDesigns.R 3 | \name{checkDesigns} 4 | \alias{checkDesigns} 5 | \title{verify that a list of matrices is in fact a named list of model matrices} 6 | \usage{ 7 | checkDesigns(designs) 8 | } 9 | \arguments{ 10 | \item{designs}{an alleged list of model matrices} 11 | } 12 | \value{ 13 | the list of model matrices, assuming it passes 14 | } 15 | \description{ 16 | verify that a list of matrices is in fact a named list of model matrices 17 | } 18 | \details{ 19 | this function will squawk and stop if the list is no good 20 | } 21 | -------------------------------------------------------------------------------- /man/cross_validate_nmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cross_validate_nmf.R, 3 | % R/plot.cross_validate_nmf_data.R 4 | \name{cross_validate_nmf} 5 | \alias{cross_validate_nmf} 6 | \alias{plot.cross_validate_nmf_data} 7 | \title{Determine best rank for NMF using cross-validation} 8 | \usage{ 9 | cross_validate_nmf( 10 | A, 11 | ranks, 12 | n_replicates = 3, 13 | tol = 1e-04, 14 | maxit = 100, 15 | verbose = 1, 16 | L1 = 0.01, 17 | L2 = 0, 18 | threads = 0, 19 | test_density = 0.05, 20 | tol_overfit = 1e-04, 21 | trace_test_mse = 5 22 | ) 23 | 24 | \method{plot}{cross_validate_nmf_data}(x, detail = 2, tol.overfit = 1e-04, ...) 
25 | } 26 | \arguments{ 27 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames} 28 | 29 | \item{ranks}{a vector of ranks at which to fit a model and compute test set reconstruction error} 30 | 31 | \item{n_replicates}{number of random test sets} 32 | 33 | \item{tol}{tolerance of the fit (1e-5 for publication quality, 1e-4 for cross-validation)} 34 | 35 | \item{maxit}{maximum number of iterations} 36 | 37 | \item{verbose}{verbosity level} 38 | 39 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 40 | 41 | \item{L2}{L2/Ridge penalty to increase angles between factors} 42 | 43 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 44 | 45 | \item{test_density}{fraction of values to include in the test set} 46 | 47 | \item{tol_overfit}{stopping criterion, maximum increase in test set reconstruction error at any iteration compared to test set reconstruction error at \code{trace_test_mse}} 48 | 49 | \item{trace_test_mse}{first iteration at which to calculate test set reconstruction error, and the error to compare all later iterations to when determining whether overfitting has occurred.} 50 | 51 | \item{x}{the result of \code{cross_validate_nmf} (a data.frame)} 52 | 53 | \item{detail}{level of detail to plot} 54 | 55 | \item{...}{additional arguments (not implemented)} 56 | } 57 | \value{ 58 | a \code{data.frame} of test set reconstruction error vs. rank of class \code{nmf_cross_validate_data}. Use \code{plot} method to visualize or \code{min} to compute optimal rank. 59 | } 60 | \description{ 61 | Find the rank that minimizes the mean squared error of test set reconstruction using cross-validation. 62 | } 63 | -------------------------------------------------------------------------------- /man/getDesigns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getDesigns.R 3 | \name{getDesigns} 4 | \alias{getDesigns} 5 | \title{Refactored out from AnnotateNMF to ease argument handling} 6 | \usage{ 7 | getDesigns(columns = NULL, meta.data = NULL, designs = NULL, max.levels = 200) 8 | } 9 | \arguments{ 10 | \item{columns}{factor columns of meta.data, optional if !is.null(designs)} 11 | 12 | \item{meta.data}{a data.frame of annotations, optional if !is.null(designs)} 13 | 14 | \item{designs}{named list of design matrices (supersedes meta.data/columns)} 15 | 16 | \item{max.levels}{maximum number of levels permitted for a factor to be kept} 17 | } 18 | \value{ 19 | a named list of design matrices, if one was not provided 20 | } 21 | \description{ 22 | Refactored out from AnnotateNMF to ease argument handling 23 | } 24 | -------------------------------------------------------------------------------- /man/getModelFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getModelFit.R 3 | \name{getModelFit} 4 | \alias{getModelFit} 5 | \title{get linear all-pairs comparisons fits for a design matrix and data matrix} 6 | \usage{ 7 | getModelFit(design, object, center = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{design}{a model.matrix (or a sparse.model.matrix, perhaps)} 11 | 12 | \item{object}{a data.matrix, Seurat DimReduc, or RcppML nmf object} 13 | 14 | \item{center}{center the factor matrix for testing? 
(TRUE)} 15 | 16 | \item{...}{additional arguments, passed to base::scale} 17 | } 18 | \description{ 19 | Continuing along with the theme of "stupid limma tricks", this function 20 | fits and shrinks a means model for a factor. The proportion of factors 21 | assumed to have a fold-change > 0 is 1%, and a robust fit is applied. 22 | } 23 | \examples{ 24 | if (FALSE) { 25 | get_pbmc3k_data() \%>\% NormalizeData() -> pbmc3k 26 | design <- model.matrix(~ 0 + cell_type, data=pbmc3k@meta.data) 27 | fit <- getModelFit(design, pbmc3k) # toy fit on lognormcounts 28 | # Subsetting data to non-NA observations to match design matrix. 29 | limma::topTable(fit) 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/getModelMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getModelMatrix.R 3 | \name{getModelMatrix} 4 | \alias{getModelMatrix} 5 | \title{automatically generate a means model (one-vs-all group associations)} 6 | \usage{ 7 | getModelMatrix(field, meta.data = NULL, sparse = FALSE, ova = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{field}{the name of a column in the data.frame, or the column} 11 | 12 | \item{meta.data}{a data.frame with one or more factor columns, or NULL} 13 | 14 | \item{sparse}{fit a sparse model.matrix? (FALSE)} 15 | 16 | \item{ova}{fit a One-Vs-All model matrix (no referent)? (TRUE)} 17 | 18 | \item{...}{any additional params to pass to model.matrix} 19 | } 20 | \value{ 21 | a model.matrix or sparse.model.matrix (if sparse==TRUE) 22 | } 23 | \description{ 24 | A little-known trick in limma is to fit ~ 0 + group for a means model. 25 | This function automates that for a data.frame and a factor column of it. 26 | } 27 | \details{ 28 | If a factor (and no meta.data) is supplied (usually by with(meta.data, ...)), 29 | getModelMatrix will attempt to figure out the text to remove from the matrix 30 | column names by using deparse() and match.call() on the arguments (voodoo!). 31 | In order to fit one-vs-all comparisons, a means model is the default. If you 32 | have a referent group (e.g. normal bone marrow vs. a bunch of leukemia cells) 33 | or simply don't want a means model, set `ova` (one vs all) to FALSE. 
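For example, the trick itself in plain base R (a minimal illustration with a made-up factor; getModelMatrix wraps this and cleans up the column names):

    groups <- factor(c("A", "A", "B", "C"))
    mm <- model.matrix(~ 0 + groups)
    colnames(mm)  # one indicator column per level: "groupsA" "groupsB" "groupsC"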
34 | } 35 | \examples{ 36 | 37 | covs <- get_pbmc3k_data()@meta.data 38 | design <- getModelMatrix("cell_type", covs) 39 | head(design) 40 | sparsedesign <- getModelMatrix("cell_type", covs, sparse=TRUE) 41 | head(sparsedesign) 42 | 43 | if (FALSE) { 44 | # test Seurat and SCE support too 45 | mm1 <- getModelMatrix("cell_type", pbmc3k) 46 | mm2 <- getModelMatrix("cell_type", pbmc) 47 | identical(mm1, mm2) 48 | # [1] TRUE 49 | fit1 <- getModelFit(mm2, pbmc3k) 50 | fit2 <- getModelFit(mm1, pbmc) 51 | identical(fit1, fit2) 52 | # [1] TRUE 53 | limma::topTable(fit1) 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /man/getModelResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getModelResults.R 3 | \name{getModelResults} 4 | \alias{getModelResults} 5 | \title{extract data.frame of lods and pvalues for differential factor representation} 6 | \usage{ 7 | getModelResults(fit, noneg = TRUE, noint = TRUE) 8 | } 9 | \arguments{ 10 | \item{fit}{an lmFit result from limma, shrunken with eBayes()} 11 | 12 | \item{noneg}{drop results with negative lods scores? (TRUE)} 13 | 14 | \item{noint}{drop any results for '(Intercept)'? (TRUE)} 15 | } 16 | \value{ 17 | a data.frame with columns 'factor', 'group', 'fc', and 'p' 18 | } 19 | \description{ 20 | log-odds of non-null differences for a response by a factor are in fit$lods 21 | (which will usually be a matrix), and one-sided p-values for the moderated t 22 | test are computed from fit$t and fit$df.total using pt(t, df, lower=FALSE), 23 | then adjusted using the step-up procedure of Benjamini & Hochberg. 24 | } 25 | \details{ 26 | If an (Intercept) term is found, it will be dropped, and if 27 | negative LODS scores are encountered, they will be dropped, 28 | unless `noneg` and/or `noint` are FALSE. 29 | } 30 | -------------------------------------------------------------------------------- /man/get_pbmc3k_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_pbmc3k_data.R 3 | \name{get_pbmc3k_data} 4 | \alias{get_pbmc3k_data} 5 | \title{Load the pbmc3k dataset} 6 | \usage{ 7 | get_pbmc3k_data() 8 | } 9 | \value{ 10 | Seurat object with \code{$cell_type} info in the \code{meta.data} slot. 11 | } 12 | \description{ 13 | This dataset is adapted directly from the Satija lab "pbmc3k" dataset used in their popular tutorial on guided clustering. It is provided in this package for convenience since "SeuratData" is not available on CRAN. 14 | 15 | For more information, please see their documentation. 16 | } 17 | \details{ 18 | 2,700 peripheral blood mononuclear cells (PBMC) from 10x genomics taken from the "SeuratData" package 19 | } 20 | -------------------------------------------------------------------------------- /man/pbmc3k.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pbmc3k.R 3 | \docType{data} 4 | \name{pbmc3k} 5 | \alias{pbmc3k} 6 | \title{Compressed form of pbmc3k dataset} 7 | \format{ 8 | compressed version of the \code{dgCMatrix}, use \code{\link{get_pbmc3k_data}} to use this dataset. 
9 | } 10 | \usage{ 11 | data(pbmc3k) 12 | } 13 | \description{ 14 | See \code{\link{get_pbmc3k_data}} 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/plotFactorWeights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotFactorWeights.R 3 | \name{plotFactorWeights} 4 | \alias{plotFactorWeights} 5 | \title{convenience function to map one or more factors along a genome using igvR} 6 | \usage{ 7 | plotFactorWeights(object, gr, factors = 1:3, plot = FALSE) 8 | } 9 | \arguments{ 10 | \item{object}{an nmf object or something with a @w weights matrix} 11 | 12 | \item{gr}{a GRanges object with coordinates for the features} 13 | 14 | \item{factors}{which factors to plot weights for (default: 1, 2, 3)} 15 | 16 | \item{plot}{use igvR to plot the factors? (TRUE, if igvR detected)} 17 | } 18 | \value{ 19 | the GRanges gr, but with factor weights added as mcols 20 | } 21 | \description{ 22 | convenience function to map one or more factors along a genome using igvR 23 | } 24 | \details{ 25 | This function presumes a GRanges object will be supplied, which in turn 26 | presumes that the GenomicRanges package is installed from Bioconductor. 27 | Further, if plot == TRUE, the igvR package is presumed to be installed. 28 | If either of these presumptions are false, or if factor weights cannot 29 | be mapped to identifiers in the GRanges, this function will fail. 30 | } 31 | -------------------------------------------------------------------------------- /man/project_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ProjectData.R 3 | \name{project_model} 4 | \alias{project_model} 5 | \title{Project a factor model} 6 | \usage{ 7 | project_model(A, w, L1 = 0.01, L2 = 0, threads = 0) 8 | } 9 | \arguments{ 10 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames} 11 | 12 | \item{w}{matrix giving the factor model, of dimensions \code{nrow(A) x k}} 13 | 14 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 15 | 16 | \item{L2}{L2/Ridge penalty to increase angles between factors} 17 | 18 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 19 | } 20 | \value{ 21 | list of \code{h} and \code{d}, where \code{d} gives the relative contribution of each factor in \code{h} to the model 22 | } 23 | \description{ 24 | Project a dataset onto a factor model for transfer learning 25 | } 26 | -------------------------------------------------------------------------------- /man/run_linked_nmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunLNMF.R 3 | \name{run_linked_nmf} 4 | \alias{run_linked_nmf} 5 | \title{Run Linked Non-negative Matrix Factorization} 6 | \usage{ 7 | run_linked_nmf( 8 | A, 9 | w, 10 | link_h = NULL, 11 | link_w = NULL, 12 | tol = 1e-04, 13 | maxit = 100, 14 | verbose = TRUE, 15 | L1 = 0.01, 16 | L2 = 0, 17 | threads = 0 18 | ) 19 | } 20 | \arguments{ 21 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical 
rownames} 22 | 23 | \item{w}{initial matrix for 'w', usually taken from the result of \code{run_nmf}, of dimensions \code{nrow(A) x rank}.} 24 | 25 | \item{link_h}{matrix giving the linkage weight (usually in the range \code{(0, 1)}) of dimensions \code{rank x ncol(A)}.} 26 | 27 | \item{link_w}{matrix giving the linkage weight of dimensions \code{nrow(A) x rank}.} 28 | 29 | \item{tol}{tolerance of the fit (1e-5 for publication quality, 1e-4 for cross-validation)} 30 | 31 | \item{maxit}{maximum number of iterations} 32 | 33 | \item{verbose}{verbosity level} 34 | 35 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 36 | 37 | \item{L2}{L2/Ridge penalty to increase angles between factors} 38 | 39 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 40 | } 41 | \description{ 42 | Run LNMF, initialized from any NMF model, where factors may be "linked" to certain samples. 43 | } 44 | -------------------------------------------------------------------------------- /man/run_nmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_nmf.R 3 | \name{run_nmf} 4 | \alias{run_nmf} 5 | \title{Run Non-negative Matrix Factorization} 6 | \usage{ 7 | run_nmf( 8 | A, 9 | rank, 10 | tol = 1e-04, 11 | maxit = 100, 12 | verbose = TRUE, 13 | L1 = 0.01, 14 | L2 = 0, 15 | threads = 0, 16 | compression_level = 3 17 | ) 18 | } 19 | \arguments{ 20 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames} 21 | 22 | \item{rank}{factorization rank} 23 | 24 | \item{tol}{tolerance of the fit (1e-5 for publication quality, 1e-4 for cross-validation)} 25 | 26 | \item{maxit}{maximum number of iterations} 27 | 28 | \item{verbose}{verbosity level} 29 | 30 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 31 | 32 | \item{L2}{L2/Ridge penalty to increase angles between factors} 33 | 34 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 35 | 36 | \item{compression_level}{either 2 or 3, for VCSC or IVCSC, respectively. For development purposes.} 37 | } 38 | \description{ 39 | Run NMF on a sparse matrix with automatic rank determination by cross-validation 40 | } 41 | -------------------------------------------------------------------------------- /man/singlet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/singlet.R 3 | \docType{package} 4 | \name{singlet} 5 | \alias{singlet} 6 | \alias{singlet-package} 7 | \title{Singlet} 8 | \description{ 9 | Fast single-cell analysis with non-negative dimensional reductions 10 | } 11 | \details{ 12 | There are reasons to not use PCA. 13 | \itemize{ 14 | \item PCA fits to missing signal, 15 | \item considers only highly variable features, 16 | \item is almost useless without further graph-based analysis, 17 | \item requires centering and scaling of your data, 18 | \item and is robust only within experiments. 19 | } 20 | 21 | Instead, you should use Non-negative Matrix Factorization (NMF). 
22 | \itemize{ 23 | \item NMF imputes missing signal, 24 | \item learns models using all features, 25 | \item does everything PCA does and provides useful information itself, 26 | \item requires only variance stabilization, 27 | \item and is robust across experiments. 28 | } 29 | 30 | Singlet is all about extremely fast NMF for single-cell dimensional reduction and integration. 31 | 32 | See the vignettes to get started. 33 | } 34 | \seealso{ 35 | Useful links: 36 | \itemize{ 37 | \item \url{https://github.com/zdebruine/singlet} 38 | \item Report bugs at \url{https://github.com/zdebruine/singlet/issues} 39 | } 40 | 41 | } 42 | \author{ 43 | Zach DeBruine 44 | } 45 | -------------------------------------------------------------------------------- /man/write_IVCSC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{write_IVCSC} 4 | \alias{write_IVCSC} 5 | \title{Write an IVCSC matrix} 6 | \usage{ 7 | write_IVCSC(L, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{L}{input dgCMatrix list} 11 | 12 | \item{verbose}{print outputs} 13 | } 14 | \description{ 15 | Write an IVCSC matrix 16 | } 17 | -------------------------------------------------------------------------------- /singlet.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = -I../inst/include/ 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DEIGEN_INITIALIZE_MATRICES_BY_ZERO -DEIGEN_NO_DEBUG 4 | CXX_STD = CXX11 -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = -I../inst/include/ 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DEIGEN_INITIALIZE_MATRICES_BY_ZERO -DEIGEN_NO_DEBUG 4 | CXX_STD = CXX11 -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | test_check("singlet") 3 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | Sys.setlocale("LC_COLLATE", "C") ## What CRAN does; affects sort order 2 | set.seed(999) ## To ensure that tests that involve randomness are reproducible 3 | options(warn=1) 4 | -------------------------------------------------------------------------------- /tests/testthat/test-pbmc3k.R: -------------------------------------------------------------------------------- 1 | test_that("Testing pbmc3k data set", 2 | { 3 | data("pbmc3k", package="singlet") 4 | all(c("i", "p", "Dim", "Dimnames", "x", 
"cell_type") 5 | %in% names(pbmc3k)) 6 | expect_true(TRUE) 7 | }) 8 | -------------------------------------------------------------------------------- /vignettes/Batch_Integration_with_Linked_NMF.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Batch Integration with Linked NMF" 3 | author: "Zach DeBruine" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Batch Integration with Linked NMF} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = TRUE) 14 | ``` 15 | 16 | ## Get Started 17 | 18 | Use the Seurat `ifnb` dataset: 19 | 20 | ```{R, warning = FALSE, message = FALSE, get-started} 21 | library(Seurat) 22 | library(ggplot2) 23 | library(singlet) 24 | library(SeuratData) 25 | library(cowplot) 26 | data(ifnb) 27 | ifnb <- NormalizeData(ifnb, verbose = FALSE) 28 | ``` 29 | 30 | ## Determine NMF Rank 31 | 32 | First we determine number of factors using cross-validation. 33 | 34 | Note the use of `split.by = "stim"`, where we are indicating that the "stim" field in the `meta.data` slot of our Seurat object is a factor giving discrete groupings of samples (either stimulated or unstimulated). The `RunNMF` function will weight samples from both groups equally in the NMF objective, regardless of whether the groups are of equal size. 35 | 36 | ```{R, message = FALSE, warning = FALSE, results = 'hide', run-nmf} 37 | set.seed(123) 38 | ifnb <- RunNMF(ifnb, split.by = "stim") 39 | ``` 40 | 41 | ```{R, fig.width = 4, fig.height = 4} 42 | RankPlot(ifnb) + scale_y_continuous(limits = c(1, 1.1)) 43 | ``` 44 | 45 | Visualize contribution of groups to both factors: 46 | 47 | ```{R, fig.width = 6, plot-metadata} 48 | MetadataPlot(ifnb, split.by = "stim", reduction = "nmf") 49 | ``` 50 | 51 | Some factors are almost exclusively explaining signal from one dataset, and not the other. 52 | 53 | ## Run Linked NMF 54 | 55 | Linked NMF will uncouple sample groups from factors in which they are only weakly represented. LNMF is initialized with the joint NMF model that we trained before, we just specify a cutoff for the minimum fractional representation of any sample group in any given factor at which it will be uncoupled from the factor. 56 | 57 | ```{R, message = FALSE, warning = FALSE, run-lnmf} 58 | ifnb <- RunLNMF( 59 | ifnb, 60 | split.by = "stim", 61 | reduction.use = "nmf", 62 | link.cutoff = 0.7, 63 | verbose = FALSE) 64 | ``` 65 | 66 | LNMF creates a new reduction in the Seurat object, `lnmf`. 
Now examine how each group is represented in the linked NMF factors: 67 | 68 | ```{R, fig.width = 6, plot-lnmf-metadata} 69 | MetadataPlot(ifnb, split.by = "stim", reduction = "lnmf") 70 | ``` 71 | 72 | We can visualize these models on UMAP coordinates using the joint NMF model and the linked NMF model restricted to shared factors (the full linked model, including unique factors, can be embedded the same way; see the sketch after this vignette): 73 | 74 | ```{R, message = FALSE, warning = FALSE, results = 'hide', run-umap} 75 | ifnb <- RunUMAP(ifnb, 76 | reduction = "nmf", 77 | dims = 1:ncol(ifnb@reductions$nmf), 78 | reduction.name = "jnmf_all", 79 | verbose = FALSE) 80 | 81 | ifnb <- RunUMAP(ifnb, 82 | reduction = "lnmf", 83 | dims = GetSharedFactors(ifnb, split.by = "stim"), 84 | reduction.name = "lnmf_shared", 85 | verbose = FALSE) 86 | 87 | p_jnmf_umap <- DimPlot(ifnb, reduction = "jnmf_all", group.by = "stim") 88 | p_lnmf_umap <- DimPlot(ifnb, reduction = "lnmf_shared", group.by = "stim") 89 | ``` 90 | 91 | ## Visualize 92 | 93 | Plot the results: 94 | 95 | ```{R, fig.width = 8, fig.height = 4, plot-umap} 96 | plot_grid( 97 | p_jnmf_umap + 98 | ggtitle("joint NMF") + 99 | theme(legend.position = "none"), 100 | p_lnmf_umap + 101 | ggtitle("linked NMF") + 102 | theme(legend.position = "none"), 103 | get_legend(p_jnmf_umap), 104 | ncol = 3, 105 | rel_widths = c(1, 1, 0.2) 106 | ) 107 | ``` --------------------------------------------------------------------------------
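A possible extension of the vignette above (a sketch only, assuming the `ifnb` object and packages from the preceding chunks; the reduction name `lnmf_all` and the chunk label are arbitrary): the shared and unique factor indices can be listed directly, and the entire linked NMF model (shared plus unique factors) can be embedded with the same `RunUMAP` pattern used for the shared factors.

```{R, message = FALSE, warning = FALSE, lnmf-all-sketch}
# Which factors remain linked to both groups, and which are unique to one group
GetSharedFactors(ifnb, split.by = "stim")
GetUniqueFactors(ifnb, split.by = "stim")

# Embed the full linked NMF model (shared + unique factors) for comparison
# with the shared-factor embedding shown in the vignette
ifnb <- RunUMAP(ifnb,
                reduction = "lnmf",
                dims = 1:ncol(ifnb@reductions$lnmf),
                reduction.name = "lnmf_all",
                verbose = FALSE)

DimPlot(ifnb, reduction = "lnmf_all", group.by = "stim") +
  ggtitle("linked NMF (all factors)")
```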