├── .Rbuildignore ├── .gitignore ├── .vscode └── settings.json ├── DESCRIPTION ├── NAMESPACE ├── R ├── AnnotateNMF.R ├── AnnotationPlot.R ├── FindLocalNeighbors.R ├── GSEAHeatmap.R ├── GetBestRank.R ├── GetSharedFactors.R ├── GetUniqueFactors.R ├── MetadataHeatmap.R ├── MetadataPlot.R ├── MetadataSummary.R ├── PreprocessData.R ├── ProjectData.R ├── RankPlot.R ├── RcppExports.R ├── RescaleSpatial.R ├── RunGCNMF.R ├── RunGSEA.R ├── RunLNMF.R ├── RunNMF.R ├── ard_nmf.R ├── cellxgene_pipeline.R ├── checkColumns.R ├── checkDesigns.R ├── coercions.R ├── cross_validate_nmf.R ├── getDesigns.R ├── getModelFit.R ├── getModelMatrix.R ├── getModelResults.R ├── get_pbmc3k_data.R ├── methods.R ├── pbmc3k.R ├── plot.cross_validate_nmf_data.R ├── plot.nmf_metadata_summary.R ├── plotFactorWeights.R ├── rasterize_rowwise.R ├── run_nmf.R └── singlet.R ├── README.md ├── _pkgdown.yml ├── data └── pbmc3k.RData ├── docs ├── 404.html ├── articles │ ├── Batch_Integration_with_Linked_NMF.html │ ├── Batch_Integration_with_Linked_NMF_files │ │ └── figure-html │ │ │ ├── plot-lnmf-metadata-1.png │ │ │ ├── plot-metadata-1.png │ │ │ ├── plot-umap-1.png │ │ │ ├── run-nmf-1.png │ │ │ ├── unnamed-chunk-1-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-6-1.png │ │ │ └── unnamed-chunk-8-1.png │ ├── Guided_Clustering_with_NMF.html │ ├── Guided_Clustering_with_NMF_files │ │ └── figure-html │ │ │ ├── dim-plot-1.png │ │ │ ├── feature-plot-1.png │ │ │ ├── gsea-heatmap-1.png │ │ │ ├── map-cluster-ids-1.png │ │ │ ├── plot-metadata-1.png │ │ │ ├── unnamed-chunk-1-1.png │ │ │ └── viz-dim-loadings-1.png │ └── index.html ├── authors.html ├── deps │ ├── bootstrap-5.1.3 │ │ ├── bootstrap.bundle.min.js │ │ ├── bootstrap.bundle.min.js.map │ │ └── bootstrap.min.css │ ├── data-deps.txt │ └── jquery-3.6.0 │ │ ├── jquery-3.6.0.js │ │ ├── jquery-3.6.0.min.js │ │ └── jquery-3.6.0.min.map ├── index.html ├── link.svg ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── GSEAHeatmap.html │ ├── MetadataSummary.html │ ├── RankPlot.html │ ├── RunGSEA.html │ ├── RunLNMF.html │ ├── RunNMF.html │ ├── ard_nmf.html │ ├── cross_validate_nmf.html │ ├── get_pbmc3k_data.html │ ├── index.html │ ├── pbmc3k.html │ ├── run_linked_nmf.html │ ├── run_nmf.html │ └── singlet.html ├── search.json └── sitemap.xml ├── inst ├── CITATION └── include │ ├── IVSparse.h │ ├── singlet.h │ └── src │ ├── CSC │ ├── CSC_BLAS.hpp │ ├── CSC_Constructors.hpp │ ├── CSC_Methods.hpp │ ├── CSC_Operators.hpp │ ├── CSC_Private_Methods.hpp │ └── CSC_SparseMatrix.hpp │ ├── IVCSC │ ├── IVCSC_BLAS.hpp │ ├── IVCSC_Constructors.hpp │ ├── IVCSC_Methods.hpp │ ├── IVCSC_Operators.hpp │ ├── IVCSC_Private_Methods.hpp │ └── IVCSC_SparseMatrix.hpp │ ├── IVSparse_Base_Methods.hpp │ ├── IVSparse_SparseMatrixBase.hpp │ ├── InnerIterators │ ├── CSC_Iterator.hpp │ ├── CSC_Iterator_Methods.hpp │ ├── IVCSC_Iterator.hpp │ ├── IVCSC_Iterator_Methods.hpp │ ├── VCSC_Iterator.hpp │ └── VCSC_Iterator_Methods.hpp │ ├── VCSC │ ├── VCSC_BLAS.hpp │ ├── VCSC_Constructors.hpp │ ├── VCSC_Methods.hpp │ ├── VCSC_Operators.hpp │ ├── VCSC_Private_Methods.hpp │ └── VCSC_SparseMatrix.hpp │ └── Vectors │ ├── CSC_Vector.hpp │ ├── CSC_Vector_Methods.hpp │ ├── IVCSC_Vector.hpp │ ├── IVCSC_Vector_Methods.hpp │ ├── VCSC_Vector.hpp │ └── VCSC_Vector_Methods.hpp ├── man ├── AnnotateNMF.Rd ├── AnnotationPlot.Rd ├── FindLocalNeighbors.Rd ├── GSEAHeatmap.Rd ├── GetBestRank.Rd ├── MetadataSummary.Rd ├── PreprocessData.Rd ├── ProjectData.Rd ├── RankPlot.Rd ├── RasterizeRowwise.Rd ├── 
RescaleSpatial.Rd ├── RunGCNMF.Rd ├── RunGSEA.Rd ├── RunLNMF.Rd ├── RunNMF.Rd ├── ard_nmf.Rd ├── cellxgene_pipeline.Rd ├── checkColumns.Rd ├── checkDesigns.Rd ├── cross_validate_nmf.Rd ├── getDesigns.Rd ├── getModelFit.Rd ├── getModelMatrix.Rd ├── getModelResults.Rd ├── get_pbmc3k_data.Rd ├── pbmc3k.Rd ├── plotFactorWeights.Rd ├── project_model.Rd ├── run_linked_nmf.Rd ├── run_nmf.Rd ├── singlet.Rd └── write_IVCSC.Rd ├── singlet.Rproj ├── src ├── Makevars ├── Makevars.win ├── RcppExports.cpp └── singlet.cpp ├── tests ├── testthat.R └── testthat │ ├── helper.R │ └── test-pbmc3k.R └── vignettes ├── Batch_Integration_with_Linked_NMF.Rmd └── Guided_Clustering_with_NMF.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | Makefile 9 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "*.rmd": "markdown", 4 | "array": "cpp", 5 | "initializer_list": "cpp", 6 | "atomic": "cpp", 7 | "*.tcc": "cpp", 8 | "cctype": "cpp", 9 | "chrono": "cpp", 10 | "clocale": "cpp", 11 | "cmath": "cpp", 12 | "complex": "cpp", 13 | "cstdarg": "cpp", 14 | "cstdint": "cpp", 15 | "cstdio": "cpp", 16 | "cstdlib": "cpp", 17 | "cstring": "cpp", 18 | "ctime": "cpp", 19 | "cwchar": "cpp", 20 | "cwctype": "cpp", 21 | "deque": "cpp", 22 | "unordered_map": "cpp", 23 | "vector": "cpp", 24 | "exception": "cpp", 25 | "fstream": "cpp", 26 | "functional": "cpp", 27 | "iosfwd": "cpp", 28 | "iostream": "cpp", 29 | "istream": "cpp", 30 | "limits": "cpp", 31 | "new": "cpp", 32 | "ostream": "cpp", 33 | "numeric": "cpp", 34 | "ratio": "cpp", 35 | "sstream": "cpp", 36 | "stdexcept": "cpp", 37 | "streambuf": "cpp", 38 | "thread": "cpp", 39 | "tuple": "cpp", 40 | "type_traits": "cpp", 41 | "utility": "cpp", 42 | "typeinfo": "cpp", 43 | "core": "cpp", 44 | "random": "cpp", 45 | "eigencore": "cpp", 46 | "string_view": "cpp", 47 | "bit": "cpp", 48 | "compare": "cpp", 49 | "concepts": "cpp", 50 | "cstddef": "cpp", 51 | "map": "cpp", 52 | "set": "cpp", 53 | "string": "cpp", 54 | "algorithm": "cpp", 55 | "any": "cpp", 56 | "iterator": "cpp", 57 | "memory": "cpp", 58 | "memory_resource": "cpp", 59 | "system_error": "cpp", 60 | "iomanip": "cpp", 61 | "numbers": "cpp", 62 | "semaphore": "cpp", 63 | "stop_token": "cpp" 64 | } 65 | } -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: singlet 2 | Type: Package 3 | Title: Non-negative Matrix Factorization for single-cell analysis 4 | Version: 0.99.8 5 | Date: 2024-10-15 6 | Author: Zach Debruine 7 | Maintainer: Zach Debruine 8 | Authors@R: c(person("Zach", "DeBruine", 9 | email = "debruinz@gvsu.edu", 10 | role = c("aut", "cre"), 11 | comment = c(ORCID = "0000-0003-2234-4827")), 12 | person("Tim", "Triche", 13 | email = "trichelab@gmail.com", 14 | role = c("aut"), 15 | comment = c(ORCID = "0000-0001-5665-946X")), 16 | person("Chan Zuckerberg Initiative", 17 | role = c("fnd"), 18 | comment = 
"https://chanzuckerberg.com/science/programs-resources/single-cell-biology/data-insights/efficient-data-structures-for-single-cell-data-integration/")) 19 | Description: Fast NMF with automatic rank-determination for dimension reduction of single-cell data using Seurat, RcppML nmf, SingleCellExperiments and similar. 20 | License: GPL (>= 2) 21 | Depends: 22 | Seurat, 23 | RcppML, 24 | dplyr, 25 | RcppEigen 26 | Imports: 27 | Matrix, 28 | methods, 29 | stats, 30 | knitr, 31 | ggplot2, 32 | limma, 33 | reshape2, 34 | utils, 35 | fgsea, 36 | msigdbr, 37 | RcppML, 38 | statmod 39 | LinkingTo: 40 | Rcpp, 41 | RcppEigen 42 | Suggests: 43 | rmarkdown, 44 | devtools, 45 | cowplot, 46 | viridis, 47 | testthat (>= 3.0.0), 48 | SingleCellExperiment, 49 | rWikiPathways, 50 | plotly, 51 | igvR 52 | VignetteBuilder: knitr 53 | RoxygenNote: 7.3.2 54 | Config/testthat/edition: 3 55 | URL: https://github.com/zdebruine/singlet 56 | LazyData: true 57 | BugReports: https://github.com/zdebruine/singlet/issues 58 | Encoding: UTF-8 59 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AnnotateNMF,DimReduc) 4 | S3method(AnnotateNMF,Seurat) 5 | S3method(AnnotateNMF,nmf) 6 | S3method(AnnotationPlot,DimReduc) 7 | S3method(AnnotationPlot,Seurat) 8 | S3method(AnnotationPlot,data.frame) 9 | S3method(AnnotationPlot,list) 10 | S3method(AnnotationPlot,nmf) 11 | S3method(FindLocalNeighbors,Seurat) 12 | S3method(MetadataPlot,Seurat) 13 | S3method(PreprocessData,Assay) 14 | S3method(PreprocessData,Seurat) 15 | S3method(PreprocessData,dgCMatrix) 16 | S3method(ProjectData,Seurat) 17 | S3method(ProjectData,SingleCellExperiment) 18 | S3method(RankPlot,Seurat) 19 | S3method(RescaleSpatial,Seurat) 20 | S3method(RunGCNMF,Seurat) 21 | S3method(RunLNMF,Seurat) 22 | S3method(RunNMF,Seurat) 23 | S3method(RunNMF,SingleCellExperiment) 24 | S3method(plot,cross_validate_nmf_data) 25 | S3method(plot,nmf_metadata_summary) 26 | export(AnnotateNMF) 27 | export(AnnotationPlot) 28 | export(FindLocalNeighbors) 29 | export(GSEAHeatmap) 30 | export(GetBestRank) 31 | export(GetSharedFactors) 32 | export(GetUniqueFactors) 33 | export(MetadataHeatmap) 34 | export(MetadataPlot) 35 | export(MetadataSummary) 36 | export(PreprocessData) 37 | export(ProjectData) 38 | export(RankPlot) 39 | export(RasterizeRowwise) 40 | export(RescaleSpatial) 41 | export(RunGCNMF) 42 | export(RunGSEA) 43 | export(RunLNMF) 44 | export(RunNMF) 45 | export(ard_nmf) 46 | export(cellxgene_pipeline) 47 | export(checkColumns) 48 | export(checkDesigns) 49 | export(cross_validate_nmf) 50 | export(getDesigns) 51 | export(getModelFit) 52 | export(getModelMatrix) 53 | export(getModelResults) 54 | export(get_pbmc3k_data) 55 | export(plotFactorWeights) 56 | export(project_model) 57 | export(run_nmf) 58 | export(write_IVCSC) 59 | exportMethods("$") 60 | exportMethods("$<-") 61 | exportMethods("[") 62 | exportMethods(coerce) 63 | import(Matrix) 64 | import(RcppML) 65 | import(Seurat) 66 | import(dplyr) 67 | import(fgsea) 68 | import(ggplot2) 69 | import(limma) 70 | import(msigdbr) 71 | importClassesFrom(RcppML,nmf) 72 | importFrom(methods,as) 73 | importFrom(methods,is) 74 | importFrom(methods,new) 75 | importFrom(reshape2,acast) 76 | importFrom(reshape2,melt) 77 | importFrom(stats,dist) 78 | importFrom(stats,hclust) 79 | importFrom(stats,model.matrix) 80 | importFrom(stats,p.adjust) 81 | 
importFrom(stats,reshape) 82 | importFrom(stats,runif) 83 | importFrom(utils,data) 84 | importFrom(utils,setTxtProgressBar) 85 | importFrom(utils,txtProgressBar) 86 | useDynLib(singlet, .registration = TRUE) 87 | -------------------------------------------------------------------------------- /R/AnnotateNMF.R: -------------------------------------------------------------------------------- 1 | #' annotate an NMF model 2 | #' 3 | #' @param object an object suitable for annotation (Seurat, DimReduc, or nmf) 4 | #' @param columns factor columns of meta.data (see below) to annotate against 5 | #' @param meta.data a data.frame, if one is not already part of the object 6 | #' @param designs named list of design matrices (supersedes meta.data/columns) 7 | #' @param center center the factor matrix for testing? (TRUE) 8 | #' @param scale scale the factor matrix for testing? (FALSE) 9 | #' @param max.levels maximum number of levels a factor may have in order to be included in analysis 10 | #' @param ... not implemented 11 | #' @export 12 | #' 13 | AnnotateNMF <- function(object, ...) { 14 | UseMethod("AnnotateNMF") 15 | } 16 | 17 | 18 | #' Annotate NMF model with cell or sample metadata 19 | #' 20 | #' @rdname AnnotateNMF 21 | #' @aliases AnnotateNMF 22 | #' 23 | #' @import limma 24 | #' 25 | #' @export 26 | #' 27 | AnnotateNMF.DimReduc <- function(object, meta.data = NULL, columns = NULL, designs = NULL, center = TRUE, scale = FALSE, max.levels = 200, ...) { 28 | designs <- getDesigns(columns = columns, 29 | meta.data = meta.data, 30 | designs = designs, 31 | max.levels) 32 | fits <- lapply(designs, 33 | getModelFit, 34 | object = object, 35 | center = center, 36 | scale = scale) 37 | object@misc$annotations <- lapply(fits, 38 | getModelResults) 39 | return(object) 40 | } 41 | 42 | 43 | #' @rdname AnnotateNMF 44 | #' @name AnnotateNMF 45 | #' 46 | #' @export 47 | #' 48 | .S3method("AnnotateNMF", "DimReduc", AnnotateNMF.DimReduc) 49 | 50 | 51 | #' @rdname AnnotateNMF 52 | #' 53 | #' @param reduction the reductions slot in the Seurat object containing the model to annotate 54 | #' 55 | #' @examples 56 | #' \dontrun{ 57 | #' get_pbmc3k_data() %>% 58 | #' NormalizeData() %>% 59 | #' RunNMF() -> pbmc3k 60 | #' AnnotateNMF(pbmc3k) 61 | #' } 62 | #' @aliases AnnotateNMF 63 | #' 64 | #' @export 65 | #' 66 | AnnotateNMF.Seurat <- function(object, columns = NULL, reduction = "nmf", ...) { 67 | if (is.null(columns)) columns <- colnames(object@meta.data) 68 | object@reductions[[reduction]] <- 69 | AnnotateNMF.DimReduc( 70 | object = object@reductions[[reduction]], 71 | meta.data = object@meta.data[, columns], 72 | columns = columns, ... 73 | ) 74 | return(object) 75 | } 76 | 77 | 78 | #' @rdname AnnotateNMF 79 | #' @name AnnotateNMF 80 | #' 81 | #' @export 82 | #' 83 | .S3method("AnnotateNMF", "Seurat", AnnotateNMF.Seurat) 84 | 85 | 86 | #' Annotate NMF model with cell metadata 87 | #' 88 | #' @details Maps factor information in an RcppML::nmf object against meta.data 89 | #' 90 | #' @rdname AnnotateNMF 91 | #' @aliases AnnotateNMF 92 | #' 93 | #' @import limma 94 | #' 95 | #' @export 96 | #' 97 | AnnotateNMF.nmf <- function(object, meta.data, columns = NULL, designs = NULL, center = TRUE, scale = FALSE, max.levels = 200, ...) { 98 | designs <- getDesigns(columns = columns, meta.data = meta.data, designs = designs, max.levels, ...) 
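  # each design matrix is fit against the NMF factor matrix with limma (via
  # getModelFit) and summarized per factor (getModelResults); the results are
  # stored in object@misc$annotations, mirroring AnnotateNMF.DimReduc above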
99 | fits <- lapply(designs, getModelFit, object = object, center = center, scale = scale) 100 | object@misc$annotations <- lapply(fits, getModelResults) 101 | return(object) 102 | } 103 | 104 | 105 | #' @rdname AnnotateNMF 106 | #' @name AnnotateNMF 107 | #' 108 | #' @export 109 | #' 110 | .S3method("AnnotateNMF", "nmf", AnnotateNMF.nmf) 111 | -------------------------------------------------------------------------------- /R/GSEAHeatmap.R: -------------------------------------------------------------------------------- 1 | #' Plot GSEA results on a heatmap 2 | #' 3 | #' Plot top GSEA terms for each NMF factor on a heatmap 4 | #' 5 | #' @param object Seurat or RcppML::nmf object 6 | #' @param reduction a dimensional reduction for which GSEA analysis has been performed 7 | #' @param max.terms.per.factor show this number of top terms for each factor 8 | #' @param dropcommon drop broadly enriched terms across factors? (TRUE) 9 | #' 10 | #' @return ggplot2 object 11 | #' 12 | #' @export 13 | #' 14 | GSEAHeatmap <- function(object, reduction = "nmf", max.terms.per.factor = 3, dropcommon = TRUE) { 15 | 16 | if (is(object, "Seurat")) { 17 | df <- object@reductions[[reduction]]@misc$gsea$padj 18 | } else if (is(object, "nmf")) { 19 | df <- object@misc$gsea$padj 20 | } 21 | 22 | # markers for each factor based on the proportion of signal in that factor 23 | df2 <- as.matrix(Diagonal(x = 1 / rowSums(df)) %*% df) 24 | 25 | # see https://github.com/zdebruine/singlet/issues/26 26 | # thanks to @earbebarnes 27 | rownames(df2) <- rownames(df) #add row names to df2 28 | 29 | terms <- c() 30 | for (i in 1:ncol(df2)) { 31 | terms_i <- df[, i] 32 | idx <- terms_i > -log10(0.05) 33 | terms_i <- terms_i[idx] 34 | terms_j <- df2[idx, i] 35 | v <- sort(terms_j, decreasing = TRUE) 36 | if (length(v) > max.terms.per.factor) { 37 | terms <- c(terms, names(v)[1:max.terms.per.factor]) 38 | } else { 39 | terms <- c(terms, names(v)) 40 | } 41 | } 42 | terms <- unique(terms) 43 | df <- df[terms, ] 44 | 45 | rownames(df) <- sapply(rownames(df), function(x) { 46 | ifelse(nchar(x) > 48, paste0(substr(x, 1, 45), "..."), x) 47 | }) 48 | 49 | if (dropcommon) { 50 | # remove terms that are broadly significant 51 | v <- which((rowSums(df > -log10(0.05)) > (ncol(df) / 2))) 52 | if (length(v) > 0) df <- df[-v, ] 53 | } 54 | df <- reshape2::melt(df) 55 | p <- ggplot(df, aes(Var2, Var1, fill = value)) + 56 | geom_tile() + 57 | scale_fill_viridis_c(option = "B") + 58 | theme_classic() + 59 | scale_x_discrete(expand = c(0, 0)) + 60 | scale_y_discrete(expand = c(0, 0)) + 61 | labs( 62 | x = "NMF factor", 63 | y = "GO Term", 64 | fill = "FDR\n(-log10)" 65 | ) + 66 | theme( 67 | axis.text.y = element_text(size = 6), 68 | axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1) 69 | ) + 70 | NULL 71 | 72 | return(p) 73 | 74 | } 75 | -------------------------------------------------------------------------------- /R/GetBestRank.R: -------------------------------------------------------------------------------- 1 | #' determine the appropriate rank for an AutoNMF decomposition 2 | #' 3 | #' @param df a data.frame of output from crossvalidation: rep, rank, error 4 | #' @inheritParams RunNMF 5 | #' @return the lowest rank that minimizes the reconstruction error 6 | #' @export 7 | #' 8 | GetBestRank <- function(df, tol.overfit = 1e-4, ...) 
{ 9 | df$rep <- factor(df$rep) 10 | best_ranks <- c() 11 | for (replicate in levels(df$rep)) { 12 | df_rep <- subset(df, rep == replicate) 13 | # calculate overfitting tolerance 14 | max_rank <- max(df_rep$k) + 1 15 | for (rank in unique(df_rep$k)) { 16 | if (rank < max_rank) { 17 | df_rank <- subset(df_rep, k == rank) 18 | if (nrow(df_rank) > 1) { 19 | v2 <- df_rank$test_error[2:nrow(df_rank)] 20 | v1 <- df_rank$test_error[1:(nrow(df_rank) - 1)] 21 | if(length(v1) >=2 ){ 22 | for (pos in 2:length(v1)) { 23 | if (v1[[pos]] > v1[[pos - 1]]) v1[[pos]] <- v1[[pos - 1]] 24 | } 25 | } 26 | if (max(c(0, (v2 - v1) / (v2 + v1))) > tol.overfit) { 27 | max_rank <- rank 28 | } 29 | } 30 | } 31 | } 32 | df_rep <- subset(df_rep, k < max_rank) 33 | if (nrow(df_rep) == 0) { 34 | best_ranks <- c(best_ranks, 2) 35 | } else if (nrow(df) == 1) { 36 | best_ranks <- c(best_ranks, df_rep$k[[1]]) 37 | } else { 38 | # condense to simple format by taking the last iteration in each model 39 | df_rep <- as.data.frame(group_by(df_rep, rep, k) %>% slice(which.max(iter))) 40 | best_ranks <- c(best_ranks, df_rep$k[which.min(df_rep$test_error)]) 41 | } 42 | } 43 | 44 | # get the lowest rank for each replicate, take the mean and floor it 45 | floor(mean(best_ranks)) 46 | } 47 | -------------------------------------------------------------------------------- /R/GetSharedFactors.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @rdname RunLNMF 3 | #' 4 | GetSharedFactors <- function(object, split.by, reduction = "lnmf") { 5 | if (!(reduction %in% names(object@reductions))) { 6 | stop("this Seurat object does not contain the requested reductions slot") 7 | } 8 | # which(rowSums(object@reductions[[reduction]]@misc$link_matrix == 0) == 0) 9 | which(!(colnames(object@reductions[[reduction]]@cell.embeddings) %in% names(which(apply(MetadataSummary(t(object@reductions[[reduction]]@cell.embeddings), object@meta.data[[split.by]]), 2, function(x) min(x) == 0))))) 10 | } 11 | -------------------------------------------------------------------------------- /R/GetUniqueFactors.R: -------------------------------------------------------------------------------- 1 | #' @rdname RunLNMF 2 | #' @export 3 | #' 4 | GetUniqueFactors <- function(object, split.by, reduction = "lnmf") { 5 | if (!(reduction %in% names(object@reductions))) { 6 | stop("this Seurat object does not contain the requested reductions slot") 7 | } 8 | # which(rowSums(object@reductions[[reduction]]@misc$link_matrix == 0) > 0) 9 | which((colnames(object@reductions[[reduction]]@cell.embeddings) %in% names(which(apply(MetadataSummary(t(object@reductions[[reduction]]@cell.embeddings), object@meta.data[[split.by]]), 2, function(x) min(x) == 0))))) 10 | } 11 | -------------------------------------------------------------------------------- /R/MetadataHeatmap.R: -------------------------------------------------------------------------------- 1 | #' @rdname MetadataSummary 2 | #' 3 | #' @param x result of \code{MetadataSummary} 4 | #' 5 | #' @importFrom reshape2 melt 6 | #' 7 | #' @export 8 | #' 9 | MetadataHeatmap <- function(x) { 10 | m <- reshape2::melt(as.matrix(x)) 11 | colnames(m) <- c("factor", "group", "frac") 12 | ggplot(m, aes(x = factor(factor, levels = unique(factor)), y = group, fill = frac)) + 13 | geom_tile() + 14 | theme_classic() + 15 | theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), axis.line = element_blank(), axis.ticks = element_blank()) + 16 | labs(x = "factor", y = "group", fill = 
"relative\ntotal weight") + 17 | scale_y_discrete(expand = c(0, 0)) + 18 | scale_x_discrete(expand = c(0, 0)) + 19 | scale_fill_gradient2(low = "white", high = "red") 20 | } 21 | -------------------------------------------------------------------------------- /R/MetadataPlot.R: -------------------------------------------------------------------------------- 1 | #' @rdname RunLNMF 2 | #' 3 | #' @export 4 | #' 5 | MetadataPlot <- function(object, ...) { 6 | UseMethod("MetadataPlot") 7 | } 8 | 9 | 10 | #' @rdname RunLNMF 11 | #' 12 | #' @name MetadataPlot 13 | #' 14 | #' @export 15 | #' 16 | MetadataPlot.Seurat <- function(object, split.by, reduction = "lnmf", ...) { 17 | if (!(reduction %in% names(object@reductions))) { 18 | stop("this Seurat object does not contain the requested reductions slot") 19 | } 20 | plot(MetadataSummary(t(object@reductions[[reduction]]@cell.embeddings), object@meta.data[[split.by]])) 21 | } 22 | 23 | 24 | #' @rdname RunLNMF 25 | #' 26 | #' @name MetadataPlot 27 | #' 28 | #' @export 29 | #' 30 | .S3method("MetadataPlot", "Seurat", MetadataPlot.Seurat) 31 | -------------------------------------------------------------------------------- /R/MetadataSummary.R: -------------------------------------------------------------------------------- 1 | #' Summarize contribution of sample groups to NMF factors 2 | #' 3 | #' Calculate the mean weight of samples in discrete and unique groups to each factor 4 | #' 5 | #' @rdname MetadataSummary 6 | #' 7 | #' @param h matrix giving factors as rows and samples as columns 8 | #' @param factor_data a factor of the same length as the number of columns in \code{h} 9 | #' @param reorder sort results by proportion in each group (uses \code{hclust} if >2 groups) 10 | #' 11 | #' @return \code{data.frame} of mean weights for each sample group within each factor of class \code{nmf_metadata_summary}. Use the \code{plot} method to visualize. 12 | #' 13 | #' @export 14 | #' 15 | MetadataSummary <- function(h, factor_data, reorder = TRUE) { 16 | factor_data <- as.factor(factor_data) 17 | if (is.null(rownames(h))) rownames(h) <- paste0("factor", 1:nrow(h)) 18 | m <- matrix(0, nrow(h), length(levels(factor_data))) 19 | rownames(m) <- rownames(h) 20 | colnames(m) <- levels(factor_data) 21 | for (j in 1:length(levels(factor_data))) { 22 | for (i in 1:nrow(h)) { 23 | m[i, j] <- mean(h[i, which(factor_data == levels(factor_data)[[j]])]) 24 | } 25 | } 26 | m <- apply(m, 1, function(x) x / sum(x)) 27 | if (length(levels(factor_data)) == 2) { 28 | m <- m[order(m[, 1], decreasing = TRUE), ] 29 | } else if (reorder) { 30 | m <- m[hclust(dist(m), method = "ward.D2")$order, hclust(dist(t(m)), method = "ward.D2")$order] 31 | } 32 | t(m) 33 | m <- as.data.frame(m) 34 | class(m) <- c("nmf_metadata_summary", "data.frame") 35 | m 36 | } 37 | -------------------------------------------------------------------------------- /R/PreprocessData.R: -------------------------------------------------------------------------------- 1 | #' Normalize count data 2 | #' 3 | #' Standard log-normalization equivalent to \code{Seurat::LogNormalize} 4 | #' 5 | #' @param object Seurat object 6 | #' @param assay assay in which the counts matrix resides 7 | #' @param scale.factor value by which to multiply all columns after unit normalization and before \code{log1p} transformation 8 | #' @param ... arguments to \code{Seurat::LogNormalize} 9 | #' @export 10 | #' @rdname PreprocessData 11 | #' 12 | PreprocessData.Seurat <- function(object, scale.factor = 10000, assay = NULL, ...) 
{ 13 | if (is.null(assay)) assay <- names(object@assays)[[1]] 14 | if (is.null(object@assays[[assay]]@key)) { 15 | object@assays[[assay]]@key <- paste0(assay, "_") 16 | } 17 | object@assays[[assay]] <- PreprocessData(object@assays[[assay]], ...) 18 | object 19 | } 20 | 21 | #' @rdname PreprocessData 22 | #' @export 23 | PreprocessData.Assay <- function(object, scale.factor = 10000, ...) { 24 | if (ncol(object@counts) == 0) { 25 | object@data <- PreprocessData(object@data, ...) 26 | } else { 27 | object@data <- PreprocessData(object@counts, ...) 28 | } 29 | object 30 | } 31 | 32 | #' @rdname PreprocessData 33 | #' @export 34 | PreprocessData.dgCMatrix <- function(object, scale.factor = 10000, ...) { 35 | m <- Seurat::LogNormalize(object, scale.factor, ...) 36 | rownames(m) <- rownames(object) 37 | colnames(m) <- colnames(object) 38 | m 39 | } 40 | 41 | #' @export 42 | #' @rdname PreprocessData 43 | #' 44 | PreprocessData <- function(object, scale.factor, ...) { 45 | UseMethod("PreprocessData") 46 | } 47 | 48 | #' @export 49 | #' @rdname PreprocessData 50 | #' @name PreprocessData 51 | #' 52 | .S3method("PreprocessData", "dgCMatrix", PreprocessData.dgCMatrix) 53 | 54 | #' @export 55 | #' @rdname PreprocessData 56 | #' @name PreprocessData 57 | #' 58 | .S3method("PreprocessData", "Assay", PreprocessData.Assay) 59 | 60 | 61 | #' @export 62 | #' @rdname PreprocessData 63 | #' @name PreprocessData 64 | #' 65 | .S3method("PreprocessData", "Seurat", PreprocessData.Seurat) 66 | -------------------------------------------------------------------------------- /R/RankPlot.R: -------------------------------------------------------------------------------- 1 | #' Plot NMF cross-validation results 2 | #' 3 | #' Given a NMF reduction at multiple ranks, plot rank vs. test set reconstruction error to determine the optimal rank. 4 | #' 5 | #' @param object a Seurat object or a \code{data.frame} that is the result of \code{RunNMF} 6 | #' @param reduction name of the NMF reduction in the Seurat object (result of \code{RunNMF}) for which multiple \code{ranks} were computed. 7 | #' @param ... not implemented 8 | #' 9 | #' @return A ggplot2 object 10 | #' 11 | #' @aliases RankPlot 12 | #' 13 | #' @export 14 | #' 15 | RankPlot <- function(object, reduction = "nmf", ...) { 16 | UseMethod("RankPlot") 17 | } 18 | 19 | 20 | #' @rdname RankPlot 21 | #' 22 | #' @param reduction the NMF reduction slot name (result of \code{RunNMF} where \code{k} was an array) 23 | #' @param detail.level of detail to plot, \code{1} for test set reconstruction error at convergence of each factorization, \code{2} for test set reconstruction error at each fitting iteration of each factorization 24 | #' 25 | #' @export 26 | #' 27 | RankPlot.Seurat <- function(object, reduction = "nmf", detail.level = 1, ...) { 28 | if (detail.level == 2) { 29 | plot(subset(object@reductions[[reduction]]@misc$cv_data, iter >= 5), detail.level) 30 | } else { 31 | plot(object@reductions[[reduction]]@misc$cv_data, detail.level) 32 | } 33 | } 34 | 35 | 36 | #' Plot NMF cross-validation results given a Seurat object 37 | #' 38 | #' S3 method for Seurat that runs the \code{singlet::RunNMF} function. 
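#'
#' A minimal usage sketch (a sketch only, assuming \code{RunNMF} was run with a
#' vector of ranks \code{k}, so that cross-validation results are present in the
#' reduction's \code{misc$cv_data} slot, which is what this method reads):
#'
#' @examples
#' \dontrun{
#' pbmc3k <- get_pbmc3k_data() %>% PreprocessData()
#' pbmc3k <- RunNMF(pbmc3k, k = c(5, 10, 15, 20))
#' RankPlot(pbmc3k, reduction = "nmf", detail.level = 1)
#' GetBestRank(pbmc3k@reductions$nmf@misc$cv_data)
#' }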
39 | #' 40 | #' @method RankPlot Seurat 41 | #' @rdname RankPlot 42 | #' @name RankPlot 43 | #' 44 | #' @export 45 | #' 46 | .S3method("RankPlot", "Seurat", RankPlot.Seurat) 47 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | weight_by_split <- function(A_, split_by, n_groups) { 5 | .Call(`_singlet_weight_by_split`, A_, split_by, n_groups) 6 | } 7 | 8 | rowwise_compress_sparse <- function(A, n = 10L, threads = 0L) { 9 | .Call(`_singlet_rowwise_compress_sparse`, A, n, threads) 10 | } 11 | 12 | rowwise_compress_dense <- function(A, n = 10L, threads = 0L) { 13 | .Call(`_singlet_rowwise_compress_dense`, A, n, threads) 14 | } 15 | 16 | calc_L1_matrix <- function(h, batch_id) { 17 | .Call(`_singlet_calc_L1_matrix`, h, batch_id) 18 | } 19 | 20 | Rcpp_predict <- function(A, w, L1, L2, threads) { 21 | .Call(`_singlet_Rcpp_predict`, A, w, L1, L2, threads) 22 | } 23 | 24 | c_project_model <- function(A, w, L1, L2, threads) { 25 | .Call(`_singlet_c_project_model`, A, w, L1, L2, threads) 26 | } 27 | 28 | c_nmf <- function(A, At, tol, maxit, verbose, L1_w, L1_h, L2_w, L2_h, threads, w) { 29 | .Call(`_singlet_c_nmf`, A, At, tol, maxit, verbose, L1_w, L1_h, L2_w, L2_h, threads, w) 30 | } 31 | 32 | c_nmf_batch <- function(A, At, tol, maxit, verbose, L1, L2, threads, w, batch_id) { 33 | .Call(`_singlet_c_nmf_batch`, A, At, tol, maxit, verbose, L1, L2, threads, w, batch_id) 34 | } 35 | 36 | c_nmf_sparse_list <- function(A_, At_, tol, maxit, verbose, L1, L2, threads, w) { 37 | .Call(`_singlet_c_nmf_sparse_list`, A_, At_, tol, maxit, verbose, L1, L2, threads, w) 38 | } 39 | 40 | #' Write an IVCSC matrix 41 | #' 42 | #' @param L input dgCMatrix list 43 | #' @param verbose print outputs 44 | #' @export 45 | #' 46 | write_IVCSC <- function(L, verbose = TRUE) { 47 | .Call(`_singlet_write_IVCSC`, L, verbose) 48 | } 49 | 50 | save_IVSparse <- function(A_, verbose = TRUE) { 51 | .Call(`_singlet_save_IVSparse`, A_, verbose) 52 | } 53 | 54 | build_IVCSC2 <- function(L, verbose = TRUE) { 55 | .Call(`_singlet_build_IVCSC2`, L, verbose) 56 | } 57 | 58 | read_IVSparse <- function() { 59 | .Call(`_singlet_read_IVSparse`) 60 | } 61 | 62 | run_nmf_on_sparsematrix_list <- function(A_, tol, maxit, verbose, threads, w, use_vcsc = FALSE, L1 = 0, L2 = 0) { 63 | .Call(`_singlet_run_nmf_on_sparsematrix_list`, A_, tol, maxit, verbose, threads, w, use_vcsc, L1, L2) 64 | } 65 | 66 | c_mu_nmf <- function(A, At, tol, maxit, verbose, L1, L2, threads, w) { 67 | .Call(`_singlet_c_mu_nmf`, A, At, tol, maxit, verbose, L1, L2, threads, w) 68 | } 69 | 70 | c_nmf_dense <- function(A, At, tol, maxit, verbose, L1_w, L1_h, L2_w, L2_h, threads, w) { 71 | .Call(`_singlet_c_nmf_dense`, A, At, tol, maxit, verbose, L1_w, L1_h, L2_w, L2_h, threads, w) 72 | } 73 | 74 | c_linked_nmf <- function(A, At, tol, maxit, verbose, L1, L2, threads, w, link_h, link_w) { 75 | .Call(`_singlet_c_linked_nmf`, A, At, tol, maxit, verbose, L1, L2, threads, w, link_h, link_w) 76 | } 77 | 78 | c_ard_nmf <- function(A, At, tol, maxit, verbose, L1, L2, threads, w, seed, inv_density, overfit_threshold, trace_test_mse) { 79 | .Call(`_singlet_c_ard_nmf`, A, At, tol, maxit, verbose, L1, L2, threads, w, seed, inv_density, overfit_threshold, trace_test_mse) 80 | } 81 | 82 | c_ard_nmf_sparse_list <- function(A_, 
At_, tol, maxit, verbose, L1, L2, threads, w, rng_seed, inv_density, overfit_threshold, trace_test_mse) { 83 | .Call(`_singlet_c_ard_nmf_sparse_list`, A_, At_, tol, maxit, verbose, L1, L2, threads, w, rng_seed, inv_density, overfit_threshold, trace_test_mse) 84 | } 85 | 86 | c_ard_nmf_dense <- function(A, At, tol, maxit, verbose, L1, L2, threads, w, seed, inv_density, overfit_threshold, trace_test_mse) { 87 | .Call(`_singlet_c_ard_nmf_dense`, A, At, tol, maxit, verbose, L1, L2, threads, w, seed, inv_density, overfit_threshold, trace_test_mse) 88 | } 89 | 90 | spatial_graph <- function(c1, c2, max_dist, max_k = 100L, threads = 0L) { 91 | .Call(`_singlet_spatial_graph`, c1, c2, max_dist, max_k, threads) 92 | } 93 | 94 | c_LKNN <- function(m, coord_x, coord_y, k, radius, metric, similarity, max_dist, verbose, threads) { 95 | .Call(`_singlet_c_LKNN`, m, coord_x, coord_y, k, radius, metric, similarity, max_dist, verbose, threads) 96 | } 97 | 98 | c_SNN <- function(G, min_similarity, threads) { 99 | .Call(`_singlet_c_SNN`, G, min_similarity, threads) 100 | } 101 | 102 | c_gcnmf <- function(A, At, G, tol, maxit, verbose, L1, L2, threads, w) { 103 | .Call(`_singlet_c_gcnmf`, A, At, G, tol, maxit, verbose, L1, L2, threads, w) 104 | } 105 | 106 | c_differentiate_model <- function(h, G) { 107 | .Call(`_singlet_c_differentiate_model`, h, G) 108 | } 109 | 110 | c_assign_cells_to_edge_clusters <- function(G, h_diff_clusters) { 111 | .Call(`_singlet_c_assign_cells_to_edge_clusters`, G, h_diff_clusters) 112 | } 113 | 114 | -------------------------------------------------------------------------------- /R/RescaleSpatial.R: -------------------------------------------------------------------------------- 1 | #' Rescale spatial coordinates 2 | #' 3 | #' Convert coordinates in the "spatial" reduction to natural numbers rather than values between 0 and 1. This allows for intuitive graph construction based on the radius surrounding any given cell (i.e. a radius of one corresponds to all cells next to the cell of interest) 4 | #' 5 | #' @param object Seurat object 6 | #' @param reduction the name of the spatial reduction to use 7 | #' @export 8 | #' @return Seurat object with rescaled spatial coordinates 9 | #' @aliases RescaleSpatial.Seurat 10 | #' @rdname RescaleSpatial 11 | RescaleSpatial.Seurat <- function(object, reduction = "spatial"){ 12 | df <- object@reductions[[reduction]]@cell.embeddings 13 | df[,1] <- df[,1] - min(df[,1]) 14 | df[,2] <- df[,2] - min(df[,2]) 15 | df[,1] <- df[,1] / max(df[,1]) 16 | df[,2] <- df[,2] / max(df[,2]) 17 | df[,1] <- df[,1] * 1 / median(diff(sort(unique(df[,1])))) 18 | df[,2] <- df[,2] * 1 / median(diff(sort(unique(df[,2])))) 19 | df <- round(df) 20 | object@reductions[[reduction]]@cell.embeddings <- df 21 | object 22 | } 23 | 24 | 25 | #' @rdname RunGCNMF 26 | #' @name RunGCNMF 27 | #' @export 28 | #' 29 | RescaleSpatial <- function(object, ...) { 30 | UseMethod("RescaleSpatial") 31 | } 32 | 33 | #' @rdname RescaleSpatial 34 | #' @name RescaleSpatial 35 | #' @export 36 | #' 37 | .S3method("RescaleSpatial", "Seurat", RescaleSpatial.Seurat) 38 | -------------------------------------------------------------------------------- /R/RunGCNMF.R: -------------------------------------------------------------------------------- 1 | #' Run Graph-Convolutional Non-negative Matrix Factorization 2 | #' 3 | #' @description Run NMF with weighted convolution determined by edges in a graph of dimensions \code{n x n}, where \code{n} is the number of columns in the matrix. 
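#'
#' A usage sketch (illustrative only; \code{g} below stands for any
#' user-constructed \code{n x n} cell-cell graph, e.g. one derived from
#' \code{\link{FindLocalNeighbors}} on spatial data):
#' \dontrun{
#' obj <- PreprocessData(obj)
#' obj <- RunGCNMF(obj, graph = g, k = 15)
#' obj@reductions$gcnmf
#' }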
4 | #' 5 | #' @inheritParams RunNMF 6 | #' @param graph A graph to use, either directed or undirected 7 | #' @param verbose print updates to console 8 | #' @param k rank of the factorization (no automatic rank determination for GCNMF. Use \code{\link{RunNMF}}). Alternatively, specify an initial \code{w} matrix of dimensions \code{m x k}, where \code{m} is the number of rows in the matrix to be factorized. 9 | #' 10 | #' @return Returns a Seurat object with the GCNMF model stored in the reductions slot 11 | #' 12 | #' @details Use \code{set.seed()} to guarantee reproducibility! 13 | #' @rdname RunGCNMF 14 | #' @aliases RunGCNMF.Seurat 15 | #' @name RunGCNMF.Seurat 16 | #' 17 | #' @seealso \code{\link{RunNMF}} 18 | #' 19 | #' @export 20 | #' 21 | RunGCNMF.Seurat <- function(object, 22 | graph, 23 | k, 24 | split.by = NULL, 25 | assay = NULL, 26 | tol = 1e-5, 27 | L1 = 0.01, 28 | L2 = 0, 29 | verbose = 2, 30 | reduction.name = "gcnmf", 31 | reduction.key = "GCNMF_", 32 | maxit = 100, 33 | threads = 0, 34 | features = NULL, 35 | ...) { 36 | if (is.null(assay)) { 37 | assay <- names(object@assays)[[1]] 38 | } 39 | 40 | # check if data has been normalized 41 | v <- object@assays[[assay]]@data@x 42 | if (sum(as.integer(v)) == sum(v)) { 43 | object <- PreprocessData(object, assay = assay) 44 | } 45 | A <- object@assays[[assay]]@data 46 | 47 | if (!is.null(features)) { 48 | if (features[[1]] == "var.features") { 49 | A <- A[object@assays[[assay]]@var.features, ] 50 | } else if (is.integer(features) || is.character(features)) { 51 | # array of indices or rownames 52 | A <- A[features, ] 53 | } else { 54 | stop("'features' vector was invalid.") 55 | } 56 | } 57 | 58 | rnames <- rownames(A) 59 | cnames <- colnames(A) 60 | 61 | if (!is.null(split.by)) { 62 | split.by <- as.integer(as.numeric(as.factor(object@meta.data[[split.by]]))) - 1 63 | if (any(sapply(split.by, is.na))) { 64 | stop("'split.by' cannot contain NA values") 65 | } 66 | A <- weight_by_split(A, split.by, length(unique(split.by))) 67 | } 68 | At <- Matrix::t(A) 69 | seed.use <- abs(.Random.seed[[3]]) 70 | set.seed(seed.use) 71 | if(is.matrix(k)){ 72 | if(!(nrow(A) %in% dim(k))) stop("dimensions of matrix specified for 'k' are not compatible with number of rows in 'A'") 73 | } else { 74 | w_init <- matrix(runif(k * nrow(A)), k, nrow(A)) 75 | } 76 | 77 | nmf_model <- c_gcnmf(A, At, graph, tol, maxit, verbose, L1, L2, threads, w_init) 78 | rownames(nmf_model$h) <- colnames(nmf_model$w) <- paste0(reduction.key, 1:nrow(nmf_model$h)) 79 | rownames(nmf_model$w) <- rnames 80 | colnames(nmf_model$h) <- cnames 81 | object@reductions[[reduction.name]] <- new("DimReduc", 82 | cell.embeddings = t(nmf_model$h), 83 | feature.loadings = nmf_model$w, 84 | assay.used = assay, 85 | stdev = nmf_model$d, 86 | global = FALSE, 87 | key = reduction.key) 88 | 89 | object 90 | } 91 | 92 | #' @rdname RunGCNMF 93 | #' 94 | #' @name RunGCNMF 95 | #' 96 | #' @export 97 | #' 98 | RunGCNMF <- function(object, ...) { 99 | UseMethod("RunGCNMF") 100 | } 101 | 102 | #' @rdname RunGCNMF 103 | #' 104 | #' @name RunGCNMF 105 | #' 106 | #' @export 107 | #' 108 | .S3method("RunGCNMF", "Seurat", RunGCNMF.Seurat) 109 | -------------------------------------------------------------------------------- /R/RunGSEA.R: -------------------------------------------------------------------------------- 1 | #' Run Gene Set Enrichment Analysis on a Reduction 2 | #' 3 | #' Run GSEA to identify gene sets that are enriched within NMF factors. 
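#'
#' A short usage sketch (assumes an existing "nmf" reduction produced by
#' \code{RunNMF}; the species and category shown are the documented defaults):
#' \dontrun{
#' pbmc3k <- RunGSEA(pbmc3k, reduction = "nmf", species = "Homo sapiens", category = "C5")
#' GSEAHeatmap(pbmc3k, reduction = "nmf", max.terms.per.factor = 3)
#' }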
4 | #' 5 | #' @param object a Seurat or RcppML::nmf object 6 | #' @param reduction dimensional reduction to use (if Seurat) 7 | #' @param species species for which to load gene sets 8 | #' @param category msigdbr gene set category (i.e. "H", "C5", etc.) 9 | #' @param min.size minimum number of terms in a gene set 10 | #' @param max.size maximum number of terms in a gene set 11 | #' @param dims factors in the reduction to use, default \code{NULL} for all factors 12 | #' @param verbose print progress to console 13 | #' @param padj.sig significance cutoff for BH-adjusted p-values (default 0.01) 14 | #' @param ... additional params to pass to msigdbr 15 | #' 16 | #' @return a Seurat or nmf object, with GSEA information in the misc slot. BH-adj p-values are on a -log10 scale. 17 | #' 18 | #' @import fgsea 19 | #' @import msigdbr 20 | #' 21 | #' @export 22 | #' 23 | RunGSEA <- function(object, reduction = "nmf", species = "Homo sapiens", category = "C5", 24 | min.size = 10, max.size = 500, dims = NULL, 25 | verbose = TRUE, padj.sig = 0.01, ...) { 26 | 27 | if (verbose) cat("fetching gene sets\n") 28 | gene_sets <- msigdbr(species = species, category = category, ...) 29 | 30 | if (verbose) cat("filtering pathways\n") 31 | pathways <- split(x = gene_sets$gene_symbol, f = gene_sets$gs_name) 32 | pathways <- pathways[lapply(pathways, length) > min.size] 33 | 34 | if (verbose) cat("filtering genes in pathways to those in reduction\n") 35 | genes_in_pathways <- unique(unlist(pathways)) 36 | 37 | # work on RcppML nmf objects too: 38 | if (is(object, "Seurat")) { 39 | w <- object@reductions[[reduction]]@feature.loadings 40 | } else if (is(object, "nmf")) { 41 | w <- object@w 42 | } 43 | if (!is.null(dims)) w <- w[, dims] 44 | 45 | if (verbose) cat("filtering genes in reduction to those in pathways\n") 46 | w <- w[which(rownames(w) %in% genes_in_pathways), ] 47 | pathways <- lapply(pathways, function(x) x[x %in% rownames(w)]) 48 | v <- lapply(pathways, length) 49 | pathways <- pathways[which(v > min.size & v < max.size)] 50 | 51 | cat("running GSEA on", ncol(w), "factors...\n") 52 | pb <- utils::txtProgressBar(min = 0, max = ncol(w), style = 3) 53 | results <- list() 54 | for (i in 1:ncol(w)) { 55 | ranks <- sort(w[, i]) 56 | results[[i]] <- suppressWarnings(fgseaMultilevel( 57 | pathways, ranks, 58 | minSize = min.size, maxSize = max.size, scoreType = "pos" 59 | )) 60 | utils::setTxtProgressBar(pb, i) 61 | } 62 | close(pb) 63 | 64 | pval <- do.call(cbind, lapply(results, function(x) x$pval)) 65 | padj <- do.call(cbind, lapply(results, function(x) x$padj)) 66 | es <- do.call(cbind, lapply(results, function(x) x$ES)) 67 | nes <- do.call(cbind, lapply(results, function(x) x$NES)) 68 | rownames(pval) <- rownames(padj) <- rownames(es) <- rownames(nes) <- results[[1]]$pathway 69 | 70 | idx <- which(apply(padj, 1, function(x) min(x) < padj.sig)) 71 | 72 | if (!is.null(dims)) { 73 | dims <- paste0(reduction, dims) 74 | } else if (is(object, "Seurat")) { 75 | dims <- paste0(reduction, 1:ncol(object@reductions[[reduction]])) 76 | } else if (is(object, "nmf")) { 77 | dims <- paste0("nmf", 1:ncol(w)) 78 | } 79 | colnames(pval) <- colnames(padj) <- colnames(es) <- colnames(nes) <- dims 80 | 81 | # reorder with hclust 82 | padj <- -log10(padj) 83 | pval <- -log10(pval) 84 | row_order <- hclust(dist(padj), method = "ward.D2")$order 85 | col_order <- hclust(dist(t(padj)), method = "ward.D2")$order 86 | pval <- pval[row_order, col_order] 87 | padj <- padj[row_order, col_order] 88 | es <- es[row_order, col_order] 89 | 
nes <- nes[row_order, col_order] 90 | 91 | if (is(object, "Seurat")) { 92 | object@reductions[[reduction]]@misc$gsea <- 93 | list("pval" = pval, "padj" = padj, "es" = es, "nes" = nes) 94 | } else if (is(object, "nmf")) { 95 | object@misc$gsea <- 96 | list("pval" = pval, "padj" = padj, "es" = es, "nes" = nes) 97 | } 98 | 99 | object 100 | } 101 | -------------------------------------------------------------------------------- /R/cellxgene_pipeline.R: -------------------------------------------------------------------------------- 1 | #' Learn an NMF model from a cellxgene Seurat object 2 | #' 3 | #' @description Provide a link to download a cellxgene Seurat object, and this pipeline will return a standardized annotated NMF object at the optimal rank 4 | #' 5 | #' @details 6 | #' This pipeline runs the following steps: 7 | #' 1. Download a Seurat v4 object from the provided URL 8 | #' 2. Preprocess the data and run NMF using parameters specified in the \code{...} argument 9 | #' 3. Annotate the NMF model against existing multi-level factor information 10 | #' 4. Extract the model and annotations and save to an RDS file 11 | #' 12 | #' @param url download url for a Seurat v4 object 13 | #' @param ... arguments to \code{RunNMF} 14 | #' @export 15 | #' @md 16 | #' 17 | cellxgene_pipeline <- function(filename, reps = 1, verbose = 3, L1 = 0.05, ...) { 18 | cat("reading ", filename, "\n") 19 | A <- readRDS(filename) 20 | if ("RNA" %in% names(A@assays)) { 21 | A@assays$RNA@key <- "RNA_" 22 | # keep only RNA assay 23 | A@assays <- list("RNA" = A@assays$RNA) 24 | cat(" normalizing...\n") 25 | A <- PreprocessData(A) 26 | cat(" running NMF...\n") 27 | t1 <- system.time({ 28 | A <- RunNMF(A, reps = reps, verbose = 3, L1 = L1, ...) 29 | })[[3]] 30 | cat(" annotating NMF model...\n") 31 | A <- AnnotateNMF(A) 32 | 33 | model <- list( 34 | "w" = as(A@reductions$nmf@feature.loadings, "dgCMatrix"), 35 | "d" = A@reductions$nmf@stdev, 36 | "h" = as(A@reductions$nmf@cell.embeddings, "dgCMatrix"), 37 | "misc" = A@reductions$nmf@misc, 38 | "metadata" = A@meta.data, 39 | "dataset" = A@misc$title, 40 | "runtime" = t1 41 | ) 42 | 43 | filename <- paste0(gsub("[^a-zA-Z]", "", A@misc$title), ".rds") 44 | cat(" saving model...\n") 45 | saveRDS(model, filename) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /R/checkColumns.R: -------------------------------------------------------------------------------- 1 | #' verify that columns for auto-annotation are factors with > 1 level 2 | #' 3 | #' @param meta.data the meta.data (or a Seurat object if needs be) 4 | #' @param columns the columns (optional; if NULL, will check all columns) 5 | #' @param max.levels maximum number of levels permitted for a factor to be kept 6 | #' @return a vector of suitable columns (may be length 0) 7 | #' 8 | #' @export 9 | checkColumns <- function(meta.data, columns = NULL, max.levels = 200) { 10 | verbose <- !is.null(columns) 11 | if (is(meta.data, "Seurat")) meta.data <- meta.data@meta.data 12 | if (is.null(columns)) columns <- colnames(meta.data) 13 | names(columns) <- columns 14 | keep <- names(which(sapply(columns, .keepColumn, meta.data = meta.data, max.levels = max.levels))) 15 | discard <- setdiff(columns, keep) 16 | if (verbose & length(discard) > 0) { 17 | message("Some columns are not factors, or have only one level, or have more than max.levels levels.") 18 | message("Skipping `", paste(discard, collapse = "`, `"), "`.") 19 | } 20 | names(keep) <- keep 21 | return(keep) 22 | } 23 
| 24 | 25 | # helper fn 26 | .keepColumn <- function(x, meta.data, max.levels) { 27 | if (!x %in% names(meta.data)) { 28 | return(FALSE) 29 | } 30 | if (!is(meta.data[[x]], "factor")) { 31 | return(FALSE) 32 | } 33 | if (nlevels(meta.data[[x]]) < 2) { 34 | return(FALSE) 35 | } 36 | if (nlevels(meta.data[[x]]) > max.levels) { 37 | return(FALSE) 38 | } 39 | return(TRUE) 40 | } 41 | -------------------------------------------------------------------------------- /R/checkDesigns.R: -------------------------------------------------------------------------------- 1 | #' verify that a list of matrices is in fact a named list of model matrices 2 | #' 3 | #' @param designs an alleged list of model matrices 4 | #' 5 | #' @return the list of model matrices, assuming it passes 6 | #' 7 | #' @details this function will squawk and stop if the list is no good 8 | #' 9 | #' @export 10 | checkDesigns <- function(designs) { 11 | 12 | if (is.null(names(designs)) | 13 | !all(sapply(designs, function(x) !is.null(attr(x, "assign"))))) { 14 | stop("`designs` must be a named list of model.matrix outputs.") 15 | } else { 16 | return(designs) 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /R/coercions.R: -------------------------------------------------------------------------------- 1 | #' @exportMethod coerce 2 | #' @importClassesFrom RcppML nmf 3 | setAs("list", "nmf", 4 | function(from) { 5 | 6 | msg <- NULL 7 | required <- c("w", "d", "h") 8 | if (!all(required %in% names(from))) { 9 | msg <- c(msg, 10 | "Cannot find $w, $d, and $h to create nmf object from list.") 11 | } else { 12 | if (ncol(from$w) != nrow(from$h)) { 13 | msg <- c(msg, 14 | "The $w and $h matrices are of unequal rank. Cannot coerce.") 15 | } 16 | if (ncol(from$w) != length(from$d)) { 17 | msg <- c(msg, 18 | "The scaling diagonal $d is the wrong length. Cannot coerce.") 19 | } 20 | } 21 | 22 | if (!is.null(msg)) { 23 | stop(msg) 24 | } else { 25 | new("nmf", 26 | w = from$w, 27 | d = from$d, 28 | h = from$h, 29 | misc = from[setdiff(names(from), required)]) 30 | } 31 | 32 | }) 33 | 34 | 35 | #' @exportMethod coerce 36 | #' @importClassesFrom RcppML nmf 37 | if (requireNamespace("SingleCellExperiment", quietly=TRUE)) { 38 | setAs("nmf", "LinearEmbeddingMatrix", function(from) { 39 | factorNames <- colnames(from@w) 40 | sampleNames <- colnames(from@h) 41 | lem <- LinearEmbeddingMatrix(sampleFactors=t(from@h), 42 | featureLoadings=from@w, 43 | factorData=DataFrame(d=from@d, 44 | row.names=factorNames), 45 | metadata=from@misc) 46 | rownames(lem) <- sampleNames 47 | return(lem) 48 | }) 49 | } 50 | 51 | 52 | #' @exportMethod coerce 53 | #' @importClassesFrom RcppML nmf 54 | if (requireNamespace("SingleCellExperiment", quietly=TRUE)) { 55 | setAs("LinearEmbeddingMatrix", "nmf", function(from) { 56 | d <- factorData(from)$d 57 | names(d) <- rownames(factorData(from)) 58 | new("nmf", 59 | w = featureLoadings(from), 60 | d = d, 61 | h = t(sampleFactors(from)), 62 | misc = metadata(from)) 63 | }) 64 | } 65 | -------------------------------------------------------------------------------- /R/cross_validate_nmf.R: -------------------------------------------------------------------------------- 1 | #' Determine best rank for NMF using cross-validation 2 | #' 3 | #' @description Find the rank that minimizes the mean squared error of test set reconstruction using cross-validation. 
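#'
#' A minimal sketch (the rank grid and replicate count below are illustrative
#' only; \code{A} may be a sparse \code{dgCMatrix}, a dense matrix, or a list of
#' \code{dgCMatrix} blocks with identical rownames):
#' \dontrun{
#' cv <- cross_validate_nmf(A, ranks = c(5, 10, 20, 40), n_replicates = 3)
#' plot(cv)        # test-set reconstruction error vs. rank
#' GetBestRank(cv) # lowest rank that minimizes test-set error
#' }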
4 | #' 5 | #' @inheritParams run_nmf 6 | #' @param ranks a vector of ranks at which to fit a model and compute test set reconstruction error 7 | #' @param n_replicates number of random test sets 8 | #' @param test_density fraction of values to include in the test set 9 | #' @param tol_overfit stopping criterion, maximum increase in test set reconstruction error at any iteration compared to test set reconstruction error at \code{trace_test_mse} 10 | #' @param trace_test_mse first iteration at which to calculate test set reconstruction error, and the error to compare all later iterations to when determining whether overfitting has occurred. 11 | #' @return a \code{data.frame} of test set reconstruction error vs. rank of class \code{nmf_cross_validate_data}. Use \code{plot} method to visualize or \code{min} to compute optimal rank. 12 | #' @rdname cross_validate_nmf 13 | #' @param ... additional arguments (not implemented) 14 | #' @export 15 | #' @importFrom utils txtProgressBar setTxtProgressBar 16 | #' @importFrom stats runif 17 | #' 18 | cross_validate_nmf <- function(A, ranks, n_replicates = 3, tol = 1e-4, maxit = 100, verbose = 1, L1 = 0.01, L2 = 0, threads = 0, test_density = 0.05, tol_overfit = 1e-4, trace_test_mse = 5) { 19 | if (L1 >= 1) { 20 | stop("L1 penalty must be strictly in the range (0, 1]") 21 | } 22 | 23 | if (test_density > 0.2 | test_density < 0.01) { 24 | warning("'test_density' should not be greater than 0.2 or less than 0.01, as a general rule of thumb") 25 | } 26 | 27 | if("list" %in% class(A)){ 28 | # check that number of rows is identical 29 | if(var(sapply(A, nrow)) != 0) 30 | stop("number of rows in all provided 'A' matrices are not identical") 31 | if(!all(sapply(A, function(x) class(x) == "dgCMatrix"))) 32 | stop("if providing a list, you must provide a list of all 'dgCMatrix' objects") 33 | if(!is.null(rownames(A[[1]]))){ 34 | if(!all(sapply(A, function(x) all.equal(rownames(x), rownames(A[[1]]))))) stop("rownames of all dgCMatrix objects in list must be identical") 35 | } 36 | 37 | # generate a distributed transpose 38 | if(verbose > 0) cat("generating a distributed transpose of input matrix list\n") 39 | block_sizes <- floor(c(seq(1, nrow(A[[1]]), nrow(A[[1]]) /(length(A))), nrow(A[[1]]) + 1)) 40 | At <- list() 41 | if(verbose > 0) pb <- txtProgressBar(min = 0, max = length(A)) 42 | for(i in 1:length(A)){ 43 | At[[i]] <- list() 44 | for(j in 1:length(A)){ 45 | At[[i]][[j]] <- t(A[[j]][block_sizes[i]:(block_sizes[i+1] - 1), ]) 46 | } 47 | At[[i]] <- do.call(rbind, At[[i]]) 48 | if(verbose > 0) setTxtProgressBar(pb, i) 49 | } 50 | if(verbose > 0) close(pb) 51 | if (verbose > 0) cat("running with sparse optimization\n") 52 | w_init <- lapply(1:n_replicates, function(x) matrix(stats::runif(nrow(A[[1]]) * max(ranks)), max(ranks), nrow(A[[1]]))) 53 | sparse_list <- TRUE 54 | } else { 55 | if (class(A)[[1]] != "matrix") { 56 | if (verbose > 0) cat("running with sparse optimization\n") 57 | A <- as(as(as(A, "dMatrix"), "generalMatrix"), "CsparseMatrix") 58 | At <- Matrix::t(A) 59 | dense_mode <- FALSE 60 | } else { 61 | if (verbose > 0) cat("running with dense optimization\n") 62 | At <- t(A) 63 | dense_mode <- TRUE 64 | } 65 | w_init <- lapply(1:n_replicates, function(x) matrix(stats::runif(nrow(A) * max(ranks)), max(ranks), nrow(A))) 66 | sparse_list <- FALSE 67 | } 68 | 69 | df <- expand.grid("k" = ranks, "rep" = 1:n_replicates) 70 | df2 <- list() 71 | df$test_error <- 0 72 | if (verbose == 1) { 73 | pb <- utils::txtProgressBar(min = 0, max = nrow(df), style = 3) 
74 | } 75 | for (i in 1:nrow(df)) { 76 | rep <- df$rep[[i]] 77 | if (verbose > 1) { 78 | cat(paste0("k = ", df$k[[i]], ", rep = ", rep, " (", i, "/", nrow(df), "):\n")) 79 | } 80 | if(!sparse_list){ 81 | if (dense_mode) { 82 | model <- c_ard_nmf_dense(A, At, tol, maxit, verbose > 1, L1, L2, threads, w_init[[rep]][1:df$k[[i]], ], abs(.Random.seed[[3 + rep]]), round(1 / test_density), tol_overfit, trace_test_mse) 83 | } else { 84 | model <- c_ard_nmf(A, At, tol, maxit, verbose > 1, L1, L2, threads, w_init[[rep]][1:df$k[[i]], ], abs(.Random.seed[[3 + rep]]), round(1 / test_density), tol_overfit, trace_test_mse) 85 | } 86 | } else { 87 | model <- c_ard_nmf_sparse_list(A, At, tol, maxit, verbose > 1, L1, L2, threads, w_init[[rep]][1:df$k[[i]], ], abs(.Random.seed[[3 + rep]]), round(1 / test_density), tol_overfit, trace_test_mse) 88 | } 89 | df$test_error[[i]] <- model$test_mse[[length(model$test_mse)]] 90 | df2[[length(df2) + 1]] <- data.frame("k" = df$k[[i]], "rep" = df$rep[[i]], "test_error" = model$test_mse, "iter" = model$iter, "tol" = model$tol) 91 | if (verbose == 1) utils::setTxtProgressBar(pb, i) 92 | if (verbose > 1) cat(paste0("test set error: ", sprintf(df$test_error[[i]], fmt = "%#.4e"), "\n\n")) 93 | 94 | if (model$test_mse[[length(model$test_mse)]] / model$test_mse[[1]] > (1 + tol_overfit)) { 95 | if (verbose > 1) cat(paste0("overfitting detected, lower rank recommended\n")) 96 | } 97 | } 98 | if (verbose == 1) close(pb) 99 | 100 | df$rep <- factor(df$rep) 101 | class(df) <- c("cross_validate_nmf_data", "data.frame") 102 | df2 <- do.call(rbind, df2) 103 | class(df2) <- c("cross_validate_nmf_data", "data.frame") 104 | df2 105 | } 106 | -------------------------------------------------------------------------------- /R/getDesigns.R: -------------------------------------------------------------------------------- 1 | #' Refactored out from AnnotateNMF to ease argument handling 2 | #' 3 | #' @param columns factor columns of meta.data, optional if !is.null(designs) 4 | #' @param meta.data a data.frame of annotations, optional if !is.null(designs) 5 | #' @param designs named list of design matrices (supersedes meta.data/columns) 6 | #' @param max.levels maximum number of levels permitted for a factor to be kept 7 | #' 8 | #' @return a named list of design matrices, if one was not provided 9 | #' @export 10 | getDesigns <- function(columns = NULL, meta.data = NULL, designs = NULL, max.levels = 200) { 11 | 12 | if (is.null(designs)) { 13 | stopifnot(any(!is.null(c(columns, meta.data)))) 14 | columns <- checkColumns(meta.data = meta.data, 15 | columns = columns, 16 | max.levels = max.levels) 17 | designs <- lapply(columns, getModelMatrix, meta.data = meta.data) 18 | } 19 | 20 | checkDesigns(designs) 21 | } 22 | -------------------------------------------------------------------------------- /R/getModelFit.R: -------------------------------------------------------------------------------- 1 | #' get linear all-pairs comparisons fits for a design matrix and data matrix 2 | #' 3 | #' Continuing along with the theme of "stupid limma tricks", this function 4 | #' fits and shrinks a means model for a factor. The proportion of factors 5 | #' assumed to have a fold-change > 0 is 1%, and a robust fit is applied. 6 | #' 7 | #' @param design a model.matrix (or a sparse.model.matrix, perhaps) 8 | #' @param object a data.matrix, Seurat DimReduc, or RcppML nmf object 9 | #' @param center center the factor matrix for testing? (TRUE) 10 | #' @param ... 
additional arguments, passed to base::scale 11 | #' 12 | #' 13 | #' @examples 14 | #' if (FALSE) { 15 | #' get_pbmc3k_data() %>% NormalizeData() -> pbmc3k 16 | #' design <- model.matrix(~ 0 + cell_type, data=pbmc3k@meta.data) 17 | #' fit <- getModelFit(design, pbmc3k) # toy fit on lognormcounts 18 | #' # Subsetting data to non-NA observations to match design matrix. 19 | #' limma::topTable(fit) 20 | #' } 21 | #' 22 | #' @export 23 | getModelFit <- function(design, object, center=TRUE, ...) { 24 | 25 | dat <- object 26 | if (is(object, "nmf")) dat <- object@h # RcppML nmf 27 | if (is(object, "Seurat")) dat <- object@assays$RNA$data 28 | if (is(object, "DimReduc")) dat <- t(object@cell.embeddings) 29 | if (is(object, "SingleCellExperiment")) dat <- logcounts(object) 30 | # SingleCellExperiment::reducedDim(object, dimname) just returns a data.matrix 31 | if (ncol(dat) < nrow(design)) dat <- t(dat) # transpose reduced dims if needed 32 | 33 | # janky, but should be foolproof 34 | if (nrow(design) != ncol(dat)) { 35 | if (!all(rownames(design) %in% colnames(dat))) { 36 | message("Rows of the design matrix do not match columns of the object.") 37 | message("This usually means that there are NAs in the sample metadata.") 38 | message("Ensure rownames of your design matrix match data column names.") 39 | message("Alternatively, provide object[, !is.na(object$predictor)]") 40 | message("so that the dimensions of the data and design matrices match.") 41 | stop("Cannot proceed as called.") 42 | } else { 43 | message("Subsetting data to non-NA observations to match design matrix.") 44 | tofit <- dat[, rownames(design)] 45 | } 46 | } else { 47 | tofit <- dat 48 | if (is.null(rownames(design))) { 49 | warning("Design matrix has appropriate rank, but no row names. Beware!") 50 | } else if (!identical(rownames(design), colnames(tofit))) { 51 | warning("Design matrix row names do not match data observation names!") 52 | warning("This is usually a VERY BAD THING. You MUST check your data.") 53 | warning("If this warning message persists, file a bug with a reprex.") 54 | } 55 | } 56 | 57 | if (center) tofit <- t(scale(t(tofit), ...)) 58 | fit <- eBayes(lmFit(tofit, design), proportion=0.01, robust=TRUE) 59 | fit$centered <- center 60 | return(fit) 61 | 62 | } 63 | -------------------------------------------------------------------------------- /R/getModelMatrix.R: -------------------------------------------------------------------------------- 1 | #' automatically generate a means model (one-vs-all group associations) 2 | #' 3 | #' A little-known trick in limma is to fit ~ 0 + group for a means model. 4 | #' This function automates that for a data.frame and a factor column of it. 5 | #' 6 | #' @param field the name of a column in the data.frame, or the column 7 | #' @param meta.data a data.frame with one or more factor columns, or NULL 8 | #' @param sparse fit a sparse model.matrix? (FALSE) 9 | #' @param ova fit a One-Vs-All model matrix (no referent)? (TRUE) 10 | #' @param ... any additional params to pass to model.matrix 11 | #' 12 | #' @details 13 | #' If a factor (and no meta.data) is supplied (usually by with(meta.data, ...)), 14 | #' getModelMatrix will attempt to figure out the text to remove from the matrix 15 | #' column names by using deparse() and match.call() on the arguments (voodoo!). 16 | #' In order to fit one-vs-all comparisons, a means model is the default. If you 17 | #' have a referent group (e.g. normal bone marrow vs. 
a bunch of leukemia cells) 18 | #' or simply don't want a means model, set `ova` (one vs all) to FALSE. 19 | #' 20 | #' @return a model.matrix or sparse.model.matrix (if sparse==TRUE) 21 | #' 22 | #' @examples 23 | #' 24 | #' covs <- get_pbmc3k_data()@meta.data 25 | #' design <- getModelMatrix("cell_type", covs) 26 | #' head(design) 27 | #' sparsedesign <- getModelMatrix("cell_type", covs, sparse=TRUE) 28 | #' head(sparsedesign) 29 | #' 30 | #' if (FALSE) { 31 | #' # test Seurat and SCE support too 32 | #' mm1 <- getModelMatrix("cell_type", pbmc3k) 33 | #' mm2 <- getModelMatrix("cell_type", pbmc) 34 | #' identical(mm1, mm2) 35 | #' # [1] TRUE 36 | #' fit1 <- getModelFit(mm2, pbmc3k) 37 | #' fit2 <- getModelFit(mm1, pbmc) 38 | #' identical(fit1, fit2) 39 | #' # [1] TRUE 40 | #' limma::topTable(fit1) 41 | #' } 42 | #' 43 | #' @import Matrix 44 | #' 45 | #' @export 46 | getModelMatrix <- function(field, meta.data=NULL, sparse=FALSE, ova=TRUE, ...) { 47 | 48 | if (is.null(meta.data)) { 49 | if (is.factor(field) & nlevels(field) > 1) { 50 | fieldname <- as.character(sapply(match.call()[-1], deparse)[1]) # voodoo 51 | meta.data <- data.frame(field) 52 | names(meta.data) <- fieldname 53 | field <- fieldname 54 | } else { 55 | stop("If meta.data is NULL, `field` must be a factor with > 1 levels.") 56 | } 57 | } else if (is(meta.data, "Seurat")) { 58 | meta.data <- meta.data@meta.data 59 | } else if (is(meta.data, "SingleCellExperiment")) { 60 | meta.data <- colData(meta.data) 61 | } else { 62 | stopifnot(field %in% names(meta.data)) 63 | } 64 | 65 | notNA <- which(is.na(meta.data[[field]])) 66 | if (!sparse) { 67 | if (ova) { 68 | mat <- model.matrix(~ 0 + ., data=meta.data[, field, drop=FALSE], ...) 69 | } else { 70 | message("Fitting a model with a referent group. Be sure you want this!") 71 | mat <- model.matrix(~ ., data=meta.data[, field, drop=FALSE], ...) 72 | } 73 | } else { 74 | if (ova) { 75 | mat <- sparse.model.matrix(~ 0 + ., data=meta.data[, field, drop=FALSE]) 76 | } else { 77 | message("Fitting a model with a referent group. Be sure you want this!") 78 | mat <- sparse.model.matrix(~ ., data=meta.data[, field, drop=FALSE], ...) 79 | } 80 | } 81 | colnames(mat) <- gsub(field, "", colnames(mat)) 82 | return(mat) 83 | 84 | } 85 | -------------------------------------------------------------------------------- /R/getModelResults.R: -------------------------------------------------------------------------------- 1 | #' extract data.frame of lods and pvalues for differential factor representation 2 | #' 3 | #' log-odds of non-null differences for a response by a factor are in fit$lods 4 | #' (which will usually be a matrix), and one-sided p-values for the moderated t 5 | #' test are computed from fit$t and fit$df.total using pt(t, df, lower=FALSE), 6 | #' then adjusted using the step-up procedure of Benjamini & Hochberg. 7 | #' 8 | #' @param fit an lmFit result from limma, shrunken with eBayes() 9 | #' @param noneg drop results with negative lods scores? (TRUE) 10 | #' @param noint drop any results for '(Intercept)'? (TRUE) 11 | #' 12 | #' @return a data.frame with columns 'factor', 'group', 'fc', and 'p' 13 | #' 14 | #' @details If an (Intercept) term is found, it will be dropped, and if 15 | #' negative LODS scores are encountered, they will be dropped, 16 | #' unless `noneg` and/or `noint` are FALSE. 
17 | #' 18 | #' @importFrom reshape2 melt 19 | #' @import limma 20 | #' 21 | #' @export 22 | getModelResults <- function(fit, noneg=TRUE, noint=TRUE) { 23 | 24 | # fits are centered, so use signed lods for evidence 25 | fcl <- with(fit, melt(lods)) 26 | names(fcl)[3] <- "lods" 27 | fct <- with(fit, melt(t)) 28 | names(fct)[3] <- "t" 29 | fcp <- merge(fcl, fct) 30 | names(fcp)[1:2] <- c("factor", "group") 31 | fcp$df <- fit$df.total[fcp$factor] 32 | fcp$p_raw <- with(fcp, pt(t, df, lower=FALSE)) 33 | fcp$p <- p.adjust(fcp$p_raw, method="fdr") 34 | 35 | # better might be to fit without an intercept term 36 | if (noneg) fcp <- subset(fcp, sign(lods) > 0) 37 | if (noint) fcp <- subset(fcp, group != "(Intercept)") 38 | if (length(fcp) == 0) message("No associations after filtering.") 39 | names(fcp) <- sub("^lods$", "fc", names(fcp)) 40 | return(fcp[, c("group", "factor", "fc", "p")]) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /R/get_pbmc3k_data.R: -------------------------------------------------------------------------------- 1 | #' Load the pbmc3k dataset 2 | #' 3 | #' 2,700 peripheral blood mononuclear cells (PBMC) from 10x genomics taken from the "SeuratData" package 4 | #' 5 | #' @description 6 | #' This dataset is adapted directly from the Satija lab "pbmc3k" dataset used in their popular tutorial on guided clustering. It is provided in this package for convenience since "SeuratData" is not available on CRAN. 7 | #' 8 | #' For more information, please see their documentation. 9 | #' 10 | #' @returns Seurat object with \code{$cell_type} info in the \code{meta.data} slot. 11 | #' 12 | #' @export 13 | #' 14 | get_pbmc3k_data <- function() { 15 | data(pbmc3k) 16 | pbmc3k 17 | A <- CreateSeuratObject(counts = new("dgCMatrix", i = pbmc3k$i, p = pbmc3k$p, Dim = pbmc3k$Dim, Dimnames = pbmc3k$Dimnames, x = as.numeric(inverse.rle(pbmc3k$x)))) 18 | A@meta.data$cell_type <- pbmc3k$cell_type 19 | A 20 | } 21 | -------------------------------------------------------------------------------- /R/methods.R: -------------------------------------------------------------------------------- 1 | #' @exportMethod coerce 2 | #' @importClassesFrom RcppML nmf 3 | setAs("list", "nmf", 4 | function(from) { 5 | 6 | msg <- NULL 7 | required <- c("w", "d", "h") 8 | if (!all(required %in% names(from))) { 9 | msg <- c(msg, 10 | "Cannot find $w, $d, and $h to create nmf object from list.") 11 | } else { 12 | if (ncol(from$w) != nrow(from$h)) { 13 | msg <- c(msg, 14 | "The $w and $h matrices are of unequal rank. Cannot coerce.") 15 | } 16 | if (ncol(from$w) != length(from$d)) { 17 | msg <- c(msg, 18 | "The scaling diagonal $d is the wrong length. 
Cannot coerce.") 19 | } 20 | } 21 | 22 | if (!is.null(msg)) { 23 | stop(msg) 24 | } else { 25 | new("nmf", 26 | w = from$w, 27 | d = from$d, 28 | h = from$h, 29 | misc = from[setdiff(names(from), required)]) 30 | } 31 | 32 | }) 33 | 34 | 35 | #' @exportMethod [ 36 | #' @importClassesFrom RcppML nmf 37 | setMethod("[", "nmf", 38 | function(x, i, j, ..., drop=TRUE) { 39 | if (missing(i) & missing(j)) return(x) 40 | if (missing(i)) i <- seq_along(x@d) 41 | if (missing(j)) j <- colnames(x@h) 42 | if ("covs" %in% names(x@misc)) x@misc$covs <- x@misc$covs[j, ] 43 | new("nmf", w = x@w[, i], d = x@d[i], h = x@h[i, j], misc = x@misc) 44 | }) 45 | 46 | 47 | #' @exportMethod $ 48 | #' @importClassesFrom RcppML nmf 49 | setMethod("$", "nmf", 50 | function(x, name) { 51 | if ("covs" %in% names(x@misc)) { 52 | x@misc$covs[[name]] 53 | } else { 54 | NULL 55 | } 56 | }) 57 | 58 | 59 | #' @exportMethod $<- 60 | #' @importClassesFrom RcppML nmf 61 | setReplaceMethod("$", "nmf", 62 | function(x, name, value) { 63 | if (is.null(x@misc$covs)) { 64 | x@misc$covs <- data.frame(row.names = colnames(x@h)) 65 | } 66 | x@misc$covs[[name]] <- value 67 | return(x) 68 | }) 69 | 70 | 71 | # make seurat objects behave vaguely like reasonable data structures 72 | # if SingleCellExperiment has been loaded, since otherwise who knows 73 | if (requireNamespace("Seurat") & requireNamespace("SingleCellExperiment")) { 74 | require("Seurat") 75 | require("SingleCellExperiment") 76 | setMethod("assay", "Seurat", function(x, i, withDimnames = TRUE, ...) x@assays[[i]]) 77 | setMethod("assays", "Seurat", function(x, withDimnames = TRUE, ...) x@assays) 78 | setMethod("assayNames", "Seurat", function(x, ...) names(x@assays)) 79 | setMethod("metadata", "Seurat", function(x, withDimnames = TRUE, ...) x@assays[[i]]) 80 | setMethod("reducedDim", "Seurat", function(x, i, ...) x@reductions[[i]]) 81 | setMethod("reducedDims", "Seurat", function(x, ...) x@reductions) 82 | setMethod("reducedDimNames", "Seurat", function(x) names(x@reductions)) 83 | } 84 | 85 | -------------------------------------------------------------------------------- /R/pbmc3k.R: -------------------------------------------------------------------------------- 1 | #' Compressed form of pbmc3k dataset 2 | #' 3 | #' @description See \code{\link{get_pbmc3k_data}} 4 | #' 5 | #' @md 6 | #' @docType data 7 | #' @usage data(pbmc3k) 8 | #' @format compressed version of the \code{dgCMatrix}, use \code{\link{get_pbmc3k_data}} to use this dataset. 9 | "pbmc3k" 10 | -------------------------------------------------------------------------------- /R/plot.cross_validate_nmf_data.R: -------------------------------------------------------------------------------- 1 | #' plot the result of cross-validating rank selection in NMF 2 | #' 3 | #' @param x the result of \code{cross_validate_nmf} (a data.frame) 4 | #' @param detail level of detail to plot 5 | #' @inheritParams ard_nmf 6 | #' @rdname cross_validate_nmf 7 | #' 8 | #' @import ggplot2 9 | #' 10 | #' @export 11 | #' 12 | plot.cross_validate_nmf_data <- function(x, detail = 2, tol.overfit = 1e-4, ...) 
{ 13 | if (ncol(x) == 5 & detail == 1) { 14 | x <- as.data.frame(group_by(x, rep, k) %>% slice(which.max(iter))) 15 | x$iter <- NULL 16 | } 17 | if (ncol(x) < 5) { 18 | x$rep <- factor(x$rep) 19 | # simple format (detail_level = 1) 20 | # normalize each replicate to the same minimum 21 | for (rep in levels(x$rep)) { 22 | idx <- which(x$rep == rep) 23 | x$test_error[idx] <- x$test_error[idx] / min(x$test_error[idx]) 24 | } 25 | best_rank <- GetBestRank(x, tol.overfit) 26 | ggplot(x, aes(k, test_error, color = factor(rep))) + 27 | geom_point() + 28 | geom_line() + 29 | theme_classic() + 30 | labs(x = "factorization rank", y = "relative test set error", color = "replicate", caption = paste0("(best rank is k = ", best_rank, ")")) + 31 | theme(aspect.ratio = 1, plot.caption = element_text(hjust = 0.5)) + 32 | geom_vline(xintercept = best_rank, linetype = "dashed", color = "red") + 33 | scale_y_continuous(trans = "log10") 34 | } else { 35 | # detail_level = 2 format 36 | best_rank <- GetBestRank(x, tol.overfit) 37 | if (length(unique(x$rep)) == 1) { 38 | ggplot(x, aes(k, test_error, color = iter, group = iter)) + 39 | geom_line() + 40 | scale_color_viridis_c(option = "B") + 41 | theme_classic() + 42 | theme(aspect.ratio = 1, plot.caption = element_text(hjust = 0.5)) + 43 | geom_vline(xintercept = best_rank, linetype = "dashed", color = "red") + 44 | scale_y_continuous(trans = "log10") + 45 | labs(x = "factorization rank", y = "test set error", color = "model iteration", caption = paste0("(best rank is k = ", best_rank, ")")) 46 | } else { 47 | ggplot(x, aes(k, test_error, color = iter, group = iter)) + 48 | geom_line() + 49 | scale_color_viridis_c(option = "B") + 50 | theme_classic() + 51 | theme(aspect.ratio = 1, plot.caption = element_text(hjust = 0.5)) + 52 | geom_vline(xintercept = best_rank, linetype = "dashed", color = "red") + 53 | scale_y_continuous(trans = "log10") + 54 | labs(x = "factorization rank", y = "test set error", color = "model iteration", caption = paste0("(best rank is k = ", best_rank, ")")) + 55 | facet_grid(cols = vars(rep)) 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /R/plot.nmf_metadata_summary.R: -------------------------------------------------------------------------------- 1 | #' @rdname MetadataSummary 2 | #' 3 | #' @param x a data.frame 4 | #' @param ... not implemented 5 | #' 6 | #' @importFrom reshape2 melt 7 | #' 8 | #' @export 9 | #' 10 | plot.nmf_metadata_summary <- function(x, ...) 
{ 11 | m <- reshape2::melt(as.matrix(x)) 12 | colnames(m) <- c("group", "factor", "frac") 13 | ggplot(m, aes(x = factor(factor, levels = unique(factor)), y = frac, fill = group)) + 14 | geom_bar(position = "fill", stat = "identity") + 15 | theme_classic() + 16 | theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + 17 | labs(x = "factor", y = "Representation in group") + 18 | scale_y_continuous(expand = c(0, 0)) 19 | } 20 | 21 | 22 | #' @rdname MetadataSummary 23 | #' 24 | #' @name MetadataSummary 25 | #' 26 | #' @export 27 | #' 28 | .S3method("plot", "nmf_metadata_summary", plot.nmf_metadata_summary) 29 | -------------------------------------------------------------------------------- /R/plotFactorWeights.R: -------------------------------------------------------------------------------- 1 | #' convenience function to map one or more factors along a genome using igvR 2 | #' 3 | #' @param object an nmf object or something with a @w weights matrix 4 | #' @param gr a GRanges object with coordinates for the features 5 | #' @param factors which factors to plot weights for (default: 1, 2, 3) 6 | #' @param plot use igvR to plot the factors? (TRUE, if igvR detected) 7 | #' 8 | #' @return the GRanges gr, but with factor weights added as mcols 9 | #' 10 | #' @details 11 | #' This function presumes a GRanges object will be supplied, which in turn 12 | #' presumes that the GenomicRanges package is installed from Bioconductor. 13 | #' Further, if plot == TRUE, the igvR package is presumed to be installed. 14 | #' If either of these presumptions are false, or if factor weights cannot 15 | #' be mapped to identifiers in the GRanges, this function will fail. 16 | #' 17 | #' @export 18 | #' 19 | plotFactorWeights <- function(object, gr, factors=1:3, plot=FALSE) { 20 | 21 | requireNamespace("GenomicRanges") 22 | stopifnot(is(gr, "GRanges")) 23 | stopifnot(all(rownames(object@w) %in% names(gr))) 24 | gr <- gr[rownames(object@w)] 25 | 26 | for (fact in factors) { 27 | if (is.numeric(fact) | is.integer(fact)) fact <- colnames(object@w)[fact] 28 | mcols(gr)[, fact] <- object@w[, fact] 29 | } 30 | 31 | if (plot) { 32 | requireNamespace("igvR") 33 | message("igvR support is in process") 34 | } 35 | 36 | return(gr) 37 | 38 | } 39 | -------------------------------------------------------------------------------- /R/rasterize_rowwise.R: -------------------------------------------------------------------------------- 1 | # Row-wise rasterization of a sparse matrix 2 | #' 3 | #' Bin together values from every block of \code{n} rows and calculate mean value, with a sparse \code{dgCMatrix} as input and a dense \code{matrix} as output. This technique is useful in some genomics applications. 
4 | #' 5 | #' @param A matrix to be rasterized 6 | #' @param n row-wise binning size 7 | #' @param threads number of threads to use (0 to let OpenMP decide how many are available and use them all) 8 | #' @export 9 | #' 10 | RasterizeRowwise <- function(A, n = 10, threads = 0){ 11 | if(class(A)[[1]] == "dgCMatrix"){ 12 | B <- rowwise_compress_sparse(A, n, threads) 13 | } else { 14 | A <- as.matrix(A) 15 | B <- rowwise_compress_dense(A, n, threads) 16 | } 17 | rownames(B) <- rownames(A)[seq(1, floor(nrow(A) / n) * n, n)] 18 | colnames(B) <- colnames(A) 19 | B 20 | } -------------------------------------------------------------------------------- /R/run_nmf.R: -------------------------------------------------------------------------------- 1 | #' @title Run Non-negative Matrix Factorization 2 | #' 3 | #' @description Run NMF on a sparse matrix with automatic rank determination by cross-validation 4 | #' 5 | #' @param A sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames 6 | #' @param rank factorization rank 7 | #' @param tol tolerance of the fit (1e-5 for publication quality, 1e-4 for cross-validation) 8 | #' @param maxit maximum number of iterations 9 | #' @param verbose verbosity level 10 | #' @param L1 L1/LASSO penalty to increase sparsity of model 11 | #' @param L2 L2/Ridge penalty to increase angles between factors 12 | #' @param threads number of threads for parallelization across CPUs, 0 = use all available threads 13 | #' @param compression_level either 2 or 3, for VCSC or IVCSC, respectively. For development purposes. 14 | #' @rdname run_nmf 15 | #' @importFrom stats runif 16 | #' @export 17 | #' 18 | run_nmf <- function(A, rank, tol = 1e-4, maxit = 100, verbose = TRUE, L1 = 0.01, L2 = 0, threads = 0, compression_level = 3) { 19 | use_vcsc <- compression_level == 2 20 | 21 | if ("list" %in% class(A)) { 22 | # check that number of rows is identical 23 | if (var(sapply(A, nrow)) != 0) { 24 | stop("number of rows in all provided 'A' matrices are not identical") 25 | } 26 | if (!all(sapply(A, function(x) class(x) == "dgCMatrix"))) { 27 | stop("if providing a list, you must provide a list of all 'dgCMatrix' objects") 28 | } 29 | if (!is.null(rownames(A[[1]]))) { 30 | if (!all(sapply(A, function(x) all.equal(rownames(x), rownames(A[[1]]))))) stop("rownames of all dgCMatrix objects in list must be identical") 31 | } 32 | w_init <- matrix(stats::runif(nrow(A[[1]]) * rank), rank, nrow(A[[1]])) 33 | model <- run_nmf_on_sparsematrix_list(A, tol, maxit, verbose, threads, w_init, use_vcsc) 34 | rn <- rownames(A[[1]]) 35 | cn <- do.call(c, lapply(A, colnames)) 36 | } else { 37 | if (class(A)[[1]] != "matrix") { 38 | if (verbose > 0) cat("running with sparse optimization\n") 39 | A <- as(as(as(A, "dMatrix"), "generalMatrix"), "CsparseMatrix") 40 | At <- Matrix::t(A) 41 | dense_mode <- FALSE 42 | } else { 43 | if (verbose > 0) cat("running with dense optimization\n") 44 | At <- t(A) 45 | dense_mode <- TRUE 46 | } 47 | 48 | if(length(L1) != 2){ 49 | L1 <- c(L1[[1]], L1[[1]]) 50 | } 51 | if(length(L2) != 2){ 52 | L2 <- c(L2[[1]], L2[[1]]) 53 | } 54 | 55 | w_init <- matrix(stats::runif(nrow(A) * rank), rank, nrow(A)) 56 | if (dense_mode) { 57 | model <- c_nmf_dense(A, At, tol, maxit, verbose, L1[[1]], L1[[2]], L2[[1]], L2[[2]], threads, w_init) 58 | } else { 59 | model <- c_nmf(A, At, tol, maxit, verbose, L1[[1]], L1[[2]], L2[[1]], L2[[2]], threads, w_init) 60 | } 61 | rn <- rownames(A) 62 | cn <- 
colnames(A) 63 | } 64 | 65 | sort_index <- order(model$d, decreasing = TRUE) 66 | model$d <- model$d[sort_index] 67 | model$w <- t(model$w)[, sort_index] 68 | model$h <- model$h[sort_index, ] 69 | if (rank == 1) { 70 | model$w <- matrix(model$w, ncol=1) 71 | model$h <- matrix(model$h, nrow=1) 72 | } 73 | rownames(model$w) <- rn 74 | colnames(model$h) <- cn 75 | colnames(model$w) <- rownames(model$h) <- paste0("NMF_", 1:ncol(model$w)) 76 | model 77 | } 78 | 79 | distributed_transpose <- function(A){ 80 | library(Matrix) 81 | setwd("/active/debruinz_project/debruinz/CellCensusNMF") 82 | A <- lapply(paste0("../../CellCensus/R/chunk", 1:100, "_counts.rds"), readRDS) 83 | block_sizes <- floor(c(seq(1, nrow(A[[1]]), nrow(A[[1]]) / (length(A))), nrow(A[[1]]) + 1)) 84 | for (i in 1:length(block_sizes)) { 85 | cat("CHUNK", i, "/100\n") 86 | At <- list() 87 | pb <- txtProgressBar(min = 0, max = length(A), style = 3) 88 | for (j in 1:length(A)) { 89 | At[[j]] <- t(A[[j]][block_sizes[i]:(block_sizes[i + 1] - 1), ]) 90 | setTxtProgressBar(pb, j) 91 | } 92 | cat(" rbinding\n") 93 | At <- do.call(rbind, At) 94 | cat(" saving\n") 95 | saveRDS(At, paste0("chunk", i, "_transpose_counts.rds")) 96 | } 97 | } 98 | 99 | split_into_chunks <- function(A, n_chunks){ 100 | breakpoints <- seq(1, ncol(A), floor(ncol(A) / n_chunks)) 101 | breakpoints[length(breakpoints) + 1] <- ncol(A) 102 | result <- list() 103 | for(i in 1:n_chunks){ 104 | result[[i]] <- A[,breakpoints[i]:breakpoints[i + 1]] 105 | } 106 | result 107 | } 108 | -------------------------------------------------------------------------------- /R/singlet.R: -------------------------------------------------------------------------------- 1 | #' Singlet 2 | #' 3 | #' Fast single-cell analysis with non-negative dimensional reductions 4 | #' 5 | #' @details 6 | #' There are reasons to not use PCA. 7 | #' * PCA fits to missing signal, 8 | #' * considers only highly variable features, 9 | #' * is almost useless without further graph-based analysis, 10 | #' * requires centering and scaling of your data, 11 | #' * and is robust only within experiments. 12 | #' 13 | #' Instead, you should use Non-negative Matrix Factorization (NMF). 14 | #' * NMF imputes missing signal, 15 | #' * learns models using all features, 16 | #' * does everything PCA does and provides useful information itself, 17 | #' * requires only variance stabilization, 18 | #' * and is robust across experiments. 19 | #' 20 | #' Singlet is all about extremely fast NMF for single-cell dimensional reduction and integration. 21 | #' 22 | #' See the vignettes to get started. 23 | #' 24 | #' @useDynLib singlet, .registration = TRUE 25 | #' 26 | #' @name singlet 27 | #' 28 | #' @author Zach DeBruine 29 | #' 30 | #' @aliases singlet-package 31 | #' 32 | #' @import ggplot2 33 | #' @import Seurat 34 | #' @import dplyr 35 | #' @import Matrix 36 | #' @import msigdbr 37 | #' @import fgsea 38 | #' 39 | #' @importFrom methods as new is 40 | #' @importFrom stats dist hclust model.matrix p.adjust 41 | #' @importFrom utils data 42 | #' 43 | #' @md 44 | #' 45 | "_PACKAGE" 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # singlet v.0.0.99 2 | 3 | See the [pkgdown website](https://zdebruine.github.io/singlet/)! 4 | 5 | Singlet is in active development right now. Do not expect stable functionality yet. Coming soon! 
6 | 7 | Singlet brings fast Non-negative Matrix Factorization (NMF) with automatic rank determination to the Seurat package for single-cell analysis. 8 | 9 | ## Install 10 | 11 | First install the development version of `RcppML`, note that the CRAN RcppML version will not work: 12 | 13 | ```{R} 14 | devtools::install_github("zdebruine/RcppML") 15 | ``` 16 | 17 | Then install required dependencies, including `limma` and `fgsea`: 18 | 19 | ```{R} 20 | BiocManager::install("fgsea") 21 | BiocManager::install("limma") 22 | ``` 23 | 24 | Now install `singlet`: 25 | 26 | ```{R} 27 | devtools::install_github("zdebruine/singlet") 28 | ``` 29 | 30 | ## Introductory Vignette 31 | 32 | [Guided clustering tutorial](https://zdebruine.github.io/singlet/articles/Guided_Clustering_with_NMF.html) 33 | 34 | ## Dimension Reduction with NMF 35 | 36 | Analyze your single-cell assay with NMF: 37 | 38 | ```{R} 39 | library(singlet) 40 | library(Seurat) 41 | library(dplyr) 42 | library(cowplot) 43 | set.seed(123) # for reproducible NMF models 44 | get_pbmc3k_data() %>% NormalizeData %>% RunNMF -> pbmc3k 45 | pbmc3k <- RunUMAP(pbmc3k, reduction = "nmf", dims = 1:ncol(pbmc3k@reductions$nmf)) 46 | 47 | plot_grid( 48 | RankPlot(pbmc3k) + NoLegend(), 49 | DimPlot(pbmc3k) + NoLegend(), 50 | ncol = 2) 51 | ``` 52 | 53 | NMF can do almost anything that PCA can do, but also imputes missing signal, always has an optimal rank (for variance-stabilized data), uses all the information in your assay (incl. "non-variable" genes), is robust across experiments, learns signatures of transcriptional activity, and is colinear and non-negative (interpretable) rather than orthogonal and signed (not interpretable) 54 | 55 | Singlet internally provides the **fastest implementation of NMF**. Cross-validation can take a few minutes for datasets with a few ten thousand cells, but is extremely scalable and runs excellently on HPC nodes and average laptops alike. 56 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: ~ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /data/pbmc3k.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/data/pbmc3k.RData -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • singlet 9 | 10 | 11 | 12 | 13 | 14 | 18 | 19 | 20 | Skip to contents 21 | 22 | 23 |
70 | Content not found. Please use links in the navbar.
88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-lnmf-metadata-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-lnmf-metadata-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-metadata-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-metadata-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-umap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/plot-umap-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/run-nmf-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/run-nmf-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-6-1.png 
-------------------------------------------------------------------------------- /docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Batch_Integration_with_Linked_NMF_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/dim-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/dim-plot-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/feature-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/feature-plot-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/gsea-heatmap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/gsea-heatmap-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/map-cluster-ids-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/map-cluster-ids-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/plot-metadata-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/plot-metadata-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /docs/articles/Guided_Clustering_with_NMF_files/figure-html/viz-dim-loadings-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zdebruine/singlet/ef4a374f27681477b95d856b74bd3cbbb379308d/docs/articles/Guided_Clustering_with_NMF_files/figure-html/viz-dim-loadings-1.png -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • singlet 
All vignettes
Batch Integration with Linked NMF
Guided Clustering with NMF
71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | Authors and Citation • singlet 6 | Skip to contents 7 | 8 | 9 |
Authors
  • Zach DeBruine. Author, maintainer.
Citation
Source: DESCRIPTION
DeBruine Z (2022). singlet: Non-negative Matrix Factorization for single-cell analysis. R package version 0.0.99, https://github.com/zdebruine/singlet.
@Manual{,
  title = {singlet: Non-negative Matrix Factorization for single-cell analysis},
  author = {Zach DeBruine},
  year = {2022},
  note = {R package version 0.0.99},
  url = {https://github.com/zdebruine/singlet},
}
88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /docs/deps/data-deps.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('nav.navbar').headroom(); 6 | 7 | Toc.init({ 8 | $nav: $("#toc"), 9 | $scope: $("main h2, main h3, main h4, main h5, main h6") 10 | }); 11 | 12 | if ($('#toc').length) { 13 | $('body').scrollspy({ 14 | target: '#toc', 15 | offset: $("nav.navbar").outerHeight() + 1 16 | }); 17 | } 18 | 19 | // Activate popovers 20 | $('[data-bs-toggle="popover"]').popover({ 21 | container: 'body', 22 | html: true, 23 | trigger: 'focus', 24 | placement: "top", 25 | sanitize: false, 26 | }); 27 | 28 | $('[data-bs-toggle="tooltip"]').tooltip(); 29 | 30 | /* Clipboard --------------------------*/ 31 | 32 | function changeTooltipMessage(element, msg) { 33 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 34 | element.setAttribute('data-original-title', msg); 35 | $(element).tooltip('show'); 36 | element.setAttribute('data-original-title', tooltipOriginalTitle); 37 | } 38 | 39 | if(ClipboardJS.isSupported()) { 40 | $(document).ready(function() { 41 | var copyButton = ""; 42 | 43 | $("div.sourceCode").addClass("hasCopyButton"); 44 | 45 | // Insert copy buttons: 46 | $(copyButton).prependTo(".hasCopyButton"); 47 | 48 | // Initialize tooltips: 49 | $('.btn-copy-ex').tooltip({container: 'body'}); 50 | 51 | // Initialize clipboard: 52 | var clipboard = new ClipboardJS('[data-clipboard-copy]', { 53 | text: function(trigger) { 54 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 55 | } 56 | }); 57 | 58 | clipboard.on('success', function(e) { 59 | changeTooltipMessage(e.trigger, 'Copied!'); 60 | e.clearSelection(); 61 | }); 62 | 63 | clipboard.on('error', function() { 64 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 65 | }); 66 | 67 | }); 68 | } 69 | 70 | /* Search marking --------------------------*/ 71 | var url = new URL(window.location.href); 72 | var toMark = url.searchParams.get("q"); 73 | var mark = new Mark("main#main"); 74 | if (toMark) { 75 | mark.mark(toMark, { 76 | accuracy: { 77 | value: "complementary", 78 | limiters: [",", ".", ":", "/"], 79 | } 80 | }); 81 | } 82 | 83 | /* Search --------------------------*/ 84 | /* Adapted from https://github.com/rstudio/bookdown/blob/2d692ba4b61f1e466c92e78fd712b0ab08c11d31/inst/resources/bs4_book/bs4_book.js#L25 */ 85 | // Initialise search index on focus 86 | var fuse; 87 | $("#search-input").focus(async function(e) { 88 | if (fuse) { 89 | return; 90 | } 91 | 92 | $(e.target).addClass("loading"); 93 | var response = await fetch($("#search-input").data("search-index")); 94 | var data = await response.json(); 95 | 96 | var options = { 97 | keys: ["what", "text", "code"], 98 | ignoreLocation: true, 99 | threshold: 0.1, 100 | includeMatches: true, 101 | includeScore: true, 102 | }; 103 | fuse = new Fuse(data, options); 104 | 105 | $(e.target).removeClass("loading"); 106 | }); 
107 | 108 | // Use algolia autocomplete 109 | var options = { 110 | autoselect: true, 111 | debug: true, 112 | hint: false, 113 | minLength: 2, 114 | }; 115 | var q; 116 | async function searchFuse(query, callback) { 117 | await fuse; 118 | 119 | var items; 120 | if (!fuse) { 121 | items = []; 122 | } else { 123 | q = query; 124 | var results = fuse.search(query, { limit: 20 }); 125 | items = results 126 | .filter((x) => x.score <= 0.75) 127 | .map((x) => x.item); 128 | if (items.length === 0) { 129 | items = [{dir:"Sorry 😿",previous_headings:"",title:"No results found.",what:"No results found.",path:window.location.href}]; 130 | } 131 | } 132 | callback(items); 133 | } 134 | $("#search-input").autocomplete(options, [ 135 | { 136 | name: "content", 137 | source: searchFuse, 138 | templates: { 139 | suggestion: (s) => { 140 | if (s.title == s.what) { 141 | return `${s.dir} >
${s.title}
`; 142 | } else if (s.previous_headings == "") { 143 | return `${s.dir} >
${s.title}
> ${s.what}`; 144 | } else { 145 | return `${s.dir} >
${s.title}
> ${s.previous_headings} > ${s.what}`; 146 | } 147 | }, 148 | }, 149 | }, 150 | ]).on('autocomplete:selected', function(event, s) { 151 | window.location.href = s.path + "?q=" + q + "#" + s.id; 152 | }); 153 | }); 154 | })(window.jQuery || window.$) 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.17.1.1 2 | pkgdown: 2.0.6 3 | pkgdown_sha: ~ 4 | articles: 5 | Batch_Integration_with_Linked_NMF: Batch_Integration_with_Linked_NMF.html 6 | Guided_Clustering_with_NMF: Guided_Clustering_with_NMF.html 7 | last_built: 2022-09-09T14:57Z 8 | 9 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /articles/Batch_Integration_with_Linked_NMF.html 8 | 9 | 10 | /articles/Guided_Clustering_with_NMF.html 11 | 12 | 13 | /articles/index.html 14 | 15 | 16 | /authors.html 17 | 18 | 19 | /index.html 20 | 21 | 22 | /reference/GSEAHeatmap.html 23 | 24 | 25 | /reference/MetadataSummary.html 26 | 27 | 28 | /reference/RankPlot.html 29 | 30 | 31 | /reference/RunGSEA.html 32 | 33 | 34 | /reference/RunLNMF.html 35 | 36 | 37 | /reference/RunNMF.html 38 | 39 | 40 | /reference/ard_nmf.html 41 | 42 | 43 | /reference/cross_validate_nmf.html 44 | 45 | 46 | /reference/get_pbmc3k_data.html 47 | 48 | 49 | /reference/index.html 50 | 51 | 52 | /reference/pbmc3k.html 53 | 54 | 55 | /reference/run_linked_nmf.html 56 | 57 | 58 | /reference/run_nmf.html 59 | 60 | 61 | /reference/singlet.html 62 | 63 | 64 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | c(bibentry(bibtype = "Article", 2 | key = "fastnmf", 3 | title = "{Fast and robust non-negative matrix factorization for single-cell experiments}", 4 | author = c( 5 | person(c("Zachary", "J."), "Debruine"), 6 | person("Karsten", "Melcher"), 7 | person(c("Timothy", "J."), "Triche") 8 | ), 9 | journal = "bioRXiv", 10 | year = 2021, 11 | url = "https://doi.org/10.1101/2021.09.01.458620", 12 | doi = "10.1101/2021.09.01.458620", 13 | header = "The RcppML package is described in:")) 14 | -------------------------------------------------------------------------------- /inst/include/IVSparse.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SparseMatrix 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief IVSparse Sparse Matrix Library 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | // Library Constants 12 | #define DELIM 0 13 | #define NUM_META_DATA 6 14 | #define META_DATA_SIZE 24 15 | #define ONE_BYTE_MAX 255 16 | #define TWO_BYTE_MAX 65535 17 | #define FOUR_BYTE_MAX 4294967295 18 | 19 | // Library Preprocessor Directives 20 | 21 | // Parallel Processing Directives (On by default) 22 | #if (defined _OPENMP) && (!defined IVSPARSE_DONT_PARALLEL) 23 | #define IVSPARSE_HAS_OPENMP 24 | #endif 25 | #ifdef IVSPARSE_HAS_OPENMP 26 | #include 27 | #include 28 | #endif 29 | 30 | // Debugging Directives (Off by default) 31 | #ifndef IVSPARSE_DEBUG_OFF 32 | #define IVSPARSE_DEBUG 33 | #endif 34 | 35 | // Library Includes 36 | 37 | // Eigen is already pulled in by "singlet" 38 | //[[Rcpp::depends(RcppEigen)]] 39 | // #include 40 | 41 | 42 | #include 43 | 
#include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | // Library Namespaces 50 | 51 | // SparseMatrixBase Files 52 | // #include "src/IVSparse_SparseMatrixBase.hpp" 53 | // #include "src/IVSparse_Base_Methods.hpp" 54 | 55 | // SparseMatrix Level 3 Files 56 | #include "src/IVCSC/IVCSC_SparseMatrix.hpp" 57 | #include "src/IVCSC/IVCSC_Operators.hpp" 58 | #include "src/IVCSC/IVCSC_Private_Methods.hpp" 59 | #include "src/IVCSC/IVCSC_Methods.hpp" 60 | #include "src/IVCSC/IVCSC_Constructors.hpp" 61 | #include "src/IVCSC/IVCSC_BLAS.hpp" 62 | // Vector and Iterator Files 63 | #include "src/Vectors/IVCSC_Vector.hpp" 64 | #include "src/Vectors/IVCSC_Vector_Methods.hpp" 65 | #include "src/InnerIterators/IVCSC_Iterator.hpp" 66 | #include "src/InnerIterators/IVCSC_Iterator_Methods.hpp" 67 | 68 | // SparseMatrix Level 2 Files 69 | #include "src/VCSC/VCSC_SparseMatrix.hpp" 70 | #include "src/VCSC/VCSC_Operators.hpp" 71 | #include "src/VCSC/VCSC_Private_Methods.hpp" 72 | #include "src/VCSC/VCSC_Methods.hpp" 73 | #include "src/VCSC/VCSC_Constructors.hpp" 74 | #include "src/VCSC/VCSC_BLAS.hpp" 75 | // Vector and Iterator Files 76 | #include "src/Vectors/VCSC_Vector.hpp" 77 | #include "src/Vectors/VCSC_Vector_Methods.hpp" 78 | #include "src/InnerIterators/VCSC_Iterator.hpp" 79 | #include "src/InnerIterators/VCSC_Iterator_Methods.hpp" 80 | 81 | // SparseMatrix Level 1 Files 82 | #include "src/CSC/CSC_SparseMatrix.hpp" 83 | #include "src/CSC/CSC_Operators.hpp" 84 | #include "src/CSC/CSC_Private_Methods.hpp" 85 | #include "src/CSC/CSC_Methods.hpp" 86 | #include "src/CSC/CSC_Constructors.hpp" 87 | #include "src/CSC/CSC_BLAS.hpp" 88 | // Vector and Iterator Files 89 | #include "src/Vectors/CSC_Vector.hpp" 90 | #include "src/Vectors/CSC_Vector_Methods.hpp" 91 | #include "src/InnerIterators/CSC_Iterator.hpp" 92 | #include "src/InnerIterators/CSC_Iterator_Methods.hpp" -------------------------------------------------------------------------------- /inst/include/singlet.h: -------------------------------------------------------------------------------- 1 | #ifndef SINGLET_H 2 | #define SINGLET_H 3 | 4 | #include 5 | 6 | // forward declare Rcpp::as<> Exporter 7 | namespace Rcpp { 8 | class SparseMatrix; 9 | namespace traits { 10 | template <> 11 | class Exporter; 12 | } // namespace traits 13 | } // namespace Rcpp 14 | 15 | //[[Rcpp::plugins(openmp)]] 16 | #ifdef _OPENMP 17 | #include 18 | #endif 19 | 20 | //[[Rcpp::depends(RcppEigen)]] 21 | #include 22 | 23 | // now pull in IVSparse after loading Eigen 24 | #include 25 | 26 | // this class is provided for consistency with Eigen::SparseMatrix, but using 27 | // R objects (i.e. Rcpp::NumericVector, Rcpp::IntegerVector) that comprise Matrix::dgCMatrix in R. 28 | // R objects are pointers to underlying memory-mapped SEXP vectors, and are usable in C++ without any 29 | // affect on performance. Thus, this class achieves zero-copy access to R sparse matrix objects, with equal 30 | // performance for read-only column iteration (`InnerIterator`) like `Eigen::SparseMatrix`. 31 | // 32 | // The class is designed with an `InnerIterator` class that exactly mimics `Eigen::SparseMatrix::InnerIterator`, 33 | // and also contains `.rows()` and `.cols()` member functions. This allows it to substitute for `Eigen::SparseMatrix` 34 | // in all SLAM routines. 
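//
// A minimal usage sketch (illustration only, not part of the original header;
// the names `A`, `s`, and `col_sums` are hypothetical). Column iteration follows
// the same pattern as Eigen::SparseMatrix::InnerIterator, using only the members
// defined below (the S4 constructor, cols(), and InnerIterator):
//
//   Rcpp::SparseMatrix A(s);                        // s: a Matrix::dgCMatrix S4 object
//   std::vector<double> col_sums(A.cols(), 0.0);
//   for (unsigned int j = 0; j < A.cols(); ++j)
//     for (Rcpp::SparseMatrix::InnerIterator it(A, j); it; ++it)
//       col_sums[j] += it.value();                  // it.row() gives the row index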
35 | namespace Rcpp { 36 | class SparseMatrix { 37 | public: 38 | NumericVector x; 39 | IntegerVector i, p, Dim; 40 | 41 | // constructors 42 | SparseMatrix(NumericVector x, IntegerVector i, IntegerVector p, IntegerVector Dim) : x(x), i(i), p(p), Dim(Dim) {} 43 | SparseMatrix(const S4& s) { 44 | if (!s.hasSlot("x") || !s.hasSlot("p") || !s.hasSlot("i") || !s.hasSlot("Dim")) 45 | throw std::invalid_argument("Cannot construct SparseMatrix from this S4 object"); 46 | x = s.slot("x"); 47 | i = s.slot("i"); 48 | p = s.slot("p"); 49 | Dim = s.slot("Dim"); 50 | } 51 | SparseMatrix() {} 52 | 53 | unsigned int rows() { return Dim[0]; } 54 | unsigned int cols() { return Dim[1]; } 55 | 56 | // const column iterator 57 | class InnerIterator { 58 | public: 59 | InnerIterator(SparseMatrix& ptr, int col) : ptr(ptr), col_(col), index(ptr.p[col]), max_index(ptr.p[col + 1]) {} 60 | operator bool() const { return (index < max_index); } 61 | InnerIterator& operator++() { 62 | ++index; 63 | return *this; 64 | } 65 | double& value() const { return ptr.x[index]; } 66 | int row() const { return ptr.i[index]; } 67 | int col() const { return col_; } 68 | 69 | private: 70 | SparseMatrix& ptr; 71 | int col_, index, max_index; 72 | }; 73 | 74 | SparseMatrix clone() { 75 | NumericVector x_ = Rcpp::clone(x); 76 | IntegerVector i_ = Rcpp::clone(i); 77 | IntegerVector p_ = Rcpp::clone(p); 78 | IntegerVector Dim_ = Rcpp::clone(Dim); 79 | return SparseMatrix(x_, i_, p_, Dim_); 80 | } 81 | 82 | SparseMatrix transpose() { 83 | S4 s(std::string("dgCMatrix")); 84 | s.slot("i") = i; 85 | s.slot("p") = p; 86 | s.slot("x") = x; 87 | s.slot("Dim") = Dim; 88 | Environment base = Environment::namespace_env("Matrix"); 89 | Function t_r = base["t"]; 90 | S4 At = t_r(_["x"] = s); 91 | return SparseMatrix(At); 92 | }; 93 | 94 | S4 wrap() { 95 | S4 s(std::string("dgCMatrix")); 96 | s.slot("x") = x; 97 | s.slot("i") = i; 98 | s.slot("p") = p; 99 | s.slot("Dim") = Dim; 100 | return s; 101 | } 102 | }; 103 | 104 | namespace traits { 105 | /* support for Rcpp::as */ 106 | 107 | // export a sparse matrix 108 | template <> 109 | class Exporter { 110 | Rcpp::NumericVector x_; 111 | Rcpp::IntegerVector i, p, Dim; 112 | 113 | public: 114 | Exporter(SEXP x) { 115 | Rcpp::S4 s(x); 116 | if (!s.hasSlot("x") || !s.hasSlot("p") || !s.hasSlot("i") || !s.hasSlot("Dim")) 117 | throw std::invalid_argument("Cannot construct Rcpp::SparseMatrix from this S4 object"); 118 | x_ = s.slot("x"); 119 | i = s.slot("i"); 120 | p = s.slot("p"); 121 | Dim = s.slot("Dim"); 122 | } 123 | 124 | Rcpp::SparseMatrix get() { 125 | return Rcpp::SparseMatrix(x_, i, p, Dim); 126 | } 127 | }; 128 | 129 | } // namespace traits 130 | } // namespace Rcpp 131 | #endif -------------------------------------------------------------------------------- /inst/include/src/CSC/CSC_Operators.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Operators.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Operator Overloads for CSC Sparse Matrices 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | // Assignment Operator 14 | template 15 | SparseMatrix & 16 | SparseMatrix::operator=(const IVSparse::SparseMatrix &other) { 17 | // check for self assignment 18 | if (this != &other) { 19 | // free the old data 20 | if (vals != nullptr) { 21 | free(vals); 22 | } 23 | if (innerIdx != nullptr) { 24 | free(innerIdx); 25 | } 26 | if (outerPtr != nullptr) { 27 | free(outerPtr); 
28 | } 29 | if (metadata != nullptr) { 30 | delete[] metadata; 31 | } 32 | 33 | // Deep copy the matrix 34 | metadata = new uint32_t[NUM_META_DATA]; 35 | memcpy(metadata, other.metadata, NUM_META_DATA * sizeof(uint32_t)); 36 | 37 | // set the dimensions of the matrix 38 | numRows = other.numRows; 39 | numCols = other.numCols; 40 | outerDim = other.outerDim; 41 | innerDim = other.innerDim; 42 | nnz = other.nnz; 43 | compSize = other.compSize; 44 | 45 | // encode the value type and index type 46 | encodeValueType(); 47 | index_t = sizeof(indexT); 48 | 49 | // check for an empty matrix 50 | if (nnz == 0) { 51 | vals = nullptr; 52 | innerIdx = nullptr; 53 | outerPtr = nullptr; 54 | } 55 | 56 | // allocate the memory 57 | try { 58 | vals = (T *)malloc(nnz * sizeof(T)); 59 | innerIdx = (indexT *)malloc(nnz * sizeof(indexT)); 60 | outerPtr = (indexT *)malloc((outerDim + 1) * sizeof(indexT)); 61 | } catch (std::bad_alloc &e) { 62 | std::cerr << "Error: Failed to allocate memory for the matrix" 63 | << std::endl; 64 | exit(1); 65 | } 66 | 67 | // copy the data 68 | memcpy(vals, other.vals, nnz * sizeof(T)); 69 | memcpy(innerIdx, other.innerIdx, nnz * sizeof(indexT)); 70 | memcpy(outerPtr, other.outerPtr, (outerDim + 1) * sizeof(indexT)); 71 | } 72 | 73 | // return the matrix 74 | return *this; 75 | } 76 | 77 | // Equality Operator 78 | template 79 | bool SparseMatrix::operator==(const SparseMatrix &other) { 80 | // check if the dimensions are the same 81 | if (numRows != other.numRows || numCols != other.numCols) { 82 | return false; 83 | } 84 | 85 | // check if the number of nonzeros are the same 86 | if (nnz != other.nnz) { 87 | return false; 88 | } 89 | 90 | // check the matrix data against each other 91 | if (memcmp(vals, other.vals, nnz * sizeof(T)) != 0) { 92 | return false; 93 | } 94 | if (memcmp(innerIdx, other.innerIdx, nnz * sizeof(indexT)) != 0) { 95 | return false; 96 | } 97 | if (memcmp(outerPtr, other.outerPtr, (outerDim + 1) * sizeof(indexT)) != 0) { 98 | return false; 99 | } 100 | 101 | // if all the data is the same return true 102 | return true; 103 | } 104 | 105 | // Inequality Operator 106 | template 107 | bool SparseMatrix::operator!=(const SparseMatrix &other) { 108 | return !(*this == other); 109 | } 110 | 111 | // Coefficent Access Operator 112 | template 113 | T SparseMatrix::operator()(uint32_t row, uint32_t col) { 114 | 115 | #ifdef IVSPARSE_DEBUG 116 | // check if the row and column are in bounds 117 | if (row >= numRows || col >= numCols) { 118 | std::cerr << "Error: Index out of bounds" << std::endl; 119 | exit(1); 120 | } 121 | #endif 122 | 123 | // get the vector and index 124 | uint32_t vector = columnMajor ? col : row; 125 | uint32_t index = columnMajor ? 
row : col; 126 | 127 | // get an iterator for the desired vector 128 | for (typename SparseMatrix::InnerIterator it( 129 | *this, vector); 130 | it; ++it) { 131 | if (it.getIndex() == (indexT)index) { 132 | // if the index is found return the value 133 | return it.value(); 134 | } 135 | } 136 | 137 | // if the index is not found return 0 138 | return 0; 139 | } 140 | 141 | // Vector Access Operator 142 | template 143 | typename SparseMatrix::Vector 144 | SparseMatrix::operator[](uint32_t vec) { 145 | #ifdef IVSPARSE_DEBUG 146 | // check if the vector is out of bounds 147 | assert((vec < outerDim && vec >= 0) && "Vector index out of bounds"); 148 | #endif 149 | 150 | // return a IVSparse vector 151 | typename IVSparse::SparseMatrix::Vector newVector( 152 | *this, vec); 153 | return newVector; 154 | } 155 | 156 | //* BLAS Operators *// 157 | 158 | // Scalar Multiplication 159 | template 160 | IVSparse::SparseMatrix 161 | SparseMatrix::operator*(T scalar) { 162 | return scalarMultiply(scalar); 163 | } 164 | 165 | // In place scalar multiplication 166 | template 167 | void SparseMatrix::operator*=(T scalar) { 168 | return inPlaceScalarMultiply(scalar); 169 | } 170 | 171 | // IVSparse Matrix * IVSparse Vector Multiplication 172 | template 173 | Eigen::VectorXd SparseMatrix::operator*(SparseMatrix::Vector &vec) { 174 | return vectorMultiply(vec); 175 | } 176 | 177 | // Matrix Vector Multiplication (IVSparse Eigen -> Eigen) 178 | template 179 | Eigen::VectorXd SparseMatrix::operator*(Eigen::VectorXd &vec) { 180 | return vectorMultiply(vec); 181 | } 182 | 183 | // Matrix Matrix Multiplication (IVSparse Eigen -> Eigen) 184 | template 185 | Eigen::Matrix SparseMatrix::operator*(Eigen::Matrix mat) { 186 | return matrixMultiply(mat); 187 | } 188 | 189 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/CSC/CSC_Private_Methods.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Private_Methods.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Private Methods for CSC Sparse Matrices 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | // Calculates the number of bytes needed to store a value 14 | template 15 | inline uint8_t SparseMatrix::byteWidth(size_t size) { 16 | if (size <= 0xFF) { 17 | return 1; 18 | } else if (size <= 0xFFFF) { 19 | return 2; 20 | } else if (size <= 0xFFFFFF) { 21 | return 3; 22 | } else if (size <= 0xFFFFFFFF) { 23 | return 4; 24 | } else if (size <= 0xFFFFFFFFFF) { 25 | return 5; 26 | } else if (size <= 0xFFFFFFFFFFFF) { 27 | return 6; 28 | } else if (size <= 0xFFFFFFFFFFFFFF) { 29 | return 7; 30 | } else { 31 | return 8; 32 | } 33 | } 34 | 35 | // Encodes the value type of the matrix in a uint32_t 36 | template 37 | void SparseMatrix::encodeValueType() { 38 | uint8_t byte0 = sizeof(T); 39 | uint8_t byte1 = std::is_floating_point::value ? 1 : 0; 40 | uint8_t byte2 = std::is_signed::value ? 1 : 0; 41 | uint8_t byte3 = columnMajor ? 
1 : 0; 42 | 43 | val_t = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; 44 | } 45 | 46 | // Checks if the value type is correct for the matrix 47 | template 48 | void SparseMatrix::checkValueType() { 49 | uint8_t byte0 = val_t & 0xFF; 50 | uint8_t byte1 = (val_t >> 8) & 0xFF; 51 | uint8_t byte2 = (val_t >> 16) & 0xFF; 52 | uint8_t byte3 = (val_t >> 24) & 0xFF; 53 | assert(byte0 == sizeof(T) && "Value type size does not match"); 54 | assert(byte1 == std::is_floating_point::value && 55 | "Value type is not floating point"); 56 | assert(byte2 == std::is_signed::value && "Value type is not signed"); 57 | assert(byte3 == columnMajor && "Major direction does not match"); 58 | } 59 | 60 | // performs some simple user checks on the matrices metadata 61 | template 62 | void SparseMatrix::userChecks() { 63 | assert((innerDim > 1 || outerDim > 1 || nnz > 1) && 64 | "The matrix must have at least one row, column, and nonzero value"); 65 | assert(std::is_floating_point::value == false && 66 | "The index type must be a non-floating point type"); 67 | assert((std::is_arithmetic::value && std::is_arithmetic::value) && 68 | "The value and index types must be numeric types"); 69 | assert((std::is_same::value == false) && 70 | "The index type must not be bool"); 71 | assert((innerDim < std::numeric_limits::max() && 72 | outerDim < std::numeric_limits::max()) && 73 | "The number of rows and columns must be less than the maximum value " 74 | "of the index type"); 75 | checkValueType(); 76 | } 77 | 78 | // Calculates the current byte size of the matrix in memory 79 | template 80 | void SparseMatrix::calculateCompSize() { 81 | // set compSize to zero 82 | compSize = 0; 83 | 84 | // add the size of the metadata 85 | compSize += META_DATA_SIZE; 86 | 87 | // add the csc vectors 88 | compSize += sizeof(T) * nnz; // values 89 | compSize += sizeof(indexT) * nnz; // innerIdx 90 | compSize += sizeof(indexT) * (outerDim + 1); // outerPtr 91 | } 92 | 93 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/IVSparse_Base_Methods.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file IVSparse_Base_Methods.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief IVSparse Sparse Matrix Base Methods 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | // Calculates the number of bytes needed to store a value 14 | inline uint8_t SparseMatrix::byteWidth(size_t size) { 15 | if (size <= 0xFF){ 16 | return 1; 17 | } 18 | else if (size <= 0xFFFF){ 19 | return 2; 20 | } 21 | else if (size <= 0xFFFFFF){ 22 | return 3; 23 | } 24 | else if (size <= 0xFFFFFFFF){ 25 | return 4; 26 | } 27 | else if (size <= 0xFFFFFFFFFF){ 28 | return 5; 29 | } 30 | else if (size <= 0xFFFFFFFFFFFF){ 31 | return 6; 32 | } 33 | else if (size <= 0xFFFFFFFFFFFFFF){ 34 | return 7; 35 | } 36 | else{ 37 | return 8; 38 | } 39 | 40 | } 41 | 42 | // Gets the number of rows in the matrix 43 | uint32_t SparseMatrix::rows() const { return numRows; } 44 | 45 | // Gets the number of columns in the matrix 46 | uint32_t SparseMatrix::cols() const { return numCols; } 47 | 48 | // Gets the inner dimension of the matrix 49 | uint32_t SparseMatrix::innerSize() const { return innerDim; } 50 | 51 | // Gets the outer dimension of the matrix 52 | uint32_t SparseMatrix::outerSize() const { return outerDim; } 53 | 54 | // Gets the number of non-zero elements in the matrix 55 | uint32_t 
SparseMatrix::nonZeros() const { return nnz; } 56 | 57 | // Gets the number of bytes needed to store the matrix 58 | size_t SparseMatrix::byteSize() const { return compSize; } 59 | 60 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/IVSparse_SparseMatrixBase.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file IVSparse_SparseMatrixBase.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief IVSparse Sparse Matrix Base Class Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * IVSparse Sparse Matrix Base Class \n \n 15 | * This is the overarching base class for the IVSparse Sparse Matrix 16 | * Library. It contains methods and variables shared between all 17 | * compression levels of IVSparse Sparse Matrices and serves to reduce 18 | * code duplication. 19 | */ 20 | class SparseMatrixBase { 21 | private: 22 | //* The Matrix Info *// 23 | 24 | uint32_t innerDim = 0; // The inner dimension of the matrix 25 | uint32_t outerDim = 0; // The outer dimension of the matrix 26 | 27 | uint32_t numRows = 0; // The number of rows in the matrix 28 | uint32_t numCols = 0; // The number of columns in the matrix 29 | 30 | uint32_t nnz = 0; // The number of non-zero values in the matrix 31 | 32 | size_t compSize = 0; // The size of the compressed matrix in bytes 33 | 34 | //* The Value and Index Types *// 35 | 36 | uint32_t val_t; // Information about the value type (size, signededness, etc.) 37 | uint32_t index_t; // Information about the index type (size) 38 | 39 | uint32_t* metadata = nullptr; // The metadata of the matrix 40 | 41 | //* Private Methods *// 42 | 43 | // Calculates the number of bytes needed to store a value 44 | inline uint8_t byteWidth(size_t size); 45 | 46 | // Creates value type information 47 | virtual void encodeValueType() = 0; 48 | 49 | // Checks the value type information 50 | virtual void checkValueType() = 0; 51 | 52 | // User checks to confirm a valid matrix 53 | virtual void userChecks() = 0; 54 | 55 | // Calculates the size of the matrix in bytes 56 | virtual void calculateCompSize() = 0; 57 | 58 | public: 59 | //* Friends *// 60 | 61 | // IVSparse Sparse Matrix Class 62 | template 63 | friend class SparseMatrix; 64 | 65 | //* Constructors *// 66 | 67 | // Default Constructor 68 | SparseMatrixBase() {}; 69 | 70 | //* Getters *// 71 | 72 | /** 73 | * @returns The number of rows in the matrix. 74 | */ 75 | uint32_t rows() const; 76 | 77 | /** 78 | * @returns The number of columns in the matrix. 79 | */ 80 | uint32_t cols() const; 81 | 82 | /** 83 | * @returns The inner dimension of the matrix. 84 | */ 85 | uint32_t innerSize() const; 86 | 87 | /** 88 | * @returns The outer dimension of the matrix. 89 | */ 90 | uint32_t outerSize() const; 91 | 92 | /** 93 | * @returns The number of non-zero elements in the matrix. 94 | */ 95 | uint32_t nonZeros() const; 96 | 97 | /** 98 | * @returns The size of the matrix in bytes. 99 | */ 100 | uint64_t byteSize() const; 101 | 102 | //* Utility Methods *// 103 | 104 | /** 105 | * Writes the matrix to a file with the given filename. 106 | */ 107 | virtual void write(const char* filename) = 0; 108 | 109 | /** 110 | * Prints the matrix to the console. 
111 | */ 112 | virtual void print() = 0; 113 | 114 | }; // class SparseMatrixBase 115 | 116 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/InnerIterators/CSC_Iterator.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Iterator.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Inner Iterator for CSC Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * CSC Inner Iterator Class \n \n 15 | * The CSC Inner Iterator is a forward traversal iterator like the others in the 16 | * IVSparse library. It's very low overhead and is used to traverse over the 17 | * nonzeros of a single vector of a matrix or a vector on its own. 18 | */ 19 | template 20 | class SparseMatrix::InnerIterator { 21 | private: 22 | //* Private Class Variables *// 23 | 24 | T* val; // Current value 25 | indexT index; // Current index 26 | indexT outer; // Outer dimension 27 | 28 | T* vals; 29 | indexT* indices; 30 | indexT* endPtr; 31 | 32 | public: 33 | //* Constructors & Destructor *// 34 | /** @name Constructors 35 | */ 36 | ///@{ 37 | 38 | /** 39 | * Default Iterator Constructor \n \n 40 | * Creates an empty iterator that can't be used on its own. 41 | */ 42 | InnerIterator() {}; 43 | 44 | /** 45 | * CSC Matrix InnerIterator Constructor \n \n 46 | * The main constructor for the Inner Iterator. Given a matrix the iterator 47 | * will forward traverse over the given vector of the matrix. The traversal 48 | * is sorted by index. 49 | */ 50 | InnerIterator(SparseMatrix& mat, uint32_t vec); 51 | 52 | /** 53 | * CSC Vector InnerIterator Constructor \n \n 54 | * Same as the previous constructor but for a single standalone vector. 55 | * Can be used in the same way as the previous constructor. 56 | */ 57 | InnerIterator(SparseMatrix::Vector& vec); 58 | 59 | ///@} 60 | 61 | //* Getters *// 62 | /** @name Getters 63 | */ 64 | ///@{ 65 | 66 | /** 67 | * @returns The current index of the iterator. 68 | */ 69 | indexT getIndex(); 70 | 71 | /** 72 | * @returns The current outer dimension of the iterator. 73 | */ 74 | indexT outerDim(); 75 | 76 | /** 77 | * @returns The current row of the iterator. 78 | */ 79 | indexT row(); 80 | 81 | /** 82 | * @returns The current column of the iterator. 83 | */ 84 | indexT col(); 85 | 86 | /** 87 | * @returns The current value of the iterator. 88 | */ 89 | T value(); 90 | 91 | /** 92 | * Changes the value where the iterator is pointing. 93 | * 94 | * @note This is the only way to update elements in the IVSparse format. 95 | * 96 | * @warning This method may break things if used without care, IVSparse is not 97 | * meant to update values. 
98 | */ 99 | void coeff(T newValue); 100 | 101 | ///@} 102 | 103 | //* Operator Overloads *// 104 | 105 | // Prefix Increment Operator 106 | void __attribute__((hot)) operator++(); 107 | 108 | // Equality Operator 109 | bool operator==(const InnerIterator& other); 110 | 111 | // Inequality Operator 112 | bool operator!=(const InnerIterator& other); 113 | 114 | // Less Than Operator 115 | bool operator<(const InnerIterator& other); 116 | 117 | // Greater Than Operator 118 | bool operator>(const InnerIterator& other); 119 | 120 | // Dereference Operator 121 | T& operator*(); 122 | 123 | // Bool Operator 124 | inline __attribute__((hot)) operator bool() { return indices < endPtr; }; 125 | 126 | }; // class InnerIterator 127 | 128 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/InnerIterators/CSC_Iterator_Methods.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Iterator_Methods.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Iterator Methods for CSC Sparse Matrices 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | //* Constructors *// 14 | 15 | // CSC Matrix Constructor 16 | template 17 | inline SparseMatrix::InnerIterator::InnerIterator( 18 | SparseMatrix& mat, uint32_t vec) { 19 | 20 | this->outer = vec; 21 | 22 | // check if the vector is empty 23 | if (mat.getOuterPointers()[vec] == mat.getOuterPointers()[vec + 1]) { 24 | vals = nullptr; 25 | indices = nullptr; 26 | endPtr = nullptr; 27 | return; 28 | } 29 | 30 | // set the pointers to the correct locations 31 | vals = &mat.vals[mat.outerPtr[vec]]; 32 | indices = &mat.innerIdx[mat.outerPtr[vec]]; 33 | endPtr = &mat.innerIdx[mat.outerPtr[vec + 1]]; 34 | 35 | // set the values of the iterator 36 | val = vals; 37 | index = indices[0]; 38 | } 39 | 40 | // CSC Vector Constructor 41 | template 42 | SparseMatrix::InnerIterator::InnerIterator( 43 | SparseMatrix::Vector& vec) { 44 | 45 | this->outer = 0; 46 | 47 | // set the pointers to the correct locations 48 | vals = vec.values(); 49 | indices = vec.indexPtr(); 50 | endPtr = vec.indexPtr() + vec.nonZeros(); 51 | 52 | // set the values of the iterator 53 | val = vals; 54 | index = indices[0]; 55 | } 56 | 57 | //* Overloaded Operators *// 58 | 59 | // Increment Operator 60 | template 61 | inline void SparseMatrix::InnerIterator::operator++() { 62 | vals++; 63 | indices++; 64 | 65 | // check if the iterator is at the end of the vector 66 | if (indices == endPtr) { 67 | return; 68 | } 69 | 70 | // set the values of the iterator 71 | val = vals; 72 | index = *indices; 73 | } 74 | 75 | // Equality Operator 76 | template 77 | bool SparseMatrix::InnerIterator::operator==(const InnerIterator& other) { 78 | return (vals == other.vals && indices == other.index); 79 | } 80 | 81 | // Inequality Operator 82 | template 83 | bool SparseMatrix::InnerIterator::operator!=(const InnerIterator& other) { 84 | return (vals != other.vals || indices != other.index); 85 | } 86 | 87 | // Less Than Operator 88 | template 89 | bool SparseMatrix::InnerIterator::operator<(const InnerIterator& other) { 90 | return (vals < other.vals && indices < other.index); 91 | } 92 | 93 | // Greater Than Operator 94 | template 95 | bool SparseMatrix::InnerIterator::operator>(const InnerIterator& other) { 96 | return (vals > other.vals && indices > other.index); 97 | } 98 | 99 | // Dereference Operator 100 | template 101 | T& 
SparseMatrix::InnerIterator::operator*() { 102 | return val; 103 | } 104 | 105 | //* Getters & Setters *// 106 | 107 | // Get the current index of the iterator 108 | template 109 | indexT SparseMatrix::InnerIterator::getIndex() { 110 | return index; 111 | } 112 | 113 | // Get the current outer dimension of the iterator 114 | template 115 | indexT SparseMatrix::InnerIterator::outerDim() { 116 | return outer; 117 | } 118 | 119 | // Get the current row of the iterator 120 | template 121 | indexT SparseMatrix::InnerIterator::row() { 122 | if (columnMajor) { 123 | return index; 124 | } 125 | else { 126 | return outer; 127 | } 128 | } 129 | 130 | // Get the current column of the iterator 131 | template 132 | indexT SparseMatrix::InnerIterator::col() { 133 | if (columnMajor) { 134 | return outer; 135 | } 136 | else { 137 | return index; 138 | } 139 | } 140 | 141 | // Get the current value of the iterator 142 | template 143 | T SparseMatrix::InnerIterator::value() { 144 | return *val; 145 | } 146 | 147 | // coefficent access method 148 | template 149 | void SparseMatrix::InnerIterator::coeff(T value) { 150 | *val = value; 151 | } 152 | 153 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/InnerIterators/IVCSC_Iterator.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file IVCSC_Iterator.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Inner Iterator for IVCSC Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * @tparam T The type of the values in the matrix 15 | * @tparam indexT The type of the indices in the matrix 16 | * @tparam compressionLevel The level of compression used in the matrix 17 | * @tparam columnMajor Whether the matrix is column major or not 18 | * 19 | * IVCSC Inner Iterator Class \n \n 20 | * The IVCSC Inner Iterator is a forward traversal iterator like the others in 21 | * the IVSparse library. The IVCSC Iterator is slower than the others due to 22 | * needing to decode compressed data. 23 | */ 24 | template 25 | class SparseMatrix::InnerIterator { 26 | private: 27 | //* Private Class Variables *// 28 | 29 | indexT outer; // Outer dimension 30 | indexT index; // Current index 31 | T* val = nullptr; // Current value 32 | 33 | indexT newIndex = 0; // Next index 34 | 35 | uint8_t indexWidth = 1; // Width of the current run 36 | 37 | void* data; // Pointer to the current data 38 | void* endPtr; // Pointer to the end of the data 39 | 40 | bool firstIndex = true; // Is this the first index of the vector 41 | 42 | //* Private Class Methods *// 43 | 44 | // Decodes the index from the data pointer 45 | void __attribute__((hot)) decodeIndex(); 46 | 47 | public: 48 | //* Constructors & Destructor *// 49 | /** @name Constructors 50 | */ 51 | ///@{ 52 | 53 | /** 54 | * Default Iterator Constructor \n \n 55 | * Creates an empty iterator that can't be used on its own. 56 | */ 57 | InnerIterator() {}; 58 | 59 | /** 60 | * IVCSC Matrix InnerIterator Constructor \n \n 61 | * The main constructor for the Inner Iterator. Given a matrix the iterator 62 | * will forward traverse over the given vector of the matrix. The traversal 63 | * is sorted by value in ascending order. 64 | */ 65 | InnerIterator(SparseMatrix& mat, 66 | uint32_t col); 67 | 68 | /** 69 | * IVCSC Vector InnerIterator Constructor \n \n 70 | * Same as the previous constructor but for a single standalone vector. 
71 | * Can be used in the same way as the previous constructor. 72 | */ 73 | InnerIterator( 74 | SparseMatrix::Vector& vec); 75 | 76 | ///@} 77 | 78 | //* Getters *// 79 | /** @name Getters 80 | */ 81 | ///@{ 82 | 83 | /** 84 | * @returns The current index of the iterator. 85 | */ 86 | indexT getIndex(); 87 | 88 | /** 89 | * @returns The current outer dimension of the iterator. 90 | */ 91 | indexT outerDim(); 92 | 93 | /** 94 | * @returns The current row of the iterator. 95 | */ 96 | indexT row(); 97 | 98 | /** 99 | * @returns The current column of the iterator. 100 | */ 101 | indexT col(); 102 | 103 | /** 104 | * @returns The current value of the iterator. 105 | */ 106 | T value(); 107 | 108 | /** 109 | * Changes the value where the iterator is pointing. 110 | * 111 | * @note This is the only way to update elements in the IVSparse format. 112 | */ 113 | void coeff(T newValue); 114 | 115 | /** 116 | * @returns If the iterator is at the beginning of a new run. 117 | */ 118 | bool isNewRun(); 119 | 120 | ///@} 121 | 122 | //* Operator Overloads *// 123 | 124 | // Increment Operator 125 | void __attribute__((hot)) operator++(); 126 | 127 | // Equality Operators 128 | bool operator==(const InnerIterator& other); 129 | 130 | // Inequality Operators 131 | bool operator!=(const InnerIterator& other); 132 | 133 | // Less Than Operator 134 | bool operator<(const InnerIterator& other); 135 | 136 | // Greater Than Operator 137 | bool operator>(const InnerIterator& other); 138 | 139 | // Bool Operator 140 | inline __attribute__((hot)) operator bool() { 141 | return ((char*)endPtr - indexWidth > data); 142 | } 143 | 144 | // Dereference Operator 145 | T& operator*(); 146 | 147 | }; // End of InnerIterator Class 148 | 149 | } // namespace IVSparse 150 | -------------------------------------------------------------------------------- /inst/include/src/InnerIterators/VCSC_Iterator.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file VCSC_Iterator.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief Inner Iterator for VCSC Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * VCSC Inner Iterator Class \n \n 15 | * The VCSC Inner Iterator is a forward traversal iterator like the others in 16 | * the IVSparse library. It's very low overhead and is used to traverse over the 17 | * nonzeros of a single vector of a matrix or a vector on its own. The VCSC 18 | * Inner Iterator differs from the CSC Iterator in that it travereses a counts 19 | * vector in the VCSC Matrix instead of a outer pointers vector. 
20 | */ 21 | template 22 | class SparseMatrix::InnerIterator { 23 | private: 24 | //* Private Class Variables *// 25 | 26 | indexT outer = 0; // Outer dimension 27 | indexT index = 0; // Current index 28 | indexT newIndex = 0; // Next index 29 | T* val = nullptr; // Current value 30 | 31 | T* vals = nullptr; // Pointer to values 32 | indexT* counts = nullptr; // Pointer to counts 33 | indexT* indices = nullptr; // Pointer to indices 34 | 35 | indexT valsSize = 0; // Number of unique values 36 | indexT indexSize = 0; // Number of indices 37 | 38 | indexT count = 0; // Current count 39 | indexT countIndex = 0; // Current count of indices 40 | 41 | //* Private Class Methods *// 42 | 43 | public: 44 | //* Constructors & Destructor *// 45 | /** @name Constructors 46 | */ 47 | ///@{ 48 | 49 | /** 50 | * Default Iterator Constructor \n \n 51 | * Creates an empty iterator that can't be used on its own. 52 | */ 53 | InnerIterator() {}; 54 | 55 | /** 56 | * VCSC Matrix InnerIterator Constructor \n \n 57 | * The main constructor for the Inner Iterator. Given a matrix the iterator 58 | * will forward traverse over the given vector of the matrix. The traversal 59 | * is sorted by value in ascending order. 60 | */ 61 | InnerIterator(SparseMatrix& mat, uint32_t col); 62 | 63 | /** 64 | * VCSC Vector InnerIterator Constructor \n \n 65 | * Same as the previous constructor but for a single standalone vector. 66 | * Can be used in the same way as the previous constructor. 67 | */ 68 | InnerIterator(SparseMatrix::Vector& vec); 69 | 70 | ///@} 71 | 72 | //* Getters *// 73 | /** @name Getters 74 | */ 75 | ///@{ 76 | 77 | /** 78 | * @returns The current index of the iterator. 79 | */ 80 | indexT getIndex(); 81 | 82 | /** 83 | * @returns The current outer dimension of the iterator. 84 | */ 85 | indexT outerDim(); 86 | 87 | /** 88 | * @returns The current row of the iterator. 89 | */ 90 | indexT row(); 91 | 92 | /** 93 | * @returns The current column of the iterator. 94 | */ 95 | indexT col(); 96 | 97 | /** 98 | * @returns The current value of the iterator. 99 | */ 100 | T value(); 101 | 102 | /** 103 | * Changes the value where the iterator is pointing. 104 | * 105 | * @note This is the only way to update elements in the IVSparse format. 106 | */ 107 | void coeff(T newValue); 108 | 109 | /** 110 | * @returns If the iterator is at the beginning of a new run. 
111 | */ 112 | 113 | ///@} 114 | 115 | //* Operator Overloads *// 116 | 117 | // Prefix increment operator 118 | void __attribute__((hot)) operator++(); 119 | 120 | // Equality operator 121 | bool operator==(const InnerIterator& other); 122 | 123 | // Inequality operator 124 | bool operator!=(const InnerIterator& other); 125 | 126 | // Less than operator 127 | bool operator<(const InnerIterator& other); 128 | 129 | // Greater than operator 130 | bool operator>(const InnerIterator& other); 131 | 132 | // Boolean operator 133 | inline __attribute__((hot)) operator bool() { return countIndex < indexSize; } 134 | 135 | // Dereference operator 136 | T& operator*(); 137 | 138 | }; // End of VCSC Inner Iterator Class 139 | 140 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/Vectors/CSC_Vector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CSC_Vector.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief CSC Vector Class Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * CSC Vector Class \n \n 15 | * The CSC Vector class is a vector class that is used to work with 16 | * CSC matrices. It works with the same logic as the corresponding 17 | * matrix compression level and is useful when working with these matrices. 18 | */ 19 | template 20 | class SparseMatrix::Vector { 21 | private: 22 | //* Private Class Variables *// 23 | 24 | size_t size = 0; // size of the vector in bytes 25 | 26 | T *vals = nullptr; // values of the vector 27 | indexT *innerIdx = nullptr; // inner indices of the vector 28 | 29 | uint32_t length = 0; // length of the vector 30 | uint32_t nnz = 0; // number of non-zero elements in the vector 31 | 32 | //* Private Class Methods *// 33 | 34 | // User checks to confirm a valid vector 35 | void userChecks(); 36 | 37 | // Calculates the size of the vector in bytes 38 | void calculateCompSize(); 39 | 40 | public: 41 | //* Constructors & Destructor *// 42 | /** @name Constructors 43 | */ 44 | ///@{ 45 | 46 | /** 47 | * Default Vector Constructor \n \n 48 | * Creates an empty vector with everything set to null/zero. 49 | */ 50 | Vector(){}; 51 | 52 | /** 53 | * IVSparse Matrix to Vector Constructor \n \n 54 | * Creates a vector from a CSC Matrix at the given vector index. 55 | * 56 | * @note Can only get a vector from a matrix in the storage order of the 57 | * matrix. 58 | */ 59 | Vector(IVSparse::SparseMatrix &mat, uint32_t vec); 60 | 61 | /** 62 | * Deep Copy Vector Constructor \n \n 63 | * Creates a deep copy of the given vector. 64 | */ 65 | Vector(IVSparse::SparseMatrix::Vector &vec); 66 | 67 | /** 68 | * Destroys the vector. 69 | */ 70 | ~Vector(); 71 | 72 | ///@} 73 | 74 | //* Getters *// 75 | /** @name Getters 76 | */ 77 | ///@{ 78 | 79 | /** 80 | * @returns The coefficient at the given index. 81 | */ 82 | T coeff(uint32_t index); 83 | 84 | /** 85 | * @returns The size of the vector in bytes. 86 | */ 87 | size_t byteSize(); 88 | 89 | /** 90 | * @returns The inner size of the vector. 91 | */ 92 | uint32_t innerSize(); 93 | 94 | /** 95 | * @returns The outer size of the vector. 96 | */ 97 | uint32_t outerSize(); 98 | 99 | /** 100 | * @returns The number of non-zero elements in the vector. 101 | */ 102 | uint32_t nonZeros(); 103 | 104 | /** 105 | * @returns The length of the vector. 
106 | */ 107 | uint32_t getLength(); 108 | 109 | /** 110 | * @returns A pointer to the values of the vector. 111 | */ 112 | T *getValues() const; 113 | 114 | /** 115 | * @returns A pointer to the inner indices of the vector. 116 | */ 117 | indexT *getInnerIndices() const; 118 | 119 | ///@} 120 | 121 | //* Utility Methods *// 122 | /** @name Utility Methods 123 | */ 124 | ///@{ 125 | 126 | /** 127 | * Prints the vector dense to the console. 128 | */ 129 | void print(); 130 | 131 | ///@} 132 | 133 | //* Operator Overloads *// 134 | 135 | // Coefficient Access Operator 136 | T operator[](uint32_t index); 137 | 138 | // Assignment Operator 139 | typename SparseMatrix::Vector operator=( 140 | typename SparseMatrix::Vector &vec); 141 | 142 | // Equality Operator 143 | bool operator==( 144 | typename SparseMatrix::Vector &vec); 145 | 146 | // Inequality Operator 147 | bool operator!=( 148 | typename SparseMatrix::Vector &vec); 149 | 150 | }; // class Vector 151 | 152 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/Vectors/IVCSC_Vector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file IVCSC_Vector.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief IVCSC Vector Class Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * @tparam T Type of the values in the matrix 15 | * @tparam indexT Type of the indices in the matrix 16 | * @tparam compressionLevel Compression level of the matrix 17 | * @tparam columnMajor Storage order of the matrix 18 | * 19 | * IVCSC Vector Class \n \n 20 | * The IVCSC Vector class is a vector class that is used to work with 21 | * IVCSC matrices. It works with the same logic as the corresponding 22 | * matrix compression level and is useful when working with these matrices. 23 | */ 24 | template 25 | class SparseMatrix::Vector { 26 | private: 27 | //* Private Class Variables *// 28 | 29 | size_t size = 0; // size of the vector in bytes 30 | 31 | void *data = nullptr; // data of the vector 32 | void *endPtr = nullptr; // pointer to the end of the vector 33 | 34 | uint32_t length = 0; // length of the vector 35 | 36 | uint8_t indexWidth = 1; // width of the indices 37 | 38 | uint32_t nnz = 0; // number of non-zero elements in the vector 39 | 40 | //* Private Class Methods *// 41 | 42 | // User checks to confirm a valid vector 43 | void userChecks(); 44 | 45 | // Calculates the size of the vector in bytes 46 | void calculateCompSize(); 47 | 48 | public: 49 | //* Constructors & Destructor *// 50 | /** @name Constructors 51 | */ 52 | ///@{ 53 | 54 | /** 55 | * Default Vector Constructor \n \n 56 | * Creates an empty vector with everything set to null/zero. 57 | */ 58 | Vector(){}; 59 | 60 | /** 61 | * Length Vector Constructor \n \n 62 | * Creates a vector of the given length with everything set to null/zero. 63 | */ 64 | Vector(uint32_t length); 65 | 66 | /** 67 | * IVSparse Matrix to Vector Constructor \n \n 68 | * Creates a vector from a IVCSC Matrix at the given vector index. 69 | * 70 | * @note Can only get a vector from a matrix in the storage order of the 71 | * matrix. 72 | */ 73 | Vector(IVSparse::SparseMatrix &mat, uint32_t vec); 74 | 75 | /** 76 | * Deep Copy Vector Constructor \n \n 77 | * Creates a deep copy of the given vector. 78 | */ 79 | Vector(IVSparse::SparseMatrix::Vector &vec); 80 | 81 | /** 82 | * Destroys the vector. 
83 | */ 84 | ~Vector(); 85 | 86 | ///@} 87 | 88 | //* Getters *// 89 | /** @name Getters 90 | */ 91 | ///@{ 92 | 93 | /** 94 | * @returns The coefficient at the given index. 95 | */ 96 | T coeff(uint32_t index); 97 | 98 | /** 99 | * @returns A pointer to the beginning of the vector. 100 | */ 101 | void *begin(); 102 | 103 | /** 104 | * @returns A pointer to the end of the vector. 105 | */ 106 | void *end(); 107 | 108 | /** 109 | * @returns The size of the vector in bytes. 110 | */ 111 | size_t byteSize(); 112 | 113 | /** 114 | * @returns The inner size of the vector. 115 | */ 116 | uint32_t innerSize(); 117 | 118 | /** 119 | * @returns The outer size of the vector. 120 | */ 121 | uint32_t outerSize(); 122 | 123 | /** 124 | * @returns The number of non-zero elements in the vector. 125 | */ 126 | uint32_t nonZeros(); 127 | 128 | /** 129 | * @returns The length of the vector. 130 | */ 131 | uint32_t getLength(); 132 | 133 | ///@} 134 | 135 | //* Utility Methods *// 136 | /** @name Utility Methods 137 | */ 138 | ///@{ 139 | 140 | /** 141 | * Prints the vector dense to the console. 142 | */ 143 | void print(); 144 | 145 | ///@} 146 | 147 | //* Calculations *// 148 | /** @name Calculation Methods 149 | */ 150 | ///@{ 151 | 152 | /** 153 | * @returns The norm of the vector. 154 | */ 155 | double norm(); 156 | 157 | /** 158 | * @returns The sum of the vector. 159 | */ 160 | T sum(); 161 | 162 | /** 163 | * @returns The dot product of the vector and an Eigen Dense Vector. 164 | */ 165 | double dot(Eigen::Matrix &other); 166 | 167 | /** 168 | * @returns The dot product of the vector and an Eigen Sparse Vector. 169 | */ 170 | double dot(Eigen::SparseVector &other); 171 | 172 | ///@} 173 | 174 | //* Operator Overloads *// 175 | 176 | // In place scalar multiplication 177 | void operator*=(T scalar); 178 | 179 | // scalar multiplication 180 | typename IVSparse::SparseMatrix::Vector operator*(T scalar); 181 | 182 | // equality operator 183 | bool operator==(typename SparseMatrix::Vector &vec); 185 | 186 | // inequality operator 187 | bool operator!=(typename SparseMatrix::Vector &vec); 189 | 190 | // coefficient access 191 | T operator[](uint32_t index); 192 | 193 | // boolean operator 194 | operator bool() { return (char *)endPtr - indexWidth > data; }; 195 | 196 | // assignment operator 197 | typename SparseMatrix::Vector 198 | operator=(typename SparseMatrix::Vector &vec); 199 | 200 | }; // class Vector 201 | 202 | } // namespace IVSparse -------------------------------------------------------------------------------- /inst/include/src/Vectors/VCSC_Vector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file VCSC_Vector.hpp 3 | * @author Skyler Ruiter and Seth Wolfgang 4 | * @brief VCSC Vector Class Declerations 5 | * @version 0.1 6 | * @date 2023-07-03 7 | */ 8 | 9 | #pragma once 10 | 11 | namespace IVSparse { 12 | 13 | /** 14 | * VCSC Vector Class \n \n 15 | * The VCSC Vector class is a vector class that is used to work with 16 | * VCSC matrices. It works with the same logic as the corresponding 17 | * matrix compression level and is useful when working with these matrices. 
18 | */ 19 | template 20 | class SparseMatrix::Vector { 21 | private: 22 | //* Private Class Variables *// 23 | 24 | size_t size = 0; // size of the vector in bytes 25 | 26 | std::map> data; // map of values to indices 27 | 28 | uint32_t length = 0; // length of the vector 29 | 30 | uint8_t indexWidth = 1; // width of the indices 31 | 32 | uint32_t nnz = 0; // number of non-zero elements in the vector 33 | 34 | //* Private Class Methods *// 35 | 36 | // User checks to confirm a valid vector 37 | void userChecks(); 38 | 39 | // Calculates the size of the vector in bytes 40 | void calculateCompSize(); 41 | 42 | public: 43 | //* Constructors & Destructor *// 44 | /** @name Constructors 45 | */ 46 | ///@{ 47 | 48 | /** 49 | * Default Vector Constructor \n \n 50 | * Creates an empty vector with everything set to null/zero. 51 | */ 52 | Vector(){}; 53 | 54 | /** 55 | * IVSparse Matrix to Vector Constructor \n \n 56 | * Creates a vector from a VCSC Matrix at the given vector index. 57 | * 58 | * @note Can only get a vector from a matrix in the storage order of the 59 | * matrix. 60 | */ 61 | Vector(IVSparse::SparseMatrix &mat, uint32_t vec); 62 | 63 | /** 64 | * Deep Copy Vector Constructor \n \n 65 | * Creates a deep copy of the given vector. 66 | */ 67 | Vector(IVSparse::SparseMatrix::Vector &vec); 68 | 69 | /** 70 | * Destroys the vector. 71 | */ 72 | ~Vector(); 73 | 74 | ///@} 75 | 76 | //* Getters *// 77 | /** @name Getters 78 | */ 79 | ///@{ 80 | 81 | /** 82 | * @returns The coefficient at the given index. 83 | */ 84 | T coeff(uint32_t index); 85 | 86 | /** 87 | * @returns The size of the vector in bytes. 88 | */ 89 | size_t byteSize(); 90 | 91 | /** 92 | * @returns The inner size of the vector. 93 | */ 94 | uint32_t innerSize(); 95 | 96 | /** 97 | * @returns The outer size of the vector. 98 | */ 99 | uint32_t outerSize(); 100 | 101 | /** 102 | * @returns The number of non-zero elements in the vector. 103 | */ 104 | uint32_t nonZeros(); 105 | 106 | /** 107 | * @returns The length of the vector. 108 | */ 109 | uint32_t getLength(); 110 | 111 | /** 112 | * @returns A pointer to the values of the vector. 113 | */ 114 | std::vector getValues(); 115 | 116 | /** 117 | * @returns A pointer to the counts of the vector. 118 | */ 119 | std::vector getCounts(); 120 | 121 | /** 122 | * @returns A pointer to the indices of the vector. 123 | */ 124 | std::vector getIndices(); 125 | 126 | /** 127 | * @returns The underlying data map 128 | */ 129 | std::map> getData(); 130 | 131 | /** 132 | * @returns The number of unique values in the vector. 133 | */ 134 | indexT uniqueVals(); 135 | 136 | ///@} 137 | 138 | //* Utility Methods *// 139 | /** @name Utility Methods 140 | */ 141 | ///@{ 142 | 143 | /** 144 | * Prints the vector dense to the console. 145 | */ 146 | void print(); 147 | 148 | ///@} 149 | 150 | //* Calculations *// 151 | /** @name Calculation Methods 152 | */ 153 | ///@{ 154 | 155 | /** 156 | * @returns The norm of the vector. 157 | */ 158 | double norm(); 159 | 160 | /** 161 | * @returns The sum of the vector. 162 | */ 163 | T sum(); 164 | 165 | /** 166 | * @returns The dot product of the vector and an Eigen Dense Vector. 167 | */ 168 | double dot(Eigen::Matrix &other); 169 | 170 | /** 171 | * @returns The dot product of the vector and an Eigen Sparse Vector. 
172 | */ 173 | double dot(Eigen::SparseVector &other); 174 | 175 | ///@} 176 | 177 | //* Operator Overloads *// 178 | 179 | // Coefficient Access Operator 180 | T operator[](uint32_t index); 181 | 182 | // Assignment Operator 183 | typename SparseMatrix::Vector operator=( 184 | typename SparseMatrix::Vector &vec); 185 | 186 | // Equality Operators 187 | bool operator==( 188 | typename SparseMatrix::Vector &vec); 189 | 190 | // Inequality Operators 191 | bool operator!=( 192 | typename SparseMatrix::Vector &vec); 193 | 194 | // Scalar Multiplication Operator (In Place) 195 | void operator*=(T scalar); 196 | 197 | // Scalar Multiplication Operator (Copy) 198 | typename IVSparse::SparseMatrix::Vector operator*( 199 | T scalar); 200 | 201 | }; // class Vector 202 | 203 | } // namespace IVSparse -------------------------------------------------------------------------------- /man/AnnotateNMF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotateNMF.R 3 | \name{AnnotateNMF} 4 | \alias{AnnotateNMF} 5 | \alias{AnnotateNMF.DimReduc} 6 | \alias{AnnotateNMF.Seurat} 7 | \alias{AnnotateNMF.nmf} 8 | \title{annotate an NMF model} 9 | \usage{ 10 | AnnotateNMF(object, ...) 11 | 12 | \method{AnnotateNMF}{DimReduc}( 13 | object, 14 | meta.data = NULL, 15 | columns = NULL, 16 | designs = NULL, 17 | center = TRUE, 18 | scale = FALSE, 19 | max.levels = 200, 20 | ... 21 | ) 22 | 23 | \method{AnnotateNMF}{Seurat}(object, columns = NULL, reduction = "nmf", ...) 24 | 25 | \method{AnnotateNMF}{nmf}( 26 | object, 27 | meta.data, 28 | columns = NULL, 29 | designs = NULL, 30 | center = TRUE, 31 | scale = FALSE, 32 | max.levels = 200, 33 | ... 34 | ) 35 | } 36 | \arguments{ 37 | \item{object}{an object suitable for annotation (Seurat, DimReduc, or nmf)} 38 | 39 | \item{...}{not implemented} 40 | 41 | \item{meta.data}{a data.frame, if one is not already part of the object} 42 | 43 | \item{columns}{factor columns of meta.data (see below) to annotate against} 44 | 45 | \item{designs}{named list of design matrices (supersedes meta.data/columns)} 46 | 47 | \item{center}{center the factor matrix for testing? (TRUE)} 48 | 49 | \item{scale}{scale the factor matrix for testing? 
(FALSE)} 50 | 51 | \item{max.levels}{maximum number of levels a factor may have in order to be included in analysis} 52 | 53 | \item{reduction}{the reductions slot in the Seurat object containing the model to annotate} 54 | } 55 | \description{ 56 | annotate an NMF model 57 | 58 | Annotate NMF model with cell or sample metadata 59 | 60 | Annotate NMF model with cell metadata 61 | } 62 | \details{ 63 | Maps factor information in an RcppML::nmf object against meta.data 64 | } 65 | \examples{ 66 | \dontrun{ 67 | get_pbmc3k_data() \%>\% 68 | NormalizeData() \%>\% 69 | RunNMF() -> pbmc3k 70 | AnnotateNMF(pbmc3k) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /man/AnnotationPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnotationPlot.R 3 | \name{AnnotationPlot} 4 | \alias{AnnotationPlot} 5 | \alias{AnnotationPlot.Seurat} 6 | \alias{AnnotationPlot.DimReduc} 7 | \alias{AnnotationPlot.nmf} 8 | \alias{AnnotationPlot.list} 9 | \alias{AnnotationPlot.data.frame} 10 | \title{Plot annotations from an NMF model or other compatible objects.} 11 | \usage{ 12 | AnnotationPlot(object, ...) 13 | 14 | \method{AnnotationPlot}{Seurat}( 15 | object, 16 | plot.field = NULL, 17 | reduction = "nmf", 18 | dropEmpty = TRUE, 19 | ... 20 | ) 21 | 22 | \method{AnnotationPlot}{DimReduc}(object, plot.field = NULL, dropEmpty = TRUE, ...) 23 | 24 | \method{AnnotationPlot}{nmf}(object, plot.field = NULL, dropEmpty = TRUE, ...) 25 | 26 | \method{AnnotationPlot}{list}(object, plot.field, dropEmpty = TRUE, ...) 27 | 28 | \method{AnnotationPlot}{data.frame}(object, plot.field, dropEmpty = TRUE, ...) 29 | } 30 | \arguments{ 31 | \item{object}{a compatible object (Seurat, DimReduc, nmf, data.frame)} 32 | 33 | \item{...}{additional arguments passed to called functions} 34 | 35 | \item{plot.field}{metadata grouping to plot} 36 | 37 | \item{reduction}{the reduction to plot (default is 'nmf')} 38 | 39 | \item{dropEmpty}{drop factors without significant associations? (TRUE)} 40 | } 41 | \value{ 42 | a ggplot2 object 43 | } 44 | \description{ 45 | After running \code{AnnotateNMF}, this function returns 46 | a dot plot of the results 47 | 48 | After running \code{AnnotateNMF}, this function returns 49 | a dot plot of the results. Right now the code is the same as for DimReduc. 
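A compact sketch of the annotation workflow documented above, assuming the singlet and Seurat packages are installed; the pbmc3k object and the "cell_type" metadata column follow the examples already shown in these man pages, and all other arguments use the documented defaults.

library(Seurat)
library(singlet)

pbmc3k <- get_pbmc3k_data()                 # example dataset used throughout the man pages
pbmc3k <- NormalizeData(pbmc3k)
pbmc3k <- RunNMF(pbmc3k)                    # k = NULL -> automatic rank determination
pbmc3k <- AnnotateNMF(pbmc3k, columns = "cell_type", reduction = "nmf")
AnnotationPlot(pbmc3k, plot.field = "cell_type", dropEmpty = TRUE)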
50 | } 51 | \examples{ 52 | \dontrun{ 53 | get_pbmc3k_data() \%>\% NormalizeData \%>\% RunNMF \%>\% AnnotateNMF -> pbmc3k 54 | AnnotationPlot(pbmc3k, "cell_type") 55 | } 56 | \dontrun{ 57 | dat <- pbmc3k@reductions$nmf@misc$annotations$cell_type 58 | AnnotationPlot(dat, "cell_type") 59 | 60 | # if running interactively: 61 | library(plotly) 62 | ggplotly(AnnotationPlot(dat, "cell_type")) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /man/FindLocalNeighbors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FindLocalNeighbors.R 3 | \name{FindLocalNeighbors.Seurat} 4 | \alias{FindLocalNeighbors.Seurat} 5 | \alias{FindLocalNeighbors} 6 | \title{(Shared) Local Nearest-neighbor graph construction} 7 | \usage{ 8 | \method{FindLocalNeighbors}{Seurat}( 9 | object, 10 | k.param = 20, 11 | spatial.radius = 4, 12 | spatial.reduction = "spatial", 13 | reduction = "nmf", 14 | nn.metric = "jaccard", 15 | use.dist = FALSE, 16 | compute.SNN = TRUE, 17 | prune.SNN = 1/15, 18 | prune.KNN = 1/10, 19 | return.dist = FALSE, 20 | verbose = FALSE, 21 | dims = NULL, 22 | graph.name = NULL, 23 | threads = 0, 24 | ... 25 | ) 26 | 27 | FindLocalNeighbors(object, ...) 28 | } 29 | \arguments{ 30 | \item{object}{An object} 31 | 32 | \item{k.param}{Defines k for the k-nearest neighbor algorithm} 33 | 34 | \item{spatial.reduction}{Spatial coordinates to use as input for building the (S)NN. Ensure that radius is given in the same units as spatial coordinates, and that spatial coordinates are fixed on both axes (not scaled).} 35 | 36 | \item{reduction}{Reduction to use as input for building the (S)NN} 37 | 38 | \item{nn.metric}{Distance metric for nearest neighbors search. Options include: jaccard, cosine, euclidean, manhattan, hamming, and kl (kullback-leibler divergence).} 39 | 40 | \item{use.dist}{use distance instead of similarity (i.e. find k-furthest-neighbors). Useful for edge detection. Applies only to \code{metric = c("jaccard", "cosine")}.} 41 | 42 | \item{compute.SNN}{also compute the shared nearest neighbor graph} 43 | 44 | \item{prune.SNN}{Sets the cutoff for acceptable Jaccard index when computing the neighborhood overlap for the SNN construction. Any edges with values less than or equal to this will be set to 0 and removed from the SNN graph. Essentially sets the stringency of pruning (0 = no pruning, 1 = prune everything).} 45 | 46 | \item{prune.KNN}{Sets the cutoff for acceptable distance when computing the neighborhood for the Local KNN graph construction. Any edges with values less than or equal to this will be set to 0 and removed from the KNN graph. Essentially sets the stringency of pruning (0 = no pruning, 1 = prune everything when distance is "jaccard" or "cosine", otherwise whatever the equivalent is in the distance specified).} 47 | 48 | \item{return.dist}{return distances to nearest neighbors rather than a binary result} 49 | 50 | \item{verbose}{print output to the console} 51 | 52 | \item{dims}{Dimensions of the reduction to use as input (\code{NULL} = use all dimensions in reduction)} 53 | 54 | \item{graph.name}{Naming parameter for stored (S)NN graph. Default is \code{_local_(s)nn}. To store both the neighbor graph and the shared nearest neighbor graph, you must supply a vector containing two names to the \code{graph.name} parameter. 
The first element in the vector will be used to store the nearest neighbor graph, and the second element will be used to store the shared nearest neighbor graph. If only one name is supplied, only the nearest neighbor graph is stored.} 55 | 56 | \item{threads}{number of threads to use for parallelization} 57 | 58 | \item{...}{not implemented} 59 | } 60 | \value{ 61 | an object (Seurat object with graph, or just a graph) 62 | } 63 | \description{ 64 | Computes the \code{k.param} nearest neighbors within a spatial radius for a given dataset. Can also optionally (via \code{compute.SNN}), construct a shared nearest neighbor graph by calculating the neighborhood overlap (Jaccard index) between every cell and it's \code{k.param} nearest neighbors. Local KNN calculations are exact. 65 | } 66 | \details{ 67 | IMPORTANT: You must make sure that your \code{radius} is given in the same units as your 68 | \code{spatial.reduction} coordinates, and that your \code{spatial.reduction} gives fixed 69 | coordinates. This means distance on x-coordinates must be equal to distance on y-coordinates. 70 | Many spatial assays store distances in x and y scaled between 0 and 1, which is NOT going 71 | to work. You must use \code{\link{RescaleSpatial}} to convert back to a fixed coordinate 72 | system. If your radius is 5, this function will look for neighbors within a distance of 5 73 | from a given point as determined by your spatial coordinates. 74 | } 75 | -------------------------------------------------------------------------------- /man/GSEAHeatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GSEAHeatmap.R 3 | \name{GSEAHeatmap} 4 | \alias{GSEAHeatmap} 5 | \title{Plot GSEA results on a heatmap} 6 | \usage{ 7 | GSEAHeatmap( 8 | object, 9 | reduction = "nmf", 10 | max.terms.per.factor = 3, 11 | dropcommon = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{object}{Seurat or RcppML::nmf object} 16 | 17 | \item{reduction}{a dimensional reduction for which GSEA analysis has been performed} 18 | 19 | \item{max.terms.per.factor}{show this number of top terms for each factor} 20 | 21 | \item{dropcommon}{drop broadly enriched terms across factors? (TRUE)} 22 | } 23 | \value{ 24 | ggplot2 object 25 | } 26 | \description{ 27 | Plot top GSEA terms for each NMF factor on a heatmap 28 | } 29 | -------------------------------------------------------------------------------- /man/GetBestRank.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetBestRank.R 3 | \name{GetBestRank} 4 | \alias{GetBestRank} 5 | \title{determine the appropriate rank for an AutoNMF decomposition} 6 | \usage{ 7 | GetBestRank(df, tol.overfit = 1e-04, ...) 
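# A hedged sketch of the local neighbor search documented above, written as
# plain R rather than Rd markup. `spatial_obj` is a hypothetical Seurat object
# that already carries an "nmf" reduction and a "spatial" reduction holding
# x/y coordinates; argument values are the documented defaults.
spatial_obj <- RescaleSpatial(spatial_obj, reduction = "spatial")  # fixed-unit coordinates, as the Details require
spatial_obj <- FindLocalNeighbors(
  spatial_obj,
  k.param           = 20,
  spatial.radius    = 4,           # same units as the rescaled spatial coordinates
  spatial.reduction = "spatial",
  reduction         = "nmf",
  graph.name        = c("local_nn", "local_snn")  # store both the KNN and the SNN graph
)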
8 | } 9 | \arguments{ 10 | \item{df}{a data.frame of output from crossvalidation: rep, rank, error} 11 | 12 | \item{tol.overfit}{tolerance for increase in test set reconstruction error relative to minimum observed value during fitting} 13 | 14 | \item{...}{not implemented} 15 | } 16 | \value{ 17 | the lowest rank that minimizes the reconstruction error 18 | } 19 | \description{ 20 | determine the appropriate rank for an AutoNMF decomposition 21 | } 22 | -------------------------------------------------------------------------------- /man/MetadataSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MetadataHeatmap.R, R/MetadataSummary.R, 3 | % R/plot.nmf_metadata_summary.R 4 | \name{MetadataHeatmap} 5 | \alias{MetadataHeatmap} 6 | \alias{MetadataSummary} 7 | \alias{plot.nmf_metadata_summary} 8 | \title{Summarize contribution of sample groups to NMF factors} 9 | \usage{ 10 | MetadataHeatmap(x) 11 | 12 | MetadataSummary(h, factor_data, reorder = TRUE) 13 | 14 | \method{plot}{nmf_metadata_summary}(x, ...) 15 | } 16 | \arguments{ 17 | \item{x}{a data.frame} 18 | 19 | \item{h}{matrix giving factors as rows and samples as columns} 20 | 21 | \item{factor_data}{a factor of the same length as the number of columns in \code{h}} 22 | 23 | \item{reorder}{sort results by proportion in each group (uses \code{hclust} if >2 groups)} 24 | 25 | \item{...}{not implemented} 26 | } 27 | \value{ 28 | \code{data.frame} of mean weights for each sample group within each factor of class \code{nmf_metadata_summary}. Use the \code{plot} method to visualize. 29 | } 30 | \description{ 31 | Calculate the mean weight of samples in discrete and unique groups to each factor 32 | } 33 | -------------------------------------------------------------------------------- /man/PreprocessData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PreprocessData.R 3 | \name{PreprocessData.Seurat} 4 | \alias{PreprocessData.Seurat} 5 | \alias{PreprocessData.Assay} 6 | \alias{PreprocessData.dgCMatrix} 7 | \alias{PreprocessData} 8 | \title{Normalize count data} 9 | \usage{ 10 | \method{PreprocessData}{Seurat}(object, scale.factor = 10000, assay = NULL, ...) 11 | 12 | \method{PreprocessData}{Assay}(object, scale.factor = 10000, ...) 13 | 14 | \method{PreprocessData}{dgCMatrix}(object, scale.factor = 10000, ...) 15 | 16 | PreprocessData(object, scale.factor, ...) 
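# A minimal sketch of MetadataSummary() as documented above. The Seurat
# accessors and the "cell_type" column are assumptions borrowed from the
# package examples; MetadataSummary() itself only needs a factors-by-samples
# matrix and a grouping factor of matching length.
h <- t(pbmc3k@reductions$nmf@cell.embeddings)   # factors as rows, cells as columns
groups <- factor(pbmc3k$cell_type)              # one group label per cell
summ <- MetadataSummary(h, groups, reorder = TRUE)
plot(summ)                                      # dispatches to plot.nmf_metadata_summary()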
17 | } 18 | \arguments{ 19 | \item{object}{Seurat object} 20 | 21 | \item{scale.factor}{value by which to multiply all columns after unit normalization and before \code{log1p} transformation} 22 | 23 | \item{assay}{assay in which the counts matrix resides} 24 | 25 | \item{...}{arguments to \code{Seurat::LogNormalize}} 26 | } 27 | \description{ 28 | Standard log-normalization equivalent to \code{Seurat::LogNormalize} 29 | } 30 | -------------------------------------------------------------------------------- /man/ProjectData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ProjectData.R 3 | \name{ProjectData.Seurat} 4 | \alias{ProjectData.Seurat} 5 | \alias{ProjectData} 6 | \alias{ProjectData.SingleCellExperiment} 7 | \title{Project data onto a factor model} 8 | \usage{ 9 | \method{ProjectData}{Seurat}( 10 | object, 11 | w, 12 | split.by = NULL, 13 | assay = NULL, 14 | L1 = 0.01, 15 | L2 = 0, 16 | reduction.name = "nmf_projection", 17 | reduction.key = "NNLS_", 18 | threads = 0, 19 | reorder = FALSE, 20 | ... 21 | ) 22 | 23 | \method{ProjectData}{SingleCellExperiment}( 24 | object, 25 | w, 26 | split.by = NULL, 27 | assay = "logcounts", 28 | L1 = 0.01, 29 | L2 = 0, 30 | reduction.name = "NNLS", 31 | reduction.key = "NMF_", 32 | threads = 0, 33 | reorder = FALSE, 34 | ... 35 | ) 36 | 37 | ProjectData(object, ...) 38 | } 39 | \arguments{ 40 | \item{object}{A Seurat or SingleCellExperiment object} 41 | 42 | \item{w}{factor loadings with nrow(w) equal to nrow(object)} 43 | 44 | \item{split.by}{column name in \code{colData} giving a \code{factor} with multiple levels for splitting. Data will be weighted such that each level in the factor contributes equally to the NMF model.} 45 | 46 | \item{assay}{Assay to use, defaults to logcounts} 47 | 48 | \item{L1}{L1/LASSO penalty to increase sparsity of the model} 49 | 50 | \item{L2}{L2/Ridge-like penalty to increase angles between factors} 51 | 52 | \item{reduction.name}{Name to store resulting DimReduc object as ("NMF")} 53 | 54 | \item{reduction.key}{Key for resulting DimReduc ("NMF")} 55 | 56 | \item{threads}{number of threads to use (0 = let OpenMP use all available threads)} 57 | 58 | \item{reorder}{reorder the factors of the projection by d? (FALSE)} 59 | 60 | \item{...}{not implemented} 61 | } 62 | \value{ 63 | Returns a Seurat object with the projection stored in the reductions slot 64 | 65 | a SingleCellExperiment with projection stored in reducedDim(, "NNLS") 66 | } 67 | \description{ 68 | Non-negative Least Squares (NNLS) projection of assay data onto a factor model for transfer learning 69 | 70 | Non-negative Least Squares (NNLS) projection of assay data onto a factor model for transfer learning 71 | } 72 | \details{ 73 | Use \code{set.seed()} to guarantee reproducibility! 74 | 75 | Use \code{set.seed()} to guarantee reproducibility! 
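A hedged sketch of the projection workflow described above: reuse the gene-by-factor loadings of a fitted NMF model to embed a second dataset by NNLS. The `ref` and `query` objects and the reduction slot access are assumptions; argument values are the documented defaults.

set.seed(42)                                    # the Details above recommend set.seed() for reproducibility
w <- ref@reductions$nmf@feature.loadings        # gene x factor loadings from a reference fit (assumed slot layout)
query <- PreprocessData(query)                  # log-normalize the query counts
query <- ProjectData(query, w = w, reduction.name = "nmf_projection")
# the projected cell embeddings are then stored in
# query@reductions$nmf_projection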
76 | } 77 | \seealso{ 78 | \code{\link{RunLNMF}}, \code{\link{MetadataSummary}} 79 | 80 | \code{\link{RunLNMF}}, \code{\link{MetadataSummary}} 81 | } 82 | -------------------------------------------------------------------------------- /man/RankPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RankPlot.R 3 | \name{RankPlot} 4 | \alias{RankPlot} 5 | \alias{RankPlot.Seurat} 6 | \title{Plot NMF cross-validation results} 7 | \usage{ 8 | RankPlot(object, reduction = "nmf", ...) 9 | 10 | \method{RankPlot}{Seurat}(object, reduction = "nmf", detail.level = 1, ...) 11 | } 12 | \arguments{ 13 | \item{object}{a Seurat object or a \code{data.frame} that is the result of \code{RunNMF}} 14 | 15 | \item{reduction}{the NMF reduction slot name (result of \code{RunNMF} where \code{k} was an array)} 16 | 17 | \item{...}{not implemented} 18 | 19 | \item{detail.level}{of detail to plot, \code{1} for test set reconstruction error at convergence of each factorization, \code{2} for test set reconstruction error at each fitting iteration of each factorization} 20 | } 21 | \value{ 22 | A ggplot2 object 23 | } 24 | \description{ 25 | Given a NMF reduction at multiple ranks, plot rank vs. test set reconstruction error to determine the optimal rank. 26 | 27 | S3 method for Seurat that runs the \code{singlet::RunNMF} function. 28 | } 29 | -------------------------------------------------------------------------------- /man/RasterizeRowwise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rasterize_rowwise.R 3 | \name{RasterizeRowwise} 4 | \alias{RasterizeRowwise} 5 | \title{Bin together values from every block of \code{n} rows and calculate mean value, with a sparse \code{dgCMatrix} as input and a dense \code{matrix} as output. This technique is useful in some genomics applications.} 6 | \usage{ 7 | RasterizeRowwise(A, n = 10, threads = 0) 8 | } 9 | \arguments{ 10 | \item{A}{matrix to be rasterized} 11 | 12 | \item{n}{row-wise binning size} 13 | 14 | \item{threads}{number of threads to use (0 to let OpenMP decide how many are available and use them all)} 15 | } 16 | \description{ 17 | Bin together values from every block of \code{n} rows and calculate mean value, with a sparse \code{dgCMatrix} as input and a dense \code{matrix} as output. This technique is useful in some genomics applications. 18 | } 19 | -------------------------------------------------------------------------------- /man/RescaleSpatial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RescaleSpatial.R 3 | \name{RescaleSpatial.Seurat} 4 | \alias{RescaleSpatial.Seurat} 5 | \alias{RescaleSpatial} 6 | \title{Rescale spatial coordinates} 7 | \usage{ 8 | \method{RescaleSpatial}{Seurat}(object, reduction = "spatial") 9 | } 10 | \arguments{ 11 | \item{object}{Seurat object} 12 | 13 | \item{reduction}{the name of the spatial reduction to use} 14 | } 15 | \value{ 16 | Seurat object with rescaled spatial coordinates 17 | } 18 | \description{ 19 | Convert coordinates in the "spatial" reduction to natural numbers rather than values between 0 and 1. This allows for intuitive graph construction based on the radius surrounding any given cell (i.e. 
a radius of one corresponds to all cells next to the cell of interest) 20 | } 21 | -------------------------------------------------------------------------------- /man/RunGCNMF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RescaleSpatial.R, R/RunGCNMF.R 3 | \name{RunGCNMF} 4 | \alias{RunGCNMF} 5 | \alias{RescaleSpatial} 6 | \alias{RunGCNMF.Seurat} 7 | \title{Run Graph-Convolutional Non-negative Matrix Factorization} 8 | \usage{ 9 | RescaleSpatial(object, ...) 10 | 11 | \method{RunGCNMF}{Seurat}( 12 | object, 13 | graph, 14 | k, 15 | split.by = NULL, 16 | assay = NULL, 17 | tol = 1e-05, 18 | L1 = 0.01, 19 | L2 = 0, 20 | verbose = 2, 21 | reduction.name = "gcnmf", 22 | reduction.key = "GCNMF_", 23 | maxit = 100, 24 | threads = 0, 25 | features = NULL, 26 | ... 27 | ) 28 | 29 | RunGCNMF(object, ...) 30 | } 31 | \arguments{ 32 | \item{object}{A Seurat or SingleCellExperiment object} 33 | 34 | \item{...}{not implemented} 35 | 36 | \item{graph}{A graph to use, either directed or undirected} 37 | 38 | \item{k}{rank of the factorization (no automatic rank determination for GCNMF. Use \code{\link{RunNMF}}). Alternatively, specify an initial \code{w} matrix of dimensions \code{m x k}, where \code{m} is the number of rows in the matrix to be factorized.} 39 | 40 | \item{split.by}{column name in \code{colData} giving a \code{factor} with multiple levels for splitting. Data will be weighted such that each level in the factor contributes equally to the NMF model.} 41 | 42 | \item{assay}{Assay to use, defaults to logcounts} 43 | 44 | \item{tol}{tolerance of the fit (correlation distance of the model across consecutive iterations). Cross-validation fits are 10x coarser than this tolerance.} 45 | 46 | \item{L1}{L1/LASSO penalty to increase sparsity of the model} 47 | 48 | \item{L2}{L2/Ridge-like penalty to increase angles between factors} 49 | 50 | \item{verbose}{print updates to console} 51 | 52 | \item{reduction.name}{Name to store resulting DimReduc object as ("NMF")} 53 | 54 | \item{reduction.key}{Key for resulting DimReduc ("NMF")} 55 | 56 | \item{maxit}{maximum number of fitting iterations} 57 | 58 | \item{threads}{number of threads to use (0 = let OpenMP use all available threads)} 59 | 60 | \item{features}{unused for this method} 61 | } 62 | \value{ 63 | Returns a Seurat object with the GCNMF model stored in the reductions slot 64 | } 65 | \description{ 66 | Run NMF with weighted convolution determined by edges in a graph of dimensions \code{n x n}, where \code{n} is the number of columns in the matrix. 67 | } 68 | \details{ 69 | Use \code{set.seed()} to guarantee reproducibility! 70 | } 71 | \seealso{ 72 | \code{\link{RunNMF}} 73 | } 74 | -------------------------------------------------------------------------------- /man/RunGSEA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunGSEA.R 3 | \name{RunGSEA} 4 | \alias{RunGSEA} 5 | \title{Run Gene Set Enrichment Analysis on a Reduction} 6 | \usage{ 7 | RunGSEA( 8 | object, 9 | reduction = "nmf", 10 | species = "Homo sapiens", 11 | category = "C5", 12 | min.size = 10, 13 | max.size = 500, 14 | dims = NULL, 15 | verbose = TRUE, 16 | padj.sig = 0.01, 17 | ... 
18 | ) 19 | } 20 | \arguments{ 21 | \item{object}{a Seurat or RcppML::nmf object} 22 | 23 | \item{reduction}{dimensional reduction to use (if Seurat)} 24 | 25 | \item{species}{species for which to load gene sets} 26 | 27 | \item{category}{msigdbr gene set category (i.e. "H", "C5", etc.)} 28 | 29 | \item{min.size}{minimum number of terms in a gene set} 30 | 31 | \item{max.size}{maximum number of terms in a gene set} 32 | 33 | \item{dims}{factors in the reduction to use, default \code{NULL} for all factors} 34 | 35 | \item{verbose}{print progress to console} 36 | 37 | \item{padj.sig}{significance cutoff for BH-adjusted p-values (default 0.01)} 38 | 39 | \item{...}{additional params to pass to msigdbr} 40 | } 41 | \value{ 42 | a Seurat or nmf object, with GSEA information in the misc slot. BH-adj p-values are on a -log10 scale. 43 | } 44 | \description{ 45 | Run GSEA to identify gene sets that are enriched within NMF factors. 46 | } 47 | -------------------------------------------------------------------------------- /man/RunLNMF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetSharedFactors.R, R/GetUniqueFactors.R, 3 | % R/MetadataPlot.R, R/RunLNMF.R 4 | \name{GetSharedFactors} 5 | \alias{GetSharedFactors} 6 | \alias{GetUniqueFactors} 7 | \alias{MetadataPlot} 8 | \alias{MetadataPlot.Seurat} 9 | \alias{RunLNMF} 10 | \alias{RunLNMF.Seurat} 11 | \title{Run Linked NMF on a Seurat object} 12 | \usage{ 13 | GetSharedFactors(object, split.by, reduction = "lnmf") 14 | 15 | GetUniqueFactors(object, split.by, reduction = "lnmf") 16 | 17 | MetadataPlot(object, ...) 18 | 19 | \method{MetadataPlot}{Seurat}(object, split.by, reduction = "lnmf", ...) 20 | 21 | RunLNMF(object, ...) 22 | 23 | \method{RunLNMF}{Seurat}( 24 | object, 25 | split.by, 26 | reduction.use = "nmf", 27 | reduction.name = "lnmf", 28 | reduction.key = "LNMF_", 29 | verbose = TRUE, 30 | link.cutoff = 0.5, 31 | tol = 1e-05, 32 | maxit = 100, 33 | L1 = 0.01, 34 | L2 = 0, 35 | threads = 0, 36 | ... 37 | ) 38 | } 39 | \arguments{ 40 | \item{object}{A Seurat or SingleCellExperiment object} 41 | 42 | \item{split.by}{column name in \code{@meta.data} giving a \code{Factor} with multiple levels for splitting. Data will be weighted such that each group contributes equally to the LNMF model.} 43 | 44 | \item{reduction}{reduction to use for metadata analysis} 45 | 46 | \item{...}{not implemented} 47 | 48 | \item{reduction.use}{NMF reduction to use for initializing the linked factorization.} 49 | 50 | \item{reduction.name}{name to store resulting DimReduc object as} 51 | 52 | \item{reduction.key}{key for resulting DimReduc} 53 | 54 | \item{verbose}{print fitting progress to console} 55 | 56 | \item{link.cutoff}{if the relative contribution of samples in any given group to a factor falls below \code{link.cutoff}, unlink it from the factor. 
\code{link.cutoff = 1} means a factor must contribute exactly equally before being unlinked.} 57 | 58 | \item{tol}{tolerance of the fit (correlation distance of the model across consecutive iterations).} 59 | 60 | \item{maxit}{maximum number of fitting iterations} 61 | 62 | \item{L1}{L1/LASSO penalty to increase sparsity of the model} 63 | 64 | \item{L2}{L2/Ridge-like penalty to increase angles between factors} 65 | 66 | \item{threads}{number of threads to use (0 = let OpenMP use all available threads)} 67 | } 68 | \value{ 69 | a Seurat object with the NMF model stored in the reductions slot 70 | } 71 | \description{ 72 | Run a Linked Non-negative Matrix Factorization to separate shared and unique signals for integration or signature extraction. 73 | 74 | S3 method for Seurat that runs the \code{singlet::RunLNMF} function. 75 | } 76 | \details{ 77 | Use \code{set.seed()} to guarantee reproducibility! 78 | } 79 | \seealso{ 80 | \code{\link{RunNMF}}, \code{\link{RankPlot}}, \code{\link{MetadataSummary}} 81 | } 82 | -------------------------------------------------------------------------------- /man/RunNMF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunNMF.R 3 | \name{RunNMF.Seurat} 4 | \alias{RunNMF.Seurat} 5 | \alias{RunNMF} 6 | \alias{RunNMF.SingleCellExperiment} 7 | \title{Run NMF on a Seurat object} 8 | \usage{ 9 | \method{RunNMF}{Seurat}( 10 | object, 11 | split.by = NULL, 12 | k = NULL, 13 | assay = NULL, 14 | reps = 3, 15 | tol = 1e-05, 16 | L1 = 0.01, 17 | L2 = 0, 18 | verbose = 2, 19 | reduction.name = "nmf", 20 | reduction.key = "NMF_", 21 | maxit = 100, 22 | test.set.density = 0.05, 23 | learning.rate = 0.8, 24 | tol.overfit = 1e-04, 25 | trace.test.mse = 5, 26 | threads = 0, 27 | features = NULL, 28 | ... 29 | ) 30 | 31 | RunNMF(object, ...) 32 | 33 | \method{RunNMF}{SingleCellExperiment}( 34 | object, 35 | split.by = NULL, 36 | k = NULL, 37 | assay = NULL, 38 | reps = 3, 39 | tol = 1e-05, 40 | L1 = 0.01, 41 | L2 = 0, 42 | verbose = 2, 43 | reduction.name = "nmf", 44 | reduction.key = "NMF_", 45 | maxit = 100, 46 | test.set.density = 0.05, 47 | learning.rate = 0.8, 48 | tol.overfit = 1e-04, 49 | trace.test.mse = 5, 50 | threads = 0, 51 | features = NULL, 52 | ... 53 | ) 54 | } 55 | \arguments{ 56 | \item{object}{A Seurat or SingleCellExperiment object} 57 | 58 | \item{split.by}{column name in \code{colData} giving a \code{factor} with multiple levels for splitting. Data will be weighted such that each level in the factor contributes equally to the NMF model.} 59 | 60 | \item{k}{either \code{NULL} for automatic rank determination, a single integer giving the desired rank, or a vector of ranks to use for cross-validation.} 61 | 62 | \item{assay}{Assay to use, defaults to logcounts} 63 | 64 | \item{reps}{number of replicates for cross-validation} 65 | 66 | \item{tol}{tolerance of the fit (correlation distance of the model across consecutive iterations). 
Cross-validation fits are 10x coarser than this tolerance.} 67 | 68 | \item{L1}{L1/LASSO penalty to increase sparsity of the model} 69 | 70 | \item{L2}{L2/Ridge-like penalty to increase angles between factors} 71 | 72 | \item{verbose}{Level of console output (0/FALSE, 1/TRUE, 2)} 73 | 74 | \item{reduction.name}{Name to store resulting DimReduc object as ("NMF")} 75 | 76 | \item{reduction.key}{Key for resulting DimReduc ("NMF")} 77 | 78 | \item{maxit}{maximum number of fitting iterations} 79 | 80 | \item{test.set.density}{approximate density of the test set (default 0.05)} 81 | 82 | \item{learning.rate}{exponent on step size for automatic rank determination} 83 | 84 | \item{tol.overfit}{tolerance for increase in test set reconstruction error relative to minimum observed value during fitting} 85 | 86 | \item{trace.test.mse}{during automatic rank determination, calculate test set reconstruction error every trace iterations} 87 | 88 | \item{threads}{number of threads to use (0 = let OpenMP use all available threads)} 89 | 90 | \item{features}{unused for this method} 91 | 92 | \item{...}{not implemented} 93 | } 94 | \value{ 95 | Returns a Seurat object with the NMF model stored in the reductions slot 96 | 97 | Returns an SCE with the NMF model stored in reducedDim 98 | } 99 | \description{ 100 | Run Non-negative Matrix Factorization with rank determined by CV 101 | 102 | Run Non-negative Matrix Factorization with rank determined by CV 103 | } 104 | \details{ 105 | Use \code{set.seed()} to guarantee reproducibility! 106 | 107 | Use \code{set.seed()} to guarantee reproducibility! 108 | } 109 | \examples{ 110 | \dontrun{ 111 | get_pbmc3k_data() \%>\% 112 | NormalizeData() \%>\% 113 | RunNMF() -> pbmc3k 114 | } 115 | } 116 | \seealso{ 117 | \code{\link{RunLNMF}}, \code{\link{RankPlot}}, \code{\link{MetadataSummary}} 118 | } 119 | -------------------------------------------------------------------------------- /man/ard_nmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ard_nmf.R 3 | \name{ard_nmf} 4 | \alias{ard_nmf} 5 | \title{Automatic Rank Determination NMF} 6 | \usage{ 7 | ard_nmf( 8 | A, 9 | k_init = 2, 10 | k_max = 100, 11 | k_min = 2, 12 | n_replicates = 1, 13 | tol = 1e-05, 14 | cv_tol = 1e-04, 15 | maxit = 100, 16 | verbose = 1, 17 | L1 = 0.01, 18 | L2 = 0, 19 | threads = 0, 20 | test_density = 0.05, 21 | learning_rate = 1, 22 | tol_overfit = 0.001, 23 | trace_test_mse = 1 24 | ) 25 | } 26 | \arguments{ 27 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames} 28 | 29 | \item{k_init}{initial rank at which to begin search for local minimum. 
\code{k_init = 2} is a reasonable default, higher values can lead to swift convergence to a local minimum.} 30 | 31 | \item{k_max}{maximum rank to consider during automatic rank determination} 32 | 33 | \item{k_min}{minimum rank to consider during automatic rank determination (cannot be less than 2)} 34 | 35 | \item{n_replicates}{number of random test sets} 36 | 37 | \item{tol}{tolerance of the final fit} 38 | 39 | \item{cv_tol}{tolerance for cross-validation} 40 | 41 | \item{maxit}{maximum number of iterations} 42 | 43 | \item{verbose}{no output (0/FALSE), rank-level output (1/TRUE), step size info (2), or individual model fitting updates (3)} 44 | 45 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 46 | 47 | \item{L2}{L2/Ridge penalty to increase angles between factors} 48 | 49 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 50 | 51 | \item{test_density}{fraction of values to include in the test set} 52 | 53 | \item{learning_rate}{exponent on step size for automatic rank determination} 54 | 55 | \item{tol_overfit}{stopping criterion, maximum increase in test set reconstruction error at any iteration compared to test set reconstruction error at \code{trace_test_mse}} 56 | 57 | \item{trace_test_mse}{first iteration at which to calculate test set reconstruction error, and the error to compare all later iterations to when determining whether overfitting has occurred.} 58 | } 59 | \description{ 60 | ARD NMF quickly finds the optimal rank for an NMF model using an exponentially variable learning rate and basic coordinate descent. 61 | } 62 | \details{ 63 | If running ard_nmf() standalone, the following coercion can be useful: 64 | 65 | res <- ard_nmf(data_matrix, ...) 66 | plot(res$cv_data) # rank finding 67 | nmfres <- as(res, "nmf") # coerce to an nmf object for downstream use 68 | 69 | This coercion allows AnnotateNMF, AnnotationPlot, etc. to work on `nmfres` 70 | directly, rather than assuming a Seurat-like class structure is present. 71 | The coercion simply checks the dimensions of res$w, res$d, and res$h, 72 | then shoves all other list elements from res into nmfres@misc. 73 | } 74 | -------------------------------------------------------------------------------- /man/cellxgene_pipeline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellxgene_pipeline.R 3 | \name{cellxgene_pipeline} 4 | \alias{cellxgene_pipeline} 5 | \title{Learn an NMF model from a cellxgene Seurat object} 6 | \usage{ 7 | cellxgene_pipeline(filename, reps = 1, verbose = 3, L1 = 0.05, ...) 
8 | } 9 | \arguments{ 10 | \item{...}{arguments to \code{RunNMF}} 11 | 12 | \item{url}{download url for a Seurat v4 object} 13 | } 14 | \description{ 15 | Provide a link to download a cellxgene Seurat object, and this pipeline will return a standardized annotated NMF object at the optimal rank 16 | } 17 | \details{ 18 | This pipeline runs the following steps: 19 | \enumerate{ 20 | \item Download a Seurat v4 object from the provided URL 21 | \item Preprocess the data and run NMF using parameters specified in the \code{...} argument 22 | \item Annotate the NMF model against existing multi-level factor information 23 | \item Extract the model and annotations and save to an RDS file 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /man/checkColumns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/checkColumns.R 3 | \name{checkColumns} 4 | \alias{checkColumns} 5 | \title{verify that columns for auto-annotation are factors with > 1 level} 6 | \usage{ 7 | checkColumns(meta.data, columns = NULL, max.levels = 200) 8 | } 9 | \arguments{ 10 | \item{meta.data}{the meta.data (or a Seurat object if needs be)} 11 | 12 | \item{columns}{the columns (optional; if NULL, will check all columns)} 13 | 14 | \item{max.levels}{maximum number of levels permitted for a factor to be kept} 15 | } 16 | \value{ 17 | a vector of suitable columns (may be length 0) 18 | } 19 | \description{ 20 | verify that columns for auto-annotation are factors with > 1 level 21 | } 22 | -------------------------------------------------------------------------------- /man/checkDesigns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/checkDesigns.R 3 | \name{checkDesigns} 4 | \alias{checkDesigns} 5 | \title{verify that a list of matrices is in fact a named list of model matrices} 6 | \usage{ 7 | checkDesigns(designs) 8 | } 9 | \arguments{ 10 | \item{designs}{an alleged list of model matrices} 11 | } 12 | \value{ 13 | the list of model matrices, assuming it passes 14 | } 15 | \description{ 16 | verify that a list of matrices is in fact a named list of model matrices 17 | } 18 | \details{ 19 | this function will squawk and stop if the list is no good 20 | } 21 | -------------------------------------------------------------------------------- /man/cross_validate_nmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cross_validate_nmf.R, 3 | % R/plot.cross_validate_nmf_data.R 4 | \name{cross_validate_nmf} 5 | \alias{cross_validate_nmf} 6 | \alias{plot.cross_validate_nmf_data} 7 | \title{Determine best rank for NMF using cross-validation} 8 | \usage{ 9 | cross_validate_nmf( 10 | A, 11 | ranks, 12 | n_replicates = 3, 13 | tol = 1e-04, 14 | maxit = 100, 15 | verbose = 1, 16 | L1 = 0.01, 17 | L2 = 0, 18 | threads = 0, 19 | test_density = 0.05, 20 | tol_overfit = 1e-04, 21 | trace_test_mse = 5 22 | ) 23 | 24 | \method{plot}{cross_validate_nmf_data}(x, detail = 2, tol.overfit = 1e-04, ...) 
25 | } 26 | \arguments{ 27 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames} 28 | 29 | \item{ranks}{a vector of ranks at which to fit a model and compute test set reconstruction error} 30 | 31 | \item{n_replicates}{number of random test sets} 32 | 33 | \item{tol}{tolerance of the fit (1e-5 for publication quality, 1e-4 for cross-validation)} 34 | 35 | \item{maxit}{maximum number of iterations} 36 | 37 | \item{verbose}{verbosity level} 38 | 39 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 40 | 41 | \item{L2}{L2/Ridge penalty to increase angles between factors} 42 | 43 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 44 | 45 | \item{test_density}{fraction of values to include in the test set} 46 | 47 | \item{tol_overfit}{stopping criterion, maximum increase in test set reconstruction error at any iteration compared to test set reconstruction error at \code{trace_test_mse}} 48 | 49 | \item{trace_test_mse}{first iteration at which to calculate test set reconstruction error, and the error to compare all later iterations to when determining whether overfitting has occurred.} 50 | 51 | \item{x}{the result of \code{cross_validate_nmf} (a data.frame)} 52 | 53 | \item{detail}{level of detail to plot} 54 | 55 | \item{...}{additional arguments (not implemented)} 56 | } 57 | \value{ 58 | a \code{data.frame} of test set reconstruction error vs. rank of class \code{nmf_cross_validate_data}. Use \code{plot} method to visualize or \code{min} to compute optimal rank. 59 | } 60 | \description{ 61 | Find the rank that minimizes the mean squared error of test set reconstruction using cross-validation. 62 | } 63 | -------------------------------------------------------------------------------- /man/getDesigns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getDesigns.R 3 | \name{getDesigns} 4 | \alias{getDesigns} 5 | \title{Refactored out from AnnotateNMF to ease argument handling} 6 | \usage{ 7 | getDesigns(columns = NULL, meta.data = NULL, designs = NULL, max.levels = 200) 8 | } 9 | \arguments{ 10 | \item{columns}{factor columns of meta.data, optional if !is.null(designs)} 11 | 12 | \item{meta.data}{a data.frame of annotations, optional if !is.null(designs)} 13 | 14 | \item{designs}{named list of design matrices (supersedes meta.data/columns)} 15 | 16 | \item{max.levels}{maximum number of levels permitted for a factor to be kept} 17 | } 18 | \value{ 19 | a named list of design matrices, if one was not provided 20 | } 21 | \description{ 22 | Refactored out from AnnotateNMF to ease argument handling 23 | } 24 | -------------------------------------------------------------------------------- /man/getModelFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getModelFit.R 3 | \name{getModelFit} 4 | \alias{getModelFit} 5 | \title{get linear all-pairs comparisons fits for a design matrix and data matrix} 6 | \usage{ 7 | getModelFit(design, object, center = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{design}{a model.matrix (or a sparse.model.matrix, perhaps)} 11 | 12 | \item{object}{a data.matrix, Seurat DimReduc, or RcppML nmf object} 13 | 14 | \item{center}{center the factor matrix for testing? 
(TRUE)} 15 | 16 | \item{...}{additional arguments, passed to base::scale} 17 | } 18 | \description{ 19 | Continuing along with the theme of "stupid limma tricks", this function 20 | fits and shrinks a means model for a factor. The proportion of factors 21 | assumed to have a fold-change > 0 is 1%, and a robust fit is applied. 22 | } 23 | \examples{ 24 | if (FALSE) { 25 | get_pbmc3k_data() \%>\% NormalizeData() -> pbmc3k 26 | design <- model.matrix(~ 0 + cell_type, data=pbmc3k@meta.data) 27 | fit <- getModelFit(design, pbmc3k) # toy fit on lognormcounts 28 | # Subsetting data to non-NA observations to match design matrix. 29 | limma::topTable(fit) 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/getModelMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getModelMatrix.R 3 | \name{getModelMatrix} 4 | \alias{getModelMatrix} 5 | \title{automatically generate a means model (one-vs-all group associations)} 6 | \usage{ 7 | getModelMatrix(field, meta.data = NULL, sparse = FALSE, ova = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{field}{the name of a column in the data.frame, or the column} 11 | 12 | \item{meta.data}{a data.frame with one or more factor columns, or NULL} 13 | 14 | \item{sparse}{fit a sparse model.matrix? (FALSE)} 15 | 16 | \item{ova}{fit a One-Vs-All model matrix (no referent)? (TRUE)} 17 | 18 | \item{...}{any additional params to pass to model.matrix} 19 | } 20 | \value{ 21 | a model.matrix or sparse.model.matrix (if sparse==TRUE) 22 | } 23 | \description{ 24 | A little-known trick in limma is to fit ~ 0 + group for a means model. 25 | This function automates that for a data.frame and a factor column of it. 26 | } 27 | \details{ 28 | If a factor (and no meta.data) is supplied (usually by with(meta.data, ...)), 29 | getModelMatrix will attempt to figure out the text to remove from the matrix 30 | column names by using deparse() and match.call() on the arguments (voodoo!). 31 | In order to fit one-vs-all comparisons, a means model is the default. If you 32 | have a referent group (e.g. normal bone marrow vs. a bunch of leukemia cells) 33 | or simply don't want a means model, set `ova` (one vs all) to FALSE. 
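For example, the trick itself in plain base R (a minimal illustration with a made-up factor; getModelMatrix wraps this and cleans up the column names):

    groups <- factor(c("A", "A", "B", "C"))
    mm <- model.matrix(~ 0 + groups)
    colnames(mm)  # one indicator column per level: "groupsA" "groupsB" "groupsC"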
34 | } 35 | \examples{ 36 | 37 | covs <- get_pbmc3k_data()@meta.data 38 | design <- getModelMatrix("cell_type", covs) 39 | head(design) 40 | sparsedesign <- getModelMatrix("cell_type", covs, sparse=TRUE) 41 | head(sparsedesign) 42 | 43 | if (FALSE) { 44 | # test Seurat and SCE support too 45 | mm1 <- getModelMatrix("cell_type", pbmc3k) 46 | mm2 <- getModelMatrix("cell_type", pbmc) 47 | identical(mm1, mm2) 48 | # [1] TRUE 49 | fit1 <- getModelFit(mm2, pbmc3k) 50 | fit2 <- getModelFit(mm1, pbmc) 51 | identical(fit1, fit2) 52 | # [1] TRUE 53 | limma::topTable(fit1) 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /man/getModelResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getModelResults.R 3 | \name{getModelResults} 4 | \alias{getModelResults} 5 | \title{extract data.frame of lods and pvalues for differential factor representation} 6 | \usage{ 7 | getModelResults(fit, noneg = TRUE, noint = TRUE) 8 | } 9 | \arguments{ 10 | \item{fit}{an lmFit result from limma, shrunken with eBayes()} 11 | 12 | \item{noneg}{drop results with negative lods scores? (TRUE)} 13 | 14 | \item{noint}{drop any results for '(Intercept)'? (TRUE)} 15 | } 16 | \value{ 17 | a data.frame with columns 'factor', 'group', 'fc', and 'p' 18 | } 19 | \description{ 20 | log-odds of non-null differences for a response by a factor are in fit$lods 21 | (which will usually be a matrix), and one-sided p-values for the moderated t 22 | test are computed from fit$t and fit$df.total using pt(t, df, lower=FALSE), 23 | then adjusted using the step-up procedure of Benjamini & Hochberg. 24 | } 25 | \details{ 26 | If an (Intercept) term is found, it will be dropped, and if 27 | negative LODS scores are encountered, they will be dropped, 28 | unless `noneg` and/or `noint` are FALSE. 29 | } 30 | -------------------------------------------------------------------------------- /man/get_pbmc3k_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_pbmc3k_data.R 3 | \name{get_pbmc3k_data} 4 | \alias{get_pbmc3k_data} 5 | \title{Load the pbmc3k dataset} 6 | \usage{ 7 | get_pbmc3k_data() 8 | } 9 | \value{ 10 | Seurat object with \code{$cell_type} info in the \code{meta.data} slot. 11 | } 12 | \description{ 13 | This dataset is adapted directly from the Satija lab "pbmc3k" dataset used in their popular tutorial on guided clustering. It is provided in this package for convenience since "SeuratData" is not available on CRAN. 14 | 15 | For more information, please see their documentation. 16 | } 17 | \details{ 18 | 2,700 peripheral blood mononuclear cells (PBMC) from 10x genomics taken from the "SeuratData" package 19 | } 20 | -------------------------------------------------------------------------------- /man/pbmc3k.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pbmc3k.R 3 | \docType{data} 4 | \name{pbmc3k} 5 | \alias{pbmc3k} 6 | \title{Compressed form of pbmc3k dataset} 7 | \format{ 8 | compressed version of the \code{dgCMatrix}, use \code{\link{get_pbmc3k_data}} to use this dataset. 
9 | } 10 | \usage{ 11 | data(pbmc3k) 12 | } 13 | \description{ 14 | See \code{\link{get_pbmc3k_data}} 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/plotFactorWeights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotFactorWeights.R 3 | \name{plotFactorWeights} 4 | \alias{plotFactorWeights} 5 | \title{convenience function to map one or more factors along a genome using igvR} 6 | \usage{ 7 | plotFactorWeights(object, gr, factors = 1:3, plot = FALSE) 8 | } 9 | \arguments{ 10 | \item{object}{an nmf object or something with a @w weights matrix} 11 | 12 | \item{gr}{a GRanges object with coordinates for the features} 13 | 14 | \item{factors}{which factors to plot weights for (default: 1, 2, 3)} 15 | 16 | \item{plot}{use igvR to plot the factors? (TRUE, if igvR detected)} 17 | } 18 | \value{ 19 | the GRanges gr, but with factor weights added as mcols 20 | } 21 | \description{ 22 | convenience function to map one or more factors along a genome using igvR 23 | } 24 | \details{ 25 | This function presumes a GRanges object will be supplied, which in turn 26 | presumes that the GenomicRanges package is installed from Bioconductor. 27 | Further, if plot == TRUE, the igvR package is presumed to be installed. 28 | If either of these presumptions are false, or if factor weights cannot 29 | be mapped to identifiers in the GRanges, this function will fail. 30 | } 31 | -------------------------------------------------------------------------------- /man/project_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ProjectData.R 3 | \name{project_model} 4 | \alias{project_model} 5 | \title{Project a factor model} 6 | \usage{ 7 | project_model(A, w, L1 = 0.01, L2 = 0, threads = 0) 8 | } 9 | \arguments{ 10 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames} 11 | 12 | \item{w}{matrix giving the factor model, of dimensions \code{nrow(A) x k}} 13 | 14 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 15 | 16 | \item{L2}{L2/Ridge penalty to increase angles between factors} 17 | 18 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 19 | } 20 | \value{ 21 | list of \code{h} and \code{d}, where \code{d} gives the relative contribution of each factor in \code{h} to the model 22 | } 23 | \description{ 24 | Project a dataset onto a factor model for transfer learning 25 | } 26 | -------------------------------------------------------------------------------- /man/run_linked_nmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunLNMF.R 3 | \name{run_linked_nmf} 4 | \alias{run_linked_nmf} 5 | \title{Run Linked Non-negative Matrix Factorization} 6 | \usage{ 7 | run_linked_nmf( 8 | A, 9 | w, 10 | link_h = NULL, 11 | link_w = NULL, 12 | tol = 1e-04, 13 | maxit = 100, 14 | verbose = TRUE, 15 | L1 = 0.01, 16 | L2 = 0, 17 | threads = 0 18 | ) 19 | } 20 | \arguments{ 21 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical 
rownames} 22 | 23 | \item{w}{initial matrix for 'w', usually taken from the result of \code{run_nmf}, of dimensions \code{nrow(A) x rank}.} 24 | 25 | \item{link_h}{matrix giving the linkage weight (usually in the range \code{(0, 1)}) of dimensions \code{rank x ncol(A)}.} 26 | 27 | \item{link_w}{matrix giving the linkage weight of dimensions \code{nrow(A) x rank}.} 28 | 29 | \item{tol}{tolerance of the fit (1e-5 for publication quality, 1e-4 for cross-validation)} 30 | 31 | \item{maxit}{maximum number of iterations} 32 | 33 | \item{verbose}{verbosity level} 34 | 35 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 36 | 37 | \item{L2}{L2/Ridge penalty to increase angles between factors} 38 | 39 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 40 | } 41 | \description{ 42 | Run LNMF, initialized from any NMF model, where factors may be "linked" to certain samples. 43 | } 44 | -------------------------------------------------------------------------------- /man/run_nmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_nmf.R 3 | \name{run_nmf} 4 | \alias{run_nmf} 5 | \title{Run Non-negative Matrix Factorization} 6 | \usage{ 7 | run_nmf( 8 | A, 9 | rank, 10 | tol = 1e-04, 11 | maxit = 100, 12 | verbose = TRUE, 13 | L1 = 0.01, 14 | L2 = 0, 15 | threads = 0, 16 | compression_level = 3 17 | ) 18 | } 19 | \arguments{ 20 | \item{A}{sparse matrix giving normalized counts for genes x cells (rows x columns), or a list of sparse matrices with equal number of rows and identical rownames} 21 | 22 | \item{rank}{factorization rank} 23 | 24 | \item{tol}{tolerance of the fit (1e-5 for publication quality, 1e-4 for cross-validation)} 25 | 26 | \item{maxit}{maximum number of iterations} 27 | 28 | \item{verbose}{verbosity level} 29 | 30 | \item{L1}{L1/LASSO penalty to increase sparsity of model} 31 | 32 | \item{L2}{L2/Ridge penalty to increase angles between factors} 33 | 34 | \item{threads}{number of threads for parallelization across CPUs, 0 = use all available threads} 35 | 36 | \item{compression_level}{either 2 or 3, for VCSC or IVCSC, respectively. For development purposes.} 37 | } 38 | \description{ 39 | Run NMF on a sparse matrix with automatic rank determination by cross-validation 40 | } 41 | -------------------------------------------------------------------------------- /man/singlet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/singlet.R 3 | \docType{package} 4 | \name{singlet} 5 | \alias{singlet} 6 | \alias{singlet-package} 7 | \title{Singlet} 8 | \description{ 9 | Fast single-cell analysis with non-negative dimensional reductions 10 | } 11 | \details{ 12 | There are reasons to not use PCA. 13 | \itemize{ 14 | \item PCA fits to missing signal, 15 | \item considers only highly variable features, 16 | \item is almost useless without further graph-based analysis, 17 | \item requires centering and scaling of your data, 18 | \item and is robust only within experiments. 19 | } 20 | 21 | Instead, you should use Non-negative Matrix Factorization (NMF). 
22 | \itemize{ 23 | \item NMF imputes missing signal, 24 | \item learns models using all features, 25 | \item does everything PCA does and provides useful information itself, 26 | \item requires only variance stabilization, 27 | \item and is robust across experiments. 28 | } 29 | 30 | Singlet is all about extremely fast NMF for single-cell dimensional reduction and integration. 31 | 32 | See the vignettes to get started. 33 | } 34 | \seealso{ 35 | Useful links: 36 | \itemize{ 37 | \item \url{https://github.com/zdebruine/singlet} 38 | \item Report bugs at \url{https://github.com/zdebruine/singlet/issues} 39 | } 40 | 41 | } 42 | \author{ 43 | Zach DeBruine 44 | } 45 | -------------------------------------------------------------------------------- /man/write_IVCSC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{write_IVCSC} 4 | \alias{write_IVCSC} 5 | \title{Write an IVCSC matrix} 6 | \usage{ 7 | write_IVCSC(L, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{L}{input dgCMatrix list} 11 | 12 | \item{verbose}{print outputs} 13 | } 14 | \description{ 15 | Write an IVCSC matrix 16 | } 17 | -------------------------------------------------------------------------------- /singlet.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = -I../inst/include/ 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DEIGEN_INITIALIZE_MATRICES_BY_ZERO -DEIGEN_NO_DEBUG 4 | CXX_STD = CXX11 -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = -I../inst/include/ 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DEIGEN_INITIALIZE_MATRICES_BY_ZERO -DEIGEN_NO_DEBUG 4 | CXX_STD = CXX11 -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | test_check("singlet") 3 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | Sys.setlocale("LC_COLLATE", "C") ## What CRAN does; affects sort order 2 | set.seed(999) ## To ensure that tests that involve randomness are reproducible 3 | options(warn=1) 4 | -------------------------------------------------------------------------------- /tests/testthat/test-pbmc3k.R: -------------------------------------------------------------------------------- 1 | test_that("Testing pbmc3k data set", 2 | { 3 | data("pbmc3k", package="singlet") 4 | all(c("i", "p", "Dim", "Dimnames", "x", 
"cell_type") 5 | %in% names(pbmc3k)) 6 | expect_true(TRUE) 7 | }) 8 | -------------------------------------------------------------------------------- /vignettes/Batch_Integration_with_Linked_NMF.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Batch Integration with Linked NMF" 3 | author: "Zach DeBruine" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Batch Integration with Linked NMF} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = TRUE) 14 | ``` 15 | 16 | ## Get Started 17 | 18 | Use the Seurat `ifnb` dataset: 19 | 20 | ```{R, warning = FALSE, message = FALSE, get-started} 21 | library(Seurat) 22 | library(ggplot2) 23 | library(singlet) 24 | library(SeuratData) 25 | library(cowplot) 26 | data(ifnb) 27 | ifnb <- NormalizeData(ifnb, verbose = FALSE) 28 | ``` 29 | 30 | ## Determine NMF Rank 31 | 32 | First we determine number of factors using cross-validation. 33 | 34 | Note the use of `split.by = "stim"`, where we are indicating that the "stim" field in the `meta.data` slot of our Seurat object is a factor giving discrete groupings of samples (either stimulated or unstimulated). The `RunNMF` function will weight samples from both groups equally in the NMF objective, regardless of whether the groups are of equal size. 35 | 36 | ```{R, message = FALSE, warning = FALSE, results = 'hide', run-nmf} 37 | set.seed(123) 38 | ifnb <- RunNMF(ifnb, split.by = "stim") 39 | ``` 40 | 41 | ```{R, fig.width = 4, fig.height = 4} 42 | RankPlot(ifnb) + scale_y_continuous(limits = c(1, 1.1)) 43 | ``` 44 | 45 | Visualize contribution of groups to both factors: 46 | 47 | ```{R, fig.width = 6, plot-metadata} 48 | MetadataPlot(ifnb, split.by = "stim", reduction = "nmf") 49 | ``` 50 | 51 | Some factors are almost exclusively explaining signal from one dataset, and not the other. 52 | 53 | ## Run Linked NMF 54 | 55 | Linked NMF will uncouple sample groups from factors in which they are only weakly represented. LNMF is initialized with the joint NMF model that we trained before, we just specify a cutoff for the minimum fractional representation of any sample group in any given factor at which it will be uncoupled from the factor. 56 | 57 | ```{R, message = FALSE, warning = FALSE, run-lnmf} 58 | ifnb <- RunLNMF( 59 | ifnb, 60 | split.by = "stim", 61 | reduction.use = "nmf", 62 | link.cutoff = 0.7, 63 | verbose = FALSE) 64 | ``` 65 | 66 | LNMF creates a new reduction in the Seurat object, `lnmf`. 
Now examine how each group is represented in the linked NMF factors: 67 | 68 | ```{R, fig.width = 6, plot-lnmf-metadata} 69 | MetadataPlot(ifnb, split.by = "stim", reduction = "lnmf") 70 | ``` 71 | 72 | We can visualize these models on UMAP coordinates using the joint NMF model and the linked NMF model restricted to shared factors (the full linked model, including unique factors, can be embedded the same way; see the sketch after this vignette): 73 | 74 | ```{R, message = FALSE, warning = FALSE, results = 'hide', run-umap} 75 | ifnb <- RunUMAP(ifnb, 76 | reduction = "nmf", 77 | dims = 1:ncol(ifnb@reductions$nmf), 78 | reduction.name = "jnmf_all", 79 | verbose = FALSE) 80 | 81 | ifnb <- RunUMAP(ifnb, 82 | reduction = "lnmf", 83 | dims = GetSharedFactors(ifnb, split.by = "stim"), 84 | reduction.name = "lnmf_shared", 85 | verbose = FALSE) 86 | 87 | p_jnmf_umap <- DimPlot(ifnb, reduction = "jnmf_all", group.by = "stim") 88 | p_lnmf_umap <- DimPlot(ifnb, reduction = "lnmf_shared", group.by = "stim") 89 | ``` 90 | 91 | ## Visualize 92 | 93 | Plot the results: 94 | 95 | ```{R, fig.width = 8, fig.height = 4, plot-umap} 96 | plot_grid( 97 | p_jnmf_umap + 98 | ggtitle("joint NMF") + 99 | theme(legend.position = "none"), 100 | p_lnmf_umap + 101 | ggtitle("linked NMF") + 102 | theme(legend.position = "none"), 103 | get_legend(p_jnmf_umap), 104 | ncol = 3, 105 | rel_widths = c(1, 1, 0.2) 106 | ) 107 | ``` --------------------------------------------------------------------------------
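A possible extension of the vignette above (a sketch only, assuming the `ifnb` object and packages from the preceding chunks; the reduction name `lnmf_all` and the chunk label are arbitrary): the shared and unique factor indices can be listed directly, and the entire linked NMF model (shared plus unique factors) can be embedded with the same `RunUMAP` pattern used for the shared factors.

```{R, message = FALSE, warning = FALSE, lnmf-all-sketch}
# Which factors remain linked to both groups, and which are unique to one group
GetSharedFactors(ifnb, split.by = "stim")
GetUniqueFactors(ifnb, split.by = "stim")

# Embed the full linked NMF model (shared + unique factors) for comparison
# with the shared-factor embedding shown in the vignette
ifnb <- RunUMAP(ifnb,
                reduction = "lnmf",
                dims = 1:ncol(ifnb@reductions$lnmf),
                reduction.name = "lnmf_all",
                verbose = FALSE)

DimPlot(ifnb, reduction = "lnmf_all", group.by = "stim") +
  ggtitle("linked NMF (all factors)")
```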