├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── align.R ├── bulk_de_wrappers.R ├── differential_geometry.R ├── estimate_linear_coefficients.R ├── find_de_neighborhoods.R ├── geodesic_regression.R ├── glioblastoma_example_data.R ├── handle_data_parameter.R ├── handle_design_parameter.R ├── handle_test_data_parameter.R ├── harmony_wrapper.R ├── lemur-package.R ├── lemur.R ├── lemur_fit.R ├── parse_contrasts.R ├── pca.R ├── predict.R ├── project_on_fit.R ├── recursive_least_squares.R ├── ridge_regression.R ├── test_de.R └── util.R ├── README.Rmd ├── README.md ├── data └── glioblastoma_example_data.rda ├── inst └── CITATION ├── man ├── align_harmony.Rd ├── align_impl.Rd ├── cash-lemur_fit-method.Rd ├── figures │ ├── README-fig-Neighborhood_size_vs_significance-1.png │ ├── README-fig-lemur_umap-1.png │ ├── README-fig-raw_umap-1.png │ ├── README-fig-tumor_cell_annotation1-1.png │ ├── README-fig-tumor_cell_annotation2-1.png │ ├── README-fig-tumor_de_neighborhood_plot-1.png │ ├── README-fig-umap_de-1.png │ ├── README-fig-umap_de-2.png │ ├── README-fig-umap_de2-1.png │ ├── README-fig-umap_de3-1.png │ ├── README-fig-volcano_plot-1.png │ ├── equation_schematic.png │ └── lemur-art.jpg ├── find_de_neighborhoods.Rd ├── fold_left.Rd ├── glioblastoma_example_data.Rd ├── grapes-zero_dom_mat_mult-grapes.Rd ├── grassmann_geodesic_regression.Rd ├── grassmann_lm.Rd ├── harmony_new_object.Rd ├── lemur.Rd ├── lemur_fit.Rd ├── mply_dbl.Rd ├── one_hot_encoding.Rd ├── predict.lemur_fit.Rd ├── project_on_lemur_fit.Rd ├── pseudoinverse.Rd ├── recursive_least_squares.Rd ├── reexports.Rd ├── residuals-lemur_fit-method.Rd ├── ridge_regression.Rd ├── stack_slice.Rd ├── test_de.Rd └── test_global.Rd ├── src ├── .gitignore ├── Makevars ├── Makevars.win ├── RcppExports.cpp └── util.cpp ├── tests ├── testthat.R └── testthat │ ├── helper.R │ ├── test-align.R │ ├── test-differential_geometry.R │ ├── test-find_de_neighborhoods.R │ ├── test-geodesic_regression.R │ ├── test-lemur.R │ ├── test-parse_contrasts.R │ ├── test-pca.R │ ├── test-recursive_least_squares.R │ ├── test-ridge_penalty.R │ ├── test-test_de.R │ └── test-util.R └── vignettes ├── .gitignore ├── Introduction.qmd └── man └── figures └── equation_schematic.png /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^man/.*\.ai$ 5 | ^man/figures/README-.*$ 6 | ^vignettes/man/figures/README-.*$ 7 | ^doc$ 8 | ^Meta$ 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.Rproj 2 | .Rproj.user 3 | .Rhistory 4 | .RData 5 | .Ruserdata 6 | tmp_scripts 7 | inst/doc 8 | *.DS_Store 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: lemur 2 | Type: Package 3 | Title: Latent Embedding Multivariate Regression 4 | Version: 1.3.6 5 | Authors@R: person("Constantin", "Ahlmann-Eltze", email = "artjom31415@googlemail.com", 6 | role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3762-068X")) 7 | Description: Fit a latent embedding multivariate regression (LEMUR) model to multi-condition 8 | single-cell data. The model provides a parametric description of single-cell data measured 9 | with treatment vs. 
control or more complex experimental designs. 10 | The parametric model is used to (1) align conditions, (2) predict 11 | log fold changes between conditions for all cells, and (3) identify cell neighborhoods with 12 | consistent log fold changes. For those neighborhoods, a pseudobulked differential expression test 13 | is conducted to assess which genes are significantly changed. 14 | URL: https://github.com/const-ae/lemur 15 | BugReports: https://github.com/const-ae/lemur/issues 16 | License: MIT + file LICENSE 17 | Encoding: UTF-8 18 | LazyData: false 19 | Imports: 20 | stats, 21 | utils, 22 | irlba, 23 | methods, 24 | SingleCellExperiment, 25 | SummarizedExperiment, 26 | rlang (>= 1.1.0), 27 | vctrs (>= 0.6.0), 28 | glmGamPoi (>= 1.12.0), 29 | BiocGenerics, 30 | S4Vectors, 31 | Matrix, 32 | DelayedMatrixStats, 33 | HDF5Array, 34 | MatrixGenerics, 35 | matrixStats, 36 | Rcpp, 37 | harmony (>= 1.2.0), 38 | limma, 39 | BiocNeighbors 40 | Suggests: 41 | testthat (>= 3.0.0), 42 | tidyverse, 43 | uwot, 44 | dplyr, 45 | edgeR, 46 | knitr, 47 | quarto, 48 | BiocStyle 49 | LinkingTo: 50 | Rcpp, 51 | RcppArmadillo 52 | Depends: 53 | R (>= 4.1) 54 | Config/testthat/edition: 3 55 | Roxygen: list(markdown = TRUE) 56 | RoxygenNote: 7.3.1 57 | biocViews: Transcriptomics, DifferentialExpression, SingleCell, DimensionReduction, Regression 58 | VignetteBuilder: 59 | quarto 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: lemur authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 lemur authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(.DollarNames,lemur_fit) 4 | S3method(predict,lemur_fit) 5 | export(.lemur_fit) 6 | export(align_by_grouping) 7 | export(align_harmony) 8 | export(find_de_neighborhoods) 9 | export(lemur) 10 | export(project_on_lemur_fit) 11 | export(test_de) 12 | export(vars) 13 | exportClasses(lemur_fit) 14 | exportMethods("[") 15 | exportMethods(design) 16 | exportMethods(residuals) 17 | import(SingleCellExperiment, except = weights) 18 | import(methods) 19 | import(stats) 20 | importFrom(BiocGenerics,design) 21 | importFrom(Matrix,t) 22 | importFrom(Rcpp,sourceCpp) 23 | importFrom(S4Vectors,DataFrame) 24 | importFrom(S4Vectors,`metadata<-`) 25 | importFrom(S4Vectors,metadata) 26 | importFrom(SingleCellExperiment,reducedDims) 27 | importFrom(SummarizedExperiment,`assay<-`) 28 | importFrom(SummarizedExperiment,`colData<-`) 29 | importFrom(SummarizedExperiment,`rowData<-`) 30 | importFrom(SummarizedExperiment,assay) 31 | importFrom(SummarizedExperiment,assayNames) 32 | importFrom(SummarizedExperiment,colData) 33 | importFrom(SummarizedExperiment,rowData) 34 | importFrom(glmGamPoi,vars) 35 | importFrom(utils,.DollarNames) 36 | importFrom(utils,capture.output) 37 | importFrom(utils,head) 38 | importFrom(utils,setTxtProgressBar) 39 | importFrom(utils,txtProgressBar) 40 | useDynLib(lemur, .registration = TRUE) 41 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # v1.3 2 | 3 | * `lemur` now automatically inserts the variables from the design formula into 4 | the `group_by` argument for `find_de_neighborhoods`. (thanks Katha for pushing for this feature) 5 | * The formula parsing automatically detects global variables and adds them to the 6 | colData. This avoids problems with the random test / training assignment. 7 | * Duplicate column names in colData are now longer allowed. 8 | * Require harmony version >= 1.2.0 (thanks Maija for reporting the problem) 9 | 10 | 11 | # v1.1 12 | 13 | * Make `predict` function faster and less memory intensive for subset fits. 14 | * Speed-up internal function `get_groups` 15 | * Gracefully handle duplicated column names in `colData(fit)` 16 | * Give better error message in `test_de` if `cond(..)` is used for a fit 17 | that was not specified with a design formula (thanks @MaximilianNuber for reporting) 18 | 19 | # v1.0 20 | 21 | * Bug fix in subsetting logic affecting `predict` and `test_de`. The problem occured if a `fit` object was 22 | subsetted with indices or gene names and the order changed, and resulted in a wrong order of the predictions. 23 | 24 | # v0.99.1-0.99.8 25 | 26 | * Submission to Bioconductor, thus the jump in version number. 27 | * Adjusted internal code to handle breaking changes in `harmony` v1.0.0. 28 | * Multiple small fixes to comply with Bioconductor guidelines 29 | (see https://github.com/Bioconductor/Contributions/issues/3152) 30 | 31 | # v0.0.27 32 | 33 | * Instead of `include_complement`, the `find_de_neighborhoods` function gains a `add_diff_in_diff` argument. If it is true, the function calculates the difference between the DE results inside the neighborhood vs. outside. 
34 | * Change `indices` columns to `neighborhood` and store list of cell name vectors in output of `find_de_neighborhoods`. 35 | * Enforce unique column and row names. 36 | 37 | # v0.0.26 38 | 39 | * Make the neighborhoods more consistent: (1) include cells which are connected to many cells inside the neighborhood, (2) exclude cells from the neighborhood which are not well connected to the other cells in the neighborhood. 40 | * Add a `control_parameters` argument to `find_de_neighborhoods`. 41 | * Add `BiocNeighbor` as a dependency. 42 | 43 | # v0.0.25 44 | 45 | * Detect problematic neighborhoods and skip them. 46 | * Replace `test_data_cell_size_factors` by `size_factor_method`, which is more flexibel. Setting `size_factor_method = "ratio"` uses the size factor method described in the original DESeq paper 47 | 48 | 49 | # v0.0.24 50 | 51 | * Fix bug in `find_de_neighborhoods` that meant that accidentally additionally zeros where included in each 52 | neighborhood pseudobulk. The test should have more power now. 53 | * Expose `min_neighborhood_size` argument in `find_de_neighborhoods`. 54 | * Add `test_data_cell_size_factors` argument to `find_de_neighborhoods` which is useful if the function is called 55 | with a subsetted `fit` argument. 56 | 57 | # v0.0.23 58 | 59 | * Improve alignment functions: simplify algorithm, find linear approximation to Harmony's steps, 60 | include an intercept. 61 | * Avoid calling private methods from `harmony`. 62 | * Convert character columns in `colData` to factors to avoid problems when dividing data into 63 | test and training data. 64 | * Fix bug in `find_de_neighborhoods` where I didn't embrace an argument. 65 | * Remove `BiocNeighbors` dependency. 66 | 67 | # v0.0.21 68 | 69 | * Minor bug fix in `find_de_neighborhoods`. The function threw an error if `alignment_design != design`. 70 | * Better error messages if `find_de_neighborhoods` is called without having called `test_de` before. 71 | 72 | # v0.0.20 73 | 74 | * Change defaults for `find_de_neighborhoods`. Increase the `ridge_penalty` and add a `min_neighborhood_size = 10` argument 75 | to avoid creation of very small neighborhoods. 76 | 77 | # v0.0.19 78 | 79 | * Add new `test_fraction` argument to `lemur()` function. It automatically defines a hold-out datasets for the fitting step. 80 | These hold-out data is used to infer the differential expression of the neighborhoods in `find_de_neighborhoods`. This change 81 | addresses the double-dipping problem, where it was previously left to the user to provide an independent matrix for the 82 | `find_de_neighborhoods` function. 83 | * As a consequence of these changes, the structure of `lemur_fit` objects has changed. They gain three new fields called 84 | `fit$test_data`, `fit$training_data`, and `fit$is_test_data`. 85 | * The order and names of the arguments for `find_de_neighborhoods` has changed. 86 | 87 | # v0.0.18 88 | 89 | * Remove `alignment_method` field from `lemur_fit` objects as it was not used for anything. 90 | 91 | # v0.0.17 92 | 93 | * Rename argument name for `align_by_template` from `alignment_template` to `template` 94 | * Tweak algorithm for alignment to take cluster sizes into account during optimization 95 | 96 | # v0.0.13-v0.0.16 97 | 98 | * Change in the alignment model. Previously, the method tried to align cells using 99 | rotations and / or stretching, however, the method could not represent reflections! 100 | To fix this, I now allow arbitrary linear transformations where $R(x) = (I + sum_k x_k V_k)^{-1}$. 
The 101 | new alignment is more flexible and easier to infer. The downside is the term inside the parantheses can be 102 | singular which would lead to an error. 103 | * Skip iteration step: first infer centering and then infer latent space. Previously, I iterated between these steps 104 | but that either didn't add anything or actually degraded the results. 105 | * Set `center = FALSE` in `find_base_point`. Centering the data before fitting the base point caused 106 | problems and made the data look less integrated in some cases. 107 | * Remove ambient PCA step. This was originally conceived as an performance optimization, however 108 | it had detrimental effects on the inference. Since a few version it was skipped per default, so removing 109 | it should not change the inference. 110 | * Add `linear_coefficient_estimator` to give more flexibility how or if the conditions are centered. 111 | * Reduce the `minimum_cluster_membership` default to `0.001` in `align_harmony` to make it more sensitive. 112 | * Make `test_global` an internal function again until further more extensive testing. 113 | * Remove `base_point` argument from `lemur()`. It wasn't used anyways. 114 | 115 | # pre v0.0.13 116 | * Refactor `find_de_neighborhoods`: the function can now combine the results of 117 | different directions, selection criteria, and pseudobulk test (on counts or 118 | continuous values). To implement this, I changed the names of the arguments and 119 | added parameters. 120 | * Remove many superfluous method generics and only provide the accession via `$` 121 | * Fix documentation warnings 122 | * Rename class from 'lemur_fit_obj' to 'lemur_fit' 123 | * Store 'contrast' in `lemur_fit` after calling `test_de` 124 | * Add option to fit count model in `find_de_neighborhoods` with [edgeR](https://bioconductor.org/packages/edgeR/) 125 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | cumz_which_abs_max <- function(x, min_neighborhood_size) { 5 | .Call(`_lemur_cumz_which_abs_max`, x, min_neighborhood_size) 6 | } 7 | 8 | cum_brls_which_abs_max <- function(y, X, group, contrast, penalty, min_neighborhood_size) { 9 | .Call(`_lemur_cum_brls_which_abs_max`, y, X, group, contrast, penalty, min_neighborhood_size) 10 | } 11 | 12 | count_neighbors_fast <- function(knn_mat, indices) { 13 | .Call(`_lemur_count_neighbors_fast`, knn_mat, indices) 14 | } 15 | 16 | -------------------------------------------------------------------------------- /R/align.R: -------------------------------------------------------------------------------- 1 | #' Enforce additional alignment of cell clusters beyond the direct differential embedding 2 | #' 3 | #' @param fit a `lemur_fit` object 4 | #' @param grouping argument specific for `align_by_grouping`. Either a vector which assigns 5 | #' each cell to one group or a matrix with `ncol(fit)` columns where the rows are a soft-assignment 6 | #' to a cluster (i.e., columns sum to `1`). `NA`'s are allowed. 7 | #' @param design a specification of the design (matrix or formula) that is used 8 | #' for the transformation. Default: `fit$design_matrix` 9 | #' @param ridge_penalty specification how much the flexibility of the transformation 10 | #' should be regularized. 
Default: `0.01` 11 | #' @param max_iter argument specific for `align_harmony`. The number of iterations. Default: `10` 12 | #' @param preserve_position_of_NAs argument specific for `align_by_grouping`. 13 | #' Boolean flag to decide if `NA`s in the `grouping` mean that these cells should stay where they are (if 14 | #' possible) or if they are free to move around. Default: `FALSE` 15 | #' @param ... additional parameters that are passed on to relevant functions 16 | #' @param verbose Should the method print information during the fitting. Default: `TRUE`. 17 | #' 18 | #' @returns The `fit` object with the updated `fit$embedding` and `fit$alignment_coefficients`. 19 | #' 20 | #' @examples 21 | #' data(glioblastoma_example_data) 22 | #' fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 23 | #' n_emb = 5, verbose = FALSE) 24 | #' # Creating some grouping for illustration 25 | #' cell_types <- sample(c("tumor cell", "neuron", "leukocyte"), size = ncol(fit), replace = TRUE) 26 | #' fit_al1 <- align_by_grouping(fit, grouping = cell_types) 27 | #' 28 | #' # Alternatively, use harmony to automatically group cells 29 | #' fit_al2 <- align_harmony(fit) 30 | #' fit_al2 31 | #' 32 | #' # The alignment coefficients are a 3D array 33 | #' fit_al2$alignment_coefficients 34 | #' 35 | #' @export 36 | align_harmony <- function(fit, design = fit$alignment_design, 37 | ridge_penalty = 0.01, max_iter = 10, ..., verbose = TRUE){ 38 | if(verbose) message("Select cells that are considered close with 'harmony'") 39 | if(is.null(attr(design, "ignore_degeneracy"))){ 40 | # It doesn't matter for harmony if the design is degenerate 41 | attr(design, "ignore_degeneracy") <- TRUE 42 | } 43 | design_matrix <- handle_design_parameter(design, fit, glmGamPoi:::get_col_data(fit, NULL), verbose = verbose)$design_matrix 44 | act_design_matrix <- design_matrix[!fit$is_test_data,,drop=FALSE] 45 | 46 | if(! requireNamespace("harmony", quietly = TRUE)){ 47 | stop("'harmony' is not installed. Please install it from CRAN.") 48 | } 49 | training_fit <- fit$training_data 50 | # Ignore best practice and call private methods from harmony 51 | harm_obj <- harmony_init(training_fit$embedding, act_design_matrix, ..., verbose = verbose) 52 | for(idx in seq_len(max_iter)){ 53 | harm_obj <- harmony_max_div_clustering(harm_obj) 54 | 55 | alignment <- align_impl(training_fit$embedding, harm_obj$R, act_design_matrix, ridge_penalty = ridge_penalty) 56 | 57 | harm_obj$Z_corr <- alignment$embedding 58 | harm_obj$Z_cos <- t(t(harm_obj$Z_corr) / sqrt(colSums(harm_obj$Z_corr^2))) 59 | if(harm_obj$check_convergence(1)){ 60 | if(verbose) message("Converged") 61 | break 62 | } 63 | } 64 | 65 | correct_fit(fit, alignment$alignment_coefficients, design) 66 | } 67 | 68 | #' @rdname align_harmony 69 | #' @export 70 | align_by_grouping <- function(fit, grouping, design = fit$alignment_design, 71 | ridge_penalty = 0.01, preserve_position_of_NAs = FALSE, verbose = TRUE){ 72 | if(verbose) message("Received sets of cells that are considered close") 73 | 74 | if(is.list(grouping)){ 75 | stop("'grouping' must be a vector/factor with distinct elements for each group or ", 76 | "a matrix with `ncol(fit)` columns and one row per group.") 77 | }else if(! is.matrix(grouping)){ 78 | stopifnot(length(grouping) == ncol(fit)) 79 | grouping <- grouping[! fit$is_test_data] 80 | }else{ 81 | stopifnot(ncol(grouping) == ncol(fit)) 82 | grouping <- grouping[,! 
fit$is_test_data,drop=FALSE] 83 | } 84 | 85 | design_matrix <- handle_design_parameter(design, fit, glmGamPoi:::get_col_data(fit, NULL), verbose = verbose)$design_matrix 86 | act_design_matrix <- design_matrix[!fit$is_test_data,,drop=FALSE] 87 | 88 | res <- align_impl(fit$training_data$embedding, grouping, act_design_matrix, 89 | ridge_penalty = ridge_penalty, calculate_new_embedding = FALSE) 90 | 91 | correct_fit(fit, res$alignment_coefficients, design) 92 | } 93 | 94 | 95 | #' Align the points according to some grouping 96 | #' 97 | #' @returns A list with the new embedding and the coefficients 98 | #' 99 | #' @keywords internal 100 | align_impl <- function(embedding, grouping, design_matrix, ridge_penalty = 0.01, 101 | preserve_position_of_NAs = FALSE, calculate_new_embedding = TRUE){ 102 | if(! is.matrix(grouping)){ 103 | grouping_matrix <- one_hot_encoding(grouping) 104 | }else{ 105 | stopifnot(ncol(grouping) == ncol(embedding)) 106 | stopifnot(all(grouping >= 0, na.rm = TRUE)) 107 | # Make sure the entries sum to 1 (and don't touch them if the column is all zero) 108 | col_sums <- MatrixGenerics::colSums2(grouping) 109 | col_sums[col_sums == 0] <- 1 110 | grouping_matrix <- t(t(grouping) / col_sums) 111 | } 112 | 113 | # NA's are converted to zero columns ensuring that `diff %*% grouping_matrix = 0` 114 | grouping_matrix[,MatrixGenerics::colAnyNAs(grouping_matrix)] <- 0 115 | if(! preserve_position_of_NAs){ 116 | all_zero_col <- MatrixGenerics::colSums2(grouping_matrix) == 0 117 | grouping_matrix <- grouping_matrix[,! all_zero_col,drop=FALSE] 118 | embedding <- embedding[,! all_zero_col,drop=FALSE] 119 | design_matrix <- design_matrix[! all_zero_col,,drop=FALSE] 120 | } 121 | 122 | stopifnot(ncol(embedding) == ncol(grouping_matrix)) 123 | stopifnot(ncol(embedding) == nrow(design_matrix)) 124 | 125 | n_groups <- nrow(grouping_matrix) 126 | n_emb <- nrow(embedding) 127 | K <- ncol(design_matrix) 128 | 129 | conditions <- get_groups(design_matrix) 130 | conds <- unique(conditions) 131 | 132 | # Calculate mean per cell_type+condition 133 | cond_ct_means <- lapply(conds, \(co){ 134 | t(mply_dbl(seq_len(n_groups), \(idx){ 135 | MatrixGenerics::rowWeightedMeans(embedding, w = grouping_matrix[idx,], cols = conditions == co) 136 | }, ncol = n_emb)) 137 | }) 138 | 139 | # Calculate target as mean of `cond_ct_means` per cell_type 140 | target <- matrix(NA, nrow = n_emb, ncol = n_groups) 141 | for(idx in seq_len(n_groups)){ 142 | target[,idx] <- colMeans(mply_dbl(conds, \(co) cond_ct_means[[co]][,idx], ncol = n_emb), na.rm = TRUE) 143 | } 144 | 145 | # Shift all cells by `ctc_mean - ct_target` (`new_pos`) 146 | new_pos <- embedding 147 | for(co in conds){ 148 | diff <- target - cond_ct_means[[co]] 149 | new_pos[,conditions == co] <- new_pos[,conditions == co] + diff %zero_dom_mat_mult% grouping_matrix[,conditions == co] 150 | } 151 | 152 | # Approximate shift by regressing `new_pos ~ S(x) * orig_pos + offset(x)` 153 | interact_design_matrix <- duplicate_cols(design_matrix, each = n_emb + 1) * duplicate_cols(t(rbind(1, embedding)), times = ncol(design_matrix)) 154 | # interact_design_matrix <- duplicate_cols(design_matrix, each = n_emb) * duplicate_cols(t(embedding), times = ncol(design_matrix)) 155 | alignment_coefs <- ridge_regression(new_pos - embedding, interact_design_matrix, ridge_penalty = ridge_penalty) 156 | alignment_coefs <- array(alignment_coefs, dim = c(n_emb, n_emb + 1, ncol(design_matrix))) 157 | 158 | new_emb <-if(calculate_new_embedding){ 159 | 
apply_linear_transformation(embedding, alignment_coefs, design_matrix) 160 | }else{ 161 | NULL 162 | } 163 | list(alignment_coefficients = alignment_coefs, embedding = new_emb) 164 | } 165 | 166 | #' Take a vector and convert it to a one-hot encoded matrix 167 | #' 168 | #' @returns A matrix with `length(unique(groups))` rows and `length(groups)` columns. 169 | #' 170 | #' @keywords internal 171 | one_hot_encoding <- function(groups){ 172 | if(is.factor(groups)){ 173 | levels <- levels(groups) 174 | }else{ 175 | levels <- unique(groups) 176 | } 177 | 178 | res <- matrix(0, nrow = length(levels), ncol = length(groups), dimnames = list(levels, names(groups))) 179 | for(i in seq_along(levels)){ 180 | if(is.na(levels[i])){ 181 | # Do nothing 182 | }else{ 183 | res[i, groups == levels[i]] <- 1 184 | } 185 | } 186 | res 187 | } 188 | 189 | forward_linear_transformation <- function(alignment_coefficients, design_vector){ 190 | n_emb <- dim(alignment_coefficients)[1] 191 | if(n_emb == 0){ 192 | sum_tangent_vectors(alignment_coefficients, design_vector) 193 | }else{ 194 | cbind(0, diag(nrow = n_emb)) + sum_tangent_vectors(alignment_coefficients, design_vector) 195 | } 196 | } 197 | 198 | reverse_linear_transformation <- function(alignment_coefficients, design_vector){ 199 | n_embedding <- dim(alignment_coefficients)[1] 200 | if(n_embedding == 0){ 201 | matrix(nrow = 0, ncol = 0) 202 | }else{ 203 | solve(diag(nrow = n_embedding) + sum_tangent_vectors(alignment_coefficients[,-1,,drop=FALSE], design_vector)) 204 | } 205 | } 206 | 207 | apply_linear_transformation <- function(A, alignment_coefs, design){ 208 | mm_groups <- get_groups(design) 209 | groups <- unique(mm_groups) 210 | for(gr in groups){ 211 | A[,mm_groups == gr] <- forward_linear_transformation(alignment_coefs, design[which(mm_groups == gr)[1],]) %*% rbind(1, A[,mm_groups == gr,drop=FALSE]) 212 | } 213 | A 214 | } 215 | 216 | correct_fit <- function(fit, alignment_coefs, design){ 217 | old <- S4Vectors:::disableValidity() 218 | if (!isTRUE(old)) { 219 | S4Vectors:::disableValidity(TRUE) 220 | on.exit(S4Vectors:::disableValidity(old)) 221 | } 222 | 223 | if(! 
all(fit$alignment_coefficients == 0)) stop("Can only apply alignment once") 224 | 225 | metadata(fit)[["alignment_coefficients"]] <- alignment_coefs 226 | des <- handle_design_parameter(design, fit, fit$colData) 227 | metadata(fit)[["alignment_design"]] <- des$design_formula 228 | metadata(fit)[["alignment_design_matrix"]] <- des$design_matrix 229 | colData(fit) <- des$col_data 230 | reducedDim(fit, "embedding") <- t(apply_linear_transformation(fit$embedding, alignment_coefs, des$design_matrix)) 231 | fit 232 | } 233 | 234 | handle_ridge_penalty_parameter <- function(ridge_penalty){ 235 | if(any(names(ridge_penalty) %in% c("rotation", "stretching"))){ 236 | stop("The alignment function has changed and the rotation and stretching specification is now defunct") 237 | } 238 | ridge_penalty 239 | } 240 | -------------------------------------------------------------------------------- /R/bulk_de_wrappers.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | edger_fit <- function(counts, design, offset, col_data = NULL, 4 | abundance.trend = TRUE, robust = TRUE){ 5 | 6 | if(is.matrix(design)){ 7 | design_matrix <- design 8 | }else{ 9 | design_matrix <- model.matrix(design, data = col_data) 10 | } 11 | 12 | edger_y <- edgeR::DGEList(counts = counts) 13 | edger_y <- edgeR::scaleOffset(edger_y, offset = offset) 14 | edger_y <- edgeR::estimateDisp(edger_y, design_matrix) 15 | edgeR::glmQLFit(edger_y, design_matrix, abundance.trend = abundance.trend, robust = robust) 16 | } 17 | 18 | 19 | edger_test_de <- function(edger_fit, contrast, design = NULL){ 20 | cntrst <- parse_contrast({{contrast}}, design, simplify = TRUE) 21 | 22 | edger_fit <- edgeR::glmQLFTest(edger_fit, contrast = cntrst) 23 | edger_res <- edgeR::topTags(edger_fit, n = nrow(edger_fit$counts), sort.by = "none")$table 24 | data.frame(name = rownames(edger_res), pval = edger_res$PValue, adj_pval = edger_res$FDR, 25 | f_statistic = edger_res$F, df1 = edger_fit$df.test, df2 = edger_fit$df.total, lfc = edger_res$logFC) 26 | } 27 | 28 | 29 | limma_fit <- function(values, design, col_data = NULL){ 30 | if(is.matrix(design)){ 31 | design_matrix <- design 32 | }else{ 33 | design_matrix <- model.matrix(design, data = col_data) 34 | } 35 | if(! is_contrast_estimable(cntrst, design_matrix)){ 36 | stop("The contrast is not estimable from the design_matrix") 37 | } 38 | 39 | suppressWarnings({ 40 | # limma warns about missing values. Here we expect missing values though. 41 | lm_fit <- limma::lmFit(values, design_matrix) 42 | }) 43 | lm_fit 44 | } 45 | 46 | limma_test_de <- function(lm_fit, contrast, design, values = NULL, shrink = TRUE, trend = TRUE, robust = TRUE){ 47 | cntrst <- matrix(parse_contrast({{contrast}}, formula = design, simplify = TRUE), ncol = 1) 48 | cntrst <- evaluate_contrast_tree(cntrst, cntrst, \(x, .) x) 49 | 50 | lm_fit <- limma::contrasts.fit(lm_fit, contrasts = cntrst) 51 | if(shrink){ 52 | lm_fit <- tryCatch({ 53 | limma::eBayes(lm_fit, trend = trend, robust = robust) 54 | }, error = function(err){ 55 | limma::eBayes(lm_fit, trend = FALSE, robust = TRUE) 56 | }) 57 | }else{ 58 | lm_fit <- limma_eBayes_without_shrinkage(lm_fit) 59 | } 60 | tt <- limma::topTable(lm_fit, number = nrow(lm_fit$coefficients), adjust.method = "BH", sort.by = "none") 61 | if(! is.null(values)){ 62 | for(row in which(MatrixGenerics::rowAnyNAs(values))){ 63 | # limma can return misleading results if there missing values in the wrong 64 | # places (see https://support.bioconductor.org/p/9150300/) 65 | if(! 
is_contrast_estimable(cntrst, lm_fit$design[!is.na(values[row,]),,drop=FALSE])){ 66 | tt[row, c("P.Value", "adj.P.Val", "t", "logFC")] <- NA_real_ 67 | } 68 | } 69 | } 70 | data.frame(name = rownames(tt), pval = tt$P.Value, adj_pval = tt$adj.P.Val, t_statistic = tt$t, lfc = tt$logFC) 71 | } 72 | 73 | 74 | -------------------------------------------------------------------------------- /R/differential_geometry.R: -------------------------------------------------------------------------------- 1 | 2 | # Grassmann (Gr(n, k)) 3 | 4 | grassmann_map <- function(x, base_point){ 5 | # Adapted from https://github.com/JuliaManifolds/Manifolds.jl/blob/master/src/manifolds/GrassmannStiefel.jl#L93 6 | if(ncol(base_point) == 0 || nrow(base_point) == 0){ 7 | base_point 8 | }else if(any(is.na(x))){ 9 | matrix(NA, nrow = nrow(x), ncol = ncol(x)) 10 | }else{ 11 | svd <- svd(x) 12 | z <- base_point %*% svd$v %*% diag(cos(svd$d), nrow = length(svd$d)) %*% t(svd$v) + 13 | svd$u %*% diag(sin(svd$d), nrow = length(svd$d)) %*% t(svd$v) 14 | # Calling `qr.Q(qr(z))` is problematic because it can flip the signs 15 | z 16 | } 17 | } 18 | 19 | grassmann_log <- function(p, q){ 20 | # Adapted from https://github.com/JuliaManifolds/Manifolds.jl/blob/master/src/manifolds/GrassmannStiefel.jl#L174 21 | # The Grassmann manifold handbook proposes an alternative algorithm in section 5.2 22 | n <- nrow(p) 23 | k <- ncol(p) 24 | stopifnot(nrow(q) == n, ncol(q) == k) 25 | if(n == 0 || k == 0){ 26 | p 27 | }else{ 28 | z <- t(q) %*% p 29 | At <- t(q) - z %*% t(p) 30 | Bt <- lm.fit(z, At)$coefficients 31 | svd <- svd(t(Bt), k, k) 32 | svd$u %*% diag(atan(svd$d), nrow = k) %*% t(svd$v) 33 | } 34 | } 35 | 36 | project_grassmann <- function(x){ 37 | qr.Q(qr(x)) 38 | } 39 | 40 | project_grassmann_tangent <- function(x, base_point){ 41 | x - base_point %*% t(base_point) %*% x 42 | } 43 | 44 | 45 | random_grassmann_point <- function(n, k, ...){ 46 | V <- randn(n, k, ...) 47 | project_grassmann(V) 48 | } 49 | 50 | random_grassmann_tangent <- function(p, ...){ 51 | n <- nrow(p) 52 | k <- ncol(p) 53 | Z <- randn(n, k, ...) 54 | project_grassmann_tangent(Z, p) 55 | } 56 | 57 | 58 | grassmann_angle_from_tangent <- function(x, normalized = TRUE){ 59 | # Conversion of tangent vector to angle taken from Proposition 5.1 of the Grassmann manifold handbook 60 | thetas <- (svd(x)$d / pi * 180) 61 | if(normalized){ 62 | thetas <- thetas %% 180 63 | max(pmin(thetas, 180 - thetas)) 64 | }else{ 65 | thetas[1] 66 | } 67 | } 68 | 69 | grassmann_angle_from_points <- function(p, q){ 70 | grassmann_angle_from_tangent(grassmann_log(p, q)) 71 | } 72 | 73 | 74 | -------------------------------------------------------------------------------- /R/estimate_linear_coefficients.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | estimate_linear_coefficient <- function(Y, design_matrix, method = c("linear", "mean", "cluster_median", "zero")){ 4 | method <- match.arg(method) 5 | 6 | if(method == "linear"){ 7 | linear_fit <- lm.fit(design_matrix, t(Y)) 8 | t(linear_fit$coefficients) 9 | }else if(method == "mean"){ 10 | # Check for intercept column / columns 11 | ones <- rep(1, nrow(design_matrix)) 12 | intercept_fit <- lm.fit(design_matrix, ones) 13 | if(! sum(intercept_fit$residuals^2) < 1e-12){ 14 | stop("The design matrix does not have an intercept. Cannot apply a single mean offset. 
Please change", 15 | "'linear_coefficient_estimator' to 'linear', 'cluster_median', or 'zero'.") 16 | } 17 | means <- MatrixGenerics::rowMeans2(Y) 18 | matrix(means, ncol = 1) %*% matrix(intercept_fit$coefficients, nrow = 1) 19 | }else if(method == "zero"){ 20 | matrix(0, nrow = nrow(Y), ncol = ncol(design_matrix)) 21 | }else if(method == "cluster_median"){ 22 | min_cluster_membership <- 0.01 23 | pca <- pca(Y, n = 20) 24 | harm_obj <- harmony_init(pca$embedding, design_matrix, nclust = 30, verbose = FALSE) 25 | harm_obj <- harmony_max_div_clustering(harm_obj) 26 | Yt <- as.matrix(t(Y)) 27 | coef <- do.call(cbind, lapply(seq_len(nrow(harm_obj$R)), \(cl){ 28 | threshold <- min(min_cluster_membership, max(harm_obj$R) * 0.5) 29 | sel <- harm_obj$R[cl, ] > threshold 30 | tryCatch({ 31 | fit <- lm.wfit(design_matrix[sel,,drop=FALSE], y = Yt[sel,,drop=FALSE], w = harm_obj$R[cl, sel]) 32 | as.numeric(t(fit$coefficients)) 33 | }, error = function(e){ 34 | rep(NA_real_, nrow(Y) * ncol(design_matrix)) 35 | }) 36 | })) 37 | wmed <- matrixStats::rowWeightedMedians(coef, w = rowSums(harm_obj$R), na.rm = TRUE, interpolate = FALSE) 38 | matrix(wmed, nrow = nrow(Y), ncol = ncol(design_matrix)) 39 | } 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /R/geodesic_regression.R: -------------------------------------------------------------------------------- 1 | 2 | ####################### 3 | # Grassmann Manifold # 4 | ####################### 5 | 6 | 7 | #' Solve d(P, exp_p(V * x))^2 for V 8 | #' 9 | #' @returns A three-dimensional array with the coefficients `V`. 10 | #' 11 | #' @keywords internal 12 | grassmann_geodesic_regression <- function(coordsystems, design, base_point, weights = 1, tangent_regression = FALSE){ 13 | # Validate input 14 | n_obs <- nrow(design) 15 | n_coef <- ncol(design) 16 | n_amb <- nrow(base_point) 17 | n_emb <- ncol(base_point) 18 | 19 | coordsystems <- if(is.list(coordsystems)){ 20 | coordsystems 21 | }else if(is.array(coordsystems)){ 22 | stopifnot(length(dim(coordsystems)) == 3) 23 | destack_slice(coordsystems) 24 | }else{ 25 | stop("Cannot handle coordsystems of type: ", toString(class(coordsystems), width = 100)) 26 | } 27 | stopifnot(length(coordsystems) == n_obs) 28 | stopifnot(all(vapply(coordsystems, \(emb) nrow(emb) == n_amb && ncol(emb) == n_emb, FUN.VALUE = logical(1L)))) 29 | # stopifnot(all(vapply(coordsystems, \(emb) is_grassmann_element(emb), FUN.VALUE = logical(1L)))) 30 | weights <- rep_len(weights, n_obs) 31 | 32 | 33 | 34 | # Initialize with tangent regression (if possible) 35 | tangent_vecs <- lapply(coordsystems, \(emb) as.vector(grassmann_log(base_point, emb))) 36 | merged_vecs <- stack_cols(tangent_vecs) 37 | tangent_fit <- if(nrow(merged_vecs) == 0){ 38 | matrix(nrow = 0, ncol = ncol(design)) 39 | }else{ 40 | t(lm.wfit(design, t(merged_vecs), w = weights)$coefficients) 41 | } 42 | coef <- stack_slice(lapply(seq_len(ncol(tangent_fit)), \(idx) matrix(tangent_fit[,idx], nrow = n_amb, ncol = n_emb))) 43 | dimnames(coef) <- list(NULL, NULL, colnames(tangent_fit)) 44 | 45 | 46 | if(tangent_regression){ 47 | coef 48 | }else{ 49 | # warning("Refine regression using Riemannian optimization. (Not yet implemented)") 50 | coef 51 | } 52 | } 53 | 54 | #' Solve ||Y - exp_p(V * x) Y ||^2_2 for V 55 | #' 56 | #' @returns A three-dimensional array with the coefficients `V`. 
57 | #' 58 | #' @keywords internal 59 | grassmann_lm <- function(data, design, base_point, tangent_regression = FALSE){ 60 | nas <- apply(data, 2, anyNA) | apply(design, 1, anyNA) 61 | data <- data[,!nas,drop=FALSE] 62 | design <- design[!nas,,drop=FALSE] 63 | 64 | n_obs <- nrow(design) 65 | n_coef <- ncol(design) 66 | n_amb <- nrow(base_point) 67 | n_emb <- ncol(base_point) 68 | 69 | # Initialize with tangent regression 70 | mm_groups <- get_groups(design) 71 | groups <- unique(mm_groups) 72 | reduced_design <- mply_dbl(groups, \(gr) design[which(mm_groups == gr)[1],], ncol = ncol(design)) 73 | if(any(table(mm_groups) < n_emb)){ 74 | n_occur <- c(table(mm_groups)) 75 | sel_indices <- head(order(n_occur), n = 4) 76 | problematic_mat <- cbind(n_occurrences = n_occur[sel_indices], reduced_design[sel_indices,,drop=FALSE]) 77 | stop("Too few datapoints in some design matrix group.\nIf a covariate is continuous, ", 78 | "please discretize it into an (ordered) factor.\nIf for one factor level there are too few cells, consider removing ", 79 | "that factor level or merging it with another.\n\n", glmGamPoi:::format_matrix(problematic_mat), 80 | "\nEach row must occurr at least n_embedding=", n_emb, " times.\n") 81 | } 82 | group_planes <- lapply(groups, \(gr) pca(data[,mm_groups == gr,drop=FALSE], n = n_emb, center = FALSE)$coordsystem) 83 | group_sizes <- vapply(groups, \(gr) sum(mm_groups == gr), FUN.VALUE = 0L) 84 | coef <- grassmann_geodesic_regression(group_planes, design = reduced_design, base_point = base_point, weights = group_sizes, tangent_regression = TRUE) 85 | if(tangent_regression){ 86 | coef 87 | }else{ 88 | # warning("Refine regression using Riemannian optimization. (Not yet implemented)") 89 | coef 90 | } 91 | } 92 | 93 | 94 | get_groups <- function (design_matrix) { 95 | vctrs::vec_group_id(unclass(design_matrix)) 96 | } 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /R/glioblastoma_example_data.R: -------------------------------------------------------------------------------- 1 | 2 | #' The `glioblastoma_example_data` dataset 3 | #' 4 | #' The dataset is a [`SingleCellExperiment`] object subset to 5,000 cells and 5 | #' 300 genes. The `colData` contain an entry for each cell from which patient 6 | #' it came and to which treatment condition it belonged (`"ctrl"` or `"panobinostat"`). 7 | #' 8 | #' The original data was collected by Zhao et al. (2021). 9 | #' 10 | #' @returns A [`SingleCellExperiment`] object. 11 | #' 12 | #' @references 13 | #' * Zhao, Wenting, Athanassios Dovas, Eleonora Francesca Spinazzi, Hanna Mendes Levitin, Matei Alexandru Banu, Pavan Upadhyayula, Tejaswi Sudhakar, et al. 14 | #' “Deconvolution of Cell Type-Specific Drug Responses in Human Tumor Tissue with Single-Cell RNA-Seq.” Genome Medicine 13, no. 1 15 | #' (December 2021): 82. https://doi.org/10.1186/s13073-021-00894-y. 16 | #' 17 | #' @name glioblastoma_example_data 18 | NULL 19 | 20 | -------------------------------------------------------------------------------- /R/handle_data_parameter.R: -------------------------------------------------------------------------------- 1 | 2 | handle_data_parameter <- function(data, on_disk, assay){ 3 | if(is.matrix(data)){ 4 | if(! 
is.numeric(data)){ 5 | stop("The data argument must consist of numeric values and not of ", mode(data), " values") 6 | } 7 | if(is.null(on_disk) || isFALSE(on_disk)){ 8 | data_mat <- data 9 | }else if(isTRUE(on_disk)){ 10 | data_mat <- HDF5Array::writeHDF5Array(data) 11 | }else{ 12 | stop("Illegal argument type for on_disk. Can only handle 'NULL', 'TRUE', or 'FALSE'") 13 | } 14 | }else if(is(data, "DelayedArray")){ 15 | if(is.null(on_disk) || isTRUE(on_disk)){ 16 | data_mat <- data 17 | }else if(isFALSE(on_disk)){ 18 | data_mat <- as.matrix(data) 19 | }else{ 20 | stop("Illegal argument type for on_disk. Can only handle 'NULL', 'TRUE', or 'FALSE'") 21 | } 22 | }else if(is(data, "SummarizedExperiment")){ 23 | data_mat <- handle_data_parameter(SummarizedExperiment::assay(data, assay), on_disk) 24 | }else if(canCoerce(data, "SummarizedExperiment")){ 25 | se <- as(data, "SummarizedExperiment") 26 | data_mat <- handle_data_parameter(SummarizedExperiment::assay(se, assay), on_disk) 27 | }else if(is(data, "dgCMatrix") || is(data, "dgTMatrix")) { 28 | if(isTRUE(on_disk)){ 29 | data_mat <- HDF5Array::writeHDF5Array(data) 30 | }else if(isFALSE(on_disk)){ 31 | data_mat <- as.matrix(data) 32 | }else{ 33 | stop("glmGamPoi does not yet support sparse input data of type '", class(data),"'. ", 34 | "Please explicitly set the 'on_disk' parameter to force a conversion to a dense format either in-memory ('on_disk = FALSE') ", 35 | "or on-disk ('on_disk = TRUE')") 36 | } 37 | }else{ 38 | stop("Cannot handle data of class '", class(data), "'.", 39 | "It must be of type dense matrix object (i.e., a base matrix or DelayedArray),", 40 | " or a container for such a matrix (for example: SummarizedExperiment).") 41 | } 42 | 43 | if(! is.null(colnames(data_mat))){ 44 | if(ncol(data_mat) != length(unique(colnames(data_mat)))) stop("The colnames of the data are not unique.") 45 | } 46 | if(! 
is.null(rownames(data_mat))){ 47 | if(nrow(data_mat) != length(unique(rownames(data_mat)))) stop("The rownames of the data are not unique.") 48 | } 49 | 50 | data_mat 51 | } 52 | 53 | 54 | convert_dataframe_cols_chr_to_fct <- function(col_data){ 55 | character_cols <- vapply(col_data, is.character, logical(1L)) 56 | col_data[character_cols] <- lapply(col_data[character_cols], as.factor) 57 | col_data 58 | } 59 | 60 | -------------------------------------------------------------------------------- /R/handle_design_parameter.R: -------------------------------------------------------------------------------- 1 | 2 | handle_design_parameter <- function(design, data, col_data, verbose = FALSE){ 3 | n_samples <- ncol(data) 4 | 5 | ignore_degeneracy <- isTRUE(attr(design, "ignore_degeneracy")) 6 | 7 | # Handle the design parameter 8 | if(is.matrix(design)){ 9 | design_matrix <- design 10 | design_formula <- NULL 11 | }else if((is.vector(design) || is.factor(design))){ 12 | if(length(design) != n_samples){ 13 | if(length(design) == 1 && design == 1){ 14 | stop("The specified design vector length (", length(design), ") does not match ", 15 | "the number of samples: ", n_samples, "\n", 16 | "Did you maybe mean: `design = ~ 1`?") 17 | }else{ 18 | stop("The specified design vector length (", length(design), ") does not match ", 19 | "the number of samples: ", n_samples) 20 | } 21 | } 22 | tmp <- glmGamPoi:::convert_chr_vec_to_model_matrix(design, NULL) 23 | design_matrix <- tmp$model_matrix 24 | design_formula <- NULL 25 | }else if(inherits(design,"formula")){ 26 | tmp <- convert_formula_to_design_matrix(design, col_data) 27 | design_matrix <- tmp$design_matrix 28 | design_formula <- tmp$formula 29 | col_data <- add_global_variables_to_col_data(design, col_data) 30 | attr(design_formula, "constructed_from") <- "formula" 31 | }else{ 32 | stop("design argment of class ", class(design), " is not supported. Please ", 33 | "specify a `design_matrix`, a `character vector`, or a `formula`.") 34 | } 35 | 36 | if(nrow(design_matrix) != ncol(data)) stop("Number of rows in col_data does not match number of columns of data.") 37 | if(! is.null(rownames(design_matrix)) && 38 | ! all(rownames(design_matrix) == as.character(seq_len(nrow(design_matrix)))) && # That's the default rownames 39 | ! is.null(colnames(data))){ 40 | if(! all(rownames(design_matrix) == colnames(data))){ 41 | if(setequal(rownames(design_matrix), colnames(data))){ 42 | # Rearrange the rows to match the columns of data 43 | design_matrix <- design_matrix[colnames(data), ,drop=FALSE] 44 | }else{ 45 | stop("The rownames of the design_matrix / col_data do not match the column names of data.") 46 | } 47 | } 48 | } 49 | 50 | if(any(matrixStats::rowAnyNAs(design_matrix))){ 51 | stop("The design matrix contains 'NA's for sample ", 52 | paste0(head(which(DelayedMatrixStats::rowAnyNAs(design_matrix))), collapse = ", "), 53 | ". Please remove them before you call 'lemur()'.") 54 | } 55 | 56 | if(ncol(design_matrix) >= n_samples && ! ignore_degeneracy){ 57 | stop("The design_matrix has more columns (", ncol(design_matrix), 58 | ") than the there are samples in the data matrix (", n_samples, " columns).\n", 59 | "Too few replicates / too many coefficients to fit model.\n", 60 | "The head of the design matrix: \n", glmGamPoi:::format_matrix(head(design_matrix, n = 3))) 61 | } 62 | 63 | if(verbose && is.null(design_formula)){ 64 | message("The 'design' was not specified with a formula. 
This means that you cannot use 'cond(...)' in 'test_de(...)'.") 65 | } 66 | 67 | # Check rank of design_matrix 68 | qr_mm <- qr(design_matrix) 69 | if(qr_mm$rank < ncol(design_matrix) && n_samples > 0 && ! ignore_degeneracy){ 70 | is_zero_column <- DelayedMatrixStats::colCounts(design_matrix, value = 0) == nrow(design_matrix) 71 | if(any(is_zero_column)){ 72 | stop("The model matrix seems degenerate ('matrix_rank(design_matrix) < ncol(design_matrix)'). ", 73 | "Column ", paste0(head(which(is_zero_column), n=10), collapse = ", "), " contains only zeros. \n", 74 | "The head of the design matrix: \n", glmGamPoi:::format_matrix(head(design_matrix, n = 3))) 75 | }else{ 76 | stop("The model matrix seems degenerate ('matrix_rank(design_matrix) < ncol(design_matrix)'). ", 77 | "Some columns are perfectly collinear. Did you maybe include the same coefficient twice?\n", 78 | "The head of the design matrix: \n", glmGamPoi:::format_matrix(head(design_matrix, n = 3))) 79 | } 80 | } 81 | 82 | rownames(design_matrix) <- colnames(data) 83 | validate_design_matrix(design_matrix, data) 84 | list(design_matrix = design_matrix, design_formula = design_formula, col_data = col_data) 85 | } 86 | 87 | 88 | 89 | convert_formula_to_design_matrix <- function(formula, col_data){ 90 | attr(col_data, "na.action") <- "na.pass" 91 | tryCatch({ 92 | mf <- model.frame(formula, data = col_data, drop.unused.levels = FALSE) 93 | terms <- attr(mf, "terms") 94 | # xlevels is used for reconstructing the model matrix 95 | attr(terms, "xlevels") <- stats::.getXlevels(terms, mf) 96 | # vars_xlevels is used to check input to cond(...) 97 | attr(terms, "vars_xlevels") <- xlevels_for_formula_vars(terms, col_data) 98 | mm <- stats::model.matrix.default(terms, mf) 99 | attr(terms, "contrasts") <- attr(mm, "contrasts") 100 | }, error = function(e){ 101 | # Try to extract text from error message 102 | match <- regmatches(e$message, regexec("object '(.+)' not found", e$message))[[1]] 103 | if(length(match) == 2){ 104 | stop("Problem parsing the formula (", formula, ").\n", 105 | "Variable '", match[2], "' not found in col_data or global environment. Possible variables are:\n", 106 | paste0(colnames(col_data), collapse = ", "), call. = FALSE) 107 | }else{ 108 | stop(e$message) 109 | } 110 | }) 111 | 112 | # Otherwise every copy of the model stores the whole global environment! 113 | attr(terms, ".Environment") <- c() 114 | colnames(mm)[colnames(mm) == "(Intercept)"] <- "Intercept" 115 | list(formula = terms, design_matrix = mm) 116 | } 117 | 118 | add_global_variables_to_col_data <- function(formula, col_data){ 119 | # Check if var is global and put it into col_data 120 | formula_env <- attr(formula, ".Environment") 121 | if(is.null(formula_env)) formula_env <- rlang::empty_env() 122 | for(gv in setdiff(all.vars(formula), colnames(col_data))){ 123 | value <- rlang::eval_tidy(rlang::sym(gv), data = NULL, env = formula_env) 124 | is_vector_type <- (is(col_data, "DFrame") && is(value, "Vector")) || vctrs::obj_is_vector(value) 125 | if(is_vector_type){ 126 | has_correct_length <- NROW(value) == 1 || NROW(value) == nrow(col_data) 127 | if(! 
has_correct_length){ 128 | stop("Trying store global variables from formula in colData, however '", gv, "' ", 129 | "has length ", NROW(value), ", but it needs to be 1 or nrow(col_data) (", nrow(col_data), ").") 130 | } 131 | }else{ 132 | stop("Trying store global variables from formula in colData, however '", gv, "' ", 133 | "is of type ", class(value)[1]," and not a vector-type.") 134 | } 135 | col_data[[gv]] <- value 136 | } 137 | col_data 138 | } 139 | 140 | xlevels_for_formula_vars <- function(formula, data){ 141 | # if(! is.null( attr(formula, "xlevels"))){ 142 | # attr(formula, "xlevels") 143 | if(! is.null( attr(formula, "vars_xlevels"))){ 144 | attr(formula, "vars_xlevels") 145 | }else{ 146 | # For all character / factor vars get xlevel 147 | all_vars <- all.vars(formula) 148 | formula_env <- attr(formula, ".Environment") 149 | if(is.null(formula_env)) formula_env <- rlang::empty_env() 150 | xlev <- lapply(all_vars, \(v){ 151 | value <- rlang::eval_tidy(rlang::sym(v), data = as.data.frame(data), env = formula_env) 152 | if(is.character(value)){ 153 | levels(as.factor(value)) 154 | }else if(is.factor(value)){ 155 | levels(value) 156 | }else{ 157 | NULL 158 | } 159 | }) 160 | names(xlev) <- all_vars 161 | xlev[!vapply(xlev, is.null, TRUE)] 162 | } 163 | } 164 | 165 | validate_design_matrix <- function(matrix, data){ 166 | stopifnot(is.matrix(matrix)) 167 | stopifnot(nrow(matrix) == ncol(data)) 168 | } 169 | 170 | -------------------------------------------------------------------------------- /R/handle_test_data_parameter.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | handle_test_data_parameter <- function(fit, test_data, test_data_col_data, continuous_assay_name){ 4 | if(is(test_data, "SummarizedExperiment")){ 5 | if(! continuous_assay_name %in% assayNames(test_data)){ 6 | stop("Cannot find assay '", continuous_assay_name, "' in the assays of 'independent_data'") 7 | } 8 | # Nothing else to be done here 9 | }else if(is.list(test_data)){ 10 | if(! 
continuous_assay_name %in% names(test_data)){ 11 | stop("Cannot find assay '", continuous_assay_name, "' in the names of 'independent_data'") 12 | } 13 | if(is.null(test_data_col_data)){ 14 | stop("'independent_data_col_data' must not be NULL") 15 | } 16 | test_data <- SingleCellExperiment(assays = test_data) 17 | }else if(is.matrix(test_data)){ 18 | message("'independent_data' is a matrix treating it as continuous values") 19 | if(is.null(test_data_col_data)){ 20 | stop("'independent_data_col_data' must not be NULL") 21 | } 22 | test_data <- SingleCellExperiment(assays = setNames(list(test_data), continuous_assay_name)) 23 | }else if(is.null(test_data)){ 24 | # This is necessary to satisfy model.matrix in 'project_on_lemur_fit' 25 | col_data_copy <- fit$colData 26 | character_cols <- vapply(col_data_copy, is.character, logical(1L)) 27 | col_data_copy[character_cols] <- lapply(col_data_copy[character_cols], as.factor) 28 | test_data <- SingleCellExperiment(assays = setNames(list(matrix(nrow = nrow(fit), ncol = 0) * 1.0), continuous_assay_name), 29 | colData = col_data_copy[integer(0L),,drop=FALSE]) 30 | }else{ 31 | stop("Cannot handle 'indepdendet_data' of type: ", toString(class(test_data), width = 100)) 32 | } 33 | 34 | colData(test_data) <- S4Vectors::DataFrame(glmGamPoi:::get_col_data(test_data, test_data_col_data), check.names = FALSE) 35 | 36 | test_data 37 | } 38 | -------------------------------------------------------------------------------- /R/harmony_wrapper.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | harmony_init <- function(embedding, design_matrix, 4 | theta = 2, lambda = 1, sigma = 0.1, nclust = min(round(ncol(embedding) / 30), 100), 5 | tau = 0, block.size = 0.05, max.iter.cluster = 200, 6 | epsilon.cluster = 1e-5, epsilon.harmony = 1e-4, verbose = TRUE){ 7 | 8 | mm_groups <- get_groups(design_matrix) 9 | 10 | # Adapted from https://github.com/immunogenomics/harmony/blob/c8f4901ef92d6e9b4e1373c52de3b67ff052db3e/R/ui.R#L161 11 | n_groups <- length(unique(mm_groups)) 12 | phi <- matrix(0, nrow = n_groups, ncol = ncol(embedding)) 13 | phi[mm_groups + n_groups * (seq_along(mm_groups)-1)] <- 1 14 | phi <- as(phi, "sparseMatrix") 15 | 16 | N <- ncol(embedding) 17 | N_b <- MatrixGenerics::rowSums2(phi) 18 | B_vec <- rep(1, n_groups) 19 | 20 | theta <- rep_len(theta, n_groups) 21 | theta <- theta * (1 - exp(-(N_b / (nclust * tau)) ^ 2)) 22 | 23 | lambda <- rep_len(lambda, n_groups) 24 | lambda_vec <- c(0, lambda) 25 | 26 | 27 | sigma <- rep_len(sigma, nclust) 28 | lambda_range = c(0.1, 10) 29 | if(packageVersion("harmony") < "1.2.0"){ 30 | stop("Your 'harmony' version is outdated: ", packageVersion("harmony"), ". Please update to version >= 1.2.0") 31 | }else{ 32 | alpha <- 0.2 33 | harmonyObj <- harmony::RunHarmony(embedding, mm_groups, nclust = nclust, max.iter = 0, return_object = TRUE, verbose = FALSE) 34 | harmonyObj$setup( 35 | embedding, phi, 36 | sigma, theta, lambda_vec, alpha, max.iter.cluster, epsilon.cluster, 37 | epsilon.harmony, nclust, block.size, B_vec, verbose 38 | ) 39 | } 40 | harmony_init_clustering(harmonyObj) 41 | harmonyObj 42 | } 43 | 44 | #' Create an arbitrary Harmony object so that I can modify it later 45 | #' 46 | #' @returns The full [`harmony`] object (R6 reference class type). 
47 | #' 48 | #' @keywords internal 49 | harmony_new_object <- function(){ 50 | Y <- randn(3, 100) 51 | harmony::RunHarmony(Y, rep(c("a", "b"), length.out = 100), nclust = 2, max.iter = 0, return_object = TRUE, verbose = FALSE) 52 | harmonyObj 53 | } 54 | 55 | harmony_init_clustering <- function(harmonyObj, iter.max = 25, nstart = 10){ 56 | stopifnot(is(harmonyObj, "Rcpp_harmony")) 57 | harmonyObj$Y <- t(stats::kmeans(t(harmonyObj$Z_cos), centers = harmonyObj$K, iter.max = iter.max, nstart = nstart)$centers) 58 | harmonyObj$init_cluster_cpp() 59 | harmonyObj 60 | } 61 | 62 | harmony_max_div_clustering <- function(harmonyObj){ 63 | stopifnot(is(harmonyObj, "Rcpp_harmony")) 64 | err_status <- harmonyObj$cluster_cpp() 65 | if (err_status == -1) { 66 | stop('terminated by user') 67 | } else if (err_status != 0) { 68 | stop(gettextf('Harmony exited with non-zero exit status: %d', 69 | err_status)) 70 | } 71 | harmonyObj 72 | } 73 | 74 | -------------------------------------------------------------------------------- /R/lemur-package.R: -------------------------------------------------------------------------------- 1 | 2 | #' 3 | #' @import methods 4 | #' @import stats 5 | #' @rawNamespace import(SingleCellExperiment, except = weights) 6 | #' @importFrom S4Vectors metadata `metadata<-` DataFrame 7 | #' @importFrom SummarizedExperiment assayNames assay `assay<-` colData `colData<-` rowData `rowData<-` 8 | #' @importFrom SingleCellExperiment reducedDims 9 | #' @importFrom BiocGenerics design 10 | #' @importFrom utils .DollarNames capture.output head txtProgressBar setTxtProgressBar 11 | #' @importFrom Matrix t 12 | NULL 13 | 14 | ## usethis namespace: start 15 | #' @importFrom Rcpp sourceCpp 16 | ## usethis namespace: end 17 | NULL 18 | 19 | ## usethis namespace: start 20 | #' @useDynLib lemur, .registration = TRUE 21 | ## usethis namespace: end 22 | NULL 23 | 24 | # Satisfy NOTE from CRAN CMD check 25 | # Both are used in 'find_de_neighborhoods' 26 | utils::globalVariables(c("cntrst", "..did_indicator")) 27 | -------------------------------------------------------------------------------- /R/lemur.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Main function to fit the latent embedding multivariate regression (LEMUR) model 4 | #' 5 | #' @param data a matrix with observations in the columns and features in the rows. 6 | #' Or a `SummarizedExperiment` / `SingleCellExperiment` object 7 | #' @param design a formula referring to global objects or column in the `colData` of `data` 8 | #' and `col_data` argument 9 | #' @param col_data an optional data frame with `ncol(data)` rows. 10 | #' @param n_embedding the dimension of the $k$-plane that is rotated through space. 11 | #' @param linear_coefficient_estimator specify which estimator is used to center the conditions. 12 | #' `"linear"` runs simple regression it works well in many circumstances but can produce poor 13 | #' results if the composition of the cell types changes between conditions (e.g., one cell type 14 | #' disappears). `"mean"`, `"cluster_median"` and `"zero"` are alternative estimators, which 15 | #' are each supposed to be more robust against compositional changes but cannot account 16 | #' for genes that change for all cells between conditions. 17 | #' `"linear"` is the default as it works best with subsequent alignment steps. 18 | #' @param use_assay if `data` is a `SummarizedExperiment` / `SingleCellExperiment` object, 19 | #' which assay should be used. 
20 | #' @param test_fraction the fraction of cells that are split of before the model fit to keep an 21 | #' independent set of test observations. Alternatively, a logical vector of length `ncol(data)`. 22 | #' Default: 20% (`0.2`). 23 | #' @param ... additional parameters that are passed on to the internal function `lemur_impl`. 24 | #' @param verbose Should the method print information during the fitting. Default: `TRUE`. 25 | #' 26 | #' @return An object of class `lemur_fit` which extends [`SingleCellExperiment`]. Accordingly, 27 | #' all functions that work for `sce`'s also work for `lemur_fit`'s. In addition, we 28 | #' give easy access to the fitted values using the dollar notation (e.g., `fit$embedding`). 29 | #' For details see the [lemur_fit-class] help page. 30 | #' 31 | #' @references 32 | #' * Ahlmann-Eltze, C. & Huber, W. (2023). Analysis of multi-condition single-cell data with latent 33 | #' embedding multivariate regression. bioRxiv [https://doi.org/10.1101/2023.03.06.531268](https://doi.org/10.1101/2023.03.06.531268) 34 | #' 35 | #' @seealso [`align_by_grouping`], [`align_harmony`], [`test_de`], [`find_de_neighborhoods`] 36 | #' 37 | #' @examples 38 | #' data(glioblastoma_example_data) 39 | #' fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5) 40 | #' fit 41 | #' 42 | #' 43 | #' @export 44 | lemur <- function(data, design = ~ 1, col_data = NULL, 45 | n_embedding = 15, 46 | linear_coefficient_estimator = c("linear", "mean", "cluster_median", "zero"), 47 | use_assay = "logcounts", 48 | test_fraction = 0.2, 49 | ..., 50 | verbose = TRUE){ 51 | 52 | data_mat <- handle_data_parameter(data, on_disk = FALSE, assay = use_assay) 53 | col_data <- glmGamPoi:::get_col_data(data, col_data) 54 | if(! all(colnames(col_data) == make.names(colnames(col_data), unique = TRUE))){ 55 | stop("The column names of 'colData(fit)' are not unique. Please fix this for example by calling: \n", 56 | "`colnames(colData(data)) <- make.names(colnames(colData(data)), unique = TRUE)`") 57 | } 58 | 59 | des <- handle_design_parameter(design, data, col_data, verbose = verbose) 60 | al_des <- des 61 | col_data <- des$col_data 62 | 63 | if(! 
is(data, "SummarizedExperiment")){ 64 | data <- SingleCellExperiment(assays = setNames(list(data_mat), use_assay), colData = col_data) 65 | } 66 | 67 | # Avoid problems when splitting into test and training data 68 | col_data <- convert_dataframe_cols_chr_to_fct(col_data) 69 | 70 | # Create indicator vector which cells are used for training and which for testing 71 | is_test_data <- rep(FALSE, ncol(data)) 72 | if(is.logical(test_fraction) && length(ncol(data))){ 73 | if(any(is.na(test_fraction))) stop("test_fraction must not contain 'NA's.") 74 | is_test_data <- test_fraction 75 | }else if(length(test_fraction) != 1){ 76 | stop("'test_fraction' must be a boolean vector of length 'ncol(data)' or a single number between 0 and 1.") 77 | }else if(test_fraction < 0 || test_fraction >= 1){ 78 | stop("'test_fraction' must be at least 0 and smaller than 1.") 79 | }else{ 80 | if(verbose) message("Storing ", round(test_fraction, 2) * 100, "% of the data (", round(ncol(data) * test_fraction), " cells)", 81 | " as test data.") 82 | is_test_data[sample.int(ncol(data), size = round(ncol(data) * test_fraction), replace = FALSE)] <- TRUE 83 | } 84 | 85 | design_matrix <- des$design_matrix[!is_test_data,,drop=FALSE] 86 | res <- lemur_impl(data_mat[,!is_test_data,drop=FALSE], design_matrix, n_embedding = n_embedding, 87 | linear_coefficient_estimator = linear_coefficient_estimator, verbose = verbose, ...) 88 | alignment_design <- if(matrix_equals(design_matrix, res$alignment_design_matrix)){ 89 | des$design_formula 90 | }else{ 91 | NULL 92 | } 93 | embedding <- matrix(NA, nrow = res$n_embedding, ncol = ncol(data)) 94 | embedding[,!is_test_data] <- res$embedding 95 | embedding[,is_test_data] <- project_on_lemur_fit_impl(Y = data_mat[,is_test_data,drop=FALSE], design_matrix = des$design_matrix[is_test_data,,drop=FALSE], 96 | alignment_design_matrix = al_des$design_matrix[is_test_data,,drop=FALSE], 97 | coefficients = res$coefficients, linear_coefficients = res$linear_coefficients, 98 | alignment_coefficients = res$alignment_coefficients, base_point = res$base_point) 99 | 100 | lemur_fit(data, col_data = col_data, 101 | row_data = if(is(data, "SummarizedExperiment")) rowData(data) else NULL, 102 | n_embedding = res$n_embedding, 103 | design = des$design_formula, 104 | design_matrix = des$design_matrix, 105 | linear_coefficients = res$linear_coefficients, 106 | base_point = res$base_point, 107 | coefficients = res$coefficients, 108 | embedding = embedding, 109 | alignment_coefficients = res$alignment_coefficients, 110 | alignment_design = alignment_design, 111 | alignment_design_matrix = al_des$design_matrix, 112 | use_assay = use_assay, 113 | is_test_data = is_test_data) 114 | } 115 | 116 | 117 | lemur_impl <- function(Y, design_matrix, 118 | n_embedding = 15, 119 | base_point = c("global_embedding", "mean"), 120 | linear_coefficient_estimator = c("linear", "mean", "cluster_median", "zero"), 121 | linear_coefficients = NULL, 122 | coefficients = NULL, 123 | embedding = NULL, 124 | alignment_coefficients = NULL, 125 | alignment_design_matrix = NULL, 126 | n_iter = 10, tol = 1e-8, 127 | reshuffling_fraction = 0, 128 | verbose = TRUE){ 129 | alignment_coef_fixed_but_embedding_fitted <- ! is.null(alignment_coefficients) && is.null(embedding) 130 | linear_coefficient_estimator <- match.arg(linear_coefficient_estimator) 131 | 132 | # Set reduced dimensions 133 | stopifnot(n_embedding >= 0) 134 | n_ambient_eff <- nrow(Y) 135 | n_embedding <- min(n_embedding, nrow(Y), ncol(Y)) 136 | linear_coef_fixed <- ! 
is.null(linear_coefficients) 137 | diffemb_coef_fixed <- ! is.null(coefficients) 138 | embedding_fixed <- ! is.null(embedding) 139 | alignment_coef_fixed <- ! is.null(alignment_coefficients) 140 | if(is.null(alignment_design_matrix)){ 141 | alignment_design_matrix <- design_matrix 142 | } 143 | 144 | # Initialize values 145 | if(linear_coef_fixed){ 146 | if(length(linear_coefficients) == 1){ 147 | linear_coefficients <- matrix(linear_coefficients, nrow = n_ambient_eff, ncol = ncol(design_matrix)) 148 | } 149 | stopifnot(nrow(linear_coefficients) == n_ambient_eff && ncol(linear_coefficients) == ncol(design_matrix)) 150 | 151 | }else{ 152 | if(verbose) message("Regress out global effects using ", linear_coefficient_estimator, " method.") 153 | linear_coefficients <- estimate_linear_coefficient(Y = Y, design_matrix = design_matrix, method = linear_coefficient_estimator) 154 | } 155 | if(linear_coefficient_estimator == "zero"){ 156 | Y_clean <- Y 157 | }else{ 158 | Y_clean <- Y - linear_coefficients %*% t(design_matrix) 159 | } 160 | if(!is.matrix(base_point)){ 161 | if(verbose) message("Find base point for differential embedding") 162 | base_point <- find_base_point(Y_clean, base_point, n_embedding = n_embedding) 163 | } 164 | 165 | initial_error <- sum(Y_clean^2) 166 | if(verbose) message("Fit differential embedding model") 167 | if(verbose) message("Initial error: ", sprintf("%.3g", initial_error)) 168 | if(! diffemb_coef_fixed){ 169 | coefficients <- array(0, dim = c(n_ambient_eff, n_embedding, ncol(design_matrix))) 170 | } 171 | if(! embedding_fixed){ 172 | embedding <- project_data_on_diffemb(Y_clean, design = design_matrix, 173 | coefficients = coefficients, base_point = base_point) 174 | } 175 | 176 | if(! diffemb_coef_fixed){ 177 | if(verbose) message("---Fit Grassmann linear model") 178 | coefficients <- grassmann_lm(Y_clean, design = design_matrix, base_point = base_point) 179 | } 180 | if(!
embedding_fixed){ 181 | embedding <- project_data_on_diffemb(Y_clean, design = design_matrix, 182 | coefficients = coefficients, base_point = base_point) 183 | } 184 | if(verbose){ 185 | residuals <- Y - project_diffemb_into_data_space(embedding, design = design_matrix, coefficients = coefficients, base_point = base_point) - linear_coefficients %*% t(design_matrix) 186 | error <- sum(residuals^2) 187 | message("Final error: ", sprintf("%.3g", error)) 188 | } 189 | 190 | 191 | if(alignment_coef_fixed_but_embedding_fitted){ 192 | # Rotate the embedding if it wasn't provided 193 | stop("Fixing 'alignment_coefficients' without fixing 'embedding' is not implemented") 194 | }else{ 195 | alignment_coefficients <- array(0, c(n_embedding, n_embedding+1, ncol(alignment_design_matrix))) 196 | } 197 | 198 | # Make sure that axes are ordered by variance 199 | if(prod(dim(embedding)) > 0 && all(!is.na(embedding))){ 200 | svd_emb <- svd(embedding) 201 | rot <- svd_emb$u 202 | base_point <- base_point %*% rot 203 | for(idx in seq_len(dim(coefficients)[3])) { 204 | coefficients[,,idx] <- coefficients[,,idx] %*% rot 205 | } 206 | embedding <- t(svd_emb$v) * svd_emb$d 207 | } 208 | 209 | list(n_embedding = n_embedding, 210 | design_matrix = design_matrix, data = Y, 211 | linear_coefficients = linear_coefficients, 212 | base_point = base_point, 213 | coefficients = coefficients, 214 | embedding = embedding, 215 | alignment_coefficients = alignment_coefficients) 216 | } 217 | 218 | 219 | find_base_point <- function(Y_clean, base_point, n_embedding){ 220 | n_genes <- nrow(Y_clean) 221 | if(is.matrix(base_point)){ 222 | stopifnot(nrow(base_point) == n_genes) 223 | stopifnot(ncol(base_point) == n_embedding) 224 | 225 | # Check if it is orthogonal 226 | orth <- t(base_point) %*% base_point 227 | if(sum((orth - diag(nrow = n_embedding))^2) > 1e-8){ 228 | stop("The provided 'base_point' is not orthogonal") 229 | } 230 | base_point 231 | }else{ 232 | base_point_meth <- match.arg(base_point, c("global_embedding", "mean")) 233 | if(base_point_meth == "global_embedding"){ 234 | pca(Y_clean, n = n_embedding, center = FALSE)$coordsystem 235 | }else if(base_point_meth == "mean"){ 236 | stop("'base_point = \"mean\"' is not implemented. 
Please use 'global_embedding'.") 237 | } 238 | } 239 | } 240 | 241 | 242 | project_diffemb_into_data_space <- function(embedding, design, coefficients, base_point){ 243 | n_genes <- nrow(base_point) 244 | res <- matrix(NA, nrow = n_genes, ncol = ncol(embedding)) 245 | mm_groups <- get_groups(design) 246 | for(gr in unique(mm_groups)){ 247 | covars <- design[which(mm_groups == gr)[1], ] 248 | res[,mm_groups == gr] <- grassmann_map(sum_tangent_vectors(coefficients, covars), base_point) %*% embedding[,mm_groups == gr,drop=FALSE] 249 | } 250 | res 251 | } 252 | 253 | project_data_on_diffemb <- function(Y_clean, design, coefficients, base_point){ 254 | n_emb <- ncol(base_point) 255 | res <- matrix(NA, nrow = n_emb, ncol = ncol(Y_clean)) 256 | mm_groups <- get_groups(design) 257 | for(gr in unique(mm_groups)){ 258 | covars <- design[which(mm_groups == gr)[1], ] 259 | res[,mm_groups == gr] <- t(grassmann_map(sum_tangent_vectors(coefficients, covars), base_point)) %*% Y_clean[,mm_groups == gr,drop=FALSE] 260 | } 261 | res 262 | } 263 | 264 | sum_tangent_vectors <- function(tangent_block, covariates){ 265 | stopifnot(length(covariates) == dim(tangent_block)[3]) 266 | res <- matrix(0, nrow = dim(tangent_block)[1], ncol = dim(tangent_block)[2]) 267 | for(idx in seq_len(length(covariates))){ 268 | res <- res + tangent_block[,,idx] * covariates[idx] 269 | } 270 | res 271 | } 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | -------------------------------------------------------------------------------- /R/lemur_fit.R: -------------------------------------------------------------------------------- 1 | #' The `lemur_fit` class 2 | #' 3 | #' The `lemur_fit` class extends [`SingleCellExperiment`] and provides 4 | #' additional accessors to get the values of the fit produced by [`lemur`]. It is the class of the result returned by [`lemur`]. 5 | #' 6 | #' @param object the `lemur_fit` object for the [`BiocGenerics::design`] generic 7 | #' @param x,i,j,...,drop the `lemur_fit` object and indices for the `[` subsetting operator 8 | #' 9 | #' @details 10 | #' 11 | #' To access the values produced by [`lemur`], use the dollar notation (`$`): 12 | #' \describe{ 13 | #' \item{`fit$n_embedding`}{the dimension of the latent space.} 14 | #' \item{`fit$design`}{the specification of the design in [`lemur`]. 
Usually this is a design formula, see [`stats::formula`].} 15 | #' \item{`fit$base_point`}{a matrix of size `nrow(fit)` x `fit$n_embedding` with the base point for the Grassmann exponential map.} 16 | #' \item{`fit$coefficients`}{a three-dimensional tensor of size `nrow(fit)` x `fit$n_embedding` x `ncol(fit$design_matrix)` with the coefficients for the exponential map.} 17 | #' \item{`fit$embedding`}{a matrix of size `fit$n_embedding` x `ncol(fit)` with the latent space coordinates of each cell.} 18 | #' \item{`fit$design_matrix`}{a matrix with the covariate values for each cell, of size `ncol(fit)` x `ncol(fit$design_matrix)`.} 19 | #' \item{`fit$linear_coefficients`}{a matrix (of size `nrow(fit)` x `ncol(fit$design_matrix)` with the coefficients for the linear regression.} 20 | #' \item{`fit$alignment_coefficients`}{a 3-tensor with the coefficients for the alignment, of size `fit$n_embedding` x `fit$n_embedding` x `ncol(fit$design_matrix)`.} 21 | #' \item{`fit$alignment_design`}{an alternative specification of the alignment, using a design, typically a [`stats::formula`].} 22 | #' \item{`fit$alignment_design_matrix`}{an alternative specification of the alignment, using a design matrix.} 23 | #' \item{`fit$contrast`}{a parsed version of the contrast specification from the `test_de` function or `NULL`.} 24 | #' \item{`fit$colData`}{the column annotation `DataFrame`.} 25 | #' \item{`fit$rowData`}{the row annotation `DataFrame`.} 26 | #' } 27 | #' 28 | #' @seealso [`lemur`], [`predict`][predict.lemur_fit], [`residuals`][residuals,lemur_fit-method] 29 | #' 30 | #' @returns An object of class `lemur_fit`. 31 | #' 32 | #' @rdname lemur_fit 33 | #' @aliases lemur_fit 34 | #' 35 | #' @examples 36 | #' # The easiest way to make a lemur_fit object is to call `lemur` 37 | #' data("glioblastoma_example_data") 38 | #' fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 39 | #' n_emb = 5, verbose = FALSE) 40 | #' 41 | #' fit$n_embedding 42 | #' fit$embedding[,1:10] 43 | #' fit$n_embedding 44 | #' fit$embedding[,1:10] 45 | #' fit$design_matrix[1:10,] 46 | #' fit$coefficients[1:3,,] 47 | #' 48 | #' @export 49 | .lemur_fit <- setClass("lemur_fit", contains = "SingleCellExperiment") 50 | 51 | lemur_fit <- function(data, col_data, row_data, 52 | n_embedding, 53 | design, design_matrix, linear_coefficients, 54 | base_point, coefficients, embedding, 55 | alignment_coefficients, 56 | alignment_design, alignment_design_matrix, 57 | use_assay, is_test_data, row_mask = NULL){ 58 | if(is.null(data)){ 59 | data <- SingleCellExperiment(assays = list()) 60 | }else if(! is(data, "SummarizedExperiment")){ 61 | data <- SingleCellExperiment(assays = setNames(list(data), use_assay)) 62 | }else{ 63 | data <- as(data, "SingleCellExperiment") 64 | } 65 | 66 | n_features <- nrow(data) 67 | linearFit <- LinearEmbeddingMatrix(design_matrix, linear_coefficients) 68 | colnames(linearFit) <- colnames(linear_coefficients) 69 | 70 | # Do some more graceful merging of the existing and the new information. 
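  # `%update_values%` is a helper defined elsewhere in the package (not shown in this excerpt).
  # Judging from its use below, it merges two list-like objects in the spirit of `utils::modifyList()`:
  # entries from the right-hand side are added to, or overwrite, the matching entries of the left-hand side, e.g.
  #   list(a = 1, b = 2) %update_values% list(b = 3)   # conceptually similar to list(a = 1, b = 3)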
71 | colData(data) <- (colData(data) %update_values% col_data) |> as("DataFrame") 72 | SingleCellExperiment::int_colData(data)$is_test_data <- is_test_data 73 | rowData(data) <- (rowData(data) %update_values% row_data) |> as("DataFrame") 74 | reducedDims(data) <- reducedDims(data) %update_values% list(linearFit = linearFit, embedding = t(embedding)) 75 | metadata(data) <- metadata(data) %update_values% list( 76 | n_embedding = n_embedding, design = design, base_point = base_point, 77 | coefficients = coefficients, alignment_coefficients = alignment_coefficients, 78 | alignment_design = alignment_design, alignment_design_matrix = alignment_design_matrix, 79 | use_assay = use_assay, row_mask = if(is.null(row_mask)) seq_len(n_features) else row_mask) 80 | 81 | .lemur_fit(data) 82 | } 83 | 84 | 85 | S4Vectors::setValidity2("lemur_fit", function(obj){ 86 | old <- S4Vectors:::disableValidity() 87 | if (!isTRUE(old)) { 88 | S4Vectors:::disableValidity(TRUE) 89 | on.exit(S4Vectors:::disableValidity(old)) 90 | } 91 | 92 | msg <- NULL 93 | 94 | row_mask <- metadata(obj)$row_mask 95 | n_features_original <- nrow(obj$base_point) 96 | n_features <- nrow(obj) 97 | n_obs <- ncol(obj) 98 | 99 | n_embedding <- obj$n_embedding 100 | if(is.null(n_embedding)) msg <- c(msg, "'n_embedding' must not be NULL") 101 | base_point <- obj$base_point 102 | if(is.null(base_point)) msg <- c(msg, "'base_point' must not be NULL") 103 | coefficients <- obj$coefficients 104 | if(is.null(coefficients)) msg <- c(msg, "'coefficients' must not be NULL") 105 | alignment_coefficients <- obj$alignment_coefficients 106 | if(is.null(alignment_coefficients)) msg <- c(msg, "'alignment_coefficients' must not be NULL") 107 | alignment_design <- obj$alignment_design 108 | alignment_design_matrix <- obj$alignment_design_matrix 109 | if(is.null(alignment_design_matrix)) msg <- c(msg, "'alignment_design_matrix' must not be NULL") 110 | embedding <- obj$embedding 111 | if(is.null(embedding)) msg <- c(msg, "'embedding' must not be NULL") 112 | design <- obj$design 113 | design_matrix <- sampleFactors(reducedDim(obj, "linearFit")) 114 | if(is.null(design_matrix)) msg <- c(msg, "'design_matrix' must not be NULL") 115 | linear_coefficients <- featureLoadings(reducedDim(obj, "linearFit")) 116 | if(is.null(linear_coefficients)) msg <- c(msg, "'linear_coefficients' must not be NULL") 117 | use_assay <- obj$use_assay 118 | if(is.null(use_assay)) msg <- c(msg, "'use_assay' must not be NULL") 119 | is_test_data <- obj$is_test_data 120 | if(is.null(is_test_data)) msg <- c(msg, "'is_test_data' must not be NULL") 121 | col_names <- colnames(obj) 122 | row_names <- rownames(obj) 123 | 124 | if(! is.null(design_matrix) && nrow(design_matrix) != n_obs) msg <- c(msg, "`nrow(design_matrix)` does not match number of observations") 125 | if(! is.null(linear_coefficients) && nrow(linear_coefficients) != n_features_original) msg <- c(msg, "`nrow(linear_coefficients)` does not match `n_features_original`") 126 | if(! is.null(linear_coefficients) && ncol(linear_coefficients) != ncol(design_matrix)) msg <- c(msg, "`ncol(linear_coefficients)` does not match `ncol(design_matrix)`") 127 | if(! is.null(base_point) && nrow(base_point) != n_features_original) msg <- c(msg, "`nrow(base_point)` does not match `n_features_original`") 128 | if(! is.null(base_point) && ncol(base_point) != n_embedding) msg <- c(msg, "`ncol(base_point)` does not match `n_embedding`") 129 | if(! is.null(coefficients) && ! 
is.array(coefficients) || length(dim(coefficients)) != 3) msg <- c(msg, "`coefficients` must be a three dimensional array") 130 | if(! is.null(coefficients) && dim(coefficients)[1] != n_features_original) msg <- c(msg, "`dim(coefficients)[1]` does not match `n_features_original`") 131 | if(! is.null(coefficients) && dim(coefficients)[2] != n_embedding) msg <- c(msg, "`dim(coefficients)[2]` does not match `n_embedding`") 132 | if(! is.null(coefficients) && dim(coefficients)[3] != ncol(design_matrix)) msg <- c(msg, "`dim(coefficients)[3]` does not match `ncol(design_matrix)`") 133 | if(! is.null(embedding) && nrow(embedding) != n_embedding) msg <- c(msg, "`nrow(embedding)` does not match `n_embedding`") 134 | if(! is.null(embedding) && ncol(embedding) != n_obs) msg <- c(msg, "`ncol(embedding)` does not match number of observations") 135 | if(! is.null(alignment_coefficients) && ! is.array(alignment_coefficients) || length(dim(alignment_coefficients)) != 3) msg <- c(msg, "`alignment_coefficients` must be a three dimensional array") 136 | if(! is.null(alignment_coefficients) && dim(alignment_coefficients)[1] != n_embedding) msg <- c(msg, "`dim(alignment_coefficients)[1]` does not match `n_embedding`") 137 | if(! is.null(alignment_coefficients) && dim(alignment_coefficients)[2] != n_embedding + 1) msg <- c(msg, "`dim(alignment_coefficients)[2]` does not match `n_embedding + 1`") 138 | if(! is.null(alignment_coefficients) && dim(alignment_coefficients)[3] != ncol(alignment_design_matrix)) msg <- c(msg, "`dim(alignment_coefficients)[3]` does not match `ncol(alignment_design_matrix)`") 139 | if(! is.null(alignment_design_matrix) && nrow(alignment_design_matrix) != n_obs) msg <- c(msg, "`nrow(alignment_design_matrix)` does not match number of observations") 140 | if(! is.null(alignment_design) && ! inherits(alignment_design, "formula")) msg <- c(msg, "`alignment_design` must inherit from formula or be NULL") 141 | if(! is.null(design) && ! inherits(design, "formula")) msg <- c(msg, "`design` must inherit from formula or be NULL") 142 | if(! is.null(use_assay) && ! use_assay %in% assayNames(obj)) msg <- c(msg, "`use_assay` must be one of the assays") 143 | if(! is.null(is_test_data) && ! is.logical(is_test_data)) msg <- c(msg, "`is_test_data` must be a logical vector") 144 | if(! is.null(is_test_data) && length(is_test_data) != n_obs ) msg <- c(msg, "length `is_test_data` must match the number of observations") 145 | if(! is.null(col_names) && length(col_names) != length(unique(col_names))) msg <- c(msg, "`colnames` are not unique") 146 | if(! is.null(row_names) && length(row_names) != length(unique(row_names))) msg <- c(msg, "`rownames` are not unique") 147 | if(is.logical(row_mask)) msg <- c(msg, "`row_mask` is a logical. This is no longer allowed (changed in version 1.0.4 to fix a bug). Please rerun `lemur`.") 148 | if(max(row_mask) > n_features_original || min(row_mask) < 0 || any(is.na(row_mask))) msg <- c(msg, "`row_mask` contains illegal index. This is a bug!") 149 | 150 | if(is.null(msg)){ 151 | TRUE 152 | }else{ 153 | msg 154 | } 155 | }) 156 | 157 | # Subsetting 158 | 159 | #' @rdname lemur_fit 160 | #' @export 161 | setMethod("[", c("lemur_fit", "ANY", "ANY"), function(x, i, j, ...) { 162 | old <- S4Vectors:::disableValidity() 163 | if (!isTRUE(old)) { 164 | S4Vectors:::disableValidity(TRUE) 165 | on.exit(S4Vectors:::disableValidity(old)) 166 | } 167 | 168 | i_missing <- missing(i) 169 | j_missing <- missing(j) 170 | 171 | if (! 
i_missing) { 172 | # Update metadata 173 | ii <- convert_subset_to_index(i, rownames(x)) 174 | metadata(x)$row_mask <- metadata(x)$row_mask[ii] 175 | } 176 | if(! j_missing){ 177 | jj <- convert_subset_to_index(j, colnames(x)) 178 | metadata(x)[["alignment_design_matrix"]] <- metadata(x)[["alignment_design_matrix"]][jj,,drop=FALSE] 179 | } 180 | 181 | callNextMethod() 182 | }) 183 | 184 | convert_subset_to_index <- function(subset, names){ 185 | if (is.character(subset)) { 186 | orig <- subset 187 | subset <- match(subset, names) 188 | if (any(bad <- is.na(subset))) { 189 | bad_examples <- toString(orig[bad], width = 100) 190 | stop("index out of bounds: ", bad_examples) 191 | } 192 | }else if(is.logical(subset)){ 193 | subset <- which(subset) 194 | } 195 | return(as.vector(subset)) 196 | } 197 | 198 | .methods_to_suggest <- c("n_embedding", "embedding", 199 | "design", "design_matrix", "base_point", 200 | "coefficients", "linear_coefficients", "alignment_coefficients", 201 | "alignment_design", "alignment_design_matrix", 202 | "contrast", "use_assay", "colData", "rowData", 203 | "test_data", "training_data", "is_test_data") 204 | 205 | 206 | #' @rdname lemur_fit 207 | #' @export 208 | setMethod("design", signature = "lemur_fit", function(object){ 209 | metadata(object)[["design"]] 210 | }) 211 | 212 | 213 | 214 | #' @rdname cash-lemur_fit-method 215 | #' @export 216 | .DollarNames.lemur_fit <- function(x, pattern = ""){ 217 | grep(pattern, .methods_to_suggest, value = TRUE) 218 | } 219 | 220 | #' Access values from a `lemur_fit` 221 | #' 222 | #' @param x the `lemur_fit` 223 | #' @param pattern the pattern from looking up potential values interactively 224 | #' @param name the name of the value behind the dollar 225 | #' @param value the replacement value. This only works for `colData` and 226 | #' `rowData`. 227 | #' 228 | #' @returns The respective value stored in the `lemur_fit` object. 229 | #' 230 | #' @seealso [`lemur_fit-class`] for more documentation on the 231 | #' accessor functions. 232 | #' @aliases dollar_methods 233 | setMethod("$", "lemur_fit", 234 | function(x, name){ 235 | old <- S4Vectors:::disableValidity() 236 | if (!isTRUE(old)) { 237 | S4Vectors:::disableValidity(TRUE) 238 | on.exit(S4Vectors:::disableValidity(old)) 239 | } 240 | 241 | if(! 
name %in% .methods_to_suggest){ 242 | stop("Illegal name after '$' sign: ", name) 243 | } 244 | switch(name, 245 | n_embedding = metadata(x)[["n_embedding"]], 246 | design = design(x), 247 | base_point = metadata(x)[["base_point"]], 248 | coefficients = metadata(x)[["coefficients"]], 249 | embedding = t(reducedDim(x, "embedding")), 250 | design_matrix = sampleFactors(reducedDim(x, "linearFit")), 251 | linear_coefficients = featureLoadings(reducedDim(x, "linearFit")), 252 | alignment_design = metadata(x)[["alignment_design"]], 253 | alignment_design_matrix = metadata(x)[["alignment_design_matrix"]], 254 | alignment_coefficients = metadata(x)[["alignment_coefficients"]], 255 | contrast = metadata(x)[["contrast"]], 256 | use_assay = metadata(x)[["use_assay"]], 257 | colData = colData(x), 258 | rowData = rowData(x), 259 | test_data = get_test_data(x), 260 | training_data = get_training_data(x), 261 | is_test_data = int_colData(x)[["is_test_data"]], 262 | stop("Invalid `name` value.") 263 | ) 264 | }) 265 | 266 | get_test_data <- function(fit){ 267 | fit[,fit$is_test_data] 268 | } 269 | 270 | get_training_data <- function(fit){ 271 | fit[,!fit$is_test_data] 272 | } 273 | 274 | #' @rdname cash-lemur_fit-method 275 | setReplaceMethod("$", "lemur_fit", 276 | function(x, name, value){ 277 | if(! name %in% .methods_to_suggest){ 278 | stop("Illegal name after '$' sign: ", name) 279 | } 280 | switch(name, 281 | colData = {SummarizedExperiment::colData(x) <- value}, 282 | rowData = {SummarizedExperiment::rowData(x) <- value}, 283 | stop("It is illegal to modify the content of lemur_fit object") 284 | ) 285 | x 286 | }) 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | -------------------------------------------------------------------------------- /R/parse_contrasts.R: -------------------------------------------------------------------------------- 1 | 2 | # This function is adapted from proDA 3 | parse_contrast <- function(contrast, formula, simplify = FALSE) { 4 | 5 | if(missing(contrast)){ 6 | stop("No contrast argument was provided!") 7 | } 8 | covar <- all.vars(formula) 9 | 10 | cnt_capture <- rlang::enquo(contrast) 11 | if(is.null(formula)){ 12 | data_mask <- NULL 13 | }else{ 14 | data_mask <- create_contrast_data_mask(formula) 15 | } 16 | 17 | tryCatch({ 18 | res <- rlang::eval_tidy(cnt_capture, data = data_mask) 19 | if(! is.numeric(res)){ 20 | if(is.character(res)){ 21 | # If contrast was a string, eval will just spit it out the same way 22 | res <- rlang::eval_tidy(rlang::parse_expr(res), data = data_mask) 23 | } 24 | } 25 | }, error = function(e){ 26 | # Try to extract text from error message 27 | wrong_arg_error <- regmatches(e$message, regexec("object '(.+)' not found", e$message))[[1]] 28 | no_cond_error <- grepl(e$message, "could not find function \"cond\"") 29 | if(length(wrong_arg_error) == 2){ 30 | covars1 <- paste0(paste0(covar, " = ?"), collapse = ", ") 31 | covars2 <- paste0(paste0(covar, " = ?"), collapse = ", ") 32 | stop("Object '", wrong_arg_error[2], "' not found. Please specify the contrast using:\n", 33 | "'cond(", covars1, ") - cond(", covars2, ")'", call. = FALSE) 34 | }else if(no_cond_error && is.null(formula)){ 35 | stop("'fit$design' or 'fit$alignment_design' is 'NULL'. This means that you cannot use 'cond(...)' to ", 36 | "specify the contrast. 
Typically, 'fit$design' or 'fit$alignment_design' are 'NULL' if they are defined ", 37 | "using a matrix and not with a formula.") 38 | }else{ 39 | stop(e$message) 40 | } 41 | }) 42 | 43 | if(simplify){ 44 | evaluate_contrast_tree(res, res, \(x, y) x) 45 | }else{ 46 | res 47 | } 48 | } 49 | 50 | 51 | create_contrast_data_mask <- function(formula){ 52 | top <- rlang::new_environment(list( 53 | cond = function(...){ 54 | .cond(formula, rlang::dots_list(..., .homonyms = "error", .check_assign = TRUE)) 55 | }, 56 | "+" = .plus, "-" = .minus, "/" = .divide, "*" = .multiply, 57 | "==" = .equal, 58 | "<" = .lt, "<=" = .lt, 59 | ">" = .gt, ">=" = .gt 60 | )) 61 | bottom <- rlang::new_environment(parent = top) 62 | data_mask <- rlang::new_data_mask(bottom = bottom, top = top) 63 | data_mask$.cntrst <- rlang::as_data_pronoun(bottom) 64 | data_mask 65 | } 66 | 67 | 68 | .cond <- function(formula, level_sets = list()){ 69 | if(is.null(formula)){ 70 | stop("You called 'cond()' inside the contrast, however the original model ", 71 | "was not specified with a formula. Thus 'cond()' doesn't work and you ", 72 | "need to specify the contrast using the column names of the design matrix.") 73 | } 74 | if(is.null(attr(formula, "xlevels"))){ 75 | warning("The formula has no 'xlevels' attribute. This is supicious and might indicate a bug.") 76 | } 77 | if(any(names(level_sets) == "")){ 78 | stop("All arguments to 'cond()' must be named.") 79 | } 80 | if(any(duplicated(names(level_sets)))){ 81 | stop("All arguments to 'cond()' must be unique.") 82 | } 83 | covar <- all.vars(formula) 84 | new_dat <- as.list(rep(0, length(covar))) 85 | names(new_dat) <- covar 86 | xlevels <- attr(formula, "vars_xlevels") 87 | for(n in names(xlevels)){ 88 | new_dat[[n]] <- factor(xlevels[[n]][1], levels = xlevels[[n]]) 89 | } 90 | for(n in names(level_sets)){ 91 | if(! n %in% names(new_dat)){ 92 | stop("Setting the level of '", n, "' failed. You can only set the level of the following variables: ", paste0(covar, collapse = ", ")) 93 | } 94 | if(length(level_sets[[n]]) != 1){ 95 | stop("Each argument to 'cond()' must be length one. '", n, "' has length ", length(level_sets[[n]])) 96 | } 97 | if(n %in% names(xlevels)){ 98 | if(! level_sets[[n]] %in% xlevels[[n]]){ 99 | stop("You are trying to set '", n, "=", level_sets[[n]], "'. 
However only the following values for ", n, 100 | " are valid: ", paste0(xlevels[[n]], collapse = ", ")) 101 | } 102 | new_dat[[n]] <- factor(level_sets[[n]], levels = xlevels[[n]]) 103 | }else{ 104 | new_dat[[n]] <- level_sets[[n]] 105 | } 106 | } 107 | res <- drop(model.matrix(formula, new_dat, contrasts.arg = attr(formula, "contrasts"), xlev = attr(formula, "xlevels"))) 108 | attr(res, "assign") <- NULL 109 | attr(res, "contrasts") <- NULL 110 | class(res) <- "model_vec" 111 | res 112 | } 113 | 114 | 115 | evaluate_contrast_tree <- function(c1, c2, FUN){ 116 | stopifnot(all(class(c2) == class(c1))) 117 | if(inherits(c1, "contrast_relation")){ 118 | stopifnot(c1$relation == c2$relation) 119 | if(c1$relation == "minus" && is.null(c1$rhs)){ # Unary minus 120 | - evaluate_contrast_tree(c1$lhs, c2$lhs, FUN = FUN) 121 | }else if(c1$relation == "plus" && is.null(c1$rhs)){ # Unary plus 122 | + evaluate_contrast_tree(c1$lhs, c2$lhs, FUN = FUN) 123 | }else if(c1$relation == "minus"){ 124 | evaluate_contrast_tree(c1$lhs, c2$lhs, FUN = FUN) - evaluate_contrast_tree(c1$rhs, c2$rhs, FUN = FUN) 125 | }else if(c1$relation == "plus"){ 126 | evaluate_contrast_tree(c1$lhs, c2$lhs, FUN = FUN) + evaluate_contrast_tree(c1$rhs, c2$rhs, FUN = FUN) 127 | }else if(c1$relation == "multiply"){ 128 | evaluate_contrast_tree(c1$lhs, c2$lhs, FUN = FUN) * evaluate_contrast_tree(c1$rhs, c2$rhs, FUN = FUN) 129 | }else if(c1$relation == "divide"){ 130 | evaluate_contrast_tree(c1$lhs, c2$lhs, FUN = FUN) / evaluate_contrast_tree(c1$rhs, c2$rhs, FUN = FUN) 131 | }else if(c1$relation %in% c("equal", "less_than", "greater_than")){ 132 | stop("(In)equalities are not allowed in contrasts") 133 | }else{ 134 | stop("Cannot handle contrast relationship of type: ", c1$relation) 135 | } 136 | }else if(inherits(c1, "model_vec")){ 137 | FUN(c1, c2) 138 | }else{ 139 | stopifnot(all(c1 == c2)) 140 | c1 141 | } 142 | } 143 | 144 | 145 | .divide <- function(x, y){ 146 | res <- list(lhs = x, rhs = y, relation = "divide") 147 | class(res) <- "contrast_relation" 148 | res 149 | } 150 | 151 | .multiply <- function(x, y){ 152 | res <- list(lhs = x, rhs = y, relation = "multiply") 153 | class(res) <- "contrast_relation" 154 | res 155 | } 156 | 157 | .plus <- function(x, y){ 158 | if(missing(y)){ 159 | # Unary plus 160 | y <- NULL 161 | } 162 | res <- list(lhs = x, rhs = y, relation = "plus") 163 | class(res) <- "contrast_relation" 164 | res 165 | } 166 | 167 | .minus <- function(x, y){ 168 | if(missing(y)){ 169 | # Unary minus 170 | y <- NULL 171 | } 172 | res <- list(lhs = x, rhs = y, relation = "minus") 173 | class(res) <- "contrast_relation" 174 | res 175 | } 176 | 177 | .equal <- function(x, y){ 178 | res <- list(lhs = x, rhs = y, relation = "equal") 179 | class(res) <- "contrast_relation" 180 | res 181 | } 182 | 183 | .lt <- function(x, y){ 184 | res <- list(lhs = x, rhs = y, relation = "less_than") 185 | class(res) <- "contrast_relation" 186 | res 187 | } 188 | 189 | .gt <- function(x, y){ 190 | res <- list(lhs = x, rhs = y, relation = "greater_than") 191 | class(res) <- "contrast_relation" 192 | res 193 | } 194 | 195 | 196 | -------------------------------------------------------------------------------- /R/pca.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | pca <- function(Y, n, center = TRUE){ 4 | center_ind <- center 5 | min_dim <- min(dim(Y)) 6 | 7 | # Center 8 | center <- if(center_ind){ 9 | MatrixGenerics::rowMeans2(Y) 10 | }else{ 11 | rep(0, nrow(Y)) 12 | } 13 | 14 | if(n == 0){ 15 |
res <- list(rotation = matrix(nrow = nrow(Y), ncol = 0), x = matrix(nrow = 0, ncol = ncol(Y))) 16 | }else if(min_dim <= n * 2){ 17 | # Do exact PCA 18 | res <- prcomp(t(Y), rank. = n, center = center_ind, scale. = FALSE) 19 | }else{ 20 | # Do approximate PCA 21 | res <- irlba::prcomp_irlba(t(Y), n = n, center = center_ind, scale. = FALSE) 22 | } 23 | list(coordsystem = unname(res$rotation), embedding = unname(t(res$x)), offset = center) 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/predict.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Predict values from `lemur_fit` object 4 | #' 5 | #' @param object an `lemur_fit` object 6 | #' @param newdata a data.frame which passed to [`model.matrix`] with 7 | #' `design` to make the `newdesign` matrix 8 | #' @param newdesign a matrix with the covariates for which the output 9 | #' is predicted. If `NULL`, the `object$design_matrix` is used. If 10 | #' it is a vector it is repeated `ncol(embedding)` times to create 11 | #' a design matrix with the same entry for each cell. 12 | #' @param newcondition an unquoted expression with a call to `cond()` specifying 13 | #' the covariates of the prediction. See the `contrast` argument in [test_de] 14 | #' for more details. Note that combinations of multiple calls to `cond()` are 15 | #' not allowed (e.g., `cond(a = 1) - cond(a = 2)`). If specified, `newdata` 16 | #' and `newdesign` are ignored. 17 | #' @param embedding the low-dimensional cell position for which the 18 | #' output is predicted. 19 | #' @param with_linear_model a boolean to indicate if the linear regression 20 | #' offset is included in the prediction. 21 | #' @param with_embedding a boolean to indicate if the embedding contributes 22 | #' to the output. 23 | #' @param with_alignment a boolean to indicate if the alignment effect 24 | #' is removed from the output. 25 | #' @param ... additional parameters passed to `predict_impl`. 26 | #' 27 | #' @returns A matrix with the same dimension `nrow(object) * nrow(newdesign)`. 28 | #' 29 | #' 30 | #' @seealso [`residuals`][residuals,lemur_fit-method] 31 | #' 32 | #' @examples 33 | #' 34 | #' data(glioblastoma_example_data) 35 | #' fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 36 | #' n_emb = 5, verbose = FALSE) 37 | #' 38 | #' pred <- predict(fit) 39 | #' 40 | #' pred_ctrl <- predict(fit, newdesign = c(1, 0, 0, 0, 0, 0)) 41 | #' pred_trt <- predict(fit, newdesign = c(1, 0, 0, 0, 0, 1)) 42 | #' # This is the same as the test_de result 43 | #' fit <- test_de(fit, cond(condition = "panobinostat") - cond(condition = "ctrl")) 44 | #' all.equal(SummarizedExperiment::assay(fit, "DE"), pred_trt - pred_ctrl, 45 | #' check.attributes = FALSE) 46 | #' 47 | #' @export 48 | predict.lemur_fit <- function(object, newdata = NULL, newdesign = NULL, 49 | newcondition = NULL, 50 | embedding = object$embedding, 51 | with_linear_model = TRUE, 52 | with_embedding = TRUE, 53 | with_alignment = TRUE, 54 | ...){ 55 | predict_impl(object, newdata = newdata, newdesign = newdesign, newcondition = {{newcondition}}, 56 | embedding = embedding, with_linear_model = with_linear_model, 57 | with_embedding = with_embedding, with_alignment = with_alignment, ...) 
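  # Usage sketch (with the glioblastoma example data from the examples above): the same kind of
  # prediction can be requested through the 'cond()' helper instead of a raw design vector, e.g.
  #   predict(fit, newcondition = cond(condition = "panobinostat"))
  # which predicts each cell's expression as if it were observed in that condition
  # (with all other covariates held at their reference level).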
58 | 59 | } 60 | 61 | predict_impl <- function(object, newdata = NULL, newdesign = NULL, 62 | newcondition = NULL, 63 | embedding = object$embedding, 64 | with_linear_model = TRUE, 65 | with_embedding = TRUE, 66 | with_alignment = TRUE, 67 | n_embedding = object$n_embedding, 68 | design_matrix = object$design_matrix, 69 | design = object$design, 70 | linear_coefficients = object$linear_coefficients, 71 | coefficients = object$coefficients, 72 | base_point = object$base_point, 73 | alignment_coefficients = object$alignment_coefficients, 74 | alignment_design = object$alignment_design, 75 | alignment_design_matrix = object$alignment_design_matrix, 76 | row_mask = metadata(object)$row_mask, 77 | ...){ 78 | if(! rlang::quo_is_null(rlang::quo({{newcondition}}))){ 79 | if(! is.null(newdesign) || !is.null(newdata)) warning("If 'newcondition' is used, 'newdesign' and 'newdata' are ignored.") 80 | newdesign <- parse_contrast({{newcondition}}, design) 81 | alignment_design_matrix <- parse_contrast({{newcondition}}, alignment_design) 82 | if(inherits(newdesign, "contrast_relation")) stop("Contrast relations using + or - are not allowed") 83 | newdesign <- matrix(newdesign, nrow = ncol(embedding), ncol = length(newdesign), byrow = TRUE) 84 | alignment_design_matrix <- matrix(alignment_design_matrix, nrow = ncol(embedding), ncol = length(alignment_design_matrix), byrow = TRUE) 85 | }else if(is.null(newdesign) && is.null(newdata)){ 86 | newdesign <- design_matrix 87 | }else if(! is.null(newdata)){ 88 | if(is.null(design)) stop("'newdata' is provided, but 'object' does not contain a design formula.") 89 | newdesign <- model.matrix(design, newdata) 90 | }else if(! is.matrix(newdesign)){ 91 | newdesign <- matrix(newdesign, nrow = ncol(embedding), ncol = length(newdesign), byrow = TRUE) 92 | } 93 | if(! 
is.matrix(alignment_design_matrix)){ 94 | alignment_design_matrix <- matrix(alignment_design_matrix, nrow = ncol(embedding), ncol = length(alignment_design_matrix), byrow = TRUE) 95 | } 96 | 97 | if(all(dim(design_matrix) == dim(alignment_design_matrix)) && all(design_matrix == alignment_design_matrix)){ 98 | # The design matrices were identical, presume that the newdesign should also be identical 99 | alignment_design_matrix <- newdesign 100 | } 101 | 102 | if(nrow(newdesign) != ncol(embedding)){ 103 | stop("The number of rows in 'newdesign' must match the number of columns in 'embedding'") 104 | } 105 | if(nrow(newdesign) != nrow(alignment_design_matrix)){ 106 | stop("The number of rows in 'newdesign' (", nrow(newdesign) ,") and 'alignment_design_matrix'(", nrow(alignment_design_matrix) ,") must be the same") 107 | } 108 | approx <- if(with_linear_model){ 109 | linear_coefficients[row_mask,,drop=FALSE] %*% t(newdesign) 110 | }else{ 111 | matrix(0, nrow = length(row_mask), ncol = nrow(newdesign)) 112 | } 113 | 114 | if(with_embedding){ 115 | mm_groups <- get_groups(newdesign) 116 | mm_al_groups <- get_groups(alignment_design_matrix) 117 | stopifnot(length(mm_groups) == length(mm_al_groups)) 118 | mmg <- unique(cbind(mm_groups, mm_al_groups)) 119 | for(idx in seq_len(nrow(mmg))){ 120 | gr1 <- mmg[idx,1] 121 | gr2 <- mmg[idx,2] 122 | covar1 <- newdesign[which(mm_groups == gr1)[1],] 123 | diffemb <- grassmann_map(sum_tangent_vectors(coefficients, covar1), base_point) 124 | if(with_alignment){ 125 | covar2 <- alignment_design_matrix[which(mm_al_groups == gr2)[1],] 126 | alignment <- reverse_linear_transformation(alignment_coefficients, covar2) 127 | offset <- c(matrix(alignment_coefficients[,1,], ncol = length(covar2)) %*% covar2) 128 | }else{ 129 | alignment <- diag(nrow = n_embedding) 130 | offset <- 0 131 | } 132 | sel <- gr1 == mm_groups & gr2 == mm_al_groups 133 | approx[,sel] <- approx[,sel] + diffemb[row_mask,,drop=FALSE] %*% (alignment %*% (embedding[,sel] - offset)) 134 | } 135 | } 136 | 137 | colnames(approx) <- rownames(newdesign) 138 | rownames(approx) <- rownames(object) 139 | approx 140 | } 141 | 142 | 143 | #' Predict values from `lemur_fit` object 144 | #' 145 | #' @inheritParams predict.lemur_fit 146 | #' @param ... ignored. 147 | #' 148 | #' @returns A matrix with the same dimension `dim(object)`. 
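#' Conceptually (see `residuals_impl` below), the residuals are
#' `assay(object, object$use_assay) - predict(object, ...)`.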
149 | #' 150 | #' @seealso [predict.lemur_fit] 151 | #' 152 | #' @examples 153 | #' data(glioblastoma_example_data) 154 | #' fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 155 | #' n_emb = 5, verbose = FALSE) 156 | #' 157 | #' resid <- residuals(fit) 158 | #' dim(resid) 159 | #' 160 | #' 161 | #' @export 162 | setMethod("residuals", signature = "lemur_fit", function(object, 163 | with_linear_model = TRUE, 164 | with_embedding = TRUE, ...){ 165 | residuals_impl(object, with_linear_model = with_linear_model, with_embedding = with_embedding) 166 | }) 167 | 168 | 169 | residuals_impl <- function(object, 170 | with_linear_model = TRUE, 171 | with_embedding = TRUE){ 172 | assay(object, object$use_assay) - predict(object, with_linear_model = with_linear_model, with_embedding = with_embedding) 173 | } 174 | 175 | 176 | 177 | get_residuals_for_alt_fit <- function(fit, Y = assay(fit, fit$use_assay), reduced_design_mat, with_linear_model = TRUE, with_embedding = TRUE){ 178 | if(with_embedding){ 179 | fit_alt <- lemur_impl(Y, design_matrix = reduced_design_mat, n_embedding = fit$n_embedding, 180 | base_point = fit$base_point, verbose = FALSE) 181 | Y - predict_impl(object = fit_alt, embedding = fit_alt$embedding, 182 | with_linear_model = TRUE, with_embedding = TRUE, 183 | n_embedding = fit_alt$n_embedding, 184 | design_matrix = fit_alt$design_matrix, design = fit_alt$design, 185 | linear_coefficients = fit_alt$linear_coefficients, coefficients = fit_alt$coefficients, 186 | base_point = fit_alt$base_point, alignment_design_matrix = fit_alt$alignment_design_matrix, 187 | alignment_coefficients = fit_alt$alignment_coefficients, 188 | row_mask = rep(TRUE, nrow(Y))) 189 | }else{ 190 | fit_alt <- lemur_impl(Y, design_matrix = reduced_design_mat, 191 | n_embedding = 0, 192 | base_point = matrix(nrow = nrow(fit), ncol = 0), 193 | verbose = FALSE) 194 | Y - predict_impl(object = fit_alt, embedding = fit_alt$embedding, 195 | with_linear_model = TRUE, with_embedding = FALSE, 196 | n_embedding = fit_alt$n_embedding, 197 | design_matrix = fit_alt$design_matrix, design = fit_alt$design, 198 | linear_coefficients = fit_alt$linear_coefficients, coefficients = fit_alt$coefficients, 199 | base_point = fit_alt$base_point, alignment_design_matrix = fit$alignment_design_matrix, 200 | alignment_coefficients = fit_alt$alignment_coefficients, 201 | row_mask = rep(TRUE, nrow(Y))) 202 | } 203 | } 204 | 205 | 206 | -------------------------------------------------------------------------------- /R/project_on_fit.R: -------------------------------------------------------------------------------- 1 | 2 | #' Project new data onto the latent spaces of an existing lemur fit 3 | #' 4 | #' @param fit an `lemur_fit` object 5 | #' @param data a matrix with observations in the columns and features in the rows. 6 | #' Or a `SummarizedExperiment` / `SingleCellExperiment` object. The features must 7 | #' match the features in `fit`. 8 | #' @param col_data col_data an optional data frame with `ncol(data)` rows. 9 | #' @param use_assay if `data` is a `SummarizedExperiment` / `SingleCellExperiment` object, 10 | #' which assay should be used. 11 | #' @param design,alignment_design the design formulas or design matrices that are used 12 | #' to project the data on the correct latent subspace. Both default to the designs 13 | #' from the `fit` object. 14 | #' @param return which data structure is returned. 
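#'   One of `"matrix"` (the default) or `"lemur_fit"`, matching the `return` argument of the
#'   function definition below.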
15 | #' 16 | #' 17 | #' @returns Either a matrix with the low-dimensional embeddings of the `data` or 18 | #' an object of class `lemur_fit` wrapping that embedding. 19 | #' 20 | #' @examples 21 | #' 22 | #' data(glioblastoma_example_data) 23 | #' 24 | #' subset1 <- glioblastoma_example_data[,1:2500] 25 | #' subset2 <- glioblastoma_example_data[,2501:5000] 26 | #' 27 | #' fit <- lemur(subset1, design = ~ condition, n_emb = 5, 28 | #' test_fraction = 0, verbose = FALSE) 29 | #' 30 | #' # Returns a `lemur_fit` object with the projection of `subset2` 31 | #' fit2 <- project_on_lemur_fit(fit, subset2, return = "lemur_fit") 32 | #' fit2 33 | #' 34 | #' 35 | #' 36 | #' @export 37 | project_on_lemur_fit <- function(fit, data, col_data = NULL, use_assay = "logcounts", 38 | design = fit$design, alignment_design = fit$alignment_design, 39 | return = c("matrix", "lemur_fit")){ 40 | return <- match.arg(return) 41 | Y <- handle_data_parameter(data, on_disk = FALSE, assay = use_assay) 42 | col_data <- glmGamPoi:::get_col_data(data, col_data) 43 | 44 | xlevel <- attr(design, "xlevel") %default_to% attr(alignment_design, "xlevel") 45 | for(lvl in names(xlevel)){ 46 | if(lvl %in% names(col_data)){ 47 | col_data[[lvl]] <- factor(col_data[[lvl]], levels = xlevel[[lvl]]) 48 | }else{ 49 | stop("The column data does not contain the covariate ", lvl, ". This is a problem", 50 | "because it was used in the original design.") 51 | } 52 | } 53 | attr(design, "ignore_degeneracy") <- TRUE 54 | attr(alignment_design, "ignore_degeneracy") <- TRUE 55 | 56 | des <- handle_design_parameter(design, data, col_data) 57 | col_data <- des$col_data 58 | al_des <- handle_design_parameter(alignment_design, data, col_data) 59 | col_data <- al_des$col_data 60 | embedding <- project_on_lemur_fit_impl(Y, des$design_matrix, al_des$design_matrix, 61 | fit$coefficients, fit$linear_coefficients, fit$alignment_coefficients, 62 | fit$base_point) 63 | colnames(embedding) <- colnames(data) 64 | 65 | if(return == "matrix"){ 66 | embedding 67 | }else if(return == "lemur_fit"){ 68 | lemur_fit(data, col_data = col_data, 69 | row_data = if(is(data, "SummarizedExperiment")) rowData(data) else NULL, 70 | n_embedding = fit$n_embedding, 71 | design = des$design_formula, design_matrix = des$design_matrix, 72 | linear_coefficients = fit$linear_coefficients, 73 | base_point = fit$base_point, 74 | coefficients = fit$coefficients, 75 | embedding = embedding, 76 | alignment_coefficients = fit$alignment_coefficients, 77 | alignment_design = al_des$design_formula, 78 | alignment_design_matrix = al_des$design_matrix, 79 | use_assay = use_assay, is_test_data = rep(FALSE, ncol(embedding)), 80 | row_mask = metadata(fit)$row_mask) 81 | } 82 | } 83 | 84 | project_on_lemur_fit_impl <- function(Y, design_matrix, alignment_design_matrix, coefficients, linear_coefficients, alignment_coefficients, base_point){ 85 | Y_clean <- Y - linear_coefficients %*% t(design_matrix) 86 | embedding <- project_data_on_diffemb(Y_clean, design = design_matrix, coefficients = coefficients, base_point = base_point) 87 | embedding <- apply_linear_transformation(embedding, alignment_coefficients, alignment_design_matrix) 88 | # TODO: subset to row_mask? And then potentially also remove the check in find_de_neighborhoods line 143. 
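  # Summary of the steps above: (1) remove the condition-specific linear offset
  # (`Y - linear_coefficients %*% t(design_matrix)`), (2) project the centered data onto the
  # condition-specific subspace obtained from the Grassmann exponential map at 'base_point', and
  # (3) apply the affine alignment transformation in the latent space.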
89 | embedding 90 | } 91 | 92 | -------------------------------------------------------------------------------- /R/recursive_least_squares.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | #' Iteratively calculate the least squares solution 6 | #' 7 | #' Both functions are for testing purposes. There is a faster implementation 8 | #' called `cum_brls_which_abs_max`. 9 | #' 10 | #' @param y a vector with observations 11 | #' @param X a design matrix 12 | #' 13 | #' @returns a matrix where column i is the 14 | #' solution to `y[1:i] ~ X[1:i,]`. 15 | #' 16 | #' 17 | #' @keywords internal 18 | recursive_least_squares <- function(y, X){ 19 | stopifnot(length(y) == nrow(X)) 20 | n <- length(y) 21 | k <- ncol(X) 22 | res <- matrix(NA, nrow = k, ncol = n) 23 | gamma <- solve(crossprod(X[seq_len(k),])) 24 | beta <- gamma %*% t(X[seq_len(k),]) %*% y[seq_len(k)] 25 | res[,k] <- beta 26 | for(idx in seq(k+1, n)){ 27 | yi <- y[idx] 28 | xi <- t(X[idx,,drop=FALSE]) 29 | gamma <- gamma - (gamma %*% xi %*% t(xi) %*% gamma) / c(1 + t(xi) %*% gamma %*% xi) 30 | beta <- beta - gamma %*% xi %*% (t(xi) %*% beta - yi) 31 | res[,idx] <- beta 32 | } 33 | res 34 | } 35 | 36 | 37 | #' Enable pseudobulking and directly calculate the contrast 38 | #' 39 | #' @rdname recursive_least_squares 40 | bulked_recursive_least_squares_contrast <- function(y, X, group, contrast, ridge_penalty = 1e-6){ 41 | stopifnot(length(y) == nrow(X)) 42 | stopifnot(length(y) == length(group)) 43 | if(! is.matrix(contrast)){ 44 | contrast <- matrix(contrast, nrow = 1) 45 | } 46 | stopifnot(nrow(contrast) == 1) 47 | stopifnot(ncol(contrast) == ncol(X)) 48 | 49 | n <- length(y) 50 | k <- ncol(X) 51 | g <- max(group) 52 | 53 | m <- rep(0, g) 54 | count <- rep(0, g) 55 | X_act <- matrix(0, nrow = g, ncol = k) 56 | 57 | res <- matrix(NA, nrow = k, ncol = n) 58 | t_stat <- rep(NA, n) 59 | gamma <- diag(1/ridge_penalty, nrow = k) 60 | beta <- rep(0, k) 61 | rss <- 0 62 | n_obs <- 0 63 | 64 | for(idx in seq(1, n)){ 65 | yi <- y[idx] 66 | xi <- t(X[idx,,drop=FALSE]) 67 | gi <- group[idx] 68 | 69 | # Alternative formula for mu: m[gi] <- (m[gi] * count[gi] + yi) / (count[gi] + 1) 70 | delta_m <- 1/(count[gi] + 1) * yi - (1 - count[gi] / (count[gi] + 1)) * m[gi] 71 | m[gi] <- m[gi] + delta_m 72 | count[gi] <- count[gi] + 1 73 | 74 | if(count[gi] == 1){ 75 | X_act[gi,] <- xi 76 | n_obs <- n_obs + 1L 77 | gamma <- gamma - (gamma %*% xi %*% t(xi) %*% gamma) / c(1 + t(xi) %*% gamma %*% xi) 78 | # Below is a more efficient version of: beta <- gamma %*% t(X_act) %*% m 79 | beta <- beta + gamma %*% xi %*% (m[gi] - t(xi) %*% beta) 80 | }else{ 81 | beta <- beta + gamma %*% (xi * delta_m) 82 | } 83 | # I can't find a recursive way to calculate the residual sum of squares 84 | rss <- max(1e-6, sum((m - X_act %*% beta)^2)) 85 | # Avoid zero or negative numbers 86 | covar <- rss / max(1e-8, n_obs - k) * gamma 87 | se_sq <- contrast %*% covar %*% t(contrast) 88 | if(se_sq > 0){ 89 | t_stat[idx] <- sum(drop(contrast) * beta) / sqrt(se_sq) 90 | } 91 | res[,idx] <- beta 92 | } 93 | list(coef = res, t_stat = t_stat) 94 | } 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /R/ridge_regression.R: -------------------------------------------------------------------------------- 1 | 2 | #' Ridge regression 3 | #' 4 | #' The function does not treat the intercept special. 
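#'
#' A sketch of the approach used in the body below: the ridge penalty is folded into an ordinary
#' least-squares problem by appending penalty rows to the (weight-scaled) design matrix and matching
#' zero columns to `Y`, so that a single QR-based solve returns the penalized coefficients.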
5 | #' 6 | #' @param Y the observations matrix (`features x samples`) 7 | #' @param X the design matrix (`samples x covariates`) 8 | #' @param ridge_penalty a numeric vector or matrix of size (`covariates` or 9 | #' `covariates x covariates` respectively) 10 | #' @param weights a vector of observation weights 11 | #' 12 | #' @returns The matrix of coefficients. 13 | #' 14 | #' @keywords internal 15 | ridge_regression <- function(Y, X, ridge_penalty = 0, weights = rep(1, nrow(X))){ 16 | stopifnot(length(weights) == nrow(X)) 17 | if(! is.matrix(ridge_penalty)){ 18 | stopifnot(length(ridge_penalty) == 1 || length(ridge_penalty) == ncol(X)) 19 | ridge_penalty <- diag(ridge_penalty, nrow = ncol(X)) 20 | } 21 | ridge_penalty_sq <- sqrt(sum(weights)) * (t(ridge_penalty) %*% ridge_penalty) 22 | weights_sqrt <- sqrt(weights) 23 | 24 | X_extended <- rbind(X * weights_sqrt, ridge_penalty_sq) 25 | Y_extended <- cbind(t(t(Y) * weights_sqrt), matrix(0, nrow = nrow(Y), ncol = ncol(X))) 26 | qr <- qr(X_extended) 27 | res <- t(solve(qr, t(Y_extended))) 28 | colnames(res) <- colnames(X) 29 | res 30 | } 31 | -------------------------------------------------------------------------------- /R/test_de.R: -------------------------------------------------------------------------------- 1 | 2 | #' Predict log fold changes between conditions for each cell 3 | #' 4 | #' @param fit the result of calling [`lemur()`] 5 | #' @param contrast Specification of the contrast: a call to `cond()` specifying a full observation 6 | #' (e.g. `cond(treatment = "A", sex = "male") - cond(treatment = "C", sex = "male")` to 7 | #' compare treatment A vs C for male observations). Unspecified factors default to the reference level. 8 | #' @param embedding matrix of size `n_embedding` \eqn{\times} `n` that specifies where in the latent space 9 | #' the differential expression is tested. It defaults to the position of all cells from the original fit. 10 | #' @param consider specify which part of the model are considered for the differential expression test. 11 | #' @param new_assay_name the name of the assay added to the `fit` object. Default: `"DE"`. 12 | #' 13 | #' @returns If `is.null(embedding)` the `fit` object with a new assay called `"DE"`. Otherwise 14 | #' return a matrix with the differential expression values. 
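#' For a typical `cond(...) - cond(...)` contrast, these values are the difference between two
#' `predict()` calls evaluated at the same latent positions, one for each side of the contrast
#' (see the call to `evaluate_contrast_tree()` in the function body below).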
15 | #' 16 | #' @seealso [find_de_neighborhoods] 17 | #' 18 | #' @examples 19 | #' library(SummarizedExperiment) 20 | #' library(SingleCellExperiment) 21 | #' 22 | #' data(glioblastoma_example_data) 23 | #' fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 24 | #' n_emb = 5, verbose = FALSE) 25 | #' # Optional alignment 26 | #' # fit <- align_harmony(fit) 27 | #' fit <- test_de(fit, contrast = cond(condition = "panobinostat") - cond(condition = "ctrl")) 28 | #' 29 | #' # The fit object contains a new assay called "DE" 30 | #' assayNames(fit) 31 | #' 32 | #' # The DE assay captures differences between conditions 33 | #' is_ctrl_cond <- fit$colData$condition == "ctrl" 34 | #' mean(logcounts(fit)[1,!is_ctrl_cond]) - mean(logcounts(fit)[1,is_ctrl_cond]) 35 | #' mean(assay(fit, "DE")[1,]) 36 | #' 37 | #' @export 38 | test_de <- function(fit, 39 | contrast, 40 | embedding = NULL, 41 | consider = c("embedding+linear", "embedding", "linear"), 42 | new_assay_name = "DE"){ 43 | if(is.null(embedding)){ 44 | embedding <- fit$embedding 45 | use_provided_diff_emb <- FALSE 46 | }else{ 47 | use_provided_diff_emb <- TRUE 48 | } 49 | 50 | consider <- match.arg(consider) 51 | with_lm <- consider == "embedding+linear" || consider == "linear" 52 | with_emb <- consider == "embedding+linear" || consider == "embedding" 53 | 54 | cntrst <- parse_contrast({{contrast}}, formula = fit$design) 55 | al_cntrst <- parse_contrast({{contrast}}, formula = fit$alignment_design) 56 | diff <- evaluate_contrast_tree(cntrst, al_cntrst, \(x, y){ 57 | predict(fit, newdesign = x, alignment_design_matrix = y, embedding = embedding, with_linear_model = with_lm, with_embedding = with_emb) 58 | }) 59 | 60 | colnames(diff) <- colnames(embedding) 61 | rownames(diff) <- rownames(fit) 62 | if(use_provided_diff_emb){ 63 | diff 64 | }else{ 65 | assay(fit, new_assay_name) <- diff 66 | metadata(fit)[["contrast"]] <- cntrst 67 | 68 | fit 69 | } 70 | } 71 | 72 | 73 | #' Differential embedding for each condition 74 | #' 75 | #' @inheritParams test_de 76 | #' @param reduced_design an alternative specification of the null hypothesis. 77 | #' @param consider specify which part of the model are considered for the differential expression test. 78 | #' @param variance_est How or if the variance should be estimated. `'analytical'` is only compatible with `consider = "linear"`. `'resampling'` is the most flexible (to adapt the number 79 | #' of resampling iterations, set `n_resampling_iter`. Default: `100`) 80 | #' @param verbose should the method print information during the fitting. Default: `TRUE`. 81 | #' @param ... additional arguments. 82 | #' 83 | #' @return a data.frame 84 | #' 85 | test_global <- function(fit, 86 | contrast, 87 | reduced_design = NULL, 88 | consider = c("embedding+linear", "embedding", "linear"), 89 | variance_est = c("analytical", "resampling", "none"), verbose = TRUE, 90 | ...){ 91 | 92 | 93 | variance_est <- match.arg(variance_est) 94 | full_design <- fit$design_matrix 95 | consider <- match.arg(consider) 96 | with_lm <- consider == "embedding+linear" || consider == "linear" 97 | with_emb <- consider == "embedding+linear" || consider == "embedding" 98 | 99 | # Implement with a likelihood ratio test (see glmGamPoi) 100 | if(is.null(reduced_design) == missing(contrast)){ 101 | stop("Please provide either an alternative design (formula or matrix) or a contrast.") 102 | }else if(! 
missing(contrast)){ 103 | cntrst <- parse_contrast({{contrast}}, formula = fit$design) 104 | if(inherits(cntrst, "contrast_relation") && cntrst$relation == "minus" && 105 | inherits(cntrst$lhs, "model_vec") && inherits(cntrst$rhs, "model_vec")){ 106 | lfc_diffemb <- grassmann_log(grassmann_map(sum_tangent_vectors(fit$coefficients, c(cntrst$lhs)), fit$base_point), 107 | grassmann_map(sum_tangent_vectors(fit$coefficients, c(cntrst$rhs)), fit$base_point)) 108 | cntrst <- cntrst$lhs - cntrst$rhs 109 | }else{ 110 | cntrst <- evaluate_contrast_tree(cntrst, cntrst, \(x, .) x) # Collapse tree 111 | lfc_diffemb <- sum_tangent_vectors(fit$coefficients, c(cntrst)) 112 | } 113 | 114 | cntrst <- as.matrix(cntrst) 115 | if(nrow(cntrst) != ncol(full_design)){ 116 | stop("The length of the contrast vector does not match the number of coefficients in the model (", 117 | ncol(full_design), ")\n", glmGamPoi:::format_matrix(cntrst)) 118 | } 119 | # The modifying matrix of reduced_design has ncol(design_matrix) - 1 columns and full column rank. 120 | # The columns are all orthogonal to cntrst. 121 | # see: https://scicomp.stackexchange.com/a/27835/36204 122 | # Think about this as a rotation of the design matrix. The QR decomposition approach 123 | # has the added benefit that the new columns are all orthogonal to each other, which 124 | # isn't necessary, but makes fitting more robust 125 | # The following is a simplified version of edgeR's glmLRT (line 159 in glmfit.R) 126 | qrc <- qr(cntrst) 127 | rot <- qr.Q(qrc, complete = TRUE)[,-1,drop=FALSE] 128 | reduced_design_mat <- full_design %*% rot 129 | 130 | lfc_linear_model <- fit$linear_coefficients %*% cntrst 131 | }else{ 132 | reduced_design_mat <- handle_design_parameter(reduced_design, fit, fit$colData)$design_matrix 133 | if(ncol(reduced_design_mat) >= ncol(full_design)){ 134 | stop("The reduced model is as complex (or even more complex) than ", 135 | "the 'fit' model. The 'reduced_design' should contain fewer terms ", 136 | "than the original 'design'.") 137 | } 138 | rot <- matrix(lm.fit(full_design, reduced_design_mat)$coefficients, ncol = ncol(reduced_design_mat)) 139 | if(any(abs(reduced_design_mat - full_design %*% rot) > 1e-10)){ 140 | warning("Although the 'reduced_design' matrix has fewer columns than ", 141 | "'fit$design_matrix', it appears that the 'reduced_design' is not ", 142 | "nested in the 'fit$design_matrix'. Accordingly, the results of the ", 143 | "statistical test will be unreliable.") 144 | } 145 | } 146 | 147 | 148 | 149 | if(variance_est == "analytical"){ 150 | if(with_emb){ 151 | stop("Analytical differential embedding test is not implemented. 
You can set 'consider=\"linear\"'") 152 | }else{ # only linear test 153 | resid_full <- as.matrix(residuals(fit, with_linear_model = TRUE, with_embedding = FALSE)) 154 | resid_red <- as.matrix(get_residuals_for_alt_fit(fit, reduced_design_mat = reduced_design_mat, with_linear_model = TRUE, with_embedding = FALSE)) 155 | pval <- multivar_wilks_ftest(RSS_full = resid_full %*% t(resid_full), 156 | RSS_red = resid_red %*% t(resid_red), 157 | n_features = nrow(fit), full_design, reduced_design_mat) 158 | } 159 | }else if(variance_est == "resampling"){ 160 | if("n_resampling_iter" %in% ...names()){ 161 | n_resampling_iter <- list(...)[["n_resampling_iter"]] 162 | }else{ 163 | n_resampling_iter <- 99 164 | } 165 | if(verbose) message("Estimating null distribution of deviance using ", n_resampling_iter, " iterations.") 166 | # Applying the Freedman-Lane (1983) permutation method to the residuals 167 | # Fit the full model 168 | deviance_ref <- sum(residuals(fit, with_linear_model = with_lm, with_embedding = with_emb)^2) 169 | # Fit the reduced model 170 | resid_red <- get_residuals_for_alt_fit(fit, reduced_design_mat = reduced_design_mat, with_linear_model = with_lm, with_embedding = with_emb) 171 | predict_red <- assay(fit, fit$use_assay) - resid_red 172 | deviance_red <- sum(resid_red^2) 173 | deviance_delta_null <- vapply(seq_len(n_resampling_iter), \(iter){ 174 | new_Y <- predict_red + resid_red[,sample.int(ncol(resid_red), replace = FALSE),drop=FALSE] 175 | deviance_ref_new <- sum(get_residuals_for_alt_fit(fit, Y = new_Y, reduced_design_mat = full_design, with_linear_model = with_lm, with_embedding = with_emb)^2) 176 | deviance_red_new <- sum(get_residuals_for_alt_fit(fit, Y = new_Y, reduced_design_mat = reduced_design_mat, with_linear_model = with_lm, with_embedding = with_emb)^2) 177 | deviance_red_new - deviance_ref_new 178 | }, FUN.VALUE = numeric(1L)) 179 | pval <- (sum((deviance_red - deviance_ref) < deviance_delta_null) + 1) / (n_resampling_iter + 1) 180 | }else{ # variance_est == "none" 181 | pval <- NA 182 | } 183 | 184 | 185 | if(! missing(contrast)){ 186 | data.frame(contrast = rlang::as_label(rlang::enquo(contrast)), 187 | pval = pval, 188 | delta_diffemb = I(list(lfc_diffemb)), 189 | delta_linear = I(list(lfc_linear_model)), 190 | angle_degrees = grassmann_angle_from_tangent(lfc_diffemb, normalized = TRUE)) 191 | }else{ 192 | data.frame(full_design = if(! 
is.null(fit$design)) rlang::as_label(fit$design) else rlang::as_label(fit$design_matrix), 193 | reduced_design = rlang::as_label(reduced_design), 194 | pval = pval) 195 | } 196 | } 197 | 198 | 199 | multivar_wilks_ftest <- function(RSS_full, RSS_red, n_features, design_matrix_full, design_matrix_red){ 200 | # Following https://socialsciences.mcmaster.ca/jfox/Books/Companion/appendices/Appendix-Multivariate-Linear-Models.pdf 201 | stopifnot(nrow(design_matrix_full) == nrow(design_matrix_red)) 202 | k1 <- ncol(design_matrix_full) 203 | k2 <- ncol(design_matrix_red) 204 | 205 | lambdas <- Re(eigen((RSS_red - RSS_full) %*% solve(RSS_full))$values) 206 | wilks_lambda <- prod(1/(1 + lambdas)) 207 | df <- k1 - k2 208 | r <- nrow(design_matrix_full) - k1 - 1 - (n_features - df + 1) / 2 209 | u <- (n_features * df - 2) / 4 210 | t <- ifelse(n_features^2 + df^2 - 5 <= 0, 0, sqrt(n_features^2 * df^2 - 4) / (n_features^2 + df^2 - 5)) 211 | fstat <- (1 - wilks_lambda^(1/t)) / wilks_lambda^(1/t) * (r * t - 2 * u) / (n_features * df) 212 | pf(fstat, df1 = n_features * df, df2 = r * t - 2 * u, lower.tail = FALSE) 213 | } 214 | 215 | -------------------------------------------------------------------------------- /R/util.R: -------------------------------------------------------------------------------- 1 | randn <- function(n, m, ...){ 2 | matrix(rnorm(n * m, ...), nrow = n, ncol = m) 3 | } 4 | 5 | skew <- function(M){ 6 | 0.5 * (M - t(M)) 7 | } 8 | 9 | sym <- function(M){ 10 | 0.5 * (M + t(M)) 11 | } 12 | 13 | `%update_values%` <- function(x, y){ 14 | if(is.null(x) && is.null(y)){ 15 | NULL 16 | }else if(is.null(x)){ 17 | y 18 | }else if(is.null(y)){ 19 | x 20 | }else{ 21 | for(n in names(y)){ 22 | x[[n]] <- y[[n]] 23 | } 24 | x 25 | } 26 | } 27 | 28 | `%default_to%` <- function(x, y){ 29 | if(is.null(x) && is.null(y)){ 30 | NULL 31 | }else if(is.null(x)){ 32 | y 33 | }else if(is.null(y)){ 34 | x 35 | }else{ 36 | for(n in setdiff(names(y), names(x))){ 37 | x[[n]] <- y[[n]] 38 | } 39 | x 40 | } 41 | } 42 | 43 | 44 | 45 | #' Iterating function that returns a matrix 46 | #' 47 | #' The length of `x` determines the number of rows. The length of 48 | #' `FUN(x[i])` determines the number of columns. Must match `ncol`. 49 | #' 50 | #' @param x the sequence that is mapped to a matrix 51 | #' @param FUN the function that returns a vector of length `ncol` 52 | #' @param ncol the length of the output vector 53 | #' @param ... additional arguments that are passed to `FUN` 54 | #' 55 | #' @returns A matrix with `length(x)` / `nrow(x)` rows and `ncol` columns. 56 | #' For `msply_dbl` the number of columns depends on the output of `FUN`. 57 | #' 58 | #' @keywords internal 59 | mply_dbl <- function(x, FUN, ncol=1, ...){ 60 | if(!is.matrix(x)){ 61 | res <- vapply(x, FUN, FUN.VALUE=rep(0.0, times=ncol), ...) 62 | }else{ 63 | res <- apply(x, 1, FUN, ...) * 1.0 64 | if(nrow(x) > 0 && length(res) == 0){ 65 | # Empty result, make matrix 66 | res <- matrix(numeric(0),nrow=0, ncol=nrow(x)) 67 | }else if(nrow(x) == 0){ 68 | res <- matrix(numeric(0), nrow=ncol, ncol=0) 69 | } 70 | if((ncol == 1 && ! 
is.vector(res)) || (ncol > 1 && nrow(res) != ncol)){ 71 | stop("values must be length ", ncol, ", but result is length ", nrow(res)) 72 | } 73 | } 74 | 75 | if(ncol == 1){ 76 | as.matrix(res, nrow=length(res), ncol=1) 77 | }else{ 78 | t(res) 79 | } 80 | } 81 | 82 | 83 | 84 | #' 85 | #' @describeIn mply_dbl Each list element becomes a row in a matrix 86 | stack_rows <- function(x){ 87 | stopifnot(is.list(x)) 88 | do.call(rbind, x) 89 | } 90 | 91 | #' 92 | #' @describeIn mply_dbl Each list element becomes a column in a matrix 93 | stack_cols <- function(x){ 94 | stopifnot(is.list(x)) 95 | do.call(cbind, x) 96 | } 97 | 98 | #' Make a cube from a list of matrices 99 | #' 100 | #' The length of the list will become the third dimension of the cube. 101 | #' 102 | #' @param x a list of vectors/matrices that are stacked 103 | #' 104 | #' @returns A three-dimensional array. 105 | #' 106 | #' @keywords internal 107 | stack_slice <- function(x){ 108 | stopifnot(is.list(x)) 109 | x <- lapply(x, as.matrix) 110 | if(length(x) == 0){ 111 | array(dim = c(0, 0, 0)) 112 | }else{ 113 | dim <- dim(x[[1]]) 114 | res <- array(NA, dim = c(dim, length(x))) 115 | for(idx in seq_along(x)){ 116 | elem <- x[[idx]] 117 | if(nrow(elem) != dim[1] || ncol(elem) != dim[2]){ 118 | stop("Size doesn't match") 119 | } 120 | res[,,idx] <- elem 121 | } 122 | res 123 | } 124 | } 125 | 126 | #' @describeIn stack_slice Make a list of matrices from a cube 127 | #' 128 | destack_slice <- function(x){ 129 | stopifnot(is.array(x)) 130 | stopifnot(length(dim(x)) == 3) 131 | lapply(seq_len(dim(x)[3]), \(idx) x[,,idx]) 132 | } 133 | 134 | 135 | duplicate_rows <- function(m, times, each){ 136 | ncols <- if(is.matrix(m)) ncol(m) else length(m) 137 | if(missing(times) && missing(each)){ 138 | do.call(rbind, list(m)) 139 | }else if(! missing(times)){ 140 | if(times == 0){ 141 | matrix(nrow = 0, ncol = ncols) 142 | }else{ 143 | do.call(rbind, lapply(seq_len(times), \(i) m)) 144 | } 145 | }else if(! missing(each)){ 146 | if(each == 0){ 147 | matrix(nrow = 0, ncol = ncols) 148 | }else{ 149 | matrix(rep(m, each = each), nrow = each * nrow(m), ncol = ncol(m)) 150 | } 151 | }else{ 152 | stop("Specify either 'times' or 'each'") 153 | } 154 | } 155 | 156 | duplicate_cols <- function(m, times, each){ 157 | t(duplicate_rows(t(m), times = times, each = each)) 158 | } 159 | 160 | 161 | normalize_vec_length <- function(x){ 162 | vec_lens <- sqrt(colSums(x^2)) 163 | t(t(x) / vec_lens) 164 | } 165 | 166 | #' Fold left over a sequence 167 | #' 168 | #' @param init initial value. If not specified, `NULL` is used 169 | #' @param x the sequence to iterate over 170 | #' @param FUN a function with first argument named `elem` and second argument 171 | #' named `accum` 172 | #' 173 | #' 174 | #' @examples 175 | #' \dontrun{ 176 | #' # This produces ... 177 | #' fold_left(0)(1:10, \(elem, accum) accum + elem) 178 | #' # ... the same as 179 | #' sum(1:10) 180 | #' } 181 | #' 182 | #' @returns The final value of `accum`. 
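# --- Editor's illustration (not part of the package source) ------------------
# A small sketch for the cube helpers defined above: `stack_slice()` turns a
# list of equally sized matrices into a 3D array and `destack_slice()` undoes
# it. `slices` is a made-up example object.
slices <- list(matrix(1:4, nrow = 2), matrix(5:8, nrow = 2))
cube <- stack_slice(slices)
dim(cube)                               # 2 x 2 x 2
all.equal(destack_slice(cube), slices)  # should be TRUE
# ------------------------------------------------------------------------------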
183 | #' 184 | #' @keywords internal 185 | fold_left <- function(init){ 186 | if(missing(init)){ 187 | init <- NULL 188 | } 189 | function(x, FUN){ 190 | val <- init 191 | for(elem in x){ 192 | val <- FUN(elem = elem, accum = val) 193 | } 194 | val 195 | } 196 | } 197 | 198 | #' Fold right over a sequence 199 | #' @rdname fold_left 200 | fold_right<- function(init){ 201 | if(missing(init)){ 202 | init <- NULL 203 | } 204 | function(x, FUN){ 205 | val <- init 206 | for(elem in rev(x)){ 207 | val <- FUN(elem = elem, accum = val) 208 | } 209 | val 210 | } 211 | } 212 | 213 | 214 | 215 | resample <- function(size, cluster = NULL){ 216 | if(is.null(cluster)){ 217 | sample.int(size, replace = TRUE) 218 | }else{ 219 | # For a description see "Using Cluster Bootstrapping to Analyze Nested Data With a Few Clusters" 220 | # by Huang (2018) 221 | stopifnot(length(cluster) == size) 222 | indices <- seq_len(size) 223 | cluster_levels <- unique(cluster) 224 | resamp <- sample(cluster_levels, size = length(cluster_levels), replace = TRUE) 225 | unlist(lapply(resamp, \(lvl) indices[cluster == lvl])) 226 | } 227 | } 228 | 229 | matrix_equals <- function(m1, m2){ 230 | all(dim(m1) == dim(m2)) && all(m1 == m2) 231 | } 232 | 233 | 234 | seq_excl <- function(start, end){ 235 | if(start >= end){ 236 | integer(0L) 237 | }else{ 238 | seq(start + 1L, end) 239 | } 240 | } 241 | 242 | which_extreme <- function(x, ignore = NULL){ 243 | if(is.null(ignore)){ 244 | which.max(abs(x)) 245 | }else{ 246 | stopifnot(length(ignore) == length(x)) 247 | extreme_idx <- NA_integer_ 248 | max <- -Inf 249 | for(idx in seq_along(x)){ 250 | if(! ignore[idx] && abs(x[idx]) > max){ 251 | extreme_idx <- idx 252 | max <- abs(x[idx]) 253 | } 254 | } 255 | extreme_idx 256 | } 257 | } 258 | 259 | 260 | aggregate_matrix <- function(mat, group_split, aggr_fnc, col_sel = TRUE, ...){ 261 | group_split_lgl <- lapply(group_split, \(idx){ 262 | lgl <- rep(FALSE, ncol(mat)) 263 | lgl[idx] <- TRUE 264 | lgl 265 | }) 266 | if(all(col_sel == TRUE)){ 267 | new_data_mat <- t(mply_dbl(group_split_lgl, \(split_sel){ 268 | aggr_fnc(mat, cols = split_sel, ...) 269 | }, ncol = nrow(mat))) 270 | }else{ 271 | if(! is.logical(col_sel) && length(col_sel) == ncol(mat)){ 272 | stop("Illegal 'col_sel' argument") 273 | } 274 | new_data_mat <- t(mply_dbl(group_split_lgl, \(split_sel){ 275 | aggr_fnc(mat, cols = split_sel & col_sel, ...) 276 | }, ncol = nrow(mat))) 277 | } 278 | rownames(new_data_mat) <- rownames(mat) 279 | new_data_mat 280 | } 281 | 282 | 283 | limma_eBayes_without_shrinkage <- function(lm_fit){ 284 | lm_fit$t <- lm_fit$coefficients / lm_fit$stdev.unscaled / lm_fit$sigma 285 | lm_fit$p.value <- 2 * pt(-abs(lm_fit$t), df = lm_fit$df.residual) 286 | lm_fit 287 | } 288 | 289 | 290 | nullspace <- function(X){ 291 | dim <- nrow(X) 292 | n_obs <- ncol(X) 293 | if(dim == 0){ 294 | return(matrix(nrow = dim, ncol = 0)) 295 | }else if(n_obs == 0){ 296 | diag(nrow = dim) 297 | } 298 | 299 | qrX <- qr(X) 300 | rank <- qrX$rank 301 | qr.Q(qrX, complete = TRUE)[,seq_excl(rank, dim),drop=FALSE] 302 | } 303 | 304 | is_contrast_estimable <- function(contrast, design_matrix, tol = sqrt(.Machine$double.eps)){ 305 | # The algorithm is inspired by 'lmerTest::is_estimable()'. 
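  # Editor's note: a contrast is estimable when it lies in the row space of the
  # design matrix, i.e. it has no component in the null space of t(design_matrix).
  # For example, in a rank-deficient design with columns (intercept, groupA, groupB)
  # where intercept = groupA + groupB, the difference c(0, 1, -1) is estimable,
  # but the single coefficient c(0, 1, 0) is not. For full-rank designs the null
  # space is empty and every contrast is estimable.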
306 | ns <- nullspace(t(design_matrix)) 307 | if(ncol(ns) == 0){ 308 | return(TRUE) 309 | } 310 | abs(sum(c(contrast) %*% ns)) < tol 311 | } 312 | 313 | #' Moore-Penrose pseudoinverse calculated via SVD 314 | #' 315 | #' In the simplest case, the pseudoinverse is 316 | #' \deqn{X^{+} = (X^T X)^{-1} X^T.} 317 | #' 318 | #' To handle the more general case, the pseudoinverse can expressed using a SVD 319 | #' \eqn{X = U D V^T}: 320 | #' \deqn{X^{+} = V D^{-1} U^T} 321 | #' 322 | #' @param X a matrix X 323 | #' 324 | #' @returns The matrix \eqn{X^{+}}. 325 | #' 326 | #' @keywords internal 327 | pseudoinverse <- function(X){ 328 | # Moore-Penrose inverse via SVD (https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse#Singular_value_decomposition_(SVD)) 329 | # See also MASS::ginv or pracma::pinv 330 | tol <- sqrt(.Machine$double.eps) 331 | svd <- svd(X) 332 | not_null <- svd$d > max(tol * svd$d[1L], 0) 333 | if(all(not_null)){ 334 | with(svd, v %*% (1/d * t(u))) 335 | }else if(all(! not_null)){ 336 | matrix(0, nrow = ncol(X), ncol = nrow(X)) 337 | }else{ 338 | with(svd, v[,not_null,drop=FALSE] %*% (1/d[not_null] * t(u[,not_null,drop=FALSE]))) 339 | } 340 | } 341 | 342 | 343 | 344 | 345 | #' Helper function that makes sure that NA * 0 = 0 in matrix multiply 346 | #' 347 | #' @param X a matrix of size `n*m` 348 | #' @param Y a matrix of size `m*p` 349 | #' 350 | #' @return a matrix of size `n*p` 351 | #' 352 | #' @keywords internal 353 | `%zero_dom_mat_mult%` <- function(X, Y){ 354 | X[is.infinite(X)] <- NA 355 | Y[is.infinite(Y)] <- NA 356 | X_cp <- X 357 | X_cp[is.na(X_cp)] <- 0 358 | Y_cp <- Y 359 | Y_cp[is.na(Y_cp)] <- 0 360 | 361 | res <- X_cp %*% Y_cp 362 | mask1 <- (is.na(X)) %*% (is.na(Y) | Y != 0) 363 | mask2 <- (is.na(X) | X != 0) %*% (is.na(Y)) 364 | res[mask1 + mask2 != 0] <- NA 365 | res 366 | } 367 | 368 | 369 | 370 | as_dgTMatrix <- function(x){ 371 | if(utils::packageVersion("Matrix") >= "1.4.2"){ 372 | # See email from Martin Maechler from 2022-08-12 373 | as(as(as(x, "dMatrix"), "generalMatrix"), "TsparseMatrix") 374 | }else{ 375 | # This approach is deprecated since 1.4.2 and triggers warnings 376 | as(x, "dgTMatrix") 377 | } 378 | } 379 | 380 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r setup, include = FALSE} 8 | if(! 
exists("..options_set") || isFALSE(..options_set)){ 9 | knitr::opts_chunk$set( 10 | collapse = TRUE, 11 | comment = "#>", 12 | fig.path = "man/figures/README-", 13 | out.width = "80%", 14 | fig.asp = 0.5, 15 | fig.align = "center", 16 | dpi = 300 17 | ) 18 | ..options_set <- TRUE 19 | } 20 | ``` 21 | 22 | ```{r child = "vignettes/Introduction.qmd"} 23 | 24 | ``` 25 | 26 | -------------------------------------------------------------------------------- /data/glioblastoma_example_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/data/glioblastoma_example_data.rda -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | bibtype = "Article", 3 | title = "Analysis of multi-condition single-cell data with latent embedding multivariate regression", 4 | author = "Constantin Ahlmann-Eltze and Wolfgang Huber", 5 | journal = "bioRxiv", 6 | year = 2023, 7 | doi = "10.1101/2023.03.06.531268" 8 | ) 9 | -------------------------------------------------------------------------------- /man/align_harmony.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/align.R 3 | \name{align_harmony} 4 | \alias{align_harmony} 5 | \alias{align_by_grouping} 6 | \title{Enforce additional alignment of cell clusters beyond the direct differential embedding} 7 | \usage{ 8 | align_harmony( 9 | fit, 10 | design = fit$alignment_design, 11 | ridge_penalty = 0.01, 12 | max_iter = 10, 13 | ..., 14 | verbose = TRUE 15 | ) 16 | 17 | align_by_grouping( 18 | fit, 19 | grouping, 20 | design = fit$alignment_design, 21 | ridge_penalty = 0.01, 22 | preserve_position_of_NAs = FALSE, 23 | verbose = TRUE 24 | ) 25 | } 26 | \arguments{ 27 | \item{fit}{a \code{lemur_fit} object} 28 | 29 | \item{design}{a specification of the design (matrix or formula) that is used 30 | for the transformation. Default: \code{fit$design_matrix}} 31 | 32 | \item{ridge_penalty}{specification how much the flexibility of the transformation 33 | should be regularized. Default: \code{0.01}} 34 | 35 | \item{max_iter}{argument specific for \code{align_harmony}. The number of iterations. Default: \code{10}} 36 | 37 | \item{...}{additional parameters that are passed on to relevant functions} 38 | 39 | \item{verbose}{Should the method print information during the fitting. Default: \code{TRUE}.} 40 | 41 | \item{grouping}{argument specific for \code{align_by_grouping}. Either a vector which assigns 42 | each cell to one group or a matrix with \code{ncol(fit)} columns where the rows are a soft-assignment 43 | to a cluster (i.e., columns sum to \code{1}). \code{NA}'s are allowed.} 44 | 45 | \item{preserve_position_of_NAs}{argument specific for \code{align_by_grouping}. 46 | Boolean flag to decide if \code{NA}s in the \code{grouping} mean that these cells should stay where they are (if 47 | possible) or if they are free to move around. Default: \code{FALSE}} 48 | } 49 | \value{ 50 | The \code{fit} object with the updated \code{fit$embedding} and \code{fit$alignment_coefficients}. 
51 | } 52 | \description{ 53 | Enforce additional alignment of cell clusters beyond the direct differential embedding 54 | } 55 | \examples{ 56 | data(glioblastoma_example_data) 57 | fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 58 | n_emb = 5, verbose = FALSE) 59 | # Creating some grouping for illustration 60 | cell_types <- sample(c("tumor cell", "neuron", "leukocyte"), size = ncol(fit), replace = TRUE) 61 | fit_al1 <- align_by_grouping(fit, grouping = cell_types) 62 | 63 | # Alternatively, use harmony to automatically group cells 64 | fit_al2 <- align_harmony(fit) 65 | fit_al2 66 | 67 | # The alignment coefficients are a 3D array 68 | fit_al2$alignment_coefficients 69 | 70 | } 71 | -------------------------------------------------------------------------------- /man/align_impl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/align.R 3 | \name{align_impl} 4 | \alias{align_impl} 5 | \title{Align the points according to some grouping} 6 | \usage{ 7 | align_impl( 8 | embedding, 9 | grouping, 10 | design_matrix, 11 | ridge_penalty = 0.01, 12 | preserve_position_of_NAs = FALSE, 13 | calculate_new_embedding = TRUE 14 | ) 15 | } 16 | \value{ 17 | A list with the new embedding and the coefficients 18 | } 19 | \description{ 20 | Align the points according to some grouping 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/cash-lemur_fit-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lemur_fit.R 3 | \name{.DollarNames.lemur_fit} 4 | \alias{.DollarNames.lemur_fit} 5 | \alias{$,lemur_fit-method} 6 | \alias{dollar_methods} 7 | \alias{$<-,lemur_fit-method} 8 | \title{Access values from a \code{lemur_fit}} 9 | \usage{ 10 | \method{.DollarNames}{lemur_fit}(x, pattern = "") 11 | 12 | \S4method{$}{lemur_fit}(x, name) 13 | 14 | \S4method{$}{lemur_fit}(x, name) <- value 15 | } 16 | \arguments{ 17 | \item{x}{the \code{lemur_fit}} 18 | 19 | \item{pattern}{the pattern from looking up potential values interactively} 20 | 21 | \item{name}{the name of the value behind the dollar} 22 | 23 | \item{value}{the replacement value. This only works for \code{colData} and 24 | \code{rowData}.} 25 | } 26 | \value{ 27 | The respective value stored in the \code{lemur_fit} object. 28 | } 29 | \description{ 30 | Access values from a \code{lemur_fit} 31 | } 32 | \seealso{ 33 | \code{\linkS4class{lemur_fit}} for more documentation on the 34 | accessor functions. 
35 | } 36 | -------------------------------------------------------------------------------- /man/figures/README-fig-Neighborhood_size_vs_significance-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-Neighborhood_size_vs_significance-1.png -------------------------------------------------------------------------------- /man/figures/README-fig-lemur_umap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-lemur_umap-1.png -------------------------------------------------------------------------------- /man/figures/README-fig-raw_umap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-raw_umap-1.png -------------------------------------------------------------------------------- /man/figures/README-fig-tumor_cell_annotation1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-tumor_cell_annotation1-1.png -------------------------------------------------------------------------------- /man/figures/README-fig-tumor_cell_annotation2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-tumor_cell_annotation2-1.png -------------------------------------------------------------------------------- /man/figures/README-fig-tumor_de_neighborhood_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-tumor_de_neighborhood_plot-1.png -------------------------------------------------------------------------------- /man/figures/README-fig-umap_de-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-umap_de-1.png -------------------------------------------------------------------------------- /man/figures/README-fig-umap_de-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-umap_de-2.png -------------------------------------------------------------------------------- /man/figures/README-fig-umap_de2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-umap_de2-1.png -------------------------------------------------------------------------------- /man/figures/README-fig-umap_de3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-umap_de3-1.png -------------------------------------------------------------------------------- 
/man/figures/README-fig-volcano_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/README-fig-volcano_plot-1.png -------------------------------------------------------------------------------- /man/figures/equation_schematic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/equation_schematic.png -------------------------------------------------------------------------------- /man/figures/lemur-art.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/man/figures/lemur-art.jpg -------------------------------------------------------------------------------- /man/find_de_neighborhoods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_de_neighborhoods.R 3 | \name{find_de_neighborhoods} 4 | \alias{find_de_neighborhoods} 5 | \title{Find differential expression neighborhoods} 6 | \usage{ 7 | find_de_neighborhoods( 8 | fit, 9 | group_by, 10 | contrast = fit$contrast, 11 | selection_procedure = c("zscore", "contrast"), 12 | directions = c("random", "contrast", "axis_parallel"), 13 | min_neighborhood_size = 50, 14 | de_mat = SummarizedExperiment::assays(fit)[["DE"]], 15 | test_data = fit$test_data, 16 | test_data_col_data = NULL, 17 | test_method = c("glmGamPoi", "edgeR", "limma", "none"), 18 | continuous_assay_name = fit$use_assay, 19 | count_assay_name = "counts", 20 | size_factor_method = NULL, 21 | design = fit$design, 22 | alignment_design = fit$alignment_design, 23 | add_diff_in_diff = TRUE, 24 | make_neighborhoods_consistent = FALSE, 25 | skip_confounded_neighborhoods = FALSE, 26 | control_parameters = NULL, 27 | verbose = TRUE 28 | ) 29 | } 30 | \arguments{ 31 | \item{fit}{the \code{lemur_fit} generated by \code{lemur()}} 32 | 33 | \item{group_by}{If the \code{independent_matrix} is provided, \code{group_by} defines 34 | how the pseudobulks are formed. This is typically the variable in the column 35 | data that represents the independent unit of replication of the experiment 36 | (e.g., the mouse or patient ID). The argument has to be wrapped in \code{vars(...)}.} 37 | 38 | \item{contrast}{a specification which contrast to fit. This defaults to the 39 | \code{contrast} argument that was used for \code{test_de} and is stored in \code{fit$contrast}.} 40 | 41 | \item{selection_procedure}{specify the algorithm that is used to select the 42 | neighborhoods for each gene. Broadly, \code{selection_procedure = "zscore"} is faster 43 | but less precise than \code{selection_procedure = "contrast"}.} 44 | 45 | \item{directions}{a string to define the algorithm to select the direction onto 46 | which the cells are projected before searching for the neighborhood. 47 | \code{directions = "random"} produces denser neighborhoods, whereas \code{directions = "contrast"} 48 | has usually more power. \cr 49 | Alternatively, this can also be a matrix with one direction for each gene 50 | (i.e., a matrix of size \code{nrow(fit) * fit$n_embedding}).} 51 | 52 | \item{min_neighborhood_size}{the minimum number of cells per neighborhood. 
Default: \code{50}.} 53 | 54 | \item{de_mat}{the matrix with the differential expression values and is only relevant if 55 | \code{selection_procedure = "zscore"} or \code{directions = "random"}. Defaults 56 | to an assay called \code{"DE"} that is produced by \code{lemur::test_de()}.} 57 | 58 | \item{test_data}{a \code{SummarizedExperiment} object or a named list of matrices. The 59 | data is used to test if the neighborhood inferred on the training data contain a 60 | reliable significant change. If \code{test_method} is \code{"glmGamPoi"} or \code{"edgeR"} a test 61 | using raw counts is conducted and two matching assays are needed: (1) the continuous 62 | assay (with \code{continuous_assay_name}) is projected onto the LEMUR fit to find the latent 63 | position of each cell and (2) the count assay (\code{count_assay_name}) is used for 64 | forming the pseudobulk. If \code{test_method == "limma"}, only the continuous assay is needed. \cr 65 | The arguments defaults to the test data split of when calling \code{lemur()}.} 66 | 67 | \item{test_data_col_data}{additional column data for the \code{test_data} argument.} 68 | 69 | \item{test_method}{choice of test for the pseudobulked differential expression. 70 | \href{https://bioconductor.org/packages/glmGamPoi/}{glmGamPoi} and 71 | \href{https://bioconductor.org/packages/edgeR/}{edgeR} work on an count assay. 72 | \href{http://bioconductor.org/packages/limma/}{limma} works on the continuous assay.} 73 | 74 | \item{continuous_assay_name, count_assay_name}{the assay or list names of \code{independent_data}.} 75 | 76 | \item{size_factor_method}{Set the procedure to calculate the size factor after pseudobulking. This argument 77 | is only relevant if \code{test_method} is \code{"glmGamPoi"} or \code{"edgeR"}. If \code{fit} is subsetted, using a 78 | vector with the sequencing depth per cell ensures reasonable results. 79 | Default: \code{NULL} which means that \code{colSums(assay(fit$test_data, count_assay_name))} is used.} 80 | 81 | \item{design, alignment_design}{the design to use for the fit. Default: \code{fit$design}} 82 | 83 | \item{add_diff_in_diff}{a boolean to specify if the log-fold change (plus significance) of 84 | the DE in the neighborhood against the DE in the complement of the neighborhood is calculated. 85 | If \code{TRUE}, the result includes three additional columns starting with \code{"did_"} short for 86 | difference-in-difference. Default: \code{TRUE}.} 87 | 88 | \item{make_neighborhoods_consistent}{Include cells from outside the neighborhood if they are 89 | at least 10 times in the k-nearest neighbors of the cells inside the neighborhood. Secondly, 90 | remove cells from the neighborhood which are less than 10 times in the k-nearest neighbors of the 91 | other cells in the neighborhood. Default \code{FALSE}} 92 | 93 | \item{skip_confounded_neighborhoods}{Sometimes the inferred neighborhoods are not limited to 94 | a single cell state; this becomes problematic if the cells of the conditions compared in the contrast 95 | are unequally distributed between the cell states. Default: \code{FALSE}} 96 | 97 | \item{control_parameters}{named list with additional parameters passed to underlying functions.} 98 | 99 | \item{verbose}{Should the method print information during the fitting. 
Default: \code{TRUE}.} 100 | } 101 | \value{ 102 | a data frame with one entry per gene 103 | \describe{ 104 | \item{\code{name}}{The gene name.} 105 | \item{\code{neighborhood}}{A list column where each element is a vector with the cell names included 106 | in that neighborhood.} 107 | \item{\code{n_cells}}{the number of cells in the neighborhood (\code{lengths(neighborhood)}).} 108 | \item{\code{sel_statistic}}{The statistic that is maximized by the \code{selection_procedure}.} 109 | \item{\code{pval}, \code{adj_pval}, \code{t_statistic}, \code{lfc}}{The p-value, Benjamini-Hochberg adjusted p-value (FDR), the 110 | t-statistic, and the log2 fold change of the differential expression test defined by \code{contrast} for the 111 | cells inside the neighborhood (calculated using \code{test_method}). Only present if \code{test_data} is not \code{NULL}.} 112 | \item{\code{did_pval}, \code{did_adj_pval}, \code{did_lfc}}{The measurement if the differential expression of the cells 113 | inside the neighborhood is significantly different from the differential expression of the cells outside 114 | the neighborhood. Only present if \code{add_diff_in_diff = TRUE}.} 115 | } 116 | } 117 | \description{ 118 | Find differential expression neighborhoods 119 | } 120 | \examples{ 121 | data(glioblastoma_example_data) 122 | fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 123 | n_emb = 5, verbose = FALSE) 124 | # Optional alignment 125 | # fit <- align_harmony(fit) 126 | fit <- test_de(fit, contrast = cond(condition = "panobinostat") - cond(condition = "ctrl")) 127 | nei <- find_de_neighborhoods(fit, group_by = vars(patient_id)) 128 | head(nei) 129 | 130 | } 131 | -------------------------------------------------------------------------------- /man/fold_left.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{fold_left} 4 | \alias{fold_left} 5 | \alias{fold_right} 6 | \title{Fold left over a sequence} 7 | \usage{ 8 | fold_left(init) 9 | 10 | fold_right(init) 11 | } 12 | \arguments{ 13 | \item{init}{initial value. If not specified \code{NULL}} 14 | 15 | \item{x}{the sequence to iterate over} 16 | 17 | \item{FUN}{a function with first argument named \code{elem} and second argument 18 | named \code{accum}} 19 | } 20 | \value{ 21 | The final value of \code{accum}. 22 | } 23 | \description{ 24 | Fold left over a sequence 25 | 26 | Fold right over a sequence 27 | } 28 | \examples{ 29 | \dontrun{ 30 | # This produces ... 31 | fold_left(0)(1:10, \(elem, accum) accum + elem) 32 | # ... the same as 33 | sum(1:10) 34 | } 35 | 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/glioblastoma_example_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glioblastoma_example_data.R 3 | \name{glioblastoma_example_data} 4 | \alias{glioblastoma_example_data} 5 | \title{The \code{glioblastoma_example_data} dataset} 6 | \value{ 7 | A \code{\link{SingleCellExperiment}} object. 8 | } 9 | \description{ 10 | The dataset is a \code{\link{SingleCellExperiment}} object subset to 5,000 cells and 11 | 300 genes. The \code{colData} contain an entry for each cell from which patient 12 | it came and to which treatment condition it belonged (\code{"ctrl"} or \code{"panobinostat"}). 
13 | } 14 | \details{ 15 | The original data was collected by Zhao et al. (2021). 16 | } 17 | \references{ 18 | \itemize{ 19 | \item Zhao, Wenting, Athanassios Dovas, Eleonora Francesca Spinazzi, Hanna Mendes Levitin, Matei Alexandru Banu, Pavan Upadhyayula, Tejaswi Sudhakar, et al. 20 | “Deconvolution of Cell Type-Specific Drug Responses in Human Tumor Tissue with Single-Cell RNA-Seq.” Genome Medicine 13, no. 1 21 | (December 2021): 82. https://doi.org/10.1186/s13073-021-00894-y. 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /man/grapes-zero_dom_mat_mult-grapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{\%zero_dom_mat_mult\%} 4 | \alias{\%zero_dom_mat_mult\%} 5 | \title{Helper function that makes sure that NA * 0 = 0 in matrix multiply} 6 | \usage{ 7 | X \%zero_dom_mat_mult\% Y 8 | } 9 | \arguments{ 10 | \item{X}{a matrix of size \code{n*m}} 11 | 12 | \item{Y}{a matrix of size \code{m*p}} 13 | } 14 | \value{ 15 | a matrix of size \code{n*p} 16 | } 17 | \description{ 18 | Helper function that makes sure that NA * 0 = 0 in matrix multiply 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/grassmann_geodesic_regression.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/geodesic_regression.R 3 | \name{grassmann_geodesic_regression} 4 | \alias{grassmann_geodesic_regression} 5 | \title{Solve d(P, exp_p(V * x))^2 for V} 6 | \usage{ 7 | grassmann_geodesic_regression( 8 | coordsystems, 9 | design, 10 | base_point, 11 | weights = 1, 12 | tangent_regression = FALSE 13 | ) 14 | } 15 | \value{ 16 | A three-dimensional array with the coefficients \code{V}. 17 | } 18 | \description{ 19 | Solve d(P, exp_p(V * x))^2 for V 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/grassmann_lm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/geodesic_regression.R 3 | \name{grassmann_lm} 4 | \alias{grassmann_lm} 5 | \title{Solve ||Y - exp_p(V * x) Y ||^2_2 for V} 6 | \usage{ 7 | grassmann_lm(data, design, base_point, tangent_regression = FALSE) 8 | } 9 | \value{ 10 | A three-dimensional array with the coefficients \code{V}. 11 | } 12 | \description{ 13 | Solve ||Y - exp_p(V * x) Y ||^2_2 for V 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/harmony_new_object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/harmony_wrapper.R 3 | \name{harmony_new_object} 4 | \alias{harmony_new_object} 5 | \title{Create an arbitrary Harmony object so that I can modify it later} 6 | \usage{ 7 | harmony_new_object() 8 | } 9 | \value{ 10 | The full \code{\link{harmony}} object (R6 reference class type). 
11 | } 12 | \description{ 13 | Create an arbitrary Harmony object so that I can modify it later 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/lemur.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lemur.R 3 | \name{lemur} 4 | \alias{lemur} 5 | \title{Main function to fit the latent embedding multivariate regression (LEMUR) model} 6 | \usage{ 7 | lemur( 8 | data, 9 | design = ~1, 10 | col_data = NULL, 11 | n_embedding = 15, 12 | linear_coefficient_estimator = c("linear", "mean", "cluster_median", "zero"), 13 | use_assay = "logcounts", 14 | test_fraction = 0.2, 15 | ..., 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{a matrix with observations in the columns and features in the rows, 21 | or a \code{SummarizedExperiment} / \code{SingleCellExperiment} object} 22 | 23 | \item{design}{a formula referring to global objects, 24 | \code{colData} (column annotations) of \code{data} if it is a \code{SummarizedExperiment} / \code{SingleCellExperiment} object, 25 | or column names of the \code{col_data} argument} 26 | 27 | \item{col_data}{an optional data frame with \code{ncol(data)} rows, with 28 | annotations ("metadata") on the observations, i.e, on the columns of \code{data}.} 29 | 30 | \item{n_embedding}{the dimension of the linear subspace (latent space).} 31 | 32 | \item{linear_coefficient_estimator}{specify which estimator is used to center the conditions. 33 | \code{"linear"} runs simple regression. It works well in many circumstances, but can produce unsatisfactory 34 | results if the composition of the cell types changes drastically between conditions (e.g., one cell type 35 | disappears). \code{"mean"}, \code{"cluster_median"} and \code{"zero"} are alternative estimators, which 36 | are each supposed to be more robust against compositional changes, but cannot account 37 | for genes that change for all cells between conditions. 38 | \code{"linear"} is the default as it works best with subsequent alignment steps.} 39 | 40 | \item{use_assay}{if \code{data} is a \code{SummarizedExperiment} / \code{SingleCellExperiment} object: 41 | which of its \code{assay} slots should be used?} 42 | 43 | \item{test_fraction}{the fraction of cells (observations) that are set aside before the model fit, to keep an 44 | independent set of test observations. Alternatively, a logical vector of length \code{ncol(data)}.} 45 | 46 | \item{...}{additional parameters that are passed on to the internal function \code{lemur_impl}.} 47 | 48 | \item{verbose}{Should the method print information during the fitting.} 49 | } 50 | 51 | \value{ 52 | An object of class \code{lemur_fit}, which extends \code{\link{SingleCellExperiment}}. Accordingly, 53 | all functions that work for that class also work for \code{lemur_fit} 54 | objects. In addition, slots of these objects can be accessed using the 55 | dollar notation, e.g., \code{fit$embedding}). 56 | For details see the \linkS4class{lemur_fit} help page. 57 | } 58 | \description{ 59 | The main function of this package, to fit the latent embedding multivariate regression (LEMUR) model. 60 | } 61 | \examples{ 62 | data("glioblastoma_example_data") 63 | fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition) 64 | fit 65 | 66 | } 67 | \references{ 68 | \itemize{ 69 | \item Ahlmann-Eltze, C. & Huber, W. (2023). 
Analysis of multi-condition single-cell data with latent 70 | embedding multivariate regression. bioRxiv \url{https://doi.org/10.1101/2023.03.06.531268} 71 | } 72 | } 73 | \seealso{ 74 | \code{\link{align_by_grouping}}, \code{\link{align_harmony}}, \code{\link{test_de}}, \code{\link{find_de_neighborhoods}} 75 | } 76 | -------------------------------------------------------------------------------- /man/lemur_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lemur_fit.R 3 | \docType{class} 4 | \name{lemur_fit-class} 5 | \alias{lemur_fit-class} 6 | \alias{.lemur_fit} 7 | \alias{lemur_fit} 8 | \alias{[,lemur_fit,ANY,ANY,ANY-method} 9 | \alias{design,lemur_fit-method} 10 | \title{The \code{lemur_fit} class} 11 | \usage{ 12 | \S4method{[}{lemur_fit,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) 13 | 14 | \S4method{design}{lemur_fit}(object) 15 | } 16 | \arguments{ 17 | \item{x, i, j, ..., drop}{the \code{lemur_fit} object and indices for the \code{[} subsetting operator} 18 | 19 | \item{object}{the \code{lemur_fit} object for the \code{\link[BiocGenerics:dge]{BiocGenerics::design}} generic} 20 | } 21 | \value{ 22 | An object of class \code{lemur_fit}. 23 | } 24 | \description{ 25 | The \code{lemur_fit} class extends \code{\link{SingleCellExperiment}} and provides 26 | additional accessors to get the values of the values produced by \code{\link{lemur}}. 27 | } 28 | \details{ 29 | To access the values produced by \code{\link{lemur}}, use the dollar notation (\code{$}): 30 | \describe{ 31 | \item{\code{fit$n_embedding}}{the number of embedding dimensions.} 32 | \item{\code{fit$design}}{the specification of the design in \code{\link{lemur}}. Usually this is a \code{\link[stats:formula]{stats::formula}}.} 33 | \item{\code{fit$base_point}}{a matrix (\code{nrow(fit) * fit$n_embedding}) with the base point for the Grassmann exponential map.} 34 | \item{\code{fit$coefficients}}{a three-dimensional tensor (\code{nrow(fit) * fit$n_embedding * ncol(fit$design_matrix)}) with the coefficients for 35 | the exponential map.} 36 | \item{\code{fit$embedding}}{a matrix (\code{fit$n_embedding * ncol(fit)}) with the low dimensional position for each cell.} 37 | \item{\code{fit$design_matrix}}{a matrix with covariates for each cell (\code{ncol(fit) * ncol(fit$design_matrix)}).} 38 | \item{\code{fit$linear_coefficients}}{a matrix (\code{nrow(fit) * ncol(fit$design_matrix)}) with the coefficients for the linear regression.} 39 | \item{\code{fit$alignment_coefficients}}{a 3D tensor with the coefficients for the alignment (\code{fit$n_embedding * fit$n_embedding * ncol(fit$design_matrix)})} 40 | \item{\code{fit$alignment_design}}{an alternative design specification for the alignment. 
This is typically a \code{\link[stats:formula]{stats::formula}}.} 41 | \item{\code{fit$alignment_design_matrix}}{an alternative design matrix specification for the alignment.} 42 | \item{\code{fit$contrast}}{a parsed version of the contrast specification from the \code{test_de} function or \code{NULL}.} 43 | \item{\code{fit$colData}}{the column annotation \code{DataFrame}.} 44 | \item{\code{fit$rowData}}{the row annotation \code{DataFrame}.} 45 | } 46 | } 47 | \examples{ 48 | # The easiest way to make a lemur_fit object, is to call `lemur` 49 | data(glioblastoma_example_data) 50 | fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 51 | n_emb = 5, verbose = FALSE) 52 | 53 | fit$n_embedding 54 | fit$embedding[,1:10] 55 | fit$n_embedding 56 | fit$embedding[,1:10] 57 | fit$design_matrix[1:10,] 58 | fit$coefficients[1:3,,] 59 | 60 | } 61 | \seealso{ 62 | \code{\link{lemur}}, \code{\link[=predict.lemur_fit]{predict}}, \code{\link[=residuals,lemur_fit-method]{residuals}} 63 | } 64 | -------------------------------------------------------------------------------- /man/mply_dbl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{mply_dbl} 4 | \alias{mply_dbl} 5 | \alias{stack_rows} 6 | \alias{stack_cols} 7 | \title{Iterating function that returns a matrix} 8 | \usage{ 9 | mply_dbl(x, FUN, ncol = 1, ...) 10 | 11 | stack_rows(x) 12 | 13 | stack_cols(x) 14 | } 15 | \arguments{ 16 | \item{x}{the sequence that is mapped to a matrix} 17 | 18 | \item{FUN}{the function that returns a vector of length \code{ncol}} 19 | 20 | \item{ncol}{the length of the output vector} 21 | 22 | \item{...}{additional arguments that are passed to \code{FUN}} 23 | } 24 | \value{ 25 | A matrix with \code{length(x)} / \code{nrow(x)} rows and \code{ncol} columns. 26 | For \code{msply_dbl} the number of columns depends on the output of \code{FUN}. 27 | } 28 | \description{ 29 | The length of \code{x} determines the number of rows. The length of 30 | \code{FUN(x[i])} determines the number of columns. Must match \code{ncol}. 31 | } 32 | \section{Functions}{ 33 | \itemize{ 34 | \item \code{stack_rows()}: Each list element becomes a row in a matrix 35 | 36 | \item \code{stack_cols()}: Each list element becomes a row in a matrix 37 | 38 | }} 39 | \keyword{internal} 40 | -------------------------------------------------------------------------------- /man/one_hot_encoding.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/align.R 3 | \name{one_hot_encoding} 4 | \alias{one_hot_encoding} 5 | \title{Take a vector and convert it to a one-hot encoded matrix} 6 | \usage{ 7 | one_hot_encoding(groups) 8 | } 9 | \value{ 10 | A matrix with \code{length(unique(groups))} rows and \code{length(groups)} columns. 
11 | } 12 | \description{ 13 | Take a vector and convert it to a one-hot encoded matrix 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/predict.lemur_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict.lemur_fit} 4 | \alias{predict.lemur_fit} 5 | \title{Predict values from \code{lemur_fit} object} 6 | \usage{ 7 | \method{predict}{lemur_fit}( 8 | object, 9 | newdata = NULL, 10 | newdesign = NULL, 11 | newcondition = NULL, 12 | embedding = object$embedding, 13 | with_linear_model = TRUE, 14 | with_embedding = TRUE, 15 | with_alignment = TRUE, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{object}{an \code{lemur_fit} object} 21 | 22 | \item{newdata}{a data.frame which passed to \code{\link{model.matrix}} with 23 | \code{design} to make the \code{newdesign} matrix} 24 | 25 | \item{newdesign}{a matrix with the covariates for which the output 26 | is predicted. If \code{NULL}, the \code{object$design_matrix} is used. If 27 | it is a vector it is repeated \code{ncol(embedding)} times to create 28 | a design matrix with the same entry for each cell.} 29 | 30 | \item{newcondition}{an unquoted expression with a call to \code{cond()} specifying 31 | the covariates of the prediction. See the \code{contrast} argument in \link{test_de} 32 | for more details. Note that combinations of multiple calls to \code{cond()} are 33 | not allowed (e.g., \code{cond(a = 1) - cond(a = 2)}). If specified, \code{newdata} 34 | and \code{newdesign} are ignored.} 35 | 36 | \item{embedding}{the low-dimensional cell position for which the 37 | output is predicted.} 38 | 39 | \item{with_linear_model}{a boolean to indicate if the linear regression 40 | offset is included in the prediction.} 41 | 42 | \item{with_embedding}{a boolean to indicate if the embedding contributes 43 | to the output.} 44 | 45 | \item{with_alignment}{a boolean to indicate if the alignment effect 46 | is removed from the output.} 47 | 48 | \item{...}{additional parameters passed to \code{predict_impl}.} 49 | } 50 | \value{ 51 | A matrix with the same dimension \code{nrow(object) * nrow(newdesign)}. 
52 | } 53 | \description{ 54 | Predict values from \code{lemur_fit} object 55 | } 56 | \examples{ 57 | 58 | data(glioblastoma_example_data) 59 | fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 60 | n_emb = 5, verbose = FALSE) 61 | 62 | pred <- predict(fit) 63 | 64 | pred_ctrl <- predict(fit, newdesign = c(1, 0, 0, 0, 0, 0)) 65 | pred_trt <- predict(fit, newdesign = c(1, 0, 0, 0, 0, 1)) 66 | # This is the same as the test_de result 67 | fit <- test_de(fit, cond(condition = "panobinostat") - cond(condition = "ctrl")) 68 | all.equal(SummarizedExperiment::assay(fit, "DE"), pred_trt - pred_ctrl, 69 | check.attributes = FALSE) 70 | 71 | } 72 | \seealso{ 73 | \code{\link[=residuals,lemur_fit-method]{residuals}} 74 | } 75 | -------------------------------------------------------------------------------- /man/project_on_lemur_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/project_on_fit.R 3 | \name{project_on_lemur_fit} 4 | \alias{project_on_lemur_fit} 5 | \title{Project new data onto the latent spaces of an existing lemur fit} 6 | \usage{ 7 | project_on_lemur_fit( 8 | fit, 9 | data, 10 | col_data = NULL, 11 | use_assay = "logcounts", 12 | design = fit$design, 13 | alignment_design = fit$alignment_design, 14 | return = c("matrix", "lemur_fit") 15 | ) 16 | } 17 | \arguments{ 18 | \item{fit}{an \code{lemur_fit} object} 19 | 20 | \item{data}{a matrix with observations in the columns and features in the rows. 21 | Or a \code{SummarizedExperiment} / \code{SingleCellExperiment} object. The features must 22 | match the features in \code{fit}.} 23 | 24 | \item{col_data}{col_data an optional data frame with \code{ncol(data)} rows.} 25 | 26 | \item{use_assay}{if \code{data} is a \code{SummarizedExperiment} / \code{SingleCellExperiment} object, 27 | which assay should be used.} 28 | 29 | \item{design, alignment_design}{the design formulas or design matrices that are used 30 | to project the data on the correct latent subspace. Both default to the designs 31 | from the \code{fit} object.} 32 | 33 | \item{return}{which data structure is returned.} 34 | } 35 | \value{ 36 | Either a matrix with the low-dimensional embeddings of the \code{data} or 37 | an object of class \code{lemur_fit} wrapping that embedding. 38 | } 39 | \description{ 40 | Project new data onto the latent spaces of an existing lemur fit 41 | } 42 | \examples{ 43 | 44 | data(glioblastoma_example_data) 45 | 46 | subset1 <- glioblastoma_example_data[,1:2500] 47 | subset2 <- glioblastoma_example_data[,2501:5000] 48 | 49 | fit <- lemur(subset1, design = ~ condition, n_emb = 5, 50 | test_fraction = 0, verbose = FALSE) 51 | 52 | # Returns a `lemur_fit` object with the projection of `subset2` 53 | fit2 <- project_on_lemur_fit(fit, subset2, return = "lemur_fit") 54 | fit2 55 | 56 | 57 | 58 | } 59 | -------------------------------------------------------------------------------- /man/pseudoinverse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{pseudoinverse} 4 | \alias{pseudoinverse} 5 | \title{Moore-Penrose pseudoinverse calculated via SVD} 6 | \usage{ 7 | pseudoinverse(X) 8 | } 9 | \arguments{ 10 | \item{X}{a matrix X} 11 | } 12 | \value{ 13 | The matrix \eqn{X^{+}}. 
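% Editor's note (illustrative, not roxygen-generated): when X is rank deficient,
% the implementation inverts only the singular values above the tolerance, i.e.
% X^{+} = V_r D_r^{-1} U_r^T with r the numerical rank, and it returns an
% all-zero matrix when every singular value falls below the tolerance.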
14 | } 15 | \description{ 16 | In the simplest case, the pseudoinverse is 17 | \deqn{X^{+} = (X^T X)^{-1} X^T.} 18 | } 19 | \details{ 20 | To handle the more general case, the pseudoinverse can expressed using a SVD 21 | \eqn{X = U D V^T}: 22 | \deqn{X^{+} = V D^{-1} U^T} 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/recursive_least_squares.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/recursive_least_squares.R 3 | \name{recursive_least_squares} 4 | \alias{recursive_least_squares} 5 | \alias{bulked_recursive_least_squares_contrast} 6 | \title{Iteratively calculate the least squares solution} 7 | \usage{ 8 | recursive_least_squares(y, X) 9 | 10 | bulked_recursive_least_squares_contrast( 11 | y, 12 | X, 13 | group, 14 | contrast, 15 | ridge_penalty = 1e-06 16 | ) 17 | } 18 | \arguments{ 19 | \item{y}{a vector with observations} 20 | 21 | \item{X}{a design matrix} 22 | } 23 | \value{ 24 | a matrix where column i is the 25 | solution to \code{y[1:i] ~ X[1:i,]}. 26 | } 27 | \description{ 28 | Both functions are for testing purposes. There is a faster implementation 29 | called \code{cum_brls_which_abs_max}. 30 | } 31 | \keyword{internal} 32 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_de_neighborhoods.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{vars} 7 | \title{Objects exported from other packages} 8 | \value{ 9 | see \link[glmGamPoi:vars]{glmGamPoi::vars}. 10 | } 11 | \examples{ 12 | # `vars` quotes expressions (just like in dplyr) 13 | vars(condition, sample) 14 | 15 | } 16 | \keyword{internal} 17 | \description{ 18 | These objects are imported from other packages. Follow the links 19 | below to see their documentation. 20 | 21 | \describe{ 22 | \item{glmGamPoi}{\code{\link[glmGamPoi]{vars}}} 23 | }} 24 | 25 | -------------------------------------------------------------------------------- /man/residuals-lemur_fit-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{residuals,lemur_fit-method} 4 | \alias{residuals,lemur_fit-method} 5 | \title{Predict values from \code{lemur_fit} object} 6 | \usage{ 7 | \S4method{residuals}{lemur_fit}(object, with_linear_model = TRUE, with_embedding = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an \code{lemur_fit} object} 11 | 12 | \item{with_linear_model}{a boolean to indicate if the linear regression 13 | offset is included in the prediction.} 14 | 15 | \item{with_embedding}{a boolean to indicate if the embedding contributes 16 | to the output.} 17 | 18 | \item{...}{ignored.} 19 | } 20 | \value{ 21 | A matrix with the same dimension \code{dim(object)}. 
22 | } 23 | \description{ 24 | Predict values from \code{lemur_fit} object 25 | } 26 | \examples{ 27 | data(glioblastoma_example_data) 28 | fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 29 | n_emb = 5, verbose = FALSE) 30 | 31 | resid <- residuals(fit) 32 | dim(resid) 33 | 34 | 35 | } 36 | \seealso{ 37 | \link{predict.lemur_fit} 38 | } 39 | -------------------------------------------------------------------------------- /man/ridge_regression.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ridge_regression.R 3 | \name{ridge_regression} 4 | \alias{ridge_regression} 5 | \title{Ridge regression} 6 | \usage{ 7 | ridge_regression(Y, X, ridge_penalty = 0, weights = rep(1, nrow(X))) 8 | } 9 | \arguments{ 10 | \item{Y}{the observations matrix (\verb{features x samples})} 11 | 12 | \item{X}{the design matrix (\verb{samples x covariates})} 13 | 14 | \item{ridge_penalty}{a numeric vector or matrix of size (\code{covariates} or 15 | \verb{covariates x covariates} respectively)} 16 | 17 | \item{weights}{a vector of observation weights} 18 | } 19 | \value{ 20 | The matrix of coefficients. 21 | } 22 | \description{ 23 | The function does not treat the intercept special. 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/stack_slice.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{stack_slice} 4 | \alias{stack_slice} 5 | \alias{destack_slice} 6 | \title{Make a cube from a list of matrices} 7 | \usage{ 8 | stack_slice(x) 9 | 10 | destack_slice(x) 11 | } 12 | \arguments{ 13 | \item{x}{a list of vectors/matrices that are stacked} 14 | } 15 | \value{ 16 | A three-dimensional array. 17 | } 18 | \description{ 19 | The length of the list will become the third dimension of the cube. 20 | } 21 | \section{Functions}{ 22 | \itemize{ 23 | \item \code{destack_slice()}: Make a list of matrices from a cube 24 | 25 | }} 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/test_de.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/test_de.R 3 | \name{test_de} 4 | \alias{test_de} 5 | \title{Predict log fold changes between conditions for each cell} 6 | \usage{ 7 | test_de( 8 | fit, 9 | contrast, 10 | embedding = NULL, 11 | consider = c("embedding+linear", "embedding", "linear"), 12 | new_assay_name = "DE" 13 | ) 14 | } 15 | \arguments{ 16 | \item{fit}{the result of calling \code{\link[=lemur]{lemur()}}} 17 | 18 | \item{contrast}{Specification of the contrast: a call to \code{cond()} specifying a full observation 19 | (e.g. \code{cond(treatment = "A", sex = "male") - cond(treatment = "C", sex = "male")} to 20 | compare treatment A vs C for male observations). Unspecified factors default to the reference level.} 21 | 22 | \item{embedding}{matrix of size \code{n_embedding} \eqn{\times} \code{n} that specifies where in the latent space 23 | the differential expression is tested. 
It defaults to the position of all cells from the original fit.} 24 | 25 | \item{consider}{specify which part of the model are considered for the differential expression test.} 26 | 27 | \item{new_assay_name}{the name of the assay added to the \code{fit} object. Default: \code{"DE"}.} 28 | } 29 | \value{ 30 | If \code{is.null(embedding)} the \code{fit} object with a new assay called \code{"DE"}. Otherwise 31 | return a matrix with the differential expression values. 32 | } 33 | \description{ 34 | Predict log fold changes between conditions for each cell 35 | } 36 | \examples{ 37 | library(SummarizedExperiment) 38 | library(SingleCellExperiment) 39 | 40 | data(glioblastoma_example_data) 41 | fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, 42 | n_emb = 5, verbose = FALSE) 43 | # Optional alignment 44 | # fit <- align_harmony(fit) 45 | fit <- test_de(fit, contrast = cond(condition = "panobinostat") - cond(condition = "ctrl")) 46 | 47 | # The fit object contains a new assay called "DE" 48 | assayNames(fit) 49 | 50 | # The DE assay captures differences between conditions 51 | is_ctrl_cond <- fit$colData$condition == "ctrl" 52 | mean(logcounts(fit)[1,!is_ctrl_cond]) - mean(logcounts(fit)[1,is_ctrl_cond]) 53 | mean(assay(fit, "DE")[1,]) 54 | 55 | } 56 | \seealso{ 57 | \link{find_de_neighborhoods} 58 | } 59 | -------------------------------------------------------------------------------- /man/test_global.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/test_de.R 3 | \name{test_global} 4 | \alias{test_global} 5 | \title{Differential embedding for each condition} 6 | \usage{ 7 | test_global( 8 | fit, 9 | contrast, 10 | reduced_design = NULL, 11 | consider = c("embedding+linear", "embedding", "linear"), 12 | variance_est = c("analytical", "resampling", "none"), 13 | verbose = TRUE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{fit}{the result of calling \code{\link[=lemur]{lemur()}}} 19 | 20 | \item{contrast}{Specification of the contrast: a call to \code{cond()} specifying a full observation 21 | (e.g. \code{cond(treatment = "A", sex = "male") - cond(treatment = "C", sex = "male")} to 22 | compare treatment A vs C for male observations). Unspecified factors default to the reference level.} 23 | 24 | \item{reduced_design}{an alternative specification of the null hypothesis.} 25 | 26 | \item{consider}{specify which part of the model are considered for the differential expression test.} 27 | 28 | \item{variance_est}{How or if the variance should be estimated. \code{'analytical'} is only compatible with \code{consider = "linear"}. \code{'resampling'} is the most flexible (to adapt the number 29 | of resampling iterations, set \code{n_resampling_iter}. Default: \code{100})} 30 | 31 | \item{verbose}{should the method print information during the fitting. 
Default: \code{TRUE}.} 32 | 33 | \item{...}{additional arguments.} 34 | } 35 | \value{ 36 | a data.frame 37 | } 38 | \description{ 39 | Differential embedding for each condition 40 | } 41 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // cumz_which_abs_max 15 | List cumz_which_abs_max(NumericVector x, int min_neighborhood_size); 16 | RcppExport SEXP _lemur_cumz_which_abs_max(SEXP xSEXP, SEXP min_neighborhood_sizeSEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); 20 | Rcpp::traits::input_parameter< int >::type min_neighborhood_size(min_neighborhood_sizeSEXP); 21 | rcpp_result_gen = Rcpp::wrap(cumz_which_abs_max(x, min_neighborhood_size)); 22 | return rcpp_result_gen; 23 | END_RCPP 24 | } 25 | // cum_brls_which_abs_max 26 | List cum_brls_which_abs_max(const NumericVector y, const arma::mat& X, const IntegerVector group, const arma::rowvec& contrast, const double penalty, int min_neighborhood_size); 27 | RcppExport SEXP _lemur_cum_brls_which_abs_max(SEXP ySEXP, SEXP XSEXP, SEXP groupSEXP, SEXP contrastSEXP, SEXP penaltySEXP, SEXP min_neighborhood_sizeSEXP) { 28 | BEGIN_RCPP 29 | Rcpp::RObject rcpp_result_gen; 30 | Rcpp::traits::input_parameter< const NumericVector >::type y(ySEXP); 31 | Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); 32 | Rcpp::traits::input_parameter< const IntegerVector >::type group(groupSEXP); 33 | Rcpp::traits::input_parameter< const arma::rowvec& >::type contrast(contrastSEXP); 34 | Rcpp::traits::input_parameter< const double >::type penalty(penaltySEXP); 35 | Rcpp::traits::input_parameter< int >::type min_neighborhood_size(min_neighborhood_sizeSEXP); 36 | rcpp_result_gen = Rcpp::wrap(cum_brls_which_abs_max(y, X, group, contrast, penalty, min_neighborhood_size)); 37 | return rcpp_result_gen; 38 | END_RCPP 39 | } 40 | // count_neighbors_fast 41 | IntegerVector count_neighbors_fast(NumericMatrix knn_mat, IntegerVector indices); 42 | RcppExport SEXP _lemur_count_neighbors_fast(SEXP knn_matSEXP, SEXP indicesSEXP) { 43 | BEGIN_RCPP 44 | Rcpp::RObject rcpp_result_gen; 45 | Rcpp::traits::input_parameter< NumericMatrix >::type knn_mat(knn_matSEXP); 46 | 
Rcpp::traits::input_parameter< IntegerVector >::type indices(indicesSEXP); 47 | rcpp_result_gen = Rcpp::wrap(count_neighbors_fast(knn_mat, indices)); 48 | return rcpp_result_gen; 49 | END_RCPP 50 | } 51 | 52 | static const R_CallMethodDef CallEntries[] = { 53 | {"_lemur_cumz_which_abs_max", (DL_FUNC) &_lemur_cumz_which_abs_max, 2}, 54 | {"_lemur_cum_brls_which_abs_max", (DL_FUNC) &_lemur_cum_brls_which_abs_max, 6}, 55 | {"_lemur_count_neighbors_fast", (DL_FUNC) &_lemur_count_neighbors_fast, 2}, 56 | {NULL, NULL, 0} 57 | }; 58 | 59 | RcppExport void R_init_lemur(DllInfo *dll) { 60 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 61 | R_useDynamicSymbols(dll, FALSE); 62 | } 63 | -------------------------------------------------------------------------------- /src/util.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | 5 | // [[Rcpp::export(rng = FALSE)]] 6 | List cumz_which_abs_max(NumericVector x, int min_neighborhood_size){ 7 | int size = x.size(); 8 | min_neighborhood_size = std::min(min_neighborhood_size, size); 9 | int max_idx = 0; 10 | double max = -std::numeric_limits::infinity(); 11 | int sign = +1; 12 | if(size == 0){ 13 | return max_idx; 14 | }else if(size == 1){ 15 | return 1; 16 | } 17 | double m = x[0]; 18 | double msq = 0; 19 | 20 | for(int i = 2; i <= size; i++){ 21 | double delta = x[i-1] - m; 22 | m += delta / i; 23 | double delta2 = x[i-1] - m; 24 | msq = (msq * (i - 1) + delta * delta2) / i; 25 | double val = std::abs(m / sqrt(msq / (i-1))); 26 | if(i >= min_neighborhood_size - 1 && val > max){ 27 | max = val; 28 | sign = m < 0 ? -1 : +1; 29 | max_idx = i; 30 | } 31 | } 32 | return List::create(Named("max") = sign * max , Named("idx") = max_idx); 33 | } 34 | 35 | 36 | // [[Rcpp::export(rng = FALSE)]] 37 | List cum_brls_which_abs_max(const NumericVector y, const arma::mat& X, const IntegerVector group, 38 | const arma::rowvec& contrast, const double penalty, int min_neighborhood_size){ 39 | int size = y.size(); 40 | min_neighborhood_size = std::min(min_neighborhood_size, size); 41 | 42 | int max_idx = 0; 43 | double max_val = -std::numeric_limits::infinity(); 44 | int sign = +1; 45 | if(size == 0){ 46 | return List::create(Named("index") = max_idx, Named("max") = max_val); 47 | }else if(size == 1){ 48 | return List::create(Named("index") = 1, Named("max") = y); 49 | } 50 | int k = X.n_cols; 51 | int g = max(group); 52 | arma::colvec m(g); 53 | IntegerVector count(g); 54 | arma::mat X_act(g, k); 55 | arma::mat gamma = 1/penalty * arma::eye(k, k); 56 | arma::colvec beta(k); 57 | int n_obs = 0; 58 | double se_pre = 0; 59 | 60 | for(int i = 0; i < size; ++i){ 61 | double yi = y[i]; 62 | arma::colvec xi = X.row(i).t(); 63 | int gi = group[i] - 1; 64 | double delta_m = 1/(count[gi] + 1.0) * yi - (1 - count[gi] / (count[gi] + 1.0)) * m(gi); 65 | m(gi) += delta_m; 66 | count(gi) += 1; 67 | if(count(gi) == 1){ 68 | X_act.row(gi) = xi.t(); 69 | n_obs += 1; 70 | gamma -= (gamma * xi * xi.t() * gamma) / arma::as_scalar(1 + xi.t() * gamma * xi); 71 | se_pre = arma::as_scalar(contrast * gamma * contrast.t()); 72 | beta += gamma * xi * (m[gi] - xi.t() * beta); 73 | }else{ 74 | beta += gamma * (xi * delta_m); 75 | } 76 | if(n_obs > k){ 77 | double rss = std::max(1e-6, arma::accu(arma::pow(m - X_act * beta, 2))); 78 | double se = sqrt(se_pre * rss / (n_obs - k)); 79 | double t_stat = arma::as_scalar(contrast * beta) / se; 80 | if(i >= min_neighborhood_size-1 && std::abs(t_stat) > max_val){ 81 | 
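        // Track the most extreme cumulative t-statistic seen so far (only once
        // the neighborhood has reached `min_neighborhood_size`): store its
        // absolute value, keep the sign separately so the direction of the
        // effect can be reported, and record the 1-based position.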
max_val = std::abs(t_stat); 82 | sign = t_stat < 0 ? -1 : +1; 83 | max_idx = i + 1; 84 | } 85 | } 86 | } 87 | return List::create(Named("idx") = max_idx, Named("max") = sign * max_val); 88 | } 89 | 90 | 91 | // [[Rcpp::export(rng = FALSE)]] 92 | IntegerVector count_neighbors_fast(NumericMatrix knn_mat, IntegerVector indices){ 93 | int n_cells = knn_mat.nrow(); 94 | int knn = knn_mat.ncol(); 95 | int n = indices.length(); 96 | IntegerVector counter(n_cells); 97 | for(int i = 0; i < n; ++i){ 98 | for(int k = 0; k < knn; ++k){ 99 | counter(knn_mat(indices(i)-1, k)-1) += 1; 100 | } 101 | } 102 | return counter; 103 | } 104 | 105 | 106 | /*** R 107 | 108 | cum_z_stat2 <- function(x){ 109 | out <- rep(NA_real_, length(x)) 110 | m <- x[1] 111 | msq <- 0 112 | out[1] <- NA 113 | for(idx in seq_along(x)[-1]){ 114 | delta <- x[idx] - m 115 | m <- m + delta / idx 116 | delta2 <- x[idx] - m 117 | msq <- (msq * (idx-1) + delta * delta2) / idx 118 | 119 | out[idx] <- m / sqrt(msq / (idx-1)) 120 | } 121 | out 122 | } 123 | 124 | cum_z_stat2(x[1:10]) 125 | idx <- 10 126 | .x <- x[seq_len(idx)] 127 | msq <- sum((mean(.x) - .x)^2) 128 | mean(.x) / sqrt(msq / idx / (idx-1)) 129 | mean(.x) / (sd(.x) / sqrt(idx)) 130 | 131 | x <- rnorm(16000) 132 | cumz_which_abs_max(x) 133 | max(abs(cum_z_stat2(x)), na.rm = TRUE) 134 | bench::mark( 135 | which.max(abs(cum_z_stat2(x))), 136 | which.max(abs(cumz(x))), 137 | cumz2(x), 138 | check = TRUE 139 | ) 140 | 141 | */ 142 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(lemur) 3 | 4 | test_check("lemur") 5 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | make_synthetic_data <- function(n_genes = 30, n_cells = 500, n_centers = 4, n_lat = 2, treatment_effect = 0.1){ 4 | n_lat <- min(n_lat, n_genes) 5 | centers <- duplicate_cols(randn(n_lat, n_centers, sd = 2), ceiling(n_cells / n_centers))[,seq_len(n_cells)] 6 | true_Z <- centers + rnorm(n_cells * n_lat, sd = 0.1) 7 | stopifnot(n_centers <= length(LETTERS)) 8 | cell_type <- rep(LETTERS[seq_len(n_centers)], length.out = n_cells) 9 | condition <- sample(letters[1:3], n_cells, replace = TRUE) 10 | design_matrix <- model.matrix(~ condition) 11 | plane <- qr.Q(qr(randn(n_genes, n_genes)))[seq_len(n_genes), seq_len(n_lat)] 12 | 13 | true_P <- nullspace(plane) %*% cbind(intercept = randn(n_genes - n_lat, 1, sd = 0), 14 | beta1 = randn(n_genes - n_lat, 1, sd = treatment_effect), 15 | beta2 = randn(n_genes - n_lat, 1, sd = treatment_effect)) 16 | true_P <- true_P + rnorm(prod(dim(true_P)), sd = 1e-8) 17 | true_Beta <- randn(n_genes, ncol(design_matrix), sd = 0.1) 18 | 19 | dir <- scale(randn(n_lat, 1), center = FALSE) 20 | Y <- true_Beta %*% t(design_matrix) + plane %*% true_Z + true_P %*% (t(design_matrix) * duplicate_rows(t(dir) %*% true_Z, 3)) 21 | 22 | linear_effect <- LinearEmbeddingMatrix(design_matrix, true_Beta) 23 | linear_embedding <- LinearEmbeddingMatrix(t(true_Z), plane) 24 | interaction_embedding <- LinearEmbeddingMatrix(t(t(design_matrix) * duplicate_rows(t(dir) %*% true_Z, 3)), true_P) 25 | 26 | colnames(Y) <- paste0("cell_", seq_len(n_cells)) 27 | rownames(Y) <- paste0("gene_", seq_len(n_genes)) 28 | sce <- SingleCellExperiment(list(logcounts = Y), colData = data.frame(condition = condition, 
cell_type = cell_type), 29 | reducedDims = list(linear_effect = linear_effect, linear_embedding = linear_embedding, interaction_embedding = interaction_embedding)) 30 | 31 | } 32 | 33 | 34 | make_vectors <- function(n_genes, n_obs, sd = 0.1){ 35 | x <- randn(n_genes, n_obs) 36 | x <- apply(x, 2, \(.x) .x / sqrt(sum(.x^2))) 37 | z <- randn(n_genes, 1) 38 | z <- apply(z, 2, \(.z) .z / sqrt(sum(.z^2))) 39 | bp <- diag(nrow=n_genes) 40 | pert <- project_rotation_tangent(randn(n_genes, n_genes, sd = sd), bp) 41 | y <- rotation_map(pert, bp) %*% x 42 | list(x=x, y=y, z=z, bp=bp, pert=pert) 43 | } 44 | 45 | make_vectors2 <- function(n_genes, n_obs, sd = 0.1){ 46 | x <- randn(n_genes, n_obs) 47 | z <- randn(n_genes, 1) 48 | bp <- diag(nrow=n_genes) 49 | pert <- project_spd_tangent(randn(n_genes, n_genes, sd = sd), bp) 50 | y <- spd_map(pert, bp) %*% x 51 | list(x=x, y=y, z=z, bp=bp, pert=pert) 52 | } 53 | 54 | principal_angle <- function(A, B){ 55 | acos(pmax(0, pmin(1, svd(t(qr.Q(qr(A))) %*% qr.Q(qr(B)))$d))) / pi * 180 56 | } 57 | -------------------------------------------------------------------------------- /tests/testthat/test-align.R: -------------------------------------------------------------------------------- 1 | set.seed(1) 2 | dat <- make_synthetic_data(n_centers = 4, n_genes = 50) 3 | dat$patient <- sample(paste0("p", 1:3), 500, replace = TRUE) 4 | fit <- lemur(dat, ~ condition + patient, n_embedding = 5, test_fraction = 0, verbose = FALSE) 5 | 6 | test_that("forward and reverse transformation cancel", { 7 | coef <- array(cbind(randn(5, 6), randn(5, 6)), dim = c(5,6,2)) 8 | vec <- rnorm(2) 9 | forward <- forward_linear_transformation(coef, vec) 10 | reverse <- reverse_linear_transformation(coef, vec) 11 | expect_equal(reverse %*% forward[,-1], diag(nrow = 5)) 12 | }) 13 | 14 | 15 | test_that("alignment with Harmony work", { 16 | fit_al <- align_harmony(fit, verbose = FALSE, max_iter = 1) 17 | 18 | n_coef <- ncol(fit$design_matrix) 19 | n_lat <- fit$n_embedding 20 | expect_equal(fit$alignment_coefficients, array(0, dim = c(n_lat, n_lat+1, n_coef))) 21 | expect_equal(dim(fit_al$alignment_coefficients), c(5,6,5)) 22 | 23 | pred0 <- predict(fit) 24 | pred1 <- predict(fit_al) 25 | expect_equal(pred0, pred1) 26 | 27 | pred0_fixed <- predict(fit, newdesign = c(1, 0, 0, 0, 0)) 28 | pred1_fixed <- predict(fit_al, newdesign = c(1, 0, 0, 0, 0)) 29 | unchanged_subset <- fit$colData$condition == "a" & fit$colData$patient == "p1" 30 | expect_equal(pred0_fixed[,unchanged_subset], pred0[,unchanged_subset], ignore_attr = "dimnames") 31 | expect_equal(pred0_fixed[,unchanged_subset], pred1_fixed[,unchanged_subset]) 32 | 33 | pred2_fixed <- predict(fit_al, newdesign = c(1, 0, 0, 0, 0), 34 | alignment_design_matrix = c(1, 0, 0, 0, 0)) 35 | expect_equal(pred0_fixed[,unchanged_subset], pred2_fixed[,unchanged_subset]) 36 | expect_equal(pred2_fixed, pred1_fixed) 37 | 38 | de1 <- test_de(fit_al, contrast = cond(condition = "a") - cond(condition = "b")) 39 | de2 <- test_de(fit_al, contrast = cond(condition = "a", patient = "p1") - cond(condition = "b", patient = "p1")) 40 | expect_equal(de1, de2) 41 | }) 42 | 43 | 44 | test_that("harmony is fine with degenerate designs", { 45 | al_design <- cbind(fit$design_matrix, rep(rnorm(2), each = 250), 0, 0, 1) 46 | # harmony ignores 'verbose = FALSE' 47 | expect_silent( 48 | align_harmony(fit, design = al_design, max_iter = 1, verbose = FALSE) 49 | ) 50 | attr(al_design, "ignore_degeneracy") <- FALSE 51 | expect_error( 52 | align_harmony(fit, design = al_design, 
max_iter = 1, verbose = FALSE) 53 | ) 54 | }) 55 | 56 | 57 | test_that("alignment with custom alignment_design works", { 58 | fit_al <- align_harmony(fit, verbose = FALSE) 59 | set.seed(1) 60 | fit_al2 <- align_harmony(fit, design = ~ patient * condition, verbose = FALSE) 61 | set.seed(1) 62 | align_mm <- model.matrix(~ patient * condition, data = colData(dat)) 63 | fit_al3 <- align_harmony(fit, design = align_mm, verbose = FALSE) 64 | 65 | expect_equal(fit_al2$alignment_design_matrix, fit_al3$alignment_design_matrix, ignore_attr = c("dimnames", "ignore_degeneracy")) 66 | expect_equal(fit_al2$alignment_coefficients, fit_al3$alignment_coefficients, ignore_attr = "dimnames") 67 | 68 | pred <- predict(fit_al, newdesign = c(1, 0, 0, 0, 0)) 69 | 70 | de1 <- test_de(fit_al, contrast = cond(condition = "a", patient = "p2") - cond(condition = "b", patient = "p2")) 71 | de2 <- test_de(fit_al2, contrast = cond(condition = "a", patient = "p2") - cond(condition = "b", patient = "p2")) 72 | expect_error({ 73 | test_de(fit_al3, contrast = cond(condition = "a", patient = "p2") - cond(condition = "b", patient = "p2")) 74 | }) 75 | }) 76 | 77 | test_that("handle_ridge_penalty_parameter works", { 78 | expect_equal(handle_ridge_penalty_parameter(3), 3) 79 | expect_error(handle_ridge_penalty_parameter(c(rotation = 2))) 80 | expect_error(handle_ridge_penalty_parameter(c(stretching = 1))) 81 | expect_error(handle_ridge_penalty_parameter(c(stretching = 5, rotation = 2))) 82 | expect_error(handle_ridge_penalty_parameter(list(rotation = diag(nrow = 5)))) 83 | }) 84 | 85 | 86 | 87 | test_that("check that aligning points works perfectly for low number of points", { 88 | n_genes <- 10 89 | n_emb <- 8 90 | n_points <- n_emb + 1 91 | df <- data.frame(tmp = rep(c("a", "b"), each = n_points)) 92 | design_matrix <- model.matrix(~ tmp, data = df) 93 | mat <- randn(n_emb, n_points * 2) 94 | 95 | fit <- lemur_fit(randn(n_genes, n_points * 2), col_data = df, row_data = NULL, 96 | n_embedding = n_emb, design = ~ tmp, design_matrix = design_matrix, 97 | linear_coefficients = matrix(0, nrow = n_genes, ncol = 2), 98 | base_point = diag(nrow = n_genes, ncol = n_emb), coefficients = array(0, dim = c(n_genes, n_emb, 2)), 99 | embedding = mat, 100 | alignment_coefficients = array(0, dim = c(n_emb, n_emb+1, 2)), 101 | alignment_design = NULL, alignment_design_matrix = design_matrix, 102 | use_assay = "foo", is_test_data = rep(FALSE, ncol(mat))) 103 | gr <- rep(seq_len(n_points), times = 2) 104 | fit_al <- align_by_grouping(fit, design = fit$alignment_design_matrix, grouping = gr, ridge_penalty = 0, verbose = FALSE) 105 | expect_equal(fit_al$embedding[,df$tmp == "a"], fit_al$embedding[,df$tmp == "b"], tolerance = 1e-8) 106 | }) 107 | 108 | 109 | 110 | test_that("check that harmony alignment works as expected", { 111 | set.seed(1) 112 | n_genes <- 10 113 | n_emb <- 2 114 | n_points <- n_emb + 1 115 | df <- data.frame(tmp = rep(c("a", "b"), each = n_points)) 116 | design_matrix <- model.matrix(~ tmp, data = df) 117 | mat <- randn(n_emb, n_points) 118 | mat <- cbind(mat, diag(1.1, nrow = n_emb) %*% mat) 119 | 120 | fit <- lemur_fit(randn(n_genes, n_points * 2), col_data = df, row_data = NULL, 121 | n_embedding = n_emb, design = ~ tmp, design_matrix = design_matrix, 122 | linear_coefficients = matrix(0, nrow = n_genes, ncol = 2), 123 | base_point = diag(nrow = n_genes, ncol = n_emb), coefficients = array(0, dim = c(n_genes, n_emb, 2)), 124 | embedding = mat, 125 | alignment_coefficients = array(0, dim = c(n_emb, n_emb+1, 2)), 126 | 
alignment_design = NULL, alignment_design_matrix = design_matrix, 127 | use_assay = "foo", is_test_data = rep(FALSE, ncol(mat))) 128 | gr <- rep(seq_len(n_points), times = 2) 129 | suppressWarnings({ 130 | fit_al2 <- align_harmony(fit, design = fit$alignment_design_matrix, nclust = n_points, ridge_penalty = 1e-3, verbose = FALSE) 131 | }) 132 | set.seed(1) 133 | suppressWarnings({ 134 | harm <- t(harmony::RunHarmony(mat, meta_data = df, vars_use = "tmp", nclust = n_points, lambda = 1e-8, verbose = FALSE)) 135 | }) 136 | 137 | expect_equal(fit_al2$embedding[,df$tmp == "a"], fit_al2$embedding[,df$tmp == "b"], tolerance = 1e-2) 138 | expect_equal(harm[,df$tmp == "a"], harm[,df$tmp == "b"], tolerance = 1e-2) 139 | 140 | # Reimplement harmony correction 141 | set.seed(1) 142 | suppressWarnings({ 143 | harm_obj <- harmony_init(mat, design_matrix, nclust = n_points, lambda = 1e-8, verbose = FALSE) 144 | }) 145 | harm_obj <- harmony_max_div_clustering(harm_obj) 146 | Z_corr <- harm_obj$Z_orig 147 | for(k in seq_len(harm_obj$K)){ 148 | Phi_Rk <- as.matrix(harm_obj$Phi_moe %*% diag(harm_obj$R[k,])) 149 | W <- solve(Phi_Rk %*% t(harm_obj$Phi_moe) + diag(c(harm_obj$lambda))) %*% Phi_Rk %*% t(harm_obj$Z_orig) 150 | W[1,] <- 0 151 | Z_corr <- Z_corr - t(W) %*% Phi_Rk 152 | } 153 | expect_equal(Z_corr, harm, tolerance = 1e-3) 154 | }) 155 | 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /tests/testthat/test-differential_geometry.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("Random Grassmann manifold functions are correct", { 3 | p <- random_grassmann_point(5, 2) 4 | expect_equal(t(p) %*% p, diag(nrow = 2)) 5 | v <- random_grassmann_tangent(p) * 10 6 | # Make sure that v is outside the injective radius 7 | expect_gt(svd(v)$d[1] / pi * 180, 90) 8 | 9 | 10 | expect_equal(t(p) %*% v + t(v) %*% p, matrix(0, nrow = 2, ncol = 2)) 11 | p2 <- grassmann_map(v, p) 12 | valt <- grassmann_log(p, p2) 13 | expect_equal(qr(cbind(grassmann_map(valt, p), grassmann_map(v, p)))$rank, 2) 14 | # v and valt are somehow equivalent, in the sense that going to direction 15 | # v/valt from p results in the same space. 
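  # (Concretely: the qr()-rank check above verifies that grassmann_map(v, p) and
  # grassmann_map(valt, p) span the same two-dimensional subspace.)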
16 | # valt is the minimal transition from p to p2 17 | expect_lt(sum(valt^2), sum(v^2)) 18 | 19 | # check that grassmann_map and log are consistent 20 | p3 <- random_grassmann_point(5, 2) 21 | p4 <- random_grassmann_point(5, 2) 22 | 23 | v34 <- grassmann_log(p3, p4) 24 | expect_equal(t(p3) %*% v34 + t(v34) %*% p3, matrix(0, nrow = 2, ncol = 2)) 25 | expect_equal(qr(grassmann_map(v34, p3), p4)$rank, 2) # They span the same space 26 | expect_equal(grassmann_log(p3, grassmann_map(v34, p3)), v34) 27 | }) 28 | 29 | test_that("Grassmann angles work", { 30 | for(idx in 1:10){ 31 | p <- random_grassmann_point(10, 3) 32 | v <- random_grassmann_tangent(p, sd = runif(n = 1, 0.01, 2)) 33 | theta <- grassmann_angle_from_tangent(v, normalized = TRUE) 34 | expect_gt(theta, 0) 35 | expect_lt(theta, 90) 36 | q <- grassmann_map(v, p) 37 | expect_equal(theta, tail(principal_angle(p, q), n = 1)) 38 | } 39 | }) 40 | 41 | 42 | 43 | test_that("zero-dimensional arguments work", { 44 | zero_dim_mat <- matrix(NA_real_, nrow = 0, ncol = 0) 45 | expect_equal(random_grassmann_point(0, 0), zero_dim_mat) 46 | expect_equal(random_grassmann_tangent(zero_dim_mat), zero_dim_mat) 47 | 48 | expect_equal(grassmann_map(zero_dim_mat, zero_dim_mat), zero_dim_mat) 49 | expect_equal(grassmann_log(zero_dim_mat, zero_dim_mat), zero_dim_mat) 50 | }) 51 | 52 | 53 | -------------------------------------------------------------------------------- /tests/testthat/test-geodesic_regression.R: -------------------------------------------------------------------------------- 1 | test_that("grassmann_geodesic_regression works", { 2 | 3 | base_point <- qr.Q(qr(randn(5, 2))) 4 | coordsystems <- lapply(1:10, \(idx) qr.Q(qr(randn(5, 2)))) 5 | x <- seq(1, 10) 6 | design_matrix <- model.matrix(~ x) 7 | fit <- grassmann_geodesic_regression(coordsystems, design_matrix, base_point = base_point) 8 | expect_equal(dim(fit), c(5, 2, 2)) 9 | proj <- grassmann_map(fit[,,1], base_point) 10 | expect_lt(sum((t(proj) %*% proj - diag(nrow = 2))^2), 1e-8) 11 | proj <- grassmann_map(fit[,,2], base_point) 12 | expect_lt(sum((t(proj) %*% proj - diag(nrow = 2))^2), 1e-8) 13 | 14 | }) 15 | 16 | 17 | test_that("grassmann_lm works", { 18 | n_obs <- 100 19 | data <- randn(5, n_obs) 20 | col_data <- data.frame(x = sample(letters[1:3], size = n_obs, replace = TRUE)) 21 | des <- model.matrix(~ x, col_data) 22 | base_point <- qr.Q(qr(randn(5, 2))) 23 | fit <- grassmann_lm(data, des, base_point) 24 | plane_a <- pca(data[,col_data$x == "a"], n = 2, center = FALSE)$coordsystem 25 | plane_b <- pca(data[,col_data$x == "b"], n = 2, center = FALSE)$coordsystem 26 | plane_c <- pca(data[,col_data$x == "c"], n = 2, center = FALSE)$coordsystem 27 | expect_equal(principal_angle(grassmann_map(fit[,,"(Intercept)"], base_point), plane_a), c(0,0), 28 | tolerance = 1e-5) 29 | expect_equal(principal_angle(grassmann_map(fit[,,"(Intercept)"] + fit[,,"xb"], base_point), plane_b), c(0,0), 30 | tolerance = 1e-5) 31 | expect_equal(principal_angle(grassmann_map(fit[,,"(Intercept)"] + fit[,,"xc"], base_point), plane_c), c(0,0), 32 | tolerance = 1e-5) 33 | 34 | expect_equal(fit[,,"(Intercept)"], grassmann_log(base_point, plane_a)) 35 | expect_equal(fit[,,"(Intercept)"] + fit[,,"xb"], grassmann_log(base_point, plane_b)) 36 | # x = f(p, a) 37 | # x + y = f(p, b) 38 | # -!-> y = f(b, a) 39 | #expect_equal(fit[,,"xb"], grassmann_log(plane_b, plane_a)) 40 | # ---> y = f(p, b) - f(p, a) 41 | expect_equal(fit[,,"xb"], grassmann_log(base_point, plane_b) - grassmann_log(base_point, plane_a)) 42 | }) 43 | 44 | 
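# A short usage sketch restating the identities checked above (kept as a comment
# because it is illustrative, not a test; it assumes the internal helpers
# grassmann_lm(), grassmann_map(), and randn() are available): the coefficients
# live in the tangent space at `base_point` and add up per design-matrix column.
#
# data <- randn(5, 100)
# col_data <- data.frame(x = sample(letters[1:2], size = 100, replace = TRUE))
# des <- model.matrix(~ x, col_data)
# base_point <- qr.Q(qr(randn(5, 2)))
# fit <- grassmann_lm(data, des, base_point)
# plane_for_a <- grassmann_map(fit[,,"(Intercept)"], base_point)
# plane_for_b <- grassmann_map(fit[,,"(Intercept)"] + fit[,,"xb"], base_point)
#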
test_that("grassmann_lm throws a helpful error message", { 45 | n_obs <- 100 46 | data <- randn(5, n_obs) 47 | col_data <- data.frame(x = sample(letters[1:3], size = n_obs, replace = TRUE)) 48 | col_data$x[1] <- "new_element" 49 | des <- model.matrix(~ x, col_data) 50 | base_point <- qr.Q(qr(randn(5, 2))) 51 | expect_error({ 52 | fit <- grassmann_lm(data, des, base_point) 53 | }) 54 | }) 55 | 56 | 57 | test_that("get_groups works and is fast", { 58 | df <- data.frame(let = sample(letters[1:2], size = 100, replace = TRUE), 59 | num = rnorm(100)) 60 | mat <- model.matrix(~ let, data = df) 61 | vec <- ifelse(df$let == df$let[1], 1, 2) 62 | attr(vec, "n") <- 2 63 | expect_equal(get_groups(mat), vec) 64 | 65 | mat <- model.matrix(~ let + num, data = df) 66 | vec <- seq_len(100) 67 | attr(vec, "n") <- 100 68 | expect_equal(get_groups(mat), vec) 69 | 70 | # This is fast and grows roughly linear 71 | df <- data.frame(num = rnorm(1e6)) 72 | mat <- model.matrix(~ num, data = df) 73 | vec <- seq_len(1e6) 74 | attr(vec, "n") <- 1e6 75 | expect_equal(get_groups(mat), vec) 76 | }) 77 | 78 | -------------------------------------------------------------------------------- /tests/testthat/test-parse_contrasts.R: -------------------------------------------------------------------------------- 1 | test_that("parse contrasts works", { 2 | expect_error(parse_contrast(A - B, formula = ~ A + B)) 3 | expect_warning(contr <- parse_contrast(cond(A = 3, B = 4), formula = ~ A + B)) 4 | expect_equal(unname(contr), c(1, 3, 4), ignore_attr = "class") 5 | }) 6 | 7 | 8 | test_that("factor based contrast specification works", { 9 | set.seed(1) 10 | n_obs <- 500 11 | col_data <- data.frame(group = sample(LETTERS[1:3], size = n_obs, replace = TRUE), 12 | cont = rnorm(n_obs), 13 | city = sample(c("New York", "Paris", "London"), size = n_obs, replace = TRUE), 14 | y = rnorm(n_obs), 15 | stringsAsFactors = TRUE) 16 | Y <- matrix(0, nrow = 10, ncol = n_obs) 17 | des <- handle_design_parameter(data = Y, design = ~ group + cont, col_data = col_data) 18 | form <- des$design_formula 19 | 20 | # cond 21 | expect_equal(parse_contrast(cond(group = "B"), form), 22 | c(1, 1, 0, 0), ignore_attr = c("names", "class")) 23 | expect_equal(parse_contrast(cond(group = "A"), form), 24 | c(1, 0, 0, 0), ignore_attr = c("names", "class")) 25 | expect_equal(parse_contrast(cond(group = "B") - cond(group = "A"), form), 26 | .minus(c(1,1,0,0), c(1,0,0,0)), ignore_attr = c("names", "class")) 27 | expect_equal(parse_contrast(cond(group = "B") + cond(group = "A"), form), 28 | .plus(c(1,1,0,0), c(1,0,0,0)), ignore_attr = c("names", "class")) 29 | expect_equal(parse_contrast(cond(group = "B") * 3, form), 30 | .multiply(c(1,1,0,0), 3), ignore_attr = c("names", "class")) 31 | expect_equal(parse_contrast((cond(group = "B") - cond(group = "C")) / 3, form), 32 | .divide(.minus(c(1,1,0,0), c(1,0,1,0)), 3), ignore_attr = c("names", "class")) 33 | 34 | 35 | des <- handle_design_parameter(data = Y, design = ~ group + cont + city:group, col_data = col_data) 36 | form <- des$design_formula 37 | mm <- des$design_matrix 38 | expect_equal(parse_contrast(cond(group = "B", city = "New York"), form), 39 | c(1, 1, 0, 0, 0, 1, 0, 0, 0, 0), ignore_attr = c("names", "class")) 40 | expect_equal(parse_contrast(cond(group = "B", city = "New York") - cond(group = "B", city = "Paris"), form), 41 | .minus(c(1, 1, 0, 0, 0, 1, 0, 0, 0, 0), c(1, 1, 0, 0, 0, 0, 0, 0, 1, 0)), ignore_attr = c("names", "class")) 42 | 43 | # Contrast relation 44 | des <- handle_design_parameter(data = Y, 
design = ~ group + cont, col_data = col_data) 45 | form <- des$design_formula 46 | expect_equal(unclass(parse_contrast(cond(group = "B") == cond(group = "A"), form)), 47 | list(lhs = c(1, 1, 0, 0), rhs = c(1, 0, 0, 0), relation = "equal"), 48 | ignore_attr = c("names", "class")) 49 | 50 | expect_equal(unclass(parse_contrast(cond(group = "A", cont = 3) <= cond(group = "A", cont = 5), form)), 51 | list(lhs = c(1, 0, 0, 3), rhs = c(1, 0, 0, 5), relation = "less_than"), 52 | ignore_attr = c("names", "class")) 53 | 54 | }) 55 | 56 | 57 | test_that("cond() works with custom contrasts", { 58 | n_obs <- 50 59 | col_data <- data.frame(group = sample(LETTERS[1:3], size = n_obs, replace = TRUE), 60 | cont = rnorm(n_obs), 61 | city = sample(c("New York", "Paris", "London"), size = n_obs, replace = TRUE), 62 | y = rnorm(n_obs), 63 | stringsAsFactors = TRUE) 64 | col_data$group <- C(col_data$group, contr.sum) 65 | Y <- matrix(0, nrow = 10, ncol = n_obs) 66 | des <- handle_design_parameter(data = Y, design = ~ group + cont, col_data = col_data) 67 | form <- des$design_formula 68 | mm <- des$design_matrix 69 | expect_equal(parse_contrast(cond(group = "A"), form), c(1, 1, 0, 0), ignore_attr = c("names", "class")) 70 | expect_equal(parse_contrast(cond(group = "B"), form), c(1, 0, 1, 0), ignore_attr = c("names", "class")) 71 | expect_equal(parse_contrast(cond(group = "C"), form), c(1, -1, -1, 0), ignore_attr = c("names", "class")) 72 | }) 73 | 74 | test_that("evaluate_contrast_tree works", { 75 | n_obs <- 50 76 | col_data <- data.frame(group = sample(LETTERS[1:3], size = n_obs, replace = TRUE), 77 | cont = rnorm(n_obs), 78 | city = sample(c("New York", "Paris", "London"), size = n_obs, replace = TRUE), 79 | y = rnorm(n_obs), 80 | stringsAsFactors = TRUE) 81 | Y <- matrix(0, nrow = 10, ncol = n_obs) 82 | des <- handle_design_parameter(data = Y, design = ~ group + cont, col_data = col_data) 83 | form <- des$design_formula 84 | al_des <- handle_design_parameter(data = Y, design = ~ group * cont, col_data = col_data) 85 | al_form <- al_des$design_formula 86 | 87 | cntrst <- parse_contrast(cond(group = "A", cont = 5) - cond(group = "B", cont = 5), form) 88 | al_cntrst <- parse_contrast(cond(group = "A", cont = 5) - cond(group = "B", cont = 5), al_form) 89 | 90 | sum <- evaluate_contrast_tree(cntrst, al_cntrst, \(x, y){ 91 | sum(x) + sum(y) 92 | }) 93 | expect_equal(sum, 6 * 2 - (7 + 12)) 94 | 95 | contrast_spec <- rlang::quo((cond(group = "A", cont = 5) - cond(group = "B", cont = 5)) * 9) 96 | sum <- evaluate_contrast_tree(parse_contrast(!!contrast_spec, form), 97 | parse_contrast(!!contrast_spec, al_form), 98 | \(x, y) sum(x) + sum(y)) 99 | expect_equal(sum, (6 * 2) * 9 - (7 + 12) * 9) 100 | 101 | contrast_spec <- rlang::quo(cond(group = "A", cont = 5) / cond(group = "B", cont = 5)) 102 | sum <- evaluate_contrast_tree(parse_contrast(!!contrast_spec, form), 103 | parse_contrast(!!contrast_spec, al_form), 104 | \(x, y) sum(x) + sum(y)) 105 | expect_equal(sum, (6 * 2) / (7 + 12)) 106 | }) 107 | 108 | 109 | test_that("parse_contrast works in dynamic contexts", { 110 | n_obs <- 50 111 | col_data <- data.frame(group = sample(LETTERS[1:3], size = n_obs, replace = TRUE), 112 | cont = rnorm(n_obs), 113 | city = sample(c("New York", "Paris", "London"), size = n_obs, replace = TRUE), 114 | y = rnorm(n_obs), 115 | stringsAsFactors = TRUE) 116 | Y <- matrix(0, nrow = 10, ncol = n_obs) 117 | des <- handle_design_parameter(data = Y, design = ~ group + cont, col_data = col_data) 118 | form <- des$design_formula 119 | 120 | res 
<- parse_contrast(cond(group = "B"), form) 121 | fun <- function(cov, lvl){ 122 | parse_contrast(cond({{cov}} := lvl), form) 123 | } 124 | res2 <- fun("group", "B") 125 | val <- list("group") 126 | res3 <- parse_contrast(cond(!!val[[1]] := "B"), form) 127 | expect_equal(res, res2) 128 | expect_equal(res, res3) 129 | }) 130 | 131 | test_that("parse_contrast throws appropriate error message", { 132 | expect_error(parse_contrast(cond(hello = "123"), formula = NULL)) 133 | data <- matrix(1, nrow = 5, ncol =10) 134 | expect_message(handle_design_parameter(matrix(1:10, ncol = 1), data, NULL, verbose = TRUE), "The 'design' was not specified with a formula") 135 | }) 136 | 137 | test_that("parse_contrast and handle_design_parameter work", { 138 | n_obs <- 50 139 | col_data <- data.frame(group = sample(LETTERS[1:3], size = n_obs, replace = TRUE), 140 | cont = rnorm(n_obs), 141 | city = sample(c("New York", "Paris", "London"), size = n_obs, replace = TRUE), 142 | y = rnorm(n_obs), 143 | stringsAsFactors = TRUE) 144 | Y <- matrix(0, nrow = 10, ncol = n_obs) 145 | des <- handle_design_parameter(data = Y, design = ~ group + cont, col_data = col_data) 146 | form <- des$design_formula 147 | res <- parse_contrast(cond(group = "B"), form) 148 | }) 149 | -------------------------------------------------------------------------------- /tests/testthat/test-pca.R: -------------------------------------------------------------------------------- 1 | test_that("pca function works", { 2 | 3 | Y <- matrix(rnorm(5 * 10, mean = 3), nrow = 5, ncol = 10) 4 | decomp <- pca(Y, n = 3) 5 | expect_named(decomp, c("coordsystem", "embedding", "offset")) 6 | expect_equal(ncol(decomp$coordsystem), 3) 7 | expect_equal(nrow(decomp$embedding), 3) 8 | 9 | centered_Y <- scale(t(Y), center = TRUE, scale = FALSE) 10 | expect_equal(decomp$offset, attr(centered_Y, "scaled:center")) 11 | alt_pca <- prcomp(t(Y), rank. 
= 3) 12 | expect_equal(decomp$coordsystem, alt_pca$rotation, ignore_attr = "dimnames") 13 | expect_equal(decomp$embedding, t(alt_pca$x), ignore_attr = "dimnames") 14 | expect_equal(decomp$offset, alt_pca$center) 15 | }) 16 | -------------------------------------------------------------------------------- /tests/testthat/test-recursive_least_squares.R: -------------------------------------------------------------------------------- 1 | test_that("recursive least squares works", { 2 | n <- 30 3 | y <- rnorm(n = n) 4 | X <- matrix(rnorm(n * 3), nrow = n, ncol = 3) 5 | res <- recursive_least_squares(y, X) 6 | manual_res <- sapply(seq_len(n), \(idx){ 7 | lm.fit(X[seq_len(idx),,drop=FALSE], y[seq_len(idx)])$coefficients 8 | }) 9 | expect_equal(res[,-c(1,2)], manual_res[,-c(1,2)], ignore_attr = "dimnames") 10 | 11 | 12 | # dat <- data.frame(group = rep(letters[1:3], times = 10), 13 | # cont = rep(c(0.1, 5, 50, 0.2, 6, 20), times = 5)) 14 | 15 | # mm <- model.matrix(~ group, dat) 16 | mm <- duplicate_rows(matrix(rnorm(3 * 3), nrow = 3, ncol = 3), times = 10) 17 | y <- rnorm(30) 18 | group <- c(rep(1:6, times = 5)) 19 | 20 | # Contrast 21 | contr <- matrix(c(1, 0, -1), nrow = 1) 22 | res <- bulked_recursive_least_squares_contrast(y, mm, group, contrast = contr) 23 | lm_fit <- lm(tapply(y, group, mean) ~ mm[1:6,] - 1) 24 | 25 | rdf <- lm_fit$df.residual 26 | # Same as summary(lm_fit)$sigma^2 27 | sigma_sq <- sum(lm_fit$residuals^2) / rdf 28 | covar <- sigma_sq * solve(t(mm[1:6,]) %*% mm[1:6,]) 29 | t_stat <- contr %*% matrix(coef(lm_fit), ncol = 1) / sqrt(contr %*% covar %*% t(contr)) 30 | expect_equal(res$t_stat[30], drop(t_stat), tolerance = 1e-3) 31 | expect_equal(res$coef[,30], coef(lm_fit), tolerance = 1e-3, ignore_attr = "names") 32 | 33 | pval <- pt(t_stat, df = lm_fit$df.residual, lower.tail = TRUE) 34 | # expect_equal(min(pval, 1 - pval) * 2, summary(multcomp::glht(lm_fit, contr))$test$pvalues, 35 | # ignore_attr = "error") 36 | 37 | manual_res <- sapply(seq_len(n), \(idx){ 38 | lm.fit(mm[1:min(6, idx),,drop=FALSE], tapply(y[seq_len(idx)], group[seq_len(idx)], mean))$coefficients 39 | }) 40 | expect_equal(res$coef[,-c(1:5)], unname(manual_res[,-c(1:5)]), tolerance = 1e-3) 41 | 42 | 43 | # Comparison with C++ implementation 44 | cpp_res <- cum_brls_which_abs_max(y, mm, group, contrast = contr, penalty = 1e-6, min_neighborhood_size = 0) 45 | expect_equal(cpp_res$idx, which.max(abs(res$t_stat))) 46 | expect_equal(cpp_res$max, res$t_stat[which.max(abs(res$t_stat))]) 47 | }) 48 | 49 | 50 | test_that("min_neighborhood_size argument of cum_brls_which_abs_max works", { 51 | mm <- duplicate_rows(matrix(rnorm(3 * 3), nrow = 3, ncol = 3), times = 10) 52 | y <- rnorm(30) 53 | group <- c(rep(1:6, times = 5)) 54 | contr <- matrix(c(1, 0, -1), nrow = 1) 55 | 56 | res1 <- cum_brls_which_abs_max(y, mm, group, contrast = contr, penalty = 1e-6, min_neighborhood_size = 28) 57 | # res1$idx # either 28, 29, or 30 58 | expect_gte(res1$idx, 28) 59 | 60 | res2 <- cum_brls_which_abs_max(y, mm, group, contrast = contr, penalty = 1e-6, min_neighborhood_size = 50) 61 | expect_equal(res2$idx, 30) 62 | }) 63 | 64 | 65 | # test_that("bulked_recursive_least_squares_contrast is fast", { 66 | # 67 | # n <- 1e6 68 | # n_gr <- 30 69 | # n_col <- 5 70 | # # contrast <- 1 71 | # contrast <- c(0, 1, 0, -1, 0) 72 | # y <- rnorm(n = n) 73 | # gr <- sample(1:n_gr, size = n, replace = TRUE) 74 | # ref <- matrix(rnorm(n_gr * n_col), nrow = 30, ncol = n_col) 75 | # mm <- do.call(rbind, lapply(gr, \(g){ 76 | # ref[g,] 77 | # })) 78 | # 
system.time( 79 | # res <- bulked_recursive_least_squares_contrast(y, mm, gr, contrast) 80 | # ) 81 | # profvis::profvis( 82 | # bulked_recursive_least_squares_contrast_fast(y, mm, gr, contrast, ridge_penalty = 1e-6) 83 | # ) 84 | # 85 | # bulked_recursive_least_squares_contrast_fast(y, mm, gr, contrast, ridge_penalty = 1e-6) 86 | # system.time( 87 | # cum_brls_which_abs_max(y, mm, gr, contrast, penalty = 1e-6) 88 | # ) 89 | # 90 | # bench::mark( 91 | # # which.max(abs(bulked_recursive_least_squares_contrast(y, mm, gr, contrast)$t_stat)), 92 | # # bulked_recursive_least_squares_contrast_fast(y, mm, gr, contrast)$index, 93 | # cum_brls_which_abs_max(y, mm, gr, contrast, penalty = 1e-6)$index, 94 | # cum_brls_which_abs_max_faster(y, mm, gr, contrast, penalty = 1e-6)$index, 95 | # check = FALSE 96 | # ) 97 | # 98 | # }) 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /tests/testthat/test-ridge_penalty.R: -------------------------------------------------------------------------------- 1 | test_that("ridge_regression works", { 2 | Y <- randn(5, 30) 3 | X <- randn(30, 2) 4 | 5 | lm_fit <- t(coef(lm(t(Y) ~ X - 1))) 6 | expect_equal(ridge_regression(Y, X, ridge_penalty = 0), lm_fit, ignore_attr = "dimnames") 7 | expect_lt(sum(ridge_regression(Y, X, ridge_penalty = 3)^2), sum(lm_fit^2)) 8 | }) 9 | 10 | test_that("weighted ridge_regression works", { 11 | Y <- randn(5, 30) 12 | X <- randn(30, 2) 13 | weights <- rexp(n = 30, rate = 2) 14 | weights <- weights / sum(weights) * ncol(X) 15 | 16 | lm_fit <- t(coef(lm(t(Y) ~ X - 1, weights = weights))) 17 | expect_equal(ridge_regression(Y, X, ridge_penalty = 0, weights = weights), lm_fit, ignore_attr = "dimnames") 18 | expect_lt(sum(ridge_regression(Y, X, ridge_penalty = 3, weights = weights)^2), sum(lm_fit^2)) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-test_de.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("test_de works", { 3 | dat <- make_synthetic_data(n_genes = 30, n_cells = 500, n_lat = 3, n_centers = 5) 4 | fit <- lemur(dat, design = ~ condition, n_embedding = 3, verbose = FALSE) 5 | fit <- align_by_grouping(fit, grouping = fit$colData$cell_type, verbose = FALSE) 6 | fit <- fit[,1:10] 7 | 8 | res <- test_de(fit, cond(condition = "b") - cond(condition = "a")) 9 | res2 <- test_de(fit, cond(condition = "b")) 10 | 11 | expect_equal(dim(res), dim(res2)) 12 | }) 13 | 14 | test_that("my implementation of Welford's algorithm works", { 15 | x <- rnorm(1000) 16 | res <- fold_left(list(mean = 0, msq = 0, iter = 1))(x, \(elem, accum){ 17 | diff <- elem 18 | delta <- diff - accum$mean 19 | accum$mean <- accum$mean + delta / accum$iter 20 | accum$msq <- accum$msq + delta * (diff - accum$mean) 21 | accum$iter <- accum$iter + 1 22 | accum 23 | }) 24 | expect_equal(res$mean, mean(x)) 25 | sd <- sqrt(res$msq / (length(x) - 1)) 26 | expect_equal(sd, sd(x)) 27 | }) 28 | 29 | test_that("test_de works with custom embedding", { 30 | dat <- make_synthetic_data(n_genes = 30, n_cells = 500, n_lat = 3, n_centers = 5) 31 | fit <- lemur(dat, design = ~ condition, n_embedding = 3, verbose = FALSE) 32 | fit <- align_by_grouping(fit, grouping = fit$colData$cell_type, verbose = FALSE) 33 | fit <- fit[,1:10] 34 | test_point <- matrix(0, nrow = 3, ncol = 1) 35 | colnames(test_point) <- "zero" 36 | res <- test_de(fit, cond(condition = "b") - cond(condition = "a"), 37 | embedding = 
test_point) 38 | res2 <- test_de(fit, contrast = cond() - cond(condition = "b"), embedding = test_point) 39 | 40 | expect_equal(dim(res), c(30, 1)) 41 | expect_equal(dim(res2), c(30, 1)) 42 | }) 43 | 44 | 45 | test_that("test_global works", { 46 | skip("Something is broken in 'test_global'") 47 | dat <- make_synthetic_data(n_genes = 30, n_cells = 500, n_lat = 3, n_centers = 5) 48 | fit <- lemur(dat, design = ~ condition, n_embedding = 3, verbose = FALSE) 49 | fit <- align_by_grouping(fit, grouping = dat$cell_type, verbose = FALSE) 50 | 51 | res <- test_global(fit, reduced_design = ~ 1, consider = "linear", variance_est = "analytical", verbose = FALSE) 52 | expect_s3_class(res, "data.frame") 53 | 54 | res3 <- test_global(fit, contrast = cond(condition = "a") - cond(condition = "b"), 55 | variance_est = "resampling", verbose = FALSE) 56 | expect_s3_class(res3, "data.frame") 57 | }) 58 | 59 | test_that("the angle between planes is correctly calculated", { 60 | n_emb <- 4 61 | dat <- make_synthetic_data(n_genes = 30, n_cells = 5000, n_lat = 5, n_centers = 3) 62 | fitlm <- lm(t(assay(dat)) ~ dat$condition) 63 | assay(dat) <- t(fitlm$residuals) 64 | fit <- lemur(dat, design = ~ condition, n_embedding = n_emb, test_fraction = 0, verbose = FALSE) 65 | expect_equal(fit$linear_coefficients, matrix(0, nrow = nrow(fit), ncol = 3), ignore_attr = "dimnames") 66 | # The angle and delta_diffemb for a left-right contrast are slightly different than the results 67 | # for a one-sided contrast. The left-right contrast is slighly more accurate because it calculate 68 | # log(map(a, p), map(b, p)) instead of simply a - b 69 | res <- test_global(fit, contrast = cond(condition = "a") - cond(condition = "b"), 70 | variance_est = "none", verbose = FALSE) 71 | res2 <- test_global(fit, contrast = cond(condition = "a") - cond(condition = "b"), 72 | variance_est = "none", verbose = FALSE) 73 | plane_a <- pca(assay(dat)[,dat$condition == "a"], n = n_emb)$coordsystem 74 | plane_b <- pca(assay(dat)[,dat$condition == "b"], n = n_emb)$coordsystem 75 | expect_equal(res2$angle_degrees, tail(principal_angle(plane_a, plane_b), n = 1)) 76 | }) 77 | 78 | test_that("test_global's analytical test produces uniform p-values", { 79 | skip("Long running test") 80 | 81 | # Analytical test for linear part 82 | res <- do.call(rbind, lapply(1:40, function(idx){ 83 | print(paste0("Round: ", idx)) 84 | dat <- make_synthetic_data(n_genes = 30, n_cells = 500, n_lat = 3, n_centers = 5, 85 | treatment_effect = 0.8) 86 | dat$rand_cond <- sample(LETTERS[1:3], ncol(dat), replace = TRUE) 87 | fit <- lemur(dat, design = ~ rand_cond, n_embedding = 3, verbose = FALSE) 88 | test_global(fit, reduced_design = ~ 1, variance_est = "analytical", 89 | consider = "linear", verbose = FALSE) 90 | })) 91 | hist(res$pval, breaks = 40) 92 | plot(sort(res$pval), ppoints(nrow(res)), asp = 1, log = "xy"); abline(0,1) 93 | 94 | # Multi-var Z test for both 95 | res <- do.call(rbind, lapply(1:30, function(idx){ 96 | print(paste0("Round: ", idx)) 97 | dat <- randn(8, 500) 98 | rand_cond <- sample(LETTERS[1:2], ncol(dat), replace = TRUE) 99 | fit <- lemur(dat, design = ~ rand_cond, n_embedding = 2, verbose = FALSE) 100 | test_global(fit, contrast = rand_condB, verbose = FALSE) 101 | })) 102 | hist(res$pval, breaks = 40) 103 | plot(sort(res$pval), ppoints(nrow(res)), asp = 1, log = "xy"); abline(0,1) 104 | 105 | # Resampling-based test 106 | res <- do.call(rbind, lapply(1:100, function(idx){ 107 | print(paste0("Round: ", idx)) 108 | dat <- make_synthetic_data(n_genes = 30, 
n_cells = 500, n_lat = 3, n_centers = 5, 109 | treatment_effect = 0.8) 110 | dat$rand_cond <- sample(LETTERS[1:3], ncol(dat), replace = TRUE) 111 | dat$num <- round(runif(ncol(dat), min = -0.7, max = 0.7)) 112 | fit <- lemur(dat, design = ~ rand_cond + num, n_embedding = 2, verbose = FALSE) 113 | test_global(fit, contrast = rand_condB, 114 | variance_est = "resampling", verbose = FALSE) 115 | })) 116 | hist(res$pval, breaks = 40) 117 | plot(sort(res$pval), ppoints(nrow(res)), asp = 1, log = "xy"); abline(0,1) 118 | 119 | # Check that multi-var Z test and resampling are consistent 120 | res <- do.call(rbind, lapply(1:30, function(idx){ 121 | print(paste0("Round: ", idx)) 122 | dat <- make_synthetic_data(n_genes = 30, n_cells = 500, n_lat = 3, n_centers = 5, 123 | treatment_effect = 0.8) 124 | dat$rand_cond <- sample(LETTERS[1:3], ncol(dat), replace = TRUE) 125 | dat$num <- round(runif(ncol(dat), min = -0.7, max = 0.7)) 126 | fit <- lemur(dat, design = ~ rand_cond + num, n_embedding = 0, verbose = FALSE) 127 | res1 <- test_global(fit, contrast = rand_condB, variance_est = "resampling", verbose = FALSE) 128 | 129 | res3 <- test_global(fit, contrast = rand_condB, 130 | variance_est = "analytical", consider = "linear", verbose = FALSE) 131 | tmp <- rbind(res1, res3) 132 | tmp$method <- c("resampling", "analytical") 133 | tmp 134 | })) 135 | res 136 | }) 137 | 138 | 139 | -------------------------------------------------------------------------------- /tests/testthat/test-util.R: -------------------------------------------------------------------------------- 1 | test_that("mply_dbl works", { 2 | 3 | mat1 <- mply_dbl(1:4, \(i) rep(i * 2, 7), ncol = 7) 4 | expect_equal(dim(mat1), c(4, 7)) 5 | 6 | tmp <- lapply(1:4, \(i) rep(i * 2, 7)) 7 | expect_equal(dim(stack_rows(tmp)), c(4, 7)) 8 | expect_equal(dim(stack_cols(tmp)), c(7, 4)) 9 | 10 | }) 11 | 12 | test_that("duplicate_cols and duplicate_rows works", { 13 | mat <- matrix(1:10, nrow = 5, ncol = 2) 14 | expect_equal(duplicate_cols(mat, 2), cbind(mat, mat)) 15 | expect_equal(duplicate_rows(mat, 2), rbind(mat, mat)) 16 | 17 | expect_equal(duplicate_cols(mat, 0), matrix(nrow = 5, ncol = 0)) 18 | expect_equal(duplicate_rows(mat, 0), matrix(nrow = 0, ncol = 2)) 19 | }) 20 | 21 | 22 | test_that("fold_left works", { 23 | expect_equal(fold_left(0)(1:10, \(elem, accum) accum + elem), sum(1:10)) 24 | expect_equal(fold_left(1)(1:10, \(elem, accum) accum * elem), prod(1:10)) 25 | expect_equal(fold_left(NULL)(2:10, \(elem, accum) if(is.null(accum)) elem * 5 else accum * elem), 5 * prod(2:10)) 26 | expect_error(fold_left(0)(1:10, \(x, y) accum + elem)) 27 | }) 28 | 29 | 30 | test_that("resample works", { 31 | 32 | samp <- resample(3) 33 | expect_equal(length(samp), 3) 34 | expect_true(all(samp %in% (1:3))) 35 | 36 | group <- sample(letters[1:3], 100, replace = TRUE) 37 | samp <- resample(100, group) 38 | 39 | }) 40 | 41 | 42 | test_that("which_extreme works", { 43 | x <- abs(rnorm(10)) 44 | expect_equal(which_extreme(x), which.max(x)) 45 | expect_equal(which_extreme(-x), which.min(-x)) 46 | 47 | ignore <- rep(c(FALSE, TRUE), each = 5) 48 | expect_equal(which_extreme(x, ignore), which.max(x[! 
ignore])) 49 | }) 50 | 51 | 52 | # test_that("nullspace works", { 53 | # mat <- randn(30, 3) 54 | # # n1 <- lmerTest:::nullspace(mat, type = "left") 55 | # n2 <- nullspace(mat) 56 | # n3 <- MASS::Null(mat) 57 | # n4 <- pracma::nullspace(t(mat)) 58 | # # expect_equal(grassmann_angle_from_points(n1, n2), 0) 59 | # expect_equal(grassmann_angle_from_points(n3, n2), 0) 60 | # expect_equal(grassmann_angle_from_points(n4, n2), 0) 61 | # }) 62 | 63 | 64 | test_that("estimability test works", { 65 | n <- 40 66 | dat <- data.frame(group = sample(letters[1:3], size = n, replace = TRUE), 67 | cont1 = rnorm(n), 68 | cont2 = rnorm(n)) 69 | 70 | mm <- model.matrix(~ group + cont1 + cont2, data = dat) 71 | 72 | expect_true(is_contrast_estimable(c(0, 1, 0, 0, 0), mm)) 73 | expect_false(is_contrast_estimable(c(0, 1, 0, 0, 0), mm[dat$group != "a",])) 74 | expect_true( is_contrast_estimable(c(0, 0, 0, 1, 0), mm[dat$group != "a",])) 75 | expect_true( is_contrast_estimable(c(0, 0, 0, 1, -1), mm[dat$group != "a",])) 76 | expect_true( is_contrast_estimable(c(0, 0, 0, 2, -1), mm[dat$group != "a",])) 77 | expect_false(is_contrast_estimable(c(1, 0, 0, 2, -1), mm[dat$group != "a",])) 78 | expect_true( is_contrast_estimable(c(0, 1, -1, 0, 0), mm[dat$group != "a",])) 79 | expect_false(is_contrast_estimable(c(0, 2, -1, 0, 0), mm[dat$group != "a",])) 80 | }) 81 | 82 | 83 | test_that("pseudoinverse works", { 84 | # Works well for full rank matrices 85 | mat <- randn(20, 3) 86 | pmat <- pseudoinverse(mat) 87 | expect_equal(mat %*% pmat %*% mat, mat) 88 | expect_equal(pmat %*% mat %*% pmat, pmat) 89 | expect_equal(pmat, solve(t(mat) %*% mat) %*% t(mat)) 90 | 91 | 92 | # Works well for non-full rank matrices 93 | mat <- randn(20, 2) 94 | mat <- cbind(mat, mat[,2]) 95 | pmat <- pseudoinverse(mat) 96 | expect_equal(mat %*% pmat %*% mat, mat) 97 | expect_equal(pmat %*% mat %*% pmat, pmat) 98 | # expect_equal(pmat, solve(t(mat) %*% mat) %*% t(mat)) 99 | 100 | }) 101 | 102 | 103 | test_that("update_values works", { 104 | df1 <- S4Vectors::DataFrame(x = 1:5, y = letters[1:5]) 105 | df2 <- data.frame(a = 10^(0:4), b = "red", x = letters[10:14]) 106 | 107 | expect_equal(NULL %update_values% NULL, NULL) 108 | expect_equal(df1 %update_values% NULL, df1) 109 | expect_equal(NULL %update_values% df2, df2) 110 | expect_equal(df1 %update_values% df2, S4Vectors::DataFrame(x = df2$x, y = df1$y, a = df2$a, b = df2$b)) 111 | }) 112 | 113 | test_that("default_to works", { 114 | df1 <- S4Vectors::DataFrame(x = 1:5, y = letters[1:5]) 115 | df2 <- data.frame(a = 10^(0:4), b = "red", x = letters[10:14]) 116 | 117 | expect_equal(NULL %default_to% NULL, NULL) 118 | expect_equal(df1 %default_to% NULL, df1) 119 | expect_equal(NULL %default_to% df2, df2) 120 | expect_equal(df1 %default_to% df2, S4Vectors::DataFrame(x = df1$x, y = df1$y, a = df2$a, b = df2$b)) 121 | }) 122 | 123 | 124 | test_that("aggregate_matrix works", { 125 | mat <- matrix(rnorm(10 * 5), nrow = 10, ncol = 5) 126 | res <- aggregate_matrix(mat, group_split = list(c(1,3), c(2,4,5)), 127 | aggr_fnc = MatrixGenerics::rowSums2) 128 | expect_equal(res, cbind(rowSums(mat[,c(1,3)]), rowSums(mat[,c(2,4,5)]))) 129 | 130 | res <- aggregate_matrix(as(mat, "dgCMatrix"), group_split = list(c(1,3), c(2,4,5)), 131 | aggr_fnc = MatrixGenerics::rowSums2) 132 | expect_equal(res, cbind(rowSums(mat[,c(1,3)]), rowSums(mat[,c(2,4,5)]))) 133 | }) 134 | 135 | -------------------------------------------------------------------------------- /vignettes/.gitignore: 
-------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/man/figures/equation_schematic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/const-ae/lemur/a9dda42650c03fa8ed2ed4adf6d0f4fa76e2e249/vignettes/man/figures/equation_schematic.png --------------------------------------------------------------------------------
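The counting loop in `count_neighbors_fast()` (src/util.cpp above) has a compact R equivalent that can serve as a reference when checking the compiled code (a sketch, assuming `knn_mat` stores 1-based neighbor indices, as the C++ indexing implies):

count_neighbors_ref <- function(knn_mat, indices) {
  # For every cell, count how often it occurs among the k nearest neighbors of
  # the selected cells; this matches the double loop in util.cpp.
  tabulate(as.vector(knn_mat[indices, , drop = FALSE]), nbins = nrow(knn_mat))
}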