├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── R ├── assign_neighbourhoods.R ├── calc_AUC_per_neighbourhood.R ├── convert_de_stat.R ├── de_test_neighbourhoods.R ├── estimate_neighbourhood_sizes.R ├── filter_neighbourhoods.R ├── plotting_functions.R ├── rank_neighbourhoods_by_DE_magnitude.R ├── sce_mouseEmbryo.R ├── spatial_pval_adjustment.R └── utils.R ├── README.md ├── data-raw └── sce_mouseEmbryo.R ├── data └── sce_mouseEmbryo.rda ├── man ├── assign_neighbourhoods.Rd ├── calc_AUC_per_neighbourhood.Rd ├── convert_de_stat.Rd ├── de_test_neighbourhoods.Rd ├── de_test_single_neighbourhood.Rd ├── estimate_neighbourhood_sizes.Rd ├── filter_neighbourhoods.Rd ├── plot_DE_gene_set.Rd ├── plot_DE_single_gene.Rd ├── plot_beeswarm_gene_set.Rd ├── plot_beeswarm_single_gene.Rd ├── plot_milo_by_single_metric.Rd ├── rank_neighbourhoods_by_DE_magnitude.Rd ├── sce_mouseEmbryo.Rd └── spatial_pval_adjustment.Rd ├── miloDE.Rproj ├── miloDE_cartoon.png ├── tests ├── testthat.R └── testthat │ ├── test-assign_neighbourhoods.R │ ├── test-calc_AUC_per_neighbourhood.R │ ├── test-convert_de_stat.R │ ├── test-de_test_neighbourhoods.R │ ├── test-estimate_neighbourhood_sizes.R │ ├── test-filter_neighbourhoods.R │ ├── test-rank_neighbourhoods_by_DE_magnitude.R │ └── test-spatial_pval_adjustment.R └── vignettes ├── .gitignore └── miloDE__mouse_embryo.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^miloDE\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^data-raw$ 4 | ^LICENSE\.md$ 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # User-specific files 9 | .Ruserdata 10 | 11 | # Example code in package build process 12 | *-Ex.R 13 | 14 | # Output files from R CMD build 15 | /*.tar.gz 16 | 17 | # Output files from R CMD check 18 | /*.Rcheck/ 19 | 20 | # RStudio files 21 | .Rproj.user/ 22 | 23 | # produced vignettes 24 | vignettes/*.html 25 | vignettes/*.pdf 26 | 27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 28 | .httr-oauth 29 | 30 | # knitr and R markdown default cache directories 31 | *_cache/ 32 | /cache/ 33 | 34 | # Temporary files created by R markdown 35 | *.utf8.md 36 | *.knit.md 37 | 38 | # R Environment Variables 39 | .Renviron 40 | .Rproj.user 41 | inst/doc 42 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: miloDE 2 | Title: Sensitive DE testing using neighbourhoods instead of discrete clusters 3 | Version: 0.0.0.9000 4 | Authors@R: 5 | person("Alsu", "Missarova", , "alsu@ebi.ac.uk", role = c("aut", "cre"), 6 | comment = c(ORCID = "YOUR-ORCID-ID")) 7 | Description: Sensitive DE testing using neighbourhoods instead of discrete clusters. 8 | License: MIT + file LICENSE 9 | Encoding: UTF-8 10 | Roxygen: list(markdown = TRUE) 11 | RoxygenNote: 7.3.0 12 | Imports: 13 | dplyr, 14 | ggplot2, 15 | ggpubr, 16 | Matrix, 17 | scuttle, 18 | SingleCellExperiment, 19 | SummarizedExperiment, 20 | stats, 21 | tibble, 22 | RcppGreedySetCover, 23 | igraph, 24 | miloR, 25 | edgeR, 26 | Augur, 27 | S4Vectors, 28 | reshape2, 29 | ggraph, 30 | grDevices, 31 | BiocParallel, 32 | RColorBrewer, 33 | methods, 34 | ggbeeswarm, 35 | limma 36 | Remotes: 37 | github::neurorestore/Augur, 38 | github::Bioconductor/MatrixGenerics, 39 | github::const-ae/sparseMatrixStats 40 | Depends: 41 | R (>= 2.10) 42 | Suggests: 43 | rmarkdown, 44 | knitr, 45 | testthat (>= 3.0.0), 46 | scran, 47 | Seurat, 48 | scWGCNA, 49 | uwot, 50 | MouseGastrulationData, 51 | viridis 52 | VignetteBuilder: knitr 53 | LazyData: true 54 | Config/testthat/edition: 3 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: miloDE authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 miloDE authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(assign_neighbourhoods) 4 | export(calc_AUC_per_neighbourhood) 5 | export(convert_de_stat) 6 | export(de_test_neighbourhoods) 7 | export(de_test_single_neighbourhood) 8 | export(estimate_neighbourhood_sizes) 9 | export(filter_neighbourhoods) 10 | export(plot_DE_gene_set) 11 | export(plot_DE_single_gene) 12 | export(plot_beeswarm_gene_set) 13 | export(plot_beeswarm_single_gene) 14 | export(plot_milo_by_single_metric) 15 | export(rank_neighbourhoods_by_DE_magnitude) 16 | export(spatial_pval_adjustment) 17 | import(Augur) 18 | import(Matrix) 19 | import(ggplot2) 20 | importFrom(BiocParallel,bplapply) 21 | importFrom(RColorBrewer,brewer.pal) 22 | importFrom(RcppGreedySetCover,greedySetCover) 23 | importFrom(S4Vectors,DataFrame) 24 | importFrom(S4Vectors,isEmpty) 25 | importFrom(SingleCellExperiment,SingleCellExperiment) 26 | importFrom(SingleCellExperiment,counts) 27 | importFrom(SingleCellExperiment,logcounts) 28 | importFrom(SingleCellExperiment,reducedDim) 29 | importFrom(SingleCellExperiment,reducedDimNames) 30 | importFrom(SingleCellExperiment,reducedDims) 31 | importFrom(SummarizedExperiment,"assay<-") 32 | importFrom(SummarizedExperiment,"assayNames<-") 33 | importFrom(SummarizedExperiment,"assays<-") 34 | importFrom(SummarizedExperiment,"colData<-") 35 | importFrom(SummarizedExperiment,assay) 36 | importFrom(SummarizedExperiment,assayNames) 37 | importFrom(SummarizedExperiment,assays) 38 | importFrom(SummarizedExperiment,colData) 39 | importFrom(dplyr,"%>%") 40 | importFrom(dplyr,arrange) 41 | importFrom(dplyr,distinct) 42 | importFrom(dplyr,mutate) 43 | importFrom(edgeR,DGEList) 44 | importFrom(edgeR,calcNormFactors) 45 | importFrom(edgeR,estimateDisp) 46 | importFrom(edgeR,filterByExpr) 47 | importFrom(edgeR,glmQLFTest) 48 | importFrom(edgeR,glmQLFit) 49 | importFrom(edgeR,topTags) 50 | importFrom(ggbeeswarm,geom_quasirandom) 51 | importFrom(ggpubr,ggarrange) 52 | importFrom(ggraph,geom_edge_link) 53 | importFrom(ggraph,geom_edge_link0) 54 | importFrom(ggraph,geom_node_point) 55 | importFrom(ggraph,ggraph) 56 | importFrom(ggraph,scale_edge_width) 57 | importFrom(grDevices,colorRampPalette) 58 | importFrom(igraph,"E<-") 59 | importFrom(igraph,"V<-") 60 | importFrom(igraph,"vertex_attr<-") 61 | importFrom(igraph,E) 62 | importFrom(igraph,V) 63 | importFrom(igraph,connect) 64 | importFrom(igraph,count_triangles) 65 | importFrom(igraph,delete.edges) 66 | importFrom(igraph,induced_subgraph) 67 | importFrom(igraph,is_igraph) 68 | importFrom(igraph,neighborhood) 69 | importFrom(igraph,permute) 70 | importFrom(igraph,set_vertex_attr) 71 | importFrom(igraph,simplify) 72 | importFrom(igraph,vertex_attr) 73 | importFrom(limma,makeContrasts) 74 | importFrom(methods,as) 75 | importFrom(methods,is) 76 | importFrom(miloR,"graph<-") 77 | importFrom(miloR,"nhoodGraph<-") 78 | importFrom(miloR,"nhoodIndex<-") 79 | importFrom(miloR,"nhoods<-") 80 | importFrom(miloR,Milo) 81 | importFrom(miloR,buildGraph) 82 | importFrom(miloR,buildNhoodGraph) 83 | importFrom(miloR,graph) 84 | importFrom(miloR,nhoodGraph) 85 | importFrom(miloR,nhoodIndex) 86 | importFrom(miloR,nhoods) 87 | importFrom(reshape2,dcast) 88 | importFrom(reshape2,melt) 89 | importFrom(scuttle,summarizeAssayByGroup) 90 | importFrom(stats,as.formula) 91 | importFrom(stats,model.matrix) 92 | importFrom(stats,p.adjust) 93 | importFrom(stats,quantile) 94 | importFrom(stats,sd) 95 | importFrom(tibble,rownames_to_column) 96 | -------------------------------------------------------------------------------- /R/assign_neighbourhoods.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' assign_neighbourhoods 4 | #' 5 | #' Assign neighbourhoods to single-cell RNA-seq data (in \code{SingleCellExperiment} format) 6 | #' @param x A \code{\linkS4class{SingleCellExperiment}} object. 7 | #' @param reducedDim_name Defines the assay in \code{reducedDim(x)} to use as the embedding for graph construction. 8 | #' @param k Positive integer, defines how many neighbours to use for the neighbourhood assignment. Default \code{k = 25}. 9 | #' @param prop Numerical, between 0 and 1, defines which fraction of cells to use as neighbourhood centres. Default \code{prop = 0.2}. 10 | #' @param order In \code{c(1,2)}, defines which order of neighbours to use. Default \code{order = 2}. 11 | #' @param filtering In \code{c(TRUE,FALSE)}, defines whether to refine the assignment. Default \code{filtering = TRUE}. 12 | #' @param k_init Positive integer, defines how many neighbours to use for identifying anchor cells (for this step we use 1st-order kNN). Default \code{k_init = 50}. 13 | #' @param d Positive integer, defines how many dimensions from \code{reducedDim(x)} to use. Default \code{d = 30}. 14 | #' @param verbose Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}. 15 | #' @details 16 | #' This function assigns neighbourhoods to single-cell data. This includes assigning graph representation, selecting \sQuote{index} cells and, finally, for each index cell, assigning it along with its neighbourhoors to one neighbourhood. 17 | #' 18 | #' Specifically, algorithm goes as follows: 19 | #' 1. Assigning \sQuote{loose} graph (i.e. ~low k, 1st-order kNN) to select index cells for the selected \code{prop} (greatly reduces computational time to look for \sQuote{index} cells in a loose graph). 20 | #' 2. Reassigning graph following entered by the user \code{order} and \code{k}. 21 | #' 3. Assigning neighbourhoods. 22 | #' 4. (Optional but recommended) Refining the neighbourhood assignment (check \code{\link{filter_neighbourhoods}}). 23 | #' 24 | #' @return \code{\linkS4class{Milo}} object containing cell-neighbourhood matrix in \code{nhoods(out)} slot 25 | #' @export 26 | #' @importFrom miloR Milo buildGraph graph<- graph nhoods<- nhoodIndex<- buildNhoodGraph 27 | #' @importFrom SingleCellExperiment reducedDim 28 | #' @importFrom SummarizedExperiment assay 29 | #' @import Matrix 30 | #' @importFrom igraph connect V neighborhood 31 | #' @importFrom methods as 32 | #' @examples 33 | #' require(SingleCellExperiment) 34 | #' n_row = 500 35 | #' n_col = 100 36 | #' n_latent = 5 37 | #' sce = SingleCellExperiment(assays = 38 | #' list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 39 | #' rownames(sce) = as.factor(1:n_row) 40 | #' colnames(sce) = c(1:n_col) 41 | #' sce$cell = colnames(sce) 42 | #' reducedDim(sce , "reduced_dim") = 43 | #' matrix(rnorm(n_col*n_latent), ncol=n_latent) 44 | #' out = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 45 | assign_neighbourhoods = function(x , reducedDim_name , k = 25, prop = 0.2, order = 2, filtering = TRUE, k_init = 50, d = 30, verbose = TRUE){ 46 | 47 | #args = c(as.list(environment())) 48 | #out = .general_check_arguments(args) & .check_reducedDim_in_sce(sce , reducedDim_name) 49 | out = .check_argument_correct(x, .check_sce, "Check x - something is wrong (gene names unique?)") & 50 | .check_argument_correct(k, .check_positive_integer, "Check k - should be positive integer") & 51 | .check_argument_correct(prop, .check_prop, "Check prop - should be positive number between 0 and 1") & 52 | .check_argument_correct(order, function(x) .check_arg_within_options(x, c(1,2)), 53 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)") & 54 | .check_argument_correct(filtering, .check_boolean, "Check filtering - should be either TRUE or FALSE") & 55 | .check_argument_correct(reducedDim_name, is.character, "Check reducedDim_name - should be character vector") & 56 | .check_argument_correct(k_init, .check_positive_integer, "Check k_init - should be positive integer") & 57 | .check_argument_correct(d, .check_positive_integer, "Check d - should be positive integer") & 58 | .check_argument_correct(verbose, .check_boolean, "Check verbose - should be either TRUE or FALSE") & 59 | .check_reducedDim_in_sce(x , reducedDim_name) 60 | 61 | 62 | d <- min(d , ncol(reducedDim(x , reducedDim_name))) 63 | k_init <- min(k , k_init) 64 | 65 | if (is.null(colnames(x))){ 66 | colnames(x) = as.character(c(1:ncol(x))) 67 | } 68 | if (is(x , "SingleCellExperiment") && !is(x , "Milo")){ 69 | x = Milo(x) 70 | # build 1st order to sample vertices 71 | k_init <- min(k , k_init) 72 | x <- suppressMessages(buildGraph(x, k = k_init, d = d, reduced.dim = reducedDim_name)) 73 | } 74 | else { 75 | message("SCE is Milo object. Checking if graph is already constructed.") 76 | if (length(miloR::graph(x)) == 0){ 77 | message("Graph is not constructed yet. Building now.") 78 | x <- suppressMessages(buildGraph(x, k = k_init, d = d, reduced.dim = reducedDim_name)) 79 | } 80 | } 81 | # find anchor cells 82 | sampled_vertices <- .get_graph_refined_sampling(graph(x), prop) 83 | 84 | # rebuild to the actual graph, with parameters specified by user 85 | if (!k == k_init){ 86 | x <- suppressMessages(buildGraph(x, k = k, d = d, reduced.dim = reducedDim_name)) 87 | } 88 | # if order == 2 -- reassign edges 89 | if (order == 2){ 90 | graph(x) = connect(graph(x),order) 91 | } 92 | 93 | # create nhoods 94 | nh_mat <- Matrix(data = 0, nrow=ncol(x), ncol=length(sampled_vertices), sparse = TRUE) 95 | v.class <- V(graph(x))$name 96 | rownames(nh_mat) <- colnames(x) 97 | for (X in seq_len(length(sampled_vertices))){ 98 | nh_mat[unlist(neighborhood(graph(x), order = 1, nodes = sampled_vertices[X])), X] <- 1 99 | } 100 | colnames(nh_mat) <- as.character(sampled_vertices) 101 | nhoodIndex(x) <- as(sampled_vertices, "list") 102 | nhoods(x) <- nh_mat 103 | 104 | # filter 105 | if (!filtering){ 106 | x = suppressMessages(buildNhoodGraph(x)) 107 | } 108 | else { 109 | if (verbose){ 110 | message("Filtering redundant neighbourhoods.") 111 | } 112 | x = suppressMessages(filter_neighbourhoods(x)) 113 | } 114 | 115 | if (verbose){ 116 | stat_print =.calc_quick_stat(x , nhoods(x)) 117 | message(paste0("Finished successfully.\nNumber of neighbourhoods assigned: ", stat_print$n_hoods , 118 | ";\naverage neighbourhood size: ", stat_print$avg_hood_size , 119 | ";\nnumber of unassigned cells: ", stat_print$n_cells_unocovered)) 120 | } 121 | return(x) 122 | } 123 | 124 | 125 | #' 126 | #' 127 | #' @importFrom igraph V set_vertex_attr induced_subgraph count_triangles neighborhood 128 | #' @importFrom miloR graph nhoodIndex nhoods<- 129 | #' @importFrom dplyr %>% 130 | .get_graph_refined_sampling <- function(graph, prop){ 131 | random_vertices <- sample(V(graph), size=floor(prop*length(V(graph)))) 132 | random_vertices <- as.vector(random_vertices) 133 | X_graph <- set_vertex_attr(graph, "name", value = 1:length(V(graph))) 134 | refined_vertices <- lapply(seq_along(random_vertices), function(i){ 135 | target_vertices <- unlist(neighborhood(X_graph, order = 1, nodes = random_vertices[i])) #get neighborhood of random vertex 136 | target_vertices <- target_vertices[-1] #remove first entry which is the random vertex itself 137 | rv_induced_subgraph <- induced_subgraph(graph = X_graph, vids = target_vertices) 138 | triangles <- count_triangles(rv_induced_subgraph) 139 | max_triangles <- max(triangles) 140 | max_triangles_indices <- which(triangles == max_triangles) 141 | #note - take first max_ego_index in the next line of code 142 | resulting_vertices <- V(rv_induced_subgraph)[max_triangles_indices]$name[1] 143 | return(resulting_vertices) 144 | }) %>% unlist() %>% as.integer() 145 | refined_vertices = unique(refined_vertices) 146 | return(refined_vertices) 147 | } 148 | 149 | #' 150 | .calc_quick_stat = function(x , nhoods_sce){ 151 | n_hoods = ncol(nhoods_sce) 152 | avg_hood_size = round(mean(colSums(nhoods_sce))) 153 | n_cells_unocovered = ncol(x) - sum(rowSums(nhoods_sce) > 0) 154 | out = list(n_hoods = n_hoods , 155 | avg_hood_size = avg_hood_size , 156 | n_cells_unocovered = n_cells_unocovered) 157 | return(out) 158 | } 159 | -------------------------------------------------------------------------------- /R/calc_AUC_per_neighbourhood.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' calc_AUC_per_neighbourhood 4 | #' 5 | #' Returns per neighbourhood AUC from Augur based (RF) classifiers 6 | #' 7 | #' @param x A \code{\linkS4class{Milo}} object. 8 | #' @param genes Character vector specifying genes to be passed for the testing. Default \code{genes = rownames(x)}. 9 | #' @param sample_id Character specifying which variable should be used as a replicate ID. 10 | #' Should be in \code{colnames(colData(x))}. Default \code{sample_id = "sample"}. 11 | #' @param condition_id Character specifying which variable should be used as a condition ID. 12 | #' Should be in \code{colnames(colData(x))}. 13 | #' @param conditions In case of multiple comparable groups, character vector specifying which conditions should be tested for separation. 14 | #' Default \code{conditions = NULL} and assumes that only 2 different conditions are present. 15 | #' @param min_n_cells_per_sample Positive integer specifying the minimum number of cells per replicate to be included in testing. 16 | #' Default \code{min_n_cells_per_sample = 3}. 17 | #' @param n_threads Positive integer specifying the number of cores to be used to calculate AUC. 18 | #' Higher number results in faster calculation, but its feasibility depends on the specs of your machine. 19 | #' Only relevant if \code{BPPARAM = NULL}. Default \code{n_threads = 2}. 20 | #' @param BPPARAM NULL or \code{\link{MulticoreParam}} object. Default \code{BPPARAM = NULL} assuming no parallelisation. 21 | #' @details 22 | #' This function calculates for each neighbourhood whether cells between 2 conditions can be separated 23 | #' with Random Forest based classifiers (adapted from \code{\link[Augur]{calculate_auc}}). 24 | #' Accordingly, AUCs of the classifiers represent how well we can separate 2 conditions. 25 | #' 26 | #' We suggest that neighbourhoods with AUC > 0.5 suggest a certain degree of separation between 2 conditions that can further be examined 27 | #' with DE testing (and, accordingly, neighbourhoods with AUC <= 0.5 can be safely discarded). You also can set your own AUC threshold if desired as well as use AUCs to rank neighbourhoods. 28 | #' 29 | #' \emph{Note that this function is only relevant for \dQuote{simple} models (e.g. not nested or no interactions.) 30 | #' Also, it is hard-coded that for all neighbourhoods, in which total number of cells is less than 4 in at least one condition, 31 | #' AUC will be set to NaN (classifiers for such low numbers will not be built)}. 32 | #' 33 | #' @return \code{data.frame} object, with AUC calculated for each neighbourhood 34 | #' @export 35 | #' @importFrom SummarizedExperiment colData assayNames 36 | #' @importFrom miloR nhoods 37 | #' @importFrom BiocParallel bplapply 38 | #' 39 | #' @examples 40 | #' require(SingleCellExperiment) 41 | #' n_row = 500 42 | #' n_col = 100 43 | #' n_latent = 5 44 | #' sce = SingleCellExperiment(assays = list(counts = 45 | #' floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 46 | #' logcounts(sce) = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4 47 | #' rownames(sce) = as.factor(1:n_row) 48 | #' colnames(sce) = c(1:n_col) 49 | #' sce$cell = colnames(sce) 50 | #' sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 51 | #' sce$type = ifelse(sce$sample %in% c(1,2) , "ref" , "query") 52 | #' reducedDim(sce , "reduced_dim") = matrix(rnorm(n_col*n_latent), 53 | #' ncol=n_latent) 54 | #' sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 55 | #' sce = calc_AUC_per_neighbourhood(sce, condition_id = "type") 56 | calc_AUC_per_neighbourhood <- function(x , genes = rownames(x) , sample_id = "sample" , 57 | condition_id , conditions = NULL, 58 | min_n_cells_per_sample = 3, n_threads = 2 , BPPARAM = NULL){ 59 | 60 | out = .check_argument_correct(x, .check_sce, "Check x - something is wrong (are gene names unique?)") & 61 | .check_sce_milo(x) & 62 | .check_argument_correct(sample_id, is.character, "Check sample_id - should be character vector") & 63 | .check_argument_correct(condition_id, is.character, "Check condition_id - should be character vector") & 64 | .check_argument_correct(min_n_cells_per_sample, .check_positive_integer, "Check min_n_cells_per_sample - should be positive integer") & 65 | .check_argument_correct(n_threads , .check_positive_integer , "Check n_threads - should be positive integer") & 66 | .check_var_in_coldata_sce(x , sample_id , "sample_id") & .check_condition_in_coldata_sce(x , condition_id) & 67 | .check_sample_and_condition_id_valid(x , condition_id , sample_id) & 68 | .check_genes_in_sce(x , genes) 69 | 70 | if (!"logcounts" %in% assayNames(x)){ 71 | stop("Please calculate log-normalised counts first if you want to calculate AUC per neighbourhood.") 72 | } 73 | 74 | x = x[genes, ] 75 | 76 | if (is.null(colnames(x))){ 77 | colnames(x) = as.character(c(1:ncol(x))) 78 | } 79 | # assign condition and sample ids 80 | coldata <- as.data.frame(colData(x)) 81 | x$milo_sample_id <- as.factor( coldata[, sample_id] ) 82 | x$milo_condition_id <- as.factor( coldata[, condition_id] ) 83 | 84 | if (is.null(conditions)){ 85 | tab = table(x$milo_condition_id) 86 | if (!length(tab) == 2){ 87 | stop("If conditions == NULL, there should be exactly two levels for tested conditions.") 88 | } 89 | } else { 90 | if (mean(conditions %in% unique(x$milo_condition_id)) < 1){ 91 | stop("All specified conditions should be present.") 92 | } 93 | if (!length(conditions) == 2){ 94 | stop("Conditions should be a vector of 2 elements.") 95 | } 96 | x = x[, x$milo_condition_id %in% conditions] 97 | } 98 | 99 | nhoods_sce = nhoods(x) 100 | # filter out for relevant cells 101 | current_cols = colnames(nhoods_sce) 102 | nhoods_sce = as.matrix( nhoods_sce[rownames(nhoods_sce) %in% colnames(x), ] ) 103 | colnames(nhoods_sce) = current_cols 104 | # delete neighbourhoods that contain 0 cells (possible due to the upstream filtering) 105 | idx = which(colSums(nhoods_sce) > 0) 106 | current_cols = colnames(nhoods_sce)[idx] 107 | nhoods_sce = as.matrix( nhoods_sce[, idx] ) 108 | colnames(nhoods_sce) = current_cols 109 | 110 | if (is.null(BPPARAM)){ 111 | auc_stat = lapply(colnames(nhoods_sce) , function(hood_id){ 112 | out = .get_auc_single_hood(x , nhoods_sce , hood_id , min_cells = 4 , min_n_cells_per_sample = min_n_cells_per_sample , n_threads = n_threads) 113 | return(out) 114 | }) 115 | } 116 | else { 117 | auc_stat = bplapply(colnames(nhoods_sce) , function(hood_id){ 118 | out = .get_auc_single_hood(x , nhoods_sce , hood_id , min_cells = 4 , min_n_cells_per_sample = min_n_cells_per_sample , n_threads = 1) 119 | return(out) 120 | } , BPPARAM = BPPARAM) 121 | } 122 | auc_stat = do.call(rbind , auc_stat) 123 | # add Nhood 124 | meta_nhoods = data.frame(Nhood = 1:ncol(nhoods_sce) , Nhood_center = colnames(nhoods_sce)) 125 | auc_stat = merge(auc_stat , meta_nhoods , all.x = TRUE , all.y = FALSE) 126 | auc_stat = auc_stat[, c("Nhood", "Nhood_center" , "auc" , "auc_calculated")] 127 | auc_stat = auc_stat[order(auc_stat$Nhood) , ] 128 | return(auc_stat) 129 | } 130 | 131 | 132 | 133 | #' @importFrom SingleCellExperiment logcounts 134 | #' @importFrom SummarizedExperiment colData 135 | #' @import Augur 136 | .get_auc_single_hood = function(x , nhoods_sce , hood_id , min_cells = 4 , min_n_cells_per_sample = 3 , n_threads = 2){ 137 | out = .check_argument_correct(min_cells, .check_positive_integer, "Check min_cells - should be positive integer") 138 | # select cells 139 | current.cells = which(nhoods_sce[,hood_id] == 1) 140 | current.cells = rownames(nhoods_sce)[current.cells] 141 | current.sce = x[,colnames(x) %in% current.cells] 142 | current.sce = .filter_samples_with_low_n_cells_in_hood(current.sce , min_n_cells_per_sample = min_n_cells_per_sample) 143 | 144 | if (ncol(current.sce) > 0){ 145 | current.sce$celltype.dummy = "dummy" 146 | meta = as.data.frame(colData(current.sce)) 147 | tab = table(as.character(current.sce$milo_condition_id)) 148 | if (length(tab) == 2 & tab[1] >= min_cells & tab[2] >= min_cells){ 149 | auc = calculate_auc(logcounts(current.sce), meta, cell_type_col = "celltype.dummy", 150 | label_col = "milo_condition_id" , n_subsamples = 0 , 151 | subsample_size = min_cells , min_cells = min_cells , 152 | feature_perc = 1 , n_threads = n_threads , show_progress = FALSE) 153 | out = as.data.frame(auc$AUC) 154 | out$auc_calculated = TRUE 155 | 156 | } else { 157 | out = data.frame(cell_type = "dummy" , auc = NaN , auc_calculated = FALSE) 158 | } 159 | } else { 160 | out = data.frame(cell_type = "dummy" , auc = NaN , auc_calculated = FALSE) 161 | } 162 | out$Nhood_center = hood_id 163 | return(out) 164 | } 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /R/convert_de_stat.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' convert_de_stat 4 | #' 5 | #' Converts output of miloDE between \code{\link[base]{data.frame}} and \code{\linkS4class{SingleCellExperiment}} formats 6 | #' @param de_stat miloDE results, output of \code{\link{de_test_neighbourhoods}}; either in \code{data.frame} or \code{SingleCellExperiment} format. 7 | #' @param assay_names Character string specifying which fields should be treated as assays. 8 | #' Note that \code{logFC}, \code{pval}, \code{pval_corrected_across_genes} and \code{pval_corrected_across_nhoods} are hard-coded to be included in assays. 9 | #' @param coldata_names Character string specifying which fields should be treated as neighbourhood metadata. 10 | #' Note that \code{Nhood}, \code{Nhood_center}, \code{test_performed} are hard-coded to be included in coldata. 11 | #' 12 | #' \emph{Please note that \code{coldata_names} have to be the attributes of neighbourhoods (i.e. same values across different genes for the same neighbourhood).} 13 | #' @details 14 | #' This function converts results of \code{\link{de_test_neighbourhoods}} between \code{data.frame} object and \code{SingleCellExperiment}. 15 | #' 16 | #' \code{data.frame} object is more commonly used and might be easier to navigate, however, if total number of tests (i.e. gene x neighboourhoods) 17 | #' is overwhelmingly large, \code{SingleCellExperiment} might be more suitable and faster to work with. 18 | #' @return A \code{SingleCellExperiment} object or \code{data.frame} object, containing miloDE results 19 | #' @export 20 | #' @examples 21 | #' de_stat = expand.grid(gene = paste0("gene" , c(1:5)) , Nhood = c(1:10)) 22 | #' de_stat$Nhood_center = paste0("nhood_" , de_stat$Nhood) 23 | #' de_stat$logFC = sample(seq(-2,2,1) , nrow(de_stat) , 1) 24 | #' de_stat$pval = sample(c(0,1),nrow(de_stat),1) 25 | #' de_stat$pval_corrected_across_genes = sample(c(0,1),nrow(de_stat),1) 26 | #' de_stat$pval_corrected_across_nhoods = sample(c(0,1),nrow(de_stat),1) 27 | #' de_stat$test_performed = TRUE 28 | #' de_stat = convert_de_stat(de_stat) 29 | #' de_stat = convert_de_stat(de_stat) 30 | #' 31 | convert_de_stat = function(de_stat , 32 | assay_names = NULL, 33 | coldata_names = NULL){ 34 | 35 | assay_names = unique( c("logFC" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods", assay_names)) 36 | coldata_names = unique( c("Nhood" , "Nhood_center" , "test_performed" , coldata_names)) 37 | out = .check_de_stat_valid(de_stat , assay_names , coldata_names) 38 | 39 | if (is(de_stat , "SingleCellExperiment")){ 40 | message("Converting de_stat to 'data.frame' format") 41 | de_stat = .convert_from_sce(de_stat , assay_names = assay_names , coldata_names = coldata_names) 42 | } else if (is(de_stat , "data.frame")){ 43 | message("Converting de_stat to 'SingleCellExperiment' format") 44 | de_stat = .convert_from_df(de_stat , assay_names = assay_names , coldata_names = coldata_names) 45 | } 46 | return(de_stat) 47 | } 48 | 49 | 50 | #' @importFrom SingleCellExperiment SingleCellExperiment 51 | #' @importFrom S4Vectors DataFrame 52 | .convert_from_df = function(de_stat , assay_names , coldata_names){ 53 | 54 | de_stat = de_stat[order(de_stat$Nhood) , ] 55 | # convert assays 56 | de_assays = lapply(assay_names , function(assay_name){ 57 | return(.convert_from_df_one_var(de_stat , assay_name)) 58 | }) 59 | names(de_assays) = assay_names 60 | 61 | # convert coldata 62 | meta_nhoods = unique(de_stat[, coldata_names]) 63 | meta_nhoods = meta_nhoods[order(meta_nhoods$Nhood) , ] 64 | 65 | # combine 66 | de_stat = SingleCellExperiment(de_assays, colData = DataFrame(meta_nhoods)) 67 | 68 | colnames(de_stat) = meta_nhoods$Nhood 69 | return(de_stat) 70 | } 71 | 72 | 73 | 74 | #' @importFrom reshape2 dcast 75 | .convert_from_df_one_var = function(de_stat , var){ 76 | df = dcast(de_stat , formula = gene ~ Nhood , value.var = var) 77 | df$gene = as.character(df$gene) 78 | n_hoods = ncol(df)-1 79 | n_genes = nrow(df) 80 | genes = df$gene 81 | df = df[, 2:ncol(df)] 82 | df = as.matrix(df , ncol = n_hoods , nrow = n_genes) 83 | rownames(df) = genes 84 | return(df) 85 | } 86 | 87 | 88 | #' @importFrom SingleCellExperiment SingleCellExperiment 89 | #' @importFrom SummarizedExperiment colData 90 | .convert_from_sce = function(de_stat , assay_names , coldata_names){ 91 | 92 | de_stat = de_stat[ , order(de_stat$Nhood)] 93 | 94 | # combine assays 95 | de_assays = lapply(assay_names , function(assay_name){ 96 | return(.convert_from_sce_one_var(de_stat , assay_name)) 97 | }) 98 | de_assays = as.data.frame( do.call(cbind , de_assays) ) 99 | colnames(de_assays) = assay_names 100 | 101 | # add gene and Nhood 102 | df = as.data.frame( assay(de_stat , assay_names[1]) ) 103 | df = rownames_to_column(df , var = "gene") 104 | df = melt(df , id = "gene") 105 | colnames(df) = c("gene" , "Nhood" , "var") 106 | df$Nhood = as.numeric(as.character(df$Nhood)) 107 | df$Nhood = as.integer(df$Nhood) 108 | df = df[order(df$Nhood) , ] 109 | df = df[, c("gene" , "Nhood")] 110 | 111 | de_assays = cbind(df , de_assays) 112 | meta_nhoods = as.data.frame(colData(de_stat)) 113 | de_assays = merge(de_assays , meta_nhoods , by = c("Nhood") , all.x = TRUE) 114 | 115 | return(de_assays) 116 | } 117 | 118 | 119 | #' @importFrom SummarizedExperiment assay assay<- 120 | #' @importFrom tibble rownames_to_column 121 | #' @importFrom reshape2 melt 122 | .convert_from_sce_one_var = function(de_stat , var){ 123 | df = as.data.frame( assay(de_stat , var) ) 124 | df = rownames_to_column(df , var = "gene") 125 | df = melt(df , id = "gene") 126 | colnames(df) = c("gene" , "Nhood" , "var") 127 | df$Nhood = as.numeric(as.character(df$Nhood)) 128 | df$Nhood = as.integer(df$Nhood) 129 | df = df[order(df$Nhood) , ] 130 | return(df$var) 131 | } 132 | 133 | 134 | -------------------------------------------------------------------------------- /R/estimate_neighbourhood_sizes.R: -------------------------------------------------------------------------------- 1 | #' estimate_neighbourhood_sizes 2 | #' 3 | #' For a grid of \code{k}, returns neighbourhood size distribution; this will help a user to select an appropriate \code{k} 4 | #' @param x A \code{\linkS4class{SingleCellExperiment}} object. 5 | #' @param reducedDim_name Defines the slot in \code{reducedDim(x)} to use as the embedding for graph construction. 6 | #' @param k_grid Vector of positive integers, defines how many neighbours to use for the neighbourhood assignment. 7 | #' @param prop Numerical, between 0 and 1, defines which fraction of cells to use as neighbourhood centres. Default \code{prop = 0.2}. 8 | #' @param order In \code{c(1,2)}, defines which order of neighbours to use. Default \code{order = 2}. 9 | #' @param filtering In \code{c(TRUE,FALSE)}, defines whether to filter neighbourhoods (reduces computing time downstream). Default \code{filtering = TRUE}. 10 | #' @param k_init Positive integer, defines how many neighbours to use for identifying \sQuote{index} cells. Default \code{k_init = 50}. 11 | #' @param d Positive integer, defines how many dimensions from \code{reducedDim(x)} to use. Default \code{d = 30}. 12 | #' @param cluster_id Character specifying which field in \code{colData(x)} to use for 'localised' neighbourhood size estimation. 13 | #' This might be useful if dataset is rather big (which will result in an excessive running time). 14 | #' In case \code{cluster_id} is provided, we will calculate neighbourhood size distribution within individual clusters and aggregate results 15 | #' across clusters in order to speed up the process (note that it might result in slightly biased estimates). 16 | #' Default \code{cluster_id = NULL}, in which case neighbourhood sizes will be estimated for the whole dataset. 17 | #' @param plot_stat Boolean specifying whether to plot the stat. Default \code{plot_stat = TRUE}. 18 | #' @param verbose Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}. 19 | #' @details 20 | #' This function returns an estimated distribution of neighbourhood sizes for different \code{k} values 21 | #' (for the selected by user \code{order}; if you want to estimates for both 1st and 2nd \code{order}, run this twice with changing \code{order}). 22 | #' This can help to gauge whether the neighbourhood size distribution is appropriate for the selected \code{k}, since \code{\link{de_test_neighbourhoods}} takes a while to complete. 23 | #' 24 | #' Note that this function also might take a while to complete on big datasets (> 70k cells), and in this case we provide an option to estimate neighbourhood 25 | #' sizes within annotated clusters (passed in \code{cluster_id}), which will be considerably faster, however, might result in slightly biased estimates. 26 | #' @return \code{data.frame} object, in which each row corresponds to \code{k} and 5 columns correspond to min, q25, median, q75 and max of neighbourhood size distributions; also returns a boxplot 27 | #' @export 28 | #' @importFrom miloR Milo buildGraph graph<- graph nhoods<- nhoodIndex<- buildNhoodGraph 29 | #' @importFrom tibble rownames_to_column 30 | #' @importFrom SummarizedExperiment colData 31 | #' @import ggplot2 32 | #' @importFrom grDevices colorRampPalette 33 | #' @importFrom stats quantile 34 | #' @examples 35 | #' require(SingleCellExperiment) 36 | #' n_row = 500 37 | #' n_col = 100 38 | #' n_latent = 5 39 | #' sce = SingleCellExperiment(assays = list(counts = 40 | #' floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 41 | #' rownames(sce) = as.factor(1:n_row) 42 | #' colnames(sce) = c(1:n_col) 43 | #' sce$cell = colnames(sce) 44 | #' reducedDim(sce , "reduced_dim") = 45 | #' matrix(rnorm(n_col*n_latent), ncol=n_latent) 46 | #' out = estimate_neighbourhood_sizes(sce, k_grid = c(5,10), 47 | #' reducedDim_name = "reduced_dim") 48 | #' 49 | estimate_neighbourhood_sizes = function(x, reducedDim_name , k_grid = seq(10,100,10) , order = 2, prop = 0.1 , filtering = TRUE, 50 | k_init = 50 , d = 30 , cluster_id = NULL, plot_stat = TRUE , verbose = TRUE){ 51 | 52 | out = .check_argument_correct(x, .check_sce, "Check x - something is wrong (gene names unique? reducedDim.name is not present?)") & 53 | .check_argument_correct(k_grid, is.numeric, "Check k_grid - should be numeric vector") & 54 | .check_argument_correct(prop, .check_prop, "Check prop - should be positive number between 0 and 1") & 55 | .check_argument_correct(order, function(x) .check_arg_within_options(x, c(1,2)), 56 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)") & 57 | .check_argument_correct(filtering, .check_boolean, "Check filtering - should be either TRUE or FALSE") & 58 | .check_argument_correct(reducedDim_name, is.character, "Check reducedDim_name - should be character vector") & 59 | .check_argument_correct(k_init, .check_positive_integer, "Check k_init - should be positive integer") & 60 | .check_argument_correct(d, .check_positive_integer, "Check d - should be positive integer") & 61 | .check_argument_correct(verbose, .check_boolean, "Check verbose - should be either TRUE or FALSE") & 62 | .check_reducedDim_in_sce(x , reducedDim_name) & .check_k_grid(k_grid) 63 | 64 | # check that cluster_id is in colData(x) 65 | if (!is.null(cluster_id)){ 66 | if (!cluster_id %in% colnames(colData(x))){ 67 | stop("If cluster_id not NULL, it should be in colnames(colData(x))") 68 | } 69 | } 70 | 71 | if (is.null(colnames(x))){ 72 | colnames(x) = as.character(c(1:ncol(x))) 73 | } 74 | 75 | quantile_vec = seq(0,1,0.25) 76 | # check that k_grid reasonable -- at least 2 values, the the highest is smaller than 1000; 77 | # otherwise warn 78 | k_grid = sort(unique(k_grid)) 79 | if (verbose){ 80 | message(paste0("Running for next k values:\n" , paste(k_grid , collapse = ", "))) 81 | } 82 | 83 | if (is.null(cluster_id)){ 84 | stat = lapply(k_grid , function(k){ 85 | sce_milo = assign_neighbourhoods(x , k = k , prop = prop , order = order , filtering = filtering, 86 | reducedDim_name = reducedDim_name , k_init = k_init , d = d , verbose = FALSE) 87 | out = quantile(colSums(nhoods(sce_milo)) , probs = quantile_vec) 88 | if (verbose){ 89 | message(paste0("Finished for k = ", k)) 90 | } 91 | return(out) 92 | }) 93 | } else { 94 | meta = as.data.frame(colData(x)) 95 | clusters = table( meta[, cluster_id] ) 96 | # select only big clusters 97 | clusters = names(clusters)[clusters > 2*max(k_grid)] 98 | if (length(clusters) == 0){ 99 | stop("All specified clusters have # cells < 2*max(k). We recommed to provide lower clustering resolution, decreasing max(k) or set cluster_id = NULL.") 100 | } 101 | else { 102 | stat = lapply(k_grid , function(k){ 103 | stat_per_k = sapply(clusters , function(cluster){ 104 | idx = which(meta[, cluster_id] == cluster) 105 | sce_milo = assign_neighbourhoods(x[,idx] , k = k , prop = prop , order = order , filtering = filtering, 106 | reducedDim_name = reducedDim_name , k_init = k_init , d = d , verbose = FALSE) 107 | out = colSums(nhoods(sce_milo)) 108 | return(out) 109 | }) 110 | stat_per_k = unlist(stat_per_k) 111 | out = quantile(stat_per_k , probs = quantile_vec) 112 | if (verbose){ 113 | message(paste0("Finished for k = ", k)) 114 | } 115 | return(out) 116 | }) 117 | } 118 | } 119 | 120 | stat = as.data.frame( do.call(rbind , stat) ) 121 | rownames(stat) = k_grid 122 | stat = rownames_to_column(stat , var = "k") 123 | if (verbose){ 124 | message(paste0("Finished the estimation of neighbourhood sizes ~ k dependancy (order = " , order , ").")) 125 | } 126 | colnames(stat) = c("k" , "min" , "q25" , "med" , "q75" , "max") 127 | stat$k = factor(stat$k , levels = k_grid) 128 | 129 | if (plot_stat){ 130 | p_stat = tryCatch( 131 | { 132 | p = ggplot(stat, aes(k)) + 133 | geom_boxplot( aes(ymin = min, lower = q25, middle = med, upper = q75, ymax = max , fill = k), stat = "identity") + 134 | theme_bw() + 135 | scale_fill_manual(values = colorRampPalette(brewer.pal(11, "Spectral"))(length(k_grid))) + 136 | labs( y = "Neighbourhood size") 137 | p 138 | }, 139 | error=function(err){ 140 | warning("Can not return plot.") 141 | return(NULL) 142 | } 143 | ) 144 | print(p_stat) 145 | } 146 | return(stat) 147 | } 148 | 149 | -------------------------------------------------------------------------------- /R/filter_neighbourhoods.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' filter_neighbourhoods 4 | #' 5 | #' Filtering redundant neighbourhoods, using the greedy approach to set cover problem 6 | #' @param x A \code{\linkS4class{Milo}} object. 7 | #' @details 8 | #' This function refines neighbourhood assignment and discards redundant neighbourhoods. This call is highly recommended since it reduces computational time greatly. 9 | #' It is called directly in \code{\link{assign_neighbourhoods}} if \code{filtering=TRUE}, and also can be called independently post hoc. 10 | #' 11 | #' Under the hood, it adapts \code{\link[RcppGreedySetCover]{greedySetCover}} function, which implements greedy solution to NP-hard \sQuote{set cover} problem. 12 | #' In this solution, all neighbourhoods are sorted in the decreasing order of their size (i.e. number of cells), 13 | #' and neighbourhoods are iteratively added to the final neighbourhood set if they contain at least one cell not \sQuote{covered} by previously added neighbourhoods. 14 | #' @return \code{Milo} object with refined neighbourhood assignment 15 | #' @export 16 | #' @importFrom RcppGreedySetCover greedySetCover 17 | #' @importFrom miloR buildNhoodGraph nhoodIndex nhoods graph graph<- 18 | #' @examples 19 | #' require(SingleCellExperiment) 20 | #' n_row = 500 21 | #' n_col = 100 22 | #' n_latent = 5 23 | #' sce = SingleCellExperiment(assays = 24 | #' list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 25 | #' rownames(sce) = as.factor(1:n_row) 26 | #' colnames(sce) = c(1:n_col) 27 | #' sce$cell = colnames(sce) 28 | #' reducedDim(sce , "reduced_dim") = 29 | #' matrix(rnorm(n_col*n_latent), ncol=n_latent) 30 | #' sce = assign_neighbourhoods(sce, 31 | #' reducedDim_name = "reduced_dim" , k = 10 , order = 1) 32 | #' sce = filter_neighbourhoods(sce) 33 | filter_neighbourhoods = function(x){ 34 | 35 | #args = c(as.list(environment())) 36 | #out = .general_check_arguments(args) 37 | out = .check_argument_correct(x, .check_sce_milo, "Check x - something is wrong. Calculate 'assign_neighbourhoods' first.)") 38 | 39 | nhoods_sce = nhoods(x) 40 | stat_hoods = lapply(1:ncol(nhoods_sce) , function(i){ 41 | current.cells = which(nhoods_sce[,i] == 1) 42 | out = data.frame(set = colnames(nhoods_sce)[i], 43 | element = names(current.cells)) 44 | return(out) 45 | }) 46 | stat_hoods = do.call(rbind, stat_hoods) 47 | stat_filtered = suppressMessages( greedySetCover(stat_hoods) ) 48 | hoods_filtered = unique(stat_filtered$set) 49 | if (length(hoods_filtered) > 1){ 50 | nhoods_sce = nhoods_sce[, colnames(nhoods_sce) %in% hoods_filtered] 51 | } 52 | else { 53 | nhoods_sce = as.matrix(nhoods_sce[, colnames(nhoods_sce) %in% hoods_filtered]) 54 | colnames(nhoods_sce) = hoods_filtered 55 | } 56 | nhoods(x) = nhoods_sce 57 | x = buildNhoodGraph(x) 58 | nhoodIndex(x) = nhoodIndex(x)[match(colnames(nhoods(x)) , nhoodIndex(x))] 59 | return(x) 60 | } 61 | -------------------------------------------------------------------------------- /R/rank_neighbourhoods_by_DE_magnitude.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' rank_neighbourhoods_by_DE_magnitude 4 | #' 5 | #' Ranks neighbourhoods by the magnitude of DE: number of DE genes and number of \sQuote{specifically} DE genes 6 | #' @param de_stat Output of miloDE (\code{\link{de_test_neighbourhoods}}), either in \code{data.frame} or \code{SingleCellExperiment} format. 7 | #' @param pval.thresh A scalar specifying which threshold to use for deciding on significance for gene being DE in a neighbourhood. Default \code{pval.thresh = 0.1}. 8 | #' @param z.thresh A scalar specifying which threshold to use for deciding on which z-normalised p-values are going to be considered specifically DE. Default \code{z.thresh = -3}. 9 | #' @details 10 | #' To calculate number of DE genes per neighbourhood, we use \code{pval_corrected_across_genes}. 11 | #' Accordingly, for each neighbourhood we calculate how many genes has p-values lower than designated threshold. 12 | #' 13 | #' To calculate number of \sQuote{specifically} DE genes, we first z-normalise \code{pval_corrected_across_nhoods} (for each gene) and then for each 14 | #' neighbourhood, calculate how many genes have z-normalised p-values lower than designated threshold. 15 | #' 16 | #' \emph{Note that for this analysis we set NaN p-values (raw and corrected) to 1 - interpret accordingly.} 17 | #' @return \code{data.frame}, with calculated number-DE-genes and number-specific-DE-genes for each neighbourhood 18 | #' @export 19 | #' @importFrom SummarizedExperiment colData 20 | #' @importFrom stats sd 21 | #' @examples 22 | #' de_stat = expand.grid(gene = paste0("gene" , c(1:5)) , Nhood = c(1:10)) 23 | #' de_stat$Nhood_center = paste0("nhood_" , de_stat$Nhood) 24 | #' de_stat$logFC = sample(seq(-2,2,1) , nrow(de_stat) , 1) 25 | #' de_stat$pval = sample(c(0,1),nrow(de_stat),1) 26 | #' de_stat$pval_corrected_across_genes = sample(c(0,1),nrow(de_stat),1) 27 | #' de_stat$pval_corrected_across_nhoods = sample(c(0,1),nrow(de_stat),1) 28 | #' de_stat$test_performed = TRUE 29 | #' out = rank_neighbourhoods_by_DE_magnitude(de_stat) 30 | #' 31 | rank_neighbourhoods_by_DE_magnitude = function(de_stat, pval.thresh = 0.1, z.thresh = -3 ){ 32 | 33 | out = .check_de_stat_valid(de_stat , 34 | assay_names = c("logFC" , "pval" , "pval_corrected_across_nhoods" , "pval_corrected_across_genes") , 35 | coldata_names = c("Nhood" , "Nhood_center")) & 36 | .check_pval_thresh(pval.thresh) & .check_z_thresh(z.thresh) 37 | 38 | if (is(de_stat , "data.frame")){ 39 | de_stat = convert_de_stat(de_stat , 40 | assay_names = c("logFC" , "pval" , "pval_corrected_across_nhoods" , "pval_corrected_across_genes") , 41 | coldata_names = c("Nhood" , "Nhood_center" , "test_performed")) 42 | de_stat = de_stat[ , order(de_stat$Nhood)] 43 | } 44 | #de_stat = de_stat[, de_stat$design_matrix_suitable] 45 | 46 | # calculate number of DE genes 47 | assay_pval_corrected_across_genes = assay(de_stat , "pval_corrected_across_genes") 48 | idx = which(is.na(assay_pval_corrected_across_genes)) 49 | assay_pval_corrected_across_genes[idx] = 1 50 | out_n_genes = colSums(assay_pval_corrected_across_genes < pval.thresh , na.rm = T) 51 | 52 | # calculate number of specifically DE genes 53 | assay_pval_corrected_across_nhoods = assay(de_stat , "pval_corrected_across_nhoods") 54 | idx = which(is.na(assay_pval_corrected_across_nhoods)) 55 | assay_pval_corrected_across_nhoods[idx] = 1 56 | assay_pval_corrected_across_nhoods = apply(assay_pval_corrected_across_nhoods , 1 , function(x){ 57 | return((x - mean(x, na.rm = T))/sd(x, na.rm = T)) 58 | }) 59 | assay_pval_corrected_across_nhoods = t(assay_pval_corrected_across_nhoods) 60 | out_n_specific_genes = colSums(assay_pval_corrected_across_nhoods < z.thresh , na.rm = T) 61 | 62 | out = cbind(out_n_genes , out_n_specific_genes) 63 | colnames(out) = c("n_DE_genes" , "n_specific_DE_genes") 64 | 65 | meta_nhoods = as.data.frame(colData(de_stat)) 66 | meta_nhoods = cbind(meta_nhoods , out) 67 | 68 | return(meta_nhoods) 69 | 70 | } 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /R/sce_mouseEmbryo.R: -------------------------------------------------------------------------------- 1 | #' Chimeric mouse embryo, Tal1- (Pijuan-Sala et al., 2019) 2 | #' 3 | #' \code{SingleCellExperiment} object, containing counts 4 | #' (raw are pulled using \code{\link[MouseGastrulationData]{Tal1ChimeraData}} 5 | #' and log-normalised are estimated using \code{\link[scuttle]{logNormCounts}}) matrices 6 | #' for chimeric mouse embryos (both Tal1+ and Tal1-). 7 | #' 8 | #' Additionally, we subselected only 3 cell types (Endothelium , Blood progenitors 2, Neural crest) 9 | #' and 300 HVGs. 10 | #' 11 | #' \code{colData(sce_mouseEmbryo)} contains information about replicate (\code{sample}) and Tal1 status (\code{tomato = TRUE} means Tal1-). 12 | #' 13 | #' @docType data 14 | #' @usage data(sce_mouseEmbryo) 15 | #' 16 | #' @format \code{\linkS4class{SingleCellExperiment}} object 17 | #' 18 | #' @name sce_mouseEmbryo 19 | #' @source MouseGastrulationData package 20 | NULL 21 | -------------------------------------------------------------------------------- /R/spatial_pval_adjustment.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' spatial_pval_adjustment 4 | #' 5 | #' Performs p-values multiple testing correction across neighbourhoods, with accounting for the overlap 6 | #' @param nhoods_x Should be extracted from x as \code{nhoods(x)}. 7 | #' @param pvalues Vector of p-values. 8 | #' @details 9 | #' This function is not intended to be run by itself, but can be useful if the user wants to perform \sQuote{spatially aware} multiple testing correction. 10 | #' 11 | #' Under the hood it performs weighted version of BH correction, where weights are reciprocal to the local desnity of a neighbourhood. 12 | #' Accordingly, big and/or highly connected neighbourhoods will have lower weights. 13 | #' 14 | #' @return Vector with \sQuote{spatially} (i.e. across neighbourhoods) adjusted p-values 15 | #' @export 16 | #' @examples 17 | #' require(SingleCellExperiment) 18 | #' require(miloR) 19 | #' n_row = 500 20 | #' n_col = 100 21 | #' n_latent = 5 22 | #' sce = SingleCellExperiment(assays = list(counts = 23 | #' floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 24 | #' rownames(sce) = as.factor(1:n_row) 25 | #' colnames(sce) = c(1:n_col) 26 | #' sce$cell = colnames(sce) 27 | #' reducedDim(sce , "reduced_dim") = 28 | #' matrix(rnorm(n_col*n_latent), ncol=n_latent) 29 | #' sce = assign_neighbourhoods(sce, 30 | #' reducedDim_name = "reduced_dim" , k = 10 , order = 1) 31 | #' nhoods_x = nhoods(sce) 32 | #' pvalues = runif(n = ncol(nhoods_x) , min = 0 , max = 1) 33 | #' out = spatial_pval_adjustment(nhoods_x, pvalues = pvalues) 34 | spatial_pval_adjustment = function(nhoods_x , pvalues){ 35 | # we will only calculate weights for neighbourhoods in which we are testing (not NaNs) 36 | idx_not_nan = which(!is.na(pvalues)) 37 | if (length(idx_not_nan) > 1){ 38 | weights = .get_weights(as.matrix(nhoods_x[,idx_not_nan])) 39 | 40 | out = .check_weights_and_pvals(weights , pvalues[idx_not_nan] , as.matrix(nhoods_x[,idx_not_nan])) & .check_nhoods_matrix(nhoods_x) 41 | 42 | n_comparisons = length(pvalues) 43 | pvalues = pvalues[idx_not_nan] 44 | 45 | # calc correction 46 | o <- order(pvalues) 47 | pvalues <- pvalues[o] 48 | weights <- weights[o] 49 | adjp <- numeric(length(o)) 50 | adjp[o] <- rev(cummin(rev(sum(weights)*pvalues/cumsum(weights)))) 51 | adjp <- pmin(adjp, 1) 52 | 53 | # put eveyrhting together 54 | adjp_total = rep(NaN, 1,n_comparisons) 55 | adjp_total[idx_not_nan] = adjp 56 | } else { 57 | adjp_total = pvalues 58 | } 59 | return(adjp_total) 60 | } 61 | 62 | 63 | 64 | .get_weights = function(nhoods_x){ 65 | out = .check_nhoods_matrix(nhoods_x) 66 | intersect_mat <- crossprod(nhoods_x) 67 | t.connect <- unname(rowSums(intersect_mat)) 68 | weights<- 1/unlist(t.connect) 69 | return(weights) 70 | } 71 | 72 | 73 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # Contains various check functions to examine whether variables are of the right format 2 | 3 | 4 | 5 | ### checks ### 6 | 7 | # sce-milo check: that is used in de_test_all_hoods, de_test_single_hood, filter_hoods, subset_milo 8 | #' @importFrom miloR Milo nhoods graph graph<- nhoods<- nhoodIndex<- buildNhoodGraph 9 | #' @importFrom methods is 10 | .check_sce_milo = function(x){ 11 | if (!is(x , "Milo")){ 12 | stop("x should be a Milo object. Please run `assign_neighbourhoods` first.") 13 | return(FALSE) 14 | } else if (length(miloR::graph(x)) == 0){ 15 | stop("x should contain non-trivial graph. Please run `assign_neighbourhoods` first.") 16 | return(FALSE) 17 | } else if (sum(sum(nhoods(x))) == 0){ 18 | stop("x should have calculated nhoods. Please run 'assign_neighbourhoods' first.") 19 | return(FALSE) 20 | } else if ( nrow(nhoods(x)) == 1 & ncol(nhoods(x)) == 1 ){ 21 | stop("x should contain non-trivial nhoods. Please run `assign_neighbourhoods` first.") 22 | return(FALSE) 23 | } else if (isEmpty(nhoodIndex(x))){ 24 | stop("x should have calculated nhoodIndex. Please run 'assign_neighbourhoods' first.") 25 | return(FALSE) 26 | } else { 27 | return(TRUE) 28 | } 29 | } 30 | 31 | # sce-milo check wo/ calculation of the length of graph: that is used in plotting functions 32 | #' @importFrom miloR Milo nhoods graph graph<- nhoods<- nhoodIndex<- buildNhoodGraph 33 | #' @importFrom methods is 34 | .check_sce_milo_wo_graph = function(x){ 35 | if (!is(x , "Milo")){ 36 | stop("x should be a Milo object. Please run `assign_neighbourhoods` first.") 37 | return(FALSE) 38 | } else if (sum(sum(nhoods(x))) == 0){ 39 | stop("x should have calculated nhoods. Please run 'assign_neighbourhoods' first.") 40 | return(FALSE) 41 | } else if ( nrow(nhoods(x)) == 1 & ncol(nhoods(x)) == 1 ){ 42 | stop("x should contain non-trivial nhoods. Please run `assign_neighbourhoods` first.") 43 | return(FALSE) 44 | } else if (isEmpty(nhoodIndex(x))){ 45 | stop("x should have calculated nhoodIndex. Please run 'assign_neighbourhoods' first.") 46 | return(FALSE) 47 | } else { 48 | return(TRUE) 49 | } 50 | } 51 | 52 | 53 | 54 | # sce check: that is used in add_embedding 55 | #' @importFrom SingleCellExperiment reducedDimNames 56 | #' @importFrom SummarizedExperiment assays<- assays 57 | #' @importFrom miloR Milo 58 | #' @importFrom methods is 59 | .check_sce = function(x){ 60 | if (!is(x , "SingleCellExperiment") & !is(x , "Milo")){ 61 | stop("x should be a SingleCellExperiment or Milo object.") 62 | return(FALSE) 63 | } else if (!("counts" %in% names(assays(x)))){ 64 | stop("x should contain 'counts' assay that will be used to calculate DE. If counts are stored in different assay, please move them to slot 'counts'.") 65 | return(FALSE) 66 | } else if (length(unique(rownames(x))) < nrow(x) ){ 67 | stop("x should have unique rownames.") 68 | return(FALSE) 69 | } else if (!is.null(colnames(x)) & length(unique(colnames(x))) < ncol(x)){ 70 | stop("If colnames(x) exist, they should be unique.") 71 | return(FALSE) 72 | } else { 73 | return(TRUE) 74 | } 75 | } 76 | 77 | 78 | 79 | 80 | 81 | #' @importFrom igraph is_igraph 82 | .valid_nhood <- function(x){ 83 | # check for a valid nhood slot 84 | n_neigh <- ncol(nhoods(x)) 85 | is_not_empty <- n_neigh > 0 86 | if (is_not_empty) { 87 | return(TRUE) 88 | } else { 89 | return(FALSE) 90 | } 91 | } 92 | 93 | #' @importFrom igraph is_igraph 94 | .valid_graph <- function(x){ 95 | # check for a valid graph 96 | if(isTRUE(is_igraph(x))){ 97 | return(TRUE) 98 | } else{ 99 | return(FALSE) 100 | } 101 | } 102 | 103 | 104 | 105 | #' @importFrom SummarizedExperiment assayNames 106 | .check_assay_in_sce = function(x , assay_type){ 107 | if (.check_sce(x)){ 108 | if (!assay_type %in% assayNames(x)){ 109 | stop("assay_type should be in assayNames(x)") 110 | return(FALSE) 111 | } 112 | else { 113 | return(TRUE) 114 | } 115 | } 116 | else { 117 | return(FALSE) 118 | } 119 | } 120 | 121 | 122 | #' @importFrom SummarizedExperiment colData 123 | .check_condition_in_coldata_sce = function(x , condition_id){ 124 | if (.check_sce(x)){ 125 | if (!(condition_id %in% colnames(colData(x)))){ 126 | stop("'condition_id' should be in colData(x)") 127 | return(FALSE) 128 | } 129 | else { 130 | meta = as.data.frame(colData(x)) 131 | tab = table(meta[, condition_id]) 132 | if (length(tab) < 2){ 133 | stop("x should have at least two levels for tested conditions.") 134 | return(FALSE) 135 | } else{ 136 | return(TRUE) 137 | } 138 | } 139 | } 140 | else { 141 | return(FALSE) 142 | } 143 | } 144 | 145 | #' @importFrom SummarizedExperiment colData 146 | .check_sample_and_condition_id_valid = function(x , condition_id , sample_id){ 147 | 148 | if (condition_id == sample_id){ 149 | stop("'sample_id' and 'condition_id' can not be the same") 150 | return(FALSE) 151 | } 152 | else { 153 | meta = as.data.frame(colData(x)) 154 | tab = table(meta[, sample_id] , meta[, condition_id]) 155 | tab = sapply(1:nrow(tab) , function(i) sum(tab[i,] > 0)) 156 | if (mean(tab == 1) < 1){ 157 | stop("Each sample_id should be associated with one condition") 158 | return(FALSE) 159 | } 160 | else { 161 | return(TRUE) 162 | } 163 | } 164 | } 165 | 166 | #' @importFrom SummarizedExperiment colData 167 | .check_var_in_coldata_sce = function( x , var , var_intended){ 168 | if (.check_sce(x)){ 169 | if (!(var %in% colnames(colData(x)))){ 170 | stop(paste0("'", var_intended, "'", " should be in colData(x)")) 171 | return(FALSE) 172 | } 173 | else { 174 | return(TRUE) 175 | } 176 | } 177 | else { 178 | return(FALSE) 179 | } 180 | } 181 | 182 | 183 | #' @importFrom miloR nhoods 184 | .check_nhood_stat = function(nhood_stat , x){ 185 | if (!is(nhood_stat , "data.frame")){ 186 | stop("'nhood_stat' should be a data.frame.") 187 | return(FALSE) 188 | } 189 | else { 190 | if (!"Nhood" %in% colnames(nhood_stat)){ 191 | stop("'nhood_stat' should contain column 'Nhood'.") 192 | return(FALSE) 193 | } 194 | else { 195 | if (!is.numeric(nhood_stat$Nhood)){ 196 | stop("'nhood_stat$Nhood' should be numeric.") 197 | return(FALSE) 198 | } 199 | else { 200 | nhoods_sce = nhoods(x) 201 | if (mean(nhood_stat$Nhood %in% c(1:ncol(nhoods_sce))) < 1){ 202 | stop("'nhood_stat$Nhood' should be within c(1:ncol(nhoods(x))).") 203 | return(FALSE) 204 | } 205 | else { 206 | return(TRUE) 207 | } 208 | } 209 | } 210 | } 211 | } 212 | 213 | 214 | #' @importFrom SummarizedExperiment colData 215 | .check_covariates_in_coldata_sce = function(x , covariates){ 216 | if (.check_sce(x)){ 217 | if (is.null(covariates)){ 218 | return(TRUE) 219 | } 220 | else { 221 | coldata = colnames(colData(x)) 222 | 223 | covariates_exist = sapply(covariates , function(covariate){ 224 | out = as.numeric(covariate %in% coldata) 225 | return(out) 226 | }) 227 | if (sum(covariates_exist) < length(covariates)){ 228 | stop("All covariates should be colnames of colData(x).") 229 | return(FALSE) 230 | } 231 | else { 232 | covariates_w_contrast = sapply(covariates , function(covariate){ 233 | tab = table(colData(x)[, covariate]) 234 | out = as.numeric(length(tab) != 1) 235 | return(out) 236 | }) 237 | if (sum(covariates_w_contrast) < length(covariates)){ 238 | stop("All covariates should have more than 1 contrast.") 239 | return(FALSE) 240 | } 241 | else { 242 | return(TRUE) 243 | } 244 | } 245 | } 246 | } 247 | else { 248 | return(FALSE) 249 | } 250 | } 251 | 252 | 253 | 254 | 255 | #' @importFrom SummarizedExperiment colData 256 | .check_cell_id_in_sce = function(x , cell_id){ 257 | if (is.null(cell_id) & is.null(colnames(x))){ 258 | stop("If colnames(x) are NULL, cell_id has to be specified in order to assgin unique cell identifiers.") 259 | return(FALSE) 260 | } else { 261 | if (is.null(colnames(x))){ 262 | if (!cell_id %in% colnames(colData(x))){ 263 | stop("cell_id should be in colData(x)") 264 | return(FALSE) 265 | } 266 | else { 267 | return(TRUE) 268 | } 269 | } 270 | else { 271 | return(TRUE) 272 | } 273 | } 274 | } 275 | 276 | 277 | 278 | .check_genes_in_sce = function(x, genes){ 279 | if (.check_sce(x)){ 280 | if (!is.null(genes)){ 281 | if (mean(genes %in% rownames(x)) < 1){ 282 | stop("Some gene names are missing from x") 283 | return(FALSE) 284 | } 285 | else { 286 | return(TRUE) 287 | } 288 | } 289 | else { 290 | return(TRUE) 291 | } 292 | } 293 | else { 294 | return(FALSE) 295 | } 296 | } 297 | 298 | 299 | #' @importFrom SingleCellExperiment reducedDimNames 300 | .check_reducedDim_in_sce = function(x , reducedDim_name){ 301 | if (.check_sce(x)){ 302 | if (!reducedDim_name %in% reducedDimNames(x)){ 303 | stop("reducedDim_name should be in reducedDimNames(x).") 304 | return(FALSE) 305 | } 306 | } 307 | else { 308 | return(FALSE) 309 | } 310 | } 311 | 312 | 313 | #' @importFrom S4Vectors isEmpty 314 | .check_cells_ref_and_query = function(cells_sce , cells_ref , cells_query){ 315 | if (mean(cells_ref %in% cells_sce) < 1){ 316 | stop("Some of cells_ref are not present.") 317 | return(FALSE) 318 | } else if (mean(cells_query %in% cells_sce) < 1){ 319 | stop("Some of cells_query are not present.") 320 | return(FALSE) 321 | } else if (!isEmpty(intersect(cells_ref , cells_query))){ 322 | stop("cells_ref and cells_query can not overlap.") 323 | return(FALSE) 324 | } else { 325 | return(TRUE) 326 | } 327 | } 328 | 329 | 330 | #' @importFrom SummarizedExperiment assayNames assayNames<- colData 331 | .check_de_stat_valid = function(de_stat , assay_names , coldata_names){ 332 | if (!class(de_stat) %in% c("data.frame" , "SingleCellExperiment")){ 333 | stop("de_stat should be either data.frame or SingleCellExperiment object.\n 334 | To get valid de_stat object, please run 'de_test_neighbourhoods.R'") 335 | return(FALSE) 336 | } else if (length(intersect(assay_names, coldata_names)) > 0){ 337 | stop("assay_names and coldata_names can not overlap") 338 | return(FALSE) 339 | } else if (is(de_stat , "data.frame")){ 340 | cols = colnames(de_stat) 341 | cols_required = c(assay_names , coldata_names) 342 | if (mean(cols_required %in% cols) < 1){ 343 | stop("colnames(de_stat) missing some of the assay_names or coldata_names.") 344 | return(FALSE) 345 | } else if (!is.numeric(de_stat$Nhood)) { 346 | stop("Nhood field should be numeric. To get valid de_stat object, please run 'de_test_neighbourhoods.R'") 347 | return(FALSE) 348 | } 349 | } else if (is(de_stat , "SingleCellExperiment")){ 350 | cols = assayNames(de_stat) 351 | if (mean(assay_names %in% cols) < 1){ 352 | stop("de_stat missing some of the required assays.") 353 | return(FALSE) 354 | } else { 355 | meta_nhoods = as.data.frame(colData(de_stat)) 356 | if (mean(coldata_names %in% colnames(meta_nhoods)) < 1){ 357 | stop("de_stat missing some of the coldata_names") 358 | return(FALSE) 359 | } else if (!is.numeric(de_stat$Nhood)) { 360 | stop("colData field 'Nhood' should be numeric. To get valid de_stat object, please run 'de_test_neighbourhoods.R'") 361 | return(FALSE) 362 | } 363 | } 364 | } else { 365 | return(TRUE) 366 | } 367 | } 368 | 369 | 370 | # 371 | # .check_argument_correct = function(dots, arg_name , fun , message){ 372 | # if (arg_name %in% names(dots)){ 373 | # arg = dots[[which(names(dots) == arg_name)]] 374 | # out = fun(arg) 375 | # if (!out){ 376 | # stop(message) 377 | # } 378 | # return(out) 379 | # } 380 | # else { 381 | # return(TRUE) 382 | # } 383 | # } 384 | # 385 | 386 | .check_weights = function(weights){ 387 | if (!is.numeric(weights)){ 388 | stop("weights should be a numeric vector. To get valid weights, run 'get_weights.R'") 389 | return(FALSE) 390 | } else { 391 | if (sum(is.na(weights)) > 0){ 392 | stop("weights can not contain NaNs. To get valid weights, run 'get_weights.R'") 393 | return(FALSE) 394 | } else if (sum(weights <= 0) > 0){ 395 | stop("weights should be positive. To get valid weights, run 'get_weights.R'") 396 | return(FALSE) 397 | } else { 398 | return(TRUE) 399 | } 400 | } 401 | } 402 | 403 | 404 | .check_weights_and_pvals = function(weights , pvalues , nhoods_sce){ 405 | if (!length(weights) == length(pvalues)){ 406 | stop("weights should be of the same size as pvalues.") 407 | return(FALSE) 408 | } else if (!length(weights) == ncol(nhoods_sce)){ 409 | stop("weights should be of the same size as number of columns in nhoods_sce.") 410 | return(FALSE) 411 | } else { 412 | return(TRUE) 413 | } 414 | } 415 | 416 | 417 | 418 | 419 | .check_argument_correct = function(arg , fun , message){ 420 | out = fun(arg) 421 | if (!out){ 422 | stop(message) 423 | } 424 | return(out) 425 | } 426 | 427 | 428 | .check_quantile_vec = function(quantile_vec){ 429 | if (!is.numeric(quantile_vec)){ 430 | stop("Check quantile_vec - should be numeric vector") 431 | return(FALSE) 432 | } else { 433 | quantile_vec = sort(unique(quantile_vec)) 434 | if (min(quantile_vec) < 0 | max(quantile_vec) > 1){ 435 | stop("quantile_vec should have all its values between 0 and 1. Please enter valid quantile_vec") 436 | return(FALSE) 437 | } 438 | else { 439 | return(TRUE) 440 | } 441 | } 442 | } 443 | 444 | 445 | .check_k_grid = function(k_grid){ 446 | if (!is.numeric(k_grid)){ 447 | stop("Check k_grid - should be numeric vector") 448 | return(FALSE) 449 | } else { 450 | k_grid = sort(unique(k_grid)) 451 | if (min(k_grid) < 0 | max(k_grid%%1 > 0)){ 452 | stop("Values of k_grid should be positive integers. Please enter valid k_grid.") 453 | return(FALSE) 454 | } 455 | else { 456 | if (length(k_grid) == 1){ 457 | warning("You only selected one value for k. If it is intended, we recommend to run directly 'assign_neighbourhoods'.") 458 | } 459 | if (max(k_grid) >= 1000){ 460 | warning("The highest selected value is > 1000. It is gonna cost computationally, and we generally do not recommend such high k. Consider reducing.") 461 | } 462 | return(TRUE) 463 | } 464 | } 465 | } 466 | 467 | 468 | .check_pval_thresh = function(pval.thresh){ 469 | if (!is.numeric(pval.thresh)){ 470 | stop("pval.thresh should be numeric") 471 | return(FALSE) 472 | } 473 | else { 474 | if (!length(pval.thresh) == 1){ 475 | stop("pval.thresh should be a single number") 476 | return(FALSE) 477 | } 478 | else { 479 | if (pval.thresh <= 0 | pval.thresh >=1){ 480 | stop("pval.thresh should be between 0 and 1") 481 | return(FALSE) 482 | } 483 | else { 484 | return(TRUE) 485 | } 486 | } 487 | } 488 | } 489 | 490 | 491 | 492 | .check_z_thresh = function(z.thresh){ 493 | if (!is.numeric(z.thresh)){ 494 | stop("z.thresh should be numeric") 495 | return(FALSE) 496 | } 497 | else { 498 | if (!length(z.thresh) == 1){ 499 | stop("z.thresh should be a single number") 500 | return(FALSE) 501 | } 502 | else { 503 | if (z.thresh > 0){ 504 | stop("z.thresh should be not higher than 0") 505 | return(FALSE) 506 | } 507 | else { 508 | return(TRUE) 509 | } 510 | } 511 | } 512 | } 513 | 514 | 515 | # 516 | # .general_check_arguments = function(dots){ 517 | # out = TRUE 518 | # out = .check_argument_correct(dots, "sce", .check_sce, "Check sce - something is wrong (gene names unique? reducedDim.name is not present?)") 519 | # out = .check_argument_correct(dots, "sce_milo", .check_sce_milo, "Check sce_milo - something is wrong. Calculate 'assign_hoods' first.)") 520 | # out = .check_argument_correct(dots, "genes", .check_string_or_null, "Check genes - should be NULL or character vector") 521 | # out = .check_argument_correct(dots, "genes_2_exclude", .check_string_or_null, "Check genes_2_exclude - should be NULL or character vector") 522 | # out = .check_argument_correct(dots, "n_hvgs", .check_positive_integer, "Check n_hvgs - should be positive integer") 523 | # out = .check_argument_correct(dots, "assay_type", function(x) .check_arg_within_options(x, c("counts", "logcounts")), 524 | # "Check assay_type - should be either 'counts' or 'logcounts'") 525 | # out = .check_argument_correct(dots, "reduction_type", function(x) .check_arg_within_options(x, c("Azimuth", "MNN")), 526 | # "Check reduction_type - should be either 'Azimuth' or 'MNN'") 527 | # out = .check_argument_correct(dots, "reducedDim_name", is.character, "Check reducedDim_name - should be character vector") 528 | # out = .check_argument_correct(dots, "sample_id", is.character, "Check sample_id - should be character vector") 529 | # out = .check_argument_correct(dots, "condition_id", is.character, "Check condition_id - should be character vector") 530 | # out = .check_argument_correct(dots, "cell_id", .check_string_or_null, "Check cell_id - should be NULL or string") 531 | # out = .check_argument_correct(dots, "d", .check_positive_integer, "Check d - should be positive integer") 532 | # out = .check_argument_correct(dots, "order", function(x) .check_arg_within_options(x, c(1,2)), 533 | # "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)") 534 | # out = .check_argument_correct(dots, "k", .check_positive_integer, "Check k - should be positive integer") 535 | # out = .check_argument_correct(dots, "k_init", .check_positive_integer, "Check k_init - should be positive integer") 536 | # out = .check_argument_correct(dots, "prop", .check_prop, "Check prop - should be positive number between 0 and 1") 537 | # out = .check_argument_correct(dots, "filtering", .check_boolean, "Check filtering - should be either TRUE or FALSE") 538 | # out = .check_argument_correct(dots, "k.grid", is.numeric, "Check k.grid - should be numeric vector") 539 | # out = .check_argument_correct(dots, "quantile_vec", is.numeric, "Check quantile_vec - should be numeric vector") 540 | # out = .check_argument_correct(dots, "discard_not_perturbed_hoods", .check_boolean, "Check discard_not_perturbed_hoods - should be either TRUE or FALSE") 541 | # out = .check_argument_correct(dots, "gene_selection", function(x) .check_arg_within_options(x, c("all", "none", "per_hood")), 542 | # "Check gene_selection - should be either 'all', 'none' or 'per_hood'") 543 | # out = .check_argument_correct(dots, "min_n_cells_per_sample", .check_positive_integer, "Check min_n_cells_per_sample - should be positive integer") 544 | # out = .check_argument_correct(dots, "min_count", .check_positive_integer, "Check min_count - should be positive integer") 545 | # out = .check_argument_correct(dots, "run_separately", .check_boolean, "Check run_separately - should be either TRUE or FALSE") 546 | # out = .check_argument_correct(dots, "covariates", .check_string_or_null, "Check covariates - should be NULL or character vector") 547 | # out = .check_argument_correct(dots, "seed", .check_number_or_null, "Check seed - should be NULL or number") 548 | # return(out) 549 | # } 550 | 551 | 552 | .check_positive_integer = function(x){ 553 | out = TRUE 554 | if (!is.numeric(x)){ 555 | out = FALSE 556 | } else if (!x%%1 == 0 | x <= 0){ 557 | out = FALSE 558 | } 559 | return(out) 560 | } 561 | 562 | 563 | .check_non_negative = function(x){ 564 | out = TRUE 565 | if (!is.numeric(x)){ 566 | out = FALSE 567 | } else if (x < 0){ 568 | out = FALSE 569 | } 570 | return(out) 571 | } 572 | 573 | .check_design = function(x){ 574 | if (is(x , "formula")){ 575 | return(TRUE) 576 | } 577 | else { 578 | return(FALSE) 579 | } 580 | } 581 | 582 | 583 | .check_number_or_null = function(x){ 584 | out = TRUE 585 | if (!is.null(x)){ 586 | if (!is.numeric(x)){ 587 | out = FALSE 588 | } 589 | } 590 | return(out) 591 | } 592 | 593 | 594 | .check_arg_within_options = function(x , options){ 595 | out = TRUE 596 | if (is.null(x)){ 597 | out = FALSE 598 | } 599 | else if (!x %in% options){ 600 | out = FALSE 601 | } 602 | return(out) 603 | } 604 | 605 | .check_string_or_null = function(x){ 606 | out = TRUE 607 | if (!is.null(x)){ 608 | if (!is.character(x)){ 609 | out = FALSE 610 | } 611 | } 612 | return(out) 613 | } 614 | 615 | .check_prop = function(x){ 616 | out = TRUE 617 | if (!is.numeric(x)){ 618 | out = FALSE 619 | } else if (x <= 0 | x > 1){ 620 | out = FALSE 621 | } 622 | return(out) 623 | } 624 | 625 | 626 | .check_boolean = function(x){ 627 | out = TRUE 628 | if (is.null(x)){ 629 | out = FALSE 630 | } 631 | else if (!x %in% c(TRUE, FALSE)){ 632 | out = FALSE 633 | } 634 | return(out) 635 | } 636 | 637 | 638 | 639 | .check_nhoods_matrix = function(nhoods_sce){ 640 | unq_elements = unique(as.numeric(nhoods_sce)) 641 | if (mean(unq_elements %in% c(0,1)) < 1){ 642 | stop("All elements of nhoods matrix should be either 0 or 1. To get valid matrix, run nhoods(x).") 643 | return(FALSE) 644 | } 645 | else { 646 | return(TRUE) 647 | } 648 | } 649 | 650 | 651 | .check_subset_nhoods = function(subset_nhoods, nhoods_sce){ 652 | if (!is.numeric(subset_nhoods) & !is.logical(subset_nhoods)){ 653 | stop("'subset_nhoods' should be either numeric or logical.") 654 | return(FALSE) 655 | } 656 | else if (is.numeric(subset_nhoods)){ 657 | if (mean(subset_nhoods %in% c(1:ncol(nhoods_sce))) < 1){ 658 | stop("If 'subset_nhoods' is numeric vector, it should lie within c(1:ncol(nhoods(x))).") 659 | return(FALSE) 660 | } 661 | else { 662 | return(TRUE) 663 | } 664 | } 665 | else { 666 | if (!length(subset_nhoods) == ncol(nhoods_sce)){ 667 | stop("If 'subset_nhoods' is logical vector, it should be the same size as ncol(nhoods(x)).") 668 | return(FALSE) 669 | } 670 | else { 671 | return(TRUE) 672 | } 673 | } 674 | } 675 | 676 | #' @importFrom SummarizedExperiment colData 677 | #' @importFrom dplyr distinct 678 | #' @importFrom stats model.matrix 679 | .check_design_and_covariates_match = function(x , design , sample_id , covariates){ 680 | design.df = as.data.frame(colData(x)[,c(sample_id , covariates)]) 681 | design.df = distinct(design.df) 682 | out = tryCatch( 683 | { 684 | design = model.matrix(design , data = design.df) 685 | TRUE 686 | }, 687 | error=function(err){ 688 | stop("Some of the design's arguments are not in the covariate vector.") 689 | return(FALSE) 690 | } 691 | ) 692 | return(out) 693 | } 694 | 695 | 696 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # miloDE 3 | Framework for sensitive DE testing (using neighbourhoods). 4 | 5 | miloDE builds on an existing framework for DA testing called [Milo](https://pubmed.ncbi.nlm.nih.gov/34594043/). 6 | It exploits the notion of overlapping neighborhoods of homogeneous cells, constructed from graph-representation of scRNA-seq data, and performs testing within each neighborhood. Multiple testing correction is performed either across neighborhoods or across genes. 7 | 8 | Please see the [preprint](https://www.biorxiv.org/content/10.1101/2023.03.08.531744v1) for greater details. 9 | 10 |

11 | 12 |

13 | 14 | In addition to DE testing, we provide functionality to rank neighbourhoods by degree of the DE as well as plotting functions to visualise results. In the vignette, we showcase how you can carry out clustering analysis to group genes in co-regulated transcriptional modules. 15 | 16 | 17 | 18 | ## Installation 19 | 20 | ``` 21 | # Install development version: 22 | library(devtools) 23 | devtools::install_github("MarioniLab/miloDE") 24 | library(miloDE) 25 | 26 | *Note*: if you have troubles installing miloDE directly, try first to install miloR: 27 | BiocManager::install('miloR') 28 | 29 | ## If you plan to use parallelisation (desired for big datasets), 30 | please load `BiocParallel` and enable milticore parallel evaluation: 31 | library(BiocParallel) 32 | ncores = MY_NCORES 33 | mcparam = MulticoreParam(workers = ncores) 34 | register(mcparam) 35 | 36 | 37 | ## Not an immediate functionality of miloDE, but we illustrate in our vignette how to adapt WGCNA 38 | approach to discover DE patterns and co-regulated gene modules. 39 | If you want to perform similar analysis, please install Seurat and scWGCNA (note that scWGCNA does not work with V5 Seurat so please ensure that your Seurat version is V4): 40 | install.packages('Seurat', version = "4.4.0") 41 | devtools::install_github("cferegrino/scWGCNA", ref="main") 42 | 43 | 44 | ``` 45 | 46 | 47 | ## Pipeline 48 | 49 | 0. **Input**. The input of `miloDE` is scRNA-seq data, provided as `SingleCellExperiment` object. 50 | Additionally, we require that: 51 | 52 | * Latent dimensions (used for graph construction) are pre-computed and stored in `reducedDim(sce)`. 53 | * `colData(sce)` has to contain metadata corresponding assigning cells to individual replicates (passed to `sample_id`) and tested condition (e.g. healthy or disease, passed to `condition_id`). 54 | 55 | You can explore toy data here: 56 | 57 | ``` 58 | data("sce_mouseEmbryo", package = "miloDE") 59 | print(sce_mouseEmbryo) 60 | # `pca.corrected` in reducedDim(sce) - PCs that we will use for graph construction 61 | 62 | head(colData(sce_mouseEmbryo)) 63 | # `tomato` corresponds to condition id 64 | # `sample` corresponds to individual replicates. There are 2 samples per each condition: 65 | table(colData(sce_mouseEmbryo)[,c('sample','tomato')]) 66 | 67 | ``` 68 | 69 | 1. **Neighbourhood assignment**: First step is to assign neighbourhoods using graph representation of scRNA-seq data' 70 | 71 | ``` 72 | 73 | sce_mouseEmbryo = assign_neighbourhoods(sce_mouseEmbryo, k = 20, order = 2, 74 | filtering = TRUE, reducedDim_name = "pca.corrected") 75 | 76 | ``` 77 | 78 | 2. **DE testing**: Once neighbourhoods are assigned, we can carry out DE testing. Output is returned in either `data.frame` or `SingleCellExperiment format`. For each tested gene-neighbourhood, we return `logFC`, `pvalue`, `pvalue corrected across genes` and `pvalue corrected across nhoods`. We also return boolean flag if test is performed. 79 | 80 | ``` 81 | 82 | de_stat = de_test_neighbourhoods(sce_mouseEmbryo , sample_id = "sample", 83 | design = ~tomato, covariates = c("tomato")) 84 | 85 | 86 | ``` 87 | 88 | 89 | 90 | ## Vignette 91 | 92 | Please check the vignette to grasp additional functions aiding interpretation and analysis of miloDE output. 93 | 94 | [Effect of Tal1 knock out on mouse development](https://rawcdn.githack.com/MarioniLab/miloDE_tutorials/3d3781237011695f802dc1c0f0193bea12a108de/miloDE__mouse_embryo.html). 95 | 96 | 97 | Cite the code: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12686748.svg)](https://zenodo.org/records/12686748). 98 | 99 | -------------------------------------------------------------------------------- /data-raw/sce_mouseEmbryo.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `sce_mouseEmbryo` dataset goes here 2 | 3 | usethis::use_data(sce_mouseEmbryo, overwrite = TRUE) 4 | 5 | 6 | # load libraries 7 | library(SingleCellExperiment) 8 | library(MouseGastrulationData) 9 | library(geneBasisR) 10 | 11 | # subset of 3 cell types 12 | cts = c("Endothelium" , "Blood progenitors 2", "Neural crest") 13 | 14 | # load chimera Tal1 15 | sce = Tal1ChimeraData() 16 | # select CTs 17 | sce = sce[, sce$celltype.mapped %in% cts] 18 | # delete row for tomato 19 | sce = sce[!rownames(sce) == "tomato-td" , ] 20 | 21 | # add logcounts 22 | sce = scuttle::logNormCounts(sce) 23 | 24 | 25 | # add covariates 26 | sce$sex = sapply(1:ncol(sce) , function(i) ifelse(sce$sample[i] %in% c("3" , "4") , "F" , "M")) 27 | set.seed(32) 28 | sce$toy_cov_1 = sample(1:5 , ncol(sce),1) 29 | 30 | # select only 1000 genes 31 | sce = retain_informative_genes(sce , n = 300) 32 | sce_mouseEmbryo = sce 33 | usethis::use_data(sce_mouseEmbryo , overwrite = TRUE , compress = "xz") 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /data/sce_mouseEmbryo.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarioniLab/miloDE/d253df8972a54459d63096028ae41afcb4a1063a/data/sce_mouseEmbryo.rda -------------------------------------------------------------------------------- /man/assign_neighbourhoods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assign_neighbourhoods.R 3 | \name{assign_neighbourhoods} 4 | \alias{assign_neighbourhoods} 5 | \title{assign_neighbourhoods} 6 | \usage{ 7 | assign_neighbourhoods( 8 | x, 9 | reducedDim_name, 10 | k = 25, 11 | prop = 0.2, 12 | order = 2, 13 | filtering = TRUE, 14 | k_init = 50, 15 | d = 30, 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{A \code{\linkS4class{SingleCellExperiment}} object.} 21 | 22 | \item{reducedDim_name}{Defines the assay in \code{reducedDim(x)} to use as the embedding for graph construction.} 23 | 24 | \item{k}{Positive integer, defines how many neighbours to use for the neighbourhood assignment. Default \code{k = 25}.} 25 | 26 | \item{prop}{Numerical, between 0 and 1, defines which fraction of cells to use as neighbourhood centres. Default \code{prop = 0.2}.} 27 | 28 | \item{order}{In \code{c(1,2)}, defines which order of neighbours to use. Default \code{order = 2}.} 29 | 30 | \item{filtering}{In \code{c(TRUE,FALSE)}, defines whether to refine the assignment. Default \code{filtering = TRUE}.} 31 | 32 | \item{k_init}{Positive integer, defines how many neighbours to use for identifying anchor cells (for this step we use 1st-order kNN). Default \code{k_init = 50}.} 33 | 34 | \item{d}{Positive integer, defines how many dimensions from \code{reducedDim(x)} to use. Default \code{d = 30}.} 35 | 36 | \item{verbose}{Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}.} 37 | } 38 | \value{ 39 | \code{\linkS4class{Milo}} object containing cell-neighbourhood matrix in \code{nhoods(out)} slot 40 | } 41 | \description{ 42 | Assign neighbourhoods to single-cell RNA-seq data (in \code{SingleCellExperiment} format) 43 | } 44 | \details{ 45 | This function assigns neighbourhoods to single-cell data. This includes assigning graph representation, selecting \sQuote{index} cells and, finally, for each index cell, assigning it along with its neighbourhoors to one neighbourhood. 46 | 47 | Specifically, algorithm goes as follows: 48 | \enumerate{ 49 | \item Assigning \sQuote{loose} graph (i.e. ~low k, 1st-order kNN) to select index cells for the selected \code{prop} (greatly reduces computational time to look for \sQuote{index} cells in a loose graph). 50 | \item Reassigning graph following entered by the user \code{order} and \code{k}. 51 | \item Assigning neighbourhoods. 52 | \item (Optional but recommended) Refining the neighbourhood assignment (check \code{\link{filter_neighbourhoods}}). 53 | } 54 | } 55 | \examples{ 56 | require(SingleCellExperiment) 57 | n_row = 500 58 | n_col = 100 59 | n_latent = 5 60 | sce = SingleCellExperiment(assays = 61 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 62 | rownames(sce) = as.factor(1:n_row) 63 | colnames(sce) = c(1:n_col) 64 | sce$cell = colnames(sce) 65 | reducedDim(sce , "reduced_dim") = 66 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 67 | out = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 68 | } 69 | -------------------------------------------------------------------------------- /man/calc_AUC_per_neighbourhood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calc_AUC_per_neighbourhood.R 3 | \name{calc_AUC_per_neighbourhood} 4 | \alias{calc_AUC_per_neighbourhood} 5 | \title{calc_AUC_per_neighbourhood} 6 | \usage{ 7 | calc_AUC_per_neighbourhood( 8 | x, 9 | genes = rownames(x), 10 | sample_id = "sample", 11 | condition_id, 12 | conditions = NULL, 13 | min_n_cells_per_sample = 3, 14 | n_threads = 2, 15 | BPPARAM = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{A \code{\linkS4class{Milo}} object.} 20 | 21 | \item{genes}{Character vector specifying genes to be passed for the testing. Default \code{genes = rownames(x)}.} 22 | 23 | \item{sample_id}{Character specifying which variable should be used as a replicate ID. 24 | Should be in \code{colnames(colData(x))}. Default \code{sample_id = "sample"}.} 25 | 26 | \item{condition_id}{Character specifying which variable should be used as a condition ID. 27 | Should be in \code{colnames(colData(x))}.} 28 | 29 | \item{conditions}{In case of multiple comparable groups, character vector specifying which conditions should be tested for separation. 30 | Default \code{conditions = NULL} and assumes that only 2 different conditions are present.} 31 | 32 | \item{min_n_cells_per_sample}{Positive integer specifying the minimum number of cells per replicate to be included in testing. 33 | Default \code{min_n_cells_per_sample = 3}.} 34 | 35 | \item{n_threads}{Positive integer specifying the number of cores to be used to calculate AUC. 36 | Higher number results in faster calculation, but its feasibility depends on the specs of your machine. 37 | Only relevant if \code{BPPARAM = NULL}. Default \code{n_threads = 2}.} 38 | 39 | \item{BPPARAM}{NULL or \code{\link{MulticoreParam}} object. Default \code{BPPARAM = NULL} assuming no parallelisation.} 40 | } 41 | \value{ 42 | \code{data.frame} object, with AUC calculated for each neighbourhood 43 | } 44 | \description{ 45 | Returns per neighbourhood AUC from Augur based (RF) classifiers 46 | } 47 | \details{ 48 | This function calculates for each neighbourhood whether cells between 2 conditions can be separated 49 | with Random Forest based classifiers (adapted from \code{\link[Augur]{calculate_auc}}). 50 | Accordingly, AUCs of the classifiers represent how well we can separate 2 conditions. 51 | 52 | We suggest that neighbourhoods with AUC > 0.5 suggest a certain degree of separation between 2 conditions that can further be examined 53 | with DE testing (and, accordingly, neighbourhoods with AUC <= 0.5 can be safely discarded). You also can set your own AUC threshold if desired as well as use AUCs to rank neighbourhoods. 54 | 55 | \emph{Note that this function is only relevant for \dQuote{simple} models (e.g. not nested or no interactions.) 56 | Also, it is hard-coded that for all neighbourhoods, in which total number of cells is less than 4 in at least one condition, 57 | AUC will be set to NaN (classifiers for such low numbers will not be built)}. 58 | } 59 | \examples{ 60 | require(SingleCellExperiment) 61 | n_row = 500 62 | n_col = 100 63 | n_latent = 5 64 | sce = SingleCellExperiment(assays = list(counts = 65 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 66 | logcounts(sce) = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4 67 | rownames(sce) = as.factor(1:n_row) 68 | colnames(sce) = c(1:n_col) 69 | sce$cell = colnames(sce) 70 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 71 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query") 72 | reducedDim(sce , "reduced_dim") = matrix(rnorm(n_col*n_latent), 73 | ncol=n_latent) 74 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 75 | sce = calc_AUC_per_neighbourhood(sce, condition_id = "type") 76 | } 77 | -------------------------------------------------------------------------------- /man/convert_de_stat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_de_stat.R 3 | \name{convert_de_stat} 4 | \alias{convert_de_stat} 5 | \title{convert_de_stat} 6 | \usage{ 7 | convert_de_stat(de_stat, assay_names = NULL, coldata_names = NULL) 8 | } 9 | \arguments{ 10 | \item{de_stat}{miloDE results, output of \code{\link{de_test_neighbourhoods}}; either in \code{data.frame} or \code{SingleCellExperiment} format.} 11 | 12 | \item{assay_names}{Character string specifying which fields should be treated as assays. 13 | Note that \code{logFC}, \code{pval}, \code{pval_corrected_across_genes} and \code{pval_corrected_across_nhoods} are hard-coded to be included in assays.} 14 | 15 | \item{coldata_names}{Character string specifying which fields should be treated as neighbourhood metadata. 16 | Note that \code{Nhood}, \code{Nhood_center}, \code{test_performed} are hard-coded to be included in coldata. 17 | 18 | \emph{Please note that \code{coldata_names} have to be the attributes of neighbourhoods (i.e. same values across different genes for the same neighbourhood).}} 19 | } 20 | \value{ 21 | A \code{SingleCellExperiment} object or \code{data.frame} object, containing miloDE results 22 | } 23 | \description{ 24 | Converts output of miloDE between \code{\link[base]{data.frame}} and \code{\linkS4class{SingleCellExperiment}} formats 25 | } 26 | \details{ 27 | This function converts results of \code{\link{de_test_neighbourhoods}} between \code{data.frame} object and \code{SingleCellExperiment}. 28 | 29 | \code{data.frame} object is more commonly used and might be easier to navigate, however, if total number of tests (i.e. gene x neighboourhoods) 30 | is overwhelmingly large, \code{SingleCellExperiment} might be more suitable and faster to work with. 31 | } 32 | \examples{ 33 | de_stat = expand.grid(gene = paste0("gene" , c(1:5)) , Nhood = c(1:10)) 34 | de_stat$Nhood_center = paste0("nhood_" , de_stat$Nhood) 35 | de_stat$logFC = sample(seq(-2,2,1) , nrow(de_stat) , 1) 36 | de_stat$pval = sample(c(0,1),nrow(de_stat),1) 37 | de_stat$pval_corrected_across_genes = sample(c(0,1),nrow(de_stat),1) 38 | de_stat$pval_corrected_across_nhoods = sample(c(0,1),nrow(de_stat),1) 39 | de_stat$test_performed = TRUE 40 | de_stat = convert_de_stat(de_stat) 41 | de_stat = convert_de_stat(de_stat) 42 | 43 | } 44 | -------------------------------------------------------------------------------- /man/de_test_neighbourhoods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/de_test_neighbourhoods.R 3 | \name{de_test_neighbourhoods} 4 | \alias{de_test_neighbourhoods} 5 | \title{de_test_neighbourhoods} 6 | \usage{ 7 | de_test_neighbourhoods( 8 | x, 9 | sample_id = "sample", 10 | design, 11 | covariates, 12 | contrasts = NULL, 13 | subset_nhoods = NULL, 14 | min_n_cells_per_sample = 3, 15 | min_count = 3, 16 | output_type = "data.frame", 17 | plot_summary_stat = FALSE, 18 | layout = "UMAP", 19 | BPPARAM = NULL, 20 | verbose = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{A \code{\linkS4class{Milo}} object.} 25 | 26 | \item{sample_id}{Character specifying which variable should be used as a replicate ID. 27 | Should be in \code{colnames(colData(x))}. Default \code{sample_id = "sample"}.} 28 | 29 | \item{design}{A \code{formula} object describing the experimental design for DE testing. 30 | Note that if \code{contrasts = NULL} (default), the last column column of model matrix will be used for testing.} 31 | 32 | \item{covariates}{Vector specifying all covariates that are passed into experimental design. 33 | 34 | \emph{It should contain all columns used in the design formula (except \code{sample_id})}.} 35 | 36 | \item{contrasts}{NULL (default) or character string specifying what comparison to perform. 37 | If you are unsure regarding the appropriate syntax for the \code{contrasts} in your data, 38 | check \url{https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf}. 39 | 40 | \emph{Note that at the moment we only support one comparison (i.e. one contrast); if you wish to perform several comparisons, please run \code{\link{de_test_neighbourhoods}} for each comparison separately.}} 41 | 42 | \item{subset_nhoods}{NULL or character vector specifying the set of neighbourhoods that will be tested for DE. 43 | Default \code{subset_nhoods = NULL} meaning no subsetting.} 44 | 45 | \item{min_n_cells_per_sample}{Positive integer specifying the minimum number of cells per replicate to be included in testing. 46 | Default \code{min_n_cells_per_sample = 3}.} 47 | 48 | \item{min_count}{Positive integer, specifying \code{min.count} for gene selection (employes \code{\link[edgeR]{filterByExpr}}). 49 | Default \code{min_count = 3}.} 50 | 51 | \item{output_type}{In \code{c("data.frame","SCE")} Specifying the output format - either in \code{data.frame} or \code{\linkS4class{SingleCellExperiment}}. 52 | Default \code{output_type = "data.frame"}.} 53 | 54 | \item{plot_summary_stat}{Boolean specifying if we plot Milo neighbourhood plot summarising (per neighbourhood) whether testing was performed. 55 | Default \code{plot_summary_stat = FALSE}.} 56 | 57 | \item{layout}{A character indicating the name of the \code{reducedDim} slot in the \code{\linkS4class{Milo}} object to use for the layout of the plot(default \code{layout = "UMAP"}). 58 | Only relevant if \code{plot_summary_stat = TRUE}.} 59 | 60 | \item{BPPARAM}{NULL or \code{\link{MulticoreParam}} object to use for parallelisation (see \code{README} for the usage). Default \code{BPPARAM = NULL} meaning no parallelisation. 61 | Note that if possible we recommend to parallel this in order to reduce computational time.} 62 | 63 | \item{verbose}{Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}.} 64 | } 65 | \value{ 66 | \code{data.frame} or \code{SingleCellExperiment} object containing miloDE results for all supplied neighbourhoods. 67 | For each tested gene-neighbourhood pair, we return logFC and p-values (raw and corrected across genes or neighbourhoods). 68 | } 69 | \description{ 70 | Performs DE testing within each neighbourhood + post hoc p-value correction across neighbourhoods. If a test for a gene x neighbourhood pair is not performed 71 | (i.e. gene is not expressed in this neighbourhood and filtered out prior to testing), we returns NaNs. 72 | } 73 | \details{ 74 | We employ edgeR testing (using \code{\link[edgeR]{glmQLFit}}) within each neighbourhood. 75 | We allow the user to submit the desired experimental design and incorporate various covariates, which is beneficial in the context of large cohort studies. 76 | } 77 | \examples{ 78 | require(SingleCellExperiment) 79 | n_row = 500 80 | n_col = 100 81 | n_latent = 5 82 | sce = SingleCellExperiment(assays = list(counts = 83 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 84 | rownames(sce) = as.factor(1:n_row) 85 | colnames(sce) = c(1:n_col) 86 | sce$cell = colnames(sce) 87 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 88 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query") 89 | reducedDim(sce , "reduced_dim") = 90 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 91 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 92 | de_stat = de_test_neighbourhoods(sce , design = ~type, 93 | covariates = c("type"), plot_summary_stat = FALSE) 94 | de_stat = convert_de_stat(de_stat) 95 | de_stat = convert_de_stat(de_stat) 96 | } 97 | -------------------------------------------------------------------------------- /man/de_test_single_neighbourhood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/de_test_neighbourhoods.R 3 | \name{de_test_single_neighbourhood} 4 | \alias{de_test_single_neighbourhood} 5 | \title{de_test_single_neighbourhood} 6 | \usage{ 7 | de_test_single_neighbourhood( 8 | x, 9 | nhoods_x, 10 | hood_id, 11 | sample_id, 12 | design, 13 | covariates, 14 | contrasts = NULL, 15 | min_n_cells_per_sample = 1, 16 | min_count = 3, 17 | run_separately = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{x}{A \code{\linkS4class{Milo}} object} 22 | 23 | \item{nhoods_x}{Should be extracted from x as \code{nhoods(x)} prior to running the function.} 24 | 25 | \item{hood_id}{Numeric specifying for which neighbourhood we should perform testing. Should be in \code{c(1:ncol(nhoods_x))}.} 26 | 27 | \item{sample_id}{Character specifying which variable should be used as a replicate ID. 28 | Should be in \code{colnames(colData(x))}. Default \code{sample_id = "sample"}.} 29 | 30 | \item{design}{A \code{formula} object describing the experimental design for DE testing. 31 | If \code{contrasts = NULL} (default), the last column column of model matrix will be used for testing.} 32 | 33 | \item{covariates}{Vector specifying all covariates that should be passed into experimental design. 34 | 35 | \emph{It should contain all columns used in the design formula (except \code{sample_id}).}} 36 | 37 | \item{contrasts}{NULL (default) or character string specifying what comparison to perform. 38 | If you are unsure regarding the appropriate syntax for the \code{contrasts} in your data, 39 | check \url{https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf}. 40 | 41 | \emph{Note that at the moment we only support one comparison (i.e. one contrast), if you wish to perform several comparisons, please run \code{de_test_neighbourhoods} for each comparison separately.}} 42 | 43 | \item{min_n_cells_per_sample}{Positive integer specifying the minimum number of cells per replicate to be included in testing. 44 | Default \code{min_n_cells_per_sample = 3}.} 45 | 46 | \item{min_count}{Positive integer, specifying \code{min.count} for gene selection (employes \code{\link[edgeR]{filterByExpr}}). 47 | Default \code{min_count = 3}.} 48 | 49 | \item{run_separately}{A boolean parameter specifying whether the function is to be run as a part of \code{\link{de_test_neighbourhoods}} (FALSE) or as a stand-alone run (TRUE). Default \code{run_separately = TRUE}.} 50 | } 51 | \value{ 52 | \code{data.frame} object containing miloDE results for the selected neighbourhood 53 | } 54 | \description{ 55 | Tests single neighbourhood for DE; not intended to be used by itself (however allowed to), but rather as a part of \code{\link{de_test_neighbourhoods}} 56 | } 57 | \details{ 58 | We employ edgeR testing (using \code{\link[edgeR]{glmQLFit}}) within the selected neighbourhood (using numeric \code{hood_id} which corresponds to which column of \code{nhoods(x)} to use). 59 | We allow user to submit the desired experimental design and incorporate various covariates, which is beneficial in the context of large cohort studies. 60 | } 61 | \examples{ 62 | require(SingleCellExperiment) 63 | require(miloR) 64 | n_row = 500 65 | n_col = 100 66 | n_latent = 5 67 | sce = SingleCellExperiment(assays = list(counts = 68 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 69 | rownames(sce) = as.factor(1:n_row) 70 | colnames(sce) = c(1:n_col) 71 | sce$cell = colnames(sce) 72 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 73 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query") 74 | reducedDim(sce , "reduced_dim") = 75 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 76 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 77 | nhoods_x = nhoods(sce) 78 | de_stat = de_test_single_neighbourhood(sce , nhoods_x = nhoods_x, 79 | hood_id = 1 , sample_id = "sample" , 80 | design = ~type, covariates = c("type")) 81 | 82 | } 83 | -------------------------------------------------------------------------------- /man/estimate_neighbourhood_sizes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimate_neighbourhood_sizes.R 3 | \name{estimate_neighbourhood_sizes} 4 | \alias{estimate_neighbourhood_sizes} 5 | \title{estimate_neighbourhood_sizes} 6 | \usage{ 7 | estimate_neighbourhood_sizes( 8 | x, 9 | reducedDim_name, 10 | k_grid = seq(10, 100, 10), 11 | order = 2, 12 | prop = 0.1, 13 | filtering = TRUE, 14 | k_init = 50, 15 | d = 30, 16 | cluster_id = NULL, 17 | plot_stat = TRUE, 18 | verbose = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{A \code{\linkS4class{SingleCellExperiment}} object.} 23 | 24 | \item{reducedDim_name}{Defines the slot in \code{reducedDim(x)} to use as the embedding for graph construction.} 25 | 26 | \item{k_grid}{Vector of positive integers, defines how many neighbours to use for the neighbourhood assignment.} 27 | 28 | \item{order}{In \code{c(1,2)}, defines which order of neighbours to use. Default \code{order = 2}.} 29 | 30 | \item{prop}{Numerical, between 0 and 1, defines which fraction of cells to use as neighbourhood centres. Default \code{prop = 0.2}.} 31 | 32 | \item{filtering}{In \code{c(TRUE,FALSE)}, defines whether to filter neighbourhoods (reduces computing time downstream). Default \code{filtering = TRUE}.} 33 | 34 | \item{k_init}{Positive integer, defines how many neighbours to use for identifying \sQuote{index} cells. Default \code{k_init = 50}.} 35 | 36 | \item{d}{Positive integer, defines how many dimensions from \code{reducedDim(x)} to use. Default \code{d = 30}.} 37 | 38 | \item{cluster_id}{Character specifying which field in \code{colData(x)} to use for 'localised' neighbourhood size estimation. 39 | This might be useful if dataset is rather big (which will result in an excessive running time). 40 | In case \code{cluster_id} is provided, we will calculate neighbourhood size distribution within individual clusters and aggregate results 41 | across clusters in order to speed up the process (note that it might result in slightly biased estimates). 42 | Default \code{cluster_id = NULL}, in which case neighbourhood sizes will be estimated for the whole dataset.} 43 | 44 | \item{plot_stat}{Boolean specifying whether to plot the stat. Default \code{plot_stat = TRUE}.} 45 | 46 | \item{verbose}{Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}.} 47 | } 48 | \value{ 49 | \code{data.frame} object, in which each row corresponds to \code{k} and 5 columns correspond to min, q25, median, q75 and max of neighbourhood size distributions; also returns a boxplot 50 | } 51 | \description{ 52 | For a grid of \code{k}, returns neighbourhood size distribution; this will help a user to select an appropriate \code{k} 53 | } 54 | \details{ 55 | This function returns an estimated distribution of neighbourhood sizes for different \code{k} values 56 | (for the selected by user \code{order}; if you want to estimates for both 1st and 2nd \code{order}, run this twice with changing \code{order}). 57 | This can help to gauge whether the neighbourhood size distribution is appropriate for the selected \code{k}, since \code{\link{de_test_neighbourhoods}} takes a while to complete. 58 | 59 | Note that this function also might take a while to complete on big datasets (> 70k cells), and in this case we provide an option to estimate neighbourhood 60 | sizes within annotated clusters (passed in \code{cluster_id}), which will be considerably faster, however, might result in slightly biased estimates. 61 | } 62 | \examples{ 63 | require(SingleCellExperiment) 64 | n_row = 500 65 | n_col = 100 66 | n_latent = 5 67 | sce = SingleCellExperiment(assays = list(counts = 68 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 69 | rownames(sce) = as.factor(1:n_row) 70 | colnames(sce) = c(1:n_col) 71 | sce$cell = colnames(sce) 72 | reducedDim(sce , "reduced_dim") = 73 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 74 | out = estimate_neighbourhood_sizes(sce, k_grid = c(5,10), 75 | reducedDim_name = "reduced_dim") 76 | 77 | } 78 | -------------------------------------------------------------------------------- /man/filter_neighbourhoods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filter_neighbourhoods.R 3 | \name{filter_neighbourhoods} 4 | \alias{filter_neighbourhoods} 5 | \title{filter_neighbourhoods} 6 | \usage{ 7 | filter_neighbourhoods(x) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{\linkS4class{Milo}} object.} 11 | } 12 | \value{ 13 | \code{Milo} object with refined neighbourhood assignment 14 | } 15 | \description{ 16 | Filtering redundant neighbourhoods, using the greedy approach to set cover problem 17 | } 18 | \details{ 19 | This function refines neighbourhood assignment and discards redundant neighbourhoods. This call is highly recommended since it reduces computational time greatly. 20 | It is called directly in \code{\link{assign_neighbourhoods}} if \code{filtering=TRUE}, and also can be called independently post hoc. 21 | 22 | Under the hood, it adapts \code{\link[RcppGreedySetCover]{greedySetCover}} function, which implements greedy solution to NP-hard \sQuote{set cover} problem. 23 | In this solution, all neighbourhoods are sorted in the decreasing order of their size (i.e. number of cells), 24 | and neighbourhoods are iteratively added to the final neighbourhood set if they contain at least one cell not \sQuote{covered} by previously added neighbourhoods. 25 | } 26 | \examples{ 27 | require(SingleCellExperiment) 28 | n_row = 500 29 | n_col = 100 30 | n_latent = 5 31 | sce = SingleCellExperiment(assays = 32 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 33 | rownames(sce) = as.factor(1:n_row) 34 | colnames(sce) = c(1:n_col) 35 | sce$cell = colnames(sce) 36 | reducedDim(sce , "reduced_dim") = 37 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 38 | sce = assign_neighbourhoods(sce, 39 | reducedDim_name = "reduced_dim" , k = 10 , order = 1) 40 | sce = filter_neighbourhoods(sce) 41 | } 42 | -------------------------------------------------------------------------------- /man/plot_DE_gene_set.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotting_functions.R 3 | \name{plot_DE_gene_set} 4 | \alias{plot_DE_gene_set} 5 | \title{plot_DE_gene_set} 6 | \usage{ 7 | plot_DE_gene_set( 8 | x, 9 | de_stat, 10 | genes, 11 | logFC_correction = TRUE, 12 | significance_by = "pval_corrected_across_nhoods", 13 | alpha = 0.1, 14 | layout = "UMAP", 15 | subset_nhoods = NULL, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{A \code{\linkS4class{Milo}} object.} 21 | 22 | \item{de_stat}{miloDE stat (output of \code{\link{de_test_neighbourhoods}}). 23 | It does not have to be direct output of \code{\link{de_test_neighbourhoods}} i.e. it is allowed if \code{de_stat} has some additional assays/columns (e.g. calculated post hoc metrics on neighbourhoods/genes).} 24 | 25 | \item{genes}{A character vector specifying genes.} 26 | 27 | \item{logFC_correction}{Boolean specifying whether to perform logFC correction. 28 | If TRUE (default), logFC will be set to 0 if corrected p-value (defined by \code{significance_by}) < alpha.} 29 | 30 | \item{significance_by}{Character specifying which column to use to decide on significance for logFC. 31 | Should be an in \code{assays(de_stat)} or in \code{colnames(de_stat)} (depends on \code{de_stat} format). Default \code{significance_by = "pval_corrected_across_nhoods"}. 32 | Expected to be in \code{c("pval", "pval_corrected_across_nhoods", "pval_corrected_across_genes")}.} 33 | 34 | \item{alpha}{A scalar (between 0 and 1) specifying the significance level used. Default \code{alpha = 0.1}.} 35 | 36 | \item{layout}{A character indicating the name of the \code{reducedDim} slot in the \code{\linkS4class{Milo}} object to use for layout. Default \code{layout = "UMAP"}.} 37 | 38 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted. 39 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed. 40 | If not NULL, should be a numeric vector, which values lie within \code{c(1:ncol(nhoods(x)))}.} 41 | 42 | \item{...}{Arguments to pass to \code{plot_milo_by_single_metric} (e.g. \code{size_range}, \code{node_stroke} etc)).} 43 | } 44 | \value{ 45 | \sQuote{Neighbourhood} plot, in which each neighbourhood is coloured by average logFC across the selected genes; neighbourhood size corresponds to the fraction of genes that are DE in this neighbourhood (based on \code{pval_corrected_across_nhoods}). 46 | } 47 | \description{ 48 | Returns \sQuote{neighbourhood} plot, in which colour of nodes correspond to average logFC across selected genes; size corresponds to how many genes show significant DE in the neighbourhood (using \code{significance_by}) 49 | } 50 | \details{ 51 | Note that for this plot, in untested gene-neighbourhood pairs, we set logFC to 0 and p-values to 1. 52 | } 53 | \examples{ 54 | require(SingleCellExperiment) 55 | n_row = 500 56 | n_col = 100 57 | n_latent = 5 58 | sce = SingleCellExperiment(assays = 59 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 60 | rownames(sce) = as.factor(1:n_row) 61 | colnames(sce) = c(1:n_col) 62 | sce$cell = colnames(sce) 63 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 64 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query") 65 | reducedDim(sce , "reduced_dim") = 66 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 67 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 68 | de_stat = de_test_neighbourhoods(sce , 69 | design = ~type , covariates = c("type") ) 70 | umaps = as.data.frame(matrix(rnorm(n_col*2), ncol=2)) 71 | colnames(umaps) = c("V1" , "V2") 72 | reducedDim(sce , "UMAP") = umaps 73 | genes = c("1","2") 74 | p = plot_DE_gene_set(sce, de_stat , genes = c("1","2")) 75 | 76 | } 77 | -------------------------------------------------------------------------------- /man/plot_DE_single_gene.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotting_functions.R 3 | \name{plot_DE_single_gene} 4 | \alias{plot_DE_single_gene} 5 | \title{plot_DE_single_gene} 6 | \usage{ 7 | plot_DE_single_gene( 8 | x, 9 | de_stat, 10 | gene, 11 | alpha = 0.1, 12 | layout = "UMAP", 13 | subset_nhoods = NULL, 14 | set_na_to_0 = TRUE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{A \code{\linkS4class{Milo}} object.} 20 | 21 | \item{de_stat}{miloDE stat (output of \code{\link{de_test_neighbourhoods}}). 22 | It does not have to be direct output of \code{\link{de_test_neighbourhoods}} i.e. it is allowed if \code{de_stat} has some additional assays/columns (e.g. calculated post hoc metrics on neighbourhoods/genes).} 23 | 24 | \item{gene}{A character specifying gene.} 25 | 26 | \item{alpha}{A scalar (between 0 and 1) specifying the significance level used. Default \code{alpha = 0.1}.} 27 | 28 | \item{layout}{A character indicating the name of the \code{reducedDim} slot in the \code{\linkS4class{Milo}} object to use for layout. Default \code{layout = "UMAP"}.} 29 | 30 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted. 31 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed. 32 | If not NULL, should be a numeric vector, which values lie within \code{c(1:ncol(nhoods(x)))}.} 33 | 34 | \item{set_na_to_0}{Boolean specifying whether in neighbourhoods in which gene is not tested, logFC would be set to 0 and p-values to 1. 35 | Default \code{set_na_to_0 = TRUE}, and in this case, they will be coloured in white (otherwise gray).} 36 | 37 | \item{...}{Arguments to pass to \code{plot_milo_by_single_metric} (e.g. \code{size_range}, \code{node_stroke} etc)).} 38 | } 39 | \value{ 40 | \sQuote{Neighbourhood} plot, in which each neighbourhood is coloured by logFC for the selected gene (if significant) 41 | } 42 | \description{ 43 | Returns \sQuote{neighbourhood} plot; each node is coloured by logFC, if \code{pval_corrected_across_nhoods < alpha}. 44 | } 45 | \examples{ 46 | require(SingleCellExperiment) 47 | n_row = 500 48 | n_col = 100 49 | n_latent = 5 50 | sce = SingleCellExperiment(assays = 51 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 52 | rownames(sce) = as.factor(1:n_row) 53 | colnames(sce) = c(1:n_col) 54 | sce$cell = colnames(sce) 55 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 56 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query") 57 | reducedDim(sce , "reduced_dim") = 58 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 59 | sce = assign_neighbourhoods(sce, 60 | reducedDim_name = "reduced_dim") 61 | de_stat = de_test_neighbourhoods(sce , 62 | design = ~type , covariates = c("type") ) 63 | umaps = as.data.frame(matrix(rnorm(n_col*2), ncol=2)) 64 | colnames(umaps) = c("V1" , "V2") 65 | reducedDim(sce , "UMAP") = umaps 66 | p = plot_DE_single_gene(sce, de_stat , gene = "1") 67 | 68 | } 69 | -------------------------------------------------------------------------------- /man/plot_beeswarm_gene_set.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotting_functions.R 3 | \name{plot_beeswarm_gene_set} 4 | \alias{plot_beeswarm_gene_set} 5 | \title{plot_beeswarm_gene_set} 6 | \usage{ 7 | plot_beeswarm_gene_set( 8 | de_stat, 9 | genes, 10 | nhoodGroup, 11 | levels = NULL, 12 | logFC_correction = TRUE, 13 | significance_by = "pval_corrected_across_nhoods", 14 | alpha = 0.1, 15 | subset_nhoods = NULL, 16 | size = 2 17 | ) 18 | } 19 | \arguments{ 20 | \item{de_stat}{miloDE stat (output of \code{\link{de_test_neighbourhoods}}). 21 | It does not have to be direct output of \code{\link{de_test_neighbourhoods}} i.e. it is allowed if \code{de_stat} has some additional assays/columns (e.g. calculated post hoc metrics on neighbourhoods/genes).} 22 | 23 | \item{genes}{A character vector specifying genes.} 24 | 25 | \item{nhoodGroup}{A character specifying which column to use for neighbourhood grouping. Should be an assay in \code{de_stat} (or in \code{colnames(de_stat)} if \code{class(de_stat) == "data.frame"}).} 26 | 27 | \item{levels}{NULL (default) or character vector specifying order for \code{nhoodGroup} values, in which they are to be plotted.} 28 | 29 | \item{logFC_correction}{Boolean specifying whether to perform logFC correction. 30 | If TRUE (default), logFC will be set to 0 if corrected p-value (defined by \code{significance_by < alpha}).} 31 | 32 | \item{significance_by}{Character specifying which column to use to decide on the significance.} 33 | 34 | \item{alpha}{A numeric between 0 and 1 specifying the significance threshold. Default \code{alpha = 0.1}.} 35 | 36 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted. 37 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed.} 38 | 39 | \item{size}{A positive number specifying size of the dots. Default \code{size = 2}.} 40 | } 41 | \value{ 42 | Beeswarm plot, broke down by provided groupings; each point is a neighbourhood, colour - average logFC across selected genes; x-axis - fraction of genes that are DE in the neighbourhood 43 | } 44 | \description{ 45 | For the selected genes, returns a beeswarm plot, in which the aggregated DE statistics is binned by provided cell grouping (e.g. enriched cell types) 46 | } 47 | \details{ 48 | Note that for this plot, in untested gene-neighbourhood pairs, we set logFC to 0 and p-values to 1. 49 | } 50 | \examples{ 51 | 52 | require(SingleCellExperiment) 53 | n_row = 500 54 | n_col = 100 55 | n_latent = 5 56 | sce = SingleCellExperiment(assays = list(counts = 57 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 58 | rownames(sce) = as.factor(1:n_row) 59 | colnames(sce) = c(1:n_col) 60 | sce$cell = colnames(sce) 61 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 62 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query") 63 | reducedDim(sce , "reduced_dim") = 64 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 65 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 66 | de_stat = de_test_neighbourhoods(sce , design = ~type , 67 | covariates = c("type") ) 68 | de_stat$celltype = 1 69 | p = plot_beeswarm_gene_set(de_stat , genes = c("1","2") , 70 | nhoodGroup = "celltype") 71 | 72 | } 73 | -------------------------------------------------------------------------------- /man/plot_beeswarm_single_gene.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotting_functions.R 3 | \name{plot_beeswarm_single_gene} 4 | \alias{plot_beeswarm_single_gene} 5 | \title{plot_beeswarm_single_gene} 6 | \usage{ 7 | plot_beeswarm_single_gene( 8 | de_stat, 9 | gene, 10 | nhoodGroup, 11 | levels = NULL, 12 | alpha = 0.1, 13 | subset_nhoods = NULL, 14 | size = 2 15 | ) 16 | } 17 | \arguments{ 18 | \item{de_stat}{miloDE stat (output of \code{\link{de_test_neighbourhoods}}). 19 | It does not have to be direct output of \code{\link{de_test_neighbourhoods}} i.e. it is allowed if \code{de_stat} has some additional assays/columns (e.g. calculated post hoc metrics on neighbourhoods/genes).} 20 | 21 | \item{gene}{A character specifying the gene.} 22 | 23 | \item{nhoodGroup}{A character specifying which values to use for neighbourhood grouping. Should be an assay in \code{de_stat} (or in \code{colnames(de_stat)} if \code{class(de_stat) == "data.frame"}).} 24 | 25 | \item{levels}{NULL (default) or character vector specifying order for \code{nhoodGroup} values, in which they are to be plotted. 26 | If \code{levels = NULL}, default order will be supplied.} 27 | 28 | \item{alpha}{A numeric between 0 and 1 specifying the significance threshold. All neighbourhoods that are defined as not significant, will coloured in gray. Default \code{alpha = 0.1}.} 29 | 30 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted. 31 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed.} 32 | 33 | \item{size}{A positive number specifying size of the dots. Default \code{size = 2}.} 34 | } 35 | \value{ 36 | Beeswarm plot, broke down by cell types; each point is a neighbourhood, colours and x-axis correspond to logFC (points are coloured if significant, based on \code{pval_corrected_across_nhoods}; otherwise in grey) 37 | } 38 | \description{ 39 | For the selected gene, returns a beeswarm plot, in which the DE statistics for the gene is binned by provided cell grouping (e.g. cell types); 40 | } 41 | \examples{ 42 | 43 | require(SingleCellExperiment) 44 | n_row = 500 45 | n_col = 100 46 | n_latent = 5 47 | sce = SingleCellExperiment(assays = 48 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 49 | rownames(sce) = as.factor(1:n_row) 50 | colnames(sce) = c(1:n_col) 51 | sce$cell = colnames(sce) 52 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 53 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query") 54 | reducedDim(sce , "reduced_dim") = 55 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 56 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 57 | de_stat = de_test_neighbourhoods(sce , 58 | design = ~type , covariates = c("type") ) 59 | de_stat$celltype = 1 60 | p = plot_beeswarm_single_gene(de_stat , 61 | gene = "1" , nhoodGroup = "celltype") 62 | 63 | } 64 | -------------------------------------------------------------------------------- /man/plot_milo_by_single_metric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotting_functions.R 3 | \name{plot_milo_by_single_metric} 4 | \alias{plot_milo_by_single_metric} 5 | \title{plot_milo_by_single_metric} 6 | \usage{ 7 | plot_milo_by_single_metric( 8 | x, 9 | nhood_stat, 10 | colour_by = "logFC", 11 | significance_by = NULL, 12 | order_by = NULL, 13 | order_direction = FALSE, 14 | size_by = NULL, 15 | alpha = 0.1, 16 | layout = "UMAP", 17 | subset_nhoods = NULL, 18 | size_range = c(1, 3), 19 | node_stroke = 0.3, 20 | edge_width = c(0.2, 0.5), 21 | edge_weight.thresh = NULL 22 | ) 23 | } 24 | \arguments{ 25 | \item{x}{A \code{\linkS4class{Milo}} object.} 26 | 27 | \item{nhood_stat}{\code{data.frame} object, containing columns \code{Nhood} (should correspond to neighbourhoods from \code{nhoodGraph(x))}.} 28 | 29 | \item{colour_by}{A character specifying value used for neighbourhood colouring. Should be in \code{colnames(nhood_stat)}. Can be both numeric and categorical.} 30 | 31 | \item{significance_by}{A character specifying which values to use for \sQuote{thresholding}: if values for this column exceed \code{alpha}, \code{colour_by} will be set to 0. 32 | Should be in \code{colnames(nhood_stat)}. Default \code{significance_by = NULL} and in this case we will not use no correction. 33 | Please ensure that its values are numeric.} 34 | 35 | \item{order_by}{A character specifying which values to use to order neighbourhoods for plotting. 36 | Should be in \code{colnames(nhood_stat)} (or NULL). Default \code{order_by = NULL} and in this case we will order by \code{size_by} values.} 37 | 38 | \item{order_direction}{Boolean specifying the direction of neighbourhood ordering. Default \code{order_direction = FALSE}.} 39 | 40 | \item{size_by}{A character specifying which values to use for neighbourhood sizes. 41 | Should be in \code{colnames(nhood_stat)} (or NULL). Default \code{size_by = NULL} and in this case we will use neighbourhood size (i.e. number of cells in the neighbourhood). 42 | Please ensure that its values are numeric.} 43 | 44 | \item{alpha}{A scalar (between 0 and 1) specifying the significance level used. Default \code{alpha = 0.1}.} 45 | 46 | \item{layout}{A character indicating the name of the \code{reducedDim} slot in the \code{\linkS4class{Milo}} object to use for layout. Default \code{layout = "UMAP"}.} 47 | 48 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted. 49 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed. If not NULL, should be a numeric vector, which values lie within \code{c(1:ncol(nhoods(x)))}.} 50 | 51 | \item{size_range}{A numeric vector indicating the range (min and max) of node sizes to use for plotting (to avoid overplotting in the graph). 52 | Default \code{size_range = c(1,3)}.} 53 | 54 | \item{node_stroke}{A numeric indicating the desired thickness of the border around each node. Default \code{node_stroke = 0.3}.} 55 | 56 | \item{edge_width}{A numeric vector indicating the range (min and max) of edge widths to use for plotting. Default \code{edge_width = c(0.2,0.5)}.} 57 | 58 | \item{edge_weight.thresh}{A numeric (or NULL) specifying a threshold for minimum cells in common (between neighbourhoods) required for an edge to be plotted. 59 | Default \code{edge_weight.thresh = NULL} meaning that no minimum threshold is set.} 60 | } 61 | \value{ 62 | \sQuote{Neighbourhood} plot, in which each neighbourhood is coloured by the provided in \code{colour_by} column value 63 | } 64 | \description{ 65 | Returns \sQuote{neighbourhood} plot; each node is coloured by \code{colour_by} column from \code{nhood_stat}, if \code{significance_by < alpha} 66 | } 67 | \examples{ 68 | require(SingleCellExperiment) 69 | n_row = 500 70 | n_col = 100 71 | n_latent = 5 72 | sce = SingleCellExperiment(assays = 73 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 74 | rownames(sce) = as.factor(1:n_row) 75 | colnames(sce) = c(1:n_col) 76 | sce$cell = colnames(sce) 77 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5)) 78 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query") 79 | reducedDim(sce , "reduced_dim") = matrix(rnorm(n_col*n_latent), ncol=n_latent) 80 | sce = assign_neighbourhoods(sce, 81 | reducedDim_name = "reduced_dim") 82 | de_stat = de_test_neighbourhoods(sce , 83 | design = ~type , covariates = c("type") ) 84 | de_stat = de_stat[de_stat$gene == "1", ] 85 | umaps = as.data.frame(matrix(rnorm(n_col*2), ncol=2)) 86 | colnames(umaps) = c("V1" , "V2") 87 | reducedDim(sce , "UMAP") = umaps 88 | p = plot_milo_by_single_metric(sce, de_stat) 89 | 90 | } 91 | -------------------------------------------------------------------------------- /man/rank_neighbourhoods_by_DE_magnitude.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rank_neighbourhoods_by_DE_magnitude.R 3 | \name{rank_neighbourhoods_by_DE_magnitude} 4 | \alias{rank_neighbourhoods_by_DE_magnitude} 5 | \title{rank_neighbourhoods_by_DE_magnitude} 6 | \usage{ 7 | rank_neighbourhoods_by_DE_magnitude(de_stat, pval.thresh = 0.1, z.thresh = -3) 8 | } 9 | \arguments{ 10 | \item{de_stat}{Output of miloDE (\code{\link{de_test_neighbourhoods}}), either in \code{data.frame} or \code{SingleCellExperiment} format.} 11 | 12 | \item{pval.thresh}{A scalar specifying which threshold to use for deciding on significance for gene being DE in a neighbourhood. Default \code{pval.thresh = 0.1}.} 13 | 14 | \item{z.thresh}{A scalar specifying which threshold to use for deciding on which z-normalised p-values are going to be considered specifically DE. Default \code{z.thresh = -3}.} 15 | } 16 | \value{ 17 | \code{data.frame}, with calculated number-DE-genes and number-specific-DE-genes for each neighbourhood 18 | } 19 | \description{ 20 | Ranks neighbourhoods by the magnitude of DE: number of DE genes and number of \sQuote{specifically} DE genes 21 | } 22 | \details{ 23 | To calculate number of DE genes per neighbourhood, we use \code{pval_corrected_across_genes}. 24 | Accordingly, for each neighbourhood we calculate how many genes has p-values lower than designated threshold. 25 | 26 | To calculate number of \sQuote{specifically} DE genes, we first z-normalise \code{pval_corrected_across_nhoods} (for each gene) and then for each 27 | neighbourhood, calculate how many genes have z-normalised p-values lower than designated threshold. 28 | 29 | \emph{Note that for this analysis we set NaN p-values (raw and corrected) to 1 - interpret accordingly.} 30 | } 31 | \examples{ 32 | de_stat = expand.grid(gene = paste0("gene" , c(1:5)) , Nhood = c(1:10)) 33 | de_stat$Nhood_center = paste0("nhood_" , de_stat$Nhood) 34 | de_stat$logFC = sample(seq(-2,2,1) , nrow(de_stat) , 1) 35 | de_stat$pval = sample(c(0,1),nrow(de_stat),1) 36 | de_stat$pval_corrected_across_genes = sample(c(0,1),nrow(de_stat),1) 37 | de_stat$pval_corrected_across_nhoods = sample(c(0,1),nrow(de_stat),1) 38 | de_stat$test_performed = TRUE 39 | out = rank_neighbourhoods_by_DE_magnitude(de_stat) 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/sce_mouseEmbryo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sce_mouseEmbryo.R 3 | \docType{data} 4 | \name{sce_mouseEmbryo} 5 | \alias{sce_mouseEmbryo} 6 | \title{Chimeric mouse embryo, Tal1- (Pijuan-Sala et al., 2019)} 7 | \format{ 8 | \code{\linkS4class{SingleCellExperiment}} object 9 | } 10 | \source{ 11 | MouseGastrulationData package 12 | } 13 | \usage{ 14 | data(sce_mouseEmbryo) 15 | } 16 | \description{ 17 | \code{SingleCellExperiment} object, containing counts 18 | (raw are pulled using \code{\link[MouseGastrulationData]{Tal1ChimeraData}} 19 | and log-normalised are estimated using \code{\link[scuttle]{logNormCounts}}) matrices 20 | for chimeric mouse embryos (both Tal1+ and Tal1-). 21 | } 22 | \details{ 23 | Additionally, we subselected only 3 cell types (Endothelium , Blood progenitors 2, Neural crest) 24 | and 300 HVGs. 25 | 26 | \code{colData(sce_mouseEmbryo)} contains information about replicate (\code{sample}) and Tal1 status (\code{tomato = TRUE} means Tal1-). 27 | } 28 | -------------------------------------------------------------------------------- /man/spatial_pval_adjustment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spatial_pval_adjustment.R 3 | \name{spatial_pval_adjustment} 4 | \alias{spatial_pval_adjustment} 5 | \title{spatial_pval_adjustment} 6 | \usage{ 7 | spatial_pval_adjustment(nhoods_x, pvalues) 8 | } 9 | \arguments{ 10 | \item{nhoods_x}{Should be extracted from x as \code{nhoods(x)}.} 11 | 12 | \item{pvalues}{Vector of p-values.} 13 | } 14 | \value{ 15 | Vector with \sQuote{spatially} (i.e. across neighbourhoods) adjusted p-values 16 | } 17 | \description{ 18 | Performs p-values multiple testing correction across neighbourhoods, with accounting for the overlap 19 | } 20 | \details{ 21 | This function is not intended to be run by itself, but can be useful if the user wants to perform \sQuote{spatially aware} multiple testing correction. 22 | 23 | Under the hood it performs weighted version of BH correction, where weights are reciprocal to the local desnity of a neighbourhood. 24 | Accordingly, big and/or highly connected neighbourhoods will have lower weights. 25 | } 26 | \examples{ 27 | require(SingleCellExperiment) 28 | require(miloR) 29 | n_row = 500 30 | n_col = 100 31 | n_latent = 5 32 | sce = SingleCellExperiment(assays = list(counts = 33 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4)) 34 | rownames(sce) = as.factor(1:n_row) 35 | colnames(sce) = c(1:n_col) 36 | sce$cell = colnames(sce) 37 | reducedDim(sce , "reduced_dim") = 38 | matrix(rnorm(n_col*n_latent), ncol=n_latent) 39 | sce = assign_neighbourhoods(sce, 40 | reducedDim_name = "reduced_dim" , k = 10 , order = 1) 41 | nhoods_x = nhoods(sce) 42 | pvalues = runif(n = ncol(nhoods_x) , min = 0 , max = 1) 43 | out = spatial_pval_adjustment(nhoods_x, pvalues = pvalues) 44 | } 45 | -------------------------------------------------------------------------------- /miloDE.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /miloDE_cartoon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarioniLab/miloDE/d253df8972a54459d63096028ae41afcb4a1063a/miloDE_cartoon.png -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(miloDE) 3 | 4 | test_check("miloDE") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-assign_neighbourhoods.R: -------------------------------------------------------------------------------- 1 | # context("Testing assign_neighbourhoods") 2 | library(miloDE) 3 | 4 | # load data 5 | data("sce_mouseEmbryo", package = "miloDE") 6 | 7 | 8 | # error msgs 9 | test_that("Wrong input gives errors", { 10 | 11 | # sce should be of the right format 12 | expect_error(assign_neighbourhoods(x = 1 , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 13 | "x should be a SingleCellExperiment or Milo object.", 14 | fixed=TRUE 15 | ) 16 | sce_test = sce_mouseEmbryo 17 | colnames(sce_test) = rep(1,1,ncol(sce_test)) 18 | expect_error(assign_neighbourhoods(sce_test , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 19 | "If colnames(x) exist, they should be unique.", 20 | fixed=TRUE 21 | ) 22 | 23 | 24 | # k should be positive integer 25 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 0, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 26 | "Check k - should be positive integer", 27 | fixed=TRUE 28 | ) 29 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = "1", prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 30 | "Check k - should be positive integer", 31 | fixed=TRUE 32 | ) 33 | 34 | # prop should be positive number between 0 and 1 35 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = "0.2", order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 36 | "Check prop - should be positive number between 0 and 1", 37 | fixed=TRUE 38 | ) 39 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 40 | "Check prop - should be positive number between 0 and 1", 41 | fixed=TRUE 42 | ) 43 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = -1, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 44 | "Check prop - should be positive number between 0 and 1", 45 | fixed=TRUE 46 | ) 47 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 48 | "Check prop - should be positive number between 0 and 1", 49 | fixed=TRUE 50 | ) 51 | 52 | 53 | # order should be 1 or 2 54 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 0, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 55 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)", 56 | fixed=TRUE 57 | ) 58 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 3, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 59 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)", 60 | fixed=TRUE 61 | ) 62 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 1.5, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 63 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)", 64 | fixed=TRUE 65 | ) 66 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = "x", filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 67 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)", 68 | fixed=TRUE 69 | ) 70 | 71 | # check filtering should be T or F 72 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, filtering = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 73 | "Check filtering - should be either TRUE or FALSE", 74 | fixed=TRUE 75 | ) 76 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, filtering = "aa", reducedDim_name = "pca.corrected", k_init = 50, d = 30), 77 | "Check filtering - should be either TRUE or FALSE", 78 | fixed=TRUE 79 | ) 80 | 81 | # reducedDim should be character 82 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, filtering = TRUE, reducedDim_name = 1, k_init = 50, d = 30), 83 | "Check reducedDim_name - should be character vector", 84 | fixed=TRUE 85 | ) 86 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = TRUE, k_init = 50, d = 30), 87 | "Check reducedDim_name - should be character vector", 88 | fixed=TRUE 89 | ) 90 | 91 | 92 | # k_init - positive integer 93 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = "50", d = 30), 94 | "Check k_init - should be positive integer", 95 | fixed=TRUE 96 | ) 97 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 0, d = 30), 98 | "Check k_init - should be positive integer", 99 | fixed=TRUE 100 | ) 101 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = -10, d = 30), 102 | "Check k_init - should be positive integer", 103 | fixed=TRUE 104 | ) 105 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 10.5, d = 30), 106 | "Check k_init - should be positive integer", 107 | fixed=TRUE 108 | ) 109 | 110 | 111 | # d - positive integer 112 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = "30"), 113 | "Check d - should be positive integer", 114 | fixed=TRUE 115 | ) 116 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 0), 117 | "Check d - should be positive integer", 118 | fixed=TRUE 119 | ) 120 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = -1), 121 | "Check d - should be positive integer", 122 | fixed=TRUE 123 | ) 124 | expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 1.5), 125 | "Check d - should be positive integer", 126 | fixed=TRUE 127 | ) 128 | 129 | 130 | # reduced dim should be in reducedDimNames 131 | expect_error(assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, k_init = 50, d = 30) 132 | ) 133 | 134 | expect_error(assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca_corrected", k_init = 50, d = 30), 135 | "reducedDim_name should be in reducedDimNames(x).", 136 | fixed=TRUE 137 | ) 138 | 139 | }) 140 | 141 | 142 | 143 | # return of the correct output 144 | test_that("Return is the correct class", { 145 | require(miloR) 146 | # right class 147 | out = assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 148 | expect_s4_class(out, "Milo") 149 | 150 | # nhoods have data for all the cells 151 | nhoods_out = nhoods(out) 152 | expect_equal(nrow(nhoods_out) , ncol(out)) 153 | expect_identical(sort(rownames(nhoods_out)) , sort(colnames(out))) 154 | 155 | }) 156 | 157 | 158 | # n-hoods/hood-size ~ k, order, filtering 159 | test_that("Scaling of neighbourhood sizes and numbers with k, order, filtering", { 160 | 161 | # smaller k - more neighbourhoods 162 | sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 163 | sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 20, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 164 | nhoods_1 = nhoods(sce_1) 165 | nhoods_2 = nhoods(sce_2) 166 | expect_gt(ncol(nhoods_1) , ncol(nhoods_2)) 167 | 168 | # smaller k - more neighbourhoods 169 | sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 25, prop = 0.2, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 170 | sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 75, prop = 0.2, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 171 | nhoods_1 = nhoods(sce_1) 172 | nhoods_2 = nhoods(sce_2) 173 | expect_gt(ncol(nhoods_1) , ncol(nhoods_2)) 174 | 175 | 176 | # order 1 - more neighbourhoods 177 | sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 25, prop = 0.1, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 178 | sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 25, prop = 0.1, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 179 | nhoods_1 = nhoods(sce_1) 180 | nhoods_2 = nhoods(sce_2) 181 | expect_gt(ncol(nhoods_1) , ncol(nhoods_2)) 182 | 183 | 184 | # order 1 - more neighbourhoods 185 | sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 5, prop = 0.2, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 186 | sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 5, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 187 | nhoods_1 = nhoods(sce_1) 188 | nhoods_2 = nhoods(sce_2) 189 | expect_gt(ncol(nhoods_1) , ncol(nhoods_2)) 190 | 191 | 192 | # filtering - less neighbourhoods 193 | sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 5, prop = 0.2, order = 1, filtering = F, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 194 | sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 5, prop = 0.2, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 195 | nhoods_1 = nhoods(sce_1) 196 | nhoods_2 = nhoods(sce_2) 197 | expect_gt(ncol(nhoods_1) , ncol(nhoods_2)) 198 | 199 | 200 | # filtering - less neighbourhoods 201 | sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 20, prop = 0.1, order = 2, filtering = F, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 202 | sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 20, prop = 0.1, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 203 | nhoods_1 = nhoods(sce_1) 204 | nhoods_2 = nhoods(sce_2) 205 | expect_gt(ncol(nhoods_1) , ncol(nhoods_2)) 206 | 207 | 208 | }) 209 | 210 | 211 | 212 | 213 | 214 | -------------------------------------------------------------------------------- /tests/testthat/test-calc_AUC_per_neighbourhood.R: -------------------------------------------------------------------------------- 1 | # context("Testing calc_AUC_per_neighbourhood") 2 | library(miloDE) 3 | 4 | # load data 5 | data("sce_mouseEmbryo", package = "miloDE") 6 | set.seed(32) 7 | sce_mouseEmbryo_milo = assign_neighbourhoods(sce_mouseEmbryo, reducedDim_name = "pca.corrected") 8 | require(miloR) 9 | stat_auc = calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , condition_id = "tomato" , min_n_cells_per_sample = 1) 10 | stat_auc_w_annotated_hoods = annotateNhoods(sce_mouseEmbryo_milo , da.res = stat_auc , coldata_col = "celltype.mapped") 11 | 12 | # generate toy data 13 | require(SingleCellExperiment) 14 | n_row = 50 15 | n_col = 100 16 | n_latent = 5 17 | sce = SingleCellExperiment(list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4 , 18 | logcounts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4) ) 19 | rownames(sce) = as.factor(1:n_row) 20 | colnames(sce) = c(1:n_col) 21 | sce$cell = colnames(sce) 22 | sce$sample = c(1:ncol(sce)) 23 | reducedDim(sce , "reduced_dim") = matrix(rnorm(n_col*n_latent), ncol=n_latent) 24 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim") 25 | 26 | sce_1 = sce 27 | sce_1$condition = ifelse(logcounts(sce_1[1,]) >= 5 , 1 , 0) 28 | sce_2 = sce 29 | sce_2$condition = sample(c(0,1) , ncol(sce_2) , replace = T) 30 | 31 | sce_3 = sce 32 | sce_3$condition = sapply(c(1:ncol(sce_3)) , function(i){ 33 | if (logcounts(sce_3[1,i]) <= 2){ 34 | return(0) 35 | } else if (logcounts(sce_3[1,i]) <= 4){ 36 | return(1) 37 | } else { 38 | return(2) 39 | } 40 | }) 41 | sce_4 = sce 42 | sce_4$condition = sample(c(0,1,2) , ncol(sce_4) , replace = T) 43 | 44 | 45 | 46 | 47 | # error msgs 48 | test_that("Wrong input gives errors", { 49 | # x should be of the right format 50 | expect_error(calc_AUC_per_neighbourhood(x = 1 , condition_id = "test"), 51 | "x should be a SingleCellExperiment or Milo object.", 52 | fixed=TRUE 53 | ) 54 | sce_test = sce_mouseEmbryo_milo 55 | colnames(sce_test) = rep(1,1,ncol(sce_test)) 56 | expect_error(calc_AUC_per_neighbourhood(sce_test , condition_id = "test2"), 57 | "If colnames(x) exist, they should be unique.", 58 | fixed=TRUE 59 | ) 60 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo , condition_id = "test3"), 61 | "x should be a Milo object. Please run `assign_neighbourhoods` first.", 62 | fixed=TRUE 63 | ) 64 | 65 | 66 | # genes should be a subset of sce's rows 67 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , genes = c("1" , "11") , condition_id = "tomato"), 68 | "Some gene names are missing from x", 69 | fixed=TRUE 70 | ) 71 | 72 | # sample_id should be of the right format 73 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , condition_id = "tomato" , sample_id = 1), 74 | "Check sample_id - should be character vector", 75 | fixed=TRUE 76 | ) 77 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , condition_id = "tomato" , sample_id = TRUE), 78 | "Check sample_id - should be character vector", 79 | fixed=TRUE 80 | ) 81 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , condition_id = "tomato" , sample_id = "test"), 82 | "'sample_id' should be in colData(x)", 83 | fixed=TRUE 84 | ) 85 | 86 | 87 | # condition_id should be of the right format 88 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , condition_id = "sample" ), 89 | "'sample_id' and 'condition_id' can not be the same", 90 | fixed=TRUE 91 | ) 92 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , condition_id = 1 ), 93 | "Check condition_id - should be character vector", 94 | fixed=TRUE 95 | ) 96 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , condition_id = "test" ), 97 | "'condition_id' should be in colData(x)", 98 | fixed=TRUE 99 | ) 100 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , condition_id = "stage" ), 101 | "x should have at least two levels for tested conditions.", 102 | fixed=TRUE 103 | ) 104 | 105 | 106 | # conditions are of the right format 107 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , 108 | condition_id = "tomato" , conditions = c(1,"2") ), 109 | "All specified conditions should be present.", 110 | fixed=TRUE 111 | ) 112 | expect_error(calc_AUC_per_neighbourhood(sce_4 , sample_id = "sample" , 113 | condition_id = "condition" , conditions = NULL , min_n_cells_per_sample = 1), 114 | "If conditions == NULL, there should be exactly two levels for tested conditions.", 115 | fixed=TRUE 116 | ) 117 | expect_error(calc_AUC_per_neighbourhood(sce_3 , sample_id = "sample" , 118 | condition_id = "condition" , conditions = NULL , min_n_cells_per_sample = 1), 119 | "If conditions == NULL, there should be exactly two levels for tested conditions.", 120 | fixed=TRUE 121 | ) 122 | expect_error(calc_AUC_per_neighbourhood(sce_3 , sample_id = "sample" , 123 | condition_id = "condition" , conditions = c(0,1,2) , min_n_cells_per_sample = 1), 124 | "Conditions should be a vector of 2 elements.", 125 | fixed=TRUE 126 | ) 127 | 128 | 129 | # min_n_cells_per_sample - positive integer 130 | expect_error(calc_AUC_per_neighbourhood(x = sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", min_n_cells_per_sample = "aa"), 131 | "Check min_n_cells_per_sample - should be positive integer", 132 | fixed=TRUE 133 | ) 134 | expect_error(calc_AUC_per_neighbourhood(x = sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", min_n_cells_per_sample = 1.5), 135 | "Check min_n_cells_per_sample - should be positive integer", 136 | fixed=TRUE 137 | ) 138 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", min_n_cells_per_sample = 0), 139 | "Check min_n_cells_per_sample - should be positive integer", 140 | fixed=TRUE 141 | ) 142 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", min_n_cells_per_sample = -100), 143 | "Check min_n_cells_per_sample - should be positive integer", 144 | fixed=TRUE 145 | ) 146 | 147 | # n_threads - positive integer 148 | expect_error(calc_AUC_per_neighbourhood(x = sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", n_threads = "aa"), 149 | "Check n_threads - should be positive integer", 150 | fixed=TRUE 151 | ) 152 | expect_error(calc_AUC_per_neighbourhood(x = sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", n_threads = 1.5), 153 | "Check n_threads - should be positive integer", 154 | fixed=TRUE 155 | ) 156 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", n_threads = 0), 157 | "Check n_threads - should be positive integer", 158 | fixed=TRUE 159 | ) 160 | expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", n_threads = -100), 161 | "Check n_threads - should be positive integer", 162 | fixed=TRUE 163 | ) 164 | }) 165 | 166 | 167 | 168 | 169 | # right input returns the expected format (in default output) 170 | test_that("Return is the correct class", { 171 | # right class 172 | expect_s3_class(stat_auc , "data.frame") 173 | 174 | out = calc_AUC_per_neighbourhood(sce_1 , condition_id = "condition" , min_n_cells_per_sample = 1) 175 | expect_s3_class(out , "data.frame") 176 | out = calc_AUC_per_neighbourhood(sce_2 , condition_id = "condition" , conditions = c(0,1)) 177 | expect_s3_class(out , "data.frame") 178 | out = calc_AUC_per_neighbourhood(sce_3 , condition_id = "condition" , conditions = c(0,1)) 179 | expect_s3_class(out , "data.frame") 180 | out = calc_AUC_per_neighbourhood(sce_4 , condition_id = "condition" , conditions = c(2,1) , min_n_cells_per_sample = 1) 181 | expect_s3_class(out , "data.frame") 182 | 183 | 184 | # right dimensions 185 | expect_equal(ncol(nhoods(sce_mouseEmbryo_milo)) , nrow(stat_auc)) 186 | expect_equal(4, ncol(stat_auc)) 187 | cols = c("Nhood" , "Nhood_center" , "auc" , "auc_calculated") 188 | expect_identical(cols , colnames(stat_auc)) 189 | 190 | }) 191 | 192 | 193 | 194 | # check that AUCs are approx of what we expect 195 | ## real mouse data 196 | test_that("AUCs make sense for mouse", { 197 | 198 | # AUCs for BPs are either NaNs or > 0.5 199 | aucs = stat_auc_w_annotated_hoods$auc[stat_auc_w_annotated_hoods$celltype.mapped == "Blood progenitors 2"] 200 | expect_equal(0, length(which(aucs < 0.5))) 201 | 202 | # AUCs for Endo are all > 0.5 (and not nan) 203 | aucs = stat_auc_w_annotated_hoods$auc[stat_auc_w_annotated_hoods$celltype.mapped == "Endothelium"] 204 | expect_equal(0, sum(is.na(aucs))) 205 | expect_equal(length(aucs), sum(aucs > 0.5)) 206 | }) 207 | 208 | 209 | ## simulations - 2 conditions 210 | test_that("AUCs make sense for sim1", { 211 | stat_1 = calc_AUC_per_neighbourhood(sce_1 , condition_id = "condition" , min_n_cells_per_sample = 1) 212 | stat_1_gene_1 = calc_AUC_per_neighbourhood(sce_1 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1) 213 | stat_1_min_cells_3 = calc_AUC_per_neighbourhood(sce_1 , condition_id = "condition" , min_n_cells_per_sample = 3) 214 | 215 | stat_2 = calc_AUC_per_neighbourhood(sce_2 , condition_id = "condition" , min_n_cells_per_sample = 1) 216 | stat_2_gene_1 = calc_AUC_per_neighbourhood(sce_2 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1) 217 | stat_2_min_cells_3 = calc_AUC_per_neighbourhood(sce_2 , condition_id = "condition" , min_n_cells_per_sample = 3) 218 | 219 | expect_gt(stat_1$auc , stat_2$auc) 220 | expect_gt(stat_1$auc , 0.5) 221 | expect_gt(stat_1_gene_1$auc , stat_1$auc) 222 | expect_true(is.nan(stat_1_min_cells_3$auc)) 223 | expect_true(is.nan(stat_2_min_cells_3$auc)) 224 | expect_true(!is.nan(stat_2$auc)) 225 | expect_true(!is.nan(stat_2_gene_1$auc)) 226 | }) 227 | 228 | 229 | ## simulations - 3 conditions 230 | test_that("AUCs make sense for sim2", { 231 | 232 | stat_3_12 = calc_AUC_per_neighbourhood(sce_3 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(1,2)) 233 | stat_3_01 = calc_AUC_per_neighbourhood(sce_3 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(1,0)) 234 | stat_3_20 = calc_AUC_per_neighbourhood(sce_3 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(0,2)) 235 | 236 | stat_4_12 = calc_AUC_per_neighbourhood(sce_4 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(1,2)) 237 | stat_4_01 = calc_AUC_per_neighbourhood(sce_4 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(1,0)) 238 | stat_4_20 = calc_AUC_per_neighbourhood(sce_4 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(0,2)) 239 | 240 | expect_gte(stat_3_20$auc , stat_3_12$auc) 241 | expect_gte(stat_3_20$auc , stat_3_01$auc) 242 | 243 | expect_gt(stat_3_12$auc , stat_4_12$auc) 244 | expect_gt(stat_3_01$auc , stat_4_20$auc) 245 | expect_gt(stat_3_20$auc , stat_4_01$auc) 246 | 247 | }) 248 | 249 | 250 | 251 | 252 | 253 | 254 | -------------------------------------------------------------------------------- /tests/testthat/test-convert_de_stat.R: -------------------------------------------------------------------------------- 1 | # context("Testing convert_de_stat") 2 | library(miloDE) 3 | library(miloR) 4 | library(SingleCellExperiment) 5 | 6 | # load data 7 | data("sce_mouseEmbryo", package = "miloDE") 8 | set.seed(32) 9 | sce = assign_neighbourhoods(sce_mouseEmbryo , k = 25, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 10 | meta_sce = as.data.frame(colData(sce)) 11 | de_stat_sce = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE") 12 | de_stat_df = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "data.frame") 13 | 14 | 15 | # wrong inputs return error 16 | # error msgs 17 | test_that("Wrong input gives errors", { 18 | 19 | # de_stat should be of the right format 20 | expect_error(convert_de_stat(de_stat = 1), 21 | "de_stat should be either data.frame or SingleCellExperiment object.\n 22 | To get valid de_stat object, please run 'de_test_neighbourhoods.R'", 23 | fixed=TRUE 24 | ) 25 | expect_error(convert_de_stat(de_stat = de_stat_df , assay_names = c("logFC") , coldata_names = c("logFC")), 26 | "assay_names and coldata_names can not overlap", 27 | fixed=TRUE 28 | ) 29 | expect_error(convert_de_stat(de_stat = de_stat_df , assay_names = c("test")), 30 | "colnames(de_stat) missing some of the assay_names or coldata_names.", 31 | fixed=TRUE 32 | ) 33 | expect_error(convert_de_stat(de_stat = de_stat_df , coldata_names = c("test")), 34 | "colnames(de_stat) missing some of the assay_names or coldata_names.", 35 | fixed=TRUE 36 | ) 37 | expect_error(convert_de_stat(de_stat = de_stat_sce , coldata_names = c("test")), 38 | "de_stat missing some of the coldata_names", 39 | fixed=TRUE 40 | ) 41 | expect_error(convert_de_stat(de_stat = de_stat_sce , assay_names = c("test")), 42 | "de_stat missing some of the required assays.", 43 | fixed=TRUE 44 | ) 45 | }) 46 | 47 | 48 | 49 | # right input returns the expected format (in default output) 50 | test_that("Return is the correct class", { 51 | # right class 52 | expect_s3_class(convert_de_stat(de_stat_sce) , "data.frame") 53 | expect_s4_class(convert_de_stat(de_stat_df) , "SingleCellExperiment") 54 | 55 | # right assay names, sce to df 56 | de_stat_test = convert_de_stat(de_stat_sce) 57 | out = mean(c("logFC" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods" , 58 | "Nhood" , "Nhood_center" , "test_performed") %in% colnames(de_stat_test)) 59 | expect_equal(1 , out) 60 | 61 | n_genes = length(unique(de_stat_test$gene)) 62 | expect_equal(n_genes , nrow(de_stat_sce)) 63 | n_hoods = length(unique(de_stat_test$Nhood)) 64 | expect_equal(n_hoods , ncol(de_stat_sce)) 65 | 66 | 67 | # right assay names, df to sce 68 | de_stat_test = convert_de_stat(de_stat_df) 69 | n_genes = length(unique(de_stat_df$gene)) 70 | expect_equal(n_genes , nrow(de_stat_test)) 71 | n_hoods = length(unique(de_stat_df$Nhood)) 72 | expect_equal(n_hoods , ncol(de_stat_test)) 73 | 74 | }) 75 | 76 | 77 | ## expect identical output if conversion is done post-hoc 78 | test_that("Conversion ad hoc is equal to conversion post hoc", { 79 | 80 | expect_identical(de_stat_df , convert_de_stat(de_stat_sce)) 81 | 82 | de_stat_sce_2 = convert_de_stat(de_stat_df) 83 | de_stat_sce = de_stat_sce[order(rownames(de_stat_sce)) , ] 84 | de_stat_sce_2 = de_stat_sce_2[order(rownames(de_stat_sce_2)) , ] 85 | 86 | expect_identical(assay(de_stat_sce, "logFC") , assay(de_stat_sce_2 , "logFC")) 87 | expect_identical(assay(de_stat_sce, "pval") , assay(de_stat_sce_2 , "pval")) 88 | expect_identical(assay(de_stat_sce, "pval_corrected_across_genes") , assay(de_stat_sce_2, "pval_corrected_across_genes")) 89 | expect_identical(assay(de_stat_sce, "pval_corrected_across_nhoods") , assay(de_stat_sce_2 , "pval_corrected_across_nhoods")) 90 | expect_identical(as.data.frame(colData(de_stat_sce)) , as.data.frame(colData(de_stat_sce_2))) 91 | 92 | }) 93 | 94 | 95 | ## expect conversion of right variables (coldata and assays) to the right format 96 | test_that("Conversion of additional metadata", { 97 | 98 | de_stat_df_test = de_stat_df 99 | de_stat_df_test$celltype = 0 100 | de_stat_df_test$celltype[de_stat_df_test$Nhood %in% c(1,2,5)] = 1 101 | 102 | de_stat_sce_test = convert_de_stat(de_stat_df_test , coldata_names = "celltype") 103 | meta_test = as.data.frame(colData(de_stat_sce_test)) 104 | 105 | out = rep(0,1,ncol(de_stat_sce_test)) 106 | out[c(1,2,5)] = 1 107 | expect_identical(meta_test$celltype , out) 108 | 109 | }) 110 | 111 | 112 | ## expect error if coldata is not legit 113 | test_that("Conversion of additional metadata legit only if metadata is legit", { 114 | 115 | de_stat_df_test = de_stat_df 116 | de_stat_df_test$celltype = 0 117 | unq_genes = unique(de_stat_df_test$gene) 118 | de_stat_df_test$celltype[de_stat_df_test$gene == unq_genes[1]] = 1 119 | 120 | expect_error(convert_de_stat(de_stat_df_test , coldata_names = "celltype")) 121 | expect_error(convert_de_stat(de_stat_df_test , assay_names = "celltype") , NA) 122 | 123 | de_stat_df_test = de_stat_sce 124 | assay(de_stat_df_test , "logFC_2") = assay(de_stat_sce , "logFC") 125 | out_1 = convert_de_stat(de_stat_df_test) 126 | out_2 = convert_de_stat(de_stat_df_test , assay_names = "logFC_2") 127 | 128 | expect_equal(FALSE , "logFC_2" %in% colnames(out_1)) 129 | expect_equal(TRUE , "logFC_2" %in% colnames(out_2)) 130 | }) 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /tests/testthat/test-de_test_neighbourhoods.R: -------------------------------------------------------------------------------- 1 | # context("Testing de_test_neighbourhoods") 2 | library(miloDE) 3 | library(miloR) 4 | library(SingleCellExperiment) 5 | 6 | # load data 7 | data("sce_mouseEmbryo", package = "miloDE") 8 | set.seed(32) 9 | sce = assign_neighbourhoods(sce_mouseEmbryo , k = 25, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 10 | meta_sce = as.data.frame(colData(sce)) 11 | de_stat_sce = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE") 12 | de_stat_df = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "data.frame") 13 | 14 | # wrong inputs return error 15 | # error msgs 16 | test_that("Wrong input gives errors", { 17 | 18 | # x should be of the right format 19 | expect_error(de_test_neighbourhoods(x = 1 , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE"), 20 | "x should be a SingleCellExperiment or Milo object.", 21 | fixed=TRUE 22 | ) 23 | sce_test = sce_mouseEmbryo 24 | colnames(sce_test) = rep(1,1,ncol(sce_test)) 25 | expect_error(de_test_neighbourhoods(x = sce_test , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE"), 26 | "If colnames(x) exist, they should be unique.", 27 | fixed=TRUE 28 | ) 29 | 30 | # sample_id should be character 31 | expect_error(de_test_neighbourhoods(x = sce , sample_id = 11, design = ~tomato, covariates = c("tomato"), output_type = "SCE"), 32 | "Check sample_id - should be character vector", 33 | fixed=TRUE 34 | ) 35 | expect_error(de_test_neighbourhoods(x = sce , sample_id = TRUE, design = ~tomato, covariates = c("tomato"), output_type = "SCE"), 36 | "Check sample_id - should be character vector", 37 | fixed=TRUE 38 | ) 39 | expect_error(de_test_neighbourhoods(x = sce , sample_id = sce, design = ~tomato, covariates = c("tomato"), output_type = "SCE"), 40 | "Check sample_id - should be character vector", 41 | fixed=TRUE 42 | ) 43 | 44 | # design should be formula 45 | expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = "~tomato", covariates = c("tomato"), output_type = "SCE"), 46 | "Check design - should be formula object", 47 | fixed=TRUE 48 | ) 49 | expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = 1, covariates = c("tomato"), output_type = "SCE"), 50 | "Check design - should be formula object", 51 | fixed=TRUE 52 | ) 53 | 54 | # covariates should be vector 55 | expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = 1, output_type = "SCE"), 56 | "Check covariates - should be character vector", 57 | fixed=TRUE 58 | ) 59 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, output_type = "SCE"), 60 | fixed=TRUE 61 | ) 62 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = NULL, output_type = "SCE"), 63 | "Check covariates - should be character vector", 64 | fixed=TRUE 65 | ) 66 | 67 | # contrasts should be NULL or character vector; if character - length 1 68 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), contrasts = c(1,2), output_type = "SCE"), 69 | "Check contrasts - should be NULL or character vector", 70 | fixed=TRUE 71 | ) 72 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), contrasts = c("1","2"), output_type = "SCE"), 73 | "At the moment we only support one comparison - contrasts should be of length 1. If you wish to perform several comparisons, please run separately for each of them.", 74 | fixed=TRUE 75 | ) 76 | 77 | # min_n_cells_per_sample - positive integer 78 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_n_cells_per_sample = "aa", output_type = "SCE"), 79 | "Check min_n_cells_per_sample - should be positive integer", 80 | fixed=TRUE 81 | ) 82 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_n_cells_per_sample = 1.5, output_type = "SCE"), 83 | "Check min_n_cells_per_sample - should be positive integer", 84 | fixed=TRUE 85 | ) 86 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_n_cells_per_sample = 0, output_type = "SCE"), 87 | "Check min_n_cells_per_sample - should be positive integer", 88 | fixed=TRUE 89 | ) 90 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_n_cells_per_sample = -100, output_type = "SCE"), 91 | "Check min_n_cells_per_sample - should be positive integer", 92 | fixed=TRUE 93 | ) 94 | 95 | 96 | # min_count - non negative 97 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_count = "aa", output_type = "SCE"), 98 | "Check min_count - should be non negative number", 99 | fixed=TRUE 100 | ) 101 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_count = -100, output_type = "SCE"), 102 | "Check min_count - should be non negative number", 103 | fixed=TRUE 104 | ) 105 | 106 | 107 | # output_type - either data.frame or SCE 108 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = 1 ), 109 | "Check output_type - should be either 'data.frame' or 'SCE'", 110 | fixed=TRUE 111 | ) 112 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = TRUE ), 113 | "Check output_type - should be either 'data.frame' or 'SCE'", 114 | fixed=TRUE 115 | ) 116 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = 'dataframe' ), 117 | "Check output_type - should be either 'data.frame' or 'SCE'", 118 | fixed=TRUE 119 | ) 120 | 121 | # plot_summary_stat - either TRUE or FALSE 122 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = -1), 123 | "Check plot_summary_stat - should be either TRUE or FALSE", 124 | fixed=TRUE 125 | ) 126 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = 10 ), 127 | "Check plot_summary_stat - should be either TRUE or FALSE", 128 | fixed=TRUE 129 | ) 130 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = "2" ), 131 | "Check plot_summary_stat - should be either TRUE or FALSE", 132 | fixed=TRUE 133 | ) 134 | 135 | 136 | # sample_id - should be in colData x 137 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample1", design = ~tomato, covariates = c("tomato")), 138 | "'sample_id' should be in colData(x)", 139 | fixed=TRUE 140 | ) 141 | 142 | 143 | # check covariates in coldata 144 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~sex+sample1+tomato, covariates = c("tomato" , "sex" , "sample1")), 145 | "All covariates should be colnames of colData(x).", 146 | fixed=TRUE 147 | ) 148 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample",design = ~sex+stage+tomato, covariates = c("tomato" , "sex" , "stage")), 149 | "All covariates should have more than 1 contrast.", 150 | fixed=TRUE 151 | ) 152 | 153 | 154 | # if plot_stat = T, reducedDim should be in sce 155 | expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = T , layout = "pca"), 156 | "reducedDim_name should be in reducedDimNames(x).", 157 | fixed=TRUE 158 | ) 159 | 160 | # check subset_hoods 161 | subset_hoods = c(0,3,20) 162 | expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = F , subset_nhoods = c(0,3,20)), 163 | "If 'subset_nhoods' is numeric vector, it should lie within c(1:ncol(nhoods(x))).", 164 | fixed=TRUE 165 | ) 166 | 167 | # design and covariates should match 168 | expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~stage.mapped*tomato, covariates = c("tomato"), plot_summary_stat = F ), 169 | "Some of the design's arguments are not in the covariate vector.", 170 | fixed=TRUE 171 | ) 172 | 173 | }) 174 | 175 | 176 | 177 | # right input returns the expected format (in default output) 178 | test_that("Return is the correct class", { 179 | # right class 180 | expect_s4_class(de_stat_sce , "SingleCellExperiment") 181 | expect_s3_class(de_stat_df , "data.frame") 182 | 183 | cols_df = c("Nhood" , "gene" , "logFC" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods" , "Nhood_center" , "test_performed" ) 184 | expect_identical(cols_df , colnames(de_stat_df)) 185 | expect_identical(cols_df , colnames(de_stat_df)) 186 | 187 | cols_meta_sce = c("Nhood" , "Nhood_center" , "test_performed" ) 188 | expect_identical(cols_meta_sce , colnames(colData(de_stat_sce))) 189 | expect_identical(cols_meta_sce , colnames(colData(de_stat_sce))) 190 | 191 | cols_assay_sce = c("logFC" , "pval" , "pval_corrected_across_genes", "pval_corrected_across_nhoods" ) 192 | expect_identical(cols_assay_sce , assayNames(de_stat_sce)) 193 | expect_identical(cols_assay_sce , assayNames(de_stat_sce)) 194 | 195 | }) 196 | 197 | 198 | 199 | # subset hoods 200 | ## returns right hoods/colnames 201 | test_that("Subset hoods returns right nhoods", { 202 | 203 | de_stat = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE" , subset_nhoods = c(1,8,4,10)) 204 | expect_identical(colnames(de_stat) , c("1" , "4" , "8" , "10")) 205 | meta = as.data.frame(colData(de_stat)) 206 | expect_equal(meta$Nhood , c(1,4,8,10)) 207 | 208 | de_stat = convert_de_stat(de_stat) 209 | expect_equal(unique(de_stat$Nhood) , c(1,4,8,10)) 210 | 211 | }) 212 | 213 | ## same fdr,pval across genes but not spatfdr 214 | test_that("Subset hoods does not change FDR", { 215 | 216 | de_stat_subset = de_test_neighbourhoods(x = sce, sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "data.frame" , subset_nhoods = c(1,8,4,10)) 217 | de_stat_subset = de_stat_subset[, c("Nhood" , "gene" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods")] 218 | de_stat_subset = de_stat_subset[!is.na(de_stat_subset$pval) , ] 219 | colnames(de_stat_subset) = c("Nhood" , "gene" , "pval_1" , "pval_corrected_across_genes_1" , "pval_corrected_across_nhoods_1") 220 | 221 | de_stat_full = de_stat_df[de_stat_df$Nhood %in% c(10,4,8,1) , ] 222 | de_stat_full = de_stat_full[, c("Nhood" , "gene" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods")] 223 | de_stat_full = de_stat_full[!is.na(de_stat_full$pval) , ] 224 | colnames(de_stat_full) = c("Nhood" , "gene" , "pval_2" , "pval_corrected_across_genes_2" , "pval_corrected_across_nhoods_2") 225 | 226 | de_stat = merge(de_stat_subset , de_stat_full, by = c("gene" , "Nhood")) 227 | 228 | expect_identical(de_stat$pval_1 , de_stat$pval_2) 229 | expect_identical(de_stat$pval_corrected_across_genes_1 , de_stat$pval_corrected_across_genes_2) 230 | expect_false(isTRUE(all.equal(de_stat$pval_corrected_across_nhoods_1 , de_stat$pval_corrected_across_nhoods_2))) 231 | 232 | }) 233 | 234 | 235 | # min_n_cells_per_sample 236 | ## bigger min_n_cells_per_sample -- less hoods are tested 237 | test_that("Bigger min_n_cells_per_sample -- less hoods are tested", { 238 | de_stat_1 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato"), output_type = "SCE" , min_n_cells_per_sample = 1) 239 | out_1 = as.data.frame(colData(de_stat_1)) 240 | out_1 = sum(out_1$test_performed) 241 | de_stat_2 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato"), output_type = "SCE" , min_n_cells_per_sample = 100) 242 | out_2 = as.data.frame(colData(de_stat_2)) 243 | out_2 = sum(out_2$test_performed) 244 | expect_lt(out_2 , out_1) 245 | }) 246 | 247 | 248 | 249 | # min_count 250 | ## higher min_count -- less genes to be tested (in hood, not none) 251 | test_that("Higher min_count -- less genes are tested", { 252 | de_stat_sce_2 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato"), output_type = "SCE" , min_count = 50) 253 | expect_gt(nrow(de_stat_sce) , nrow(de_stat_sce_2)) 254 | }) 255 | 256 | 257 | 258 | # covs 259 | test_that("Covariate check - wrong covariate matrix will give NULL", { 260 | de_stat = de_test_neighbourhoods(sce , design = ~sex+tomato, covariates = c("tomato" , "sex"), output_type = "SCE" , min_count = 3) 261 | expect_null(de_stat) 262 | 263 | de_stat = de_test_neighbourhoods(sce , design = ~sex+toy_cov_1+tomato, covariates = c("tomato","sex","toy_cov_1"), output_type = "SCE" , min_count = 3) 264 | expect_null(de_stat) 265 | 266 | de_stat = de_test_neighbourhoods(sce , design = ~toy_cov_1+tomato, covariates = c("tomato","toy_cov_1"), output_type = "SCE" , min_count = 3 ) 267 | meta = as.data.frame(colData(de_stat)) 268 | expect_gt(sum(meta$test_performed) , 0) 269 | }) 270 | 271 | # contrast check 272 | test_that("Contrasts check -- has to be formula colnames of model matrix", { 273 | expect_error(de_test_neighbourhoods(sce , design = ~0+tomato, covariates = c("tomato") , contrasts = c("tomatoT-tomatoF")), 274 | "contrasts are not right. All variables in the formula should be the colnames from model matrix: tomatoFALSE, tomatoTRUE", 275 | fixed=TRUE 276 | ) 277 | expect_error(de_test_neighbourhoods(sce , design = ~0+tomato, covariates = c("tomato") , contrasts = c("tomatoTRUE-tomatoFALSE")), 278 | NA 279 | ) 280 | expect_error(de_test_neighbourhoods(sce , design = ~0+tomato, covariates = c("tomato") , 281 | contrasts = c("tomatoTRUE-tomatoFALSE" , "tomatoTRUE+tomatoFALSE")), 282 | "At the moment we only support one comparison - contrasts should be of length 1. If you wish to perform several comparisons, please run separately for each of them.", 283 | fixed=TRUE 284 | ) 285 | # covariate as last model matrice's col (NULL) give same results 286 | out_1 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato") , 287 | contrasts = c("tomatoTRUE") , output_type = "data.frame") 288 | out_2 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato") , output_type = "data.frame") 289 | expect_identical(out_1 , out_2) 290 | }) 291 | 292 | 293 | 294 | 295 | -------------------------------------------------------------------------------- /tests/testthat/test-estimate_neighbourhood_sizes.R: -------------------------------------------------------------------------------- 1 | # context("Testing assign_neighbourhoods") 2 | library(miloDE) 3 | 4 | # load data 5 | data("sce_mouseEmbryo", package = "miloDE") 6 | 7 | stat = estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k = c(5,10,20), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 8 | 9 | # error msgs 10 | test_that("Wrong input gives errors", { 11 | 12 | # x should be of the right format 13 | expect_error(estimate_neighbourhood_sizes(x = 1 , k_grid = c(10,30,50), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 14 | "x should be a SingleCellExperiment or Milo object.", 15 | fixed=TRUE 16 | ) 17 | sce_test = sce_mouseEmbryo 18 | colnames(sce_test) = rep(1,1,ncol(sce_test)) 19 | expect_error(estimate_neighbourhood_sizes(sce_test , k_grid = c(10,30,50), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 20 | "If colnames(x) exist, they should be unique.", 21 | fixed=TRUE 22 | ) 23 | 24 | 25 | # k_grid should be of the right format 26 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c("10" , "30" , "50"), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 27 | "Check k_grid - should be numeric vector", 28 | fixed=TRUE 29 | ) 30 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(-1,0,10), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 31 | "Values of k_grid should be positive integers. Please enter valid k_grid.", 32 | fixed=TRUE 33 | ) 34 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,20.5,30), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 35 | "Values of k_grid should be positive integers. Please enter valid k_grid.", 36 | fixed=TRUE 37 | ) 38 | expect_warning(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = 20, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 39 | "You only selected one value for k. If it is intended, we recommend to run directly 'assign_neighbourhoods'." 40 | ) 41 | expect_warning(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(5,20,1500), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 42 | "The highest selected value is > 1000. It is gonna cost computationally, and we generally do not recommend such high k. Consider reducing." 43 | ) 44 | 45 | 46 | # prop should be positive number between 0 and 1 47 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = "0.2", order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 48 | "Check prop - should be positive number between 0 and 1", 49 | fixed=TRUE 50 | ) 51 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 52 | "Check prop - should be positive number between 0 and 1", 53 | fixed=TRUE 54 | ) 55 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = -1, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 56 | "Check prop - should be positive number between 0 and 1", 57 | fixed=TRUE 58 | ) 59 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 60 | "Check prop - should be positive number between 0 and 1", 61 | fixed=TRUE 62 | ) 63 | 64 | 65 | # order should be 1 or 2 66 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 0, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 67 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)", 68 | fixed=TRUE 69 | ) 70 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30), prop = 0.1, order = 3, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 71 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)", 72 | fixed=TRUE 73 | ) 74 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30), prop = 0.1, order = 1.5, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 75 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)", 76 | fixed=TRUE 77 | ) 78 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = "x", filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 79 | "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)", 80 | fixed=TRUE 81 | ) 82 | 83 | # check filtering should be T or F 84 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, filtering = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 30), 85 | "Check filtering - should be either TRUE or FALSE", 86 | fixed=TRUE 87 | ) 88 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, filtering = "aa", reducedDim_name = "pca.corrected", k_init = 50, d = 30), 89 | "Check filtering - should be either TRUE or FALSE", 90 | fixed=TRUE 91 | ) 92 | 93 | # reducedDim should be character 94 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, filtering = TRUE, reducedDim_name = 1, k_init = 50, d = 30), 95 | "Check reducedDim_name - should be character vector", 96 | fixed=TRUE 97 | ) 98 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = TRUE, k_init = 50, d = 30), 99 | "Check reducedDim_name - should be character vector", 100 | fixed=TRUE 101 | ) 102 | 103 | 104 | # k_init - positive integer 105 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = "50", d = 30), 106 | "Check k_init - should be positive integer", 107 | fixed=TRUE 108 | ) 109 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 0, d = 30), 110 | "Check k_init - should be positive integer", 111 | fixed=TRUE 112 | ) 113 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = -10, d = 30), 114 | "Check k_init - should be positive integer", 115 | fixed=TRUE 116 | ) 117 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 10.5, d = 30), 118 | "Check k_init - should be positive integer", 119 | fixed=TRUE 120 | ) 121 | 122 | 123 | # d - positive integer 124 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = "30"), 125 | "Check d - should be positive integer", 126 | fixed=TRUE 127 | ) 128 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 0), 129 | "Check d - should be positive integer", 130 | fixed=TRUE 131 | ) 132 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = -1), 133 | "Check d - should be positive integer", 134 | fixed=TRUE 135 | ) 136 | expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 1.5), 137 | "Check d - should be positive integer", 138 | fixed=TRUE 139 | ) 140 | 141 | 142 | # reduced dim should be in reducedDimNames 143 | expect_error(estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.2, k_init = 50, d = 30) 144 | ) 145 | 146 | expect_error(estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca_corrected", k_init = 50, d = 30), 147 | "reducedDim_name should be in reducedDimNames(x).", 148 | fixed=TRUE 149 | ) 150 | 151 | 152 | # cluster_id -- NULL or in the columns of colData 153 | expect_error(estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.2, order = 2, filtering = T, 154 | reducedDim_name = "pca.corrected", k_init = 50, d = 30 , cluster_id = "ct"), 155 | "If cluster_id not NULL, it should be in colnames(colData(x))", 156 | fixed=TRUE 157 | ) 158 | 159 | }) 160 | 161 | 162 | 163 | # return of the correct output 164 | test_that("Return is the correct class", { 165 | # right class 166 | expect_s3_class(stat, "data.frame") 167 | 168 | # right colnames 169 | expected_colnames = c("k" , "min" , "q25" , "med" , "q75" , "max") 170 | expect_identical(colnames(stat) , expected_colnames) 171 | 172 | # right rownames 173 | expect_equal(nrow(stat) , 3) 174 | }) 175 | 176 | 177 | 178 | # values increasing in both directions 179 | test_that("Values increase as expected", { 180 | 181 | expect_gt(stat$min[2] , stat$min[1]) 182 | expect_gt(stat$min[3] , stat$min[2]) 183 | expect_gt(stat$q75[2] , stat$q25[2]) 184 | expect_gt(stat$med[3] , stat$q25[3]) 185 | expect_gt(stat$max[1] , stat$med[1]) 186 | expect_gt(stat$q25[2] , stat$min[3]) 187 | 188 | }) 189 | 190 | 191 | test_that("Finishes for diff cluster_id", { 192 | out_null = estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30), prop = 0.2, order = 2, filtering = T, 193 | reducedDim_name = "pca.corrected", k_init = 50, d = 30 , cluster_id = NULL) 194 | out_ct = estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30), prop = 0.2, order = 2, filtering = T, 195 | reducedDim_name = "pca.corrected", k_init = 50, d = 30 , cluster_id = "celltype.mapped") 196 | 197 | expect_s3_class(out_null, "data.frame") 198 | expect_s3_class(out_ct, "data.frame") 199 | 200 | }) 201 | 202 | 203 | test_that("CT grid should be reasonable", { 204 | expect_error(estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30,1000), prop = 0.2, order = 2, filtering = T, 205 | reducedDim_name = "pca.corrected", k_init = 50, d = 30 , cluster_id = "celltype.mapped"), 206 | "All specified clusters have # cells < 2*max(k). We recommed to provide lower clustering resolution, decreasing max(k) or set cluster_id = NULL.", 207 | fixed=TRUE 208 | ) 209 | }) 210 | 211 | 212 | 213 | -------------------------------------------------------------------------------- /tests/testthat/test-filter_neighbourhoods.R: -------------------------------------------------------------------------------- 1 | # context("Testing filter_neighbourhoods") 2 | library(miloDE) 3 | library(miloR) 4 | 5 | # load data 6 | data("sce_mouseEmbryo", package = "miloDE") 7 | 8 | 9 | # error msgs 10 | test_that("Wrong input gives errors", { 11 | # sce should be of the right format 12 | expect_error(filter_neighbourhoods(x = 1), 13 | "x should be a Milo object. Please run `assign_neighbourhoods` first.", 14 | fixed=TRUE 15 | ) 16 | expect_error(filter_neighbourhoods(sce_mouseEmbryo), 17 | "x should be a Milo object. Please run `assign_neighbourhoods` first.", 18 | fixed=TRUE 19 | ) 20 | sce_test = Milo(sce_mouseEmbryo) 21 | expect_error(filter_neighbourhoods(sce_test), 22 | "x should contain non-trivial graph. Please run `assign_neighbourhoods` first.", 23 | fixed=TRUE 24 | ) 25 | }) 26 | 27 | # return correct class 28 | test_that("Return is the correct class", { 29 | # right class 30 | out = assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, order = 2, filtering = FALSE, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 31 | out = filter_neighbourhoods(out) 32 | expect_s4_class(out, "Milo") 33 | }) 34 | 35 | 36 | # filtering returns less nhoods 37 | test_that("Filtering returns less nhoods", { 38 | set.seed(15) 39 | sce_milo = assign_neighbourhoods(sce_mouseEmbryo , k = 25, filtering = FALSE, reducedDim_name = "pca.corrected") 40 | sce_milo_filtered = filter_neighbourhoods(sce_milo) 41 | 42 | # sce should be of the right format 43 | expect_gt(ncol(nhoods(sce_milo)) , ncol(nhoods(sce_milo_filtered))) 44 | expect_equal(nrow(nhoods(sce_milo)) , nrow(nhoods(sce_milo_filtered))) 45 | }) 46 | 47 | 48 | # filtering filtered data gives same result 49 | test_that("Filtering filtered data is futile_1", { 50 | set.seed(15) 51 | sce_milo = assign_neighbourhoods(sce_mouseEmbryo , k = 25, filtering = TRUE, reducedDim_name = "pca.corrected") 52 | sce_milo_filtered = filter_neighbourhoods(sce_milo) 53 | 54 | # sce should be of the right format 55 | expect_identical(nhoods(sce_milo) , nhoods(sce_milo_filtered)) 56 | expect_identical(as.numeric(nhoodIndex(sce_milo)) , as.numeric(nhoodIndex(sce_milo_filtered))) 57 | 58 | }) 59 | 60 | 61 | # initial filtering = post hoc filtering 62 | test_that("Filtering filtered data is futile_2", { 63 | set.seed(32) 64 | sce_milo_1 = assign_neighbourhoods(sce_mouseEmbryo , k = 25, filtering = TRUE, reducedDim_name = "pca.corrected") 65 | 66 | set.seed(32) 67 | sce_milo_2 = assign_neighbourhoods(sce_mouseEmbryo , k = 25, filtering = FALSE, reducedDim_name = "pca.corrected") 68 | sce_milo_2 = filter_neighbourhoods(sce_milo_2) 69 | 70 | # sce should be of the right format 71 | expect_identical(nhoods(sce_milo_1) , nhoods(sce_milo_2)) 72 | expect_identical( as.numeric(nhoodIndex(sce_milo_1)) , as.numeric(nhoodIndex(sce_milo_2))) 73 | 74 | }) 75 | 76 | -------------------------------------------------------------------------------- /tests/testthat/test-rank_neighbourhoods_by_DE_magnitude.R: -------------------------------------------------------------------------------- 1 | # context("Testing rank_neighbourhoods_by_DE_magnitude") 2 | library(miloDE) 3 | 4 | # load data 5 | data("sce_mouseEmbryo", package = "miloDE") 6 | set.seed(32) 7 | sce_milo = assign_neighbourhoods(sce_mouseEmbryo , k = 25, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 8 | meta_sce = as.data.frame(colData(sce_milo)) 9 | de_stat = de_test_neighbourhoods(sce_milo , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE") 10 | 11 | 12 | # wrong inputs return error 13 | # error msgs 14 | test_that("Wrong input gives errors", { 15 | require(SingleCellExperiment) 16 | # de_stat should be right format 17 | expect_error(rank_neighbourhoods_by_DE_magnitude(1), 18 | "de_stat should be either data.frame or SingleCellExperiment object.\n 19 | To get valid de_stat object, please run 'de_test_neighbourhoods.R'", 20 | fixed=TRUE 21 | ) 22 | expect_error(rank_neighbourhoods_by_DE_magnitude(sce_milo), 23 | "de_stat should be either data.frame or SingleCellExperiment object.\n 24 | To get valid de_stat object, please run 'de_test_neighbourhoods.R'", 25 | fixed=TRUE 26 | ) 27 | 28 | 29 | out = data.frame(Nhood = c(1:10) , Nhood_center = c(1:10)) 30 | out$logFC = 0 31 | out$pval = 1 32 | expect_error(rank_neighbourhoods_by_DE_magnitude(out), 33 | "colnames(de_stat) missing some of the assay_names or coldata_names.", 34 | fixed=TRUE 35 | ) 36 | 37 | out$pval_corrected_across_nhoods = 1 38 | out$pval_corrected_across_genes = 1 39 | out$Nhood = as.character(out$Nhood) 40 | expect_error(rank_neighbourhoods_by_DE_magnitude(out), 41 | "Nhood field should be numeric. To get valid de_stat object, please run 'de_test_neighbourhoods.R'", 42 | fixed=TRUE 43 | ) 44 | 45 | 46 | out = SingleCellExperiment(list(logFC = assay(de_stat , "logFC") , 47 | pval = assay(de_stat , "pval") , 48 | pval_corrected_across_genes = assay(de_stat , "pval_corrected_across_genes") , 49 | pval_corrected_across_nhoods = assay(de_stat , "pval_corrected_across_nhoods") 50 | )) 51 | rownames(out) = rownames(de_stat) 52 | out$Nhood = de_stat$Nhood 53 | expect_error(rank_neighbourhoods_by_DE_magnitude(out), 54 | "de_stat missing some of the coldata_names", 55 | fixed=TRUE 56 | ) 57 | out$Nhood = as.character(out$Nhood) 58 | out$Nhood_center = out$Nhood 59 | expect_error(rank_neighbourhoods_by_DE_magnitude(out), 60 | "colData field 'Nhood' should be numeric. To get valid de_stat object, please run 'de_test_neighbourhoods.R'", 61 | fixed=TRUE 62 | ) 63 | 64 | 65 | # pvalue - positive number between 0 and 1 66 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = "a"), 67 | "pval.thresh should be numeric", 68 | fixed=TRUE 69 | ) 70 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = FALSE), 71 | "pval.thresh should be numeric", 72 | fixed=TRUE 73 | ) 74 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = c(1:10)), 75 | "pval.thresh should be a single number", 76 | fixed=TRUE 77 | ) 78 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = -1), 79 | "pval.thresh should be between 0 and 1", 80 | fixed=TRUE 81 | ) 82 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 0), 83 | "pval.thresh should be between 0 and 1", 84 | fixed=TRUE 85 | ) 86 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 1), 87 | "pval.thresh should be between 0 and 1", 88 | fixed=TRUE 89 | ) 90 | 91 | 92 | # z-thresh - neagtive number 93 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = "a"), 94 | "z.thresh should be numeric", 95 | fixed=TRUE 96 | ) 97 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = FALSE), 98 | "z.thresh should be numeric", 99 | fixed=TRUE 100 | ) 101 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = c(1:10)), 102 | "z.thresh should be a single number", 103 | fixed=TRUE 104 | ) 105 | expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = 0.01), 106 | "z.thresh should be not higher than 0", 107 | fixed=TRUE 108 | ) 109 | 110 | }) 111 | 112 | 113 | test_that("right format", { 114 | out = rank_neighbourhoods_by_DE_magnitude(de_stat) 115 | expect_s3_class(out, "data.frame") 116 | expect_equal(nrow(out) , ncol(nhoods(sce_milo))) 117 | 118 | nhood_stat = data.frame(Nhood = c(1:ncol(nhoods(sce_milo))) , Nhood_center = colnames(nhoods(sce_milo))) 119 | nhood_stat$Nhood_center = as.character(nhood_stat$Nhood_center) 120 | 121 | out = out[, c("Nhood" , "Nhood_center")] 122 | out$Nhood_center = as.character(out$Nhood_center) 123 | out = out[order(out$Nhood) , ] 124 | expect_identical(out$Nhood , nhood_stat$Nhood) 125 | expect_identical(out$Nhood_center , nhood_stat$Nhood_center) 126 | }) 127 | 128 | 129 | 130 | test_that("NaNs give 0s", { 131 | # de_stat should be right format 132 | test = convert_de_stat(de_stat) 133 | test$pval = NaN 134 | test$pval_corrected_across_nhoods = NaN 135 | test$pval_corrected_across_genes = NaN 136 | 137 | out = rank_neighbourhoods_by_DE_magnitude(test) 138 | expect_s3_class(out, "data.frame") 139 | expect_equal(1 , mean(out$n_DE_genes == 0)) 140 | expect_equal(1 , mean(out$n_specific_DE_genes == 0)) 141 | }) 142 | 143 | 144 | 145 | test_that("lower pvals - less genes", { 146 | out_1 = rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 0.01) 147 | out_2 = rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 0.1) 148 | out_3 = rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 0.5) 149 | expect_gte(mean(out_3$n_DE_genes) , mean(out_2$n_DE_genes)) 150 | expect_gte(mean(out_2$n_DE_genes) , mean(out_1$n_DE_genes)) 151 | }) 152 | 153 | test_that("lower z - less genes", { 154 | out_1 = rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = -1) 155 | out_2 = rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = -2) 156 | out_3 = rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = -3) 157 | expect_gte(mean(out_1$n_specific_DE_genes) , mean(out_2$n_specific_DE_genes)) 158 | expect_gte(mean(out_2$n_specific_DE_genes) , mean(out_3$n_specific_DE_genes)) 159 | }) 160 | 161 | 162 | -------------------------------------------------------------------------------- /tests/testthat/test-spatial_pval_adjustment.R: -------------------------------------------------------------------------------- 1 | # context("Testing spatial_pval_adjustment") 2 | library(miloDE) 3 | library(miloR) 4 | library(SingleCellExperiment) 5 | 6 | # load data 7 | data("sce_mouseEmbryo", package = "miloDE") 8 | set.seed(32) 9 | sce = assign_neighbourhoods(sce_mouseEmbryo , k = 25, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30) 10 | nhoods_sce = nhoods(sce) 11 | pvals_1 = rep(1,0.0001,ncol(nhoods_sce)) 12 | pvals_2 = rep(1,0.001,ncol(nhoods_sce)) 13 | pvals_3 = rep(1,0.01,ncol(nhoods_sce)) 14 | pvals_4 = rep(1,0.1,ncol(nhoods_sce)) 15 | pvals_5 = pvals_4 16 | pvals_5[c(4,1,8)] = NaN 17 | 18 | 19 | test_that("Right length", { 20 | out = spatial_pval_adjustment(nhoods_sce , pvals_2) 21 | expect_equal(ncol(nhoods_sce) , length(out)) 22 | }) 23 | 24 | test_that("NaNs at right places", { 25 | 26 | out = spatial_pval_adjustment(nhoods_sce , pvals_5) 27 | expect_identical(out[1] , NaN) 28 | expect_identical(out[4] , NaN) 29 | expect_identical(out[8] , NaN) 30 | }) 31 | 32 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/miloDE__mouse_embryo.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "miloDE: chimera mouse embryo, Tal1-" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{miloDE__mouse_embryo} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | library(knitr) 16 | 17 | ``` 18 | 19 | # Load libraries 20 | 21 | ```{r setup} 22 | 23 | library(miloDE) 24 | 25 | # library containing toy data 26 | suppressMessages(library(MouseGastrulationData)) 27 | 28 | # analysis libraries 29 | library(scuttle) 30 | suppressMessages(library(miloR)) 31 | suppressMessages(library(uwot)) 32 | library(scran) 33 | suppressMessages(library(dplyr)) 34 | library(reshape2) 35 | 36 | # packages for gene cluster analysis 37 | library(scWGCNA) 38 | suppressMessages(library(Seurat)) 39 | 40 | # plotting libraries 41 | library(ggplot2) 42 | library(viridis) 43 | library(ggpubr) 44 | 45 | ``` 46 | 47 | We will also show how we can parallel `de_test_neighbourhoods`. For this we need to load `BiocParallel` and enable multicore parallel evaluation. 48 | 49 | ```{r setup-parallel} 50 | 51 | library(BiocParallel) 52 | 53 | ncores = 4 54 | mcparam = MulticoreParam(workers = ncores) 55 | register(mcparam) 56 | 57 | ``` 58 | 59 | 60 | # Load data 61 | 62 | We will use data from mouse gastrulation scRNA-seq atlas [Pijuan-Sala et al., 2019](https://pubmed.ncbi.nlm.nih.gov/30787436/). As a part of this project, using chimera mouse embryos, authors characterised the impact of Tal1 knock out on the mouse development. The most prominent phenotype was a loss of blood cells in Tal1- cells. 63 | 64 | In this vignette, we will apply miloDE on cells contributing to blood lineage (endothelia and haematoendothelial progenitors (haem. prog-s.)) and assess whether we can detect and characterise more subtle phenotypes (i.e. DE). 65 | 66 | Tal1-/+ chimer data are processed and directly available within `MouseGastrulationData` package. As condition ID, we will use slot `tomato` which indicates whether cells carry Tal1 KO. 67 | 68 | 69 | ```{r load-data, fig.width=8 , fig.cap="UMAPs, coloured by cell types"} 70 | 71 | 72 | # load chimera Tal1 73 | sce = suppressMessages(MouseGastrulationData::Tal1ChimeraData()) 74 | 75 | # downsample to few selected cell types 76 | cts = c("Spinal cord" , "Haematoendothelial progenitors", "Endothelium" , "Blood progenitors 1" , "Blood progenitors 2") 77 | sce = sce[, sce$celltype.mapped %in% cts] 78 | # let's rename Haematoendothelial progenitors 79 | sce$celltype.mapped[sce$celltype.mapped == "Haematoendothelial progenitors"] = "Haem. prog-s." 80 | 81 | # delete row for tomato 82 | sce = sce[!rownames(sce) == "tomato-td" , ] 83 | 84 | # add logcounts 85 | sce = logNormCounts(sce) 86 | 87 | # update tomato field to be more interpretable 88 | sce$tomato = sapply(sce$tomato , function(x) ifelse(isTRUE(x) , "Tal1_KO" , "WT")) 89 | 90 | # for this exercise, we focus on 3000 highly variable genes (for computational efficiency) 91 | dec.sce = modelGeneVar(sce) 92 | hvg.genes = getTopHVGs(dec.sce, n = 3000) 93 | sce = sce[hvg.genes , ] 94 | # change rownames to symbol names 95 | rowdata = as.data.frame(rowData(sce)) 96 | rownames(sce) = rowdata$SYMBOL 97 | 98 | # add UMAPs 99 | set.seed(32) 100 | umaps = as.data.frame(uwot::umap(reducedDim(sce , "pca.corrected"))) 101 | # let's store UMAPs - we will use them for visualisation 102 | reducedDim(sce , "UMAP") = umaps 103 | 104 | # plot UMAPs, colored by cell types 105 | umaps = cbind(as.data.frame(colData(sce)) , reducedDim(sce , "UMAP")) 106 | names(EmbryoCelltypeColours)[names(EmbryoCelltypeColours) == "Haematoendothelial progenitors"] = "Haem. prog-s." 107 | cols_ct = EmbryoCelltypeColours[names(EmbryoCelltypeColours) %in% unique(umaps$celltype.mapped)] 108 | 109 | p = ggplot(umaps , aes(x = V1 , y = V2 , col = celltype.mapped)) + 110 | geom_point() + 111 | scale_color_manual(values = cols_ct) + 112 | facet_wrap(~tomato) + 113 | theme_bw() + 114 | labs(x = "UMAP-1", y = "UMAP-2") 115 | p 116 | 117 | 118 | ``` 119 | 120 | 121 | # Assign neighbourhoods 122 | 123 | ## Estimate k -> neighbourhood size 124 | 125 | `estimate_neighbourhood_sizes` allows to gauge how neighbourhood size distribution changes as a function of (order,k). It might be useful to run it first in order to determine optimal range that will return desired neighbourhood sizes. 126 | 127 | ```{r estimate-k, fig.width=6, fig.cap="Neighbourhood size distribution ~ k"} 128 | 129 | 130 | stat_k = estimate_neighbourhood_sizes(sce, k_grid = seq(10,40,5) , 131 | order = 2, prop = 0.1 , filtering = TRUE, 132 | reducedDim_name = "pca.corrected" , plot_stat = TRUE) 133 | 134 | kable(stat_k , caption = "Neighbourhood size distribution ~ k") 135 | 136 | ``` 137 | 138 | We will use k=20, order=2 --> that returns an average neighbourhood size ~400 cells. 139 | 140 | ## Assign neighbourhoods 141 | 142 | To assign neighbourhoods, use `assign_neighbourhoods`. Note that under the hood, there is a random sampling of index cells --> if you want to ensure the same neighbourhood assignment, please set seed prior to running this function. 143 | 144 | Note that we set `filtering = TRUE` to achieve a refined assignment in which redundant neighbourhoods are discarded. Alternatively this can be done post hoc, using `filter_neighbourhoods`. 145 | 146 | ```{r assign-nhoods} 147 | 148 | 149 | set.seed(32) 150 | sce_milo = assign_neighbourhoods(sce , k = 20 , order = 2, 151 | filtering = TRUE , reducedDim_name = "pca.corrected" , verbose = F) 152 | 153 | 154 | ``` 155 | 156 | In total we get 42 nhoods. A neighbourhood assignment can be visualised using Milo plots, in which each circle corresponds to a neighbourhood, and edges between them represent shared cells. The center of each neighbourhood are coordinated in 2D latent space (e.g. UMAP) for the center cell of the neighbourhood. We can also colour each neighbourhood by provided metric. In this plot, we will annotate each neighbourhood with its enriched cell type, and colour neighbourhoods by assigned cell types. 157 | 158 | 159 | ```{r plot-nhoods-by-cts , fig.width=5, fig.cap="Neighbourhood plot, coloured by cell types"} 160 | 161 | 162 | nhoods_sce = nhoods(sce_milo) 163 | # assign cell types for nhoods 164 | nhood_stat_ct = data.frame(Nhood = 1:ncol(nhoods_sce) , Nhood_center = colnames(nhoods_sce)) 165 | nhood_stat_ct = miloR::annotateNhoods(sce_milo , nhood_stat_ct , coldata_col = "celltype.mapped") 166 | p = plot_milo_by_single_metric(sce_milo, nhood_stat_ct, colour_by = "celltype.mapped" , 167 | layout = "UMAP" , size_range = c(1.5,3) , edge_width = c(0.2,0.5)) + 168 | scale_fill_manual(values = cols_ct , name = "Cell type") 169 | p 170 | 171 | 172 | ``` 173 | 174 | # DE testing 175 | 176 | ## Calculate AUC per neighbourhood 177 | 178 | Prior to DE testing, an optional step is to assess which neighbourhoods do not show any signs of perturbation and discard them prior to DE testing in order to facilitate the burden from multiple testing correction (by using `calc_AUC_per_neighbourhood`). To do so we build on [Augur](https://pubmed.ncbi.nlm.nih.gov/32690972/) that employs RF classifiers to separate cells between the conditions. As an output of `calc_AUC_per_neighbourhood`, we return data frame containing AUC of the per neighbourhood classifiers. 179 | We suggest that AUC <= 0.5 corresponds to neighbourhoods that can be safely discarded from DE testing (however, you may use your own threshold if desired). In addition, if classifier can not be built due to very low number of cells in at least one of the conditions, AUC will be set to NaN. 180 | 181 | Note that this part is rather computationally costly, and we recommend it to run if a substantial transcriptional regions are anticipated to be unperturbed. 182 | 183 | ```{r calc-auc-per-nhood, fig.width=5, fig.cap="Neighbourhood plot, coloured by AUC"} 184 | 185 | 186 | stat_auc = suppressWarnings(calc_AUC_per_neighbourhood(sce_milo , sample_id = "sample" , condition_id = "tomato", min_n_cells_per_sample = 1, BPPARAM = mcparam)) 187 | 188 | p = plot_milo_by_single_metric(sce_milo, stat_auc, colour_by = "auc" , 189 | layout = "UMAP" , size_range = c(1.5,3) , edge_width = c(0.2,0.5)) + 190 | scale_fill_viridis(name = "AUC") 191 | p 192 | 193 | 194 | ``` 195 | 196 | We observe that AUCs in the neighbourhoods containing mostly Blood progenitors is NaN which is consistent with absense of these cells in Tal1- cells. 197 | In addition, we observe higher AUCs in endothelial subregions, likely reflecting that these cells are more affected by Tal1 KO. 198 | 199 | ## DE testing 200 | 201 | Let's proceed with DE testing within each neighbourhood. We will test neighbourhoods in which AUC is not NaN (i.e. neighbourhoods in which there are enough cells from both conditions). 202 | 203 | ```{r de-testing, fig.width=5 , fig.cap="Neighbourhood plot, coloured by whether DE testing is performed"} 204 | 205 | 206 | de_stat = de_test_neighbourhoods(sce_milo , 207 | sample_id = "sample", 208 | design = ~tomato, 209 | covariates = c("tomato"), 210 | subset_nhoods = stat_auc$Nhood[!is.na(stat_auc$auc)], 211 | output_type = "SCE", 212 | plot_summary_stat = TRUE, 213 | layout = "UMAP", BPPARAM = mcparam , 214 | verbose = T) 215 | 216 | 217 | 218 | ``` 219 | 220 | # Analysis of miloDE results 221 | 222 | ## Get neighbourhood ranking by the extent of DE 223 | 224 | One explanatory question a user might have is an overall scan of which transcriptional regions show noteworthy signs of DE. To do so ona neighbourhood level, we provide the function `rank_neighbourhoods_by_DE_magnitude`. Within this function, we calculate two metrics: 225 | 226 | a) `n_DE_genes` - for each neighbourhood, we calculate how many genes are assigned as DE. Since we are doing it within each neighbourhood, we use `pval_corrected_across_genes` and we use default `pval.thresh=0.1` (can be changed). Note that there is no comparison between neighbourhoods here. 227 | 228 | 229 | b) `n_specific_DE_genes`. We also might be interested which neighbourhoods differ from others more so than we would expect. To assess this, we are interested in which neighbourhoods contain genes, that are DE 'specifically' in those neighbourhoods. To calculate this, for each gene we now use z-transformation of `pval_corrected_across_nhoods`, and we identify the neighbourhoods in which z-normalised p-values are lower than a threshold (default `z.thresh=-3`). This would tell us that the gene is signifciantly DE in the neighbourhood *compared to most other neighbourhoods*. We do so for each gene, and then for each neighbourhood we calculate how mane genes have z-normalised p-values are lower than a threshold. 230 | 231 | 232 | Note that for gene/neighbourhood combinations for which p-values are returned as NaNs (e.g. genes are not tested), for this function we set pvalues = 1. In other words, if a gene is only tested in few neighbourhoods to begin with, z-normalised p-value corrected across neighbourhoods is likely to be small for these neighbourhoods. 233 | 234 | 235 | ```{r rank-nhoods-by-DE-magnitude, fig.width=10 , fig.cap = "Neighbourhood plot, coloured by # of DE genes" , fig.height=5} 236 | 237 | stat_de_magnitude = rank_neighbourhoods_by_DE_magnitude(de_stat) 238 | 239 | p1 = plot_milo_by_single_metric(sce_milo, stat_de_magnitude, colour_by = "n_DE_genes" , 240 | layout = "UMAP" , size_range = c(1.5,3) , edge_width = c(0.2,0.5)) + 241 | scale_fill_viridis(name = "# DE genes") 242 | p2 = plot_milo_by_single_metric(sce_milo, stat_de_magnitude, colour_by = "n_specific_DE_genes" , 243 | layout = "UMAP" , size_range = c(1.5,3) , edge_width = c(0.2,0.5)) + 244 | scale_fill_viridis(name = "# specific\nDE genes" , option = "inferno") 245 | p = ggarrange(p1,p2) 246 | p 247 | 248 | ``` 249 | 250 | Reassuringly, we observe that same regions show both higher AUC and number of DE expressed genes. 251 | 252 | # Co-regulated programs 253 | 254 | The fine neighbourhood resolution allows to cluster genes based on logFC vectors (across the neighbourhoods). One way is to employ WGCNA approach, originally designed to finding co-expressed genes. Instead of gene counts we will use corrected logFC (logFC set to 0 in the neighbourhoods in which it is not DE). 255 | 256 | Below we outline a custom script to detect gene modules using WGCNA approach. We will use [scWGCNA](https://github.com/CFeregrino/scWGCNA), specifically designed to handle single-cell data. 257 | 258 | Note that WGCNA algorithm is exclusive and sensitive to input parameters - nevertheless, we suggest it is useful to explore what DE patterns exist in your data. 259 | 260 | ## Gene modules using WGCNA 261 | 262 | ```{r sc-wgcna} 263 | 264 | 265 | get_wgcna_modules = function(de_stat , subset_hoods = NULL , 266 | n_hoods_sig.thresh = 2 , 267 | npcs = 5 , 268 | pval.thresh = 0.1 ){ 269 | require(scWGCNA) 270 | require(Seurat) 271 | require(dplyr) 272 | require(reshape2) 273 | 274 | set.seed(32) 275 | # subset hoods 276 | if (!is.null(subset_hoods)){ 277 | de_stat = de_stat[de_stat$Nhood %in% subset_hoods , ] 278 | } 279 | 280 | # focus on genes that DE in at least 2 nhoods 281 | de_stat_per_gene = as.data.frame(de_stat %>% group_by(gene) %>% dplyr::summarise(n_hoods_sig = sum(pval_corrected_across_nhoods < pval.thresh , na.rm = TRUE))) 282 | genes_sig = de_stat_per_gene$gene[de_stat_per_gene$n_hoods_sig >= n_hoods_sig.thresh] 283 | 284 | de_stat = de_stat[de_stat$gene %in% genes_sig, ] 285 | de_stat = de_stat[order(de_stat$Nhood) , ] 286 | 287 | # discard neighbourhoods in which testing was not performed 288 | de_stat = de_stat[de_stat$test_performed , ] 289 | 290 | # for this analysis, set logFC to 0 and pvals to 1 if they are NaN 291 | de_stat$logFC[is.na(de_stat$logFC)] = 0 292 | de_stat$pval[is.na(de_stat$pval)] = 1 293 | de_stat$pval_corrected_across_genes[is.na(de_stat$pval_corrected_across_genes)] = 1 294 | de_stat$pval_corrected_across_nhoods[is.na(de_stat$pval_corrected_across_nhoods)] = 1 295 | 296 | # set logFC to 0 if pval_corrected_across_nhoods > pval.thresh 297 | de_stat$logFC[de_stat$pval_corrected_across_nhoods >= pval.thresh] = 0 298 | 299 | # move the object to Seurat 300 | de_stat = reshape2::dcast(data = de_stat, formula = gene~Nhood, value.var = "logFC") 301 | rownames(de_stat) = de_stat$gene 302 | de_stat = de_stat[,2:ncol(de_stat)] 303 | 304 | obj.seurat <- CreateSeuratObject(counts = de_stat) 305 | DefaultAssay(obj.seurat) <- "RNA" 306 | obj.seurat = FindVariableFeatures(obj.seurat) 307 | # scale 308 | obj.seurat[["RNA"]]@scale.data = as.matrix(obj.seurat[["RNA"]]@data) 309 | obj.seurat = RunPCA(obj.seurat , npcs = npcs) 310 | 311 | # run scwgcna 312 | clusters_scwgcna = run.scWGCNA(p.cells = obj.seurat, 313 | s.cells = obj.seurat, 314 | is.pseudocell = F, 315 | features = rownames(obj.seurat), 316 | less = TRUE , merging = TRUE) 317 | # compile stat 318 | clusters = lapply(1:length(clusters_scwgcna$module.genes) , function(i){ 319 | out = data.frame(cluster = i , gene = clusters_scwgcna$module.genes[[i]] , n_genes = length(clusters_scwgcna$module.genes[[i]])) 320 | return(out) 321 | }) 322 | clusters = do.call(rbind , clusters) 323 | # add colors 324 | genes_w_colors = clusters_scwgcna$dynamicCols 325 | genes_w_colors = data.frame(gene = names(genes_w_colors) , cluster_color = genes_w_colors) 326 | clusters = merge(clusters , genes_w_colors) 327 | 328 | return(clusters) 329 | } 330 | 331 | 332 | # for this vignette, for simplicity we will focus on genes that are DE in at least 4 neighbourhoods 333 | modules_wgcna = suppressMessages(get_wgcna_modules(convert_de_stat(de_stat) , n_hoods_sig.thresh = 4)) 334 | 335 | 336 | ``` 337 | 338 | 339 | ## Gene module plots 340 | 341 | We can visualise for which transcriptional regions different gene modules are relevant. 342 | 343 | For each gene module, we can plot neighbourhood plot, in which neighbourhood colour corresponds to average logFC (across genes from the module) and neighbourhood size corresponds to fraction of genes from the module, that are DE in this neighbourhood. 344 | 345 | ```{r sc-wgcna-plot-modules, fig.cap = "Co-reulated modules", fig.width=10, fig.height=8} 346 | 347 | 348 | plots = lapply(sort(unique(modules_wgcna$cluster)) , function(cluster){ 349 | p = plot_DE_gene_set(sce_milo, de_stat , genes = modules_wgcna$gene[modules_wgcna$cluster == cluster], 350 | layout = "UMAP" , size_range = c(0.5,3) , 351 | node_stroke = 0.3, edge_width = c(0.2,0.5)) + 352 | ggtitle(paste0("Module ", cluster, ", ", length(modules_wgcna$gene[modules_wgcna$cluster == cluster]) , " genes")) 353 | return(p) 354 | }) 355 | p = ggarrange(plotlist = plots) 356 | p 357 | 358 | 359 | ``` 360 | 361 | ## Break down by cell types 362 | 363 | We can also quantify the relevance of each module for different cell types. 364 | 365 | ```{r sc-wgcna-plot-modules-beeswarm, fig.width=10,fig.height=8 , fig.cap = "Co-regulated modules, break down by cell types."} 366 | 367 | 368 | # we first need to assign cell type label to each neighbourhood - we have calculated it previously 369 | # we will use data.frame format for de-stat; the conversion between SingleCellExperiment and data.frame formats can be done using `convert_de_stat` 370 | de_stat_df = convert_de_stat(de_stat) 371 | de_stat_df = merge(de_stat_df , nhood_stat_ct , by = c("Nhood" , "Nhood_center")) 372 | 373 | 374 | plots = lapply(sort(unique(modules_wgcna$cluster)) , function(cluster){ 375 | p = plot_beeswarm_gene_set(de_stat_df, 376 | genes = modules_wgcna$gene[modules_wgcna$cluster == cluster], 377 | nhoodGroup = "celltype.mapped") + 378 | ggtitle(paste0("Module ", cluster)) 379 | return(p) 380 | }) 381 | p = ggarrange(plotlist = plots) 382 | p 383 | 384 | 385 | ``` 386 | 387 | 388 | ## Visualising individual genes 389 | 390 | We can also visualise DE results for individual genes, in which each neighbourhood is coloured by its logFC if significant. 391 | 392 | Below we show couple examples of DE patterns of genes from modules 2 and 4. 393 | 394 | ## Module 2 395 | 396 | ```{r de-single-genes-mod-2 , fig.width=10,fig.height=15 , fig.cap = "Genes from module 2."} 397 | 398 | 399 | set.seed(1020) 400 | module = 2 401 | n_genes = 6 402 | genes = sample(modules_wgcna$gene[modules_wgcna$cluster == module] , n_genes) 403 | 404 | 405 | plots = lapply(genes , function(gene){ 406 | p = plot_DE_single_gene(sce_milo, de_stat , gene = gene , layout = "UMAP" , set_na_to_0 = TRUE) + 407 | ggtitle(gene) 408 | return(p) 409 | }) 410 | p = ggarrange(plotlist = plots , ncol = 2, nrow = 3) 411 | p 412 | 413 | 414 | 415 | ``` 416 | 417 | ## Module 4 418 | 419 | ```{r de-single-genes-mod-4 , fig.width=10,fig.height=15 , fig.cap = "Genes from module 4."} 420 | 421 | 422 | set.seed(1020) 423 | module = 4 424 | n_genes = 6 425 | genes = sample(modules_wgcna$gene[modules_wgcna$cluster == module] , n_genes) 426 | 427 | 428 | plots = lapply(genes , function(gene){ 429 | p = plot_DE_single_gene(sce_milo, de_stat , gene = gene , layout = "UMAP" , set_na_to_0 = TRUE) + 430 | ggtitle(gene) 431 | return(p) 432 | }) 433 | p = ggarrange(plotlist = plots, ncol=2, nrow=3) 434 | p 435 | 436 | 437 | 438 | ``` 439 | 440 | 441 | # Session Info 442 | 443 | ```{r sessinf} 444 | sessionInfo() 445 | ``` 446 | --------------------------------------------------------------------------------