├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── R
    ├── assign_neighbourhoods.R
    ├── calc_AUC_per_neighbourhood.R
    ├── convert_de_stat.R
    ├── de_test_neighbourhoods.R
    ├── estimate_neighbourhood_sizes.R
    ├── filter_neighbourhoods.R
    ├── plotting_functions.R
    ├── rank_neighbourhoods_by_DE_magnitude.R
    ├── sce_mouseEmbryo.R
    ├── spatial_pval_adjustment.R
    └── utils.R
├── README.md
├── data-raw
    └── sce_mouseEmbryo.R
├── data
    └── sce_mouseEmbryo.rda
├── man
    ├── assign_neighbourhoods.Rd
    ├── calc_AUC_per_neighbourhood.Rd
    ├── convert_de_stat.Rd
    ├── de_test_neighbourhoods.Rd
    ├── de_test_single_neighbourhood.Rd
    ├── estimate_neighbourhood_sizes.Rd
    ├── filter_neighbourhoods.Rd
    ├── plot_DE_gene_set.Rd
    ├── plot_DE_single_gene.Rd
    ├── plot_beeswarm_gene_set.Rd
    ├── plot_beeswarm_single_gene.Rd
    ├── plot_milo_by_single_metric.Rd
    ├── rank_neighbourhoods_by_DE_magnitude.Rd
    ├── sce_mouseEmbryo.Rd
    └── spatial_pval_adjustment.Rd
├── miloDE.Rproj
├── miloDE_cartoon.png
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-assign_neighbourhoods.R
    │   ├── test-calc_AUC_per_neighbourhood.R
    │   ├── test-convert_de_stat.R
    │   ├── test-de_test_neighbourhoods.R
    │   ├── test-estimate_neighbourhood_sizes.R
    │   ├── test-filter_neighbourhoods.R
    │   ├── test-rank_neighbourhoods_by_DE_magnitude.R
    │   └── test-spatial_pval_adjustment.R
└── vignettes
    ├── .gitignore
    └── miloDE__mouse_embryo.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^miloDE\.Rproj$
2 | ^\.Rproj\.user$
3 | ^data-raw$
4 | ^LICENSE\.md$
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # User-specific files
 9 | .Ruserdata
10 | 
11 | # Example code in package build process
12 | *-Ex.R
13 | 
14 | # Output files from R CMD build
15 | /*.tar.gz
16 | 
17 | # Output files from R CMD check
18 | /*.Rcheck/
19 | 
20 | # RStudio files
21 | .Rproj.user/
22 | 
23 | # produced vignettes
24 | vignettes/*.html
25 | vignettes/*.pdf
26 | 
27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
28 | .httr-oauth
29 | 
30 | # knitr and R markdown default cache directories
31 | *_cache/
32 | /cache/
33 | 
34 | # Temporary files created by R markdown
35 | *.utf8.md
36 | *.knit.md
37 | 
38 | # R Environment Variables
39 | .Renviron
40 | .Rproj.user
41 | inst/doc
42 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: miloDE
 2 | Title: Sensitive DE testing using neighbourhoods instead of discrete clusters
 3 | Version: 0.0.0.9000
 4 | Authors@R: 
 5 |     person("Alsu", "Missarova", , "alsu@ebi.ac.uk", role = c("aut", "cre"),
 6 |            comment = c(ORCID = "YOUR-ORCID-ID"))
 7 | Description: Sensitive DE testing using neighbourhoods instead of discrete clusters.
 8 | License: MIT + file LICENSE
 9 | Encoding: UTF-8
10 | Roxygen: list(markdown = TRUE)
11 | RoxygenNote: 7.3.0
12 | Imports: 
13 |     dplyr,
14 |     ggplot2,
15 |     ggpubr,
16 |     Matrix,
17 |     scuttle,
18 |     SingleCellExperiment,
19 |     SummarizedExperiment,
20 |     stats,
21 |     tibble,
22 |     RcppGreedySetCover,
23 |     igraph,
24 |     miloR,
25 |     edgeR, 
26 |     Augur,
27 |     S4Vectors,
28 |     reshape2,
29 |     ggraph,
30 |     grDevices,
31 |     BiocParallel,
32 |     RColorBrewer,
33 |     methods,
34 |     ggbeeswarm,
35 |     limma
36 | Remotes:
37 |     github::neurorestore/Augur,
38 |     github::Bioconductor/MatrixGenerics,
39 |     github::const-ae/sparseMatrixStats
40 | Depends: 
41 |     R (>= 2.10)
42 | Suggests: 
43 |     rmarkdown,
44 |     knitr,
45 |     testthat (>= 3.0.0),
46 |     scran,
47 |     Seurat,
48 |     scWGCNA,
49 |     uwot,
50 |     MouseGastrulationData,
51 |     viridis
52 | VignetteBuilder: knitr
53 | LazyData: true
54 | Config/testthat/edition: 3
55 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2023
2 | COPYRIGHT HOLDER: miloDE authors
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2023 miloDE authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(assign_neighbourhoods)
 4 | export(calc_AUC_per_neighbourhood)
 5 | export(convert_de_stat)
 6 | export(de_test_neighbourhoods)
 7 | export(de_test_single_neighbourhood)
 8 | export(estimate_neighbourhood_sizes)
 9 | export(filter_neighbourhoods)
10 | export(plot_DE_gene_set)
11 | export(plot_DE_single_gene)
12 | export(plot_beeswarm_gene_set)
13 | export(plot_beeswarm_single_gene)
14 | export(plot_milo_by_single_metric)
15 | export(rank_neighbourhoods_by_DE_magnitude)
16 | export(spatial_pval_adjustment)
17 | import(Augur)
18 | import(Matrix)
19 | import(ggplot2)
20 | importFrom(BiocParallel,bplapply)
21 | importFrom(RColorBrewer,brewer.pal)
22 | importFrom(RcppGreedySetCover,greedySetCover)
23 | importFrom(S4Vectors,DataFrame)
24 | importFrom(S4Vectors,isEmpty)
25 | importFrom(SingleCellExperiment,SingleCellExperiment)
26 | importFrom(SingleCellExperiment,counts)
27 | importFrom(SingleCellExperiment,logcounts)
28 | importFrom(SingleCellExperiment,reducedDim)
29 | importFrom(SingleCellExperiment,reducedDimNames)
30 | importFrom(SingleCellExperiment,reducedDims)
31 | importFrom(SummarizedExperiment,"assay<-")
32 | importFrom(SummarizedExperiment,"assayNames<-")
33 | importFrom(SummarizedExperiment,"assays<-")
34 | importFrom(SummarizedExperiment,"colData<-")
35 | importFrom(SummarizedExperiment,assay)
36 | importFrom(SummarizedExperiment,assayNames)
37 | importFrom(SummarizedExperiment,assays)
38 | importFrom(SummarizedExperiment,colData)
39 | importFrom(dplyr,"%>%")
40 | importFrom(dplyr,arrange)
41 | importFrom(dplyr,distinct)
42 | importFrom(dplyr,mutate)
43 | importFrom(edgeR,DGEList)
44 | importFrom(edgeR,calcNormFactors)
45 | importFrom(edgeR,estimateDisp)
46 | importFrom(edgeR,filterByExpr)
47 | importFrom(edgeR,glmQLFTest)
48 | importFrom(edgeR,glmQLFit)
49 | importFrom(edgeR,topTags)
50 | importFrom(ggbeeswarm,geom_quasirandom)
51 | importFrom(ggpubr,ggarrange)
52 | importFrom(ggraph,geom_edge_link)
53 | importFrom(ggraph,geom_edge_link0)
54 | importFrom(ggraph,geom_node_point)
55 | importFrom(ggraph,ggraph)
56 | importFrom(ggraph,scale_edge_width)
57 | importFrom(grDevices,colorRampPalette)
58 | importFrom(igraph,"E<-")
59 | importFrom(igraph,"V<-")
60 | importFrom(igraph,"vertex_attr<-")
61 | importFrom(igraph,E)
62 | importFrom(igraph,V)
63 | importFrom(igraph,connect)
64 | importFrom(igraph,count_triangles)
65 | importFrom(igraph,delete.edges)
66 | importFrom(igraph,induced_subgraph)
67 | importFrom(igraph,is_igraph)
68 | importFrom(igraph,neighborhood)
69 | importFrom(igraph,permute)
70 | importFrom(igraph,set_vertex_attr)
71 | importFrom(igraph,simplify)
72 | importFrom(igraph,vertex_attr)
73 | importFrom(limma,makeContrasts)
74 | importFrom(methods,as)
75 | importFrom(methods,is)
76 | importFrom(miloR,"graph<-")
77 | importFrom(miloR,"nhoodGraph<-")
78 | importFrom(miloR,"nhoodIndex<-")
79 | importFrom(miloR,"nhoods<-")
80 | importFrom(miloR,Milo)
81 | importFrom(miloR,buildGraph)
82 | importFrom(miloR,buildNhoodGraph)
83 | importFrom(miloR,graph)
84 | importFrom(miloR,nhoodGraph)
85 | importFrom(miloR,nhoodIndex)
86 | importFrom(miloR,nhoods)
87 | importFrom(reshape2,dcast)
88 | importFrom(reshape2,melt)
89 | importFrom(scuttle,summarizeAssayByGroup)
90 | importFrom(stats,as.formula)
91 | importFrom(stats,model.matrix)
92 | importFrom(stats,p.adjust)
93 | importFrom(stats,quantile)
94 | importFrom(stats,sd)
95 | importFrom(tibble,rownames_to_column)
96 | 


--------------------------------------------------------------------------------
/R/assign_neighbourhoods.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' assign_neighbourhoods
  4 | #'
  5 | #' Assign neighbourhoods to single-cell RNA-seq data (in \code{SingleCellExperiment} format)
  6 | #' @param x A \code{\linkS4class{SingleCellExperiment}} object.
  7 | #' @param reducedDim_name Defines the assay in \code{reducedDim(x)} to use as the embedding for graph construction.
  8 | #' @param k Positive integer, defines how many neighbours to use for the neighbourhood assignment. Default \code{k = 25}.
  9 | #' @param prop Numerical, between 0 and 1, defines which fraction of cells to use as neighbourhood centres. Default \code{prop = 0.2}.
 10 | #' @param order In \code{c(1,2)}, defines which order of neighbours to use. Default \code{order = 2}.
 11 | #' @param filtering In \code{c(TRUE,FALSE)}, defines whether to refine the assignment. Default \code{filtering = TRUE}.
 12 | #' @param k_init Positive integer, defines how many neighbours to use for identifying anchor cells (for this step we use 1st-order kNN). Default \code{k_init = 50}.
 13 | #' @param d Positive integer, defines how many dimensions from \code{reducedDim(x)} to use. Default \code{d = 30}.
 14 | #' @param verbose Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}.
 15 | #' @details
 16 | #' This function assigns neighbourhoods to single-cell data. This includes assigning graph representation, selecting \sQuote{index} cells and, finally, for each index cell, assigning it along with its neighbourhoors to one neighbourhood.
 17 | #'
 18 | #' Specifically, algorithm goes as follows:
 19 | #' 1. Assigning \sQuote{loose} graph (i.e. ~low k, 1st-order kNN) to select index cells for the selected \code{prop} (greatly reduces computational time to look for \sQuote{index} cells in a loose graph).
 20 | #' 2. Reassigning graph following entered by the user \code{order} and \code{k}.
 21 | #' 3. Assigning neighbourhoods.
 22 | #' 4. (Optional but recommended) Refining the neighbourhood assignment (check \code{\link{filter_neighbourhoods}}).
 23 | #'
 24 | #' @return \code{\linkS4class{Milo}} object containing cell-neighbourhood matrix in \code{nhoods(out)} slot
 25 | #' @export
 26 | #' @importFrom miloR Milo buildGraph graph<- graph nhoods<- nhoodIndex<- buildNhoodGraph
 27 | #' @importFrom SingleCellExperiment reducedDim
 28 | #' @importFrom SummarizedExperiment assay
 29 | #' @import Matrix
 30 | #' @importFrom igraph connect V neighborhood
 31 | #' @importFrom methods as
 32 | #' @examples
 33 | #' require(SingleCellExperiment)
 34 | #' n_row = 500
 35 | #' n_col = 100
 36 | #' n_latent = 5
 37 | #' sce = SingleCellExperiment(assays =
 38 | #' list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
 39 | #' rownames(sce) = as.factor(1:n_row)
 40 | #' colnames(sce) = c(1:n_col)
 41 | #' sce$cell = colnames(sce)
 42 | #' reducedDim(sce , "reduced_dim") =
 43 | #' matrix(rnorm(n_col*n_latent), ncol=n_latent)
 44 | #' out = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
 45 | assign_neighbourhoods = function(x , reducedDim_name , k = 25, prop = 0.2, order = 2, filtering = TRUE, k_init = 50, d = 30, verbose = TRUE){
 46 | 
 47 |   #args = c(as.list(environment()))
 48 |   #out = .general_check_arguments(args) & .check_reducedDim_in_sce(sce , reducedDim_name)
 49 |   out = .check_argument_correct(x, .check_sce, "Check x - something is wrong (gene names unique?)") &
 50 |         .check_argument_correct(k, .check_positive_integer, "Check k - should be positive integer") &
 51 |         .check_argument_correct(prop, .check_prop, "Check prop - should be positive number between 0 and 1") &
 52 |         .check_argument_correct(order, function(x) .check_arg_within_options(x, c(1,2)),
 53 |                                       "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)") &
 54 |         .check_argument_correct(filtering, .check_boolean, "Check filtering - should be either TRUE or FALSE") &
 55 |         .check_argument_correct(reducedDim_name, is.character, "Check reducedDim_name - should be character vector") &
 56 |         .check_argument_correct(k_init, .check_positive_integer, "Check k_init - should be positive integer") &
 57 |         .check_argument_correct(d, .check_positive_integer, "Check d - should be positive integer") &
 58 |         .check_argument_correct(verbose, .check_boolean, "Check verbose - should be either TRUE or FALSE") &
 59 |         .check_reducedDim_in_sce(x , reducedDim_name)
 60 | 
 61 | 
 62 |   d <- min(d , ncol(reducedDim(x , reducedDim_name)))
 63 |   k_init <- min(k , k_init)
 64 | 
 65 |   if (is.null(colnames(x))){
 66 |     colnames(x) = as.character(c(1:ncol(x)))
 67 |   }
 68 |   if (is(x , "SingleCellExperiment") && !is(x , "Milo")){
 69 |     x = Milo(x)
 70 |     # build 1st order to sample vertices
 71 |     k_init <- min(k , k_init)
 72 |     x <- suppressMessages(buildGraph(x, k = k_init, d = d, reduced.dim = reducedDim_name))
 73 |   }
 74 |   else {
 75 |     message("SCE is Milo object. Checking if graph is already constructed.")
 76 |     if (length(miloR::graph(x)) == 0){
 77 |       message("Graph is not constructed yet. Building now.")
 78 |       x <- suppressMessages(buildGraph(x, k = k_init, d = d, reduced.dim = reducedDim_name))
 79 |     }
 80 |   }
 81 |   # find anchor cells
 82 |   sampled_vertices <- .get_graph_refined_sampling(graph(x), prop)
 83 | 
 84 |   # rebuild to the actual graph, with parameters specified by user
 85 |   if (!k == k_init){
 86 |     x <- suppressMessages(buildGraph(x, k = k, d = d, reduced.dim = reducedDim_name))
 87 |   }
 88 |   # if order == 2 -- reassign edges
 89 |   if (order == 2){
 90 |     graph(x) = connect(graph(x),order)
 91 |   }
 92 | 
 93 |   # create nhoods
 94 |   nh_mat <- Matrix(data = 0, nrow=ncol(x), ncol=length(sampled_vertices), sparse = TRUE)
 95 |   v.class <- V(graph(x))$name
 96 |   rownames(nh_mat) <- colnames(x)
 97 |   for (X in seq_len(length(sampled_vertices))){
 98 |     nh_mat[unlist(neighborhood(graph(x), order = 1, nodes = sampled_vertices[X])), X] <- 1
 99 |   }
100 |   colnames(nh_mat) <- as.character(sampled_vertices)
101 |   nhoodIndex(x) <- as(sampled_vertices, "list")
102 |   nhoods(x) <- nh_mat
103 | 
104 |   # filter
105 |   if (!filtering){
106 |     x = suppressMessages(buildNhoodGraph(x))
107 |   }
108 |   else {
109 |     if (verbose){
110 |       message("Filtering redundant neighbourhoods.")
111 |     }
112 |     x = suppressMessages(filter_neighbourhoods(x))
113 |   }
114 | 
115 |   if (verbose){
116 |     stat_print =.calc_quick_stat(x , nhoods(x))
117 |     message(paste0("Finished successfully.\nNumber of neighbourhoods assigned: ", stat_print$n_hoods ,
118 |                  ";\naverage neighbourhood size: ", stat_print$avg_hood_size ,
119 |                  ";\nnumber of unassigned cells: ", stat_print$n_cells_unocovered))
120 |   }
121 |   return(x)
122 | }
123 | 
124 | 
125 | #'
126 | #'
127 | #' @importFrom igraph V set_vertex_attr induced_subgraph count_triangles neighborhood
128 | #' @importFrom miloR graph nhoodIndex nhoods<-
129 | #' @importFrom dplyr %>%
130 | .get_graph_refined_sampling <- function(graph, prop){
131 |   random_vertices <- sample(V(graph), size=floor(prop*length(V(graph))))
132 |   random_vertices <- as.vector(random_vertices)
133 |   X_graph <- set_vertex_attr(graph, "name", value = 1:length(V(graph)))
134 |   refined_vertices <- lapply(seq_along(random_vertices), function(i){
135 |     target_vertices <- unlist(neighborhood(X_graph, order = 1, nodes = random_vertices[i])) #get neighborhood of random vertex
136 |     target_vertices <- target_vertices[-1] #remove first entry which is the random vertex itself
137 |     rv_induced_subgraph <- induced_subgraph(graph = X_graph, vids = target_vertices)
138 |     triangles <- count_triangles(rv_induced_subgraph)
139 |     max_triangles <- max(triangles)
140 |     max_triangles_indices <- which(triangles == max_triangles)
141 |     #note - take first max_ego_index in the next line of code
142 |     resulting_vertices <- V(rv_induced_subgraph)[max_triangles_indices]$name[1]
143 |     return(resulting_vertices)
144 |   }) %>% unlist() %>% as.integer()
145 |   refined_vertices = unique(refined_vertices)
146 |   return(refined_vertices)
147 | }
148 | 
149 | #'
150 | .calc_quick_stat = function(x , nhoods_sce){
151 |   n_hoods = ncol(nhoods_sce)
152 |   avg_hood_size = round(mean(colSums(nhoods_sce)))
153 |   n_cells_unocovered = ncol(x) - sum(rowSums(nhoods_sce) > 0)
154 |   out = list(n_hoods = n_hoods ,
155 |              avg_hood_size = avg_hood_size ,
156 |              n_cells_unocovered = n_cells_unocovered)
157 |   return(out)
158 | }
159 | 


--------------------------------------------------------------------------------
/R/calc_AUC_per_neighbourhood.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' calc_AUC_per_neighbourhood
  4 | #'
  5 | #' Returns per neighbourhood AUC from Augur based (RF) classifiers
  6 | #'
  7 | #' @param x A \code{\linkS4class{Milo}} object.
  8 | #' @param genes Character vector specifying genes to be passed for the testing. Default \code{genes = rownames(x)}.
  9 | #' @param sample_id Character specifying which variable should be used as a replicate ID.
 10 | #' Should be in \code{colnames(colData(x))}. Default \code{sample_id = "sample"}.
 11 | #' @param condition_id Character specifying which variable should be used as a condition ID.
 12 | #' Should be in \code{colnames(colData(x))}.
 13 | #' @param conditions In case of multiple comparable groups, character vector specifying which conditions should be tested for separation.
 14 | #' Default \code{conditions = NULL} and assumes that only 2 different conditions are present.
 15 | #' @param min_n_cells_per_sample Positive integer specifying the minimum number of cells per replicate to be included in testing.
 16 | #' Default \code{min_n_cells_per_sample = 3}.
 17 | #' @param n_threads Positive integer specifying the number of cores to be used to calculate AUC.
 18 | #' Higher number results in faster calculation, but its feasibility depends on the specs of your machine.
 19 | #' Only relevant if \code{BPPARAM = NULL}. Default \code{n_threads = 2}.
 20 | #' @param BPPARAM NULL or \code{\link{MulticoreParam}} object. Default \code{BPPARAM = NULL} assuming no parallelisation.
 21 | #' @details
 22 | #' This function calculates for each neighbourhood whether cells between 2 conditions can be separated
 23 | #' with Random Forest based classifiers (adapted from \code{\link[Augur]{calculate_auc}}).
 24 | #' Accordingly, AUCs of the classifiers represent how well we can separate 2 conditions.
 25 | #'
 26 | #' We suggest that neighbourhoods with AUC > 0.5 suggest a certain degree of separation between 2 conditions that can further be examined
 27 | #' with DE testing (and, accordingly, neighbourhoods with AUC <= 0.5 can be safely discarded). You also can set your own AUC threshold if desired as well as use AUCs to rank neighbourhoods.
 28 | #'
 29 | #' \emph{Note that this function is only relevant for \dQuote{simple} models (e.g. not nested or no interactions.)
 30 | #' Also, it is hard-coded that for all neighbourhoods, in which total number of cells is less than 4 in at least one condition,
 31 | #' AUC will be set to NaN (classifiers for such low numbers will not be built)}.
 32 | #'
 33 | #' @return \code{data.frame} object, with AUC calculated for each neighbourhood
 34 | #' @export
 35 | #' @importFrom SummarizedExperiment colData assayNames
 36 | #' @importFrom miloR nhoods
 37 | #' @importFrom BiocParallel bplapply
 38 | #'
 39 | #' @examples
 40 | #' require(SingleCellExperiment)
 41 | #' n_row = 500
 42 | #' n_col = 100
 43 | #' n_latent = 5
 44 | #' sce = SingleCellExperiment(assays = list(counts =
 45 | #' floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
 46 | #' logcounts(sce) = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4
 47 | #' rownames(sce) = as.factor(1:n_row)
 48 | #' colnames(sce) = c(1:n_col)
 49 | #' sce$cell = colnames(sce)
 50 | #' sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
 51 | #' sce$type = ifelse(sce$sample %in% c(1,2) , "ref" , "query")
 52 | #' reducedDim(sce , "reduced_dim") = matrix(rnorm(n_col*n_latent),
 53 | #' ncol=n_latent)
 54 | #' sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
 55 | #' sce = calc_AUC_per_neighbourhood(sce, condition_id = "type")
 56 | calc_AUC_per_neighbourhood <- function(x , genes = rownames(x) , sample_id = "sample" ,
 57 |                                        condition_id , conditions = NULL,
 58 |                                        min_n_cells_per_sample = 3, n_threads = 2 , BPPARAM = NULL){
 59 | 
 60 |   out = .check_argument_correct(x, .check_sce, "Check x - something is wrong (are gene names unique?)") &
 61 |     .check_sce_milo(x) &
 62 |     .check_argument_correct(sample_id, is.character, "Check sample_id - should be character vector") &
 63 |     .check_argument_correct(condition_id, is.character, "Check condition_id - should be character vector") &
 64 |     .check_argument_correct(min_n_cells_per_sample, .check_positive_integer, "Check min_n_cells_per_sample - should be positive integer") &
 65 |     .check_argument_correct(n_threads , .check_positive_integer , "Check n_threads - should be positive integer") &
 66 |     .check_var_in_coldata_sce(x , sample_id , "sample_id") & .check_condition_in_coldata_sce(x , condition_id) &
 67 |     .check_sample_and_condition_id_valid(x , condition_id , sample_id) &
 68 |     .check_genes_in_sce(x , genes)
 69 | 
 70 |   if (!"logcounts" %in% assayNames(x)){
 71 |     stop("Please calculate log-normalised counts first if you want to calculate AUC per neighbourhood.")
 72 |   }
 73 | 
 74 |   x = x[genes, ]
 75 | 
 76 |   if (is.null(colnames(x))){
 77 |     colnames(x) = as.character(c(1:ncol(x)))
 78 |   }
 79 |   # assign condition and sample ids
 80 |   coldata <- as.data.frame(colData(x))
 81 |   x$milo_sample_id <- as.factor( coldata[, sample_id] )
 82 |   x$milo_condition_id <- as.factor( coldata[, condition_id] )
 83 | 
 84 |   if (is.null(conditions)){
 85 |     tab = table(x$milo_condition_id)
 86 |     if (!length(tab) == 2){
 87 |       stop("If conditions == NULL, there should be exactly two levels for tested conditions.")
 88 |     }
 89 |   } else {
 90 |     if (mean(conditions %in% unique(x$milo_condition_id)) < 1){
 91 |       stop("All specified conditions should be present.")
 92 |     }
 93 |     if (!length(conditions) == 2){
 94 |       stop("Conditions should be a vector of 2 elements.")
 95 |     }
 96 |     x = x[, x$milo_condition_id %in% conditions]
 97 |   }
 98 | 
 99 |   nhoods_sce = nhoods(x)
100 |   # filter out for relevant cells
101 |   current_cols = colnames(nhoods_sce)
102 |   nhoods_sce = as.matrix( nhoods_sce[rownames(nhoods_sce) %in% colnames(x), ] )
103 |   colnames(nhoods_sce) = current_cols
104 |   # delete neighbourhoods that contain 0 cells (possible due to the upstream filtering)
105 |   idx = which(colSums(nhoods_sce) > 0)
106 |   current_cols = colnames(nhoods_sce)[idx]
107 |   nhoods_sce = as.matrix( nhoods_sce[, idx] )
108 |   colnames(nhoods_sce) = current_cols
109 | 
110 |   if (is.null(BPPARAM)){
111 |     auc_stat = lapply(colnames(nhoods_sce) , function(hood_id){
112 |       out = .get_auc_single_hood(x , nhoods_sce , hood_id , min_cells = 4 , min_n_cells_per_sample = min_n_cells_per_sample , n_threads = n_threads)
113 |       return(out)
114 |     })
115 |   }
116 |   else {
117 |     auc_stat = bplapply(colnames(nhoods_sce) , function(hood_id){
118 |       out = .get_auc_single_hood(x , nhoods_sce , hood_id , min_cells = 4 , min_n_cells_per_sample = min_n_cells_per_sample , n_threads = 1)
119 |       return(out)
120 |     } , BPPARAM = BPPARAM)
121 |   }
122 |   auc_stat = do.call(rbind , auc_stat)
123 |   # add Nhood
124 |   meta_nhoods = data.frame(Nhood = 1:ncol(nhoods_sce) , Nhood_center = colnames(nhoods_sce))
125 |   auc_stat = merge(auc_stat , meta_nhoods , all.x = TRUE , all.y = FALSE)
126 |   auc_stat = auc_stat[, c("Nhood", "Nhood_center" , "auc" , "auc_calculated")]
127 |   auc_stat = auc_stat[order(auc_stat$Nhood) , ]
128 |   return(auc_stat)
129 | }
130 | 
131 | 
132 | 
133 | #' @importFrom SingleCellExperiment logcounts
134 | #' @importFrom SummarizedExperiment colData
135 | #' @import Augur
136 | .get_auc_single_hood = function(x , nhoods_sce , hood_id , min_cells = 4 , min_n_cells_per_sample = 3 , n_threads = 2){
137 |   out = .check_argument_correct(min_cells, .check_positive_integer, "Check min_cells - should be positive integer")
138 |   # select cells
139 |   current.cells = which(nhoods_sce[,hood_id] == 1)
140 |   current.cells = rownames(nhoods_sce)[current.cells]
141 |   current.sce = x[,colnames(x) %in% current.cells]
142 |   current.sce = .filter_samples_with_low_n_cells_in_hood(current.sce , min_n_cells_per_sample = min_n_cells_per_sample)
143 | 
144 |   if (ncol(current.sce) > 0){
145 |     current.sce$celltype.dummy = "dummy"
146 |     meta = as.data.frame(colData(current.sce))
147 |     tab = table(as.character(current.sce$milo_condition_id))
148 |     if (length(tab) == 2 & tab[1] >= min_cells & tab[2] >= min_cells){
149 |       auc = calculate_auc(logcounts(current.sce), meta, cell_type_col = "celltype.dummy",
150 |                           label_col = "milo_condition_id" , n_subsamples = 0 ,
151 |                           subsample_size = min_cells , min_cells = min_cells ,
152 |                           feature_perc = 1 , n_threads = n_threads , show_progress = FALSE)
153 |       out = as.data.frame(auc$AUC)
154 |       out$auc_calculated = TRUE
155 | 
156 |     } else {
157 |       out = data.frame(cell_type = "dummy" , auc = NaN , auc_calculated = FALSE)
158 |     }
159 |   } else {
160 |     out = data.frame(cell_type = "dummy" , auc = NaN , auc_calculated = FALSE)
161 |   }
162 |   out$Nhood_center = hood_id
163 |   return(out)
164 | }
165 | 
166 | 
167 | 
168 | 
169 | 


--------------------------------------------------------------------------------
/R/convert_de_stat.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' convert_de_stat
  4 | #'
  5 | #' Converts output of miloDE between \code{\link[base]{data.frame}} and \code{\linkS4class{SingleCellExperiment}} formats
  6 | #' @param de_stat miloDE results, output of \code{\link{de_test_neighbourhoods}}; either in \code{data.frame} or \code{SingleCellExperiment} format.
  7 | #' @param assay_names Character string specifying which fields should be treated as assays.
  8 | #' Note that \code{logFC}, \code{pval}, \code{pval_corrected_across_genes} and \code{pval_corrected_across_nhoods} are hard-coded to be included in assays.
  9 | #' @param coldata_names Character string specifying which fields should be treated as neighbourhood metadata.
 10 | #' Note that \code{Nhood}, \code{Nhood_center}, \code{test_performed} are hard-coded to be included in coldata.
 11 | #'
 12 | #' \emph{Please note that \code{coldata_names} have to be the attributes of neighbourhoods (i.e. same values across different genes for the same neighbourhood).}
 13 | #' @details
 14 | #' This function converts results of \code{\link{de_test_neighbourhoods}} between \code{data.frame} object and \code{SingleCellExperiment}.
 15 | #'
 16 | #' \code{data.frame} object is more commonly used and might be easier to navigate, however, if total number of tests (i.e. gene x neighboourhoods)
 17 | #' is overwhelmingly large, \code{SingleCellExperiment} might be more suitable and faster to work with.
 18 | #' @return A \code{SingleCellExperiment} object or \code{data.frame} object, containing miloDE results
 19 | #' @export
 20 | #' @examples
 21 | #' de_stat = expand.grid(gene = paste0("gene" , c(1:5)) , Nhood = c(1:10))
 22 | #' de_stat$Nhood_center = paste0("nhood_" , de_stat$Nhood)
 23 | #' de_stat$logFC = sample(seq(-2,2,1) , nrow(de_stat) , 1)
 24 | #' de_stat$pval = sample(c(0,1),nrow(de_stat),1)
 25 | #' de_stat$pval_corrected_across_genes = sample(c(0,1),nrow(de_stat),1)
 26 | #' de_stat$pval_corrected_across_nhoods = sample(c(0,1),nrow(de_stat),1)
 27 | #' de_stat$test_performed = TRUE
 28 | #' de_stat = convert_de_stat(de_stat)
 29 | #' de_stat = convert_de_stat(de_stat)
 30 | #'
 31 | convert_de_stat = function(de_stat ,
 32 |                            assay_names = NULL,
 33 |                            coldata_names = NULL){
 34 | 
 35 |   assay_names = unique( c("logFC" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods", assay_names))
 36 |   coldata_names = unique( c("Nhood" , "Nhood_center" , "test_performed"  , coldata_names))
 37 |   out = .check_de_stat_valid(de_stat , assay_names , coldata_names)
 38 | 
 39 |   if (is(de_stat , "SingleCellExperiment")){
 40 |     message("Converting de_stat to 'data.frame' format")
 41 |     de_stat = .convert_from_sce(de_stat , assay_names = assay_names , coldata_names = coldata_names)
 42 |   } else if (is(de_stat , "data.frame")){
 43 |     message("Converting de_stat to 'SingleCellExperiment' format")
 44 |     de_stat = .convert_from_df(de_stat , assay_names = assay_names , coldata_names = coldata_names)
 45 |   }
 46 |   return(de_stat)
 47 | }
 48 | 
 49 | 
 50 | #' @importFrom SingleCellExperiment SingleCellExperiment
 51 | #' @importFrom S4Vectors DataFrame
 52 | .convert_from_df = function(de_stat , assay_names , coldata_names){
 53 | 
 54 |   de_stat = de_stat[order(de_stat$Nhood) , ]
 55 |   #  convert assays
 56 |   de_assays = lapply(assay_names , function(assay_name){
 57 |     return(.convert_from_df_one_var(de_stat , assay_name))
 58 |   })
 59 |   names(de_assays) = assay_names
 60 | 
 61 |   # convert coldata
 62 |   meta_nhoods = unique(de_stat[, coldata_names])
 63 |   meta_nhoods = meta_nhoods[order(meta_nhoods$Nhood) , ]
 64 | 
 65 |   # combine
 66 |   de_stat = SingleCellExperiment(de_assays, colData = DataFrame(meta_nhoods))
 67 | 
 68 |   colnames(de_stat) = meta_nhoods$Nhood
 69 |   return(de_stat)
 70 | }
 71 | 
 72 | 
 73 | 
 74 | #' @importFrom reshape2 dcast
 75 | .convert_from_df_one_var = function(de_stat , var){
 76 |   df = dcast(de_stat , formula = gene ~ Nhood , value.var = var)
 77 |   df$gene = as.character(df$gene)
 78 |   n_hoods = ncol(df)-1
 79 |   n_genes = nrow(df)
 80 |   genes = df$gene
 81 |   df = df[, 2:ncol(df)]
 82 |   df = as.matrix(df , ncol = n_hoods , nrow = n_genes)
 83 |   rownames(df) = genes
 84 |   return(df)
 85 | }
 86 | 
 87 | 
 88 | #' @importFrom SingleCellExperiment SingleCellExperiment
 89 | #' @importFrom SummarizedExperiment colData
 90 | .convert_from_sce = function(de_stat , assay_names , coldata_names){
 91 | 
 92 |   de_stat = de_stat[ , order(de_stat$Nhood)]
 93 | 
 94 |   # combine assays
 95 |   de_assays = lapply(assay_names , function(assay_name){
 96 |     return(.convert_from_sce_one_var(de_stat , assay_name))
 97 |   })
 98 |   de_assays = as.data.frame( do.call(cbind , de_assays) )
 99 |   colnames(de_assays) = assay_names
100 | 
101 |   # add gene and Nhood
102 |   df = as.data.frame( assay(de_stat , assay_names[1]) )
103 |   df = rownames_to_column(df , var = "gene")
104 |   df = melt(df , id = "gene")
105 |   colnames(df) = c("gene" , "Nhood" , "var")
106 |   df$Nhood = as.numeric(as.character(df$Nhood))
107 |   df$Nhood = as.integer(df$Nhood)
108 |   df = df[order(df$Nhood) , ]
109 |   df = df[, c("gene" , "Nhood")]
110 | 
111 |   de_assays = cbind(df , de_assays)
112 |   meta_nhoods = as.data.frame(colData(de_stat))
113 |   de_assays = merge(de_assays , meta_nhoods , by = c("Nhood") , all.x = TRUE)
114 | 
115 |   return(de_assays)
116 | }
117 | 
118 | 
119 | #' @importFrom SummarizedExperiment assay assay<-
120 | #' @importFrom tibble rownames_to_column
121 | #' @importFrom reshape2 melt
122 | .convert_from_sce_one_var = function(de_stat , var){
123 |   df = as.data.frame( assay(de_stat , var) )
124 |   df = rownames_to_column(df , var = "gene")
125 |   df = melt(df , id = "gene")
126 |   colnames(df) = c("gene" , "Nhood" , "var")
127 |   df$Nhood = as.numeric(as.character(df$Nhood))
128 |   df$Nhood = as.integer(df$Nhood)
129 |   df = df[order(df$Nhood) , ]
130 |   return(df$var)
131 | }
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/R/estimate_neighbourhood_sizes.R:
--------------------------------------------------------------------------------
  1 | #' estimate_neighbourhood_sizes
  2 | #'
  3 | #' For a grid of \code{k}, returns neighbourhood size distribution; this will help a user to select an appropriate \code{k}
  4 | #' @param x A \code{\linkS4class{SingleCellExperiment}} object.
  5 | #' @param reducedDim_name Defines the slot in \code{reducedDim(x)} to use as the embedding for graph construction.
  6 | #' @param k_grid Vector of positive integers, defines how many neighbours to use for the neighbourhood assignment.
  7 | #' @param prop Numerical, between 0 and 1, defines which fraction of cells to use as neighbourhood centres. Default \code{prop = 0.2}.
  8 | #' @param order In \code{c(1,2)}, defines which order of neighbours to use. Default \code{order = 2}.
  9 | #' @param filtering In \code{c(TRUE,FALSE)}, defines whether to filter neighbourhoods (reduces computing time downstream). Default \code{filtering = TRUE}.
 10 | #' @param k_init Positive integer, defines how many neighbours to use for identifying \sQuote{index} cells. Default \code{k_init = 50}.
 11 | #' @param d Positive integer, defines how many dimensions from \code{reducedDim(x)} to use. Default \code{d = 30}.
 12 | #' @param cluster_id Character specifying which field in \code{colData(x)} to use for 'localised' neighbourhood size estimation.
 13 | #' This might be useful if dataset is rather big (which will result in an excessive running time).
 14 | #' In case \code{cluster_id} is provided, we will calculate neighbourhood size distribution within individual clusters and aggregate results
 15 | #' across clusters in order to speed up the process (note that it might result in slightly biased estimates).
 16 | #' Default \code{cluster_id = NULL}, in which case neighbourhood sizes will be estimated for the whole dataset.
 17 | #' @param plot_stat Boolean specifying whether to plot the stat. Default \code{plot_stat = TRUE}.
 18 | #' @param verbose Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}.
 19 | #' @details
 20 | #' This function returns an estimated distribution of neighbourhood sizes for different \code{k} values
 21 | #' (for the selected by user \code{order}; if you want to estimates for both 1st and 2nd \code{order}, run this twice with changing \code{order}).
 22 | #' This can help to gauge whether the neighbourhood size distribution is appropriate for the selected \code{k}, since \code{\link{de_test_neighbourhoods}} takes a while to complete.
 23 | #'
 24 | #' Note that this function also might take a while to complete on big datasets (> 70k cells), and in this case we provide an option to estimate neighbourhood
 25 | #' sizes within annotated clusters (passed in \code{cluster_id}), which will be considerably faster, however, might result in slightly biased estimates.
 26 | #' @return \code{data.frame} object, in which each row corresponds to \code{k} and 5 columns correspond to min, q25, median, q75 and max of neighbourhood size distributions; also returns a boxplot
 27 | #' @export
 28 | #' @importFrom miloR Milo buildGraph graph<- graph nhoods<- nhoodIndex<- buildNhoodGraph
 29 | #' @importFrom tibble rownames_to_column
 30 | #' @importFrom SummarizedExperiment colData
 31 | #' @import ggplot2
 32 | #' @importFrom grDevices colorRampPalette
 33 | #' @importFrom stats quantile
 34 | #' @examples
 35 | #' require(SingleCellExperiment)
 36 | #' n_row = 500
 37 | #' n_col = 100
 38 | #' n_latent = 5
 39 | #' sce = SingleCellExperiment(assays = list(counts =
 40 | #' floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
 41 | #' rownames(sce) = as.factor(1:n_row)
 42 | #' colnames(sce) = c(1:n_col)
 43 | #' sce$cell = colnames(sce)
 44 | #' reducedDim(sce , "reduced_dim") =
 45 | #' matrix(rnorm(n_col*n_latent), ncol=n_latent)
 46 | #' out = estimate_neighbourhood_sizes(sce, k_grid = c(5,10),
 47 | #' reducedDim_name = "reduced_dim")
 48 | #'
 49 | estimate_neighbourhood_sizes = function(x, reducedDim_name , k_grid = seq(10,100,10) , order = 2, prop = 0.1 , filtering = TRUE,
 50 |                                         k_init = 50 , d = 30 , cluster_id = NULL, plot_stat = TRUE , verbose = TRUE){
 51 | 
 52 |   out = .check_argument_correct(x, .check_sce, "Check x - something is wrong (gene names unique? reducedDim.name is not present?)") &
 53 |     .check_argument_correct(k_grid, is.numeric, "Check k_grid - should be numeric vector") &
 54 |     .check_argument_correct(prop, .check_prop, "Check prop - should be positive number between 0 and 1") &
 55 |     .check_argument_correct(order, function(x) .check_arg_within_options(x, c(1,2)),
 56 |                             "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)") &
 57 |     .check_argument_correct(filtering, .check_boolean, "Check filtering - should be either TRUE or FALSE") &
 58 |     .check_argument_correct(reducedDim_name, is.character, "Check reducedDim_name - should be character vector") &
 59 |     .check_argument_correct(k_init, .check_positive_integer, "Check k_init - should be positive integer") &
 60 |     .check_argument_correct(d, .check_positive_integer, "Check d - should be positive integer") &
 61 |     .check_argument_correct(verbose, .check_boolean, "Check verbose - should be either TRUE or FALSE") &
 62 |     .check_reducedDim_in_sce(x , reducedDim_name) & .check_k_grid(k_grid)
 63 | 
 64 |   # check that cluster_id is in colData(x)
 65 |   if (!is.null(cluster_id)){
 66 |     if (!cluster_id %in% colnames(colData(x))){
 67 |       stop("If cluster_id not NULL, it should be in colnames(colData(x))")
 68 |     }
 69 |   }
 70 | 
 71 |   if (is.null(colnames(x))){
 72 |     colnames(x) = as.character(c(1:ncol(x)))
 73 |   }
 74 | 
 75 |   quantile_vec = seq(0,1,0.25)
 76 |   # check that k_grid reasonable -- at least 2 values, the the highest is smaller than 1000;
 77 |   # otherwise warn
 78 |   k_grid = sort(unique(k_grid))
 79 |   if (verbose){
 80 |     message(paste0("Running for next k values:\n" , paste(k_grid , collapse = ", ")))
 81 |   }
 82 | 
 83 |   if (is.null(cluster_id)){
 84 |     stat = lapply(k_grid , function(k){
 85 |       sce_milo = assign_neighbourhoods(x , k = k , prop = prop , order = order , filtering = filtering,
 86 |                                        reducedDim_name = reducedDim_name , k_init = k_init , d = d , verbose = FALSE)
 87 |       out = quantile(colSums(nhoods(sce_milo)) , probs = quantile_vec)
 88 |       if (verbose){
 89 |         message(paste0("Finished for k = ", k))
 90 |       }
 91 |       return(out)
 92 |     })
 93 |   } else {
 94 |     meta = as.data.frame(colData(x))
 95 |     clusters = table( meta[, cluster_id] )
 96 |     # select only big clusters
 97 |     clusters = names(clusters)[clusters > 2*max(k_grid)]
 98 |     if (length(clusters) == 0){
 99 |       stop("All specified clusters have # cells < 2*max(k). We recommed to provide lower clustering resolution, decreasing max(k) or set cluster_id = NULL.")
100 |     }
101 |     else {
102 |       stat = lapply(k_grid , function(k){
103 |         stat_per_k = sapply(clusters , function(cluster){
104 |           idx = which(meta[, cluster_id] == cluster)
105 |           sce_milo = assign_neighbourhoods(x[,idx] , k = k , prop = prop , order = order , filtering = filtering,
106 |                                            reducedDim_name = reducedDim_name , k_init = k_init , d = d , verbose = FALSE)
107 |           out = colSums(nhoods(sce_milo))
108 |           return(out)
109 |         })
110 |         stat_per_k = unlist(stat_per_k)
111 |         out = quantile(stat_per_k , probs = quantile_vec)
112 |         if (verbose){
113 |           message(paste0("Finished for k = ", k))
114 |         }
115 |         return(out)
116 |       })
117 |     }
118 |   }
119 | 
120 |   stat = as.data.frame( do.call(rbind , stat) )
121 |   rownames(stat) = k_grid
122 |   stat = rownames_to_column(stat , var = "k")
123 |   if (verbose){
124 |     message(paste0("Finished the estimation of neighbourhood sizes ~ k dependancy (order = " , order , ")."))
125 |   }
126 |   colnames(stat) = c("k" , "min" , "q25" , "med" , "q75" , "max")
127 |   stat$k = factor(stat$k , levels = k_grid)
128 | 
129 |   if (plot_stat){
130 |     p_stat = tryCatch(
131 |       {
132 |         p = ggplot(stat, aes(k)) +
133 |           geom_boxplot( aes(ymin = min, lower = q25, middle = med, upper = q75, ymax = max , fill = k), stat = "identity") +
134 |           theme_bw() +
135 |           scale_fill_manual(values = colorRampPalette(brewer.pal(11, "Spectral"))(length(k_grid))) +
136 |           labs( y = "Neighbourhood size")
137 |         p
138 |       },
139 |       error=function(err){
140 |         warning("Can not return plot.")
141 |         return(NULL)
142 |       }
143 |     )
144 |     print(p_stat)
145 |   }
146 |   return(stat)
147 | }
148 | 
149 | 


--------------------------------------------------------------------------------
/R/filter_neighbourhoods.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' filter_neighbourhoods
 4 | #'
 5 | #' Filtering redundant neighbourhoods, using the greedy approach to set cover problem
 6 | #' @param x A \code{\linkS4class{Milo}} object.
 7 | #' @details
 8 | #' This function refines neighbourhood assignment and discards redundant neighbourhoods. This call is highly recommended since it reduces computational time greatly.
 9 | #' It is called directly in \code{\link{assign_neighbourhoods}} if \code{filtering=TRUE}, and also can be called independently post hoc.
10 | #'
11 | #' Under the hood, it adapts \code{\link[RcppGreedySetCover]{greedySetCover}} function, which implements greedy solution to NP-hard \sQuote{set cover} problem.
12 | #' In this solution, all neighbourhoods are sorted in the decreasing order of their size (i.e. number of cells),
13 | #' and neighbourhoods are iteratively added to the final neighbourhood set if they contain at least one cell not \sQuote{covered} by previously added neighbourhoods.
14 | #' @return \code{Milo} object with refined neighbourhood assignment
15 | #' @export
16 | #' @importFrom RcppGreedySetCover greedySetCover
17 | #' @importFrom miloR buildNhoodGraph nhoodIndex nhoods graph graph<-
18 | #' @examples
19 | #' require(SingleCellExperiment)
20 | #' n_row = 500
21 | #' n_col = 100
22 | #' n_latent = 5
23 | #' sce = SingleCellExperiment(assays =
24 | #' list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
25 | #' rownames(sce) = as.factor(1:n_row)
26 | #' colnames(sce) = c(1:n_col)
27 | #' sce$cell = colnames(sce)
28 | #' reducedDim(sce , "reduced_dim") =
29 | #' matrix(rnorm(n_col*n_latent), ncol=n_latent)
30 | #' sce = assign_neighbourhoods(sce,
31 | #' reducedDim_name = "reduced_dim" , k = 10 , order = 1)
32 | #' sce = filter_neighbourhoods(sce)
33 | filter_neighbourhoods = function(x){
34 | 
35 |   #args = c(as.list(environment()))
36 |   #out = .general_check_arguments(args)
37 |   out = .check_argument_correct(x, .check_sce_milo, "Check x - something is wrong. Calculate 'assign_neighbourhoods' first.)")
38 | 
39 |   nhoods_sce = nhoods(x)
40 |   stat_hoods = lapply(1:ncol(nhoods_sce) , function(i){
41 |     current.cells = which(nhoods_sce[,i] == 1)
42 |     out = data.frame(set = colnames(nhoods_sce)[i],
43 |                      element = names(current.cells))
44 |     return(out)
45 |   })
46 |   stat_hoods = do.call(rbind, stat_hoods)
47 |   stat_filtered = suppressMessages( greedySetCover(stat_hoods) )
48 |   hoods_filtered = unique(stat_filtered$set)
49 |   if (length(hoods_filtered) > 1){
50 |     nhoods_sce = nhoods_sce[, colnames(nhoods_sce) %in% hoods_filtered]
51 |   }
52 |   else {
53 |     nhoods_sce = as.matrix(nhoods_sce[, colnames(nhoods_sce) %in% hoods_filtered])
54 |     colnames(nhoods_sce) = hoods_filtered
55 |   }
56 |   nhoods(x) = nhoods_sce
57 |   x = buildNhoodGraph(x)
58 |   nhoodIndex(x) = nhoodIndex(x)[match(colnames(nhoods(x)) , nhoodIndex(x))]
59 |   return(x)
60 | }
61 | 


--------------------------------------------------------------------------------
/R/rank_neighbourhoods_by_DE_magnitude.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' rank_neighbourhoods_by_DE_magnitude
 4 | #'
 5 | #' Ranks neighbourhoods by the magnitude of DE: number of DE genes and number of \sQuote{specifically} DE genes
 6 | #' @param de_stat Output of miloDE (\code{\link{de_test_neighbourhoods}}), either in \code{data.frame} or \code{SingleCellExperiment} format.
 7 | #' @param pval.thresh A scalar specifying which threshold to use for deciding on significance for gene being DE in a neighbourhood. Default \code{pval.thresh = 0.1}.
 8 | #' @param z.thresh A scalar specifying which threshold to use for deciding on which z-normalised p-values are going to be considered specifically DE. Default \code{z.thresh = -3}.
 9 | #' @details
10 | #' To calculate number of DE genes per neighbourhood, we use \code{pval_corrected_across_genes}.
11 | #' Accordingly, for each neighbourhood we calculate how many genes has p-values lower than designated threshold.
12 | #'
13 | #' To calculate number of \sQuote{specifically} DE genes, we first z-normalise \code{pval_corrected_across_nhoods} (for each gene) and then for each
14 | #' neighbourhood, calculate how many genes have z-normalised p-values lower than designated threshold.
15 | #'
16 | #' \emph{Note that for this analysis we set NaN p-values (raw and corrected) to 1 - interpret accordingly.}
17 | #' @return \code{data.frame}, with calculated number-DE-genes and number-specific-DE-genes for each neighbourhood
18 | #' @export
19 | #' @importFrom SummarizedExperiment colData
20 | #' @importFrom stats sd
21 | #' @examples
22 | #' de_stat = expand.grid(gene = paste0("gene" , c(1:5)) , Nhood = c(1:10))
23 | #' de_stat$Nhood_center = paste0("nhood_" , de_stat$Nhood)
24 | #' de_stat$logFC = sample(seq(-2,2,1) , nrow(de_stat) , 1)
25 | #' de_stat$pval = sample(c(0,1),nrow(de_stat),1)
26 | #' de_stat$pval_corrected_across_genes = sample(c(0,1),nrow(de_stat),1)
27 | #' de_stat$pval_corrected_across_nhoods = sample(c(0,1),nrow(de_stat),1)
28 | #' de_stat$test_performed = TRUE
29 | #' out = rank_neighbourhoods_by_DE_magnitude(de_stat)
30 | #'
31 | rank_neighbourhoods_by_DE_magnitude = function(de_stat, pval.thresh = 0.1, z.thresh = -3 ){
32 | 
33 |   out = .check_de_stat_valid(de_stat ,
34 |                              assay_names = c("logFC" , "pval" , "pval_corrected_across_nhoods" , "pval_corrected_across_genes") ,
35 |                              coldata_names = c("Nhood" , "Nhood_center")) &
36 |     .check_pval_thresh(pval.thresh) & .check_z_thresh(z.thresh)
37 | 
38 |   if (is(de_stat , "data.frame")){
39 |     de_stat = convert_de_stat(de_stat ,
40 |                               assay_names = c("logFC" , "pval" , "pval_corrected_across_nhoods" , "pval_corrected_across_genes") ,
41 |                               coldata_names = c("Nhood" , "Nhood_center" , "test_performed"))
42 |     de_stat = de_stat[ , order(de_stat$Nhood)]
43 |   }
44 |   #de_stat = de_stat[, de_stat$design_matrix_suitable]
45 | 
46 |   # calculate number of DE genes
47 |   assay_pval_corrected_across_genes = assay(de_stat , "pval_corrected_across_genes")
48 |   idx = which(is.na(assay_pval_corrected_across_genes))
49 |   assay_pval_corrected_across_genes[idx] = 1
50 |   out_n_genes = colSums(assay_pval_corrected_across_genes < pval.thresh , na.rm = T)
51 | 
52 |   # calculate number of specifically DE genes
53 |   assay_pval_corrected_across_nhoods = assay(de_stat , "pval_corrected_across_nhoods")
54 |   idx = which(is.na(assay_pval_corrected_across_nhoods))
55 |   assay_pval_corrected_across_nhoods[idx] = 1
56 |   assay_pval_corrected_across_nhoods = apply(assay_pval_corrected_across_nhoods , 1 , function(x){
57 |     return((x - mean(x, na.rm = T))/sd(x, na.rm = T))
58 |   })
59 |   assay_pval_corrected_across_nhoods = t(assay_pval_corrected_across_nhoods)
60 |   out_n_specific_genes = colSums(assay_pval_corrected_across_nhoods < z.thresh , na.rm = T)
61 | 
62 |   out = cbind(out_n_genes , out_n_specific_genes)
63 |   colnames(out) = c("n_DE_genes" , "n_specific_DE_genes")
64 | 
65 |   meta_nhoods = as.data.frame(colData(de_stat))
66 |   meta_nhoods = cbind(meta_nhoods , out)
67 | 
68 |   return(meta_nhoods)
69 | 
70 | }
71 | 
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/R/sce_mouseEmbryo.R:
--------------------------------------------------------------------------------
 1 | #' Chimeric mouse embryo, Tal1- (Pijuan-Sala et al., 2019)
 2 | #'
 3 | #' \code{SingleCellExperiment} object, containing counts
 4 | #' (raw are pulled using \code{\link[MouseGastrulationData]{Tal1ChimeraData}}
 5 | #' and log-normalised are estimated using \code{\link[scuttle]{logNormCounts}}) matrices
 6 | #' for chimeric mouse embryos (both Tal1+ and Tal1-).
 7 | #'
 8 | #' Additionally, we subselected only 3 cell types (Endothelium , Blood progenitors 2, Neural crest)
 9 | #' and 300 HVGs.
10 | #'
11 | #' \code{colData(sce_mouseEmbryo)} contains information about replicate (\code{sample}) and Tal1 status (\code{tomato = TRUE} means Tal1-).
12 | #'
13 | #' @docType data
14 | #' @usage data(sce_mouseEmbryo)
15 | #'
16 | #' @format \code{\linkS4class{SingleCellExperiment}} object
17 | #'
18 | #' @name sce_mouseEmbryo
19 | #' @source MouseGastrulationData package
20 | NULL
21 | 


--------------------------------------------------------------------------------
/R/spatial_pval_adjustment.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' spatial_pval_adjustment
 4 | #'
 5 | #' Performs p-values multiple testing correction across neighbourhoods, with accounting for the overlap
 6 | #' @param nhoods_x Should be extracted from x as \code{nhoods(x)}.
 7 | #' @param pvalues Vector of p-values.
 8 | #' @details
 9 | #' This function is not intended to be run by itself, but can be useful if the user wants to perform \sQuote{spatially aware} multiple testing correction.
10 | #'
11 | #' Under the hood it performs weighted version of BH correction, where weights are reciprocal to the local desnity of a neighbourhood.
12 | #' Accordingly, big and/or highly connected neighbourhoods will have lower weights.
13 | #'
14 | #' @return Vector with \sQuote{spatially} (i.e. across neighbourhoods) adjusted p-values
15 | #' @export
16 | #' @examples
17 | #' require(SingleCellExperiment)
18 | #' require(miloR)
19 | #' n_row = 500
20 | #' n_col = 100
21 | #' n_latent = 5
22 | #' sce = SingleCellExperiment(assays = list(counts =
23 | #' floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
24 | #' rownames(sce) = as.factor(1:n_row)
25 | #' colnames(sce) = c(1:n_col)
26 | #' sce$cell = colnames(sce)
27 | #' reducedDim(sce , "reduced_dim") =
28 | #' matrix(rnorm(n_col*n_latent), ncol=n_latent)
29 | #' sce = assign_neighbourhoods(sce,
30 | #' reducedDim_name = "reduced_dim" , k = 10 , order = 1)
31 | #' nhoods_x = nhoods(sce)
32 | #' pvalues = runif(n = ncol(nhoods_x) , min = 0 , max = 1)
33 | #' out = spatial_pval_adjustment(nhoods_x, pvalues = pvalues)
34 | spatial_pval_adjustment = function(nhoods_x , pvalues){
35 |   # we will only calculate weights for neighbourhoods in which we are testing (not NaNs)
36 |   idx_not_nan = which(!is.na(pvalues))
37 |   if (length(idx_not_nan) > 1){
38 |     weights = .get_weights(as.matrix(nhoods_x[,idx_not_nan]))
39 | 
40 |     out = .check_weights_and_pvals(weights , pvalues[idx_not_nan] , as.matrix(nhoods_x[,idx_not_nan])) & .check_nhoods_matrix(nhoods_x)
41 | 
42 |     n_comparisons = length(pvalues)
43 |     pvalues = pvalues[idx_not_nan]
44 | 
45 |     # calc correction
46 |     o <- order(pvalues)
47 |     pvalues <- pvalues[o]
48 |     weights <- weights[o]
49 |     adjp <- numeric(length(o))
50 |     adjp[o] <- rev(cummin(rev(sum(weights)*pvalues/cumsum(weights))))
51 |     adjp <- pmin(adjp, 1)
52 | 
53 |     # put eveyrhting together
54 |     adjp_total = rep(NaN, 1,n_comparisons)
55 |     adjp_total[idx_not_nan] = adjp
56 |   } else {
57 |     adjp_total = pvalues
58 |   }
59 |   return(adjp_total)
60 | }
61 | 
62 | 
63 | 
64 | .get_weights = function(nhoods_x){
65 |   out = .check_nhoods_matrix(nhoods_x)
66 |   intersect_mat <- crossprod(nhoods_x)
67 |   t.connect <- unname(rowSums(intersect_mat))
68 |   weights<- 1/unlist(t.connect)
69 |   return(weights)
70 | }
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | # Contains various check functions to examine whether variables are of the right format
  2 | 
  3 | 
  4 | 
  5 | ### checks ###
  6 | 
  7 | # sce-milo check: that is used in de_test_all_hoods, de_test_single_hood, filter_hoods, subset_milo
  8 | #' @importFrom miloR Milo nhoods graph graph<- nhoods<- nhoodIndex<- buildNhoodGraph
  9 | #' @importFrom methods is
 10 | .check_sce_milo = function(x){
 11 |   if (!is(x , "Milo")){
 12 |     stop("x should be a Milo object. Please run `assign_neighbourhoods` first.")
 13 |     return(FALSE)
 14 |   } else if (length(miloR::graph(x)) == 0){
 15 |     stop("x should contain non-trivial graph. Please run `assign_neighbourhoods` first.")
 16 |     return(FALSE)
 17 |   } else if (sum(sum(nhoods(x))) == 0){
 18 |     stop("x should have calculated nhoods. Please run 'assign_neighbourhoods' first.")
 19 |     return(FALSE)
 20 |   } else if ( nrow(nhoods(x)) == 1 & ncol(nhoods(x)) == 1 ){
 21 |     stop("x should contain non-trivial nhoods. Please run `assign_neighbourhoods` first.")
 22 |     return(FALSE)
 23 |   } else if (isEmpty(nhoodIndex(x))){
 24 |     stop("x should have calculated nhoodIndex. Please run 'assign_neighbourhoods' first.")
 25 |     return(FALSE)
 26 |   } else {
 27 |     return(TRUE)
 28 |   }
 29 | }
 30 | 
 31 | # sce-milo check wo/ calculation of the length of graph: that is used in plotting functions
 32 | #' @importFrom miloR Milo nhoods graph graph<- nhoods<- nhoodIndex<- buildNhoodGraph
 33 | #' @importFrom methods is
 34 | .check_sce_milo_wo_graph = function(x){
 35 |   if (!is(x , "Milo")){
 36 |     stop("x should be a Milo object. Please run `assign_neighbourhoods` first.")
 37 |     return(FALSE)
 38 |   } else if (sum(sum(nhoods(x))) == 0){
 39 |     stop("x should have calculated nhoods. Please run 'assign_neighbourhoods' first.")
 40 |     return(FALSE)
 41 |   } else if ( nrow(nhoods(x)) == 1 & ncol(nhoods(x)) == 1 ){
 42 |     stop("x should contain non-trivial nhoods. Please run `assign_neighbourhoods` first.")
 43 |     return(FALSE)
 44 |   } else if (isEmpty(nhoodIndex(x))){
 45 |     stop("x should have calculated nhoodIndex. Please run 'assign_neighbourhoods' first.")
 46 |     return(FALSE)
 47 |   } else {
 48 |     return(TRUE)
 49 |   }
 50 | }
 51 | 
 52 | 
 53 | 
 54 | # sce check: that is used in add_embedding
 55 | #' @importFrom SingleCellExperiment reducedDimNames
 56 | #' @importFrom SummarizedExperiment assays<- assays
 57 | #' @importFrom miloR Milo
 58 | #' @importFrom methods is
 59 | .check_sce = function(x){
 60 |   if (!is(x , "SingleCellExperiment") & !is(x , "Milo")){
 61 |     stop("x should be a SingleCellExperiment or Milo object.")
 62 |     return(FALSE)
 63 |   } else if (!("counts" %in% names(assays(x)))){
 64 |     stop("x should contain 'counts' assay that will be used to calculate DE. If counts are stored in different assay, please move them to slot 'counts'.")
 65 |     return(FALSE)
 66 |   } else if (length(unique(rownames(x))) < nrow(x) ){
 67 |     stop("x should have unique rownames.")
 68 |     return(FALSE)
 69 |   } else if (!is.null(colnames(x)) & length(unique(colnames(x))) < ncol(x)){
 70 |     stop("If colnames(x) exist, they should be unique.")
 71 |     return(FALSE)
 72 |   } else {
 73 |     return(TRUE)
 74 |   }
 75 | }
 76 | 
 77 | 
 78 | 
 79 | 
 80 | 
 81 | #' @importFrom igraph is_igraph
 82 | .valid_nhood <- function(x){
 83 |   # check for a valid nhood slot
 84 |   n_neigh <- ncol(nhoods(x))
 85 |   is_not_empty <- n_neigh > 0
 86 |   if (is_not_empty) {
 87 |     return(TRUE)
 88 |   } else {
 89 |     return(FALSE)
 90 |   }
 91 | }
 92 | 
 93 | #' @importFrom igraph is_igraph
 94 | .valid_graph <- function(x){
 95 |   # check for a valid graph
 96 |   if(isTRUE(is_igraph(x))){
 97 |     return(TRUE)
 98 |   } else{
 99 |     return(FALSE)
100 |   }
101 | }
102 | 
103 | 
104 | 
105 | #' @importFrom SummarizedExperiment assayNames
106 | .check_assay_in_sce = function(x , assay_type){
107 |   if (.check_sce(x)){
108 |     if (!assay_type %in% assayNames(x)){
109 |       stop("assay_type should be in assayNames(x)")
110 |       return(FALSE)
111 |     }
112 |     else {
113 |       return(TRUE)
114 |     }
115 |   }
116 |   else {
117 |     return(FALSE)
118 |   }
119 | }
120 | 
121 | 
122 | #' @importFrom SummarizedExperiment colData
123 | .check_condition_in_coldata_sce = function(x , condition_id){
124 |   if (.check_sce(x)){
125 |     if (!(condition_id %in% colnames(colData(x)))){
126 |       stop("'condition_id' should be in colData(x)")
127 |       return(FALSE)
128 |     }
129 |     else {
130 |       meta = as.data.frame(colData(x))
131 |       tab = table(meta[, condition_id])
132 |       if (length(tab) < 2){
133 |         stop("x should have at least two levels for tested conditions.")
134 |         return(FALSE)
135 |       } else{
136 |         return(TRUE)
137 |       }
138 |     }
139 |   }
140 |   else {
141 |     return(FALSE)
142 |   }
143 | }
144 | 
145 | #' @importFrom SummarizedExperiment colData
146 | .check_sample_and_condition_id_valid = function(x , condition_id , sample_id){
147 | 
148 |   if (condition_id == sample_id){
149 |     stop("'sample_id' and 'condition_id' can not be the same")
150 |     return(FALSE)
151 |   }
152 |   else {
153 |     meta = as.data.frame(colData(x))
154 |     tab = table(meta[, sample_id] , meta[, condition_id])
155 |     tab = sapply(1:nrow(tab) , function(i) sum(tab[i,] > 0))
156 |     if (mean(tab == 1) < 1){
157 |       stop("Each sample_id should be associated with one condition")
158 |       return(FALSE)
159 |     }
160 |     else {
161 |       return(TRUE)
162 |     }
163 |   }
164 | }
165 | 
166 | #' @importFrom SummarizedExperiment colData
167 | .check_var_in_coldata_sce = function( x , var , var_intended){
168 |   if (.check_sce(x)){
169 |     if (!(var %in% colnames(colData(x)))){
170 |       stop(paste0("'", var_intended, "'", " should be in colData(x)"))
171 |       return(FALSE)
172 |     }
173 |     else {
174 |       return(TRUE)
175 |     }
176 |   }
177 |   else {
178 |     return(FALSE)
179 |   }
180 | }
181 | 
182 | 
183 | #' @importFrom miloR nhoods
184 | .check_nhood_stat = function(nhood_stat , x){
185 |   if (!is(nhood_stat , "data.frame")){
186 |     stop("'nhood_stat' should be a data.frame.")
187 |     return(FALSE)
188 |   }
189 |   else {
190 |     if (!"Nhood" %in% colnames(nhood_stat)){
191 |       stop("'nhood_stat' should contain column 'Nhood'.")
192 |       return(FALSE)
193 |     }
194 |     else {
195 |       if (!is.numeric(nhood_stat$Nhood)){
196 |         stop("'nhood_stat$Nhood' should be numeric.")
197 |         return(FALSE)
198 |       }
199 |       else {
200 |         nhoods_sce = nhoods(x)
201 |         if (mean(nhood_stat$Nhood %in% c(1:ncol(nhoods_sce))) < 1){
202 |           stop("'nhood_stat$Nhood' should be within c(1:ncol(nhoods(x))).")
203 |           return(FALSE)
204 |         }
205 |         else {
206 |           return(TRUE)
207 |         }
208 |       }
209 |     }
210 |   }
211 | }
212 | 
213 | 
214 | #' @importFrom SummarizedExperiment colData
215 | .check_covariates_in_coldata_sce = function(x , covariates){
216 |   if (.check_sce(x)){
217 |     if (is.null(covariates)){
218 |       return(TRUE)
219 |     }
220 |     else {
221 |       coldata = colnames(colData(x))
222 | 
223 |       covariates_exist = sapply(covariates , function(covariate){
224 |         out = as.numeric(covariate %in% coldata)
225 |         return(out)
226 |       })
227 |       if (sum(covariates_exist) < length(covariates)){
228 |         stop("All covariates should be colnames of colData(x).")
229 |         return(FALSE)
230 |       }
231 |       else {
232 |         covariates_w_contrast = sapply(covariates , function(covariate){
233 |           tab = table(colData(x)[, covariate])
234 |           out = as.numeric(length(tab) != 1)
235 |           return(out)
236 |         })
237 |         if (sum(covariates_w_contrast) < length(covariates)){
238 |           stop("All covariates should have more than 1 contrast.")
239 |           return(FALSE)
240 |         }
241 |         else {
242 |           return(TRUE)
243 |         }
244 |       }
245 |     }
246 |   }
247 |   else {
248 |     return(FALSE)
249 |   }
250 | }
251 | 
252 | 
253 | 
254 | 
255 | #' @importFrom SummarizedExperiment colData
256 | .check_cell_id_in_sce = function(x , cell_id){
257 |   if (is.null(cell_id) & is.null(colnames(x))){
258 |     stop("If colnames(x) are NULL, cell_id has to be specified in order to assgin unique cell identifiers.")
259 |     return(FALSE)
260 |   } else {
261 |     if (is.null(colnames(x))){
262 |       if (!cell_id %in% colnames(colData(x))){
263 |         stop("cell_id should be in colData(x)")
264 |         return(FALSE)
265 |       }
266 |       else {
267 |         return(TRUE)
268 |       }
269 |     }
270 |     else {
271 |       return(TRUE)
272 |     }
273 |   }
274 | }
275 | 
276 | 
277 | 
278 | .check_genes_in_sce = function(x, genes){
279 |   if (.check_sce(x)){
280 |     if (!is.null(genes)){
281 |       if (mean(genes %in% rownames(x)) < 1){
282 |         stop("Some gene names are missing from x")
283 |         return(FALSE)
284 |       }
285 |       else {
286 |         return(TRUE)
287 |       }
288 |     }
289 |     else {
290 |       return(TRUE)
291 |     }
292 |   }
293 |   else {
294 |     return(FALSE)
295 |   }
296 | }
297 | 
298 | 
299 | #' @importFrom SingleCellExperiment reducedDimNames
300 | .check_reducedDim_in_sce = function(x , reducedDim_name){
301 |   if (.check_sce(x)){
302 |     if (!reducedDim_name %in% reducedDimNames(x)){
303 |       stop("reducedDim_name should be in reducedDimNames(x).")
304 |       return(FALSE)
305 |     }
306 |   }
307 |   else {
308 |     return(FALSE)
309 |   }
310 | }
311 | 
312 | 
313 | #' @importFrom S4Vectors isEmpty
314 | .check_cells_ref_and_query = function(cells_sce , cells_ref , cells_query){
315 |   if (mean(cells_ref %in% cells_sce) < 1){
316 |     stop("Some of cells_ref are not present.")
317 |     return(FALSE)
318 |   } else if (mean(cells_query %in% cells_sce) < 1){
319 |     stop("Some of cells_query are not present.")
320 |     return(FALSE)
321 |   } else if (!isEmpty(intersect(cells_ref , cells_query))){
322 |     stop("cells_ref and cells_query can not overlap.")
323 |     return(FALSE)
324 |   } else {
325 |     return(TRUE)
326 |   }
327 | }
328 | 
329 | 
330 | #' @importFrom SummarizedExperiment assayNames assayNames<- colData
331 | .check_de_stat_valid = function(de_stat , assay_names , coldata_names){
332 |   if (!class(de_stat) %in% c("data.frame" , "SingleCellExperiment")){
333 |     stop("de_stat should be either data.frame or SingleCellExperiment object.\n
334 |          To get valid de_stat object, please run 'de_test_neighbourhoods.R'")
335 |     return(FALSE)
336 |   } else if (length(intersect(assay_names, coldata_names)) > 0){
337 |     stop("assay_names and coldata_names can not overlap")
338 |     return(FALSE)
339 |   } else if (is(de_stat , "data.frame")){
340 |     cols = colnames(de_stat)
341 |     cols_required = c(assay_names , coldata_names)
342 |     if (mean(cols_required %in% cols) < 1){
343 |       stop("colnames(de_stat) missing some of the assay_names or coldata_names.")
344 |       return(FALSE)
345 |     } else if (!is.numeric(de_stat$Nhood)) {
346 |       stop("Nhood field should be numeric. To get valid de_stat object, please run 'de_test_neighbourhoods.R'")
347 |       return(FALSE)
348 |     }
349 |   } else if (is(de_stat , "SingleCellExperiment")){
350 |     cols = assayNames(de_stat)
351 |     if (mean(assay_names %in% cols) < 1){
352 |       stop("de_stat missing some of the required assays.")
353 |       return(FALSE)
354 |     } else {
355 |       meta_nhoods = as.data.frame(colData(de_stat))
356 |       if (mean(coldata_names %in% colnames(meta_nhoods)) < 1){
357 |         stop("de_stat missing some of the coldata_names")
358 |         return(FALSE)
359 |       } else if (!is.numeric(de_stat$Nhood)) {
360 |         stop("colData field 'Nhood' should be numeric. To get valid de_stat object, please run 'de_test_neighbourhoods.R'")
361 |         return(FALSE)
362 |       }
363 |     }
364 |   } else {
365 |     return(TRUE)
366 |   }
367 | }
368 | 
369 | 
370 | #
371 | # .check_argument_correct = function(dots, arg_name , fun , message){
372 | #   if (arg_name %in% names(dots)){
373 | #     arg = dots[[which(names(dots) == arg_name)]]
374 | #     out = fun(arg)
375 | #     if (!out){
376 | #       stop(message)
377 | #     }
378 | #     return(out)
379 | #   }
380 | #   else {
381 | #     return(TRUE)
382 | #   }
383 | # }
384 | #
385 | 
386 | .check_weights = function(weights){
387 |   if (!is.numeric(weights)){
388 |     stop("weights should be a numeric vector. To get valid weights, run 'get_weights.R'")
389 |     return(FALSE)
390 |   } else {
391 |     if (sum(is.na(weights)) > 0){
392 |       stop("weights can not contain NaNs. To get valid weights, run 'get_weights.R'")
393 |       return(FALSE)
394 |     } else if (sum(weights <= 0) > 0){
395 |       stop("weights should be positive. To get valid weights, run 'get_weights.R'")
396 |       return(FALSE)
397 |     } else {
398 |       return(TRUE)
399 |     }
400 |   }
401 | }
402 | 
403 | 
404 | .check_weights_and_pvals = function(weights , pvalues , nhoods_sce){
405 |   if (!length(weights) == length(pvalues)){
406 |     stop("weights should be of the same size as pvalues.")
407 |     return(FALSE)
408 |   } else if (!length(weights) == ncol(nhoods_sce)){
409 |     stop("weights should be of the same size as number of columns in nhoods_sce.")
410 |     return(FALSE)
411 |   } else {
412 |     return(TRUE)
413 |   }
414 | }
415 | 
416 | 
417 | 
418 | 
419 | .check_argument_correct = function(arg , fun , message){
420 |   out = fun(arg)
421 |   if (!out){
422 |     stop(message)
423 |   }
424 |   return(out)
425 | }
426 | 
427 | 
428 | .check_quantile_vec = function(quantile_vec){
429 |   if (!is.numeric(quantile_vec)){
430 |     stop("Check quantile_vec - should be numeric vector")
431 |     return(FALSE)
432 |   } else {
433 |     quantile_vec = sort(unique(quantile_vec))
434 |     if (min(quantile_vec) < 0 | max(quantile_vec) > 1){
435 |       stop("quantile_vec should have all its values between 0 and 1. Please enter valid quantile_vec")
436 |       return(FALSE)
437 |     }
438 |     else {
439 |       return(TRUE)
440 |     }
441 |   }
442 | }
443 | 
444 | 
445 | .check_k_grid = function(k_grid){
446 |   if (!is.numeric(k_grid)){
447 |     stop("Check k_grid - should be numeric vector")
448 |     return(FALSE)
449 |   } else {
450 |     k_grid = sort(unique(k_grid))
451 |     if (min(k_grid) < 0 | max(k_grid%%1 > 0)){
452 |       stop("Values of k_grid should be positive integers. Please enter valid k_grid.")
453 |       return(FALSE)
454 |     }
455 |     else {
456 |       if (length(k_grid) == 1){
457 |         warning("You only selected one value for k. If it is intended, we recommend to run directly 'assign_neighbourhoods'.")
458 |       }
459 |       if (max(k_grid) >= 1000){
460 |         warning("The highest selected value is > 1000. It is gonna cost computationally, and we generally do not recommend such high k. Consider reducing.")
461 |       }
462 |       return(TRUE)
463 |     }
464 |   }
465 | }
466 | 
467 | 
468 | .check_pval_thresh = function(pval.thresh){
469 |   if (!is.numeric(pval.thresh)){
470 |     stop("pval.thresh should be numeric")
471 |     return(FALSE)
472 |   }
473 |   else {
474 |     if (!length(pval.thresh) == 1){
475 |       stop("pval.thresh should be a single number")
476 |       return(FALSE)
477 |     }
478 |     else {
479 |       if (pval.thresh <= 0 | pval.thresh >=1){
480 |         stop("pval.thresh should be between 0 and 1")
481 |         return(FALSE)
482 |       }
483 |       else {
484 |         return(TRUE)
485 |       }
486 |     }
487 |   }
488 | }
489 | 
490 | 
491 | 
492 | .check_z_thresh = function(z.thresh){
493 |   if (!is.numeric(z.thresh)){
494 |     stop("z.thresh should be numeric")
495 |     return(FALSE)
496 |   }
497 |   else {
498 |     if (!length(z.thresh) == 1){
499 |       stop("z.thresh should be a single number")
500 |       return(FALSE)
501 |     }
502 |     else {
503 |       if (z.thresh > 0){
504 |         stop("z.thresh should be not higher than 0")
505 |         return(FALSE)
506 |       }
507 |       else {
508 |         return(TRUE)
509 |       }
510 |     }
511 |   }
512 | }
513 | 
514 | 
515 | #
516 | # .general_check_arguments = function(dots){
517 | #   out = TRUE
518 | #   out = .check_argument_correct(dots, "sce", .check_sce, "Check sce - something is wrong (gene names unique? reducedDim.name is not present?)")
519 | #   out = .check_argument_correct(dots, "sce_milo", .check_sce_milo, "Check sce_milo - something is wrong. Calculate 'assign_hoods' first.)")
520 | #   out = .check_argument_correct(dots, "genes", .check_string_or_null, "Check genes - should be NULL or character vector")
521 | #   out = .check_argument_correct(dots, "genes_2_exclude", .check_string_or_null, "Check genes_2_exclude - should be NULL or character vector")
522 | #   out = .check_argument_correct(dots, "n_hvgs", .check_positive_integer, "Check n_hvgs - should be positive integer")
523 | #   out = .check_argument_correct(dots, "assay_type", function(x) .check_arg_within_options(x, c("counts", "logcounts")),
524 | #                                 "Check assay_type - should be either 'counts' or 'logcounts'")
525 | #   out = .check_argument_correct(dots, "reduction_type", function(x) .check_arg_within_options(x, c("Azimuth", "MNN")),
526 | #                                 "Check reduction_type - should be either 'Azimuth' or 'MNN'")
527 | #   out = .check_argument_correct(dots, "reducedDim_name", is.character, "Check reducedDim_name - should be character vector")
528 | #   out = .check_argument_correct(dots, "sample_id", is.character, "Check sample_id - should be character vector")
529 | #   out = .check_argument_correct(dots, "condition_id", is.character, "Check condition_id - should be character vector")
530 | #   out = .check_argument_correct(dots, "cell_id", .check_string_or_null, "Check cell_id - should be NULL or string")
531 | #   out = .check_argument_correct(dots, "d", .check_positive_integer, "Check d - should be positive integer")
532 | #   out = .check_argument_correct(dots, "order", function(x) .check_arg_within_options(x, c(1,2)),
533 | #                                 "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)")
534 | #   out = .check_argument_correct(dots, "k", .check_positive_integer, "Check k - should be positive integer")
535 | #   out = .check_argument_correct(dots, "k_init", .check_positive_integer, "Check k_init - should be positive integer")
536 | #   out = .check_argument_correct(dots, "prop", .check_prop, "Check prop - should be positive number between 0 and 1")
537 | #   out = .check_argument_correct(dots, "filtering", .check_boolean, "Check filtering - should be either TRUE or FALSE")
538 | #   out = .check_argument_correct(dots, "k.grid", is.numeric, "Check k.grid - should be numeric vector")
539 | #   out = .check_argument_correct(dots, "quantile_vec", is.numeric, "Check quantile_vec - should be numeric vector")
540 | #   out = .check_argument_correct(dots, "discard_not_perturbed_hoods", .check_boolean, "Check discard_not_perturbed_hoods - should be either TRUE or FALSE")
541 | #   out = .check_argument_correct(dots, "gene_selection", function(x) .check_arg_within_options(x, c("all", "none", "per_hood")),
542 | #                                 "Check gene_selection - should be either 'all', 'none' or 'per_hood'")
543 | #   out = .check_argument_correct(dots, "min_n_cells_per_sample", .check_positive_integer, "Check min_n_cells_per_sample - should be positive integer")
544 | #   out = .check_argument_correct(dots, "min_count", .check_positive_integer, "Check min_count - should be positive integer")
545 | #   out = .check_argument_correct(dots, "run_separately", .check_boolean, "Check run_separately - should be either TRUE or FALSE")
546 | #   out = .check_argument_correct(dots, "covariates", .check_string_or_null, "Check covariates - should be NULL or character vector")
547 | #   out = .check_argument_correct(dots, "seed", .check_number_or_null, "Check seed - should be NULL or number")
548 | #   return(out)
549 | # }
550 | 
551 | 
552 | .check_positive_integer = function(x){
553 |   out = TRUE
554 |   if (!is.numeric(x)){
555 |     out = FALSE
556 |   } else if (!x%%1 == 0 | x <= 0){
557 |     out = FALSE
558 |   }
559 |   return(out)
560 | }
561 | 
562 | 
563 | .check_non_negative = function(x){
564 |   out = TRUE
565 |   if (!is.numeric(x)){
566 |     out = FALSE
567 |   } else if (x < 0){
568 |     out = FALSE
569 |   }
570 |   return(out)
571 | }
572 | 
573 | .check_design = function(x){
574 |   if (is(x , "formula")){
575 |     return(TRUE)
576 |   }
577 |   else {
578 |     return(FALSE)
579 |   }
580 | }
581 | 
582 | 
583 | .check_number_or_null = function(x){
584 |   out = TRUE
585 |   if (!is.null(x)){
586 |     if (!is.numeric(x)){
587 |       out = FALSE
588 |     }
589 |   }
590 |   return(out)
591 | }
592 | 
593 | 
594 | .check_arg_within_options = function(x , options){
595 |   out = TRUE
596 |   if (is.null(x)){
597 |     out = FALSE
598 |   }
599 |   else  if (!x %in% options){
600 |     out = FALSE
601 |   }
602 |   return(out)
603 | }
604 | 
605 | .check_string_or_null = function(x){
606 |   out = TRUE
607 |   if (!is.null(x)){
608 |     if (!is.character(x)){
609 |       out = FALSE
610 |     }
611 |   }
612 |   return(out)
613 | }
614 | 
615 | .check_prop = function(x){
616 |   out = TRUE
617 |   if (!is.numeric(x)){
618 |     out = FALSE
619 |   } else if (x <= 0 | x > 1){
620 |     out = FALSE
621 |   }
622 |   return(out)
623 | }
624 | 
625 | 
626 | .check_boolean = function(x){
627 |   out = TRUE
628 |   if (is.null(x)){
629 |     out = FALSE
630 |   }
631 |   else if (!x %in% c(TRUE, FALSE)){
632 |     out = FALSE
633 |   }
634 |   return(out)
635 | }
636 | 
637 | 
638 | 
639 | .check_nhoods_matrix = function(nhoods_sce){
640 |   unq_elements = unique(as.numeric(nhoods_sce))
641 |   if (mean(unq_elements %in% c(0,1)) < 1){
642 |     stop("All elements of nhoods matrix should be either 0 or 1. To get valid matrix, run nhoods(x).")
643 |     return(FALSE)
644 |   }
645 |   else {
646 |     return(TRUE)
647 |   }
648 | }
649 | 
650 | 
651 | .check_subset_nhoods = function(subset_nhoods, nhoods_sce){
652 |   if (!is.numeric(subset_nhoods) & !is.logical(subset_nhoods)){
653 |     stop("'subset_nhoods' should be either numeric or logical.")
654 |     return(FALSE)
655 |   }
656 |   else if (is.numeric(subset_nhoods)){
657 |     if (mean(subset_nhoods %in% c(1:ncol(nhoods_sce))) < 1){
658 |       stop("If 'subset_nhoods' is numeric vector, it should lie within c(1:ncol(nhoods(x))).")
659 |       return(FALSE)
660 |     }
661 |     else {
662 |         return(TRUE)
663 |     }
664 |   }
665 |   else {
666 |     if (!length(subset_nhoods) == ncol(nhoods_sce)){
667 |       stop("If 'subset_nhoods' is logical vector, it should be the same size as ncol(nhoods(x)).")
668 |       return(FALSE)
669 |     }
670 |     else {
671 |       return(TRUE)
672 |     }
673 |   }
674 | }
675 | 
676 | #' @importFrom SummarizedExperiment colData
677 | #' @importFrom dplyr distinct
678 | #' @importFrom stats model.matrix
679 | .check_design_and_covariates_match = function(x , design , sample_id , covariates){
680 |   design.df = as.data.frame(colData(x)[,c(sample_id , covariates)])
681 |   design.df = distinct(design.df)
682 |   out = tryCatch(
683 |     {
684 |       design = model.matrix(design , data = design.df)
685 |       TRUE
686 |     },
687 |       error=function(err){
688 |         stop("Some of the design's arguments are not in the covariate vector.")
689 |         return(FALSE)
690 |     }
691 |   )
692 |   return(out)
693 | }
694 | 
695 | 
696 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # miloDE
 3 | Framework for sensitive DE testing (using neighbourhoods).
 4 | 
 5 | miloDE builds on an existing framework for DA testing called [Milo](https://pubmed.ncbi.nlm.nih.gov/34594043/). 
 6 | It exploits the notion of overlapping neighborhoods of homogeneous cells, constructed from graph-representation of scRNA-seq data, and performs testing within each neighborhood. Multiple testing correction is performed either across neighborhoods or across genes. 
 7 | 
 8 | Please see the [preprint](https://www.biorxiv.org/content/10.1101/2023.03.08.531744v1) for greater details.
 9 | 
10 | <p align="center">
11 |   <img src="miloDE_cartoon.png" width="750">
12 | </p>
13 | 
14 | In addition to DE testing, we provide functionality to rank neighbourhoods by degree of the DE as well as plotting functions to visualise results. In the vignette, we showcase how you can carry out clustering analysis to group genes in co-regulated transcriptional modules.
15 | 
16 | 
17 | 
18 | ## Installation
19 | 
20 | ```
21 | # Install development version:
22 | library(devtools)
23 | devtools::install_github("MarioniLab/miloDE") 
24 | library(miloDE)
25 | 
26 | *Note*: if you have troubles installing miloDE directly, try first to install miloR:
27 | BiocManager::install('miloR')
28 | 
29 | ## If you plan to use parallelisation (desired for big datasets), 
30 | please load `BiocParallel` and enable milticore parallel evaluation:
31 | library(BiocParallel)
32 | ncores = MY_NCORES
33 | mcparam = MulticoreParam(workers = ncores)
34 | register(mcparam)
35 | 
36 | 
37 | ## Not an immediate functionality of miloDE, but we illustrate in our vignette how to adapt WGCNA 
38 | approach to discover DE patterns and co-regulated gene modules. 
39 | If you want to perform similar analysis, please install Seurat and scWGCNA (note that scWGCNA does not work with V5 Seurat so please ensure that your Seurat version is V4):
40 | install.packages('Seurat', version = "4.4.0")
41 | devtools::install_github("cferegrino/scWGCNA", ref="main")
42 | 
43 | 
44 | ```
45 | 
46 | 
47 | ## Pipeline
48 | 
49 | 0. **Input**. The input of `miloDE` is scRNA-seq data, provided as `SingleCellExperiment` object. 
50 | Additionally, we require that:
51 | 
52 | * Latent dimensions (used for graph construction) are pre-computed and stored in `reducedDim(sce)`.
53 | * `colData(sce)` has to contain metadata corresponding assigning cells to individual replicates (passed to `sample_id`) and tested condition (e.g. healthy or disease, passed to `condition_id`).
54 | 
55 | You can explore toy data here:
56 | 
57 | ```
58 | data("sce_mouseEmbryo", package = "miloDE")
59 | print(sce_mouseEmbryo)
60 | # `pca.corrected` in reducedDim(sce) - PCs that we will use for graph construction
61 | 
62 | head(colData(sce_mouseEmbryo))
63 | # `tomato` corresponds to condition id  
64 | # `sample` corresponds to individual replicates. There are 2 samples per each condition:
65 | table(colData(sce_mouseEmbryo)[,c('sample','tomato')])
66 | 
67 | ```
68 | 
69 | 1. **Neighbourhood assignment**: First step is to assign neighbourhoods using graph representation of scRNA-seq data'
70 | 
71 | ```
72 | 
73 | sce_mouseEmbryo = assign_neighbourhoods(sce_mouseEmbryo, k = 20, order = 2, 
74 |                                         filtering = TRUE, reducedDim_name = "pca.corrected")
75 | 
76 | ```
77 | 
78 | 2. **DE testing**: Once neighbourhoods are assigned, we can carry out DE testing. Output is returned in either `data.frame` or `SingleCellExperiment format`. For each tested gene-neighbourhood, we return `logFC`, `pvalue`, `pvalue corrected across genes` and `pvalue corrected across nhoods`. We also return boolean flag if test is performed.
79 | 
80 | ```
81 | 
82 | de_stat = de_test_neighbourhoods(sce_mouseEmbryo , sample_id = "sample", 
83 |                                  design = ~tomato, covariates = c("tomato"))
84 | 
85 | 
86 | ```
87 | 
88 | 
89 | 
90 | ## Vignette
91 | 
92 | Please check the vignette to grasp additional functions aiding interpretation and analysis of miloDE output.
93 | 
94 | [Effect of Tal1 knock out on mouse development](https://rawcdn.githack.com/MarioniLab/miloDE_tutorials/3d3781237011695f802dc1c0f0193bea12a108de/miloDE__mouse_embryo.html). 
95 | 
96 | 
97 | Cite the code: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12686748.svg)](https://zenodo.org/records/12686748).
98 | 
99 | 


--------------------------------------------------------------------------------
/data-raw/sce_mouseEmbryo.R:
--------------------------------------------------------------------------------
 1 | ## code to prepare `sce_mouseEmbryo` dataset goes here
 2 | 
 3 | usethis::use_data(sce_mouseEmbryo, overwrite = TRUE)
 4 | 
 5 | 
 6 | # load libraries
 7 | library(SingleCellExperiment)
 8 | library(MouseGastrulationData)
 9 | library(geneBasisR)
10 | 
11 | # subset of 3 cell types
12 | cts = c("Endothelium" , "Blood progenitors 2", "Neural crest")
13 | 
14 | # load chimera Tal1
15 | sce = Tal1ChimeraData()
16 | # select CTs
17 | sce = sce[, sce$celltype.mapped %in% cts]
18 | # delete row for tomato
19 | sce = sce[!rownames(sce) == "tomato-td" , ]
20 | 
21 | # add logcounts
22 | sce = scuttle::logNormCounts(sce)
23 | 
24 | 
25 | # add covariates
26 | sce$sex = sapply(1:ncol(sce) , function(i) ifelse(sce$sample[i] %in% c("3" , "4") , "F" , "M"))
27 | set.seed(32)
28 | sce$toy_cov_1 = sample(1:5 , ncol(sce),1)
29 | 
30 | # select only 1000 genes
31 | sce = retain_informative_genes(sce , n = 300)
32 | sce_mouseEmbryo = sce
33 | usethis::use_data(sce_mouseEmbryo , overwrite = TRUE , compress = "xz")
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/data/sce_mouseEmbryo.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MarioniLab/miloDE/d253df8972a54459d63096028ae41afcb4a1063a/data/sce_mouseEmbryo.rda


--------------------------------------------------------------------------------
/man/assign_neighbourhoods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assign_neighbourhoods.R
 3 | \name{assign_neighbourhoods}
 4 | \alias{assign_neighbourhoods}
 5 | \title{assign_neighbourhoods}
 6 | \usage{
 7 | assign_neighbourhoods(
 8 |   x,
 9 |   reducedDim_name,
10 |   k = 25,
11 |   prop = 0.2,
12 |   order = 2,
13 |   filtering = TRUE,
14 |   k_init = 50,
15 |   d = 30,
16 |   verbose = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{x}{A \code{\linkS4class{SingleCellExperiment}} object.}
21 | 
22 | \item{reducedDim_name}{Defines the assay in \code{reducedDim(x)} to use as the embedding for graph construction.}
23 | 
24 | \item{k}{Positive integer, defines how many neighbours to use for the neighbourhood assignment. Default \code{k = 25}.}
25 | 
26 | \item{prop}{Numerical, between 0 and 1, defines which fraction of cells to use as neighbourhood centres. Default \code{prop = 0.2}.}
27 | 
28 | \item{order}{In \code{c(1,2)}, defines which order of neighbours to use. Default \code{order = 2}.}
29 | 
30 | \item{filtering}{In \code{c(TRUE,FALSE)}, defines whether to refine the assignment. Default \code{filtering = TRUE}.}
31 | 
32 | \item{k_init}{Positive integer, defines how many neighbours to use for identifying anchor cells (for this step we use 1st-order kNN). Default \code{k_init = 50}.}
33 | 
34 | \item{d}{Positive integer, defines how many dimensions from \code{reducedDim(x)} to use. Default \code{d = 30}.}
35 | 
36 | \item{verbose}{Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}.}
37 | }
38 | \value{
39 | \code{\linkS4class{Milo}} object containing cell-neighbourhood matrix in \code{nhoods(out)} slot
40 | }
41 | \description{
42 | Assign neighbourhoods to single-cell RNA-seq data (in \code{SingleCellExperiment} format)
43 | }
44 | \details{
45 | This function assigns neighbourhoods to single-cell data. This includes assigning graph representation, selecting \sQuote{index} cells and, finally, for each index cell, assigning it along with its neighbourhoors to one neighbourhood.
46 | 
47 | Specifically, algorithm goes as follows:
48 | \enumerate{
49 | \item Assigning \sQuote{loose} graph (i.e. ~low k, 1st-order kNN) to select index cells for the selected \code{prop} (greatly reduces computational time to look for \sQuote{index} cells in a loose graph).
50 | \item Reassigning graph following entered by the user \code{order} and \code{k}.
51 | \item Assigning neighbourhoods.
52 | \item (Optional but recommended) Refining the neighbourhood assignment (check \code{\link{filter_neighbourhoods}}).
53 | }
54 | }
55 | \examples{
56 | require(SingleCellExperiment)
57 | n_row = 500
58 | n_col = 100
59 | n_latent = 5
60 | sce = SingleCellExperiment(assays =
61 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
62 | rownames(sce) = as.factor(1:n_row)
63 | colnames(sce) = c(1:n_col)
64 | sce$cell = colnames(sce)
65 | reducedDim(sce , "reduced_dim") =
66 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
67 | out = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
68 | }
69 | 


--------------------------------------------------------------------------------
/man/calc_AUC_per_neighbourhood.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calc_AUC_per_neighbourhood.R
 3 | \name{calc_AUC_per_neighbourhood}
 4 | \alias{calc_AUC_per_neighbourhood}
 5 | \title{calc_AUC_per_neighbourhood}
 6 | \usage{
 7 | calc_AUC_per_neighbourhood(
 8 |   x,
 9 |   genes = rownames(x),
10 |   sample_id = "sample",
11 |   condition_id,
12 |   conditions = NULL,
13 |   min_n_cells_per_sample = 3,
14 |   n_threads = 2,
15 |   BPPARAM = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{x}{A \code{\linkS4class{Milo}} object.}
20 | 
21 | \item{genes}{Character vector specifying genes to be passed for the testing. Default \code{genes = rownames(x)}.}
22 | 
23 | \item{sample_id}{Character specifying which variable should be used as a replicate ID.
24 | Should be in \code{colnames(colData(x))}. Default \code{sample_id = "sample"}.}
25 | 
26 | \item{condition_id}{Character specifying which variable should be used as a condition ID.
27 | Should be in \code{colnames(colData(x))}.}
28 | 
29 | \item{conditions}{In case of multiple comparable groups, character vector specifying which conditions should be tested for separation.
30 | Default \code{conditions = NULL} and assumes that only 2 different conditions are present.}
31 | 
32 | \item{min_n_cells_per_sample}{Positive integer specifying the minimum number of cells per replicate to be included in testing.
33 | Default \code{min_n_cells_per_sample = 3}.}
34 | 
35 | \item{n_threads}{Positive integer specifying the number of cores to be used to calculate AUC.
36 | Higher number results in faster calculation, but its feasibility depends on the specs of your machine.
37 | Only relevant if \code{BPPARAM = NULL}. Default \code{n_threads = 2}.}
38 | 
39 | \item{BPPARAM}{NULL or \code{\link{MulticoreParam}} object. Default \code{BPPARAM = NULL} assuming no parallelisation.}
40 | }
41 | \value{
42 | \code{data.frame} object, with AUC calculated for each neighbourhood
43 | }
44 | \description{
45 | Returns per neighbourhood AUC from Augur based (RF) classifiers
46 | }
47 | \details{
48 | This function calculates for each neighbourhood whether cells between 2 conditions can be separated
49 | with Random Forest based classifiers (adapted from \code{\link[Augur]{calculate_auc}}).
50 | Accordingly, AUCs of the classifiers represent how well we can separate 2 conditions.
51 | 
52 | We suggest that neighbourhoods with AUC > 0.5 suggest a certain degree of separation between 2 conditions that can further be examined
53 | with DE testing (and, accordingly, neighbourhoods with AUC <= 0.5 can be safely discarded). You also can set your own AUC threshold if desired as well as use AUCs to rank neighbourhoods.
54 | 
55 | \emph{Note that this function is only relevant for \dQuote{simple} models (e.g. not nested or no interactions.)
56 | Also, it is hard-coded that for all neighbourhoods, in which total number of cells is less than 4 in at least one condition,
57 | AUC will be set to NaN (classifiers for such low numbers will not be built)}.
58 | }
59 | \examples{
60 | require(SingleCellExperiment)
61 | n_row = 500
62 | n_col = 100
63 | n_latent = 5
64 | sce = SingleCellExperiment(assays = list(counts =
65 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
66 | logcounts(sce) = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4
67 | rownames(sce) = as.factor(1:n_row)
68 | colnames(sce) = c(1:n_col)
69 | sce$cell = colnames(sce)
70 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
71 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query")
72 | reducedDim(sce , "reduced_dim") = matrix(rnorm(n_col*n_latent),
73 | ncol=n_latent)
74 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
75 | sce = calc_AUC_per_neighbourhood(sce, condition_id = "type")
76 | }
77 | 


--------------------------------------------------------------------------------
/man/convert_de_stat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert_de_stat.R
 3 | \name{convert_de_stat}
 4 | \alias{convert_de_stat}
 5 | \title{convert_de_stat}
 6 | \usage{
 7 | convert_de_stat(de_stat, assay_names = NULL, coldata_names = NULL)
 8 | }
 9 | \arguments{
10 | \item{de_stat}{miloDE results, output of \code{\link{de_test_neighbourhoods}}; either in \code{data.frame} or \code{SingleCellExperiment} format.}
11 | 
12 | \item{assay_names}{Character string specifying which fields should be treated as assays.
13 | Note that \code{logFC}, \code{pval}, \code{pval_corrected_across_genes} and \code{pval_corrected_across_nhoods} are hard-coded to be included in assays.}
14 | 
15 | \item{coldata_names}{Character string specifying which fields should be treated as neighbourhood metadata.
16 | Note that \code{Nhood}, \code{Nhood_center}, \code{test_performed} are hard-coded to be included in coldata.
17 | 
18 | \emph{Please note that \code{coldata_names} have to be the attributes of neighbourhoods (i.e. same values across different genes for the same neighbourhood).}}
19 | }
20 | \value{
21 | A \code{SingleCellExperiment} object or \code{data.frame} object, containing miloDE results
22 | }
23 | \description{
24 | Converts output of miloDE between \code{\link[base]{data.frame}} and \code{\linkS4class{SingleCellExperiment}} formats
25 | }
26 | \details{
27 | This function converts results of \code{\link{de_test_neighbourhoods}} between \code{data.frame} object and \code{SingleCellExperiment}.
28 | 
29 | \code{data.frame} object is more commonly used and might be easier to navigate, however, if total number of tests (i.e. gene x neighboourhoods)
30 | is overwhelmingly large, \code{SingleCellExperiment} might be more suitable and faster to work with.
31 | }
32 | \examples{
33 | de_stat = expand.grid(gene = paste0("gene" , c(1:5)) , Nhood = c(1:10))
34 | de_stat$Nhood_center = paste0("nhood_" , de_stat$Nhood)
35 | de_stat$logFC = sample(seq(-2,2,1) , nrow(de_stat) , 1)
36 | de_stat$pval = sample(c(0,1),nrow(de_stat),1)
37 | de_stat$pval_corrected_across_genes = sample(c(0,1),nrow(de_stat),1)
38 | de_stat$pval_corrected_across_nhoods = sample(c(0,1),nrow(de_stat),1)
39 | de_stat$test_performed = TRUE
40 | de_stat = convert_de_stat(de_stat)
41 | de_stat = convert_de_stat(de_stat)
42 | 
43 | }
44 | 


--------------------------------------------------------------------------------
/man/de_test_neighbourhoods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/de_test_neighbourhoods.R
 3 | \name{de_test_neighbourhoods}
 4 | \alias{de_test_neighbourhoods}
 5 | \title{de_test_neighbourhoods}
 6 | \usage{
 7 | de_test_neighbourhoods(
 8 |   x,
 9 |   sample_id = "sample",
10 |   design,
11 |   covariates,
12 |   contrasts = NULL,
13 |   subset_nhoods = NULL,
14 |   min_n_cells_per_sample = 3,
15 |   min_count = 3,
16 |   output_type = "data.frame",
17 |   plot_summary_stat = FALSE,
18 |   layout = "UMAP",
19 |   BPPARAM = NULL,
20 |   verbose = TRUE
21 | )
22 | }
23 | \arguments{
24 | \item{x}{A \code{\linkS4class{Milo}} object.}
25 | 
26 | \item{sample_id}{Character specifying which variable should be used as a replicate ID.
27 | Should be in \code{colnames(colData(x))}. Default \code{sample_id = "sample"}.}
28 | 
29 | \item{design}{A \code{formula} object describing the experimental design for DE testing.
30 | Note that if \code{contrasts = NULL} (default), the last column column of model matrix will be used for testing.}
31 | 
32 | \item{covariates}{Vector specifying all covariates that are passed into experimental design.
33 | 
34 | \emph{It should contain all columns used in the design formula (except \code{sample_id})}.}
35 | 
36 | \item{contrasts}{NULL (default) or character string specifying what comparison to perform.
37 | If you are unsure regarding the appropriate syntax for the \code{contrasts} in your data,
38 | check \url{https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf}.
39 | 
40 | \emph{Note that at the moment we only support one comparison (i.e. one contrast); if you wish to perform several comparisons, please run \code{\link{de_test_neighbourhoods}} for each comparison separately.}}
41 | 
42 | \item{subset_nhoods}{NULL or character vector specifying the set of neighbourhoods that will be tested for DE.
43 | Default \code{subset_nhoods = NULL} meaning no subsetting.}
44 | 
45 | \item{min_n_cells_per_sample}{Positive integer specifying the minimum number of cells per replicate to be included in testing.
46 | Default \code{min_n_cells_per_sample = 3}.}
47 | 
48 | \item{min_count}{Positive integer, specifying \code{min.count} for gene selection (employes \code{\link[edgeR]{filterByExpr}}).
49 | Default \code{min_count = 3}.}
50 | 
51 | \item{output_type}{In \code{c("data.frame","SCE")} Specifying the output format - either in \code{data.frame} or \code{\linkS4class{SingleCellExperiment}}.
52 | Default \code{output_type = "data.frame"}.}
53 | 
54 | \item{plot_summary_stat}{Boolean specifying if we plot Milo neighbourhood plot summarising (per neighbourhood) whether testing was performed.
55 | Default \code{plot_summary_stat = FALSE}.}
56 | 
57 | \item{layout}{A character indicating the name of the \code{reducedDim} slot in the \code{\linkS4class{Milo}} object to use for the layout of the plot(default \code{layout = "UMAP"}).
58 | Only relevant if \code{plot_summary_stat = TRUE}.}
59 | 
60 | \item{BPPARAM}{NULL or \code{\link{MulticoreParam}} object to use for parallelisation (see \code{README} for the usage). Default \code{BPPARAM = NULL} meaning no parallelisation.
61 | Note that if possible we recommend to parallel this in order to reduce computational time.}
62 | 
63 | \item{verbose}{Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}.}
64 | }
65 | \value{
66 | \code{data.frame} or \code{SingleCellExperiment} object containing miloDE results for all supplied neighbourhoods.
67 | For each tested gene-neighbourhood pair, we return logFC and p-values (raw and corrected across genes or neighbourhoods).
68 | }
69 | \description{
70 | Performs DE testing within each neighbourhood + post hoc p-value correction across neighbourhoods. If a test for a gene x neighbourhood pair is not performed
71 | (i.e. gene is not expressed in this neighbourhood and filtered out prior to testing), we returns NaNs.
72 | }
73 | \details{
74 | We employ edgeR testing (using \code{\link[edgeR]{glmQLFit}}) within each neighbourhood.
75 | We allow the user to submit the desired experimental design and incorporate various covariates, which is beneficial in the context of large cohort studies.
76 | }
77 | \examples{
78 | require(SingleCellExperiment)
79 | n_row = 500
80 | n_col = 100
81 | n_latent = 5
82 | sce = SingleCellExperiment(assays = list(counts =
83 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
84 | rownames(sce) = as.factor(1:n_row)
85 | colnames(sce) = c(1:n_col)
86 | sce$cell = colnames(sce)
87 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
88 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query")
89 | reducedDim(sce , "reduced_dim") =
90 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
91 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
92 | de_stat = de_test_neighbourhoods(sce , design = ~type,
93 | covariates = c("type"), plot_summary_stat = FALSE)
94 | de_stat = convert_de_stat(de_stat)
95 | de_stat = convert_de_stat(de_stat)
96 | }
97 | 


--------------------------------------------------------------------------------
/man/de_test_single_neighbourhood.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/de_test_neighbourhoods.R
 3 | \name{de_test_single_neighbourhood}
 4 | \alias{de_test_single_neighbourhood}
 5 | \title{de_test_single_neighbourhood}
 6 | \usage{
 7 | de_test_single_neighbourhood(
 8 |   x,
 9 |   nhoods_x,
10 |   hood_id,
11 |   sample_id,
12 |   design,
13 |   covariates,
14 |   contrasts = NULL,
15 |   min_n_cells_per_sample = 1,
16 |   min_count = 3,
17 |   run_separately = TRUE
18 | )
19 | }
20 | \arguments{
21 | \item{x}{A \code{\linkS4class{Milo}} object}
22 | 
23 | \item{nhoods_x}{Should be extracted from x as \code{nhoods(x)} prior to running the function.}
24 | 
25 | \item{hood_id}{Numeric specifying for which neighbourhood we should perform testing. Should be in \code{c(1:ncol(nhoods_x))}.}
26 | 
27 | \item{sample_id}{Character specifying which variable should be used as a replicate ID.
28 | Should be in \code{colnames(colData(x))}. Default \code{sample_id = "sample"}.}
29 | 
30 | \item{design}{A \code{formula} object describing the experimental design for DE testing.
31 | If \code{contrasts = NULL} (default), the last column column of model matrix will be used for testing.}
32 | 
33 | \item{covariates}{Vector specifying all covariates that should be passed into experimental design.
34 | 
35 | \emph{It should contain all columns used in the design formula (except \code{sample_id}).}}
36 | 
37 | \item{contrasts}{NULL (default) or character string specifying what comparison to perform.
38 | If you are unsure regarding the appropriate syntax for the \code{contrasts} in your data,
39 | check \url{https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf}.
40 | 
41 | \emph{Note that at the moment we only support one comparison (i.e. one contrast), if you wish to perform several comparisons, please run \code{de_test_neighbourhoods} for each comparison separately.}}
42 | 
43 | \item{min_n_cells_per_sample}{Positive integer specifying the minimum number of cells per replicate to be included in testing.
44 | Default \code{min_n_cells_per_sample = 3}.}
45 | 
46 | \item{min_count}{Positive integer, specifying \code{min.count} for gene selection (employes \code{\link[edgeR]{filterByExpr}}).
47 | Default \code{min_count = 3}.}
48 | 
49 | \item{run_separately}{A boolean parameter specifying whether the function is to be run as a part of \code{\link{de_test_neighbourhoods}} (FALSE) or as a stand-alone run (TRUE). Default \code{run_separately = TRUE}.}
50 | }
51 | \value{
52 | \code{data.frame} object containing miloDE results for the selected neighbourhood
53 | }
54 | \description{
55 | Tests single neighbourhood for DE; not intended to be used by itself (however allowed to), but rather as a part of \code{\link{de_test_neighbourhoods}}
56 | }
57 | \details{
58 | We employ edgeR testing (using \code{\link[edgeR]{glmQLFit}}) within the selected neighbourhood (using numeric \code{hood_id} which corresponds to which column of \code{nhoods(x)} to use).
59 | We allow user to submit the desired experimental design and incorporate various covariates, which is beneficial in the context of large cohort studies.
60 | }
61 | \examples{
62 | require(SingleCellExperiment)
63 | require(miloR)
64 | n_row = 500
65 | n_col = 100
66 | n_latent = 5
67 | sce = SingleCellExperiment(assays = list(counts =
68 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
69 | rownames(sce) = as.factor(1:n_row)
70 | colnames(sce) = c(1:n_col)
71 | sce$cell = colnames(sce)
72 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
73 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query")
74 | reducedDim(sce , "reduced_dim") =
75 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
76 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
77 | nhoods_x = nhoods(sce)
78 | de_stat = de_test_single_neighbourhood(sce , nhoods_x = nhoods_x,
79 | hood_id = 1 , sample_id = "sample" ,
80 | design = ~type, covariates = c("type"))
81 | 
82 | }
83 | 


--------------------------------------------------------------------------------
/man/estimate_neighbourhood_sizes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_neighbourhood_sizes.R
 3 | \name{estimate_neighbourhood_sizes}
 4 | \alias{estimate_neighbourhood_sizes}
 5 | \title{estimate_neighbourhood_sizes}
 6 | \usage{
 7 | estimate_neighbourhood_sizes(
 8 |   x,
 9 |   reducedDim_name,
10 |   k_grid = seq(10, 100, 10),
11 |   order = 2,
12 |   prop = 0.1,
13 |   filtering = TRUE,
14 |   k_init = 50,
15 |   d = 30,
16 |   cluster_id = NULL,
17 |   plot_stat = TRUE,
18 |   verbose = TRUE
19 | )
20 | }
21 | \arguments{
22 | \item{x}{A \code{\linkS4class{SingleCellExperiment}} object.}
23 | 
24 | \item{reducedDim_name}{Defines the slot in \code{reducedDim(x)} to use as the embedding for graph construction.}
25 | 
26 | \item{k_grid}{Vector of positive integers, defines how many neighbours to use for the neighbourhood assignment.}
27 | 
28 | \item{order}{In \code{c(1,2)}, defines which order of neighbours to use. Default \code{order = 2}.}
29 | 
30 | \item{prop}{Numerical, between 0 and 1, defines which fraction of cells to use as neighbourhood centres. Default \code{prop = 0.2}.}
31 | 
32 | \item{filtering}{In \code{c(TRUE,FALSE)}, defines whether to filter neighbourhoods (reduces computing time downstream). Default \code{filtering = TRUE}.}
33 | 
34 | \item{k_init}{Positive integer, defines how many neighbours to use for identifying \sQuote{index} cells. Default \code{k_init = 50}.}
35 | 
36 | \item{d}{Positive integer, defines how many dimensions from \code{reducedDim(x)} to use. Default \code{d = 30}.}
37 | 
38 | \item{cluster_id}{Character specifying which field in \code{colData(x)} to use for 'localised' neighbourhood size estimation.
39 | This might be useful if dataset is rather big (which will result in an excessive running time).
40 | In case \code{cluster_id} is provided, we will calculate neighbourhood size distribution within individual clusters and aggregate results
41 | across clusters in order to speed up the process (note that it might result in slightly biased estimates).
42 | Default \code{cluster_id = NULL}, in which case neighbourhood sizes will be estimated for the whole dataset.}
43 | 
44 | \item{plot_stat}{Boolean specifying whether to plot the stat. Default \code{plot_stat = TRUE}.}
45 | 
46 | \item{verbose}{Boolean specifying whether to print intermediate output messages. Default \code{verbose = TRUE}.}
47 | }
48 | \value{
49 | \code{data.frame} object, in which each row corresponds to \code{k} and 5 columns correspond to min, q25, median, q75 and max of neighbourhood size distributions; also returns a boxplot
50 | }
51 | \description{
52 | For a grid of \code{k}, returns neighbourhood size distribution; this will help a user to select an appropriate \code{k}
53 | }
54 | \details{
55 | This function returns an estimated distribution of neighbourhood sizes for different \code{k} values
56 | (for the selected by user \code{order}; if you want to estimates for both 1st and 2nd \code{order}, run this twice with changing \code{order}).
57 | This can help to gauge whether the neighbourhood size distribution is appropriate for the selected \code{k}, since \code{\link{de_test_neighbourhoods}} takes a while to complete.
58 | 
59 | Note that this function also might take a while to complete on big datasets (> 70k cells), and in this case we provide an option to estimate neighbourhood
60 | sizes within annotated clusters (passed in \code{cluster_id}), which will be considerably faster, however, might result in slightly biased estimates.
61 | }
62 | \examples{
63 | require(SingleCellExperiment)
64 | n_row = 500
65 | n_col = 100
66 | n_latent = 5
67 | sce = SingleCellExperiment(assays = list(counts =
68 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
69 | rownames(sce) = as.factor(1:n_row)
70 | colnames(sce) = c(1:n_col)
71 | sce$cell = colnames(sce)
72 | reducedDim(sce , "reduced_dim") =
73 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
74 | out = estimate_neighbourhood_sizes(sce, k_grid = c(5,10),
75 | reducedDim_name = "reduced_dim")
76 | 
77 | }
78 | 


--------------------------------------------------------------------------------
/man/filter_neighbourhoods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/filter_neighbourhoods.R
 3 | \name{filter_neighbourhoods}
 4 | \alias{filter_neighbourhoods}
 5 | \title{filter_neighbourhoods}
 6 | \usage{
 7 | filter_neighbourhoods(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{\linkS4class{Milo}} object.}
11 | }
12 | \value{
13 | \code{Milo} object with refined neighbourhood assignment
14 | }
15 | \description{
16 | Filtering redundant neighbourhoods, using the greedy approach to set cover problem
17 | }
18 | \details{
19 | This function refines neighbourhood assignment and discards redundant neighbourhoods. This call is highly recommended since it reduces computational time greatly.
20 | It is called directly in \code{\link{assign_neighbourhoods}} if \code{filtering=TRUE}, and also can be called independently post hoc.
21 | 
22 | Under the hood, it adapts \code{\link[RcppGreedySetCover]{greedySetCover}} function, which implements greedy solution to NP-hard \sQuote{set cover} problem.
23 | In this solution, all neighbourhoods are sorted in the decreasing order of their size (i.e. number of cells),
24 | and neighbourhoods are iteratively added to the final neighbourhood set if they contain at least one cell not \sQuote{covered} by previously added neighbourhoods.
25 | }
26 | \examples{
27 | require(SingleCellExperiment)
28 | n_row = 500
29 | n_col = 100
30 | n_latent = 5
31 | sce = SingleCellExperiment(assays =
32 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
33 | rownames(sce) = as.factor(1:n_row)
34 | colnames(sce) = c(1:n_col)
35 | sce$cell = colnames(sce)
36 | reducedDim(sce , "reduced_dim") =
37 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
38 | sce = assign_neighbourhoods(sce,
39 | reducedDim_name = "reduced_dim" , k = 10 , order = 1)
40 | sce = filter_neighbourhoods(sce)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/plot_DE_gene_set.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{plot_DE_gene_set}
 4 | \alias{plot_DE_gene_set}
 5 | \title{plot_DE_gene_set}
 6 | \usage{
 7 | plot_DE_gene_set(
 8 |   x,
 9 |   de_stat,
10 |   genes,
11 |   logFC_correction = TRUE,
12 |   significance_by = "pval_corrected_across_nhoods",
13 |   alpha = 0.1,
14 |   layout = "UMAP",
15 |   subset_nhoods = NULL,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{x}{A \code{\linkS4class{Milo}} object.}
21 | 
22 | \item{de_stat}{miloDE stat (output of \code{\link{de_test_neighbourhoods}}).
23 | It does not have to be direct output of \code{\link{de_test_neighbourhoods}} i.e. it is allowed if \code{de_stat} has some additional assays/columns (e.g. calculated post hoc metrics on neighbourhoods/genes).}
24 | 
25 | \item{genes}{A character vector specifying genes.}
26 | 
27 | \item{logFC_correction}{Boolean specifying whether to perform logFC correction.
28 | If TRUE (default), logFC will be set to 0 if corrected p-value (defined by \code{significance_by}) < alpha.}
29 | 
30 | \item{significance_by}{Character specifying which column to use to decide on significance for logFC.
31 | Should be an in \code{assays(de_stat)} or in \code{colnames(de_stat)} (depends on \code{de_stat} format). Default \code{significance_by = "pval_corrected_across_nhoods"}.
32 | Expected to be in \code{c("pval", "pval_corrected_across_nhoods", "pval_corrected_across_genes")}.}
33 | 
34 | \item{alpha}{A scalar (between 0 and 1) specifying the significance level used. Default \code{alpha = 0.1}.}
35 | 
36 | \item{layout}{A character indicating the name of the \code{reducedDim} slot in the \code{\linkS4class{Milo}} object to use for layout. Default \code{layout = "UMAP"}.}
37 | 
38 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted.
39 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed.
40 | If not NULL, should be a numeric vector, which values lie within \code{c(1:ncol(nhoods(x)))}.}
41 | 
42 | \item{...}{Arguments to pass to \code{plot_milo_by_single_metric} (e.g. \code{size_range}, \code{node_stroke} etc)).}
43 | }
44 | \value{
45 | \sQuote{Neighbourhood} plot, in which each neighbourhood is coloured by average logFC across the selected genes; neighbourhood size corresponds to the fraction of genes that are DE in this neighbourhood (based on \code{pval_corrected_across_nhoods}).
46 | }
47 | \description{
48 | Returns \sQuote{neighbourhood} plot, in which colour of nodes correspond to average logFC across selected genes; size corresponds to how many genes show significant DE in the neighbourhood (using \code{significance_by})
49 | }
50 | \details{
51 | Note that for this plot, in untested gene-neighbourhood pairs, we set logFC to 0 and p-values to 1.
52 | }
53 | \examples{
54 | require(SingleCellExperiment)
55 | n_row = 500
56 | n_col = 100
57 | n_latent = 5
58 | sce = SingleCellExperiment(assays =
59 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
60 | rownames(sce) = as.factor(1:n_row)
61 | colnames(sce) = c(1:n_col)
62 | sce$cell = colnames(sce)
63 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
64 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query")
65 | reducedDim(sce , "reduced_dim") =
66 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
67 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
68 | de_stat = de_test_neighbourhoods(sce ,
69 | design = ~type , covariates = c("type") )
70 | umaps = as.data.frame(matrix(rnorm(n_col*2), ncol=2))
71 | colnames(umaps) = c("V1" , "V2")
72 | reducedDim(sce , "UMAP") = umaps
73 | genes = c("1","2")
74 | p = plot_DE_gene_set(sce, de_stat , genes = c("1","2"))
75 | 
76 | }
77 | 


--------------------------------------------------------------------------------
/man/plot_DE_single_gene.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{plot_DE_single_gene}
 4 | \alias{plot_DE_single_gene}
 5 | \title{plot_DE_single_gene}
 6 | \usage{
 7 | plot_DE_single_gene(
 8 |   x,
 9 |   de_stat,
10 |   gene,
11 |   alpha = 0.1,
12 |   layout = "UMAP",
13 |   subset_nhoods = NULL,
14 |   set_na_to_0 = TRUE,
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{x}{A \code{\linkS4class{Milo}} object.}
20 | 
21 | \item{de_stat}{miloDE stat (output of \code{\link{de_test_neighbourhoods}}).
22 | It does not have to be direct output of \code{\link{de_test_neighbourhoods}} i.e. it is allowed if \code{de_stat} has some additional assays/columns (e.g. calculated post hoc metrics on neighbourhoods/genes).}
23 | 
24 | \item{gene}{A character specifying gene.}
25 | 
26 | \item{alpha}{A scalar (between 0 and 1) specifying the significance level used. Default \code{alpha = 0.1}.}
27 | 
28 | \item{layout}{A character indicating the name of the \code{reducedDim} slot in the \code{\linkS4class{Milo}} object to use for layout. Default \code{layout = "UMAP"}.}
29 | 
30 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted.
31 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed.
32 | If not NULL, should be a numeric vector, which values lie within \code{c(1:ncol(nhoods(x)))}.}
33 | 
34 | \item{set_na_to_0}{Boolean specifying whether in neighbourhoods in which gene is not tested, logFC would be set to 0 and p-values to 1.
35 | Default \code{set_na_to_0 = TRUE}, and in this case, they will be coloured in white (otherwise gray).}
36 | 
37 | \item{...}{Arguments to pass to \code{plot_milo_by_single_metric} (e.g. \code{size_range}, \code{node_stroke} etc)).}
38 | }
39 | \value{
40 | \sQuote{Neighbourhood} plot, in which each neighbourhood is coloured by logFC for the selected gene (if significant)
41 | }
42 | \description{
43 | Returns \sQuote{neighbourhood} plot; each node is coloured by logFC, if \code{pval_corrected_across_nhoods < alpha}.
44 | }
45 | \examples{
46 | require(SingleCellExperiment)
47 | n_row = 500
48 | n_col = 100
49 | n_latent = 5
50 | sce = SingleCellExperiment(assays =
51 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
52 | rownames(sce) = as.factor(1:n_row)
53 | colnames(sce) = c(1:n_col)
54 | sce$cell = colnames(sce)
55 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
56 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query")
57 | reducedDim(sce , "reduced_dim") =
58 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
59 | sce = assign_neighbourhoods(sce,
60 | reducedDim_name = "reduced_dim")
61 | de_stat = de_test_neighbourhoods(sce ,
62 | design = ~type , covariates = c("type") )
63 | umaps = as.data.frame(matrix(rnorm(n_col*2), ncol=2))
64 | colnames(umaps) = c("V1" , "V2")
65 | reducedDim(sce , "UMAP") = umaps
66 | p = plot_DE_single_gene(sce, de_stat , gene = "1")
67 | 
68 | }
69 | 


--------------------------------------------------------------------------------
/man/plot_beeswarm_gene_set.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{plot_beeswarm_gene_set}
 4 | \alias{plot_beeswarm_gene_set}
 5 | \title{plot_beeswarm_gene_set}
 6 | \usage{
 7 | plot_beeswarm_gene_set(
 8 |   de_stat,
 9 |   genes,
10 |   nhoodGroup,
11 |   levels = NULL,
12 |   logFC_correction = TRUE,
13 |   significance_by = "pval_corrected_across_nhoods",
14 |   alpha = 0.1,
15 |   subset_nhoods = NULL,
16 |   size = 2
17 | )
18 | }
19 | \arguments{
20 | \item{de_stat}{miloDE stat (output of \code{\link{de_test_neighbourhoods}}).
21 | It does not have to be direct output of \code{\link{de_test_neighbourhoods}} i.e. it is allowed if \code{de_stat} has some additional assays/columns (e.g. calculated post hoc metrics on neighbourhoods/genes).}
22 | 
23 | \item{genes}{A character vector specifying genes.}
24 | 
25 | \item{nhoodGroup}{A character specifying which column to use for neighbourhood grouping. Should be an assay in \code{de_stat} (or in \code{colnames(de_stat)} if \code{class(de_stat) == "data.frame"}).}
26 | 
27 | \item{levels}{NULL (default) or character vector specifying order for \code{nhoodGroup} values, in which they are to be plotted.}
28 | 
29 | \item{logFC_correction}{Boolean specifying whether to perform logFC correction.
30 | If TRUE (default), logFC will be set to 0 if corrected p-value (defined by \code{significance_by < alpha}).}
31 | 
32 | \item{significance_by}{Character specifying which column to use to decide on the significance.}
33 | 
34 | \item{alpha}{A numeric between 0 and 1 specifying the significance threshold. Default \code{alpha = 0.1}.}
35 | 
36 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted.
37 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed.}
38 | 
39 | \item{size}{A positive number specifying size of the dots. Default \code{size = 2}.}
40 | }
41 | \value{
42 | Beeswarm plot, broke down by provided groupings; each point is a neighbourhood, colour - average logFC across selected genes; x-axis - fraction of genes that are DE in the neighbourhood
43 | }
44 | \description{
45 | For the selected genes, returns a beeswarm plot, in which the aggregated DE statistics is binned by provided cell grouping (e.g. enriched cell types)
46 | }
47 | \details{
48 | Note that for this plot, in untested gene-neighbourhood pairs, we set logFC to 0 and p-values to 1.
49 | }
50 | \examples{
51 | 
52 | require(SingleCellExperiment)
53 | n_row = 500
54 | n_col = 100
55 | n_latent = 5
56 | sce = SingleCellExperiment(assays = list(counts =
57 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
58 | rownames(sce) = as.factor(1:n_row)
59 | colnames(sce) = c(1:n_col)
60 | sce$cell = colnames(sce)
61 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
62 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query")
63 | reducedDim(sce , "reduced_dim") =
64 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
65 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
66 | de_stat = de_test_neighbourhoods(sce , design = ~type ,
67 | covariates = c("type") )
68 | de_stat$celltype = 1
69 | p = plot_beeswarm_gene_set(de_stat , genes = c("1","2") ,
70 | nhoodGroup = "celltype")
71 | 
72 | }
73 | 


--------------------------------------------------------------------------------
/man/plot_beeswarm_single_gene.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{plot_beeswarm_single_gene}
 4 | \alias{plot_beeswarm_single_gene}
 5 | \title{plot_beeswarm_single_gene}
 6 | \usage{
 7 | plot_beeswarm_single_gene(
 8 |   de_stat,
 9 |   gene,
10 |   nhoodGroup,
11 |   levels = NULL,
12 |   alpha = 0.1,
13 |   subset_nhoods = NULL,
14 |   size = 2
15 | )
16 | }
17 | \arguments{
18 | \item{de_stat}{miloDE stat (output of \code{\link{de_test_neighbourhoods}}).
19 | It does not have to be direct output of \code{\link{de_test_neighbourhoods}} i.e. it is allowed if \code{de_stat} has some additional assays/columns (e.g. calculated post hoc metrics on neighbourhoods/genes).}
20 | 
21 | \item{gene}{A character specifying the gene.}
22 | 
23 | \item{nhoodGroup}{A character specifying which values to use for neighbourhood grouping. Should be an assay in \code{de_stat} (or in \code{colnames(de_stat)} if \code{class(de_stat) == "data.frame"}).}
24 | 
25 | \item{levels}{NULL (default) or character vector specifying order for \code{nhoodGroup} values, in which they are to be plotted.
26 | If \code{levels = NULL}, default order will be supplied.}
27 | 
28 | \item{alpha}{A numeric between 0 and 1 specifying the significance threshold. All neighbourhoods that are defined as not significant, will coloured in gray. Default \code{alpha = 0.1}.}
29 | 
30 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted.
31 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed.}
32 | 
33 | \item{size}{A positive number specifying size of the dots. Default \code{size = 2}.}
34 | }
35 | \value{
36 | Beeswarm plot, broke down by cell types; each point is a neighbourhood, colours and x-axis correspond to logFC (points are coloured if significant, based on \code{pval_corrected_across_nhoods}; otherwise in grey)
37 | }
38 | \description{
39 | For the selected gene, returns a beeswarm plot, in which the DE statistics for the gene is binned by provided cell grouping (e.g. cell types);
40 | }
41 | \examples{
42 | 
43 | require(SingleCellExperiment)
44 | n_row = 500
45 | n_col = 100
46 | n_latent = 5
47 | sce = SingleCellExperiment(assays =
48 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
49 | rownames(sce) = as.factor(1:n_row)
50 | colnames(sce) = c(1:n_col)
51 | sce$cell = colnames(sce)
52 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
53 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query")
54 | reducedDim(sce , "reduced_dim") =
55 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
56 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
57 | de_stat = de_test_neighbourhoods(sce ,
58 | design = ~type , covariates = c("type") )
59 | de_stat$celltype = 1
60 | p = plot_beeswarm_single_gene(de_stat ,
61 | gene = "1" , nhoodGroup = "celltype")
62 | 
63 | }
64 | 


--------------------------------------------------------------------------------
/man/plot_milo_by_single_metric.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{plot_milo_by_single_metric}
 4 | \alias{plot_milo_by_single_metric}
 5 | \title{plot_milo_by_single_metric}
 6 | \usage{
 7 | plot_milo_by_single_metric(
 8 |   x,
 9 |   nhood_stat,
10 |   colour_by = "logFC",
11 |   significance_by = NULL,
12 |   order_by = NULL,
13 |   order_direction = FALSE,
14 |   size_by = NULL,
15 |   alpha = 0.1,
16 |   layout = "UMAP",
17 |   subset_nhoods = NULL,
18 |   size_range = c(1, 3),
19 |   node_stroke = 0.3,
20 |   edge_width = c(0.2, 0.5),
21 |   edge_weight.thresh = NULL
22 | )
23 | }
24 | \arguments{
25 | \item{x}{A \code{\linkS4class{Milo}} object.}
26 | 
27 | \item{nhood_stat}{\code{data.frame} object, containing columns \code{Nhood} (should correspond to neighbourhoods from \code{nhoodGraph(x))}.}
28 | 
29 | \item{colour_by}{A character specifying value used for neighbourhood colouring. Should be in \code{colnames(nhood_stat)}. Can be both numeric and categorical.}
30 | 
31 | \item{significance_by}{A character specifying which values to use for \sQuote{thresholding}: if values for this column exceed \code{alpha}, \code{colour_by} will be set to 0.
32 | Should be in \code{colnames(nhood_stat)}. Default \code{significance_by = NULL} and in this case we will not use no correction.
33 | Please ensure that its values are numeric.}
34 | 
35 | \item{order_by}{A character specifying which values to use to order neighbourhoods for plotting.
36 | Should be in \code{colnames(nhood_stat)} (or NULL). Default \code{order_by = NULL} and in this case we will order by \code{size_by} values.}
37 | 
38 | \item{order_direction}{Boolean specifying the direction of neighbourhood ordering. Default \code{order_direction = FALSE}.}
39 | 
40 | \item{size_by}{A character specifying which values to use for neighbourhood sizes.
41 | Should be in \code{colnames(nhood_stat)} (or NULL). Default \code{size_by = NULL} and in this case we will use neighbourhood size (i.e. number of cells in the neighbourhood).
42 | Please ensure that its values are numeric.}
43 | 
44 | \item{alpha}{A scalar (between 0 and 1) specifying the significance level used. Default \code{alpha = 0.1}.}
45 | 
46 | \item{layout}{A character indicating the name of the \code{reducedDim} slot in the \code{\linkS4class{Milo}} object to use for layout. Default \code{layout = "UMAP"}.}
47 | 
48 | \item{subset_nhoods}{A vector (or NULL) specifying which neighbourhoods will be plotted.
49 | Default \code{subset_nhoods = NULL} meaning that no subsetting is performed. If not NULL, should be a numeric vector, which values lie within \code{c(1:ncol(nhoods(x)))}.}
50 | 
51 | \item{size_range}{A numeric vector indicating the range (min and max) of node sizes to use for plotting (to avoid overplotting in the graph).
52 | Default \code{size_range = c(1,3)}.}
53 | 
54 | \item{node_stroke}{A numeric indicating the desired thickness of the border around each node. Default \code{node_stroke = 0.3}.}
55 | 
56 | \item{edge_width}{A numeric vector indicating the range (min and max) of edge widths to use for plotting. Default \code{edge_width = c(0.2,0.5)}.}
57 | 
58 | \item{edge_weight.thresh}{A numeric (or NULL) specifying a threshold for minimum cells in common (between neighbourhoods) required for an edge to be plotted.
59 | Default \code{edge_weight.thresh = NULL} meaning that no minimum threshold is set.}
60 | }
61 | \value{
62 | \sQuote{Neighbourhood} plot, in which each neighbourhood is coloured by the provided in \code{colour_by} column value
63 | }
64 | \description{
65 | Returns \sQuote{neighbourhood} plot; each node is coloured by \code{colour_by} column from \code{nhood_stat}, if \code{significance_by < alpha}
66 | }
67 | \examples{
68 | require(SingleCellExperiment)
69 | n_row = 500
70 | n_col = 100
71 | n_latent = 5
72 | sce = SingleCellExperiment(assays =
73 | list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
74 | rownames(sce) = as.factor(1:n_row)
75 | colnames(sce) = c(1:n_col)
76 | sce$cell = colnames(sce)
77 | sce$sample = floor(runif(n = n_col , min = 1 , max = 5))
78 | sce$type = ifelse(sce$sample \%in\% c(1,2) , "ref" , "query")
79 | reducedDim(sce , "reduced_dim") = matrix(rnorm(n_col*n_latent), ncol=n_latent)
80 | sce = assign_neighbourhoods(sce,
81 | reducedDim_name = "reduced_dim")
82 | de_stat = de_test_neighbourhoods(sce ,
83 | design = ~type , covariates = c("type") )
84 | de_stat = de_stat[de_stat$gene == "1", ]
85 | umaps = as.data.frame(matrix(rnorm(n_col*2), ncol=2))
86 | colnames(umaps) = c("V1" , "V2")
87 | reducedDim(sce , "UMAP") = umaps
88 | p = plot_milo_by_single_metric(sce, de_stat)
89 | 
90 | }
91 | 


--------------------------------------------------------------------------------
/man/rank_neighbourhoods_by_DE_magnitude.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rank_neighbourhoods_by_DE_magnitude.R
 3 | \name{rank_neighbourhoods_by_DE_magnitude}
 4 | \alias{rank_neighbourhoods_by_DE_magnitude}
 5 | \title{rank_neighbourhoods_by_DE_magnitude}
 6 | \usage{
 7 | rank_neighbourhoods_by_DE_magnitude(de_stat, pval.thresh = 0.1, z.thresh = -3)
 8 | }
 9 | \arguments{
10 | \item{de_stat}{Output of miloDE (\code{\link{de_test_neighbourhoods}}), either in \code{data.frame} or \code{SingleCellExperiment} format.}
11 | 
12 | \item{pval.thresh}{A scalar specifying which threshold to use for deciding on significance for gene being DE in a neighbourhood. Default \code{pval.thresh = 0.1}.}
13 | 
14 | \item{z.thresh}{A scalar specifying which threshold to use for deciding on which z-normalised p-values are going to be considered specifically DE. Default \code{z.thresh = -3}.}
15 | }
16 | \value{
17 | \code{data.frame}, with calculated number-DE-genes and number-specific-DE-genes for each neighbourhood
18 | }
19 | \description{
20 | Ranks neighbourhoods by the magnitude of DE: number of DE genes and number of \sQuote{specifically} DE genes
21 | }
22 | \details{
23 | To calculate number of DE genes per neighbourhood, we use \code{pval_corrected_across_genes}.
24 | Accordingly, for each neighbourhood we calculate how many genes has p-values lower than designated threshold.
25 | 
26 | To calculate number of \sQuote{specifically} DE genes, we first z-normalise \code{pval_corrected_across_nhoods} (for each gene) and then for each
27 | neighbourhood, calculate how many genes have z-normalised p-values lower than designated threshold.
28 | 
29 | \emph{Note that for this analysis we set NaN p-values (raw and corrected) to 1 - interpret accordingly.}
30 | }
31 | \examples{
32 | de_stat = expand.grid(gene = paste0("gene" , c(1:5)) , Nhood = c(1:10))
33 | de_stat$Nhood_center = paste0("nhood_" , de_stat$Nhood)
34 | de_stat$logFC = sample(seq(-2,2,1) , nrow(de_stat) , 1)
35 | de_stat$pval = sample(c(0,1),nrow(de_stat),1)
36 | de_stat$pval_corrected_across_genes = sample(c(0,1),nrow(de_stat),1)
37 | de_stat$pval_corrected_across_nhoods = sample(c(0,1),nrow(de_stat),1)
38 | de_stat$test_performed = TRUE
39 | out = rank_neighbourhoods_by_DE_magnitude(de_stat)
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/man/sce_mouseEmbryo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sce_mouseEmbryo.R
 3 | \docType{data}
 4 | \name{sce_mouseEmbryo}
 5 | \alias{sce_mouseEmbryo}
 6 | \title{Chimeric mouse embryo, Tal1- (Pijuan-Sala et al., 2019)}
 7 | \format{
 8 | \code{\linkS4class{SingleCellExperiment}} object
 9 | }
10 | \source{
11 | MouseGastrulationData package
12 | }
13 | \usage{
14 | data(sce_mouseEmbryo)
15 | }
16 | \description{
17 | \code{SingleCellExperiment} object, containing counts
18 | (raw are pulled using \code{\link[MouseGastrulationData]{Tal1ChimeraData}}
19 | and log-normalised are estimated using \code{\link[scuttle]{logNormCounts}}) matrices
20 | for chimeric mouse embryos (both Tal1+ and Tal1-).
21 | }
22 | \details{
23 | Additionally, we subselected only 3 cell types (Endothelium , Blood progenitors 2, Neural crest)
24 | and 300 HVGs.
25 | 
26 | \code{colData(sce_mouseEmbryo)} contains information about replicate (\code{sample}) and Tal1 status (\code{tomato = TRUE} means Tal1-).
27 | }
28 | 


--------------------------------------------------------------------------------
/man/spatial_pval_adjustment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/spatial_pval_adjustment.R
 3 | \name{spatial_pval_adjustment}
 4 | \alias{spatial_pval_adjustment}
 5 | \title{spatial_pval_adjustment}
 6 | \usage{
 7 | spatial_pval_adjustment(nhoods_x, pvalues)
 8 | }
 9 | \arguments{
10 | \item{nhoods_x}{Should be extracted from x as \code{nhoods(x)}.}
11 | 
12 | \item{pvalues}{Vector of p-values.}
13 | }
14 | \value{
15 | Vector with \sQuote{spatially} (i.e. across neighbourhoods) adjusted p-values
16 | }
17 | \description{
18 | Performs p-values multiple testing correction across neighbourhoods, with accounting for the overlap
19 | }
20 | \details{
21 | This function is not intended to be run by itself, but can be useful if the user wants to perform \sQuote{spatially aware} multiple testing correction.
22 | 
23 | Under the hood it performs weighted version of BH correction, where weights are reciprocal to the local desnity of a neighbourhood.
24 | Accordingly, big and/or highly connected neighbourhoods will have lower weights.
25 | }
26 | \examples{
27 | require(SingleCellExperiment)
28 | require(miloR)
29 | n_row = 500
30 | n_col = 100
31 | n_latent = 5
32 | sce = SingleCellExperiment(assays = list(counts =
33 | floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4))
34 | rownames(sce) = as.factor(1:n_row)
35 | colnames(sce) = c(1:n_col)
36 | sce$cell = colnames(sce)
37 | reducedDim(sce , "reduced_dim") =
38 | matrix(rnorm(n_col*n_latent), ncol=n_latent)
39 | sce = assign_neighbourhoods(sce,
40 | reducedDim_name = "reduced_dim" , k = 10 , order = 1)
41 | nhoods_x = nhoods(sce)
42 | pvalues = runif(n = ncol(nhoods_x) , min = 0 , max = 1)
43 | out = spatial_pval_adjustment(nhoods_x, pvalues = pvalues)
44 | }
45 | 


--------------------------------------------------------------------------------
/miloDE.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 | 
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 | 


--------------------------------------------------------------------------------
/miloDE_cartoon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MarioniLab/miloDE/d253df8972a54459d63096028ae41afcb4a1063a/miloDE_cartoon.png


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(miloDE)
3 | 
4 | test_check("miloDE")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-assign_neighbourhoods.R:
--------------------------------------------------------------------------------
  1 | # context("Testing assign_neighbourhoods")
  2 | library(miloDE)
  3 | 
  4 | # load data
  5 | data("sce_mouseEmbryo", package = "miloDE")
  6 | 
  7 | 
  8 | # error msgs
  9 | test_that("Wrong input gives errors", {
 10 | 
 11 |   # sce should be of the right format
 12 |   expect_error(assign_neighbourhoods(x = 1 , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 13 |                "x should be a SingleCellExperiment or Milo object.",
 14 |                fixed=TRUE
 15 |   )
 16 |   sce_test = sce_mouseEmbryo
 17 |   colnames(sce_test) = rep(1,1,ncol(sce_test))
 18 |   expect_error(assign_neighbourhoods(sce_test , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 19 |                "If colnames(x) exist, they should be unique.",
 20 |                fixed=TRUE
 21 |   )
 22 | 
 23 | 
 24 |   # k should be positive integer
 25 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 0, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 26 |                "Check k - should be positive integer",
 27 |                fixed=TRUE
 28 |   )
 29 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = "1", prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 30 |                "Check k - should be positive integer",
 31 |                fixed=TRUE
 32 |   )
 33 | 
 34 |   # prop should be positive number between 0 and 1
 35 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = "0.2", order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 36 |                "Check prop - should be positive number between 0 and 1",
 37 |                fixed=TRUE
 38 |   )
 39 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 40 |                "Check prop - should be positive number between 0 and 1",
 41 |                fixed=TRUE
 42 |   )
 43 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = -1, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 44 |                "Check prop - should be positive number between 0 and 1",
 45 |                fixed=TRUE
 46 |   )
 47 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 48 |                "Check prop - should be positive number between 0 and 1",
 49 |                fixed=TRUE
 50 |   )
 51 | 
 52 | 
 53 |   # order should be 1 or 2
 54 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 0, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 55 |                "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)",
 56 |                fixed=TRUE
 57 |   )
 58 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 3, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 59 |                "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)",
 60 |                fixed=TRUE
 61 |   )
 62 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 1.5, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 63 |                "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)",
 64 |                fixed=TRUE
 65 |   )
 66 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = "x", filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 67 |                "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)",
 68 |                fixed=TRUE
 69 |   )
 70 | 
 71 |   # check filtering should be T or F
 72 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, filtering = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 73 |                "Check filtering - should be either TRUE or FALSE",
 74 |                fixed=TRUE
 75 |   )
 76 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, filtering = "aa", reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 77 |                "Check filtering - should be either TRUE or FALSE",
 78 |                fixed=TRUE
 79 |   )
 80 | 
 81 |  # reducedDim should be character
 82 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, filtering = TRUE, reducedDim_name = 1, k_init = 50, d = 30),
 83 |                "Check reducedDim_name - should be character vector",
 84 |                fixed=TRUE
 85 |   )
 86 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = TRUE, k_init = 50, d = 30),
 87 |                "Check reducedDim_name - should be character vector",
 88 |                fixed=TRUE
 89 |   )
 90 | 
 91 | 
 92 |   # k_init - positive integer
 93 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = "50", d = 30),
 94 |                "Check k_init - should be positive integer",
 95 |                fixed=TRUE
 96 |   )
 97 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 0, d = 30),
 98 |                "Check k_init - should be positive integer",
 99 |                fixed=TRUE
100 |   )
101 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = -10, d = 30),
102 |                "Check k_init - should be positive integer",
103 |                fixed=TRUE
104 |   )
105 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 10.5, d = 30),
106 |                "Check k_init - should be positive integer",
107 |                fixed=TRUE
108 |   )
109 | 
110 | 
111 |   # d - positive integer
112 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = "30"),
113 |                "Check d - should be positive integer",
114 |                fixed=TRUE
115 |   )
116 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 0),
117 |                "Check d - should be positive integer",
118 |                fixed=TRUE
119 |   )
120 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = -1),
121 |                "Check d - should be positive integer",
122 |                fixed=TRUE
123 |   )
124 |   expect_error(assign_neighbourhoods(sce_mouseEmbryo , k = 2, prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 1.5),
125 |                "Check d - should be positive integer",
126 |                fixed=TRUE
127 |   )
128 | 
129 | 
130 |   # reduced dim should be in reducedDimNames
131 |   expect_error(assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, k_init = 50, d = 30)
132 |   )
133 | 
134 |   expect_error(assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca_corrected", k_init = 50, d = 30),
135 |                "reducedDim_name should be in reducedDimNames(x).",
136 |                fixed=TRUE
137 |   )
138 | 
139 | })
140 | 
141 | 
142 | 
143 | # return of the correct output
144 | test_that("Return is the correct class", {
145 |   require(miloR)
146 |   # right class
147 |   out = assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
148 |   expect_s4_class(out, "Milo")
149 | 
150 |   # nhoods have data for all the cells
151 |   nhoods_out = nhoods(out)
152 |   expect_equal(nrow(nhoods_out) , ncol(out))
153 |   expect_identical(sort(rownames(nhoods_out)) , sort(colnames(out)))
154 | 
155 | })
156 | 
157 | 
158 | # n-hoods/hood-size ~ k, order, filtering
159 | test_that("Scaling of neighbourhood sizes and numbers with k, order, filtering", {
160 | 
161 |   # smaller k - more neighbourhoods
162 |   sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
163 |   sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 20, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
164 |   nhoods_1 = nhoods(sce_1)
165 |   nhoods_2 = nhoods(sce_2)
166 |   expect_gt(ncol(nhoods_1) , ncol(nhoods_2))
167 | 
168 |   # smaller k - more neighbourhoods
169 |   sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 25, prop = 0.2, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
170 |   sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 75, prop = 0.2, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
171 |   nhoods_1 = nhoods(sce_1)
172 |   nhoods_2 = nhoods(sce_2)
173 |   expect_gt(ncol(nhoods_1) , ncol(nhoods_2))
174 | 
175 | 
176 |   # order 1 - more neighbourhoods
177 |   sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 25, prop = 0.1, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
178 |   sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 25, prop = 0.1, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
179 |   nhoods_1 = nhoods(sce_1)
180 |   nhoods_2 = nhoods(sce_2)
181 |   expect_gt(ncol(nhoods_1) , ncol(nhoods_2))
182 | 
183 | 
184 |   # order 1 - more neighbourhoods
185 |   sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 5, prop = 0.2, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
186 |   sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 5, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
187 |   nhoods_1 = nhoods(sce_1)
188 |   nhoods_2 = nhoods(sce_2)
189 |   expect_gt(ncol(nhoods_1) , ncol(nhoods_2))
190 | 
191 | 
192 |   # filtering - less neighbourhoods
193 |   sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 5, prop = 0.2, order = 1, filtering = F, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
194 |   sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 5, prop = 0.2, order = 1, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
195 |   nhoods_1 = nhoods(sce_1)
196 |   nhoods_2 = nhoods(sce_2)
197 |   expect_gt(ncol(nhoods_1) , ncol(nhoods_2))
198 | 
199 | 
200 |   # filtering - less neighbourhoods
201 |   sce_1 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 20, prop = 0.1, order = 2, filtering = F, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
202 |   sce_2 = assign_neighbourhoods(x = sce_mouseEmbryo , k = 20, prop = 0.1, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
203 |   nhoods_1 = nhoods(sce_1)
204 |   nhoods_2 = nhoods(sce_2)
205 |   expect_gt(ncol(nhoods_1) , ncol(nhoods_2))
206 | 
207 | 
208 | })
209 | 
210 | 
211 | 
212 | 
213 | 
214 | 


--------------------------------------------------------------------------------
/tests/testthat/test-calc_AUC_per_neighbourhood.R:
--------------------------------------------------------------------------------
  1 | # context("Testing calc_AUC_per_neighbourhood")
  2 | library(miloDE)
  3 | 
  4 | # load data
  5 | data("sce_mouseEmbryo", package = "miloDE")
  6 | set.seed(32)
  7 | sce_mouseEmbryo_milo = assign_neighbourhoods(sce_mouseEmbryo, reducedDim_name = "pca.corrected")
  8 | require(miloR)
  9 | stat_auc = calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , condition_id = "tomato" , min_n_cells_per_sample = 1)
 10 | stat_auc_w_annotated_hoods =  annotateNhoods(sce_mouseEmbryo_milo , da.res = stat_auc , coldata_col = "celltype.mapped")
 11 | 
 12 | # generate toy data
 13 | require(SingleCellExperiment)
 14 | n_row = 50
 15 | n_col = 100
 16 | n_latent = 5
 17 | sce = SingleCellExperiment(list(counts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4  ,
 18 |                            logcounts = floor(matrix(rnorm(n_row*n_col), ncol=n_col)) + 4) )
 19 | rownames(sce) = as.factor(1:n_row)
 20 | colnames(sce) = c(1:n_col)
 21 | sce$cell = colnames(sce)
 22 | sce$sample = c(1:ncol(sce))
 23 | reducedDim(sce , "reduced_dim") = matrix(rnorm(n_col*n_latent), ncol=n_latent)
 24 | sce = assign_neighbourhoods(sce, reducedDim_name = "reduced_dim")
 25 | 
 26 | sce_1 = sce
 27 | sce_1$condition = ifelse(logcounts(sce_1[1,]) >= 5 , 1 , 0)
 28 | sce_2 = sce
 29 | sce_2$condition = sample(c(0,1) , ncol(sce_2) , replace = T)
 30 | 
 31 | sce_3 = sce
 32 | sce_3$condition = sapply(c(1:ncol(sce_3)) , function(i){
 33 |   if (logcounts(sce_3[1,i]) <= 2){
 34 |     return(0)
 35 |   } else if (logcounts(sce_3[1,i]) <= 4){
 36 |     return(1)
 37 |   } else {
 38 |     return(2)
 39 |   }
 40 | })
 41 | sce_4 = sce
 42 | sce_4$condition = sample(c(0,1,2) , ncol(sce_4) , replace = T)
 43 | 
 44 | 
 45 | 
 46 | 
 47 | # error msgs
 48 | test_that("Wrong input gives errors", {
 49 |   # x should be of the right format
 50 |   expect_error(calc_AUC_per_neighbourhood(x = 1 , condition_id = "test"),
 51 |                "x should be a SingleCellExperiment or Milo object.",
 52 |                fixed=TRUE
 53 |   )
 54 |   sce_test = sce_mouseEmbryo_milo
 55 |   colnames(sce_test) = rep(1,1,ncol(sce_test))
 56 |   expect_error(calc_AUC_per_neighbourhood(sce_test , condition_id = "test2"),
 57 |                "If colnames(x) exist, they should be unique.",
 58 |                fixed=TRUE
 59 |   )
 60 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo , condition_id = "test3"),
 61 |                "x should be a Milo object. Please run `assign_neighbourhoods` first.",
 62 |                fixed=TRUE
 63 |   )
 64 | 
 65 | 
 66 |   # genes should be a subset of sce's rows
 67 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , genes = c("1" , "11") , condition_id = "tomato"),
 68 |                "Some gene names are missing from x",
 69 |                fixed=TRUE
 70 |   )
 71 | 
 72 |   # sample_id should be of the right format
 73 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , condition_id = "tomato" , sample_id = 1),
 74 |                "Check sample_id - should be character vector",
 75 |                fixed=TRUE
 76 |   )
 77 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , condition_id = "tomato" , sample_id = TRUE),
 78 |                "Check sample_id - should be character vector",
 79 |                fixed=TRUE
 80 |   )
 81 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , condition_id = "tomato" , sample_id = "test"),
 82 |                "'sample_id' should be in colData(x)",
 83 |                fixed=TRUE
 84 |   )
 85 | 
 86 | 
 87 |   # condition_id should be of the right format
 88 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , condition_id = "sample" ),
 89 |                "'sample_id' and 'condition_id' can not be the same",
 90 |                fixed=TRUE
 91 |   )
 92 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , condition_id = 1 ),
 93 |                "Check condition_id - should be character vector",
 94 |                fixed=TRUE
 95 |   )
 96 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , condition_id = "test" ),
 97 |                "'condition_id' should be in colData(x)",
 98 |                 fixed=TRUE
 99 |   )
100 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" , condition_id = "stage" ),
101 |                "x should have at least two levels for tested conditions.",
102 |                fixed=TRUE
103 |   )
104 | 
105 | 
106 |   # conditions are of the right format
107 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample" ,
108 |                                           condition_id = "tomato" , conditions = c(1,"2") ),
109 |                "All specified conditions should be present.",
110 |                fixed=TRUE
111 |   )
112 |   expect_error(calc_AUC_per_neighbourhood(sce_4 , sample_id = "sample" ,
113 |                                           condition_id = "condition" , conditions = NULL , min_n_cells_per_sample = 1),
114 |                "If conditions == NULL, there should be exactly two levels for tested conditions.",
115 |                fixed=TRUE
116 |   )
117 |   expect_error(calc_AUC_per_neighbourhood(sce_3 , sample_id = "sample" ,
118 |                                           condition_id = "condition" , conditions = NULL , min_n_cells_per_sample = 1),
119 |                "If conditions == NULL, there should be exactly two levels for tested conditions.",
120 |                fixed=TRUE
121 |   )
122 |   expect_error(calc_AUC_per_neighbourhood(sce_3 , sample_id = "sample" ,
123 |                                           condition_id = "condition" , conditions = c(0,1,2) , min_n_cells_per_sample = 1),
124 |                "Conditions should be a vector of 2 elements.",
125 |                fixed=TRUE
126 |   )
127 | 
128 | 
129 |   # min_n_cells_per_sample - positive integer
130 |   expect_error(calc_AUC_per_neighbourhood(x = sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", min_n_cells_per_sample = "aa"),
131 |                "Check min_n_cells_per_sample - should be positive integer",
132 |                fixed=TRUE
133 |   )
134 |   expect_error(calc_AUC_per_neighbourhood(x = sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", min_n_cells_per_sample = 1.5),
135 |                "Check min_n_cells_per_sample - should be positive integer",
136 |                fixed=TRUE
137 |   )
138 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", min_n_cells_per_sample = 0),
139 |                "Check min_n_cells_per_sample - should be positive integer",
140 |                fixed=TRUE
141 |   )
142 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", min_n_cells_per_sample = -100),
143 |                "Check min_n_cells_per_sample - should be positive integer",
144 |                fixed=TRUE
145 |   )
146 | 
147 |   # n_threads - positive integer
148 |   expect_error(calc_AUC_per_neighbourhood(x = sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", n_threads = "aa"),
149 |                "Check n_threads - should be positive integer",
150 |                fixed=TRUE
151 |   )
152 |   expect_error(calc_AUC_per_neighbourhood(x = sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", n_threads = 1.5),
153 |                "Check n_threads - should be positive integer",
154 |                fixed=TRUE
155 |   )
156 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", n_threads = 0),
157 |                "Check n_threads - should be positive integer",
158 |                fixed=TRUE
159 |   )
160 |   expect_error(calc_AUC_per_neighbourhood(sce_mouseEmbryo_milo , sample_id = "sample", condition_id = "tomato", n_threads = -100),
161 |                "Check n_threads - should be positive integer",
162 |                fixed=TRUE
163 |   )
164 | })
165 | 
166 | 
167 | 
168 | 
169 | # right input returns the expected format (in default output)
170 | test_that("Return is the correct class", {
171 |   # right class
172 |   expect_s3_class(stat_auc , "data.frame")
173 | 
174 |   out = calc_AUC_per_neighbourhood(sce_1 , condition_id = "condition" , min_n_cells_per_sample = 1)
175 |   expect_s3_class(out , "data.frame")
176 |   out = calc_AUC_per_neighbourhood(sce_2 , condition_id = "condition" , conditions = c(0,1))
177 |   expect_s3_class(out , "data.frame")
178 |   out = calc_AUC_per_neighbourhood(sce_3 , condition_id = "condition" , conditions = c(0,1))
179 |   expect_s3_class(out , "data.frame")
180 |   out = calc_AUC_per_neighbourhood(sce_4 , condition_id = "condition" , conditions = c(2,1) , min_n_cells_per_sample = 1)
181 |   expect_s3_class(out , "data.frame")
182 | 
183 | 
184 |   # right dimensions
185 |   expect_equal(ncol(nhoods(sce_mouseEmbryo_milo)) , nrow(stat_auc))
186 |   expect_equal(4, ncol(stat_auc))
187 |   cols = c("Nhood" , "Nhood_center" , "auc" , "auc_calculated")
188 |   expect_identical(cols , colnames(stat_auc))
189 | 
190 | })
191 | 
192 | 
193 | 
194 | # check that AUCs are approx of what we expect
195 | ## real mouse data
196 | test_that("AUCs make sense for mouse", {
197 | 
198 |   # AUCs for BPs are either NaNs or > 0.5
199 |   aucs = stat_auc_w_annotated_hoods$auc[stat_auc_w_annotated_hoods$celltype.mapped == "Blood progenitors 2"]
200 |   expect_equal(0, length(which(aucs < 0.5)))
201 | 
202 |   # AUCs for Endo are all > 0.5 (and not nan)
203 |   aucs = stat_auc_w_annotated_hoods$auc[stat_auc_w_annotated_hoods$celltype.mapped == "Endothelium"]
204 |   expect_equal(0, sum(is.na(aucs)))
205 |   expect_equal(length(aucs), sum(aucs > 0.5))
206 | })
207 | 
208 | 
209 | ## simulations - 2 conditions
210 | test_that("AUCs make sense for sim1", {
211 |   stat_1 = calc_AUC_per_neighbourhood(sce_1 , condition_id = "condition" , min_n_cells_per_sample = 1)
212 |   stat_1_gene_1 = calc_AUC_per_neighbourhood(sce_1 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1)
213 |   stat_1_min_cells_3 = calc_AUC_per_neighbourhood(sce_1 , condition_id = "condition" , min_n_cells_per_sample = 3)
214 | 
215 |   stat_2 = calc_AUC_per_neighbourhood(sce_2 , condition_id = "condition" , min_n_cells_per_sample = 1)
216 |   stat_2_gene_1 = calc_AUC_per_neighbourhood(sce_2 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1)
217 |   stat_2_min_cells_3 = calc_AUC_per_neighbourhood(sce_2 , condition_id = "condition" , min_n_cells_per_sample = 3)
218 | 
219 |   expect_gt(stat_1$auc , stat_2$auc)
220 |   expect_gt(stat_1$auc , 0.5)
221 |   expect_gt(stat_1_gene_1$auc , stat_1$auc)
222 |   expect_true(is.nan(stat_1_min_cells_3$auc))
223 |   expect_true(is.nan(stat_2_min_cells_3$auc))
224 |   expect_true(!is.nan(stat_2$auc))
225 |   expect_true(!is.nan(stat_2_gene_1$auc))
226 | })
227 | 
228 | 
229 | ## simulations - 3 conditions
230 | test_that("AUCs make sense for sim2", {
231 | 
232 |   stat_3_12 = calc_AUC_per_neighbourhood(sce_3 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(1,2))
233 |   stat_3_01 = calc_AUC_per_neighbourhood(sce_3 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(1,0))
234 |   stat_3_20 = calc_AUC_per_neighbourhood(sce_3 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(0,2))
235 | 
236 |   stat_4_12 = calc_AUC_per_neighbourhood(sce_4 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(1,2))
237 |   stat_4_01 = calc_AUC_per_neighbourhood(sce_4 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(1,0))
238 |   stat_4_20 = calc_AUC_per_neighbourhood(sce_4 , genes = c("1" , "2") , condition_id = "condition" , min_n_cells_per_sample = 1 , conditions = c(0,2))
239 | 
240 |   expect_gte(stat_3_20$auc , stat_3_12$auc)
241 |   expect_gte(stat_3_20$auc , stat_3_01$auc)
242 | 
243 |   expect_gt(stat_3_12$auc , stat_4_12$auc)
244 |   expect_gt(stat_3_01$auc , stat_4_20$auc)
245 |   expect_gt(stat_3_20$auc , stat_4_01$auc)
246 | 
247 | })
248 | 
249 | 
250 | 
251 | 
252 | 
253 | 
254 | 


--------------------------------------------------------------------------------
/tests/testthat/test-convert_de_stat.R:
--------------------------------------------------------------------------------
  1 | # context("Testing convert_de_stat")
  2 | library(miloDE)
  3 | library(miloR)
  4 | library(SingleCellExperiment)
  5 | 
  6 | # load data
  7 | data("sce_mouseEmbryo", package = "miloDE")
  8 | set.seed(32)
  9 | sce = assign_neighbourhoods(sce_mouseEmbryo , k = 25, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
 10 | meta_sce = as.data.frame(colData(sce))
 11 | de_stat_sce = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE")
 12 | de_stat_df = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"),  output_type = "data.frame")
 13 | 
 14 | 
 15 | # wrong inputs return error
 16 | # error msgs
 17 | test_that("Wrong input gives errors", {
 18 | 
 19 |   # de_stat should be of the right format
 20 |   expect_error(convert_de_stat(de_stat = 1),
 21 |                "de_stat should be either data.frame or SingleCellExperiment object.\n
 22 |          To get valid de_stat object, please run 'de_test_neighbourhoods.R'",
 23 |                fixed=TRUE
 24 |   )
 25 |   expect_error(convert_de_stat(de_stat = de_stat_df , assay_names = c("logFC") , coldata_names = c("logFC")),
 26 |                "assay_names and coldata_names can not overlap",
 27 |                fixed=TRUE
 28 |   )
 29 |   expect_error(convert_de_stat(de_stat = de_stat_df , assay_names = c("test")),
 30 |                "colnames(de_stat) missing some of the assay_names or coldata_names.",
 31 |                fixed=TRUE
 32 |   )
 33 |   expect_error(convert_de_stat(de_stat = de_stat_df , coldata_names = c("test")),
 34 |                "colnames(de_stat) missing some of the assay_names or coldata_names.",
 35 |                fixed=TRUE
 36 |   )
 37 |   expect_error(convert_de_stat(de_stat = de_stat_sce , coldata_names = c("test")),
 38 |                "de_stat missing some of the coldata_names",
 39 |                fixed=TRUE
 40 |   )
 41 |   expect_error(convert_de_stat(de_stat = de_stat_sce , assay_names = c("test")),
 42 |                "de_stat missing some of the required assays.",
 43 |                fixed=TRUE
 44 |   )
 45 | })
 46 | 
 47 | 
 48 | 
 49 | # right input returns the expected format (in default output)
 50 | test_that("Return is the correct class", {
 51 |   # right class
 52 |   expect_s3_class(convert_de_stat(de_stat_sce) , "data.frame")
 53 |   expect_s4_class(convert_de_stat(de_stat_df) , "SingleCellExperiment")
 54 | 
 55 |   # right assay names, sce to df
 56 |   de_stat_test = convert_de_stat(de_stat_sce)
 57 |   out = mean(c("logFC" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods" ,
 58 |                "Nhood" , "Nhood_center" , "test_performed") %in% colnames(de_stat_test))
 59 |   expect_equal(1 , out)
 60 | 
 61 |   n_genes = length(unique(de_stat_test$gene))
 62 |   expect_equal(n_genes , nrow(de_stat_sce))
 63 |   n_hoods = length(unique(de_stat_test$Nhood))
 64 |   expect_equal(n_hoods , ncol(de_stat_sce))
 65 | 
 66 | 
 67 |   # right assay names, df to sce
 68 |   de_stat_test = convert_de_stat(de_stat_df)
 69 |   n_genes = length(unique(de_stat_df$gene))
 70 |   expect_equal(n_genes , nrow(de_stat_test))
 71 |   n_hoods = length(unique(de_stat_df$Nhood))
 72 |   expect_equal(n_hoods , ncol(de_stat_test))
 73 | 
 74 | })
 75 | 
 76 | 
 77 | ## expect identical output if conversion is done post-hoc
 78 | test_that("Conversion ad hoc is equal to conversion post hoc", {
 79 | 
 80 |   expect_identical(de_stat_df , convert_de_stat(de_stat_sce))
 81 | 
 82 |   de_stat_sce_2 = convert_de_stat(de_stat_df)
 83 |   de_stat_sce = de_stat_sce[order(rownames(de_stat_sce)) , ]
 84 |   de_stat_sce_2 = de_stat_sce_2[order(rownames(de_stat_sce_2)) , ]
 85 | 
 86 |   expect_identical(assay(de_stat_sce, "logFC") , assay(de_stat_sce_2 , "logFC"))
 87 |   expect_identical(assay(de_stat_sce, "pval") , assay(de_stat_sce_2 , "pval"))
 88 |   expect_identical(assay(de_stat_sce, "pval_corrected_across_genes") , assay(de_stat_sce_2, "pval_corrected_across_genes"))
 89 |   expect_identical(assay(de_stat_sce, "pval_corrected_across_nhoods") , assay(de_stat_sce_2 , "pval_corrected_across_nhoods"))
 90 |   expect_identical(as.data.frame(colData(de_stat_sce)) , as.data.frame(colData(de_stat_sce_2)))
 91 | 
 92 | })
 93 | 
 94 | 
 95 | ## expect conversion of right variables (coldata and assays) to the right format
 96 | test_that("Conversion of additional metadata", {
 97 | 
 98 |   de_stat_df_test = de_stat_df
 99 |   de_stat_df_test$celltype = 0
100 |   de_stat_df_test$celltype[de_stat_df_test$Nhood %in% c(1,2,5)] = 1
101 | 
102 |   de_stat_sce_test = convert_de_stat(de_stat_df_test , coldata_names = "celltype")
103 |   meta_test = as.data.frame(colData(de_stat_sce_test))
104 | 
105 |   out = rep(0,1,ncol(de_stat_sce_test))
106 |   out[c(1,2,5)] = 1
107 |   expect_identical(meta_test$celltype , out)
108 | 
109 | })
110 | 
111 | 
112 | ## expect error if coldata is not legit
113 | test_that("Conversion of additional metadata legit only if metadata is legit", {
114 | 
115 |   de_stat_df_test = de_stat_df
116 |   de_stat_df_test$celltype = 0
117 |   unq_genes = unique(de_stat_df_test$gene)
118 |   de_stat_df_test$celltype[de_stat_df_test$gene == unq_genes[1]] = 1
119 | 
120 |   expect_error(convert_de_stat(de_stat_df_test , coldata_names = "celltype"))
121 |   expect_error(convert_de_stat(de_stat_df_test , assay_names = "celltype") , NA)
122 | 
123 |   de_stat_df_test = de_stat_sce
124 |   assay(de_stat_df_test , "logFC_2") = assay(de_stat_sce , "logFC")
125 |   out_1 = convert_de_stat(de_stat_df_test)
126 |   out_2 = convert_de_stat(de_stat_df_test , assay_names = "logFC_2")
127 | 
128 |   expect_equal(FALSE , "logFC_2" %in% colnames(out_1))
129 |   expect_equal(TRUE , "logFC_2" %in% colnames(out_2))
130 | })
131 | 
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/tests/testthat/test-de_test_neighbourhoods.R:
--------------------------------------------------------------------------------
  1 | # context("Testing de_test_neighbourhoods")
  2 | library(miloDE)
  3 | library(miloR)
  4 | library(SingleCellExperiment)
  5 | 
  6 | # load data
  7 | data("sce_mouseEmbryo", package = "miloDE")
  8 | set.seed(32)
  9 | sce = assign_neighbourhoods(sce_mouseEmbryo , k = 25, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
 10 | meta_sce = as.data.frame(colData(sce))
 11 | de_stat_sce = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE")
 12 | de_stat_df = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"),  output_type = "data.frame")
 13 | 
 14 | # wrong inputs return error
 15 | # error msgs
 16 | test_that("Wrong input gives errors", {
 17 | 
 18 |   # x should be of the right format
 19 |   expect_error(de_test_neighbourhoods(x = 1 , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE"),
 20 |                "x should be a SingleCellExperiment or Milo object.",
 21 |                fixed=TRUE
 22 |   )
 23 |   sce_test = sce_mouseEmbryo
 24 |   colnames(sce_test) = rep(1,1,ncol(sce_test))
 25 |   expect_error(de_test_neighbourhoods(x = sce_test , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE"),
 26 |                "If colnames(x) exist, they should be unique.",
 27 |                fixed=TRUE
 28 |   )
 29 | 
 30 |   # sample_id should be character
 31 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = 11, design = ~tomato, covariates = c("tomato"),  output_type = "SCE"),
 32 |                "Check sample_id - should be character vector",
 33 |                fixed=TRUE
 34 |   )
 35 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = TRUE, design = ~tomato, covariates = c("tomato"),  output_type = "SCE"),
 36 |                "Check sample_id - should be character vector",
 37 |                fixed=TRUE
 38 |   )
 39 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = sce, design = ~tomato, covariates = c("tomato"),  output_type = "SCE"),
 40 |                "Check sample_id - should be character vector",
 41 |                fixed=TRUE
 42 |   )
 43 | 
 44 |   # design should be formula
 45 |   expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = "~tomato", covariates = c("tomato"),  output_type = "SCE"),
 46 |                "Check design - should be formula object",
 47 |                fixed=TRUE
 48 |   )
 49 |   expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = 1, covariates = c("tomato"),  output_type = "SCE"),
 50 |                "Check design - should be formula object",
 51 |                fixed=TRUE
 52 |   )
 53 | 
 54 |   # covariates should be vector
 55 |   expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = 1,  output_type = "SCE"),
 56 |                "Check covariates - should be character vector",
 57 |                fixed=TRUE
 58 |   )
 59 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato,  output_type = "SCE"),
 60 |                fixed=TRUE
 61 |   )
 62 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = NULL, output_type = "SCE"),
 63 |                "Check covariates - should be character vector",
 64 |                fixed=TRUE
 65 |   )
 66 | 
 67 |   # contrasts should be NULL or character vector; if character - length 1
 68 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), contrasts = c(1,2), output_type = "SCE"),
 69 |                "Check contrasts - should be NULL or character vector",
 70 |                fixed=TRUE
 71 |   )
 72 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), contrasts = c("1","2"), output_type = "SCE"),
 73 |                "At the moment we only support one comparison - contrasts should be of length 1. If you wish to perform several comparisons, please run separately for each of them.",
 74 |                fixed=TRUE
 75 |   )
 76 | 
 77 |   # min_n_cells_per_sample - positive integer
 78 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_n_cells_per_sample = "aa",  output_type = "SCE"),
 79 |                "Check min_n_cells_per_sample - should be positive integer",
 80 |                fixed=TRUE
 81 |   )
 82 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_n_cells_per_sample = 1.5,  output_type = "SCE"),
 83 |                "Check min_n_cells_per_sample - should be positive integer",
 84 |                fixed=TRUE
 85 |   )
 86 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_n_cells_per_sample = 0,  output_type = "SCE"),
 87 |                "Check min_n_cells_per_sample - should be positive integer",
 88 |                fixed=TRUE
 89 |   )
 90 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_n_cells_per_sample = -100,  output_type = "SCE"),
 91 |                "Check min_n_cells_per_sample - should be positive integer",
 92 |                fixed=TRUE
 93 |   )
 94 | 
 95 | 
 96 |   # min_count - non negative
 97 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_count = "aa",  output_type = "SCE"),
 98 |                "Check min_count - should be non negative number",
 99 |                fixed=TRUE
100 |   )
101 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), min_count = -100,  output_type = "SCE"),
102 |                "Check min_count - should be non negative number",
103 |                fixed=TRUE
104 |   )
105 | 
106 | 
107 |   # output_type - either data.frame or SCE
108 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = 1 ),
109 |                "Check output_type - should be either 'data.frame' or 'SCE'",
110 |                fixed=TRUE
111 |   )
112 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = TRUE ),
113 |                "Check output_type - should be either 'data.frame' or 'SCE'",
114 |                fixed=TRUE
115 |   )
116 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = 'dataframe' ),
117 |                "Check output_type - should be either 'data.frame' or 'SCE'",
118 |                fixed=TRUE
119 |   )
120 | 
121 |   # plot_summary_stat - either TRUE or FALSE
122 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = -1),
123 |                "Check plot_summary_stat - should be either TRUE or FALSE",
124 |                fixed=TRUE
125 |   )
126 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = 10 ),
127 |                "Check plot_summary_stat - should be either TRUE or FALSE",
128 |                fixed=TRUE
129 |   )
130 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = "2" ),
131 |                "Check plot_summary_stat - should be either TRUE or FALSE",
132 |                fixed=TRUE
133 |   )
134 | 
135 | 
136 |   # sample_id - should be in colData x
137 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample1", design = ~tomato, covariates = c("tomato")),
138 |                "'sample_id' should be in colData(x)",
139 |                fixed=TRUE
140 |   )
141 | 
142 | 
143 |   # check covariates in coldata
144 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~sex+sample1+tomato, covariates = c("tomato" , "sex" , "sample1")),
145 |                "All covariates should be colnames of colData(x).",
146 |                fixed=TRUE
147 |   )
148 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample",design = ~sex+stage+tomato, covariates = c("tomato" , "sex" , "stage")),
149 |                "All covariates should have more than 1 contrast.",
150 |                fixed=TRUE
151 |   )
152 | 
153 | 
154 |   # if plot_stat = T, reducedDim should be in sce
155 |   expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = T , layout = "pca"),
156 |                "reducedDim_name should be in reducedDimNames(x).",
157 |                fixed=TRUE
158 |   )
159 | 
160 |   # check subset_hoods
161 |   subset_hoods = c(0,3,20)
162 |   expect_error(de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), plot_summary_stat = F , subset_nhoods = c(0,3,20)),
163 |                "If 'subset_nhoods' is numeric vector, it should lie within c(1:ncol(nhoods(x))).",
164 |                fixed=TRUE
165 |   )
166 | 
167 |   # design and covariates should match
168 |   expect_error(de_test_neighbourhoods(x = sce , sample_id = "sample", design = ~stage.mapped*tomato, covariates = c("tomato"), plot_summary_stat = F ),
169 |                "Some of the design's arguments are not in the covariate vector.",
170 |                fixed=TRUE
171 |   )
172 | 
173 | })
174 | 
175 | 
176 | 
177 | # right input returns the expected format (in default output)
178 | test_that("Return is the correct class", {
179 |   # right class
180 |   expect_s4_class(de_stat_sce , "SingleCellExperiment")
181 |   expect_s3_class(de_stat_df , "data.frame")
182 | 
183 |   cols_df = c("Nhood" , "gene" , "logFC" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods" , "Nhood_center" , "test_performed" )
184 |   expect_identical(cols_df , colnames(de_stat_df))
185 |   expect_identical(cols_df , colnames(de_stat_df))
186 | 
187 |   cols_meta_sce = c("Nhood" , "Nhood_center" , "test_performed" )
188 |   expect_identical(cols_meta_sce , colnames(colData(de_stat_sce)))
189 |   expect_identical(cols_meta_sce , colnames(colData(de_stat_sce)))
190 | 
191 |   cols_assay_sce = c("logFC" , "pval" , "pval_corrected_across_genes", "pval_corrected_across_nhoods" )
192 |   expect_identical(cols_assay_sce , assayNames(de_stat_sce))
193 |   expect_identical(cols_assay_sce , assayNames(de_stat_sce))
194 | 
195 | })
196 | 
197 | 
198 | 
199 | # subset hoods
200 | ## returns right hoods/colnames
201 | test_that("Subset hoods returns right nhoods", {
202 | 
203 |   de_stat = de_test_neighbourhoods(sce , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE" , subset_nhoods = c(1,8,4,10))
204 |   expect_identical(colnames(de_stat) , c("1" , "4" , "8" , "10"))
205 |   meta = as.data.frame(colData(de_stat))
206 |   expect_equal(meta$Nhood , c(1,4,8,10))
207 | 
208 |   de_stat = convert_de_stat(de_stat)
209 |   expect_equal(unique(de_stat$Nhood) , c(1,4,8,10))
210 | 
211 | })
212 | 
213 | ## same fdr,pval across genes but not spatfdr
214 | test_that("Subset hoods does not change FDR", {
215 | 
216 |   de_stat_subset = de_test_neighbourhoods(x = sce, sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "data.frame" , subset_nhoods = c(1,8,4,10))
217 |   de_stat_subset = de_stat_subset[, c("Nhood" , "gene" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods")]
218 |   de_stat_subset = de_stat_subset[!is.na(de_stat_subset$pval) , ]
219 |   colnames(de_stat_subset) = c("Nhood" , "gene" , "pval_1" , "pval_corrected_across_genes_1" , "pval_corrected_across_nhoods_1")
220 | 
221 |   de_stat_full = de_stat_df[de_stat_df$Nhood %in% c(10,4,8,1) , ]
222 |   de_stat_full = de_stat_full[, c("Nhood" , "gene" , "pval" , "pval_corrected_across_genes" , "pval_corrected_across_nhoods")]
223 |   de_stat_full = de_stat_full[!is.na(de_stat_full$pval) , ]
224 |   colnames(de_stat_full) = c("Nhood" , "gene" , "pval_2" , "pval_corrected_across_genes_2" , "pval_corrected_across_nhoods_2")
225 | 
226 |   de_stat = merge(de_stat_subset , de_stat_full, by = c("gene" , "Nhood"))
227 | 
228 |   expect_identical(de_stat$pval_1 , de_stat$pval_2)
229 |   expect_identical(de_stat$pval_corrected_across_genes_1 , de_stat$pval_corrected_across_genes_2)
230 |   expect_false(isTRUE(all.equal(de_stat$pval_corrected_across_nhoods_1 , de_stat$pval_corrected_across_nhoods_2)))
231 | 
232 | })
233 | 
234 | 
235 | # min_n_cells_per_sample
236 | ## bigger min_n_cells_per_sample -- less hoods are tested
237 | test_that("Bigger min_n_cells_per_sample -- less hoods are tested", {
238 |   de_stat_1 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato"), output_type = "SCE" , min_n_cells_per_sample = 1)
239 |   out_1 = as.data.frame(colData(de_stat_1))
240 |   out_1 = sum(out_1$test_performed)
241 |   de_stat_2 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato"), output_type = "SCE" , min_n_cells_per_sample = 100)
242 |   out_2 = as.data.frame(colData(de_stat_2))
243 |   out_2 = sum(out_2$test_performed)
244 |   expect_lt(out_2 , out_1)
245 | })
246 | 
247 | 
248 | 
249 | # min_count
250 | ## higher min_count -- less genes to be tested (in hood, not none)
251 | test_that("Higher min_count -- less genes are tested", {
252 |   de_stat_sce_2 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato"), output_type = "SCE" , min_count = 50)
253 |   expect_gt(nrow(de_stat_sce) , nrow(de_stat_sce_2))
254 | })
255 | 
256 | 
257 | 
258 | # covs
259 | test_that("Covariate check - wrong covariate matrix will give NULL", {
260 |   de_stat = de_test_neighbourhoods(sce , design = ~sex+tomato, covariates = c("tomato" , "sex"), output_type = "SCE" , min_count = 3)
261 |   expect_null(de_stat)
262 | 
263 |   de_stat = de_test_neighbourhoods(sce , design = ~sex+toy_cov_1+tomato, covariates = c("tomato","sex","toy_cov_1"), output_type = "SCE" , min_count = 3)
264 |   expect_null(de_stat)
265 | 
266 |   de_stat = de_test_neighbourhoods(sce , design = ~toy_cov_1+tomato, covariates = c("tomato","toy_cov_1"), output_type = "SCE" , min_count = 3 )
267 |   meta = as.data.frame(colData(de_stat))
268 |   expect_gt(sum(meta$test_performed) , 0)
269 | })
270 | 
271 | # contrast check
272 | test_that("Contrasts check -- has to be formula colnames of model matrix", {
273 |   expect_error(de_test_neighbourhoods(sce , design = ~0+tomato, covariates = c("tomato") , contrasts = c("tomatoT-tomatoF")),
274 |                "contrasts are not right. All variables in the formula should be the colnames from model matrix: tomatoFALSE, tomatoTRUE",
275 |                fixed=TRUE
276 |   )
277 |   expect_error(de_test_neighbourhoods(sce , design = ~0+tomato, covariates = c("tomato") , contrasts = c("tomatoTRUE-tomatoFALSE")),
278 |                NA
279 |   )
280 |   expect_error(de_test_neighbourhoods(sce , design = ~0+tomato, covariates = c("tomato") ,
281 |                                       contrasts = c("tomatoTRUE-tomatoFALSE" , "tomatoTRUE+tomatoFALSE")),
282 |                "At the moment we only support one comparison - contrasts should be of length 1. If you wish to perform several comparisons, please run separately for each of them.",
283 |                fixed=TRUE
284 |   )
285 |   # covariate as last model matrice's col (NULL) give same results
286 |   out_1 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato") ,
287 |                                  contrasts = c("tomatoTRUE") , output_type = "data.frame")
288 |   out_2 = de_test_neighbourhoods(sce , design = ~tomato, covariates = c("tomato") , output_type = "data.frame")
289 |   expect_identical(out_1 , out_2)
290 | })
291 | 
292 | 
293 | 
294 | 
295 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_neighbourhood_sizes.R:
--------------------------------------------------------------------------------
  1 | # context("Testing assign_neighbourhoods")
  2 | library(miloDE)
  3 | 
  4 | # load data
  5 | data("sce_mouseEmbryo", package = "miloDE")
  6 | 
  7 | stat = estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k = c(5,10,20), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
  8 | 
  9 | # error msgs
 10 | test_that("Wrong input gives errors", {
 11 | 
 12 |   # x should be of the right format
 13 |   expect_error(estimate_neighbourhood_sizes(x = 1 , k_grid = c(10,30,50), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 14 |                "x should be a SingleCellExperiment or Milo object.",
 15 |                fixed=TRUE
 16 |   )
 17 |   sce_test = sce_mouseEmbryo
 18 |   colnames(sce_test) = rep(1,1,ncol(sce_test))
 19 |   expect_error(estimate_neighbourhood_sizes(sce_test , k_grid = c(10,30,50), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 20 |                "If colnames(x) exist, they should be unique.",
 21 |                fixed=TRUE
 22 |   )
 23 | 
 24 | 
 25 |   # k_grid should be of the right format
 26 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c("10" , "30" , "50"), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 27 |                "Check k_grid - should be numeric vector",
 28 |                fixed=TRUE
 29 |   )
 30 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(-1,0,10), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 31 |                "Values of k_grid should be positive integers. Please enter valid k_grid.",
 32 |                fixed=TRUE
 33 |   )
 34 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,20.5,30), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 35 |                "Values of k_grid should be positive integers. Please enter valid k_grid.",
 36 |                fixed=TRUE
 37 |   )
 38 |   expect_warning(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = 20, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 39 |                  "You only selected one value for k. If it is intended, we recommend to run directly 'assign_neighbourhoods'."
 40 |   )
 41 |   expect_warning(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(5,20,1500), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 42 |                  "The highest selected value is > 1000. It is gonna cost computationally, and we generally do not recommend such high k. Consider reducing."
 43 |   )
 44 | 
 45 | 
 46 |   # prop should be positive number between 0 and 1
 47 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = "0.2", order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 48 |                "Check prop - should be positive number between 0 and 1",
 49 |                fixed=TRUE
 50 |   )
 51 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 52 |                "Check prop - should be positive number between 0 and 1",
 53 |                fixed=TRUE
 54 |   )
 55 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = -1, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 56 |                "Check prop - should be positive number between 0 and 1",
 57 |                fixed=TRUE
 58 |   )
 59 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 60 |                "Check prop - should be positive number between 0 and 1",
 61 |                fixed=TRUE
 62 |   )
 63 | 
 64 | 
 65 |   # order should be 1 or 2
 66 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 0, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 67 |                "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)",
 68 |                fixed=TRUE
 69 |   )
 70 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30), prop = 0.1, order = 3, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 71 |                "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)",
 72 |                fixed=TRUE
 73 |   )
 74 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30), prop = 0.1, order = 1.5, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 75 |                "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)",
 76 |                fixed=TRUE
 77 |   )
 78 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = "x", filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 79 |                "Check order - should be either 1 (standard kNN-graph) or 2 (2nd-order kNN-graph)",
 80 |                fixed=TRUE
 81 |   )
 82 | 
 83 |   # check filtering should be T or F
 84 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, filtering = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 85 |                "Check filtering - should be either TRUE or FALSE",
 86 |                fixed=TRUE
 87 |   )
 88 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, filtering = "aa", reducedDim_name = "pca.corrected", k_init = 50, d = 30),
 89 |                "Check filtering - should be either TRUE or FALSE",
 90 |                fixed=TRUE
 91 |   )
 92 | 
 93 |   # reducedDim should be character
 94 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, filtering = TRUE, reducedDim_name = 1, k_init = 50, d = 30),
 95 |                "Check reducedDim_name - should be character vector",
 96 |                fixed=TRUE
 97 |   )
 98 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = TRUE, k_init = 50, d = 30),
 99 |                "Check reducedDim_name - should be character vector",
100 |                fixed=TRUE
101 |   )
102 | 
103 | 
104 |   # k_init - positive integer
105 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = "50", d = 30),
106 |                "Check k_init - should be positive integer",
107 |                fixed=TRUE
108 |   )
109 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 0, d = 30),
110 |                "Check k_init - should be positive integer",
111 |                fixed=TRUE
112 |   )
113 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = -10, d = 30),
114 |                "Check k_init - should be positive integer",
115 |                fixed=TRUE
116 |   )
117 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 10.5, d = 30),
118 |                "Check k_init - should be positive integer",
119 |                fixed=TRUE
120 |   )
121 | 
122 | 
123 |   # d - positive integer
124 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = "30"),
125 |                "Check d - should be positive integer",
126 |                fixed=TRUE
127 |   )
128 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 0),
129 |                "Check d - should be positive integer",
130 |                fixed=TRUE
131 |   )
132 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = -1),
133 |                "Check d - should be positive integer",
134 |                fixed=TRUE
135 |   )
136 |   expect_error(estimate_neighbourhood_sizes(sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.1, order = 2, reducedDim_name = "pca.corrected", k_init = 50, d = 1.5),
137 |                "Check d - should be positive integer",
138 |                fixed=TRUE
139 |   )
140 | 
141 | 
142 |   # reduced dim should be in reducedDimNames
143 |   expect_error(estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.2, k_init = 50, d = 30)
144 |   )
145 | 
146 |   expect_error(estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca_corrected", k_init = 50, d = 30),
147 |                "reducedDim_name should be in reducedDimNames(x).",
148 |                fixed=TRUE
149 |   )
150 | 
151 | 
152 |   # cluster_id -- NULL or in the columns of colData
153 |   expect_error(estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30,50), prop = 0.2, order = 2, filtering = T,
154 |                                             reducedDim_name = "pca.corrected", k_init = 50, d = 30 , cluster_id = "ct"),
155 |                "If cluster_id not NULL, it should be in colnames(colData(x))",
156 |                fixed=TRUE
157 |   )
158 | 
159 | })
160 | 
161 | 
162 | 
163 | # return of the correct output
164 | test_that("Return is the correct class", {
165 |   # right class
166 |   expect_s3_class(stat, "data.frame")
167 | 
168 |   # right colnames
169 |   expected_colnames = c("k" , "min" , "q25" , "med" , "q75" , "max")
170 |   expect_identical(colnames(stat) , expected_colnames)
171 | 
172 |   # right rownames
173 |   expect_equal(nrow(stat) , 3)
174 | })
175 | 
176 | 
177 | 
178 | # values increasing in both directions
179 | test_that("Values increase as expected", {
180 | 
181 |   expect_gt(stat$min[2] , stat$min[1])
182 |   expect_gt(stat$min[3] , stat$min[2])
183 |   expect_gt(stat$q75[2] , stat$q25[2])
184 |   expect_gt(stat$med[3] , stat$q25[3])
185 |   expect_gt(stat$max[1] , stat$med[1])
186 |   expect_gt(stat$q25[2] , stat$min[3])
187 | 
188 | })
189 | 
190 | 
191 | test_that("Finishes for diff cluster_id", {
192 |   out_null = estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30), prop = 0.2, order = 2, filtering = T,
193 |                                          reducedDim_name = "pca.corrected", k_init = 50, d = 30 , cluster_id = NULL)
194 |   out_ct = estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30), prop = 0.2, order = 2, filtering = T,
195 |                                           reducedDim_name = "pca.corrected", k_init = 50, d = 30 , cluster_id = "celltype.mapped")
196 | 
197 |   expect_s3_class(out_null, "data.frame")
198 |   expect_s3_class(out_ct, "data.frame")
199 | 
200 | })
201 | 
202 | 
203 | test_that("CT grid should be reasonable", {
204 |   expect_error(estimate_neighbourhood_sizes(x = sce_mouseEmbryo , k_grid = c(10,30,1000), prop = 0.2, order = 2, filtering = T,
205 |                                             reducedDim_name = "pca.corrected", k_init = 50, d = 30 , cluster_id = "celltype.mapped"),
206 |                "All specified clusters have # cells < 2*max(k). We recommed to provide lower clustering resolution, decreasing max(k) or set cluster_id = NULL.",
207 |                fixed=TRUE
208 |   )
209 | })
210 | 
211 | 
212 | 
213 | 


--------------------------------------------------------------------------------
/tests/testthat/test-filter_neighbourhoods.R:
--------------------------------------------------------------------------------
 1 | # context("Testing filter_neighbourhoods")
 2 | library(miloDE)
 3 | library(miloR)
 4 | 
 5 | # load data
 6 | data("sce_mouseEmbryo", package = "miloDE")
 7 | 
 8 | 
 9 | # error msgs
10 | test_that("Wrong input gives errors", {
11 |   # sce should be of the right format
12 |   expect_error(filter_neighbourhoods(x = 1),
13 |                "x should be a Milo object. Please run `assign_neighbourhoods` first.",
14 |                fixed=TRUE
15 |   )
16 |   expect_error(filter_neighbourhoods(sce_mouseEmbryo),
17 |                "x should be a Milo object. Please run `assign_neighbourhoods` first.",
18 |                fixed=TRUE
19 |   )
20 |   sce_test = Milo(sce_mouseEmbryo)
21 |   expect_error(filter_neighbourhoods(sce_test),
22 |                "x should contain non-trivial graph. Please run `assign_neighbourhoods` first.",
23 |                fixed=TRUE
24 |   )
25 | })
26 | 
27 | # return correct class
28 | test_that("Return is the correct class", {
29 |   # right class
30 |   out = assign_neighbourhoods(x = sce_mouseEmbryo , k = 10, prop = 0.2, order = 2, filtering = FALSE, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
31 |   out = filter_neighbourhoods(out)
32 |   expect_s4_class(out, "Milo")
33 | })
34 | 
35 | 
36 | # filtering returns less nhoods
37 | test_that("Filtering returns less nhoods", {
38 |   set.seed(15)
39 |   sce_milo = assign_neighbourhoods(sce_mouseEmbryo , k = 25, filtering = FALSE, reducedDim_name = "pca.corrected")
40 |   sce_milo_filtered = filter_neighbourhoods(sce_milo)
41 | 
42 |   # sce should be of the right format
43 |   expect_gt(ncol(nhoods(sce_milo)) , ncol(nhoods(sce_milo_filtered)))
44 |   expect_equal(nrow(nhoods(sce_milo)) , nrow(nhoods(sce_milo_filtered)))
45 | })
46 | 
47 | 
48 | # filtering filtered data gives same result
49 | test_that("Filtering filtered data is futile_1", {
50 |   set.seed(15)
51 |   sce_milo = assign_neighbourhoods(sce_mouseEmbryo , k = 25, filtering = TRUE, reducedDim_name = "pca.corrected")
52 |   sce_milo_filtered = filter_neighbourhoods(sce_milo)
53 | 
54 |   # sce should be of the right format
55 |   expect_identical(nhoods(sce_milo) , nhoods(sce_milo_filtered))
56 |   expect_identical(as.numeric(nhoodIndex(sce_milo)) , as.numeric(nhoodIndex(sce_milo_filtered)))
57 | 
58 | })
59 | 
60 | 
61 | # initial filtering = post hoc filtering
62 | test_that("Filtering filtered data is futile_2", {
63 |   set.seed(32)
64 |   sce_milo_1 = assign_neighbourhoods(sce_mouseEmbryo , k = 25, filtering = TRUE, reducedDim_name = "pca.corrected")
65 | 
66 |   set.seed(32)
67 |   sce_milo_2 = assign_neighbourhoods(sce_mouseEmbryo , k = 25, filtering = FALSE, reducedDim_name = "pca.corrected")
68 |   sce_milo_2 = filter_neighbourhoods(sce_milo_2)
69 | 
70 |   # sce should be of the right format
71 |   expect_identical(nhoods(sce_milo_1) , nhoods(sce_milo_2))
72 |   expect_identical( as.numeric(nhoodIndex(sce_milo_1)) , as.numeric(nhoodIndex(sce_milo_2)))
73 | 
74 | })
75 | 
76 | 


--------------------------------------------------------------------------------
/tests/testthat/test-rank_neighbourhoods_by_DE_magnitude.R:
--------------------------------------------------------------------------------
  1 | # context("Testing rank_neighbourhoods_by_DE_magnitude")
  2 | library(miloDE)
  3 | 
  4 | # load data
  5 | data("sce_mouseEmbryo", package = "miloDE")
  6 | set.seed(32)
  7 | sce_milo = assign_neighbourhoods(sce_mouseEmbryo , k = 25, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
  8 | meta_sce = as.data.frame(colData(sce_milo))
  9 | de_stat = de_test_neighbourhoods(sce_milo , sample_id = "sample", design = ~tomato, covariates = c("tomato"), output_type = "SCE")
 10 | 
 11 | 
 12 | # wrong inputs return error
 13 | # error msgs
 14 | test_that("Wrong input gives errors", {
 15 |   require(SingleCellExperiment)
 16 |   # de_stat should be right format
 17 |   expect_error(rank_neighbourhoods_by_DE_magnitude(1),
 18 |                "de_stat should be either data.frame or SingleCellExperiment object.\n
 19 |          To get valid de_stat object, please run 'de_test_neighbourhoods.R'",
 20 |                fixed=TRUE
 21 |   )
 22 |   expect_error(rank_neighbourhoods_by_DE_magnitude(sce_milo),
 23 |                "de_stat should be either data.frame or SingleCellExperiment object.\n
 24 |          To get valid de_stat object, please run 'de_test_neighbourhoods.R'",
 25 |                fixed=TRUE
 26 |   )
 27 | 
 28 | 
 29 |   out = data.frame(Nhood = c(1:10) , Nhood_center = c(1:10))
 30 |   out$logFC = 0
 31 |   out$pval = 1
 32 |   expect_error(rank_neighbourhoods_by_DE_magnitude(out),
 33 |                "colnames(de_stat) missing some of the assay_names or coldata_names.",
 34 |                fixed=TRUE
 35 |   )
 36 | 
 37 |   out$pval_corrected_across_nhoods = 1
 38 |   out$pval_corrected_across_genes = 1
 39 |   out$Nhood = as.character(out$Nhood)
 40 |   expect_error(rank_neighbourhoods_by_DE_magnitude(out),
 41 |                "Nhood field should be numeric. To get valid de_stat object, please run 'de_test_neighbourhoods.R'",
 42 |                fixed=TRUE
 43 |   )
 44 | 
 45 | 
 46 |   out = SingleCellExperiment(list(logFC = assay(de_stat , "logFC") ,
 47 |                                   pval = assay(de_stat , "pval") ,
 48 |                                   pval_corrected_across_genes = assay(de_stat , "pval_corrected_across_genes") ,
 49 |                                   pval_corrected_across_nhoods = assay(de_stat , "pval_corrected_across_nhoods")
 50 |                                   ))
 51 |   rownames(out) = rownames(de_stat)
 52 |   out$Nhood = de_stat$Nhood
 53 |   expect_error(rank_neighbourhoods_by_DE_magnitude(out),
 54 |                "de_stat missing some of the coldata_names",
 55 |                fixed=TRUE
 56 |   )
 57 |   out$Nhood = as.character(out$Nhood)
 58 |   out$Nhood_center = out$Nhood
 59 |   expect_error(rank_neighbourhoods_by_DE_magnitude(out),
 60 |                "colData field 'Nhood' should be numeric. To get valid de_stat object, please run 'de_test_neighbourhoods.R'",
 61 |                fixed=TRUE
 62 |   )
 63 | 
 64 | 
 65 |   # pvalue - positive number between 0 and 1
 66 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = "a"),
 67 |                "pval.thresh should be numeric",
 68 |                fixed=TRUE
 69 |   )
 70 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = FALSE),
 71 |                "pval.thresh should be numeric",
 72 |                fixed=TRUE
 73 |   )
 74 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = c(1:10)),
 75 |                "pval.thresh should be a single number",
 76 |                fixed=TRUE
 77 |   )
 78 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = -1),
 79 |                "pval.thresh should be between 0 and 1",
 80 |                fixed=TRUE
 81 |   )
 82 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 0),
 83 |                "pval.thresh should be between 0 and 1",
 84 |                fixed=TRUE
 85 |   )
 86 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 1),
 87 |                "pval.thresh should be between 0 and 1",
 88 |                fixed=TRUE
 89 |   )
 90 | 
 91 | 
 92 |   # z-thresh - neagtive number
 93 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = "a"),
 94 |                "z.thresh should be numeric",
 95 |                fixed=TRUE
 96 |   )
 97 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = FALSE),
 98 |                "z.thresh should be numeric",
 99 |                fixed=TRUE
100 |   )
101 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = c(1:10)),
102 |                "z.thresh should be a single number",
103 |                fixed=TRUE
104 |   )
105 |   expect_error(rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = 0.01),
106 |                "z.thresh should be not higher than 0",
107 |                fixed=TRUE
108 |   )
109 | 
110 | })
111 | 
112 | 
113 | test_that("right format", {
114 |   out = rank_neighbourhoods_by_DE_magnitude(de_stat)
115 |   expect_s3_class(out, "data.frame")
116 |   expect_equal(nrow(out) , ncol(nhoods(sce_milo)))
117 | 
118 |   nhood_stat = data.frame(Nhood = c(1:ncol(nhoods(sce_milo))) , Nhood_center = colnames(nhoods(sce_milo)))
119 |   nhood_stat$Nhood_center = as.character(nhood_stat$Nhood_center)
120 | 
121 |   out = out[, c("Nhood" , "Nhood_center")]
122 |   out$Nhood_center = as.character(out$Nhood_center)
123 |   out = out[order(out$Nhood) , ]
124 |   expect_identical(out$Nhood , nhood_stat$Nhood)
125 |   expect_identical(out$Nhood_center , nhood_stat$Nhood_center)
126 | })
127 | 
128 | 
129 | 
130 | test_that("NaNs give 0s", {
131 |   # de_stat should be right format
132 |   test = convert_de_stat(de_stat)
133 |   test$pval = NaN
134 |   test$pval_corrected_across_nhoods = NaN
135 |   test$pval_corrected_across_genes = NaN
136 | 
137 |   out = rank_neighbourhoods_by_DE_magnitude(test)
138 |   expect_s3_class(out, "data.frame")
139 |   expect_equal(1 , mean(out$n_DE_genes == 0))
140 |   expect_equal(1 , mean(out$n_specific_DE_genes == 0))
141 | })
142 | 
143 | 
144 | 
145 | test_that("lower pvals - less genes", {
146 |   out_1 = rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 0.01)
147 |   out_2 = rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 0.1)
148 |   out_3 = rank_neighbourhoods_by_DE_magnitude(de_stat , pval.thresh = 0.5)
149 |   expect_gte(mean(out_3$n_DE_genes) , mean(out_2$n_DE_genes))
150 |   expect_gte(mean(out_2$n_DE_genes) , mean(out_1$n_DE_genes))
151 | })
152 | 
153 | test_that("lower z - less genes", {
154 |   out_1 = rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = -1)
155 |   out_2 = rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = -2)
156 |   out_3 = rank_neighbourhoods_by_DE_magnitude(de_stat , z.thresh = -3)
157 |   expect_gte(mean(out_1$n_specific_DE_genes) , mean(out_2$n_specific_DE_genes))
158 |   expect_gte(mean(out_2$n_specific_DE_genes) , mean(out_3$n_specific_DE_genes))
159 | })
160 | 
161 | 
162 | 


--------------------------------------------------------------------------------
/tests/testthat/test-spatial_pval_adjustment.R:
--------------------------------------------------------------------------------
 1 | # context("Testing spatial_pval_adjustment")
 2 | library(miloDE)
 3 | library(miloR)
 4 | library(SingleCellExperiment)
 5 | 
 6 | # load data
 7 | data("sce_mouseEmbryo", package = "miloDE")
 8 | set.seed(32)
 9 | sce = assign_neighbourhoods(sce_mouseEmbryo , k = 25, prop = 0.2, order = 2, filtering = T, reducedDim_name = "pca.corrected", k_init = 50, d = 30)
10 | nhoods_sce = nhoods(sce)
11 | pvals_1 = rep(1,0.0001,ncol(nhoods_sce))
12 | pvals_2 = rep(1,0.001,ncol(nhoods_sce))
13 | pvals_3 = rep(1,0.01,ncol(nhoods_sce))
14 | pvals_4 = rep(1,0.1,ncol(nhoods_sce))
15 | pvals_5 = pvals_4
16 | pvals_5[c(4,1,8)] = NaN
17 | 
18 | 
19 | test_that("Right length", {
20 |   out = spatial_pval_adjustment(nhoods_sce , pvals_2)
21 |   expect_equal(ncol(nhoods_sce) , length(out))
22 | })
23 | 
24 | test_that("NaNs at right places", {
25 | 
26 |   out = spatial_pval_adjustment(nhoods_sce , pvals_5)
27 |   expect_identical(out[1] , NaN)
28 |   expect_identical(out[4] , NaN)
29 |   expect_identical(out[8] , NaN)
30 | })
31 | 
32 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/miloDE__mouse_embryo.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "miloDE: chimera mouse embryo, Tal1-"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{miloDE__mouse_embryo}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>"
 14 | )
 15 | library(knitr)
 16 | 
 17 | ```
 18 | 
 19 | # Load libraries
 20 | 
 21 | ```{r setup}
 22 | 
 23 | library(miloDE)
 24 | 
 25 | # library containing toy data
 26 | suppressMessages(library(MouseGastrulationData))
 27 | 
 28 | # analysis libraries
 29 | library(scuttle)
 30 | suppressMessages(library(miloR))
 31 | suppressMessages(library(uwot))
 32 | library(scran)
 33 | suppressMessages(library(dplyr))
 34 | library(reshape2)
 35 | 
 36 | # packages for gene cluster analysis
 37 | library(scWGCNA)
 38 | suppressMessages(library(Seurat))
 39 | 
 40 | # plotting libraries
 41 | library(ggplot2)
 42 | library(viridis)
 43 | library(ggpubr)
 44 | 
 45 | ```
 46 | 
 47 | We will also show how we can parallel `de_test_neighbourhoods`. For this we need to load `BiocParallel` and enable multicore parallel evaluation.   
 48 | 
 49 | ```{r setup-parallel}
 50 | 
 51 | library(BiocParallel)
 52 | 
 53 | ncores = 4
 54 | mcparam = MulticoreParam(workers = ncores)
 55 | register(mcparam)
 56 | 
 57 | ```
 58 | 
 59 | 
 60 | # Load data
 61 | 
 62 | We will use data from mouse gastrulation scRNA-seq atlas [Pijuan-Sala et al., 2019](https://pubmed.ncbi.nlm.nih.gov/30787436/). As a part of this project, using chimera mouse embryos, authors characterised the impact of Tal1 knock out on the mouse development. The most prominent phenotype was a loss of blood cells in Tal1- cells. 
 63 | 
 64 | In this vignette, we will apply miloDE on cells contributing to blood lineage (endothelia and haematoendothelial progenitors (haem. prog-s.)) and assess whether we can detect and characterise more subtle phenotypes (i.e. DE).
 65 | 
 66 | Tal1-/+ chimer data are processed and directly available within `MouseGastrulationData` package. As condition ID, we will use slot `tomato` which indicates whether cells carry Tal1 KO.
 67 | 
 68 | 
 69 | ```{r load-data, fig.width=8 , fig.cap="UMAPs, coloured by cell types"}
 70 | 
 71 | 
 72 | # load chimera Tal1
 73 | sce = suppressMessages(MouseGastrulationData::Tal1ChimeraData())
 74 | 
 75 | # downsample to few selected cell types
 76 | cts = c("Spinal cord" , "Haematoendothelial progenitors", "Endothelium" , "Blood progenitors 1" , "Blood progenitors 2")
 77 | sce = sce[, sce$celltype.mapped %in% cts]
 78 | # let's rename Haematoendothelial progenitors
 79 | sce$celltype.mapped[sce$celltype.mapped == "Haematoendothelial progenitors"] = "Haem. prog-s."
 80 | 
 81 | # delete row for tomato
 82 | sce = sce[!rownames(sce) == "tomato-td" , ]
 83 | 
 84 | # add logcounts
 85 | sce = logNormCounts(sce)
 86 | 
 87 | # update tomato field to be more interpretable 
 88 | sce$tomato = sapply(sce$tomato , function(x) ifelse(isTRUE(x) , "Tal1_KO" , "WT"))
 89 | 
 90 | # for this exercise, we focus on 3000 highly variable genes (for computational efficiency)
 91 | dec.sce = modelGeneVar(sce)
 92 | hvg.genes = getTopHVGs(dec.sce, n = 3000)
 93 | sce = sce[hvg.genes , ]
 94 | # change rownames to symbol names
 95 | rowdata = as.data.frame(rowData(sce))
 96 | rownames(sce) = rowdata$SYMBOL
 97 | 
 98 | # add UMAPs
 99 | set.seed(32)
100 | umaps = as.data.frame(uwot::umap(reducedDim(sce , "pca.corrected")))
101 | # let's store UMAPs - we will use them for visualisation
102 | reducedDim(sce , "UMAP") = umaps
103 | 
104 | # plot UMAPs, colored by cell types
105 | umaps = cbind(as.data.frame(colData(sce)) , reducedDim(sce , "UMAP"))
106 | names(EmbryoCelltypeColours)[names(EmbryoCelltypeColours) == "Haematoendothelial progenitors"] = "Haem. prog-s."
107 | cols_ct = EmbryoCelltypeColours[names(EmbryoCelltypeColours) %in% unique(umaps$celltype.mapped)]
108 | 
109 | p = ggplot(umaps , aes(x = V1 , y = V2 , col = celltype.mapped)) +
110 |   geom_point() + 
111 |   scale_color_manual(values = cols_ct) +
112 |   facet_wrap(~tomato) +
113 |   theme_bw() + 
114 |   labs(x = "UMAP-1", y = "UMAP-2")
115 | p
116 | 
117 | 
118 | ```
119 | 
120 | 
121 | # Assign neighbourhoods
122 | 
123 | ## Estimate k -> neighbourhood size 
124 | 
125 | `estimate_neighbourhood_sizes` allows to gauge how neighbourhood size distribution changes as a function of (order,k). It might be useful to run it first in order to determine optimal range that will return desired neighbourhood sizes.
126 | 
127 | ```{r estimate-k, fig.width=6, fig.cap="Neighbourhood size distribution ~ k"}
128 | 
129 | 
130 | stat_k = estimate_neighbourhood_sizes(sce, k_grid = seq(10,40,5) , 
131 |                                       order = 2, prop = 0.1 , filtering = TRUE,
132 |                                       reducedDim_name = "pca.corrected" , plot_stat = TRUE)
133 | 
134 | kable(stat_k , caption = "Neighbourhood size distribution ~ k")
135 | 
136 | ```
137 | 
138 | We will use k=20, order=2 --> that returns an average neighbourhood size ~400 cells.
139 | 
140 | ## Assign neighbourhoods
141 | 
142 | To assign neighbourhoods, use `assign_neighbourhoods`. Note that under the hood, there is a random sampling of index cells --> if you want to ensure the same neighbourhood assignment, please set seed prior to running this function.
143 | 
144 | Note that we set `filtering = TRUE` to achieve a refined assignment in which redundant neighbourhoods are discarded. Alternatively this can be done post hoc, using `filter_neighbourhoods`. 
145 | 
146 | ```{r assign-nhoods}
147 | 
148 | 
149 | set.seed(32)
150 | sce_milo = assign_neighbourhoods(sce , k = 20 , order = 2, 
151 |                                  filtering = TRUE , reducedDim_name = "pca.corrected" , verbose = F)
152 | 
153 | 
154 | ```
155 | 
156 | In total we get 42 nhoods. A neighbourhood assignment can be visualised using Milo plots, in which each circle corresponds to a neighbourhood, and edges between them represent shared cells. The center of each neighbourhood are coordinated in 2D latent space (e.g. UMAP) for the center cell of the neighbourhood. We can also colour each neighbourhood by provided metric. In this plot, we will annotate each neighbourhood with its enriched cell type, and colour neighbourhoods by assigned cell types.
157 | 
158 | 
159 | ```{r plot-nhoods-by-cts , fig.width=5, fig.cap="Neighbourhood plot, coloured by cell types"}
160 | 
161 | 
162 | nhoods_sce = nhoods(sce_milo)
163 | # assign cell types for nhoods 
164 | nhood_stat_ct = data.frame(Nhood = 1:ncol(nhoods_sce) , Nhood_center = colnames(nhoods_sce))
165 | nhood_stat_ct = miloR::annotateNhoods(sce_milo , nhood_stat_ct , coldata_col = "celltype.mapped")
166 | p = plot_milo_by_single_metric(sce_milo, nhood_stat_ct, colour_by = "celltype.mapped" , 
167 |                                       layout = "UMAP" , size_range = c(1.5,3) , edge_width = c(0.2,0.5)) + 
168 |   scale_fill_manual(values = cols_ct , name = "Cell type")
169 | p
170 | 
171 | 
172 | ```
173 | 
174 | # DE testing
175 | 
176 | ## Calculate AUC per neighbourhood
177 | 
178 | Prior to DE testing, an optional step is to assess which neighbourhoods do not show any signs of perturbation and discard them prior to DE testing in order to facilitate the burden from multiple testing correction (by using `calc_AUC_per_neighbourhood`). To do so we build on [Augur](https://pubmed.ncbi.nlm.nih.gov/32690972/) that employs RF classifiers to separate cells between the conditions. As an output of `calc_AUC_per_neighbourhood`, we return data frame containing AUC of the per neighbourhood classifiers. 
179 | We suggest that AUC <= 0.5 corresponds to neighbourhoods that can be safely discarded from DE testing (however, you may use your own threshold if desired). In addition, if classifier can not be built due to very low number of cells in at least one of the conditions, AUC will be set to NaN.
180 | 
181 | Note that this part is rather computationally costly, and we recommend it to run if a substantial transcriptional regions are anticipated to be unperturbed.
182 | 
183 | ```{r calc-auc-per-nhood, fig.width=5, fig.cap="Neighbourhood plot, coloured by AUC"}
184 | 
185 | 
186 | stat_auc = suppressWarnings(calc_AUC_per_neighbourhood(sce_milo , sample_id = "sample" , condition_id = "tomato", min_n_cells_per_sample = 1, BPPARAM = mcparam))
187 | 
188 | p = plot_milo_by_single_metric(sce_milo, stat_auc, colour_by = "auc" , 
189 |                                       layout = "UMAP" , size_range = c(1.5,3) , edge_width = c(0.2,0.5)) + 
190 |   scale_fill_viridis(name = "AUC")
191 | p
192 | 
193 | 
194 | ```
195 | 
196 | We observe that AUCs in the neighbourhoods containing mostly Blood progenitors is NaN which is consistent with absense of these cells in Tal1- cells. 
197 | In addition, we observe higher AUCs in endothelial subregions, likely reflecting that these cells are more affected by Tal1 KO.
198 | 
199 | ## DE testing
200 | 
201 | Let's proceed with DE testing within each neighbourhood. We will test neighbourhoods in which AUC is not NaN (i.e. neighbourhoods in which there are enough cells from both conditions).
202 | 
203 | ```{r de-testing, fig.width=5 , fig.cap="Neighbourhood plot, coloured by whether DE testing is performed"}
204 | 
205 | 
206 | de_stat = de_test_neighbourhoods(sce_milo ,
207 |                              sample_id = "sample",
208 |                              design = ~tomato,
209 |                              covariates = c("tomato"),
210 |                              subset_nhoods = stat_auc$Nhood[!is.na(stat_auc$auc)],
211 |                              output_type = "SCE",
212 |                              plot_summary_stat = TRUE,
213 |                              layout = "UMAP", BPPARAM = mcparam , 
214 |                              verbose = T)
215 | 
216 | 
217 | 
218 | ```
219 | 
220 | # Analysis of miloDE results 
221 | 
222 | ## Get neighbourhood ranking by the extent of DE
223 | 
224 | One explanatory question a user might have is an overall scan of which transcriptional regions show noteworthy signs of DE. To do so ona neighbourhood level, we provide the function `rank_neighbourhoods_by_DE_magnitude`. Within this function, we calculate two metrics:
225 | 
226 | a) `n_DE_genes` - for each neighbourhood, we calculate how many genes are assigned as DE. Since we are doing it within each neighbourhood, we use `pval_corrected_across_genes` and we use default `pval.thresh=0.1` (can be changed). Note that there is no comparison between neighbourhoods here.
227 | 
228 | 
229 | b) `n_specific_DE_genes`. We also might be interested which neighbourhoods differ from others more so than we would expect. To assess this, we are interested in which neighbourhoods contain genes, that are DE 'specifically' in those neighbourhoods. To calculate this, for each gene we now use z-transformation of `pval_corrected_across_nhoods`, and we identify the neighbourhoods in which z-normalised p-values are lower than a threshold (default `z.thresh=-3`). This would tell us that the gene is signifciantly DE in the neighbourhood *compared to most other neighbourhoods*. We do so for each gene, and then for each neighbourhood we calculate how mane genes have z-normalised p-values are lower than a threshold.
230 | 
231 | 
232 | Note that for gene/neighbourhood combinations for which p-values are returned as NaNs (e.g. genes are not tested), for this function we set pvalues = 1. In other words, if a gene is only tested in few neighbourhoods to begin with, z-normalised p-value corrected across neighbourhoods is likely to be small for these neighbourhoods.
233 | 
234 | 
235 | ```{r rank-nhoods-by-DE-magnitude, fig.width=10 , fig.cap = "Neighbourhood plot, coloured by # of DE genes" , fig.height=5}
236 | 
237 | stat_de_magnitude = rank_neighbourhoods_by_DE_magnitude(de_stat)
238 | 
239 | p1 = plot_milo_by_single_metric(sce_milo, stat_de_magnitude, colour_by = "n_DE_genes" , 
240 |                                       layout = "UMAP" , size_range = c(1.5,3) , edge_width = c(0.2,0.5)) + 
241 |   scale_fill_viridis(name = "# DE genes")
242 | p2 = plot_milo_by_single_metric(sce_milo, stat_de_magnitude, colour_by = "n_specific_DE_genes" , 
243 |                                       layout = "UMAP" , size_range = c(1.5,3) , edge_width = c(0.2,0.5)) + 
244 |   scale_fill_viridis(name = "# specific\nDE genes" , option = "inferno")
245 | p = ggarrange(p1,p2)
246 | p
247 | 
248 | ```
249 | 
250 | Reassuringly, we observe that same regions show both higher AUC and number of DE expressed genes.
251 | 
252 | # Co-regulated programs
253 | 
254 | The fine neighbourhood resolution allows to cluster genes based on logFC vectors (across the neighbourhoods). One way is to employ WGCNA approach, originally designed to finding co-expressed genes. Instead of gene counts we will use corrected logFC (logFC set to 0 in the neighbourhoods in which it is not DE).
255 | 
256 | Below we outline a custom script to detect gene modules using WGCNA approach. We will use [scWGCNA](https://github.com/CFeregrino/scWGCNA), specifically designed to handle single-cell data.
257 | 
258 | Note that WGCNA algorithm is exclusive and sensitive to input parameters - nevertheless, we suggest it is useful to explore what DE patterns exist in your data.
259 | 
260 | ## Gene modules using WGCNA
261 | 
262 | ```{r sc-wgcna}
263 | 
264 | 
265 | get_wgcna_modules = function(de_stat , subset_hoods = NULL , 
266 |                              n_hoods_sig.thresh = 2 ,
267 |                              npcs = 5 ,
268 |                              pval.thresh = 0.1 ){
269 |   require(scWGCNA)
270 |   require(Seurat)
271 |   require(dplyr)
272 |   require(reshape2)
273 |   
274 |   set.seed(32)
275 |   # subset hoods
276 |   if (!is.null(subset_hoods)){
277 |     de_stat = de_stat[de_stat$Nhood %in% subset_hoods , ]
278 |   }
279 |   
280 |   # focus on genes that DE in at least 2 nhoods
281 |   de_stat_per_gene = as.data.frame(de_stat %>% group_by(gene) %>% dplyr::summarise(n_hoods_sig = sum(pval_corrected_across_nhoods < pval.thresh , na.rm = TRUE)))
282 |   genes_sig = de_stat_per_gene$gene[de_stat_per_gene$n_hoods_sig >= n_hoods_sig.thresh]
283 |   
284 |   de_stat = de_stat[de_stat$gene %in% genes_sig, ]
285 |   de_stat = de_stat[order(de_stat$Nhood) , ]
286 |   
287 |   # discard neighbourhoods in which testing was not performed
288 |   de_stat = de_stat[de_stat$test_performed , ]
289 |   
290 |   # for this analysis, set logFC to 0 and pvals to 1 if they are NaN
291 |   de_stat$logFC[is.na(de_stat$logFC)] = 0
292 |   de_stat$pval[is.na(de_stat$pval)] = 1
293 |   de_stat$pval_corrected_across_genes[is.na(de_stat$pval_corrected_across_genes)] = 1
294 |   de_stat$pval_corrected_across_nhoods[is.na(de_stat$pval_corrected_across_nhoods)] = 1
295 |   
296 |   # set logFC to 0 if pval_corrected_across_nhoods > pval.thresh
297 |   de_stat$logFC[de_stat$pval_corrected_across_nhoods >= pval.thresh] = 0
298 |   
299 |   # move the object to Seurat
300 |   de_stat = reshape2::dcast(data = de_stat, formula = gene~Nhood, value.var = "logFC")
301 |   rownames(de_stat) = de_stat$gene
302 |   de_stat = de_stat[,2:ncol(de_stat)]
303 |   
304 |   obj.seurat <- CreateSeuratObject(counts = de_stat)
305 |   DefaultAssay(obj.seurat) <- "RNA"
306 |   obj.seurat = FindVariableFeatures(obj.seurat)
307 |   # scale
308 |   obj.seurat[["RNA"]]@scale.data = as.matrix(obj.seurat[["RNA"]]@data)
309 |   obj.seurat = RunPCA(obj.seurat , npcs = npcs)
310 |   
311 |   # run scwgcna
312 |   clusters_scwgcna = run.scWGCNA(p.cells = obj.seurat, 
313 |                                  s.cells = obj.seurat, 
314 |                                  is.pseudocell = F, 
315 |                                  features = rownames(obj.seurat),
316 |                                  less = TRUE , merging = TRUE)
317 |   # compile stat
318 |   clusters = lapply(1:length(clusters_scwgcna$module.genes) , function(i){
319 |     out = data.frame(cluster = i , gene = clusters_scwgcna$module.genes[[i]] , n_genes = length(clusters_scwgcna$module.genes[[i]]))
320 |     return(out)
321 |   })
322 |   clusters = do.call(rbind , clusters)
323 |   # add colors
324 |   genes_w_colors = clusters_scwgcna$dynamicCols
325 |   genes_w_colors = data.frame(gene = names(genes_w_colors) , cluster_color = genes_w_colors)
326 |   clusters = merge(clusters , genes_w_colors)
327 |   
328 |   return(clusters)
329 | }
330 | 
331 | 
332 | # for this vignette, for simplicity we will focus on genes that are DE in at least 4 neighbourhoods
333 | modules_wgcna = suppressMessages(get_wgcna_modules(convert_de_stat(de_stat) , n_hoods_sig.thresh = 4))
334 | 
335 | 
336 | ```
337 | 
338 | 
339 | ## Gene module plots
340 | 
341 | We can visualise for which transcriptional regions different gene modules are relevant.
342 | 
343 | For each gene module, we can plot neighbourhood plot, in which neighbourhood colour corresponds to average logFC (across genes from the module) and neighbourhood size corresponds to fraction of genes from the module, that are DE in this neighbourhood.
344 | 
345 | ```{r sc-wgcna-plot-modules, fig.cap = "Co-reulated modules", fig.width=10, fig.height=8}
346 | 
347 | 
348 | plots = lapply(sort(unique(modules_wgcna$cluster)) , function(cluster){
349 |   p = plot_DE_gene_set(sce_milo, de_stat , genes = modules_wgcna$gene[modules_wgcna$cluster == cluster],
350 |                             layout = "UMAP" , size_range = c(0.5,3) ,
351 |                             node_stroke = 0.3, edge_width = c(0.2,0.5)) +
352 |     ggtitle(paste0("Module ", cluster, ", ", length(modules_wgcna$gene[modules_wgcna$cluster == cluster]) , " genes"))
353 |   return(p)
354 | })
355 | p = ggarrange(plotlist = plots)
356 | p
357 | 
358 | 
359 | ```
360 | 
361 | ## Break down by cell types
362 | 
363 | We can also quantify the relevance of each module for different cell types.
364 | 
365 | ```{r sc-wgcna-plot-modules-beeswarm, fig.width=10,fig.height=8 , fig.cap = "Co-regulated modules, break down by cell types."}
366 | 
367 | 
368 | # we first need to assign cell type label to each neighbourhood - we have calculated it previously
369 | # we will use data.frame format for de-stat; the conversion between SingleCellExperiment and data.frame formats can be done using `convert_de_stat`
370 | de_stat_df = convert_de_stat(de_stat)
371 | de_stat_df = merge(de_stat_df , nhood_stat_ct , by = c("Nhood" , "Nhood_center"))
372 | 
373 | 
374 | plots = lapply(sort(unique(modules_wgcna$cluster)) , function(cluster){
375 |   p = plot_beeswarm_gene_set(de_stat_df, 
376 |                              genes = modules_wgcna$gene[modules_wgcna$cluster == cluster], 
377 |                              nhoodGroup = "celltype.mapped") + 
378 |     ggtitle(paste0("Module ", cluster))
379 |   return(p)
380 | })
381 | p = ggarrange(plotlist = plots)
382 | p
383 | 
384 | 
385 | ```
386 | 
387 | 
388 | ## Visualising individual genes
389 | 
390 | We can also visualise DE results for individual genes, in which each neighbourhood is coloured by its logFC if significant. 
391 | 
392 | Below we show couple examples of DE patterns of genes from modules 2 and 4.
393 | 
394 | ## Module 2
395 | 
396 | ```{r de-single-genes-mod-2 , fig.width=10,fig.height=15 , fig.cap = "Genes from module 2."}
397 | 
398 | 
399 | set.seed(1020)
400 | module = 2
401 | n_genes = 6
402 | genes = sample(modules_wgcna$gene[modules_wgcna$cluster == module] , n_genes)
403 | 
404 | 
405 | plots = lapply(genes , function(gene){
406 |   p = plot_DE_single_gene(sce_milo, de_stat , gene = gene , layout = "UMAP" , set_na_to_0 = TRUE) + 
407 |     ggtitle(gene)
408 |   return(p)
409 | })
410 | p = ggarrange(plotlist = plots , ncol = 2, nrow = 3)
411 | p
412 | 
413 | 
414 | 
415 | ```
416 | 
417 | ## Module 4
418 | 
419 | ```{r de-single-genes-mod-4 , fig.width=10,fig.height=15 , fig.cap = "Genes from module 4."}
420 | 
421 | 
422 | set.seed(1020)
423 | module = 4
424 | n_genes = 6
425 | genes = sample(modules_wgcna$gene[modules_wgcna$cluster == module] , n_genes)
426 | 
427 | 
428 | plots = lapply(genes , function(gene){
429 |   p = plot_DE_single_gene(sce_milo, de_stat , gene = gene , layout = "UMAP" , set_na_to_0 = TRUE) + 
430 |     ggtitle(gene)
431 |   return(p)
432 | })
433 | p = ggarrange(plotlist = plots, ncol=2, nrow=3)
434 | p
435 | 
436 | 
437 | 
438 | ```
439 | 
440 | 
441 | # Session Info
442 | 
443 | ```{r sessinf}
444 | sessionInfo()
445 | ```
446 | 


--------------------------------------------------------------------------------