├── .DS_Store
├── ENSG_to_HGNC.rds
├── LICENSE.md
├── MASTER.R
├── README.md
├── SQUID
    ├── DESCRIPTION
    ├── LICENSE
    ├── NAMESPACE
    ├── R
    │   └── SQUID.R
    ├── README.md
    ├── SQUID.Rproj
    ├── SQUID_Toy_Example_Data.RData
    └── man
    │   └── SQUID.Rd
├── SURVIVAL
    └── Survival_Analyses.R
├── Toy_example
    ├── .DS_Store
    ├── P.rds
    ├── T.rds
    ├── markers_TMM_FC1.5_Seuratwilcox.rds
    ├── phenoDataC_clusters_after_regrouping.txt
    ├── phenoDataC_clusters_wo_regrouping.txt
    └── scC.rds
├── additional_data
    └── Denisenko_crosstable_bulk_sc_sn_14092021.csv
├── helper_functions.R
├── human_lengths.rda
├── schematic_figures
    ├── .DS_Store
    ├── Scheme1_Github.jpg
    ├── Scheme1_Github.pdf
    ├── Scheme2_Github.jpg
    └── Scheme2_Github.pdf
└── sessionInfo_macOS.txt


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/.DS_Store


--------------------------------------------------------------------------------
/ENSG_to_HGNC.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/ENSG_to_HGNC.rds


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # LICENSE
 2 | 
 3 | This repository is distributed under the BSD 3-clause license. 
 4 | 
 5 | **IMPORTANT: methods used and included in this repository are provided as separate packages. Their respective license conditions apply**
 6 | 
 7 | 
 8 | ```
 9 | Copyright 2022 Francisco Avila Cobos
10 | 
11 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
12 | 
13 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
14 | 
15 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
16 | 
17 | * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20 | ```


--------------------------------------------------------------------------------
/MASTER.R:
--------------------------------------------------------------------------------
  1 | suppressMessages(library(tidyverse))
  2 | suppressMessages(library(data.table))
  3 | suppressMessages(library(ggrastr))
  4 | suppressMessages(library(viridis))
  5 | suppressMessages(library(ggpointdensity))
  6 | suppressMessages(library(egg))
  7 | suppressMessages(library(Matrix))
  8 | suppressMessages(library(matrixStats))
  9 | suppressMessages(library(limma))
 10 | suppressMessages(library(ggplot2))
 11 | suppressMessages(library(RColorBrewer))
 12 | suppressMessages(library(ggrepel))
 13 | suppressMessages(library(ggpubr))
 14 | suppressMessages(library(monocle3))
 15 | suppressMessages(library(Seurat))
 16 | suppressMessages(library(foreach))
 17 | suppressMessages(library(R.utils))
 18 | 
 19 | args <- R.utils::commandArgs(trailingOnly=TRUE)
 20 | 
 21 | if(length(args) != 5){
 22 | 
 23 |   	print("Please check that all required parameters are indicated or are correct")
 24 |   	stop()
 25 | } 
 26 | 
 27 | set.seed(4)
 28 | options(digits = 3)
 29 | options(future.globals.maxSize = 2000 * 1024^2) #https://satijalab.org/seurat/archive/v3.0/future_vignette.html
 30 | 
 31 | ENSG_to_HGNC = readRDS("./ENSG_to_HGNC.rds")
 32 | 
 33 | source('./helper_functions.R')
 34 | 
 35 | ##############################################################################
 36 | ## Input files and parameters:
 37 | 
 38 | directory <- args[1] 
 39 | ref.normalization = args[2] # normalization choice for scC or C
 40 | mix.normalization = args[3] # normalization choice for T
 41 | method = args[4]
 42 | 
 43 | cell.type.info.present <- args[5] #if "yes", no need to apply unsupervised clustering  
 44 | 
 45 | ##############################################################################
 46 | ## Reading input into memory:
 47 | 
 48 | setwd(directory)
 49 | 
 50 | scC <- readRDS("./scC.rds") %>% as.matrix()
 51 | T = readRDS("./T.rds") %>% as.matrix()
 52 | phenoDataC = read.table("phenoDataC_clusters_wo_regrouping.txt", header = TRUE) # It must contain at least two columns: "cellID" & "SubjectName". "cellType" is optional (e.g. if cell.type.info.present = "no")
 53 | 
 54 | #Unless proportions are provided from IHC, it would be computed directly from scRNA-seq/snRNA-seq
 55 | if(!file.exists("./P.rds")){
 56 | 	P = NULL
 57 | } else {
 58 | 	P = readRDS("./P.rds")	
 59 | }
 60 | 
 61 | ##############################################################################
 62 | ## PRE-PROCESSING AND QUALITY CONTROL
 63 | 
 64 | # Filter to remove 1% of extreme values (top 0.5% and bottom 0.5%)
 65 | filterCells <- function(filterParam){
 66 |   cellsToRemove <- which(filterParam < quantile(filterParam, .005) | filterParam > quantile(filterParam, .995)) 
 67 |   cellsToRemove
 68 | }
 69 | 
 70 | libSizes <- colSums(scC)
 71 | gene_names <- rownames(scC)
 72 | 
 73 | if(length(grep("ENSG000|ENSMUSG000",rownames(scC))) > 100){
 74 | 
 75 |   mtID.Ensembl <- ENSG_to_HGNC$ENSG_ID[grep("^MT-|_MT-", ENSG_to_HGNC$HGNC, ignore.case = TRUE)]
 76 |   rbID.Ensembl <- ENSG_to_HGNC$ENSG_ID[grep("^RPL|^RPS|_RPL|_RPS", ENSG_to_HGNC$HGNC, ignore.case = TRUE)]
 77 |   mtID <- gene_names %in% mtID.Ensembl
 78 |   rbID <- gene_names %in% rbID.Ensembl
 79 | 
 80 | } else {
 81 | 
 82 |   mtID <- grepl("^MT-|_MT-", gene_names, ignore.case = TRUE)
 83 |   rbID <- grepl("^RPL|^RPS|_RPL|_RPS", gene_names, ignore.case = TRUE)
 84 | 
 85 | }
 86 | 
 87 | mtPercent <- colSums(scC[mtID, ])/libSizes
 88 | rbPercent <- colSums(scC[rbID, ])/libSizes
 89 | 
 90 | lapply(list(libSizes = libSizes, mtPercent = mtPercent, rbPercent = rbPercent), filterCells) %>% 
 91 |   unlist() %>% 
 92 |   unique() -> cellsToRemove
 93 | 
 94 | if(length(cellsToRemove) != 0){
 95 |   scC <- scC[,-cellsToRemove]
 96 |   phenoDataC <- phenoDataC[-cellsToRemove,]
 97 | }
 98 | 
 99 | # Keep only "detectable" genes: min(round(0.01 * ncol(scC), 10) cells (regardless of the group) have a read/UMI count different from 0
100 | keep <- which(Matrix::rowSums(scC > 0) >= min(round(0.01 * ncol(scC)), 10)) #otherwise it removes many genes from Ellis dataset
101 | scC = scC[keep,]
102 | 
103 | ##############################################################################
104 | ## Unsupervised clustering (Monocle3) if metadata information not provided
105 | 
106 | if(cell.type.info.present == "no"){
107 | 
108 | 	if(file.exists("phenoDataC_clusters_after_regrouping.txt")){
109 | 
110 | 		phenoDataC = read.table("phenoDataC_clusters_after_regrouping.txt", header = TRUE)
111 | 
112 | 	} else {
113 | 
114 | 		cds <- monocle3::new_cell_data_set(scC)
115 | 		cds <- monocle3::preprocess_cds(cds, num_dim = 100, norm_method = "log", method = "PCA", scaling = TRUE)
116 | 		cds <- monocle3::reduce_dimension(cds, max_components = 2, umap.metric = "cosine", umap.fast_sgd = FALSE, preprocess_method = 'PCA') 
117 | 		cds <- monocle3::cluster_cells(cds, k = 20, resolution = NULL, partition_qval = 0.05, num_iter = 1) #with resolution = NULL, the parameter is determined automatically
118 | 
119 | 		Monocle3_metadata <- data.frame(cellID = names(cds@clusters$UMAP$clusters), cellType = paste("cluster",  cds@clusters$UMAP$clusters, sep ="."))
120 | 		phenoDataC$clusterID <- Monocle3_metadata$cellType
121 | 
122 | 		m1 <- monocle3::plot_cells(cds, color_cells_by = "cluster", group_label_size = 4)
123 | 
124 | 		ggsave(paste("Monocle3_",basename(directory),".pdf",sep=""), gridExtra::grid.arrange(m1, nrow = 1), height = 6, width = 6, limitsize = FALSE)
125 | 		dev.off()
126 | 
127 | 		## Make C based on clusters from unsupervised clustering
128 | 		cellType <- phenoDataC$clusterID
129 | 		group = list()
130 | 
131 | 		for(i in unique(cellType)){ 
132 | 		  group[[i]] <- which(cellType %in% i)
133 | 		}
134 | 
135 | 		C = lapply(group,function(x) Matrix::rowMeans(scC[,x])) #C should be made with the mean (not sum) to agree with the way markers were selected
136 | 		C = do.call(cbind.data.frame, C)
137 | 
138 | 		###############
139 | 		# Regrouping if correlation > 0.95  
140 | 
141 | 		correlation.matrix = cor(C)
142 | 		annotation.regrouped <- regroup.cor(correlation.matrix, correlation.threshold = 0.95)
143 | 
144 | 		if(nrow(annotation.regrouped) > 0){
145 | 
146 | 			pdf(paste("Monocle3_cor_full_", basename(getwd()), "_log.ls.norm.pdf", sep = ""), width = max(7, ncol(C)/2), height = max(7, ncol(C)/2))
147 | 				pheatmap::pheatmap(correlation.matrix, annotation = annotation.regrouped, scale = "none", display_numbers = TRUE, number_format = "%.2f")
148 | 			dev.off()
149 | 
150 | 			counter = 1
151 | 			for(elem in unique(annotation.regrouped$new.group)){
152 | 				phenoDataC$clusterID[phenoDataC$clusterID %in% rownames(annotation.regrouped)[annotation.regrouped$new.group == elem]] <- paste("regrouped.cluster", counter, sep = ".")
153 | 				counter = counter + 1
154 | 			}
155 | 
156 | 			phenoDataC$cellType <- phenoDataC$clusterID
157 | 			# re-order phenoDataC in function of scC
158 | 			phenoDataC = phenoDataC[phenoDataC$cellID %in% colnames(scC),]
159 | 			phenoDataC <- phenoDataC[match(colnames(scC),phenoDataC$cellID),]
160 | 
161 | 		} 
162 | 
163 | 		write.table(phenoDataC, file = "phenoDataC_clusters_after_regrouping.txt", row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)
164 | 
165 | 		###############
166 | 		# re-making UMAP plots with new information:
167 | 
168 | 		cds@colData$new.cluster = phenoDataC$clusterID[match(cds@colData$cell, phenoDataC$cellID)]
169 | 
170 | 		m2 <- monocle3::plot_cells(cds, color_cells_by = "new.cluster", group_label_size = 4, show_trajectory_graph = FALSE)
171 | 
172 | 		ggsave(paste("MONOCLE.UMAP_REGROUPED_",basename(directory),".pdf",sep=""), gridExtra::grid.arrange(m2, nrow = 1), height = 6, width = 6, limitsize = FALSE)
173 | 		dev.off()
174 | 
175 | 	}
176 | 
177 | } else {
178 | 
179 | 	# re-order phenoDataC in function of scC
180 | 	phenoDataC = phenoDataC[phenoDataC$cellID %in% colnames(scC),]
181 | 	phenoDataC <- phenoDataC[match(colnames(scC),phenoDataC$cellID),]
182 | 
183 | }
184 | 
185 | ## Ensure that phenoDataC & scC have the same (& ordered) cells [for cases where users will provide a foreign "phenoDataC_clusters_after_regrouping.txt" as input]:
186 | common.cells = intersect(phenoDataC$cellID,colnames(scC))
187 | scC = scC[, common.cells]
188 | phenoDataC = phenoDataC[match(common.cells, phenoDataC$cellID),]
189 | 
190 | # Create matrix C (made with the mean expression values for all cells across cell types)
191 | cellType <- phenoDataC$cellType
192 | group = list()
193 | 
194 | for(i in unique(cellType)){ 
195 |   group[[i]] <- which(cellType %in% i)
196 | }
197 | 
198 | # Generating C (raw) from the mean (raw) expression values across all cells from each cell type
199 | C = lapply(group,function(x) Matrix::rowMeans(scC[,x])) 
200 | C = do.call(cbind.data.frame, C) %>% as.matrix() 
201 | 
202 | ##############################################################################
203 | ## Remove row duplicates
204 | 
205 | C = collapsing(C, rownames(C))
206 | scC = collapsing(scC, rownames(scC))
207 | scC.raw = scC # needed to run marker selection + deconvolution
208 | T = collapsing(T, rownames(T))
209 | 
210 | ##############################################################################
211 | ## Single-cell and bulk normalization 
212 | 
213 | C <- Scaling(matrix = C, option = ref.normalization)
214 | scC <- Scaling(matrix = scC.raw, option = ref.normalization, phenoDataC = phenoDataC)
215 | 
216 | T <- Scaling(matrix = T, option = mix.normalization)
217 | 
218 | ##############################################################################
219 | ## Seurat - FindAllMarkers (Wilcoxon) on TMM normalized scRNA-seq/snRNA-seq 
220 | # MuSiC & DWLS: without markers
221 | 
222 | if(!method %in% c("MuSiC", "DWLS", "SQUID")){
223 | 
224 | 	if(! file.exists("./markers_TMM_FC1.5_Seuratwilcox.rds")){
225 | 
226 | 		scC <- Scaling(matrix = scC.raw, option = "TMM", phenoDataC = phenoDataC)
227 | 
228 | 		scC <- Seurat::CreateSeuratObject(counts = scC)
229 | 		scC$groups <- as.character(phenoDataC$clusterID)
230 | 		Idents(object = scC) <- as.character(phenoDataC$clusterID) #Needed. See: https://github.com/satijalab/seurat/issues/1482
231 | 
232 | 		markers <- Seurat::FindAllMarkers(scC, 
233 | 		                                 logfc.threshold = log2(1.5), 
234 | 		                                 grouping.var = "groups",
235 | 		                                 test.use = "wilcox") 
236 | 
237 | 		markers <- markers[,c("gene","avg_log2FC","cluster")]
238 | 		markers$AveExpr <- Inf
239 | 		colnames(markers) <- c("gene","log2FC","cell_type","AveExpr")
240 | 		markers$HGNC <- ENSG_to_HGNC$HGNC[match(markers$gene, ENSG_to_HGNC$ENSG_ID)]
241 | 		markers$ENSG <- ENSG_to_HGNC$ENSG[match(markers$gene, ENSG_to_HGNC$HGNC)]
242 | 		if(sum(is.na(markers$HGNC)) == nrow(markers)){markers$HGNC <- NULL; markers$HGNC <- rownames(markers)}
243 | 		if(sum(is.na(markers$ENSG)) == nrow(markers)){markers$ENSG <- NULL; markers$ENSG <- rownames(markers)}
244 | 
245 | 		#if markers$gene is both ENSG and HGNC, split:
246 | 		if(length(grep("--",markers$gene)) == nrow(markers)){ markers$gene <- strsplit(markers$gene, "--") %>% lapply(., function(x) x[1]) %>% unlist()}
247 | 		if(length(grep("__",markers$gene)) == nrow(markers)){ markers$gene <- strsplit(markers$gene, "__") %>% lapply(., function(x) x[1]) %>% unlist()}
248 | 
249 | 		## if needed, put markers$gene in same gene format as rownames C & T:
250 | 		if(length(intersect(markers$gene, rownames(C))) == 0){
251 | 
252 | 		    if(length(grep("ENSG000|ENSMUSG000",markers$gene)) > 100){
253 | 
254 | 		        markers.gene <- ENSG_to_HGNC$HGNC[match(markers$gene, ENSG_to_HGNC$ENSG_ID)] 
255 | 
256 | 		    } else {
257 | 
258 | 		        markers.gene <- ENSG_to_HGNC$ENSG_ID[match(markers$gene, ENSG_to_HGNC$HGNC)] 
259 | 
260 | 		    }
261 | 
262 | 		    markers$gene <- markers.gene
263 | 
264 | 		}
265 | 
266 | 		no.dups <- which(table(markers$gene)[markers$gene] == 1) %>% as.numeric()
267 | 		dups <- which(table(markers$gene)[markers$gene] > 1) #keeps ALL occurrences in original table + original ordering
268 | 		dups <- sapply(unique(names(dups)), function(x) grep(x, markers$gene)[1]) %>% as.numeric() #keeps ONLY FIRST occurrence of duplicates (in original table) + original ordering
269 | 		markers <- markers[c(no.dups, dups),]
270 | 
271 | 		saveRDS(markers, "./markers_TMM_FC1.5_Seuratwilcox.rds")
272 | 
273 | 	} else {
274 | 
275 | 		markers = readRDS("./markers_TMM_FC1.5_Seuratwilcox.rds")
276 | 
277 | 	}
278 | 
279 | } else { 
280 | 
281 |     markers = ""
282 | 
283 | }
284 | 
285 | ##############################################################################
286 | ## Deconvolution
287 | 
288 | STRING <- paste(basename(directory), method, ref.normalization, mix.normalization, sep = "_")
289 | 
290 | output.filename <- paste(STRING, ".rds", sep = "")
291 | output_folder <- paste(directory, "OUTPUT_DECONVOLUTION/", sep = "/")
292 | ifelse(!dir.exists(output_folder), dir.create(output_folder), FALSE)
293 | output_name <- paste(output_folder, "RESULTS_", output.filename, sep = "")
294 | 
295 | if(is.null(P)){
296 | 
297 | 	P <- table(phenoDataC$SubjectName,phenoDataC$cellType) %>% t() %>% as.data.table() %>% data.table::dcast.data.table(., V1 ~ V2, fill = "N")
298 | 	P <- data.frame(P, check.names = FALSE) 
299 | 	rownames(P) <- P$V1
300 | 	P$V1 <- NULL
301 | 	P <- P[,colnames(P) %in% colnames(T)]
302 | 	P <- apply(P, 2, function(x) x / sum(x))
303 | 
304 | }
305 | 
306 | ## Unify row names between T and C (after normalization!)
307 | perc.overlap <- (length(intersect(rownames(C),rownames(T))) * 100)/min(nrow(T),nrow(C))
308 | if(perc.overlap < 33.3){
309 | 
310 |     if(length(grep("ENSG000|ENSMUSG000",rownames(T))) > 100){
311 | 
312 |         rownames.T <- ENSG_to_HGNC$HGNC[match(rownames(T), ENSG_to_HGNC$ENSG_ID)] 
313 |         T = T[!is.na(rownames.T),]
314 |         rownames.T = rownames.T[!is.na(rownames.T)]
315 | 
316 |     } else {
317 | 
318 |         rownames.T <- ENSG_to_HGNC$ENSG_ID[match(rownames(T), ENSG_to_HGNC$HGNC)] 
319 |         T = T[!is.na(rownames.T),]
320 |         rownames.T = rownames.T[!is.na(rownames.T)]
321 | 
322 |     }
323 | 
324 |     T <- collapsing(T, rownames.T)
325 | 
326 | }
327 | 
328 | # To avoid issues with sample names that are fully integers
329 | if(length(grep("[A-Z|a-z]", colnames(T))) == 0){colnames(T) <- paste("X",colnames(T),sep="")}
330 | if(length(grep("[A-Z|a-z]", colnames(P))) == 0){colnames(P) <- paste("X",colnames(P),sep="")}
331 | 
332 | ## Remove NA values (if any) after row.name interconversion
333 | if(sum(is.na(rownames(T))) != 0){T = T[!is.na(rownames(T)), ]}
334 | 
335 | P = P[,gtools::mixedsort(colnames(P))]
336 | T = T[,gtools::mixedsort(colnames(T))]
337 | 
338 | #To avoid MAST-DWLS to be repeated!
339 | if(method == "DWLS"){STRING <- paste("DWLS", ref.normalization, sep = "_")} 
340 | 
341 | if(method %in% c("CIBERSORT", "RLR", "FARDEEP", "nnls")){
342 | 
343 | 	RESULTS = Deconvolution(T = T, 
344 | 	                        C = C, 
345 | 	                        method = method,
346 | 	                        phenoDataC = phenoDataC,
347 | 	                        P = P, 
348 | 				STRING = STRING,
349 | 	                        marker_distrib = markers, 
350 | 	                        refProfiles.var = NULL)
351 | 	
352 | } else {
353 | 
354 | 	RESULTS = Deconvolution(T = T, 
355 |                         C = scC, 
356 |                         method = method,
357 |                         phenoDataC = phenoDataC,
358 |                         P = P, 
359 | 			STRING = STRING,
360 |                         marker_distrib = markers, 
361 |                         refProfiles.var = NULL)
362 | 
363 | }
364 | 
365 | RESULTS$dataset <- basename(directory)
366 | RESULTS$method <- method
367 | RESULTS$ref.normalization <- ref.normalization
368 | RESULTS$mix.normalization <- mix.normalization
369 | 
370 | ann_text.global = RESULTS %>% dplyr::summarise(RMSE = sqrt(mean((observed_fraction - expected_fraction)^2)) %>% round(.,3), 
371 | 					       Pearson = cor(observed_fraction, expected_fraction) %>% round(.,3))
372 | 
373 | label = paste("Pearson = ", ann_text.global$Pearson, "\n RMSE = ", ann_text.global$RMSE, sep = "")
374 | 
375 | scaleFUN <- function(x) sprintf("%.3f", x)
376 | 
377 | P <- ggplot(RESULTS, aes(x = observed_fraction , y = expected_fraction, colour = cell_type)) +
378 |       geom_point() +
379 |       geom_abline(intercept = 0, slope = 1, lty = 2, col = "blue") +
380 |       facet_grid( ~ method) +
381 |       geom_text(vjust = "inward", hjust = "inward", data = ann_text.global, aes(x = 0.02, y = 1.04, label = label), size = 4, inherit.aes = FALSE)+
382 |       ggtitle("all_normalizations_per_cell") +
383 |       theme_bw() +
384 |       xlim(0,1) +
385 |       ylim(0,1.05) +
386 |       xlab("observed fraction") +
387 |       ylab("expected fraction") +
388 |       ggtitle(STRING) +
389 |       theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
390 |            legend.position = "bottom")
391 | 
392 | ggsave(P, filename = paste(output_folder, paste(STRING, ".pdf", sep = ""), sep = "/"), width = 6, height = 6)
393 | saveRDS(RESULTS, output_name)
394 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Source code (R statistical programming language, v4.0.1) to reproduce the results described in the article:
  2 | 
  3 | > <em> Francisco Avila Cobos, Mohammad Javad Najaf Panah, Jessica Epps, Xiaochen Long, Tsz-Kwong Man, Hua-Sheng Chiu, Elad Chomsky, Evgeny Kiner, Michael J Krueger, Diego di Bernardo, Luis Voloch, Jan Molenaar, Sander R. van Hooff, Frank Westermann, Selina Jansky, Michele L. Redell, Pieter Mestdagh, Pavel Sumazin</em> **Effective methods for bulk RNA-Seq deconvolution using scnRNA-Seq transcriptomes** *(bioRxiv; https://www.biorxiv.org/content/10.1101/2022.12.13.520241v2)*
  4 | 
  5 | SQUID as standalone R package
  6 | ========
  7 | Please go here to detailed instructions for its installation:
  8 | > https://github.com/favilaco/deconv_matching_bulk_scnRNA/tree/master/SQUID
  9 | 
 10 | DATASETS
 11 | ========
 12 | Here we provide an **example folder** (named "Toy_example"; see *"Folder requirements" & "Running the deconvolution"*) that can be directly used to test the framework. It contains an artificial single-cell RNA-seq dataset made of 5 artificial cell types where two of them are highly correlated (cell type 1 and 5); 1000 cells per cell type and 64 genes (of which 16 are marker genes, 4 per cell type and in different ranges of expression).
 13 | 
 14 | The **other external datasets used in the manuscript** (together with the necessary metadata) can be downloaded from their respective sources:
 15 | 
 16 | * Cell_mixtures: GEO GSE220608
 17 | * Kidney: GEO GSE141115 (https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE141115&format=file) ; additional sample metadata information in "additional_data/Denisenko_crosstable_bulk_sc_sn_14092021".
 18 | * AML: GEO GSE220608
 19 | * Breast_cancer: GEO GSE176078 ("GSE176078_RAW.tar" from "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE176078&format=file")
 20 | * NB_1: EGAS00001004349; 
 21 | * NB_2: EGAS00001006723; EGAS00001006823; GEO GSE218450
 22 | * Synapse: syn8691134; syn23554292; syn23554293; syn23554294
 23 | * Brain: GEO GSE67835 ("GSE67835_RAW.tar" from https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE67835&format=file) & IHC proportions (re-scaled to sum-to-one) present here: https://github.com/ellispatrick/CortexCellDeconv/tree/master/CellTypeDeconvAnalysis/Data [IHC.astro.txt, IHC.endo.txt, IHC.microglia.txt, IHC.neuro.txt; IHC.oligo.txt]
 24 | 
 25 | 
 26 | The following line is needed for fresh installations of Linux (Debian):
 27 | `sudo apt-get install curl libcurl4-openssl-dev libssl-dev zlib1g-dev r-base-dev libxml2-dev`
 28 | 
 29 | 
 30 | R 4.0.1: REQUIRED PACKAGES AND PACKAGE DEPENDENCIES:
 31 | ===================================================
 32 | Packages to be installed (alphabetically ordered) before running any deconvolution (**R >= 4.0.1**):
 33 | ```
 34 | AnnotationDbi
 35 | Biobase
 36 | CIBERSORT
 37 | data.table
 38 | devtools
 39 | doMC
 40 | dplyr
 41 | DWLS
 42 | edgeR
 43 | egg
 44 | foreach
 45 | FARDEEP
 46 | ggplot2
 47 | ggpointdensity
 48 | ggpubr
 49 | ggrastr
 50 | ggrepel
 51 | gridExtra
 52 | gtools
 53 | limma
 54 | MASS
 55 | Matrix
 56 | matrixStats
 57 | monocle3
 58 | MuSiC
 59 | nnls
 60 | org.Hs.eg.db
 61 | org.Mm.eg.db
 62 | parallel
 63 | pheatmap
 64 | RColorBrewer
 65 | R.utils
 66 | scater
 67 | scran
 68 | sctransform
 69 | Seurat
 70 | SingleCellExperiment
 71 | tidyr
 72 | tidyverse
 73 | viridis
 74 | ```
 75 | 
 76 | 
 77 | References to other methods included in our benchmark:
 78 | ======================================================
 79 | While our work has a **BSD (3-clause)** license, you **may need** to obtain a license to use the individual normalization/deconvolution methods (e.g. CIBERSORT. The source code for CIBERSORT needs to be asked to the authors at https://cibersort.stanford.edu).
 80 | 
 81 | | method | ref |
 82 | |--------|----------|
 83 | | nnls | Mullen, K. M. & van Stokkum, I. H. M. nnls: The Lawson-Hanson algorithm for non-negative least squares (NNLS). R package version 1.4. https://CRAN.R-project.org/package=nnls |
 84 | | FARDEEP | Hao, Y., Yan, M., Lei, Y. L. & Xie, Y. Fast and Robust Deconvolution of Tumor Infiltrating Lymphocyte from Expression Profiles using Least Trimmed Squares. bioRxiv 358366 (2018) doi:10.1101/358366 |
 85 | | MASS: Robust linear regression (RLR) | Ripley, B. et al. MASS: Support Functions and Datasets for Venables and Ripley’s MASS. (2002) |
 86 | | CIBERSORT | Newman, A. M. et al. Robust enumeration of cell subsets from tissue expression profiles. Nat. Methods 12, 453–457 (2015) |
 87 | |--------|----------|
 88 | | DWLS | Tsoucas, D. et al. Accurate estimation of cell-type composition from gene expression data. Nat. Commun. 10, 1–9 (2019) |
 89 | | Bisque | Jew, B. et al. Accurate estimation of cell composition in bulk expression through robust integration of single-cell information. Nat. Commun. 11, 1971  (2020) |
 90 | | MuSiC | Wang, X., Park, J., Susztak, K., Zhang, N. R. & Li, M. Bulk tissue cell type deconvolution with multi-subject single-cell expression reference. Nat. Commun. 10, 380 (2019) |
 91 | |--------|----------|
 92 | | SCTransform / regularized negative binomial regression (RNBR) | Hafemeister, C. & Satija, R. Normalization and variance stabilization of single-cell RNA-seq data using regularized negative binomial regression. Genome Biology (2019) doi:10.1186/s13059-019-1874-1 |
 93 | |scran | L. Lun, A. T., Bach, K. & Marioni, J. C. Pooling across cells to normalize single-cell RNA sequencing data with many zero counts. Genome Biol. 17, 75 (2016) |
 94 | | scater | McCarthy, D. J., Campbell, K. R., Lun, A. T. L. & Wills, Q. F. Scater: pre-processing, quality control, normalization and visualization of single-cell RNA-seq data in R. Bioinformatics 33, 1179–1186 (2017) |
 95 | | Trimmed mean of M-values (TMM) | Robinson, M. D. & Oshlack, A. A scaling normalization method for differential expression analysis of RNA-seq data. Genome Biol. 11, R25 (2010) |
 96 | | Transcripts per million (TPM) | Li, B., Ruotti, V., Stewart, R. M., Thomson, J. A. & Dewey, C. N. RNA-Seq gene expression estimation with read mapping uncertainty. Bioinformatics 26, 493–500 (2010) |
 97 | | LogNormalize | LogNormalize function (part of "Seurat"). R Documentation. https://www.rdocumentation.org/packages/Seurat/versions/3.1.1/topics/LogNormalize ; Butler, A., Hoffman, P., Smibert, P. et al. Integrating single-cell transcriptomic data across different conditions, technologies, and species. Nat Biotechnol 36, 411–420 (2018) doi:10.1038/nbt.4096 |
 98 | 
 99 | 
100 | FOLDER REQUIREMENTS
101 | ===================
102 | 
103 | a) Folder structure:
104 | 
105 | **NOTE**: The gold standard matrix of proportions ("P") is either present or obtained as the sum of individual cells across each cell type.
106 | 
107 | ```
108 | .
109 | ├── AML
110 | │   ├── scC.rds
111 | │   ├── T.rds
112 | │   ├── phenoDataC_clusters_wo_regrouping.txt
113 | ├── Brain
114 | │   ├── scC.rds
115 | │   ├── T.rds
116 | │   ├── P.rds #from IHC
117 | │   ├── phenoDataC_clusters_wo_regrouping.txt
118 | ├── Toy_example
119 | │   ├── scC.rds
120 | │   ├── T.rds
121 | │   ├── P.rds
122 | │   ├── phenoDataC_clusters_wo_regrouping.txt
123 | │   └── markers_TMM_FC1.5_Seuratwilcox.rds #(if not present will be created)
124 | ... 
125 | │
126 | ├── helper_functions.R
127 | ├── MASTER.R
128 | └── CIBERSORT.R
129 | ```
130 | 
131 | b) Minimally the following (tab-separated) columns being part of the metadata: "cellID", "cellType", "SubjectName". 
132 | 
133 | ```
134 | # For the "example" dataset, it should look like:
135 | 
136 | cellID		cellType	SubjectName
137 | cell_1	 cell_type_1		  Mix_1
138 | cell_2	 cell_type_1		  Mix_2
139 | ...
140 | ```
141 | 
142 | 
143 | RUNNING THE DECONVOLUTION
144 | =========================
145 | The deconvolution problem is formulated as: T = C·P [see https://doi.org/10.1093/bioinformatics/bty019 and https://doi.org/10.1038/s41467-020-19015-1 for detailed information]
146 | 
147 | Make the following choices (in order):
148 | 
149 | ```
150 | 	i) specific dataset [from: "Toy_example","Cell_mixtures", "Kidney", "AML", "Breast_cancer", "NB_1", "NB_2", "Synapse", "Brain"]
151 | 	ii) normalization strategy for the reference matrix [bulk (C): "none", "LogNormalize", "TMM", "TPM"] ; [single-cell: "none", "LogNormalize", "TMM", "TPM", "SCTransform", "scran", "scater"]
152 | 	iii) normalization strategy for the mixture matrix (T)["none", "LogNormalize", "TMM", "TPM"]
153 | 	iv) deconvolution method [from: "CIBERSORT", "nnls", "FARDEEP", "RLR", "MuSiC", "MuSiC_with_markers", "Bisque", "Bisque_with_markers", "DWLS", "SQUID"]
154 | 	v) indicate whether cell-type labels are included ("yes") or whether they should be obtained by unsupervised clustering ("no")
155 | ```
156 | 
157 | **NOTE: To systematically test the benefit of bulk transformation and deconvolution with SQUID, a leave-one-out cross-validation strategy can also be used: iteratively, concurrent RNA-Seq and scnRNA-Seq profiles of all but one of the samples were used to predict the composition of the remaining sample based on its bulk RNA-Seq profile. By default, all available data is used at once, but this approach can be tried out by using "leave.one.out = TRUE" as a parameter of the function "transformation2" that is used inside SQUID (line 507 inside "helper_functions.R")**
158 | 
159 | 
160 | R example call
161 | ===============
162 | 
163 | **MINIMUM REQUIREMENTS**: matching scnRNA-seq **("scC.rds")** and bulk RNA-seq **("T.rds")** samples together with the correspoinding metadata **("phenoDataC_clusters_wo_regrouping.txt")**. 
164 | 
165 | **OPTIONAL**: if users have gold standard proportion estimates from IHC or other techniques, they can include such information as **"P.rds"** (this will avoid re-computing the proportions directly from the scnRNA-seq data). If users wish to provide their own re-clustering or set of cell type labels, they can include this as a file named **"phenoDataC_clusters_after_regrouping.txt"** (see Toy_example/phenoDataC_clusters_after_regrouping.txt for an example).
166 | 
167 | **NOTE**: Each single-cell RNA-seq dataset ("scC.rds") and mixture ("T.rds") should be **RAW integer MATRICES** containing genomic features as rows.
168 | 
169 | 
170 | ```
171 | 
172 | Rscript MASTER.R ~/Downloads/deconv_matching_bulk_scnRNA/Toy_example TPM TPM nnls no
173 | 
174 | ```
175 | 
176 | In the "example" (toy) dataset cell types 1 and 5 were found to be very highly correlated and were first collapsed into a unique cluster (relabelled as "regrouped.cluster.1"):
177 | 
178 | ![plot](./schematic_figures/Scheme1_Github.jpg)
179 | 
180 | Next, the deconvolution was run and the output was generated:
181 | 
182 | ![plot](./schematic_figures/Scheme2_Github.jpg)
183 | 
184 | 
185 | Survival analysis
186 | =================
187 | Please run script(s) inside folder "SURVIVAL"
188 | 


--------------------------------------------------------------------------------
/SQUID/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: SQUID
 2 | Type: Package
 3 | Title: Single-cell RNA Quantity Informed Deconvolution (SQUID)
 4 | Version: 0.1.0
 5 | Year: 2023
 6 | Author: Francisco Avila Cobos, Mohammad Javad Najaf Panah, ..., Pieter Mestdagh, Pavel Sumazin
 7 | Maintainer: Francisco Avila Cobos <favil90@gmail.com> and Mohammad Javad Najaf Panah <mohammadjavad.najafpanah@bcm.edu>
 8 | Description: SQUID executes a combination of RNA-Seq transformation and dampened weighted least-squares deconvolution approaches in predicting the composition of 
 9 |   cell mixtures and tissue samples based on concurrent RNA-Seq and scnRNA-Seq profiles. To systematically test the benefit of bulk transformation and deconvolution 
10 |   with SQUID, a leave-one-out cross-validation strategy can also be used; iteratively, concurrent RNA-Seq and scnRNA-Seq profiles of all but one of the samples were 
11 |   used to predict the composition of the remaining sample based on its bulk RNA-Seq profile.
12 | Data: SQUID_Toy_Example_Data.RData
13 | License: MIT
14 | Encoding: UTF-8
15 | LazyData: true
16 | 


--------------------------------------------------------------------------------
/SQUID/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Mohammad Najafpanah
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/SQUID/NAMESPACE:
--------------------------------------------------------------------------------
1 | exportPattern("^[[:alpha:]]+")
2 | 


--------------------------------------------------------------------------------
/SQUID/R/SQUID.R:
--------------------------------------------------------------------------------
  1 | #' Single-cell RNA Quantity Informed Deconvolution (SQUID)!
  2 | #'
  3 | #' This is an executable R function named 'SQUID'
  4 | #' which analyse cell-type composition predictions based on concurrent RNA-Seq and scRNA-Seq profiles.
  5 | #'
  6 | #' You can learn more about SQUID compare to some other approaches at:
  7 | #'
  8 | #'   https://github.com/favilaco/deconv_matching_bulk_scnRNA
  9 | #'
 10 | #' Citation:
 11 | #' Avila Cobos, F., Najaf Panah, M. J., Epps, J., Long, X., Man, T. K., Chiu, H. S., ..., Mestdagh, P & Sumazin, P. (2022). Effective
 12 | #' methods for bulk RNA-Seq deconvolution using scnRNA-Seq transcriptomes. bioRxiv, 2022-12.
 13 | #'
 14 | #' GitHub: https://github.com/favilaco/deconv_matching_bulk_scnRNA
 15 | #'
 16 | #' Licence: MIT License
 17 | #'
 18 | #' Copyright (c) [2023] [Francisco Avila Cobos, Mohammad Javad Najaf Panah, Pieter Mestdagh, Pavel Sumazin]
 19 | #'
 20 | #'
 21 | #' Permission is hereby granted, free of charge, to any person obtaining a copy
 22 | #' of this software and associated documentation files (the "Software"), to deal
 23 | #' in the Software without restriction, including without limitation the rights
 24 | #' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 25 | #' copies of the Software, and to permit persons to whom the Software is
 26 | #' furnished to do so, subject to the following conditions:
 27 | #'
 28 | #' The above copyright notice and this permission notice shall be included in
 29 | #' all copies or substantial portions of the Software.
 30 | #'
 31 | #' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 32 | #' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 33 | #' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 34 | #' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 35 | #' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 36 | #' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 37 | #' THE SOFTWARE.
 38 | #'
 39 | #'
 40 | #' For commercial use inquiries, please contact the corresponding authors [Pavel Sumazin at Pavel.Sumazin@bcm.edu; Pieter Mestdagh at pieter.mestdagh@ugent.be].
 41 | #'
 42 | #'
 43 | 
 44 | # Load the example dataset:
 45 | load("SQUID_Toy_Example_Data.RData", verbose = T)
 46 | 
 47 | #' Single-cell RNA Quantity Informed Deconvolution (SQUID).
 48 | #'
 49 | #' SQUID executes a combination of RNA-Seq transformation and dampened weighted least-squares deconvolution approaches in predicting the composition of
 50 | #' cell mixtures and tissue samples.
 51 | #'
 52 | #' SQUID is an improved deconvolution method based on concurrent RNA-Seq and scRNA-Seq profiles.
 53 | #'
 54 | #' NOTE: To systematically test the benefit of bulk transformation and deconvolution with SQUID, a leave-one-out cross-validation strategy can also be used:
 55 | #' iteratively, concurrent RNA-Seq and scnRNA-Seq profiles of all but one of the samples were used to predict the composition of the remaining sample based
 56 | #' on its bulk RNA-Seq profile.
 57 | #'
 58 | #' @param B           A bulk RNA-seq numeric matrix which could be either count or normalized values. Rows should be genes and columns should be sample.ids.
 59 | #' @param scC         A single-cell numeric matrix which could be either count or normalized values. Rows should be genes and columns should be cell.ids.
 60 | #' @param scMeta      A single-cell annotation data.frame which rows should be cell.ids and columns including cell.id, sample.id, cluster.id, cellType, etc.
 61 | #' @param pB          (Optional) A pseudo-bulk matrix generated based on cluster.id/cellType from single-cell analysis. Rows should be genes and columns should be cluster.id/cellType.
 62 | #' @param P           (Optional) A numeric matrix of expected cell fractions including the composition of cluster.id/cellTypes per samples can be calculated from scRNA-seq analysis. Rows should be unique cluster.ids/cellTypes and columns should be sample.ids.
 63 | #' @param LeaveOneOut A logical variable which allow users run SQUID with/without a leave-one-out cross validation strategy. Default is FALSE.
 64 | #'
 65 | #' @return            A table including cellType, sample.id, observed_fraction (predicted cluster/cell-type fraction), expected_fraction (P) for each bulk sample.
 66 | #'
 67 | #' @export RESULTS    Save the RESULTS.rds and RESULTS.csv files in your working directory.
 68 | #'
 69 | #' @usage             RESULTS <- SQUID(B=B, scC=scC , scMeta=scMeta, pB=NULL, P=NULL, LeaveOneOut=FALSE)
 70 | #'
 71 | #'
 72 | SQUID <- function(B=B, scC=scC , scMeta=scMeta, pB=NULL, P=NULL, LeaveOneOut=FALSE) {
 73 |   cat('
 74 | 
 75 | 
 76 | 
 77 |               ----------------------------------- Welcome To SQUID ---------------------------------------
 78 | 
 79 |                                   ########    ########    #      #    ########    #######
 80 |                                   #           #      #    #      #       ##       #      #
 81 |                                   #           #      #    #      #       ##       #       #
 82 |                                   ########    #  ##  #    #      #       ##       #        #
 83 |                                          #    #    # #    #      #       ##       #       #
 84 |                                          #    #     ####  #      #       ##       #      #
 85 |                                   ########    ########    ########    ########    #######
 86 | 
 87 | 
 88 |                Execute an Effective method for bulk RNA-seq deconvolution using scnRNA-seq transcriptomes
 89 | 
 90 |               --------------------------------------------------------------------------------------------
 91 | 
 92 | 
 93 | 
 94 | ')
 95 |   cat('SQUID just began the analysis ... \n\n')
 96 | 
 97 |   # Make pseudo-Bulk (pB) based on clusters from single-cell analysis, if not provided
 98 |   if(is.null(pB)){
 99 |     cellType <- scMeta$cellType # could be 'scMeta$cluster.id' as well.
100 |     group = list()
101 |     for(i in unique(cellType)){
102 |       group[[i]] <- which(cellType %in% i)
103 |     }
104 |     pB = lapply(group,function(x) base::rowMeans(scC[,x, drop = FALSE]))
105 |     pB = do.call(cbind.data.frame, pB)
106 |   } else {
107 |     pB=pB
108 |   }
109 | 
110 |   # Make proportional table (P), if not provided
111 |   if(is.null(P)){
112 |     P <- table(scMeta$sample.id, scMeta$cellType) %>% t()
113 |     P <- P[,gtools::mixedsort(colnames(P))]
114 |     P <- P[,colnames(P) %in% colnames(B)]
115 |     P <- apply(P, 2, function(x) x / sum(x))
116 |   } else {
117 |     P=P
118 |   }
119 | 
120 |   # Unify row names between B and pB
121 |   perc.overlap <- (length(intersect(rownames(pB),rownames(B))) * 60)/min(nrow(B),nrow(pB))
122 |   if(perc.overlap < 33.3){
123 |     if(length(grep("ENSG000|ENSMUSG000",rownames(B))) > 100){
124 |       rownames.B <- ENSG_to_HGNC$HGNC[match(rownames(B), ENSG_to_HGNC$ENSG_ID)]
125 |       B = B[!is.na(rownames.B),]
126 |       rownames.B = rownames.B[!is.na(rownames.B)]
127 |     } else {
128 |       rownames.B <- ENSG_to_HGNC$ENSG_ID[match(rownames(B), ENSG_to_HGNC$HGNC)]
129 |       B = B[!is.na(rownames.B),]
130 |       rownames.B = rownames.B[!is.na(rownames.B)]
131 |     }
132 |     B <- collapsing(B, rownames.B)
133 |   }
134 | 
135 |   # To avoid issues with sample.ids that are fully integers
136 |   if(length(grep("[A-Z|a-z]", colnames(B))) == 0){colnames(B) <- paste("X",colnames(B),sep="")}
137 |   if(length(grep("[A-Z|a-z]", colnames(P))) == 0){colnames(P) <- paste("X",colnames(P),sep="")}
138 | 
139 |   # Remove NA values (if any) after row.name interconversion
140 |   if(sum(is.na(rownames(B))) != 0){B = B[!is.na(rownames(B)), ]}
141 | 
142 |   # Transform the bulk data
143 |   if(LeaveOneOut) {
144 |     cat("SQUID is transforming the Bulk samples using Leave-One-Out cross validation method ... ")
145 |   } else {
146 |     cat("SQUID is transforming the Bulk samples using all informations ... ")
147 |   }
148 | 
149 |   Z = as.matrix(pB)
150 |   Y = Z %*% P
151 |   B.new = transformation2(X = B, Y = Y, leave.one.out = LeaveOneOut)
152 |   B.new[!is.finite(B.new)] <- 0
153 | 
154 |   cat(' DONE!\n\n')
155 | 
156 |   # take common genes
157 |   Genes <- intersect(rownames(Z),rownames(B.new))
158 |   B.new <- as.matrix(B.new[Genes,])
159 |   Z <- as.matrix(Z[Genes,])
160 | 
161 |   # solve Dampened WLS
162 |   cat('SQUID is solving the dampened weighted least squares model for Bulk samples ... \n\n')
163 |   RESULTS <- apply(B.new, 2, function(x){
164 |     b = setNames(x, rownames(B.new))
165 |     tr <- trimData(Z, b)
166 |     RES <- t(solveDampenedWLS(S = Z, B = x))
167 |   })
168 | 
169 |   rownames(RESULTS) <- colnames(Z)
170 |   RESULTS = apply(RESULTS,2,function(x) ifelse(x < 0, 0, x)) # explicit non-negativity constraint
171 |   RESULTS = apply(RESULTS,2,function(x) x/sum(x)) # explicit STO constraint
172 |   RESULTS = RESULTS[gtools::mixedsort(rownames(RESULTS)), , drop = FALSE]
173 |   RESULTS = suppressMessages(reshape2::melt(RESULTS))
174 |   colnames(RESULTS) <- c("cellType","sample.id","observed_fraction")
175 |   RESULTS$cellType = as.character(RESULTS$cellType)
176 |   RESULTS$sample.id = as.character(RESULTS$sample.id)
177 |   if(!is.null(P)){
178 |     P = P[gtools::mixedsort(rownames(P)),,drop = FALSE] %>% data.frame(., check.names = FALSE)
179 |     P$cellType = rownames(P)
180 |     P = suppressMessages(reshape2::melt(P))
181 |     colnames(P) <- c("cellType","sample.id","expected_fraction")
182 |     P$cellType = as.character(P$cellType)
183 |     P$sample.id = as.character(P$sample.id)
184 |     RESULTS = merge(RESULTS, P, by = c("cellType", "sample.id"), all = TRUE)
185 |     RESULTS[is.na(RESULTS)] <- 0
186 |     RESULTS$expected_fraction <- round(RESULTS$expected_fraction, 3)
187 |     RESULTS$observed_fraction <- round(RESULTS$observed_fraction, 3)
188 |   }
189 |   saveRDS(RESULTS, "RESULTS.rds")
190 |   write.csv(RESULTS, "RESULTS.csv")
191 |   cat('\nSQUID has saved the results table in .rds and .csv formats.\n\n')
192 |   cat('Congrats! SQUID has finished the analyses!\n')
193 |   return(RESULTS)
194 | } # The End
195 | 


--------------------------------------------------------------------------------
/SQUID/README.md:
--------------------------------------------------------------------------------
 1 | # SQUID: Single-cell RNA Quantity Informed Deconvolution
 2 | 
 3 | # Overview
 4 | SQUID is an R package for conducting (tumor) deconvolution analyses. SQUID executes a combination of RNA-Seq transformation and dampened weighted least-squares deconvolution to predict the composition of cell mixtures and tissue samples based on the concurrent RNA-Seq and scnRNA-Seq profiles. SQUID harnesses the power of concurrent RNA-Seq and scnRNA-Seq profiling, outperforming other methods in predicting the composition of cell mixtures and tissue samples.
 5 | 
 6 | # Installation
 7 | First load the requirements: 
 8 | ```r
 9 | library(devtools)
10 | devtools::source_url("https://github.com/favilaco/deconv_matching_bulk_scnRNA/blob/master/helper_functions.R?raw=TRUE")
11 | library(dplyr)
12 | ```
13 | Now, you can install the latest version of SQUID from GitHub using the devtools package:
14 | ```r
15 | install_github("favilaco/deconv_matching_bulk_scnRNA/SQUID")
16 | ```
17 | # Usage
18 | To use SQUID, load the package into your R session using the library function:
19 | ```r
20 | library(SQUID)
21 | ```
22 | You can then use the provided SQUID function to conduct the experimantal analyses. You need to provide the required inputs as described in the package documentation in details. SQUID package contains a toy example dataset including bulk and single-cell RNA-seq simulated count data which you can use to run the program and check how it works: 
23 | ```r
24 | RESULTS <- SQUID(B = B, scC = scC , scMeta = scMeta, pB = NULL, P = NULL, LeaveOneOut = FALSE)
25 | 
26 | #' @param B           A bulk RNA-seq numeric matrix which could be either count or normalized values. Rows should be genes and columns should be sample.ids.
27 | #' @param scC         A single-cell numeric matrix which could be either count or normalized values. Rows should be genes and columns should be cell.ids.
28 | #' @param scMeta      A single-cell annotation data.frame which rows should be cell.ids and columns including cell.id, sample.id, cluster.id, cellType, etc.
29 | #' @param pB          (Optional) A pseudo-bulk matrix generated based on cluster.id/cellType from single-cell analysis. Rows should be genes and columns should be cluster.id/cellType.
30 | #' @param P           (Optional) A numeric matrix of expected cell fractions including the composition of cluster.id/cellTypes per samples can be calculated from scRNA-seq analysis. Rows should be unique cluster.ids/cellTypes and columns should be sample.ids.
31 | #' @param LeaveOneOut A logical variable which allow users run SQUID with/without a leave-one-out cross validation strategy. Default is FALSE.
32 | #'
33 | ```
34 | Notice: To improve the performance of SQUID prediction, you may need to set up your research-specific pipeline for the quality contol, normalization, imputation, and single-cell clustering procedures on your datasets prior to run SQUID. You are welcome to look at our preprint [paper](https://www.biorxiv.org/content/10.1101/2022.12.13.520241v2) as an example.
35 | 
36 | # License
37 | This package is licensed under the MIT license. See the LICENSE file for details.
38 | 
39 | # Contact
40 | For questions or comments about SQUID, please contact the package maintainers at Francisco Avila Cobos <francisco.avilacobos@ugent.be> and Mohammad Javad Najaf Panah <mohammadjavad.najafpanah@bcm.edu>. If you find a bug or have a feature request, please submit an issue on the GitHub repository.
41 | 
42 | # Citation
43 |  Francisco Avila Cobos, Mohammad Javad Najaf Panah, Jessica Epps, Xiaochen Long, Tsz-Kwong Man, Hua-Sheng Chiu, Elad Chomsky, Evgeny Kiner, Michael J Krueger, Diego di Bernardo, Luis Voloch, Jan Molenaar, Sander R. van Hooff, Frank Westermann, Selina Jansky, Michele L. Redell, Pieter Mestdagh, Pavel Sumazin Effective methods for bulk RNA-Seq deconvolution using scnRNA-Seq transcriptomes (bioRxiv; https://www.biorxiv.org/content/10.1101/2022.12.13.520241v2)
44 | 


--------------------------------------------------------------------------------
/SQUID/SQUID.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/SQUID/SQUID_Toy_Example_Data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/SQUID/SQUID_Toy_Example_Data.RData


--------------------------------------------------------------------------------
/SQUID/man/SQUID.Rd:
--------------------------------------------------------------------------------
 1 | \name{SQUID}
 2 | 
 3 | \alias{SQUID}
 4 | 
 5 | \title{Single-cell RNA Quantity Informed Deconvolution (SQUID)}
 6 | 
 7 | \description{SQUID predicts the composition of cell mixtures and tissue samples based on concurrent RNA-Seq and scnRNA-Seq profiles.}
 8 | 
 9 | \usage{SQUID(B=B, scC=scC , scMeta=scMeta, pB=NULL, P=NULL, LeaveOneOut=FALSE)}
10 | 
11 | \arguments{
12 |   \item{B}{A bulk RNA-seq numeric matrix which could be either count or normalized values. Rows should be genes and columns should be samples.}
13 |   \item{scC}{A single-cell numeric matrix which could be either count or normalized values. Rows should be genes and columns should be cell.id.}
14 |   \item{scMeta}{A single-cell annotation data.frame which rows should be cell.ids and columns including cell.id, sample.id, cluster.id, cellType, etc.}
15 |   \item{pB}{(Optional) A pseudo-bulk matrix generated based on cluster.id/cellType from single-cell analysis. Rows should be genes and columns should be cluster.id/cellType.}
16 |   \item{P}{ (Optional) A numeric matrix of expected cell fractions including the composition of clusters/cellTypes per samples can be calculated from scRNA-seq analysis. Rows should be unique cluster.ids/cellTypes and columns should be samples.}
17 |   \item{LeaveOneOut}{A logical variable which allow users run SQUID with/without a leave-one-out cross validation strategy. Default is FALSE. Note that the LeaveOneOut argument included here to give users an extra option to systematically test the benefit of bulk transformation and deconvolution accuracy by SQUID.}
18 | }
19 | 
20 | \value{A matrix including cellType, sample.id, observed_fraction (predicted cluster/cell-type fraction), expected_fraction (P) for each bulk sample. By default, SQUID also saves the RESULTS.rds and RESULTS.csv files on the working directory.}
21 | 
22 | 
23 | \examples{
24 | RESULTS <- SQUID(B=B, scC=scC , scMeta=scMeta, pB=NULL, P=NULL, LeaveOneOut=FALSE)
25 | }
26 | 


--------------------------------------------------------------------------------
/SURVIVAL/Survival_Analyses.R:
--------------------------------------------------------------------------------
 1 | # Survival Analyses (AML dataset)
 2 | # Bulk: pAML from TARGET (n=256)
 3 | # SC: 6 pAML from BCM/TCH inclusing Diagnosis and relapes timepoints (n=12)
 4 | 
 5 | # Load libraries:
 6 | library(survival)
 7 | library(survminer)
 8 | 
 9 | ## Read TARGET patients clinical data:
10 | clinical.dt <- read.delim("./AML-TARGET-RNAseq-Diagn-No_21-Anno-091920.txt")
11 | 
12 | ## Read the predicted proportional results (LogNormalize-LogNormalize normalization) generated by deconvolution methods:
13 | pSQUID <- read.csv("./TARGET_AML_LogNormalize_LogNormalize_pSQUID.csv")
14 | 
15 | # Extract patients IDs
16 | TARGET.USI <- strsplit(pSQUID$X, "[.]")
17 | TARGET.USI <- matrix(unlist(TARGET.USI), ncol=5, byrow=TRUE)
18 | TARGET.USI <- as.data.frame(TARGET.USI)
19 | pSQUID <- cbind.data.frame(TARGET.USI, pSQUID)
20 | pSQUID <- pSQUID[,-c(1,2,4,5,6)]
21 | colnames(pSQUID)[1] <- "TARGET.USI"
22 | 
23 | # Merge the cluster.prop variables to clinical data (Relapsed_died + non_Relapsed_Alive patients) n=181
24 | merged.tAML <- merge(clinical.dt, pJanus, by="TARGET.USI")
25 | merged.tAML.R.DoD <- merged.tAML[merged.tAML$First.Event=="Relapse" & merged.tAML$Vital.Status=="Dead",]   # 93
26 | merged.tAML.C.Ali <- merged.tAML[merged.tAML$First.Event=="Censored" & merged.tAML$Vital.Status=="Alive",] # 88
27 | t.AML.R.C.DOD.ALI <- rbind(merged.tAML.R.DoD,merged.tAML.C.Ali)
28 | 
29 | # Categorize the patients based on their median cut-off on cluster proportion (i. e. cluster 11): 
30 | data=t.AML.R.C.DOD.ALI
31 | cluster="cluster.11"
32 | for(i in 1:(nrow(data))) {
33 |   if(data[i,cluster] >= median(data[,cluster])) {
34 |     data$cluster.11.pro[i] <- "high"
35 |   } else {
36 |     data$cluster.11.pro[i] <- "low"
37 |   }
38 | }
39 | 
40 | # Getting the status value
41 | status <- c()
42 | for(j in 1:nrow(data)) {
43 |   if(data$Vital.Status[j]=="Dead") {
44 |     status[j] <- 1
45 |   } else {
46 |     status[j] <- 0 
47 |   } 
48 | }
49 | 
50 | # Model overall survival
51 | sfit <- survfit(Surv(Overall.Survival.Time.in.Days, status)~cluster.11.pro, data=data)
52 | ggsurvplot(sfit, data = data, pval=T, risk.table = F, palette = c("red","blue"))
53 | 
54 | # Using cox ph regression to fit the model 
55 | cfit <- coxph(Surv(Overall.Survival.Time.in.Days, status)~cluster.11, data= data)
56 | cfit
57 | 
58 | # Model event-free survival
59 | sfit0 <- survfit(Surv(Event.Free.Survival.Time.in.Days, status)~cluster.11.pro, data = data)
60 | ggsurvplot(sfit0, data = data, pval=T, risk.table = F, palette = c("red","blue"))
61 | # Using cox ph regression to fit the model 
62 | cfit0 <- coxph(Surv(Event.Free.Survival.Time.in.Days, status)~cluster.11, data=data)
63 | cfit0
64 | 


--------------------------------------------------------------------------------
/Toy_example/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/Toy_example/.DS_Store


--------------------------------------------------------------------------------
/Toy_example/P.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/Toy_example/P.rds


--------------------------------------------------------------------------------
/Toy_example/T.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/Toy_example/T.rds


--------------------------------------------------------------------------------
/Toy_example/markers_TMM_FC1.5_Seuratwilcox.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/Toy_example/markers_TMM_FC1.5_Seuratwilcox.rds


--------------------------------------------------------------------------------
/Toy_example/scC.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/Toy_example/scC.rds


--------------------------------------------------------------------------------
/additional_data/Denisenko_crosstable_bulk_sc_sn_14092021.csv:
--------------------------------------------------------------------------------
1 | ﻿BG17_BulkSeq_ColdDiss_Fresh;BG1,BG30,BG52;Mouse2_Right;"kidney	Batch1_C57BL6J_Male_19weeks"
2 | BG18_BulkSeq_WarmDiss_Fresh;BG2,BG31,BG53;Mouse1_Right;"kidney	Batch1_C57BL6J_Male_19weeks"
3 | BG19_BulkSeq_ColdDiss_Fresh;BG3,BG32,BG54;Mouse4_Left;"kidney	Batch1_C57BL6J_Male_19weeks"
4 | BG20_BulkSeq_WarmDiss_Fresh;BG4,BG33,BG55;Mouse3_Left;"kidney	Batch1_C57BL6J_Male_19weeks"
5 | BG21_BulkSeq_ColdDiss_Fresh;BG5,BG34,BG56;Mouse6_Left;"kidney	Batch1_C57BL6J_Male_19weeks"
6 | BG22_BulkSeq_WarmDiss_Fresh;BG6,BG35,BG57;Mouse5_Right;"kidney	Batch1_C57BL6J_Male_19weeks"
7 | LDK4_BulkSeq_ColdDiss_Fresh;LD01,MJ51;Mouse1_Right;"kidney	Batch3_Balbc_Female_15weeks"
8 | LDK5_BulkSeq_ColdDiss_Fresh;LD02,MJ52;Mouse2_Right;"kidney	Batch3_Balbc_Female_15weeks"
9 | LDK6_BulkSeq_ColdDiss_Fresh;LD03,MJ53;Mouse3_Left;"kidney	Batch3_Balbc_Female_15weeks"


--------------------------------------------------------------------------------
/helper_functions.R:
--------------------------------------------------------------------------------
  1 | regroup.cor <- function(correlation.matrix, correlation.threshold = 0.95){
  2 | 
  3 |       top.cor <- which(abs(correlation.matrix) >= correlation.threshold & row(correlation.matrix) < col(correlation.matrix), arr.ind = TRUE)
  4 |       
  5 |       if(nrow(top.cor) != 0){
  6 | 
  7 |         ## reconstruct names from positions
  8 |         high_cor <- matrix(colnames(correlation.matrix)[top.cor], ncol = 2)
  9 | 
 10 |         count = 0; groups = list()
 11 | 
 12 |         for(elem in unique(high_cor[,1])){
 13 |           count = count + 1
 14 |           items = high_cor[high_cor[,1] %in% elem,]
 15 |           if(!is.null(dim(items))){items <- items %>% melt() %>% .[3] %>% .$value %>% unique()}
 16 |           groups[[paste("group",count,sep="_")]] <- items
 17 |         }
 18 | 
 19 |         for(elem in unique(high_cor[,2])){
 20 |           count = count + 1
 21 |           items = high_cor[high_cor[,2] %in% elem,]
 22 |           if(!is.null(dim(items))){items <- items %>% melt() %>% .[3] %>% .$value %>% unique()}
 23 |           groups[[paste("group",count,sep="_")]] <- items
 24 |         }
 25 | 
 26 |         # If one element present in multiple groups, keep largest
 27 |         groups <- groups[sapply(groups, length) %>% order() %>% rev()] #re-order NEEDED to ensure biggest groups and no repetition
 28 |         new.groups <- list()
 29 |         used <- c()
 30 |         count = 0
 31 |         for(i in names(groups)){
 32 |           count = count + 1
 33 |           to.regroup <- sapply(groups, function(x) groups[[i]] %in% x) %>% colSums() != 0
 34 |           to.regroup <- names(to.regroup)[to.regroup]
 35 |           if(sum(to.regroup %in% used) == 0){
 36 |             new.groups[[paste("group",count,sep="_")]] <- groups[to.regroup] %>% unlist() %>% unique()
 37 |             used <- c(used, to.regroup)
 38 |           }
 39 |           
 40 |         }
 41 | 
 42 |         annotation = melt(new.groups)
 43 |         rownames(annotation) <- annotation$value
 44 |         annotation$value <- NULL
 45 |         colnames(annotation) <- "new.group"
 46 | 
 47 |       } else {
 48 | 
 49 |         annotation = data.frame(new.group = "")
 50 | 
 51 |       }
 52 | 
 53 |       return(annotation)
 54 | 
 55 | }
 56 | 
 57 | 
 58 | 
 59 | collapsing <- function(input, duplicated.names){
 60 |     
 61 |     if(sum(duplicated(duplicated.names)) != 0){
 62 | 
 63 |       exp.var <- apply(input, 1, mean) %>% melt()
 64 |       exp.var$gene  <- duplicated.names
 65 |       exp.var$IQR <- apply(input, 1, IQR) %>% as.numeric()
 66 |       exp.var$row.number <- 1:nrow(exp.var)
 67 |       max.values = exp.var %>% 
 68 |         group_by(gene) %>% 
 69 |         summarise(across(c("value", "IQR"), ~ max(.x)))
 70 |       exp.var <- merge(max.values, exp.var, by = "gene")
 71 |       to.keep <- which(exp.var[,2] == exp.var[,4] & exp.var[,3] == exp.var[,5]) %>% sort()
 72 | 
 73 |       #if after this criteria, still duplicates, choose the first one (smallest row.number)!
 74 |       exp.var <- exp.var[to.keep,]
 75 |       min.row = exp.var %>% 
 76 |         group_by(gene) %>% 
 77 |         summarise(across(c("row.number"), ~ min(.x)))
 78 |       exp.var <- merge(min.row, exp.var, by = "gene")
 79 |       to.keep <- which(exp.var[,2] == exp.var[,7]) %>% sort()
 80 |       exp.var <- exp.var[to.keep,] %>% na.omit()
 81 |       input <- input[exp.var[,2], ]
 82 | 
 83 |       if(length(grep("ENS.*\\.[0-9]+", rownames(input))) > 1000){ # For datasets with ENSG names with format "ENSG00...003.14": keep genes with highest mean expression AND highest variability (first criterium insufficient, there are cases with same mean)
 84 |       
 85 |         rownames(input) <- strsplit(rownames(input),"\\.") %>% lapply(., function(x) x[1]) %>% unlist()
 86 | 
 87 |       } else if(length(grep("ENS.*\\__", rownames(input))) > 1000){  # For datasets with gene names as "ENSG00...__HGNC": remove the HGNC part
 88 | 
 89 |         rownames(input) <- strsplit(rownames(input),"__") %>% lapply(., function(x) x[1]) %>% unlist()
 90 | 
 91 |       } else {
 92 | 
 93 |         rownames(input) <- exp.var$gene
 94 | 
 95 |       }
 96 | 
 97 |     }
 98 | 
 99 |   return(input)
100 | 
101 | }
102 | 
103 | 
104 | 
105 | transformation2 <- function(X, Y, leave.one.out = TRUE) {
106 | 
107 |     # use the same genes for all input datasets
108 |     Genes <- intersect(row.names(Y), row.names(X))
109 | 
110 |     X <- as.matrix(X[Genes,])
111 |     Y <- as.matrix(Y[Genes,])
112 | 
113 |     if(leave.one.out){
114 | 
115 |         pred <- intersect(colnames(X), colnames(Y)) # matching samples with sc and bulk data
116 |         X.new <- matrix(0, nrow=dim(X)[1], ncol=length(pred))
117 | 
118 |         for(j in 1:length(pred)){
119 | 
120 |             X.train <- as.matrix(X[,pred[-j]])
121 |             X.test <- as.matrix(X[,pred[j]])
122 |             Y.train <- as.matrix(Y[,pred])
123 | 
124 |             # track the mean and sd after leaving one out:
125 |             X.mean <- rowMeans(X.train)
126 |             X.sd <- apply(X.train,1,sd) 
127 |             Y.mean <- rowMeans(Y)
128 |             Y.sd <- apply(Y,1,sd)
129 | 
130 |             for (i in 1:dim(X)[1]){
131 | 
132 |                 # transforming over genes
133 |                 x <- X.test[i]
134 |                 sigma_j <- Y.sd[i]*sqrt((length(Y[i,])-1)/(length(Y[i,])+1))
135 |                 x.new <- (x-X.mean[i])/X.sd[i]
136 |                 x.new <- x.new*sigma_j + Y.mean[i]
137 | 
138 |                 X.new[i,j] <- x.new
139 | 
140 |             }
141 | 
142 |         }
143 | 
144 |     } else {
145 | 
146 |         # transforming over genes
147 |         X.new <- matrix(0, nrow=dim(X)[1], ncol=dim(X)[2])
148 | 
149 |         for (i in 1:dim(X)[1]){
150 | 
151 |             y <- Y[i,]
152 |             x <- X[i,]
153 |             l <- length(y)
154 |             sigma_j <- sd(y)*sqrt((l-1)/(l+1))
155 |             x.new <- (x-mean(x))/sd(x)
156 |             x.new <- x.new*sigma_j + mean(y)
157 |             X.new[i,] <- x.new
158 | 
159 |         }
160 | 
161 |     }
162 | 
163 |     # explicit non-negativity constraint:
164 |     X.new = apply(X.new,2,function(x) ifelse(x < 0, 0, x))
165 | 
166 |     rownames(X.new) = rownames(X); colnames(X.new) = colnames(X)
167 | 
168 |     return(X.new)
169 | 
170 | }
171 | 
172 | 
173 | 
174 | bulkC.fromSC <- function(scC, phenoDataC){
175 | 
176 |     phenoDataC = phenoDataC[colnames(scC),]
177 |     cellType <- phenoDataC$cellType
178 |     group = list()
179 | 
180 |     for(i in unique(cellType)){ 
181 | 
182 |         group[[i]] <- which(cellType %in% i)
183 | 
184 |     }
185 | 
186 |     C = lapply(group,function(x) Matrix::rowMeans(scC[,x])) 
187 |     C = do.call(cbind.data.frame, C)
188 | 
189 |     return(as.matrix(C))
190 | 
191 | }
192 | 
193 | 
194 | 
195 | Scaling <- function(matrix, option, phenoDataC=NULL){
196 | 
197 |     #Avoid Error: Input matrix x contains at least one null or NA-filled row.
198 |     matrix = matrix[rowSums(matrix) != 0,]
199 |     #Avoid error if all elements within a row are equal (e.g. all 0, or all a common value after log/sqrt/vst transformation)
200 |     matrix = matrix[!apply(matrix, 1, function(x) var(x) == 0),]
201 |     matrix = matrix[,colSums(matrix) != 0]
202 |  
203 |     if (option == "LogNormalize"){
204 |         
205 |        matrix = expm1(Seurat::LogNormalize(matrix, verbose = FALSE)) %>% as.matrix()
206 | 
207 |     } else if (option == "TMM"){# CPM counts coming from TMM-normalized library sizes; https://support.bioconductor.org/p/114798/
208 | 
209 |         if(!is.null(phenoDataC)){
210 | 
211 |             Celltype = as.character(phenoDataC$cellType[phenoDataC$cellID %in% colnames(matrix)])
212 | 
213 |         } else {
214 | 
215 |             Celltype = colnames(matrix)
216 | 
217 |         }
218 | 
219 |         matrix <- edgeR::DGEList(counts=matrix, group=Celltype)
220 |         CtrlGenes <- grep("ERCC-",rownames(data))
221 |   
222 |         if(length(CtrlGenes) > 1){
223 |             
224 |             spikes <- data[CtrlGenes,]
225 |             spikes <- edgeR::calcNormFactors(spikes, method = "TMM") 
226 |             matrix$samples$norm.factors <- spikes$samples$norm.factors
227 |         
228 |         } else {
229 |         
230 |             matrix <- edgeR::calcNormFactors(matrix, method = "TMM")  
231 |         
232 |         }
233 | 
234 |         matrix <- edgeR::cpm(matrix)
235 | 
236 |     } else if (option == "TPM"){
237 | 
238 |         # MULTI-CORE VERSION: adapted from #devtools::source_url('https://raw.githubusercontent.com/dviraran/SingleR/master/R/HelperFunctions.R', sha1 = "df5560b4ebb28295349aadcbe5b0dc77d847fd9e")
239 | 
240 |         TPM <- function(counts,lengths=NULL){
241 | 
242 |             require(foreach)
243 |             require(Matrix)
244 | 
245 |             if (is.null(lengths)) {
246 |                 data('gene_lengths')
247 |             }
248 | 
249 |             A = intersect(rownames(counts),names(lengths))
250 |             counts = as.matrix(counts[A,])
251 |             lengths = lengths[A]
252 |             rate = counts / lengths 
253 | 
254 |             num_cores <- min(4, parallel::detectCores()) #slurm will automatically restrict this to parameter --cpus-per-task
255 |             print(paste("num_cores = ", num_cores, sep = ""))
256 | 
257 |             doMC::registerDoMC(cores = num_cores)
258 |             jump = 100
259 |             tpm <- foreach::foreach(elem = seq(1,ncol(counts), jump), .combine='cbind.data.frame') %dopar% {   
260 |                     
261 |                     lsizes <- colSums(rate[, elem:min(ncol(counts),(elem + jump - 1))])
262 |                     lo <- 1e6*rate[, elem:min(ncol(counts),(elem + jump - 1))]
263 |                     tpm <- lo %*% diag(1/lsizes)
264 |                     tpm
265 | 
266 |                 }
267 | 
268 |             colnames(tpm) <- colnames(counts)
269 |             return(tpm)
270 | 
271 |         }
272 | 
273 |         if(! file.exists("human_lengths.rda")){
274 |           download.file("https://github.com/dviraran/SingleR/blob/master/data/human_lengths.rda?raw=true","./human_lengths.rda")
275 |         }
276 |         load("./human_lengths.rda")
277 | 
278 |         # Doesn't work with Ensembl IDs:
279 |         if(length(grep("ENSG000",rownames(matrix))) > 50){
280 |             
281 |             suppressMessages(library("AnnotationDbi"))
282 |             suppressMessages(library("org.Hs.eg.db"))
283 |             temp = mapIds(org.Hs.eg.db,keys = names(human_lengths), column = "ENSEMBL", keytype = "SYMBOL", multiVals = "first")
284 |             names(human_lengths) = as.character(temp)
285 | 
286 |         }
287 |         
288 |         matrix = TPM(counts = matrix, lengths = human_lengths)
289 |         rownames(matrix) = toupper(rownames(matrix))
290 | 
291 |     } else if (option == "TPM.murine"){
292 | 
293 |         # MULTI-CORE VERSION: adapted from #devtools::source_url('https://raw.githubusercontent.com/dviraran/SingleR/master/R/HelperFunctions.R', sha1 = "df5560b4ebb28295349aadcbe5b0dc77d847fd9e")
294 | 
295 |         TPM <- function(counts,lengths=NULL){
296 | 
297 |             require(foreach)
298 |             require(Matrix)
299 | 
300 |             if (is.null(lengths)) {
301 |                 data('gene_lengths')
302 |             }
303 | 
304 | 
305 |             A = intersect(rownames(counts),names(lengths))
306 |             counts = as.matrix(counts[A,])
307 |             lengths = lengths[A]
308 |             rate = counts / lengths 
309 | 
310 |             num_cores <- min(4, parallel::detectCores()) #slurm will automatically restrict this to parameter --cpus-per-task
311 |             print(paste("num_cores = ", num_cores, sep = ""))
312 | 
313 |             doMC::registerDoMC(cores = num_cores)
314 |             jump = 100
315 |             tpm <- foreach::foreach(elem = seq(1,ncol(counts), jump), .combine='cbind.data.frame') %dopar% {   
316 |                     
317 |                     lsizes <- colSums(rate[, elem:min(ncol(counts),(elem + jump - 1))])
318 |                     lo <- 1e6*rate[, elem:min(ncol(counts),(elem + jump - 1))]
319 |                     tpm <- lo %*% diag(1/lsizes)
320 |                     tpm
321 | 
322 |                 }
323 | 
324 |             colnames(tpm) <- colnames(counts)
325 |             return(tpm)
326 | 
327 |         }
328 | 
329 |         if(! file.exists("mouse_lengths.rda")){
330 |           download.file("https://github.com/dviraran/SingleR/blob/master/data/mouse_lengths.rda?raw=true","./mouse_lengths.rda")
331 |         }
332 | 
333 |         load("./mouse_lengths.rda")
334 | 
335 |         ## Append version with ENSMUSG000 names
336 |         mouse_lengths2 <- mouse_lengths
337 | 
338 |         # Doesn't work with Ensembl IDs:
339 |         suppressMessages(library("AnnotationDbi"))
340 |         suppressMessages(library("org.Mm.eg.db"))
341 |         temp = mapIds(org.Mm.eg.db,keys = names(mouse_lengths), column = "ENSEMBL", keytype = "SYMBOL", multiVals = "first")
342 |         names(mouse_lengths) = as.character(temp)
343 | 
344 |         mouse_lengths = c(mouse_lengths,mouse_lengths2)
345 |         matrix = TPM(counts = matrix, lengths = mouse_lengths)
346 | 
347 |     ####################################################################################
348 |     ## scRNA-seq specific  
349 | 
350 |     } else if (option == "SCTransform"){
351 | 
352 |         matrix = as(matrix, "dgCMatrix")
353 |         matrix = sctransform::vst(matrix, return_corrected_umi = TRUE, show_progress = FALSE)$umi_corrected
354 | 
355 |     } else if (option == "scran"){
356 |         
357 |         sce = SingleCellExperiment::SingleCellExperiment(assays = list(counts=as.matrix(matrix)))
358 |         sce = scran::computeSumFactors(sce, clusters=NULL)
359 |         sce = scater::logNormCounts(sce, log = FALSE)
360 |         matrix = normcounts(sce)
361 |         
362 |     } else if (option == "scater"){  
363 | 
364 |         sce = SingleCellExperiment::SingleCellExperiment(assays = list(counts=as.matrix(matrix)))
365 |         size_factors = scater::librarySizeFactors(sce)
366 |         sce = scater::logNormCounts(sce, log = FALSE)
367 |         matrix = normcounts(sce)
368 | 
369 |     }
370 | 
371 |     return(matrix)
372 | 
373 | }
374 | 
375 | 
376 | 
377 | Deconvolution <- function(T, C, method, phenoDataC, P = NULL, elem = NULL, STRING = NULL, marker_distrib, refProfiles.var){ 
378 | 
379 |     bulk_methods = c("CIBERSORT","nnls","FARDEEP","RLR")
380 |     sc_methods = c("MuSiC","MuSiC_with_markers","DWLS","SQUID", "Bisque", "Bisque_with_markers")
381 | 
382 |     ########## Using marker information for bulk_methods
383 | 
384 |     if(method %in% bulk_methods){
385 | 
386 |         C = C[rownames(C) %in% unique(marker_distrib$gene), , drop = FALSE]
387 |         T = T[rownames(T) %in% unique(marker_distrib$gene), , drop = FALSE]
388 |         refProfiles.var = refProfiles.var[rownames(refProfiles.var) %in% unique(marker_distrib$gene), , drop = FALSE]
389 | 
390 |     } else { ### For scRNA-seq methods 
391 | 
392 |         if(length(grep("[N-n]ame",colnames(phenoDataC))) > 0){
393 |             sample_column = grep("[N-n]ame",colnames(phenoDataC))
394 |         } else {
395 |             sample_column = grep("[S-s]ample|[S-s]ubject",colnames(phenoDataC))
396 |         }
397 | 
398 |         colnames(phenoDataC)[sample_column] = "SubjectName"
399 |         rownames(phenoDataC) = phenoDataC$cellID
400 |         # establish same order in (sc)C and phenoDataC:
401 |         phenoDataC <- phenoDataC[match(colnames(C),phenoDataC$cellID),]
402 | 
403 |         if(method %in% c("MuSiC","MuSiC_with_markers", "Bisque", "Bisque_with_markers")){
404 | 
405 |             require(xbioc)
406 |             C.eset <- Biobase::ExpressionSet(assayData = as.matrix(C), phenoData = Biobase::AnnotatedDataFrame(phenoDataC))
407 |             T.eset <- Biobase::ExpressionSet(assayData = as.matrix(T))
408 |         
409 |         }
410 |         
411 |     }
412 | 
413 |     ##########    MATRIX DIMENSION APPROPRIATENESS    ##########
414 | 
415 |     keep = intersect(rownames(C),rownames(T)) 
416 |     C = C[keep, , drop = FALSE]
417 |     T = T[keep, , drop = FALSE]
418 | 
419 |     ###################################
420 | 
421 |     if(method == "CIBERSORT"){ 
422 | 
423 |         #source("./CIBERSORT.R")
424 |         RESULTS = CIBERSORT(sig_matrix = C, mixture_file = T, QN = FALSE) 
425 |         RESULTS = t(RESULTS[,1:(ncol(RESULTS)-3),drop=FALSE]) 
426 | 
427 |     } else if (method == "nnls"){
428 | 
429 |         require(nnls)
430 |         RESULTS = do.call(cbind.data.frame,lapply(apply(T,2,function(x) nnls::nnls(as.matrix(C),x)), function(y) y$x))
431 |         RESULTS = apply(RESULTS,2,function(x) x/sum(x)) #explicit STO constraint
432 |         rownames(RESULTS) <- colnames(C)
433 | 
434 |     } else if (method == "FARDEEP"){
435 | 
436 |         require(FARDEEP)
437 |         RESULTS = t(FARDEEP::fardeep(C, T, nn = TRUE, intercept = TRUE, permn = 10, QN = FALSE)$abs.beta)
438 |         RESULTS = apply(RESULTS,2,function(x) x/sum(x)) #explicit STO constraint
439 |         # These 2 lines are similar as retrieving $relative.beta instead of abs.beta + re-scaling
440 | 
441 |     } else if (method == "RLR"){ #RLR = robust linear regression
442 | 
443 |         require(MASS)
444 |         RESULTS = do.call(cbind.data.frame,lapply(apply(T,2,function(x) MASS::rlm(x ~ as.matrix(C), maxit=100)), function(y) y$coefficients[-1]))
445 |         RESULTS = apply(RESULTS,2,function(x) ifelse(x < 0, 0, x)) #explicit non-negativity constraint
446 |         RESULTS = apply(RESULTS,2,function(x) x/sum(x)) #explicit STO constraint
447 |         rownames(RESULTS) <- unlist(lapply(strsplit(rownames(RESULTS),")"),function(x) x[2]))
448 | 
449 |     ###################################
450 |     ###################################
451 |     
452 |     } else if (method == "MuSiC"){
453 | 
454 |         require(MuSiC)
455 |         RESULTS = t(MuSiC::music_prop(bulk.eset = T.eset, sc.eset = C.eset, clusters = 'cellType',
456 |                                             markers = NULL, normalize = FALSE, samples = 'SubjectName', 
457 |                                             verbose = F)$Est.prop.weighted)
458 | 
459 |     } else if (method == "MuSiC_with_markers"){
460 | 
461 |         require(MuSiC)
462 |         RESULTS = t(MuSiC::music_prop(bulk.eset = T.eset, sc.eset = C.eset, clusters = 'cellType',
463 |                                             markers = unique(marker_distrib$gene), normalize = FALSE, samples = 'SubjectName', 
464 |                                            verbose = F)$Est.prop.weighted)
465 |     
466 |     } else if (method == "Bisque"){#By default, Bisque uses all genes for decomposition. However, you may supply a list of genes (such as marker genes) to be used with the markers parameter
467 | 
468 |         require(BisqueRNA)
469 |         RESULTS <- BisqueRNA::ReferenceBasedDecomposition(T.eset, C.eset, markers = NULL, use.overlap = FALSE)$bulk.props 
470 | 
471 |     } else if (method == "Bisque_with_markers"){#By default, Bisque uses all genes for decomposition. However, you may supply a list of genes (such as marker genes) to be used with the markers parameter
472 | 
473 |         require(BisqueRNA)
474 |         RESULTS <- BisqueRNA::ReferenceBasedDecomposition(T.eset, C.eset, markers = marker_distrib$gene, use.overlap = FALSE)$bulk.props 
475 | 
476 |     } else if (method == "DWLS"){
477 |         
478 |         require(DWLS)
479 |         path = paste(getwd(),"/results_",STRING,sep="")
480 | 
481 |         if(! dir.exists(path)){ dir.create(path) } #to avoid repeating marker_selection step when removing cell types; Sig.RData automatically created
482 | 
483 |         if(!file.exists(paste(path,"Sig.RData",sep="/"))){
484 | 
485 |             Signature <- DWLS::buildSignatureMatrixMAST(scdata = C, id = as.character(phenoDataC$cellType), path = path, diff.cutoff = 0.5, pval.cutoff = 0.01)
486 | 
487 |         } else {#re-load signature and remove CT column + its correspondent markers
488 | 
489 |             load(paste(path,"Sig.RData",sep="/"))
490 |             Signature <- Sig
491 |             
492 |         }
493 | 
494 |         Signature = as.matrix(Signature)
495 |         
496 |         RESULTS <- apply(T, 2, function(x){
497 |             b = setNames(x, rownames(T))
498 |             tr <- DWLS::trimData(Signature, b)
499 |             RES <- t(DWLS::solveDampenedWLS(tr$sig, tr$bulk))
500 |         })
501 | 
502 |         rownames(RESULTS) <- as.character(unique(phenoDataC$cellType))
503 |         RESULTS = apply(RESULTS,2,function(x) ifelse(x < 0, 0, x)) #explicit non-negativity constraint
504 |         RESULTS = apply(RESULTS,2,function(x) x/sum(x)) #explicit STO constraint
505 |         print(head(RESULTS))
506 | 
507 |     } else if (method == "SQUID"){ 
508 |             
509 |         # Transforming T :
510 |         Z = bulkC.fromSC(scC = C, phenoDataC = phenoDataC)
511 |         X = T
512 |         P <- as.matrix(P[base::colnames(Z),,drop = FALSE])
513 |         Y = Z %*% P
514 | 
515 |         X.new = transformation2(X = X, Y = Y, leave.one.out = FALSE)
516 |         X.new[!is.finite(X.new)] <- 0 
517 |         
518 |         # take common genes
519 |         Genes <- intersect(rownames(Z),rownames(X.new))
520 | 
521 |         X.new <- as.matrix.Vector(X.new[Genes,])
522 |         Z <- as.matrix.Vector(Z[Genes,])
523 | 
524 |         RESULTS <- apply(X.new, 2, function(x){
525 |             RES <- t(DWLS::solveDampenedWLS(S = Z, B = x))
526 |         })
527 | 
528 |         rownames(RESULTS) <- colnames(Z)
529 |         RESULTS = apply(RESULTS,2,function(x) ifelse(x < 0, 0, x)) #explicit non-negativity constraint
530 |         RESULTS = apply(RESULTS,2,function(x) x/sum(x)) #explicit STO constraint
531 | 
532 |     }
533 | 
534 |     RESULTS = RESULTS[gtools::mixedsort(rownames(RESULTS)), , drop = FALSE] 
535 |     RESULTS = data.table::melt(RESULTS)
536 |     colnames(RESULTS) <- c("cell_type","tissue","observed_fraction")
537 |     RESULTS$cell_type = as.character(RESULTS$cell_type)
538 |     RESULTS$tissue = as.character(RESULTS$tissue)
539 | 
540 |     if(!is.null(P)){
541 | 
542 |         P = P[gtools::mixedsort(rownames(P)),,drop = FALSE] %>% data.frame(., check.names = FALSE)
543 |         P$cell_type = rownames(P)
544 |         P = data.table::melt(P, id.vars="cell_type")
545 |         colnames(P) <-c("cell_type","tissue","expected_fraction")
546 |         P$cell_type = as.character(P$cell_type)
547 |         P$tissue = as.character(P$tissue)
548 | 
549 |         RESULTS = merge(RESULTS, P, by = c("cell_type", "tissue"), all = TRUE)
550 |         RESULTS[is.na(RESULTS)] <- 0
551 |         RESULTS$expected_fraction <- round(RESULTS$expected_fraction, 3)
552 |         RESULTS$observed_fraction <- round(RESULTS$observed_fraction, 3)
553 | 
554 |     }
555 | 
556 |     return(RESULTS) 
557 | 
558 | }
559 | 


--------------------------------------------------------------------------------
/human_lengths.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/human_lengths.rda


--------------------------------------------------------------------------------
/schematic_figures/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/schematic_figures/.DS_Store


--------------------------------------------------------------------------------
/schematic_figures/Scheme1_Github.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/schematic_figures/Scheme1_Github.jpg


--------------------------------------------------------------------------------
/schematic_figures/Scheme1_Github.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/schematic_figures/Scheme1_Github.pdf


--------------------------------------------------------------------------------
/schematic_figures/Scheme2_Github.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/schematic_figures/Scheme2_Github.jpg


--------------------------------------------------------------------------------
/schematic_figures/Scheme2_Github.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/favilaco/deconv_matching_bulk_scnRNA/c353e025979a7a122078a324585d1980301bc5f7/schematic_figures/Scheme2_Github.pdf


--------------------------------------------------------------------------------
/sessionInfo_macOS.txt:
--------------------------------------------------------------------------------
  1 | > sessionInfo()
  2 | R version 4.0.1 (2020-06-06)
  3 | Platform: x86_64-apple-darwin17.0 (64-bit)
  4 | Running under: macOS High Sierra 10.13.6
  5 | 
  6 | Matrix products: default
  7 | BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
  8 | LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
  9 | 
 10 | locale:
 11 | [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
 12 | 
 13 | attached base packages:
 14 | [1] parallel  stats4    stats     graphics  grDevices utils     datasets
 15 | [8] methods   base
 16 | 
 17 | other attached packages:
 18 |  [1] viridis_0.6.1               viridisLite_0.4.0
 19 |  [3] forcats_0.5.1               stringr_1.4.0
 20 |  [5] purrr_0.3.4                 readr_1.4.0
 21 |  [7] tibble_3.1.6                tidyverse_1.3.1
 22 |  [9] tidyr_1.1.4                 sctransform_0.3.2
 23 | [11] scran_1.16.0                scater_1.16.2
 24 | [13] R.utils_2.10.1              R.oo_1.24.0
 25 | [15] R.methodsS3_1.8.1           RColorBrewer_1.1-2
 26 | [17] pheatmap_1.0.12             org.Mm.eg.db_3.11.4
 27 | [19] org.Hs.eg.db_3.11.4         MuSiC_0.1.1
 28 | [21] nnls_1.4                    monocle3_1.0.0
 29 | [23] Matrix_1.3-4                MASS_7.3-54
 30 | [25] gtools_3.9.2                ggrepel_0.9.1
 31 | [27] ggrastr_0.2.3               ggpubr_0.4.0
 32 | [29] ggpointdensity_0.1.0        FARDEEP_1.0.1
 33 | [31] egg_0.4.5                   ggplot2_3.3.5
 34 | [33] gridExtra_2.3               edgeR_3.30.3
 35 | [35] limma_3.44.3                dplyr_1.0.7
 36 | [37] doMC_1.3.7                  iterators_1.0.13
 37 | [39] foreach_1.5.1               devtools_2.4.2
 38 | [41] usethis_2.0.1               data.table_1.14.0
 39 | [43] AnnotationDbi_1.50.3        MAST_1.14.0
 40 | [45] SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.2
 41 | [47] DelayedArray_0.14.1         matrixStats_0.59.0
 42 | [49] Biobase_2.50.0              GenomicRanges_1.40.0
 43 | [51] GenomeInfoDb_1.24.2         IRanges_2.22.2
 44 | [53] S4Vectors_0.26.1            BiocGenerics_0.36.1
 45 | [55] varhandle_2.0.5             ROCR_1.0-11
 46 | [57] SeuratObject_4.0.2          Seurat_4.0.3
 47 | [59] e1071_1.7-9                 reshape_0.8.8
 48 | [61] quadprog_1.5-8              BisqueRNA_1.0.5
 49 | 
 50 | loaded via a namespace (and not attached):
 51 |   [1] SparseM_1.81              scattermore_0.7
 52 |   [3] coda_0.19-4               bit64_4.0.5
 53 |   [5] irlba_2.3.3               rpart_4.1-15
 54 |   [7] RCurl_1.98-1.3            generics_0.1.0
 55 |   [9] callr_3.7.0               cowplot_1.1.1
 56 |  [11] RSQLite_2.2.7             RANN_2.6.1
 57 |  [13] proxy_0.4-26              future_1.21.0
 58 |  [15] bit_4.0.4                 xml2_1.3.2
 59 |  [17] lubridate_1.7.10          spatstat.data_2.1-0
 60 |  [19] httpuv_1.6.5              assertthat_0.2.1
 61 |  [21] hms_1.1.0                 promises_1.2.0.1
 62 |  [23] fansi_0.5.0               dbplyr_2.1.1
 63 |  [25] readxl_1.3.1              igraph_1.2.6
 64 |  [27] DBI_1.1.1                 htmlwidgets_1.5.3
 65 |  [29] mcmc_0.9-7                spatstat.geom_2.2-0
 66 |  [31] ellipsis_0.3.2            backports_1.2.1
 67 |  [33] MCMCpack_1.5-0            deldir_0.2-10
 68 |  [35] vctrs_0.3.8               remotes_2.4.0
 69 |  [37] quantreg_5.86             abind_1.4-5
 70 |  [39] cachem_1.0.5              withr_2.4.2
 71 |  [41] prettyunits_1.1.1         goftest_1.2-2
 72 |  [43] cluster_2.1.2             lazyeval_0.2.2
 73 |  [45] crayon_1.4.1              pkgconfig_2.0.3
 74 |  [47] nlme_3.1-152              vipor_0.4.5
 75 |  [49] pkgload_1.2.1             rlang_0.4.11
 76 |  [51] globals_0.14.0            lifecycle_1.0.0
 77 |  [53] miniUI_0.1.1.1            MatrixModels_0.5-0
 78 |  [55] modelr_0.1.8              rsvd_1.0.5
 79 |  [57] cellranger_1.1.0          rprojroot_2.0.2
 80 |  [59] polyclip_1.10-0           lmtest_0.9-38
 81 |  [61] carData_3.0-4             zoo_1.8-9
 82 |  [63] reprex_2.0.0              beeswarm_0.4.0
 83 |  [65] ggridges_0.5.3            processx_3.5.2
 84 |  [67] png_0.1-7                 bitops_1.0-7
 85 |  [69] KernSmooth_2.23-20        blob_1.2.1
 86 |  [71] DelayedMatrixStats_1.10.1 parallelly_1.26.0
 87 |  [73] rstatix_0.7.0             ggsignif_0.6.2
 88 |  [75] scales_1.1.1              memoise_2.0.0
 89 |  [77] magrittr_2.0.1            plyr_1.8.6
 90 |  [79] ica_1.0-2                 zlibbioc_1.34.0
 91 |  [81] compiler_4.0.1            dqrng_0.3.0
 92 |  [83] fitdistrplus_1.1-5        cli_2.5.0
 93 |  [85] XVector_0.28.0            listenv_0.8.0
 94 |  [87] patchwork_1.1.1           pbapply_1.4-3
 95 |  [89] ps_1.6.0                  mgcv_1.8-36
 96 |  [91] tidyselect_1.1.1          stringi_1.6.2
 97 |  [93] BiocSingular_1.4.0        locfit_1.5-9.4
 98 |  [95] grid_4.0.1                tools_4.0.1
 99 |  [97] future.apply_1.7.0        rio_0.5.27
100 |  [99] rstudioapi_0.13           foreign_0.8-81
101 | [101] Rtsne_0.15                digest_0.6.27
102 | [103] shiny_1.6.0               Rcpp_1.0.8
103 | [105] car_3.0-11                broom_0.7.8
104 | [107] later_1.2.0               RcppAnnoy_0.0.18
105 | [109] httr_1.4.2                colorspace_2.0-2
106 | [111] rvest_1.0.0               fs_1.5.0
107 | [113] tensor_1.5                reticulate_1.20
108 | [115] splines_4.0.1             uwot_0.1.10
109 | [117] statmod_1.4.36            conquer_1.0.2
110 | [119] spatstat.utils_2.2-0      plotly_4.9.4.1
111 | [121] sessioninfo_1.1.1         xtable_1.8-4
112 | [123] jsonlite_1.7.2            testthat_3.0.3
113 | [125] R6_2.5.0                  pillar_1.6.4
114 | [127] htmltools_0.5.1.1         mime_0.11
115 | [129] glue_1.4.2                fastmap_1.1.0
116 | [131] BiocParallel_1.22.0       BiocNeighbors_1.6.0
117 | [133] class_7.3-19              codetools_0.2-18
118 | [135] pkgbuild_1.2.0            utf8_1.2.1
119 | [137] lattice_0.20-44           spatstat.sparse_2.0-0
120 | [139] curl_4.3.2                ggbeeswarm_0.6.0
121 | [141] leiden_0.3.8              zip_2.2.0
122 | [143] openxlsx_4.2.4            survival_3.2-11
123 | [145] desc_1.3.0                munsell_0.5.0
124 | [147] GenomeInfoDbData_1.2.3    haven_2.4.1
125 | [149] reshape2_1.4.4            gtable_0.3.0
126 | [151] spatstat.core_2.2-0
127 | 


--------------------------------------------------------------------------------