├── Module1_MP.R
├── Module2_spatial_coherence_org_zones.R
├── Module3_spatial_associations.R
├── Module4_consensus_interactions.R
├── Module5_CNA.R
├── Module6_CODEX.R
└── README.md


/Module1_MP.R:
--------------------------------------------------------------------------------
  1 | 
  2 | Sys.setenv(RETICULATE_PYTHON = "/.conda/envs/leiden/bin/python")
  3 | Sys.setenv(RETICULATE_PYTHON = "/.conda/envs/leiden/bin/python")
  4 | 
  5 | library(reticulate)
  6 | library(Seurat)
  7 | library(scalop)
  8 | library(leiden)
  9 | library(ggplot2)
 10 | library(RColorBrewer)
 11 | library(viridis)
 12 | library(scales)
 13 | library(reshape2)
 14 | library(reshape2)
 15 | library(scales)
 16 | library(NMF)
 17 | library(MetBrewer)
 18 | library(colorspace)
 19 | library(tibble)
 20 | library(dplyr)
 21 | library(data.table)
 22 | library(stringr)
 23 | library(readr)
 24 | library(Matrix)
 25 | library(bigmemory)
 26 | library(doMC)
 27 | library(patchwork)
 28 | 
 29 | ###### Per sample Leiden clustering##############################
 30 | 
 31 | samples_names <- (read.delim("general/GBM_samples.txt", header = FALSE, sep = "\t"))$V1
 32 | 
 33 | # set parameters 
 34 | complexity_filter <- 1000
 35 | mitochondrial_filter <- 20
 36 | genes_filter <- 7000
 37 | n_dim <- 20
 38 | dim_filter <- 5^(-15)
 39 | res_param <- 1
 40 | sig_th <- .005
 41 | mp_num <- 13
 42 | distinct16_pal<-c("#11A579","#F2B701","#66C5CC","#80BA5A","#F6CF71","#7F3C8D","#CF1C90","#3969AC","#f97b72","#E73F74","#4b4b8f","#ACA4E2","#31C53F","#B95FBB","#D4D915","#28CECA")
 43 | dim2use_list <- c(8,20,19,9,12,14,16,10,10,14,8,10,12,16,7,5,14,16,4,12,6,6,8,12,13,10) # set by jackstraw
 44 | 
 45 | leiden_clustering <- sapply(c(1:length(samples_names)), function(i){
 46 |   print(samples_names[i])
 47 |   # load spatial data
 48 |   unfilt_obj<-Load10X_Spatial(data.dir = paste("general/GBM_data/",samples_names[i],"/outs", sep = ""),
 49 |                               filename = "filtered_feature_bc_matrix.h5",
 50 |                               assay = "Spatial",
 51 |                               slice = "detected_tissue_image.jpg",
 52 |                               filter.matrix = TRUE,
 53 |                               to.upper = FALSE
 54 |   )
 55 |   
 56 |   # filtering
 57 |   exp_obj <- subset(unfilt_obj, subset = nCount_Spatial > complexity_filter) # filter out spots with less than #complexity_filter UMIs
 58 |   exp_obj[["percent.mt"]] <- PercentageFeatureSet(exp_obj, pattern = "^MT-") # filter spots with high percent (mitochondrial_filter) mitochondrial genes
 59 |   exp_obj <- subset(exp_obj, subset = percent.mt<mitochondrial_filter)
 60 |   
 61 |   # normalization and centering
 62 |   exp_obj <- NormalizeData(exp_obj, assay = "Spatial", normalization.method="LogNormalize", scale.factor=10000)
 63 |   exp_obj <- FindVariableFeatures(exp_obj, selection.method = "vst", nfeatures = genes_filter) # use only #genes_filter most variable genes
 64 |   exp_obj <- ScaleData(exp_obj)
 65 |   
 66 |   # PCA
 67 |   exp_obj <- RunPCA(exp_obj, features = VariableFeatures(object = exp_obj))
 68 |   #PCA dims to use determined by Jackstraw
 69 |   #exp_obj <- JackStraw(exp_obj, num.replicate = 100)
 70 |   #exp_obj <- ScoreJackStraw(exp_obj, dims = 1:n_dim)
 71 |   #js_scores <- exp_obj@reductions$pca@jackstraw$overall.p.values
 72 |   #dim2use <- max(js_scores[,"PC"][js_scores[,"Score"] < dim_filter])
 73 |   dim2use <- dim2use_list[i]
 74 |   
 75 |   # clustering
 76 |   exp_obj <- FindNeighbors(exp_obj, dims = 1:dim2use)
 77 |   exp_obj <- FindClusters(exp_obj, algorithm=4, resolution = res_param) # leiden based clustering
 78 | 
 79 |   spatial_sample_programs <- FindAllMarkers(exp_obj, only.pos = TRUE, return.thresh = sig_th)
 80 |   
 81 |   genes_num <- table(spatial_sample_programs$cluster)
 82 |   spots_clusters <- exp_obj@meta.data[,"seurat_clusters"]
 83 |   names(spots_clusters) <- row.names(exp_obj@meta.data)
 84 |   cluster_num <- table(exp_obj@meta.data$seurat_clusters)
 85 |   
 86 |   return(list(spatial_sample_programs,genes_num,spots_clusters,cluster_num)) # output: #genes per cluster, #spots per cluster, clusters genes sig, spots assignment to cluster
 87 |   
 88 | })
 89 | 
 90 | 
 91 | # clusters quality 
 92 | genes_num <- c()
 93 | spots_num <- c()
 94 | sapply(c(1:length(samples_names)), function(i){
 95 |   genes_num <<- c(genes_num,as.numeric(leiden_clustering[2,i][[1]]))
 96 |   spots_num <<- c(spots_num,as.numeric(leiden_clustering[4,i][[1]]))
 97 | })
 98 | hist(genes_num, breaks = 30)
 99 | hist(spots_num, breaks = 30)
100 | 
101 | # create genes sig table for save
102 | genes_sig <- leiden_clustering[1,1][[1]]
103 | genes_sig$sample <- rep(samples_names[1],dim(genes_sig)[1])
104 | 
105 | sapply(c(2:length(samples_names)),function(i){
106 |   genes_sig_temp <- leiden_clustering[1,i][[1]]
107 |   genes_sig_temp$sample <- rep(samples_names[i],dim(genes_sig_temp)[1])
108 |   genes_sig <<- rbind(genes_sig,genes_sig_temp)
109 | })
110 | 
111 | # create clusters assignment table for save
112 | spots_assign <- as.data.frame(leiden_clustering[3,1][[1]])
113 | spots_assign$sample <- rep(samples_names[1],dim(spots_assign)[1])
114 | row.names(spots_assign) <- paste(row.names(spots_assign), "_", samples_names[1], sep = "")
115 | colnames(spots_assign) <- c("cluster","sample")
116 | 
117 | sapply(c(2:length(samples_names)),function(i){
118 |   spots_assign_temp <- as.data.frame(leiden_clustering[3,i][[1]])
119 |   spots_assign_temp$sample <- rep(samples_names[i],dim(spots_assign_temp)[1])
120 |   row.names(spots_assign_temp) <- paste(row.names(spots_assign_temp), "_", samples_names[i], sep = "")
121 |   colnames(spots_assign_temp) <- c("cluster","sample")
122 |   spots_assign <<- rbind(spots_assign,spots_assign_temp)
123 | })
124 | 
125 | # set gene signatures list in the right format 
126 | genes_list <- lapply(c(1:length(samples_names)), function(i){
127 |   sample_table <- leiden_clustering[1,i][[1]]
128 |   sample_genes <- lapply(c(1:length(unique(sample_table$cluster))),function(j){
129 |     cluster_table <- sample_table[sample_table$cluster == j,]
130 |     genes <- cluster_table$gene[order(cluster_table$avg_log2FC, decreasing = TRUE)][1:min(50,nrow(cluster_table))]
131 |     return(genes)
132 |   })
133 |   return(sample_genes)
134 | })
135 | genes_list <- unlist(genes_list, recursive = FALSE)
136 | names(genes_list) <- unlist(sapply(c(1:length(samples_names)),function(i){
137 |   paste(samples_names[i],c(1:length(leiden_clustering[4,i][[1]])), sep = "_")
138 | }))
139 | 
140 | samples_list <- unlist(sapply(c(1:length(samples_names)),function(i){
141 |   rep(samples_names[i],length(leiden_clustering[4,i][[1]]))
142 | }))
143 | #saveRDS(genes_list,"/home/labs/tirosh/spatial_glioma/results/leiden_MPs/all_leiden_programs_v2.rds")
144 | # generate an ordered jaccard matrix and clusters 
145 | jac_mat = Jaccard(genes_list)
146 | jac_matord = scalop::hca_reorder(jac_mat)
147 | jac_plot <- melt(jac_matord)
148 | 
149 | prog_ordered <- colnames(jac_matord)
150 | clusters <- scalop::hca_groups(jac_matord,
151 |                                cor.method="none",
152 |                                k=mp_num,
153 |                                min.size=4,
154 |                                max.size=0.5)
155 | 
156 | clusterCut <- sapply(names(clusters), function(i){
157 |   new_vec <- rep(i,length(clusters[[i]]))
158 |   names(new_vec) <- clusters[[i]]
159 |   return(new_vec)
160 | })
161 | 
162 | names(clusterCut) <- NULL
163 | clusterCut <- unlist(clusterCut)
164 | 
165 | # plot jaccard 
166 | 
167 | jac_plot$value[jac_plot$value >= 0.35] <- 0.35
168 | p1 <- ggplot(data = jac_plot, aes(x = Var1, y = Var2, fill = value)) + geom_raster() + 
169 |   scale_fill_gradientn(colours = colorspace::sequential_hcl(9, palette="YlOrRd", rev=T), limits = c(0,0.35), name = "Jaccard\nIndex") +
170 |   theme(axis.ticks = element_blank(), panel.background = element_rect(fill = "white"),  axis.line = element_blank(), legend.text.align = 0.5) +
171 |   scale_x_discrete(name = "\nPrograms", labels = element_blank(), breaks = element_blank()) + scale_y_discrete(name = "\nPrograms", labels = element_blank(), breaks = element_blank())
172 | 
173 | 
174 | metaprog_df <- data.frame(row.names = prog_ordered,
175 |                           cluster = as.factor(clusterCut[prog_ordered]),
176 |                           samples = stringr::str_replace(prog_ordered, "_.*",""),
177 |                           cells = prog_ordered)
178 | 
179 | samples_annotation_plot <- ggplot(metaprog_df, aes(x = factor(cells, levels = cells), y = 1)) +
180 |   geom_raster(aes(fill = samples)) + theme(legend.position = "none",axis.ticks = element_blank(), panel.background = element_rect(fill = "white"), axis.text = element_blank(), axis.line = element_blank(), axis.text.x = element_blank()) +
181 |   labs(x = "", y = "samples", fill = "")
182 | metaprog_annotation_plot <- ggplot(metaprog_df, aes(x = factor(cells, levels = cells), y = 1)) +
183 |   geom_raster(aes(fill = cluster)) + theme(legend.position = "none", axis.ticks = element_blank(), panel.background = element_rect(fill = "white"), axis.text = element_blank(), axis.line = element_blank(), axis.text.x = element_blank()) +
184 |   labs(x = "", y = "", fill = "")
185 | MP_plot <- egg::ggarrange(p1, metaprog_annotation_plot, samples_annotation_plot, ncol = 1, nrow = 3, heights = c(40, 2, 1))
186 | 
187 | 
188 | #Define meta-programs from program clusters, Sort by frequency across programs in cluster (>= 3)
189 | cut_off <- table(metaprog_df$cluster)*0.25
190 | cut_off[cut_off < 3] <- 3
191 | 
192 | mp_freqs = sapply(clusters, function(k) sort(table(unlist(genes_list[k])),decreasing = T),simplify = F)
193 | metaprograms = sapply(names(mp_freqs), function(c) head(names(mp_freqs[[c]])[mp_freqs[[c]] >= as.numeric(cut_off[c])], 50), simplify = F)
194 | names(metaprograms)<-c("vascular","neuron","hypoxia","hypoxia.immune","oligo","AC.immune","low.quality","NPC2","macrophage","MES.stromal","OPC.AC","metabolism","MES1.RGL")
195 | 
196 | 
197 | metaprog_df$samp_cluster <- str_split(metaprog_df$cells, "_", simplify = T)[,2]
198 | metaprog_df$anno_cluster <- metaprog_df$cluster
199 | class(metaprog_df$anno_cluster)
200 | levels(metaprog_df$anno_cluster) <- c("vascular","MES.stromal","OPC.AC","metabolism","MES1.RGL","neuron","hypoxia","hypoxia.immune","oligo","AC.immune","low.quality","NPC2","macrophage")
201 | 
202 | sapply(c(1:26), function(i){
203 |   print(samples_names[i])
204 |   spots_gen_assign <- as.data.frame(leiden_clustering[3,i][[1]])
205 |   spots_gen_assign$generalized <- apply(spots_gen_assign,1,function(r){
206 |     return(metaprog_df$anno_cluster[metaprog_df$samples == samples_names[i] & metaprog_df$samp_cluster == r])
207 |   })
208 |   spots_gen_assign$barcodes <- row.names(spots_gen_assign)
209 |   spots_gen_assign <- spots_gen_assign[,c("barcodes", "generalized")]
210 | })
211 | 
212 | 
213 | ######generating input matrices for NMF
214 | # vector of file paths
215 | file_paths <- list.files(path ="/general/exp_mats_GBM/", pattern = "\\.rds", full.names = TRUE)#directory with counts mats
216 | sample_ls <-  gsub(pattern = "\\.rds$", replacement = "", x = basename(file_paths))
217 | per_sample_mat <- lapply(file_paths, readRDS)
218 | 
219 | for (i in seq_along(per_sample_mat)){ 
220 |   m <- as.matrix(per_sample_mat[[i]])
221 |   if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]}
222 |   scaling_factor <- 1000000/colSums(m)
223 |   m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*")
224 |   m_loged <- log2(1 + (m_CPM/10))
225 |   
226 |   # removing genes with zero variance across all cells
227 |   var_filter <- apply(m_loged, 1, var)
228 |   m_proc <- m_loged[var_filter != 0, ]
229 |   # filtering out lowly expressed genes
230 |   exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)]
231 |   m_proc <- m_proc[exp_genes, ]
232 |   
233 |   # centering data gene-wise
234 |   count_mat_cent<- m_proc - rowMeans(m_proc)
235 |   #nmf preprocessing
236 |   count_mat_nmf<- count_mat_cent
237 |   count_mat_nmf[count_mat_nmf<0] <- 0 # negative values should be set to 0 in initial matrix
238 |   
239 |   # output to a list of gene expression profiles (GEP)
240 |   per_sample_mat[[i]] <- count_mat_nmf
241 |   names(per_sample_mat)[i] <- sample_ls[[i]]
242 |   rm(m,m_loged, var_filter, exp_genes, m_proc,count_mat_cent)
243 |   #saveRDS(count_mat_nmf, paste("MP/NMF/NMF_mats_GBM/", samples_names[i], ".rds", sep =""))
244 | }
245 | 
246 | 
247 | ## Extract sample names for sample list file:
248 | #samples <- list.files("MP/NMF/NMF_mats_GBM/") %>% substri(., pos = 1) %>% str_c(., collapse = "\n")
249 | #write_lines(samples, ("/samples.txt"), sep = "\n")
250 | 
251 | 
252 | 
253 | 
254 | ###### NMF (Previously run by server. Possible to run per sample below)---------------------------------------------------------------------
255 | 
256 | #!/usr/bin/env Rscript
257 | #args = commandArgs(trailingOnly = TRUE)
258 | 
259 | 
260 | sname <- "MGH258"# enter sample name here 
261 | rank_lb <- 2
262 | rank_ub <- 11
263 | 
264 | m <- readRDS(paste0("MP/NMF/NMF_mats_GBM/", sname, ".rds"))
265 | m <- as.matrix(m)
266 | res <- NMF::nmf(x = m, rank = rank_lb:rank_ub, nrun = 5, method = "snmf/r", .opt = list(debug=F, parallel=F, shared.memory=F, verbose=T))
267 | 
268 | 
269 | ###### Generation of NMF + Leiden GBM spatial metaprograms#########
270 | 
271 | #
272 | 
273 | #robust_nmf_programs function
274 | 
275 | robust_nmf_programs <- function(nmf_programs, intra_min = 30, intra_max = 10, inter_filter=T, inter_min = 10) {
276 |   
277 |   # Select NMF programs based on the minimum overlap with other NMF programs from the same sample
278 |   intra_intersect <- lapply(nmf_programs, function(z) apply(z, 2, function(x) apply(z, 2, function(y) length(intersect(x,y))))) 
279 |   intra_intersect_max <- lapply(intra_intersect, function(x) apply(x, 2, function(y) sort(y, decreasing = T)[2]))             
280 |   nmf_sel <- lapply(names(nmf_programs), function(x) nmf_programs[[x]][,intra_intersect_max[[x]]>=intra_min]) 
281 |   names(nmf_sel) <- names(nmf_programs)
282 |   
283 |   # Select NMF programs based on i) the maximum overlap with other NMF programs from the same sample and
284 |   # ii) the minimum overlap with programs from another sample
285 |   nmf_sel_unlist <- do.call(cbind, nmf_sel)
286 |   inter_intersect <- apply(nmf_sel_unlist , 2, function(x) apply(nmf_sel_unlist , 2, function(y) length(intersect(x,y)))) ## calculating intersection between all programs
287 |   
288 |   final_filter <- NULL 
289 |   for(i in names(nmf_sel)) {
290 |     a <- inter_intersect[grep(i, colnames(inter_intersect), invert = T),grep(i, colnames(inter_intersect))]
291 |     b <- sort(apply(a, 2, max), decreasing = T) # for each sample, ranks programs based on their maximum overlap with programs of other samples
292 |     if(inter_filter==T) b <- b[b>=inter_min] # selects programs with a maximum intersection of at least 10
293 |     if(length(b) > 1) {
294 |       c <- names(b[1]) 
295 |       for(y in 2:length(b)) {
296 |         if(max(inter_intersect[c,names(b[y])]) <= intra_max) c <- c(c,names(b[y])) # selects programs iteratively from top-down. Only selects programs that have a intersection smaller than 10 with a previously selected programs
297 |       }
298 |       final_filter <- c(final_filter, c)
299 |     } else {
300 |       final_filter <- c(final_filter, names(b))
301 |     }
302 |   }
303 |   return(final_filter)                                                      
304 | }
305 | 
306 | 
307 | # Custom color palette
308 | 
309 | custom_magma <- c(colorRampPalette(c("white", rev(magma(323, begin = 0.15))[1]))(10), rev(magma(323, begin = 0.18)))
310 | 
311 | ## Create list of NMF matrics where each sample is an entry
312 | path <- "MP/NMF/out_dir/"
313 | sample_ls <- list.files(path)
314 | 
315 | ## Create list of NMF matrics where each sample is an entry
316 | prog_genes_ls <- list()
317 | for(i in seq_along(sample_ls)) {
318 |   nmf_obj <- readRDS(paste(path, sample_ls[[i]], sep = "/"))
319 |   samp_name <- stringr::str_split(sample_ls[[i]], pattern = "_")[[1]][[1]]
320 |   nmf_mats <- c()
321 |   for(j in names(nmf_obj$fit)) {
322 |     get_basis <- basis(nmf_obj$fit[[j]])
323 |     colnames(get_basis)  <- paste0(samp_name, ".", j, ".", 1:j)
324 |     nmf_mats <- cbind(nmf_mats, get_basis)
325 |   }
326 |   prog_genes_ls[[i]] <- nmf_mats
327 |   names(prog_genes_ls)[i] <- paste0(samp_name)
328 |   rm(nmf_obj, nmf_mats, get_basis)
329 | }
330 | Genes_nmf_w_basis <- do.call(c, list(prog_genes_ls))
331 | 
332 | # Find robust NMFs
333 | # get gene programs (top 50 genes by NMF score)
334 | nmf_programs_sig <- lapply(prog_genes_ls, function(x) apply(x, 2, function(y) names(sort(y, decreasing = T))[1:50]))
335 | 
336 | # for each sample, select robust NMF programs (i.e. observed using different ranks in the same sample), remove redundancy due to multiple ranks, and apply a filter based on the similarity to programs from other samples. 
337 | nmf_filter_all <- robust_nmf_programs(nmf_programs_sig, intra_min = 35, intra_max = 10, inter_filter=T, inter_min = 10)
338 | nmf_programs_sig <- lapply(nmf_programs_sig, function(x) x[, is.element(colnames(x), nmf_filter_all),drop=F])
339 | nmf_programs_sig <- do.call(cbind, nmf_programs_sig)
340 | 
341 | #leiden clusters
342 | all_leiden_programs<-readRDS("MP/all_leiden_programs_v2.rds")
343 | all_leiden_programs <- do.call(cbind, all_leiden_programs)
344 | nmf_programs_sig<-cbind(nmf_programs_sig,all_leiden_programs)
345 | 
346 | # calculate similarity between programs
347 | nmf_intersect <- apply(nmf_programs_sig , 2, function(x) apply(nmf_programs_sig , 2, function(y) length(intersect(x,y)))) 
348 | 
349 | # hierarchical clustering of the similarity matrix 
350 | nmf_intersect_hc_all <- hclust(as.dist(50-nmf_intersect), method="average") 
351 | nmf_intersect_hc_all <- reorder(as.dendrogram(nmf_intersect_hc_all), colMeans(nmf_intersect))
352 | nmf_intersect         <- nmf_intersect[order.dendrogram(nmf_intersect_hc_all), order.dendrogram(nmf_intersect_hc_all)]
353 | 
354 | nmf_intersect<-readRDS("MP/NMF/nmf_intersect_124.RDS")
355 | nmf_programs_sig<-readRDS("MP/NMF/nmf_programs_sig_124.RDS")
356 | 
357 | 
358 | ### use a clustering approach that updates MPs in each iteration 
359 | 
360 | nmf_intersect_KEEP    <- nmf_intersect
361 | nmf_programs_sig_KEEP <- nmf_programs_sig
362 | 
363 | 
364 | ### Parameters (later change to function form)v1-keep!
365 | 
366 | Min_intersect_initial <- 12  # the minimal intersection cutoff for defining the Founder NMF program of a cluster
367 | Min_intersect_cluster <- 12 # the minimal intersection cuttof for adding a new NMF to the forming cluster 
368 | Min_group_size        <- 4     # the minimal group size to consider for defining the Founder_NMF of a MP 
369 | 
370 | Sorted_intersection       <-  sort(apply(nmf_intersect , 2, function(x) (length(which(x>=Min_intersect_initial))-1)  ) , decreasing = TRUE)
371 | 
372 | Cluster_list <- list()   ### Every entry contains the NMFs of a chosec cluster
373 | k <- 1
374 | Curr_cluster <- c()
375 | MP_list      <- list()
376 | 
377 | while (Sorted_intersection[1]>Min_group_size) {   
378 |   
379 |   Curr_cluster <- c(Curr_cluster , names(Sorted_intersection[1]))
380 |   
381 |   ### intersection between all remaining NMFs and Genes in MP 
382 |   Genes_MP                   <- nmf_programs_sig[,names(Sorted_intersection[1])] # initial genes are those in the first NMF. Genes_MP always has only 50 genes consisting of the current MP
383 |   nmf_programs_sig           <- nmf_programs_sig[,-match(names(Sorted_intersection[1]) , colnames(nmf_programs_sig))]  # remove selected NMF
384 |   Intersection_with_Genes_MP <- sort(apply(nmf_programs_sig, 2, function(x) length(intersect(Genes_MP,x))) , decreasing = TRUE) # intersection between all other NMFs and Genes_MP  
385 |   NMF_history                <- Genes_MP  # has all genes in all NMFs in the current cluster, for newly defining Genes_MP after adding a new NMF 
386 |   
387 |   ### Create gene list - composed of intersecting genes in descending order. Update Curr_cluster each time.
388 |   
389 |   while ( Intersection_with_Genes_MP[1] >= Min_intersect_cluster) {   ### Define current cluster 
390 |     
391 |     Curr_cluster  <- c(Curr_cluster , names(Intersection_with_Genes_MP)[1])
392 |     
393 |     Genes_MP_temp   <- sort(table(c(NMF_history , nmf_programs_sig[,names(Intersection_with_Genes_MP)[1]])), decreasing = TRUE)   ## Genes_MP is newly defined each time according to all NMFs in the current cluster 
394 |     Genes_at_border <- Genes_MP_temp[which(Genes_MP_temp == Genes_MP_temp[50])]   ### genes with overlap equal to the 50th gene
395 |     
396 |     if (length(Genes_at_border)>1){
397 |       ### Sort last genes in Genes_at_border according to maximal NMF gene scores
398 |       ### Run over all NMF programs in Curr_cluster and extract NMF scores for each gene
399 |       Genes_curr_NMF_score <- c()
400 |       for (i in Curr_cluster) {
401 |         curr_study           <- strsplit(i, "[.]")[[1]][[1]]
402 |         Q                    <- Genes_nmf_w_basis[[curr_study]][ match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]])))[!is.na(match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]]))))]   ,i] 
403 |         #names(Q)             <- names(Genes_at_border[!is.na(match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]]))))])  ### sometimes when adding genes the names do not appear 
404 |         Genes_curr_NMF_score <- c(Genes_curr_NMF_score,  Q )
405 |       }
406 |       Genes_curr_NMF_score_sort <- sort(Genes_curr_NMF_score , decreasing = TRUE)
407 |       Genes_curr_NMF_score_sort <- Genes_curr_NMF_score_sort[unique(names(Genes_curr_NMF_score_sort))]   ## take only the maximal score of each gene - which is the first entry after sorting
408 |       
409 |       Genes_MP_temp <- c(names(Genes_MP_temp[which(Genes_MP_temp > Genes_MP_temp[50])]) , names(Genes_curr_NMF_score_sort))
410 |       
411 |     } else {
412 |       Genes_MP_temp <- names(Genes_MP_temp)[1:50] 
413 |     }
414 |     
415 |     NMF_history   <- c(NMF_history , nmf_programs_sig[,names(Intersection_with_Genes_MP)[1]]) 
416 |     Genes_MP <- Genes_MP_temp[1:50]
417 |     
418 |     nmf_programs_sig      <- nmf_programs_sig[,-match(names(Intersection_with_Genes_MP)[1] , colnames(nmf_programs_sig))]  # remove selected NMF
419 |     
420 |     Intersection_with_Genes_MP <- sort(apply(nmf_programs_sig, 2, function(x) length(intersect(Genes_MP,x))) , decreasing = TRUE) # intersection between all other NMFs and Genes_MP  
421 |     
422 |   }
423 |   
424 |   Cluster_list[[paste0("Cluster_",k)]] <- Curr_cluster
425 |   MP_list[[paste0("MP_",k)]]           <- Genes_MP
426 |   k <- k+1
427 |   
428 |   
429 |   nmf_intersect             <- nmf_intersect[-match(Curr_cluster,rownames(nmf_intersect) ) , -match(Curr_cluster,colnames(nmf_intersect) ) ]  # remove current chosen cluster
430 |   
431 |   Sorted_intersection       <-  sort(apply(nmf_intersect , 2, function(x) (length(which(x>=Min_intersect_initial))-1)  ) , decreasing = TRUE)   ### Sort intersection of remaining NMFs not included in any of the previous clusters
432 |   
433 |   Curr_cluster <- c()
434 |   print(dim(nmf_intersect)[2])
435 | }
436 | 
437 | #### *****  Sort Jaccard similarity plot according to new clusters:
438 | 
439 | inds_sorted <- c()
440 | 
441 | for (j in 1:length(Cluster_list)){
442 |   
443 |   inds_sorted <- c(inds_sorted , match(Cluster_list[[j]] , colnames(nmf_intersect_KEEP)))
444 |   
445 | }
446 | inds_new <- c(inds_sorted   ,   which(is.na( match(1:dim(nmf_intersect_KEEP)[2],inds_sorted)))) ### combine inds in clusters with inds without clusters
447 | 
448 | 
449 | # plot re-ordered similarity matrix heatmap     
450 | nmf_intersect_meltI_NEW <- reshape2::melt(nmf_intersect_KEEP[inds_new,inds_new]) 
451 | 
452 | custom_magma <- c(colorRampPalette(c("white", rev(magma(323, begin = 0.15))[1]))(10), rev(magma(323, begin = 0.18)))
453 | 
454 | 
455 | ggplot(data = nmf_intersect_meltI_NEW, aes(x=Var1, y=Var2, fill=100*value/(100-value), color=100*value/(100-value))) + 
456 |   geom_tile() + 
457 |   scale_color_gradient2(limits=c(2,25), low=custom_magma[1:111],  mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 13.5, oob=squish, name="Similarity\n(Jaccard index)") +                                
458 |   scale_fill_gradient2(limits=c(2,25), low=custom_magma[1:111],  mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 13.5, oob=squish, name="Similarity\n(Jaccard index)")  +
459 |   theme( axis.ticks = element_blank(), panel.border = element_rect(fill=F), panel.background = element_blank(),  axis.line = element_blank(), axis.text = element_text(size = 11), axis.title = element_text(size = 12), legend.title = element_text(size=11), legend.text = element_text(size = 10), legend.text.align = 0.5, legend.justification = "bottom") + 
460 |   theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + 
461 |   theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + 
462 |   guides(fill = guide_colourbar(barheight = 4, barwidth = 1))
463 | 
464 | ggplot(data = nmf_intersect_meltI_NEW, aes(x=Var1, y=Var2, fill=100*value/(100-value), color=100*value/(100-value))) + 
465 |   geom_tile() + 
466 |   scale_color_gradient2(limits=c(2,30), low=custom_magma[1:111],  mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 16, oob=squish, name="Similarity\n(Jaccard index)") +                                
467 |   scale_fill_gradient2(limits=c(2,30), low=custom_magma[1:111],  mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 16, oob=squish, name="Similarity\n(Jaccard index)")  +
468 |   theme( axis.ticks = element_blank(), panel.border = element_rect(fill=F), panel.background = element_blank(),  axis.line = element_blank(), axis.text = element_text(size = 11), axis.title = element_text(size = 12), legend.title = element_text(size=11), legend.text = element_text(size = 10), legend.text.align = 0.5, legend.justification = "bottom") + 
469 |   theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + 
470 |   theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + 
471 |   guides(fill = guide_colourbar(barheight = 4, barwidth = 1))
472 | 
473 | 
474 | MP <-  do.call(cbind, MP_list)
475 | 
476 | names(MP_list)=c("Neuron","Vasc","MES.Hyp","Mac","OPC.AC","Oligo","LQ.Chromatin.reg","MES","Prolif.Metab","MES.Ast","Reactive.Ast","NPC","Inflammatory.Mac")
477 | 
478 | 
479 | ###### RECLUSTERING/HEIRARCHICAL CLUSTERING, EXTENDED MPs, AND SUBCLUSTER PROGRAMS ################
480 | programs_ls<-readRDS("MP/NMF/programs_ls_124.rds") #this is "nmf_programs_sig"
481 | Cluster_list<-readRDS("MP/NMF/Cluster_list_124.rds")
482 | str(Cluster_list)
483 | MP_list<-readRDS("MP/NMF/combined_gbm_metaprograms_raw_124.rds")
484 | 
485 | 
486 | #reclustering OPC.AC into 2 clusters
487 | opc.ac <- Cluster_list[[5]]
488 | 
489 | mat2list <- function(x) {
490 |   stopifnot(is.matrix(x))
491 |   lapply(seq_len(ncol(x)), function(i) x[, i])
492 | }
493 | program_ls<-mat2list(programs_ls)
494 | names(program_ls)<-colnames(programs_ls)
495 | opc.ac_mat <- program_ls[names(program_ls) %in% opc.ac]
496 | 
497 | jac_mat <- scalop::jaccard(opc.ac_mat)
498 | hc <- hclust(dist(1 - jac_mat), method = "average")
499 | jac_plot <- melt(jac_mat[hc$order, hc$order])
500 | #summary(jac_plot$value)
501 | 
502 | p1 <- ggplot(data = jac_plot, aes(x = Var1, y = Var2, fill = value)) + geom_raster() + 
503 |   scale_fill_gradient2(limits = c(0, 0.4), low = rev(magma(323, begin = 0.15)), mid = "antiquewhite", high = rev(magma(323, begin = 0.18)), midpoint = 0 , oob = squish, name = "Pearson\nCorrelation") + 
504 |   theme(axis.ticks = element_blank(), panel.background = element_rect(fill = "white"),  axis.line = element_blank(),axis.title = element_text(size=24), legend.text.align = 0.5) + 
505 |   scale_x_discrete(name = "\nPrograms", breaks = colnames(jac_mat)[hc$order][seq(5, ncol(jac_mat), by = 5)], labels = seq(5, ncol(jac_mat), by = 5)) + 
506 |   scale_y_discrete(name = "\nPrograms", breaks = colnames(jac_mat)[hc$order][seq(5, ncol(jac_mat), by = 5)], labels = seq(5, ncol(jac_mat), by = 5))  
507 | 
508 | prog_ordered <- colnames(jac_mat[hc$order, hc$order])
509 | plot(hc)
510 | rect.hclust(hc , k = 2, border = 2:6)
511 | clusterCut <- stats::cutree(tree = hc, k = 2)
512 | 
513 | metaprog_df <- data.frame(row.names = prog_ordered)
514 | metaprog_df$cluster <- (clusterCut[prog_ordered])
515 | metaprog_df$cells <- prog_ordered
516 | metaprog_df$cluster <- as.factor(metaprog_df$cluster)
517 | 
518 | metaprog_annotation_plot <- ggplot(metaprog_df, aes(x = factor(cells, levels = cells), y = 1)) + 
519 |   geom_raster(aes(fill = cluster)) + theme(legend.position = "bottom", axis.ticks = element_blank(),axis.title = element_text(size=24), panel.background = element_rect(fill = "white"), axis.text = element_blank(), axis.line = element_blank(), axis.text.x = element_blank()) + 
520 |   labs(x = "", y = "", fill = "") 
521 | 
522 | egg::ggarrange(p1, metaprog_annotation_plot, ncol = 1, nrow = 2, heights = c(40, 2))
523 | 
524 | clust_list_ac_opc <- list(names(clusterCut[clusterCut == 1]), names(clusterCut[clusterCut == 2])) # use this ordering to reorder cluster 5 in main MP heatmap
525 | mp_freqs <- sapply(clust_list_ac_opc, function(k) sort(table(unlist(opc.ac_mat[k])), decreasing = T), simplify = F)
526 | opc.ac_metaprograms <- sapply(mp_freqs, function(tab) head(names(tab)[tab >= 3], 50), simplify = F)
527 | names(opc.ac_metaprograms)<-c("AC","OPC")
528 | 
529 | 
530 | #dividing low quality from chromatin.reg cluster
531 | chromatin <- Cluster_list[[7]]
532 | mat2list <- function(x) {
533 |   stopifnot(is.matrix(x))
534 |   lapply(seq_len(ncol(x)), function(i) x[, i])
535 | }
536 | program_ls<-mat2list(programs_ls)
537 | names(program_ls)<-colnames(programs_ls)
538 | chromatin_mat <- program_ls[names(program_ls) %in% chromatin]
539 | 
540 | jac_mat <- scalop::jaccard(chromatin_mat)
541 | hc <- hclust(dist(1 - jac_mat), method = "average")
542 | jac_plot <- melt(jac_mat[hc$order, hc$order])
543 | #summary(jac_plot$value)
544 | 
545 | p1 <- ggplot(data = jac_plot, aes(x = Var1, y = Var2, fill = value)) + geom_raster() + 
546 |   scale_fill_gradient2(limits = c(0, 0.4), low = rev(magma(323, begin = 0.15)), mid = "antiquewhite", high = rev(magma(323, begin = 0.18)), midpoint = 0 , oob = squish, name = "Pearson\nCorrelation") + 
547 |   theme(axis.ticks = element_blank(), panel.background = element_rect(fill = "white"),  axis.line = element_blank(),axis.title = element_text(size=24), legend.text.align = 0.5) + 
548 |   scale_x_discrete(name = "\nPrograms", breaks = colnames(jac_mat)[hc$order][seq(5, ncol(jac_mat), by = 5)], labels = seq(5, ncol(jac_mat), by = 5)) + 
549 |   scale_y_discrete(name = "\nPrograms", breaks = colnames(jac_mat)[hc$order][seq(5, ncol(jac_mat), by = 5)], labels = seq(5, ncol(jac_mat), by = 5))  
550 | 
551 | 
552 | prog_ordered <- colnames(jac_mat[hc$order, hc$order])
553 | plot(hc)
554 | rect.hclust(hc , k = 3, border = 2:6)
555 | clusterCut <- stats::cutree(tree = hc, k = 3)
556 | 
557 | metaprog_df <- data.frame(row.names = prog_ordered)
558 | metaprog_df$cluster <- (clusterCut[prog_ordered])
559 | metaprog_df$cells <- prog_ordered
560 | metaprog_df$cluster <- as.factor(metaprog_df$cluster)
561 | 
562 | metaprog_annotation_plot <- ggplot(metaprog_df, aes(x = factor(cells, levels = cells), y = 1)) + 
563 |   geom_raster(aes(fill = cluster)) + theme(legend.position = "bottom", axis.ticks = element_blank(),panel.background = element_rect(fill = "white"), axis.text = element_blank(), axis.line = element_blank(), axis.text.x = element_blank(),axis.title = element_text(size=16)) + 
564 |   labs(x = "", y = "", fill = "") 
565 | 
566 | egg::ggarrange(p1, metaprog_annotation_plot, ncol = 1, nrow = 2, heights = c(40, 2))
567 | 
568 | clust_list <- list(names(clusterCut[clusterCut == 1]), names(clusterCut[clusterCut == 2]),names(clusterCut[clusterCut == 3]))
569 | mp_freqs <- sapply(clust_list, function(k) sort(table(unlist(chromatin_mat[k])), decreasing = T), simplify = F)
570 | chromatin_metaprograms <- sapply(mp_freqs, function(tab) head(names(tab)[tab >= 3], 50), simplify = F)
571 | names(chromatin_metaprograms)<-c("LQ1","chromatin_clean","LQ2")
572 | chromatin_metaprogram_clean<-chromatin_metaprograms$chromatin_clean
573 | 
574 | 
575 | clust_list2 <- list(names(clusterCut[clusterCut == 1]), names(clusterCut[clusterCut == 3]),names(clusterCut[clusterCut == 2]))
576 | #this is the order to use for the final MP heatmap
577 | clust_list_chromatin <- c(names(clusterCut[clusterCut == 1]), names(clusterCut[clusterCut == 2]),names(clusterCut[clusterCut == 3]))
578 | 
579 | ###### MPs after OP/AC split and LQ/chromatin.reg split#######
580 | MP_list2<-MP_list[-c(5,7)]
581 | 
582 | MP_list2[["chromatin.reg"]] <- chromatin_metaprogram_clean
583 | MP_list2[["OPC"]] <- opc.ac_metaprograms$OPC
584 | MP_list2[["AC"]] <- opc.ac_metaprograms$AC
585 | 
586 | 
587 | ###### SPOT ASSIGNMENTS TO CLEANED METAPROGRAMS##########
588 | 
589 | # generate normalized exp matrices
590 | file_paths <- list.files(path ="general/exp_mats_GBM", pattern = "\\.rds", full.names = TRUE)
591 | sample_ls <-  gsub(pattern = "\\.rds$", replacement = "", x = basename(file_paths))
592 | sample_ls->samples_names
593 | sample_ls->samples
594 | per_sample_mat <- lapply(file_paths, readRDS)
595 | 
596 | for (i in seq_along(per_sample_mat)){ 
597 |   m <- as.matrix(per_sample_mat[[i]])
598 |   m <- m[-grep("^MT-|^RPL|^RPS", rownames(m)), ]
599 |   if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]}
600 |   scaling_factor <- 1000000/colSums(m)
601 |   m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*")
602 |   m_loged <- log2(1 + (m_CPM/10))
603 |   
604 |   # removing genes with zero variance across all cells
605 |   var_filter <- apply(m_loged, 1, var)
606 |   m_proc <- m_loged[var_filter != 0, ]
607 |   # filtering out lowly expressed genes
608 |   exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)]
609 |   m_proc <- m_proc[exp_genes, ]
610 |   
611 |   # output to a list of gene expression profiles (GEP)
612 |   per_sample_mat[[i]] <- m_proc
613 |   names(per_sample_mat)[i] <- sample_ls[[i]]
614 |   rm(m,m_loged, var_filter, exp_genes, m_proc)
615 | }
616 | 
617 | #generate a list with the filtered exp matrix for each sample, i.e. m_proc<-(per_sample_mat[[1]])
618 | score_mat <- lapply(c(1:length(per_sample_mat)), function(i){
619 |   m_proc<-per_sample_mat[[i]]
620 |   metaprograms_gene_list <- readRDS("MP/clean_spatial_gbm_metaprograms_124.rds")
621 |   signatures <- scalop::sigScores(m_proc, metaprograms_gene_list, expr.center = TRUE, conserved.genes = 0.5)
622 |   
623 |   spot_scores <- data.frame(spot_names = rownames(signatures))
624 |   spot_scores$Neuron <- signatures$Neuron
625 |   spot_scores$Vasc<- signatures$Vasc
626 |   spot_scores$MES.Hyp <- signatures$MES.Hyp
627 |   spot_scores$Mac <- signatures$Mac
628 |   spot_scores$Oligo <- signatures$Oligo
629 |   spot_scores$MES <- signatures$MES
630 |   spot_scores$Prolif.Metab <- signatures$Prolif.Metab
631 |   spot_scores$MES.Ast <- signatures$MES.Ast
632 |   spot_scores$Reactive.Ast <- signatures$Reactive.Ast
633 |   spot_scores$NPC <- signatures$NPC
634 |   spot_scores$Inflammatory.Mac <- signatures$Inflammatory.Mac
635 |   spot_scores$chromatin.reg <- signatures$chromatin.reg
636 |   spot_scores$OPC <- signatures$OPC
637 |   spot_scores$AC <- signatures$AC
638 |   return(spot_scores)
639 | })
640 | 
641 | names(score_mat) <- sample_ls
642 | 
643 | for (i in seq_along(score_mat)){
644 |   score_df <- as.data.frame(score_mat[[i]])
645 |   score_df <-column_to_rownames(score_df, 'spot_names')
646 |   maxcol_meta<-maxcol_strict(score_df)
647 |   maxcol_meta<-stack(maxcol_meta)
648 |   setnames(maxcol_meta,2,"spot_type_meta_new")
649 |   setnames(maxcol_meta,1,"SpotID")
650 | }
651 | 


--------------------------------------------------------------------------------
/Module2_spatial_coherence_org_zones.R:
--------------------------------------------------------------------------------
   1 | library(patchwork)
   2 | library(parallel)
   3 | 
   4 | #!!! Run Functions section first
   5 | 
   6 | # load data ---------------------------------------------------------------
   7 | 
   8 | sample_ls <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1
   9 | 
  10 | gen_clusters <- as.character(unique(unlist(sapply(c(1:length(sample_ls)), function(i){
  11 |   mp_assign <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
  12 |   return(unique(mp_assign$spot_type_meta_new))
  13 | }))))
  14 | 
  15 | 
  16 | 
  17 | # calculate spatial coherence (Figures 4A-C) --------
  18 | 
  19 | rand_num <- 100
  20 | 
  21 | sapply(c(1:length(sample_ls)), function(i){
  22 |   
  23 |   print(sample_ls[i])
  24 |   
  25 |   # load data
  26 |   spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
  27 |   row.names(spots_positions) <- spots_positions$V1
  28 |   
  29 |   spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
  30 |   spots_clusters <- na.omit(spots_clusters)
  31 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
  32 |   row.names(spots_clusters)<- spots_clusters$barcodes  
  33 |   
  34 |   # abundance 
  35 |   
  36 |   programs_comp <- sample_programs_composition(spots_clusters,gen_clusters)
  37 |   
  38 |   # neighbors tables   
  39 |   
  40 |   neighbors_table <- neighbors_table_func(spots_positions,spots_clusters)
  41 |   
  42 |   rand_neighbors_table <- lapply(c(1:rand_num), function(i){
  43 |     new_pos <- sample(spots_positions$V1, length(spots_positions$V1), replace = FALSE)
  44 |     pos_table <- spots_positions
  45 |     pos_table$V1 <- new_pos
  46 |     pos_table$V2 <- spots_positions[new_pos, "V2"]
  47 |     
  48 |     neighbors_table <- neighbors_table_func(pos_table,spots_clusters)
  49 |     return(neighbors_table)
  50 |   })
  51 |   
  52 |   
  53 |   # spatial coherence
  54 |   
  55 |   programs_spatial_score <- sapply(sort(gen_clusters), function(cluster){
  56 |     if (!(cluster %in% spots_clusters$spot_type)) {
  57 |       prog_score <- NaN
  58 |     } else {
  59 |       program_neighbors_table = neighbors_table[row.names(neighbors_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster],]
  60 |       obs <- obs_program_spatial_score(program_neighbors_table, cluster)
  61 |       one_spatial_score <- one_val(dim(program_neighbors_table)[1])
  62 |       zero_spatial_score <- zero_val(rand_neighbors_table, spots_clusters, cluster)
  63 |       if (obs>one_spatial_score){obs <- one_spatial_score}
  64 |       if (obs<zero_spatial_score){obs <- zero_spatial_score}
  65 |       
  66 |       prog_score <- (obs - zero_spatial_score)/(one_spatial_score - zero_spatial_score)
  67 |     }
  68 |     return(prog_score)
  69 |   })
  70 |   
  71 |   saveRDS(programs_spatial_score, "save_path")
  72 | })
  73 | 
  74 | # plot 
  75 | 
  76 | sapply(c(1:length(sample_ls)), function(i){
  77 |   
  78 |   print(sample_ls[i])
  79 |   
  80 |   # load data
  81 |   spots_clusters <- readRDS(paste("MP/mp_assign/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
  82 |   spots_clusters <- na.omit(spots_clusters)
  83 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
  84 |   row.names(spots_clusters)<- spots_clusters$barcodes  
  85 |   
  86 |   # abundance 
  87 |   
  88 |   programs_comp <- sample_programs_composition(spots_clusters,gen_clusters)
  89 |   saveRDS(programs_comp, paste("save_path_abund", sample_ls[i],"_nodes_abund.rds", sep = ""))
  90 | })
  91 | 
  92 | all_comp <- list.files("save_path_abund")
  93 | 
  94 | compositions <- sapply(all_comp, function(comp){
  95 |   samp_comp <- readRDS(paste("save_path_abund", comp, sep = ""))
  96 |   return(samp_comp)
  97 | })
  98 | 
  99 | colnames(compositions) <- as.character(sapply(colnames(compositions), function(x){substr(x, 1, nchar(x)-16)}))
 100 | 
 101 | all_spatial_score <- list.files("save_path")
 102 | 
 103 | spatial_score <- sapply(all_spatial_score, function(s_score){
 104 |   samp_scores <- readRDS(paste("save_path", s_score, sep = ""))
 105 |   return(samp_scores)
 106 | })
 107 | 
 108 | 
 109 | spatial_score <- as.data.frame(spatial_score)
 110 | colnames(spatial_score) <- as.character(sapply(colnames(spatial_score), function(x){substr(x,1, nchar(x)-18)}))
 111 | 
 112 | new_score <- ifelse(compositions < 0.05, 0,1) * spatial_score # filtered <0.05
 113 | new_score[new_score == 0] <- NA
 114 | 
 115 | new_score$metaprogram <- row.names(spatial_score)
 116 | spatial_score_filt <- new_score
 117 | 
 118 | 
 119 | 
 120 | mal_mp <- c("AC", "NPC", "OPC", "MES", "chromatin.reg", "AC.MES", "metabolism", "inflammatory.resp", "macrophage", "vascular", "oligo", "reactive.astrocyte")
 121 | prog_order <- rev(spatial_score_filt$metaprogram[order(apply(spatial_score_filt[,c(1:length(sample_ls))],1, function(x){mean(na.omit(x[x!=0]))}))])
 122 | samples_order <- rev(colnames(spatial_score_filt)[c(1:length(sample_ls))][order(apply(spatial_score_filt[spatial_score_filt$metaprogram %in% c("AC", "NPC", "OPC", "MES", "chromatin.reg", "AC.MES", "inflammatory.resp", "macrophage", "vascular","oligo", "reactive.astrocyte"),c(1:length(sample_ls))],2, function(x){mean(na.omit(x[x!=0]))}))])
 123 | 
 124 | spatial_long <- melt(setDT(spatial_score_filt), id.vars = "metaprogram", variable.name = "sample", value.name = "spatial_score")
 125 | spatial_long <- spatial_long[spatial_long$spatial_score != 0,]
 126 | 
 127 | 
 128 | spatial_long$sample <- factor(spatial_long$sample, levels = samples_order)
 129 | spatial_long$metaprogram <- factor(spatial_long$metaprogram, levels = prog_order)
 130 | 
 131 | mp_spatial_mean <- as.data.frame(apply(new_score[,c(1:26)],1, function(x){mean(na.omit(x))}))
 132 | colnames(mp_spatial_mean) <- c("mean_spatial")
 133 | mp_spatial_mean$metaprogram <- new_score$metaprogram
 134 | mp_spatial_mean$sd <- apply(new_score[,c(1:26)],1, function(x){sd(na.omit(x))})
 135 | mp_ord_tmp <- apply(new_score[,c(1:26)],1, function(x){mean(na.omit(x))})
 136 | names(mp_ord_tmp) <- new_score$metaprogram
 137 | mp_ord <- names(sort(mp_ord_tmp, decreasing = T))
 138 | 
 139 | mp_spatial_mean$metaprogram <- factor(mp_spatial_mean$metaprogram, levels = mp_ord)
 140 | 
 141 | samp_pal<-met.brewer("Veronese",26)
 142 | meta_pal<-c("hypoxia"="#F2B701", "MES"="#F6CF71", "AC.MES"="#66C5CC","AC"="#E68310","OPC"="#f97b72", "NPC"="#7F3C8D","chromatin.reg"="#3969AC", "metabolism"="#EF4868", "neuron"="#4b4b8f","oligo"="#B95FBB", "reactive.astrocyte"="#D4D915", "macrophage"="#80BA5A", "inflammatory.resp"="#11A579","vascular"="#CF1C90")
 143 | 
 144 | # plot was not not used in paper
 145 | p1_mp <- ggplot(spatial_long, aes(x=factor(metaprogram), y=spatial_score, fill = sample)) + 
 146 |   geom_dotplot(binaxis='y', stackdir='center', dotsize=1.5) +
 147 |   theme_minimal() +
 148 |   theme(axis.text.x = element_text(size= 12, angle = 45, vjust = 0.5),
 149 |         axis.title.x = element_blank(),
 150 |         axis.title.y = element_text(size=14, face="bold"),
 151 |         plot.title = element_text(size=14, face="bold"),
 152 |         panel.grid.major = element_blank(),
 153 |         panel.grid.minor = element_blank(),
 154 |         panel.border = element_blank()) + 
 155 |   ylab("spatial cohernece score") +
 156 |   xlab("metaprogram") +
 157 |   ggtitle("Spatial Coherence by Cancer Metaprogram") +
 158 |   scale_fill_manual(values = samp_pal)
 159 | 
 160 | # fig 4C
 161 | p2_mp <- ggplot(data=mp_spatial_mean, aes(x=metaprogram, y=mean_spatial, group=1)) + 
 162 |   geom_line(color = "grey", size = 2)+
 163 |   geom_point() +
 164 |   geom_errorbar(aes(ymin=mean_spatial-sd, ymax=mean_spatial+sd), width=.2,
 165 |                 position=position_dodge(0.05)) + theme_minimal() +
 166 |   theme(axis.text.x = element_text(size= 12, angle = 45, vjust = 0.5),
 167 |         axis.title.x = element_blank(),
 168 |         axis.title.y = element_text(size=14, face="bold"),
 169 |         plot.title = element_text(size=14, face="bold"),
 170 |         panel.grid.major = element_blank(),
 171 |         panel.grid.minor = element_blank(),
 172 |         panel.border = element_blank()) + 
 173 |   ylab("mean spatial cohernece score") 
 174 | 
 175 | #fig 4B
 176 | p1_samp <- ggplot(spatial_long[spatial_long$metaprogram %in% c("AC", "NPC", "OPC", "MES", "chromatin.reg", "AC.MES", "inflammatory.resp", "macrophage", "vascular","oligo", "reactive.astrocyte")], aes(x=factor(sample), y=spatial_score, fill = metaprogram)) + 
 177 |   geom_dotplot(binaxis='y', stackdir='center', dotsize=1.5) +
 178 |   theme_minimal() +
 179 |   theme(axis.text.x = element_text(size= 12, angle = 45, vjust = 0.5),
 180 |         axis.title.x = element_blank(),
 181 |         axis.title.y = element_text(size=14, face="bold"),
 182 |         plot.title = element_text(size=14, face="bold"),
 183 |         panel.grid.major = element_blank(),
 184 |         panel.grid.minor = element_blank(),
 185 |         panel.border = element_blank()) + 
 186 |   ylab("spatial cohernece score") +
 187 |   xlab("sample") +
 188 |   ggtitle("Spatial Coherence by samples") +
 189 |   scale_fill_manual(values = meta_pal)
 190 | 
 191 | #[ plot was not used in paper]
 192 | spatial_mean <- as.data.frame(apply(new_score[c(1:3,5:7,10,12,14),c(1:26)],2, function(x){mean(na.omit(x))})) # claculate mean spatial coherence excluding hypoxia and metabolism and normal brain (oligo, neuron and reactive.ast) 
 193 | colnames(spatial_mean) <- c("mean_spatial")
 194 | spatial_mean$sample <- row.names(spatial_mean)
 195 | spatial_mean$sd <- apply(new_score[c(1:3,5:7,10,12,14),c(1:26)],2, function(x){sd(na.omit(x))})
 196 | 
 197 | samples_ord <- names(sort(apply(new_score[c(1:3,5:7,10,12,14),c(1:26)],2, function(x){mean(na.omit(x))}), decreasing = T))
 198 | 
 199 | spatial_mean$sample <- factor(spatial_mean$sample, levels = samples_ord)
 200 | 
 201 | p2_samp <- ggplot(data=spatial_mean[spatial_mean$sample,], aes(x=sample, y=mean_spatial, group=1)) +
 202 |   geom_line(color = "grey", size = 2)+
 203 |   geom_point() + 
 204 |   geom_errorbar(aes(ymin=mean_spatial-sd, ymax=mean_spatial+sd), width=.2,
 205 |                 position=position_dodge(0.05)) + theme_minimal() +
 206 |   theme(axis.text.x = element_text(size= 12, angle = 45, vjust = 0.5),
 207 |         axis.title.x = element_blank(),
 208 |         axis.title.y = element_text(size=14, face="bold"),
 209 |         plot.title = element_text(size=14, face="bold"),
 210 |         panel.grid.major = element_blank(),
 211 |         panel.grid.minor = element_blank(),
 212 |         panel.border = element_blank()) + 
 213 |   ylab("mean spatial cohernece score") 
 214 | 
 215 | 
 216 | 
 217 | # calculate spatial coherence by window to define structured vs organized spots (Figures 4D-E) (Alt. skip and use saved results in next section) ----------------------------------------------------------
 218 | 
 219 | win_size <- 5 # run for c(5,8,11) 
 220 | rand_num <- 100
 221 | 
 222 | 
 223 | samples_num <- c(1:length(sample_ls))
 224 | all_scores <- mclapply(samples_num, all_scores_fx, mc.cores = 26)
 225 | 
 226 | all_scores_fx <- function(i){
 227 |   
 228 |   #print(sample_ls[i])
 229 |   
 230 |   # load data
 231 |   spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
 232 |   row.names(spots_positions) <- spots_positions$V1
 233 |   
 234 |   spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
 235 |   spots_clusters <- na.omit(spots_clusters)
 236 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
 237 |   row.names(spots_clusters)<- spots_clusters$barcodes  
 238 |   
 239 |   
 240 |   # neighbors tables   
 241 |   all_win_neighbors_table <- neighbors_table_funcV2(spots_positions,spots_clusters)
 242 |   neighbors_table <- neighbors_table_func(spots_positions,spots_clusters)
 243 |   
 244 |   # spatial coherence by window 
 245 |   
 246 |   all_spots <- spots_clusters$barcodes
 247 |   
 248 |   spatial_score_win <- sapply(all_spots, function(spot){
 249 |     win_spots <- c(spot)
 250 |     sapply(c(1:win_size), function(j){
 251 |       win_spots <<- unique(c(win_spots,unique(na.omit(as.character(all_win_neighbors_table[win_spots,])))))
 252 |       win_spots <<- win_spots[win_spots != "NaN"]
 253 |     })
 254 |     
 255 |     win_rand_neighbors_table <- lapply(c(1:rand_num), function(k){
 256 |       win_spots_positions <- spots_positions[spots_positions$V1 %in% win_spots,]
 257 |       new_pos <- sample(win_spots_positions$V1, length(win_spots_positions$V1), replace = FALSE)
 258 |       pos_table <- win_spots_positions
 259 |       row.names(pos_table) <- new_pos 
 260 |       pos_table$V1 <- new_pos
 261 |       pos_table$V2 <- win_spots_positions[new_pos, "V2"]
 262 |       
 263 |       neighbors_table <- win_prox_neighbors_table_func(pos_table,spots_clusters[row.names(pos_table),])
 264 |       return(neighbors_table)
 265 |     }) 
 266 |     
 267 |     win_abund <- sample_programs_composition(spots_clusters[spots_clusters$barcodes %in% win_spots,], gen_clusters) 
 268 |     
 269 |     programs_spatial_score <- lapply(sort(gen_clusters), function(cluster){
 270 |       if (!(cluster %in% spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]) | 
 271 |           length(spots_clusters$barcodes[spots_clusters$spot_type == cluster & spots_clusters$barcodes %in% win_spots]) < 10) {
 272 |         prog_score <- NaN
 273 |         return(prog_score)
 274 |       } else {
 275 |         program_neighbors_table = neighbors_table[row.names(neighbors_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster & spots_clusters$barcodes %in% win_spots],]
 276 |         obs <- obs_program_spatial_score(program_neighbors_table, cluster)
 277 |         one_spatial_score <- one_val(dim(program_neighbors_table)[1])
 278 |         zero_spatial_score <- zero_val(win_rand_neighbors_table, spots_clusters[spots_clusters$barcodes %in% win_spots,], cluster)
 279 |         
 280 |         if (win_abund[cluster] >= 0.6) {
 281 |           if (obs>one_spatial_score){obs <- one_spatial_score}
 282 |           return(obs/one_spatial_score)
 283 |         } else {
 284 |           if (obs>one_spatial_score){obs <- one_spatial_score}
 285 |           if (obs<zero_spatial_score){obs <- zero_spatial_score}
 286 |           
 287 |           prog_score <- (obs - zero_spatial_score)/(one_spatial_score - zero_spatial_score)
 288 |           return(prog_score)
 289 |         }
 290 |       }
 291 |     })
 292 |     return(mean(na.omit(unlist(programs_spatial_score))))
 293 |   })
 294 |   return(spatial_score_win)
 295 | }
 296 | 
 297 | 
 298 | # define dis-organized zones ------------------------------------------------------------
 299 | 
 300 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win5v3.rds")
 301 | hist(unlist(all_scores), breaks = 100, main = "radius 5: spatial coherence socres", xlab = "spatial coherence score")
 302 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5])
 303 | 
 304 | all_zones5 <- sapply(c(1:length(sample_ls)), function(i){
 305 |   set_zones <- ifelse(all_scores[[i]] <= th1, "dis", "other")
 306 |   print(table(set_zones))
 307 |   return(set_zones)
 308 | })
 309 | 
 310 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win8v3.rds")
 311 | hist(unlist(all_scores)[unlist(all_scores) > 0 & unlist(all_scores) < 1], breaks = 100, main = "radius 8: spatial coherence socres", xlab = "spatial coherence score")
 312 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5])
 313 | 
 314 | all_zones10 <- sapply(c(1:length(sample_ls)), function(i){
 315 |   set_zones <- ifelse(all_scores[[i]] <= th1, "dis", "other")
 316 |   print(table(set_zones))
 317 |   return(set_zones)
 318 | })
 319 | 
 320 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win11v3.rds")
 321 | hist(unlist(all_scores)[unlist(all_scores) > 0 & unlist(all_scores) < 1], breaks = 100, main = "radius 11: spatial coherence socres", xlab = "spatial coherence score")
 322 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5])
 323 | 
 324 | all_zones15 <- sapply(c(1:length(sample_ls)), function(i){
 325 |   set_zones <- ifelse(all_scores[[i]] <= th1, "dis", "other")
 326 |   print(table(set_zones))
 327 |   return(set_zones)
 328 | })
 329 | 
 330 | 
 331 | 
 332 | zones_intersect <- sapply(c(1:26), function(x){
 333 |   inter1 <- intersect(names(all_zones5[[x]])[all_zones5[[x]] == "dis"],names(all_zones10[[x]])[all_zones10[[x]] == "dis"])
 334 |   inter2 <- intersect(inter1,names(all_zones15[[x]])[all_zones15[[x]] == "dis"])
 335 |   return(inter2)
 336 | })
 337 | 
 338 | 
 339 | all_zones <- sapply(c(1:26),function(i){
 340 |   set_zones <- ifelse(names(all_scores[[i]]) %in% zones_intersect[[i]],"dis","other")
 341 |   names(set_zones) <- names(all_scores[[i]])
 342 |   return(set_zones)
 343 | })
 344 | 
 345 | win_size <- 4
 346 | 
 347 | all_smooth <- sapply(c(1:26),function(i){
 348 |   print(sample_ls[i])
 349 |   spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
 350 |   row.names(spots_positions) <- spots_positions$V1
 351 |   
 352 |   spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
 353 |   spots_clusters <- na.omit(spots_clusters)
 354 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
 355 |   row.names(spots_clusters)<- spots_clusters$barcodes
 356 |   
 357 |   neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)
 358 |   
 359 |   all_spots <- spots_clusters$barcodes
 360 |   
 361 |   smoothing <- sapply(all_spots, function(spot){
 362 |     win_spots <- c(spot)
 363 |     sapply(c(1:win_size), function(i){
 364 |       win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
 365 |     })
 366 |     win_zones <- all_zones[[i]][names(all_zones[[i]]) %in% win_spots]
 367 |     if (length(win_zones[win_zones == "dis"])/length(win_zones) >= 0.5) {return("dis")}
 368 |     else {return("other")}
 369 |   })
 370 |   return(smoothing)
 371 | })
 372 | 
 373 | 
 374 | # define struct zones -----------------------------------------------------
 375 | 
 376 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win5v3.rds")
 377 | hist(unlist(all_scores), breaks = 100)
 378 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5])
 379 | 
 380 | all_zones5 <- sapply(c(1:length(sample_ls)), function(i){
 381 |   set_zones <- ifelse(all_scores[[i]] >= th1, "struct","other")
 382 |   print(table(set_zones))
 383 |   return(set_zones)
 384 | })
 385 | 
 386 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win8v3.rds")
 387 | hist(unlist(all_scores)[unlist(all_scores) > 0 & unlist(all_scores) < 1], breaks = 100)
 388 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5])
 389 | 
 390 | all_zones10 <- sapply(c(1:length(sample_ls)), function(i){
 391 |   set_zones <- ifelse(all_scores[[i]] >= th1, "struct", "other")
 392 |   print(table(set_zones))
 393 |   return(set_zones)
 394 | })
 395 | 
 396 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win11v3.rds")
 397 | hist(unlist(all_scores)[unlist(all_scores) > 0 & unlist(all_scores) < 1], breaks = 100)
 398 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5])
 399 | 
 400 | all_zones15 <- sapply(c(1:length(sample_ls)), function(i){
 401 |   set_zones <- ifelse(all_scores[[i]] >= th1, "struct", "other")
 402 |   print(table(set_zones))
 403 |   return(set_zones)
 404 | })
 405 | 
 406 | 
 407 | 
 408 | zones_intersect <- sapply(c(1:26), function(x){
 409 |   inter1 <- intersect(names(all_zones5[[x]])[all_zones5[[x]] == "struct"],names(all_zones10[[x]])[all_zones10[[x]] == "struct"])
 410 |   inter2 <- intersect(inter1,names(all_zones15[[x]])[all_zones15[[x]] == "struct"])
 411 |   return(inter2)
 412 | })
 413 | 
 414 | 
 415 | all_zones <- sapply(c(1:26),function(i){
 416 |   set_zones <- ifelse(names(all_scores[[i]]) %in% zones_intersect[[i]],"struct","other")
 417 |   names(set_zones) <- names(all_scores[[i]])
 418 |   return(set_zones)
 419 | })
 420 | 
 421 | win_size <- 4
 422 | all_smooth_struct <- sapply(c(1:26),function(i){
 423 |   print(sample_ls[i])
 424 |   spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
 425 |   row.names(spots_positions) <- spots_positions$V1
 426 |   
 427 |   spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
 428 |   spots_clusters <- na.omit(spots_clusters)
 429 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
 430 |   row.names(spots_clusters)<- spots_clusters$barcodes
 431 |   
 432 |   neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)
 433 |   
 434 |   all_spots <- spots_clusters$barcodes
 435 |   
 436 |   smoothing <- sapply(all_spots, function(spot){
 437 |     win_spots <- c(spot)
 438 |     sapply(c(1:win_size), function(i){
 439 |       win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
 440 |     })
 441 |     win_zones <- all_zones[[i]][names(all_zones[[i]]) %in% win_spots]
 442 |     if (length(win_zones[win_zones == "struct"])/length(win_zones) >= 0.5) {return("struct")}
 443 |     else {return("other")}
 444 |   })
 445 |   return(smoothing)
 446 | })
 447 | 
 448 | 
 449 | # final zones -------------------------------------------------------------
 450 | 
 451 | all_zones <- sapply(c(1:26),function(i){
 452 |   sapply(names(all_smooth[[i]]), function(s){
 453 |     if (all_smooth[[i]][s] == "dis" & all_smooth_struct[[i]][s] == "struct") {
 454 |       return("Intermediate")
 455 |     } else if (all_smooth[[i]][s] == "dis") {
 456 |       return("dis")
 457 |     } else if (all_smooth_struct[[i]][s] == "struct") {
 458 |       return("struct")
 459 |     } else {
 460 |       return("Intermediate")
 461 |     }
 462 |   })
 463 | })
 464 | 
 465 | names(all_zones) <- sample_ls
 466 | 
 467 | is_small <- sapply(all_zones, function(x){
 468 |   tx <- table(x)
 469 |   
 470 |   old_clusters <- names(tx)
 471 |   add_clusters <- c("dis","Intermediate", "struct")[!c("dis","Intermediate", "struct") %in% old_clusters]
 472 |   if (length(add_clusters) > 0) {
 473 |     tx <- c(as.numeric(tx),0)
 474 |     names(tx) <- c(old_clusters, add_clusters)
 475 |   }
 476 |   tx <- tx[sort(names(tx))]
 477 |   tx10 <- sum(tx)*0.12
 478 |   print(tx)
 479 |   return(c(tx[1]<tx10, tx[2]<tx10,tx[3]<tx10))
 480 | })
 481 | colnames(is_small) <- sample_ls
 482 | 
 483 | dis2use <- c(1:26)[!is_small[1,]]
 484 | sturct2use <- c(1:26)[!is_small[3,]] 
 485 | 
 486 | spatial_score_zones <- lapply(c(1:length(sample_ls)), function(i){
 487 |   # load data
 488 |   print(sample_ls[i])
 489 |   spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
 490 |   row.names(spots_positions) <- spots_positions$V1
 491 |   
 492 |   spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
 493 |   spots_clusters <- na.omit(spots_clusters)
 494 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
 495 |   row.names(spots_clusters)<- spots_clusters$barcodes  
 496 |   zone_score <- lapply(c("dis", "struct"), function(z){
 497 |     
 498 |     zspots <- na.omit(names(all_zones[[i]])[all_zones[[i]] == z]) # checkk!!!!!!!!!!!
 499 |     if (length(zspots) == 0) {
 500 |       return(NA)
 501 |     } else {
 502 |       # abundance 
 503 |       programs_comp <- sample_programs_composition(spots_clusters[spots_clusters$barcodes %in% zspots,],gen_clusters)
 504 |       
 505 |       # neighbors tables   
 506 |       
 507 |       neighbors_table <- neighbors_table_func(spots_positions,spots_clusters)
 508 |       zone_neighbors_table <- neighbors_table[zspots,] 
 509 |       
 510 |       win_rand_neighbors_table <- lapply(c(1:rand_num), function(i){
 511 |         zspots_positions <- spots_positions[spots_positions$V1 %in% zspots,]
 512 |         new_pos <- sample(zspots_positions$V1, length(zspots_positions$V1), replace = FALSE)
 513 |         pos_table <- zspots_positions
 514 |         row.names(pos_table) <- new_pos 
 515 |         pos_table$V1 <- new_pos
 516 |         pos_table$V2 <- zspots_positions[new_pos, "V2"]
 517 |         
 518 |         neighbors_table <- win_prox_neighbors_table_func(pos_table,spots_clusters[spots_clusters$barcodes %in% zspots,])
 519 |         return(neighbors_table)
 520 |       }) 
 521 |       
 522 |       # spatial coherence
 523 |       
 524 |       programs_spatial_score <- sapply(sort(gen_clusters), function(cluster){
 525 |         if (!(cluster %in% spots_clusters$spot_type[spots_clusters$barcodes %in% zspots]) | 
 526 |             length(spots_clusters$barcodes[spots_clusters$spot_type == cluster & spots_clusters$barcodes %in% zspots]) < 3) {
 527 |           prog_score <- NaN
 528 |         } else {
 529 |           program_neighbors_table = neighbors_table[row.names(neighbors_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster & spots_clusters$barcodes %in% zspots],]
 530 |           obs <- obs_program_spatial_score(program_neighbors_table, cluster)
 531 |           one_spatial_score <- one_val(dim(program_neighbors_table)[1])
 532 |           zero_spatial_score <- zero_val(win_rand_neighbors_table, spots_clusters[spots_clusters$barcodes %in% zspots,], cluster)
 533 |           if (obs>one_spatial_score){obs <- one_spatial_score}
 534 |           if (obs<zero_spatial_score){obs <- zero_spatial_score}
 535 |           
 536 |           prog_score <- (obs - zero_spatial_score)/(one_spatial_score - zero_spatial_score)
 537 |         }
 538 |         return(prog_score)
 539 |       })    
 540 |       return(list(programs_comp,programs_spatial_score))
 541 |     }
 542 |   })
 543 |   names(zone_score) <- c("dis", "struct")
 544 |   return(zone_score)
 545 | })
 546 | 
 547 | 
 548 | # plot Relationship between spot purity and spatial coherence -------------
 549 | 
 550 | purity_score_scaled <- readRDS("CNA/mal_lev.rds")
 551 | 
 552 | all_dis_purity <- sapply(c(1:26), function(i){
 553 |   dis_purity <- purity_score_scaled[[i]][all_zones[[i]] == "dis"]
 554 |   return(dis_purity)
 555 | })
 556 | 
 557 | hist(unlist(all_dis_purity))
 558 | 
 559 | all_st_purity <- sapply(c(1:26), function(i){
 560 |   st_purity <- purity_score_scaled[[i]][all_zones[[i]] == "struct"]
 561 |   return(st_purity)
 562 | })
 563 | 
 564 | hist(unlist(all_st_purity))
 565 | 
 566 | 
 567 | zones_class_fin <- sapply(c(1:26), function(i){
 568 |   print(sample_ls[[i]])
 569 |   
 570 |   spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[[i]], ".rds", sep = ""))
 571 |   spots_clusters <- na.omit(spots_clusters)
 572 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
 573 |   row.names(spots_clusters)<- spots_clusters$barcodes  
 574 |   
 575 |   v2 <- sapply(c(1:length(all_zones[[i]])), function(s){
 576 |     if(all_zones[[i]][s] == "dis" & purity_score_scaled[[i]][names(all_zones[[i]][s])] < 0.5) {
 577 |       return("dis_norm")
 578 |     } else if (all_zones[[i]][s] == "dis" & purity_score_scaled[[i]][names(all_zones[[i]][s])] >= 0.5) {
 579 |       return("dis_mal")
 580 |     } else if (all_zones[[i]][s] == "struct" & purity_score_scaled[[i]][names(all_zones[[i]][s])] < 0.4) {
 581 |       return("st_norm")
 582 |     } else if (all_zones[[i]][s] == "struct" & purity_score_scaled[[i]][names(all_zones[[i]][s])] >= 0.4) {
 583 |       return("st_mal")
 584 |     } else {
 585 |       return(all_zones[[i]][s])
 586 |     }
 587 |   })
 588 |   names(v2) <- names(all_zones[[i]])
 589 |   
 590 |   return(v2)
 591 | })
 592 | names(zones_class_fin) <- sample_ls
 593 | 
 594 | 
 595 | zone.colors <- c(structured_mal = "#cf5e4e",structured_norm = "#a82203", disorganized_mal = "#208cc0", disorganized_norm = "#003967",intermediate ="#f1af3a")
 596 | sapply(c(1:26), function(i){
 597 |   print(sample_ls[[i]])
 598 |   spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[[i]] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
 599 |   row.names(spots_positions) <- spots_positions$V1
 600 |   
 601 |   spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[[i]], ".rds", sep = ""))
 602 |   spots_clusters <- na.omit(spots_clusters)
 603 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
 604 |   row.names(spots_clusters)<- spots_clusters$barcodes  
 605 |   
 606 |   spots_filt = spots_positions[spots_positions$V1 %in% spots_clusters$barcodes,]
 607 |   row2plot1 <- spots_clusters$barcodes[spots_clusters$barcodes %in% names(zones_class_fin[[i]])[zones_class_fin[[i]] == "st_mal"]]
 608 |   row2plot2 <- spots_clusters$barcodes[spots_clusters$barcodes %in% names(zones_class_fin[[i]])[zones_class_fin[[i]] == "st_norm"]]
 609 |   row2plot3 <- spots_clusters$barcodes[spots_clusters$barcodes %in% names(zones_class_fin[[i]])[zones_class_fin[[i]] == "dis_mal"]]
 610 |   row2plot4 <- spots_clusters$barcodes[spots_clusters$barcodes %in% names(zones_class_fin[[i]])[zones_class_fin[[i]] == "dis_norm"]]
 611 |   spots_filt$plot <- factor(ifelse(spots_filt$V1 %in% row2plot1, "structured_mal", 
 612 |                                    ifelse(spots_filt$V1 %in% row2plot2, "structured_norm", 
 613 |                                           ifelse(spots_filt$V1 %in% row2plot3, "disorganized_mal",
 614 |                                                  ifelse(spots_filt$V1 %in% row2plot4, "disorganized_norm","intermediate")))), levels = c("disorganized_mal","disorganized_norm", "structured_mal", "structured_norm","intermediate"))
 615 |   spots_filt$V3_ops = -(spots_filt$V3)
 616 |   
 617 |   
 618 |   gg = ggplot(spots_filt, aes(x=V4, y=V3_ops)) + 
 619 |     geom_point(aes(col=plot), size=2) + 
 620 |     labs(title=paste(sample_ls[[i]], "spatial coherence zones"), y="pos y", x="pos x") +
 621 |     scale_color_manual(values = zone.colors, name = "zone") + 
 622 |     theme_void()
 623 |   print(gg)
 624 |   
 625 | })
 626 | 
 627 | zones_cat <- c("dis_mal","dis_norm","Intermediate","st_mal","st_norm")
 628 | zone_abund <- sapply(zones_class_fin, function(z){
 629 |   spots_zone <- as.data.frame(z)
 630 |   spots_zone <- na.omit(spots_zone)
 631 |   spots_zone$barcodes <- row.names(spots_zone)
 632 |   colnames(spots_zone) <- c("spot_type","barcodes")
 633 |   
 634 |   # abundance 
 635 |   
 636 |   zones_comp <- sample_programs_composition(spots_zone,zones_cat)
 637 |   return(zones_comp)
 638 | })
 639 | 
 640 | 
 641 | 
 642 | ######## Functions  --------------------------------------------------------------
 643 | 
 644 | 
 645 | sample_programs_composition <- function(spots_clusters, gen_clusters){
 646 |   composition <- table(spots_clusters$spot_type)
 647 |   old_clusters <- names(composition)
 648 |   add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
 649 |   sapply(add_clusters,function(clust){
 650 |     composition <<- c(composition, clust = 0)
 651 |   })
 652 |   
 653 |   names(composition) <- c(old_clusters, add_clusters)
 654 |   final_composition <- composition[sort(names(composition))]/sum(composition)
 655 |   return(final_composition)
 656 | }
 657 | 
 658 | obs_program_spatial_score <- function(program_neighbors, cluster){
 659 |   cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 1, 0)
 660 |   if(is.null(dim(program_neighbors))){
 661 |     cluster_neighbors_sum <- sum(cluster_neighbors_bin)
 662 |   } else {
 663 |     cluster_neighbors_sum <- apply(cluster_neighbors_bin,1,function(rx){sum(na.omit(rx))})
 664 |   }
 665 |   obs <- mean(cluster_neighbors_sum)
 666 |   return(obs)
 667 | }
 668 | 
 669 | one_val <- function(spots_num){
 670 |   a <- sqrt((4*spots_num)/(6*sqrt(3)))
 671 |   oneval <- (6*spots_num-12*a-6)/spots_num
 672 |   return(oneval)
 673 | }
 674 | 
 675 | zero_val <- function(rand_table, spots_clusters, cluster){
 676 |   all_zeroval <- sapply(rand_table, function(neighbors_rand_table){
 677 |     program_rand_neighbors_table = neighbors_rand_table[row.names(neighbors_rand_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster],]
 678 |     rand_obs <- obs_program_spatial_score(program_rand_neighbors_table, cluster)
 679 |     return(rand_obs)
 680 |   })
 681 |   zeroval <- mean(all_zeroval)
 682 |   return(zeroval)
 683 | }
 684 | 
 685 | 
 686 | calc_adj_mat <- function(neighbored_cluster, spots_clusters, cluster_neighbors_table){
 687 |   if (!(neighbored_cluster %in% spots_clusters$spot_type)) {
 688 |     return(0)
 689 |   } else {
 690 |     cluster_neighbors_bin <- ifelse(cluster_neighbors_table == neighbored_cluster, 1, 0)
 691 |     
 692 |     if (is.null(dim(cluster_neighbors_bin))){
 693 |       cluster_neighbors_sum <- sum(na.omit(cluster_neighbors_bin))
 694 |     } else {
 695 |       cluster_neighbors_sum <- sum(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))}))
 696 |     }
 697 |     return(cluster_neighbors_sum)
 698 |   }
 699 | }
 700 | 
 701 | 
 702 | prog_connectivity_score <- function(program_neighbors, cluster){
 703 |   cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 0, 1)
 704 |   if(is.null(dim(program_neighbors))){
 705 |     prog_connect <- 1
 706 |   } else {
 707 |     prog_connect <- length(which(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))}) >0))
 708 |   }
 709 |   return(prog_connect)
 710 | }
 711 | 
 712 | neighbors_table_func <- function(spots_positions,spots_clusters){
 713 |   neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
 714 |     spots_row = spots_positions[spots_positions$V1 == spot, 3]
 715 |     spots_col = spots_positions[spots_positions$V1 == spot, 4]
 716 |     
 717 |     if (spots_col == 0 | spots_row == 0) {
 718 |       c1 = NaN
 719 |     } else {
 720 |       n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
 721 |       if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){
 722 |         c1 = NaN
 723 |       } else {
 724 |         c1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1])
 725 |       }
 726 |     }
 727 |     
 728 |     if (spots_col == 127 | spots_row == 0) {
 729 |       c2 = NaN
 730 |     } else {
 731 |       n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
 732 |       if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){
 733 |         c2 = NaN
 734 |       } else {
 735 |         c2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2])
 736 |       }
 737 |     }
 738 |     
 739 |     if (spots_col == 0 | spots_col == 1) {
 740 |       c3 = NaN
 741 |     } else {
 742 |       n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
 743 |       if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){
 744 |         c3 = NaN
 745 |       } else {
 746 |         c3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3])
 747 |       }
 748 |     }
 749 |     
 750 |     if (spots_col == 126 | spots_col == 127) {
 751 |       c4 = NaN
 752 |     } else {
 753 |       n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
 754 |       if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){
 755 |         c4 = NaN
 756 |       } else {
 757 |         c4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4])
 758 |       }
 759 |     }
 760 |     
 761 |     if (spots_col == 0 | spots_row == 77) {
 762 |       c5 = NaN
 763 |     } else {
 764 |       n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
 765 |       if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){
 766 |         c5 = NaN
 767 |       } else {
 768 |         c5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5])
 769 |       }
 770 |     }
 771 |     
 772 |     if (spots_col == 127 | spots_row == 77) {
 773 |       c6 = NaN
 774 |     } else {
 775 |       n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
 776 |       if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){
 777 |         c6 = NaN
 778 |       } else {
 779 |         c6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6])
 780 |       }
 781 |     }
 782 |     
 783 |     
 784 |     return(c(c1,c2,c3,c4,c5,c6))
 785 |     
 786 |   })
 787 |   
 788 |   neighbors_table = t(neighbors_table)
 789 |   row.names(neighbors_table) = spots_clusters$barcodes
 790 |   
 791 |   return(neighbors_table)
 792 | }
 793 | 
 794 | neighbors_table_funcV2 <- function(spots_positions,spots_clusters){
 795 |   neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
 796 |     spots_row = spots_positions[spots_positions$V1 == spot, 3]
 797 |     spots_col = spots_positions[spots_positions$V1 == spot, 4]
 798 |     
 799 |     if (spots_col == 0 | spots_row == 0) {
 800 |       c1 = NaN
 801 |     } else {
 802 |       n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
 803 |       if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){
 804 |         c1 = NaN
 805 |       } else {
 806 |         c1 = as.character(n1)
 807 |       }
 808 |     }
 809 |     
 810 |     if (spots_col == 127 | spots_row == 0) {
 811 |       c2 = NaN
 812 |     } else {
 813 |       n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
 814 |       if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){
 815 |         c2 = NaN
 816 |       } else {
 817 |         c2 = as.character(n2)
 818 |       }
 819 |     }
 820 |     
 821 |     if (spots_col == 0 | spots_col == 1) {
 822 |       c3 = NaN
 823 |     } else {
 824 |       n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
 825 |       if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){
 826 |         c3 = NaN
 827 |       } else {
 828 |         c3 = as.character(n3)
 829 |       }
 830 |     }
 831 |     
 832 |     if (spots_col == 126 | spots_col == 127) {
 833 |       c4 = NaN
 834 |     } else {
 835 |       n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
 836 |       if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){
 837 |         c4 = NaN
 838 |       } else {
 839 |         c4 = as.character(n4)
 840 |       }
 841 |     }
 842 |     
 843 |     if (spots_col == 0 | spots_row == 77) {
 844 |       c5 = NaN
 845 |     } else {
 846 |       n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
 847 |       if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){
 848 |         c5 = NaN
 849 |       } else {
 850 |         c5 = as.character(n5)
 851 |       }
 852 |     }
 853 |     
 854 |     if (spots_col == 127 | spots_row == 77) {
 855 |       c6 = NaN
 856 |     } else {
 857 |       n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
 858 |       if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){
 859 |         c6 = NaN
 860 |       } else {
 861 |         c6 = as.character(n6)
 862 |       }
 863 |     }
 864 |     
 865 |     
 866 |     return(c(c1,c2,c3,c4,c5,c6))
 867 |     
 868 |   })
 869 |   
 870 |   neighbors_table = t(neighbors_table)
 871 |   row.names(neighbors_table) = spots_clusters$barcodes
 872 |   
 873 |   return(neighbors_table)
 874 | }
 875 | 
 876 | 
 877 | win_prox_neighbors_table_func <- function(spots_positions,spots_clusters){
 878 |   neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
 879 |     spots_row = spots_positions[spots_positions$V1 == spot, 3]
 880 |     spots_col = spots_positions[spots_positions$V1 == spot, 4]
 881 |     
 882 |     if (spots_col == 0 | spots_row == 0) {
 883 |       n1 = NA
 884 |     } else {
 885 |       n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
 886 |       if (length(n1_temp) == 0) {
 887 |         n1 = NA
 888 |       } else if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){
 889 |         n1 = NA
 890 |       } else {
 891 |         n1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1_temp])
 892 |       }
 893 |     }
 894 |     
 895 |     if (spots_col == 127 | spots_row == 0) {
 896 |       n2 = NA
 897 |     } else {
 898 |       n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
 899 |       if (length(n2_temp) == 0) {
 900 |         n2 = NA
 901 |       } else if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){
 902 |         n2 = NA
 903 |       } else {
 904 |         n2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2_temp])
 905 |       }
 906 |     }
 907 |     
 908 |     if (spots_col == 0 | spots_col == 1) {
 909 |       n3 = NA
 910 |     } else {
 911 |       n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
 912 |       if (length(n3_temp) == 0) {
 913 |         n3 = NA
 914 |       } else if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){
 915 |         n3 = NA
 916 |       } else {
 917 |         n3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3_temp])
 918 |       }
 919 |     }
 920 |     
 921 |     if (spots_col == 126 | spots_col == 127) {
 922 |       n4 = NA
 923 |     } else {
 924 |       n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
 925 |       if (length(n4_temp) == 0) {
 926 |         n4 = NA
 927 |       } else if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){
 928 |         n4 = NA
 929 |       } else {
 930 |         n4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4_temp])
 931 |       }
 932 |     }
 933 |     
 934 |     if (spots_col == 0 | spots_row == 77) {
 935 |       n5 = NA
 936 |     } else {
 937 |       n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
 938 |       if (length(n5_temp) == 0) {
 939 |         n5 = NA
 940 |       } else if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){
 941 |         n5 = NA
 942 |       } else {
 943 |         n5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5_temp])
 944 |       }
 945 |     }
 946 |     
 947 |     if (spots_col == 127 | spots_row == 77) {
 948 |       n6 = NA
 949 |     } else {
 950 |       n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
 951 |       if (length(n6_temp) == 0) {
 952 |         n6 = NA
 953 |       } else if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){
 954 |         n6 = NA
 955 |       } else {
 956 |         n6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6_temp])
 957 |       }
 958 |     }
 959 |     
 960 |     
 961 |     return(c(n1,n2,n3,n4,n5,n6))
 962 |     
 963 |   })
 964 |   
 965 |   neighbors_table = t(neighbors_table)
 966 |   row.names(neighbors_table) = spots_clusters$barcodes
 967 |   
 968 |   return(neighbors_table)
 969 | }
 970 | 
 971 | prox_neighbors_table_func <- function(spots_positions,spots_clusters){
 972 |   neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
 973 |     spots_row = spots_positions[spots_positions$V1 == spot, 3]
 974 |     spots_col = spots_positions[spots_positions$V1 == spot, 4]
 975 |     
 976 |     if (spots_col == 0 | spots_row == 0) {
 977 |       n1 = NA
 978 |     } else {
 979 |       n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
 980 |       if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){
 981 |         n1 = NA
 982 |       } else {
 983 |         n1 = n1_temp
 984 |       }
 985 |     }
 986 |     
 987 |     if (spots_col == 127 | spots_row == 0) {
 988 |       n2 = NA
 989 |     } else {
 990 |       n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
 991 |       if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){
 992 |         n2 = NA
 993 |       } else {
 994 |         n2 = n2_temp
 995 |       }
 996 |     }
 997 |     
 998 |     if (spots_col == 0 | spots_col == 1) {
 999 |       n3 = NA
1000 |     } else {
1001 |       n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
1002 |       if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){
1003 |         n3 = NA
1004 |       } else {
1005 |         n3 = n3_temp
1006 |       }
1007 |     }
1008 |     
1009 |     if (spots_col == 126 | spots_col == 127) {
1010 |       n4 = NA
1011 |     } else {
1012 |       n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
1013 |       if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){
1014 |         n4 = NA
1015 |       } else {
1016 |         n4 = n4_temp
1017 |       }
1018 |     }
1019 |     
1020 |     if (spots_col == 0 | spots_row == 77) {
1021 |       n5 = NA
1022 |     } else {
1023 |       n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
1024 |       if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){
1025 |         n5 = NA
1026 |       } else {
1027 |         n5 = n5_temp
1028 |       }
1029 |     }
1030 |     
1031 |     if (spots_col == 127 | spots_row == 77) {
1032 |       n6 = NA
1033 |     } else {
1034 |       n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
1035 |       if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){
1036 |         n6 = NA
1037 |       } else {
1038 |         n6 = n6_temp
1039 |       }
1040 |     }
1041 |     
1042 |     
1043 |     return(c(n1,n2,n3,n4,n5,n6))
1044 |     
1045 |   })
1046 |   
1047 |   neighbors_table = t(neighbors_table)
1048 |   row.names(neighbors_table) = spots_clusters$barcodes
1049 |   
1050 |   return(neighbors_table)
1051 | }
1052 | 


--------------------------------------------------------------------------------
/Module3_spatial_associations.R:
--------------------------------------------------------------------------------
   1 | library(stringr)
   2 | library(cocor)
   3 | library(dplyr)
   4 | library(data.table)
   5 | library(stats)
   6 | library(scalop)
   7 | library(tibble)
   8 | library(ggrepel)
   9 | library(tidyr)
  10 | library(scales)
  11 | library(matrixStats)
  12 | 
  13 | # Run functions first
  14 | 
  15 | # co-localization  ---------------------------------------------------------
  16 | #### log-transform expression matrices 
  17 | 
  18 | sample_ls <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1
  19 | 
  20 | gen_clusters <- unique(unlist(sapply(c(1:length(sample_ls)), function(i){
  21 |   mp_assign <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
  22 |   return(unique(mp_assign$spot_type_meta_new))
  23 | })))
  24 | mp_names <- sort(as.character(gen_clusters))
  25 | 
  26 | 
  27 | per_sample_mat <- list()
  28 | for(i in seq_along(sample_ls)) {
  29 |   m <- readRDS(paste("general/exp_mats_GBM/", sample_ls[i], "counts.rds", sep = ""))
  30 |   m <- m[-grep("^MT-|^RPL|^RPS", rownames(m)), ]
  31 |   if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]}
  32 |   scaling_factor <- 1000000/colSums(m)
  33 |   m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*")
  34 |   m_loged <- log2(1 + (m_CPM/10))
  35 |   # removing genes with zero variance across all cells
  36 |   var_filter <- apply(m_loged, 1, var)
  37 |   m_proc <- m_loged[var_filter != 0, ]
  38 |   # filtering out lowly expressed genes
  39 |   exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)]
  40 |   m_proc <- m_proc[exp_genes, ]
  41 |   # output to a list 
  42 |   per_sample_mat[[i]] <- m_proc
  43 |   names(per_sample_mat)[i] <- sample_ls[[i]]
  44 |   rm(path, files, m, m_CPM, m_loged, var_filter, exp_genes, m_proc)
  45 | }
  46 | 
  47 | #### scoring based matrices 
  48 | 
  49 | metaprograms_gene_list <- readRDS("MP/clean_spatial_gbm_metaprograms_124.rds")
  50 | 
  51 | all_decon <- lapply(c(1:length(per_sample_mat)), function(i){
  52 |   m_proc<-per_sample_mat[[i]]
  53 |   signatures <- scalop::sigScores(m_proc, metaprograms_gene_list, expr.center = TRUE, conserved.genes = 0.5)
  54 |   score_vecs<-t(signatures)
  55 |   scored_filt <- apply(score_vecs, 2, function(x) {
  56 |     if(length(x[x > 0]) == 0) {prop_vec <- setNames(rep(0, nrow(score_vecs)), rownames(score_vecs))}
  57 |     get_positives <- x[x > 0.1] #all scores >=0.1 - consider cell state present in that spot
  58 |     prop_vec <- setNames(rep(0, nrow(score_vecs)), rownames(score_vecs))
  59 |     prop_vec <- setNames(ifelse(rownames(score_vecs) %in% names(get_positives), yes =  get_positives[match(names(prop_vec), names( get_positives))], no = 0), rownames(score_vecs))
  60 |     return(prop_vec)
  61 |   })
  62 |   #convert to df
  63 |   new_decon_df <- as.data.frame(t(scored_filt)) %>% rownames_to_column(var = "barcodes")
  64 |   new_decon_mtrx <- new_decon_df %>%
  65 |     tibble::column_to_rownames("barcodes") %>%
  66 |     as.matrix()
  67 |   return(new_decon_mtrx)
  68 | })
  69 | names(all_decon) <- sample_ls
  70 | 
  71 | #### co-localiztion
  72 | results<-list()
  73 | for(i in seq_along(names(all_decon))) {
  74 |   decon <- all_decon[[sample_ls[i]]]
  75 |   spot_list <- lapply(c(1:500),function(x){ #num. of shuffled matrices
  76 |     all_row <- sapply(c(1:dim(decon)[2]),function(c){
  77 |       new_row <- sample(c(1:length(row.names(decon))), length(row.names(decon)), replace = FALSE)
  78 |       return(new_row) 
  79 |     })
  80 |     new_mat <- sapply(c(1:dim(decon)[2]),function(c){
  81 |       sh_row <- decon[,c][all_row[,c]]#generate many deconv matrices by shuffling cell types
  82 |       return(sh_row)
  83 |     })
  84 |     colnames(new_mat) <- colnames(decon)
  85 |     return(new_mat)
  86 |   }) 
  87 |   
  88 |   #run colocal_mat_fun -- perform colocalization on the shuffled decon matrices
  89 |   mats_list<- lapply(spot_list, function(new_decon){
  90 |     colocal_mats<-colocal_mat(new_decon)
  91 |     return(colocal_mats)
  92 |   })
  93 |   mean_mat <- sapply(mats_list,function(x){ #expected colocalization calculated from shuffled decon matrices
  94 |     return(x[,"ab_comb"])
  95 |   })
  96 |   pairs_shuf_mean <- apply(mean_mat,1,mean)
  97 |   names(pairs_shuf_mean) <- mats_list[[1]][,"pairs"]
  98 |   rownames(mean_mat) <- mats_list[[1]][,"pairs"]
  99 |   
 100 |   obs_coloc <- colocal_mat(decon)
 101 |   
 102 |   r1 = obs_coloc$ab_comb #observed colocal
 103 |   r2 = pairs_shuf_mean #expected colocal
 104 |   
 105 |   n = dim(decon)[1] #num total spots
 106 |   
 107 |   fisher = ((0.5*log((1+r1)/(1-r1)))-(0.5*log((1+r2)/(1-r2))))/((1/(n-3))+(1/(n-3)))^0.5
 108 |   
 109 |   p.value = (2*(1-pnorm(abs(as.matrix(fisher)))))
 110 |   effect_size <- r1/r2 #observed/expected
 111 |   results[[i]]<-data.frame(pvalue = p.value,
 112 |                            effect_size = effect_size)
 113 |   
 114 | }
 115 | names(results) <- sample_ls
 116 | 
 117 | results2<-lapply(results, dplyr::add_rownames, 'pairs')
 118 | results2<- lapply(results2,function(x){
 119 |   as.matrix(x)
 120 | })
 121 | results2 <- lapply(results2, function(x){rownames(x) <- x[,1]; x})
 122 | df <- do.call("rbind", results2)
 123 | 
 124 | ###volcano plot (all pairs, all samples)###
 125 | df->results_comb2
 126 | results_comb2<-as.data.frame(results_comb2)
 127 | results_comb2$pvalue<-as.numeric(results_comb2$pvalue)
 128 | results_comb2$effect_size<-as.numeric(results_comb2$effect_size)
 129 | results_comb2$sig <- "NO"
 130 | results_comb2$sig[results_comb2$effect_size >= 1.3 & results_comb2$pvalue <= 0.01] <- "enriched"
 131 | results_comb2$sig[results_comb2$effect_size <= 0.7 & results_comb2$pvalue <= 0.01] <- "depleted"
 132 | results_comb2$label <- NA
 133 | results_comb2$label[results_comb2$sig != "NO"] <- results_comb2$pairs[results_comb2$sig != "NO"]
 134 | 
 135 | ggplot(data=results_comb2, aes(x=effect_size, y=-log10(pvalue), col=sig, label=label)) + 
 136 |   geom_point() + 
 137 |   theme_minimal() +
 138 |   geom_text_repel()+
 139 |   theme(text = element_text(size = 20))
 140 | 
 141 | # Adjacency  --------------------------------------------------------------
 142 | 
 143 | sample_ls <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1
 144 | 
 145 | gen_clusters <- as.character(unique(unlist(sapply(c(1:length(sample_ls)), function(i){
 146 |   mp_assign <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
 147 |   return(unique(mp_assign$spot_type_meta_new))
 148 | }))))
 149 | 
 150 | all_zones <- readRDS("Spatial_coh_zones/final_zones.rds")
 151 | 
 152 | extend_metadata <- tibble()
 153 | generate_metadata <- sapply(c(1:length(sample_ls)), function(i){
 154 |   print(sample_ls[i])
 155 |   
 156 |   # load data
 157 |   spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
 158 |   row.names(spots_positions) <- spots_positions$V1
 159 |   
 160 |   spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
 161 |   spots_clusters <- na.omit(spots_clusters)
 162 |   colnames(spots_clusters) <- c("barcodes", "spot_type")
 163 |   row.names(spots_clusters)<- spots_clusters$barcodes  
 164 |   
 165 |   
 166 |   metadata <- tibble(Key = paste(sample_ls[i],spots_clusters$barcodes, sep="_"),
 167 |                      SpotID = spots_clusters$barcodes,
 168 |                      Sample = rep(sample_ls[i], nrow(spots_clusters)),
 169 |                      MPid = spots_clusters$spot_type,
 170 |                      array_row = spots_positions[spots_clusters$barcodes,"V3"],
 171 |                      array_col = spots_positions[spots_clusters$barcodes, "V4"],
 172 |                      pxl_in_rows = spots_positions[spots_clusters$barcodes, "V5"],
 173 |                      pxl_in_cols = spots_positions[spots_clusters$barcodes, "V6"],
 174 |                      Zone = as.character(all_zones[[i]][spots_clusters$barcodes]))
 175 |   
 176 |   extend_metadata <<- rbind.data.frame(extend_metadata, metadata)
 177 | })
 178 | 
 179 | set.seed(50)
 180 | neighbs_stats <- neighbor_spot_props(metadata = extend_metadata, 
 181 |                                      zone = "All", 
 182 |                                      #site = "All", 
 183 |                                      samples = "All",
 184 |                                      #zone_by = "EpiStroma", 
 185 |                                      n_cores = 30, 
 186 |                                      plot_perm_distr = TRUE, 
 187 |                                      n_perm = 10000, 
 188 |                                      filter_signif = TRUE, 
 189 |                                      zscore_thresh = 1)
 190 | 
 191 | 
 192 | 
 193 | 
 194 | # regional comp  (Previously run by server. Possible to run per sample below)----------------------------------------------------------
 195 | 
 196 | #!/usr/bin/env Rscript
 197 | #args = commandArgs(trailingOnly = TRUE)
 198 | 
 199 | #sname <- as.character(args[1])
 200 | #sname <- str_replace(sname, "\r", "")
 201 | #print(paste("I got the samp right", sname))
 202 | 
 203 | sname <- "" #insert sample name 
 204 | file_list <- list.files("MP/mp_assign_124/")
 205 | gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
 206 |   mp_assign <- readRDS(paste("MP/mp_assign_124/", file_list[i], sep = ""))
 207 |   return(unique(mp_assign$spot_type_meta_new))
 208 | }))))
 209 | 
 210 | max_win_size <- 15
 211 | pairs <- combn(sort(gen_clusters),2)
 212 | pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
 213 | 
 214 | # load data
 215 | spots_positions <- read.csv(paste("general/GBM_data/", sname , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
 216 | row.names(spots_positions) <- spots_positions$V1
 217 | 
 218 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sname, ".rds", sep = ""))
 219 | spots_clusters <- na.omit(spots_clusters)
 220 | colnames(spots_clusters) <- c("barcodes", "spot_type")
 221 | row.names(spots_clusters)<- spots_clusters$barcodes  
 222 | 
 223 | neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)
 224 | 
 225 | all_spots <- spots_clusters$barcodes
 226 | 
 227 | all_pval_windows <- sapply(c(1:max_win_size), function(win_size){
 228 |   proximity <- t(sapply(all_spots, function(spot){
 229 |     win_spots <- c(spot)
 230 |     sapply(c(1:win_size), function(i){
 231 |       win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
 232 |     })
 233 |     win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]))
 234 |     return(win_abund)
 235 |   }))
 236 |   
 237 |   old_clusters <- colnames(proximity)
 238 |   add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
 239 |   
 240 |   if (length(add_clusters) != 0){
 241 |     sapply(add_clusters,function(clust){
 242 |       proximity <<- cbind(proximity,rep(0,dim(proximity)[1]))
 243 |     })
 244 |     colnames(proximity)[c((length(old_clusters)+1):dim(proximity)[2])] <- add_clusters
 245 |   }
 246 |   
 247 |   proximity <- proximity[,sort(colnames(proximity))]
 248 |   
 249 |   is_one <- apply(proximity,1,function(ro){
 250 |     return(1 %in% ro )
 251 |   })
 252 |   
 253 |   proximity <- proximity[!is_one,]
 254 |   
 255 |   all_cor <- sapply(c(1:dim(pairs)[2]), function(j){
 256 |     pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]])
 257 |     return(pair_cor)
 258 |   })
 259 |   
 260 |   all_cor <- data.frame(pair_cors = all_cor)
 261 |   row.names(all_cor) <- pairs_names
 262 |   return(all_cor)
 263 |   
 264 | })
 265 | sample_proximity <- as.data.frame(all_pval_windows)
 266 | row.names(sample_proximity) <- pairs_names
 267 | 
 268 | 
 269 | 
 270 | # regional comp random (Previously run by server. Possible to run per sample below) ---------------------------------------------------
 271 | 
 272 | #!/usr/bin/env Rscript
 273 | #args = commandArgs(trailingOnly = TRUE)
 274 | 
 275 | #sname <- as.character(args[1])
 276 | #sname <- str_replace(sname, "\r", "")
 277 | 
 278 | sname <- "" #insert sample name 
 279 | file_list <- list.files("MP/mp_assign_124/")
 280 | gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
 281 |   mp_assign <- readRDS(paste("MP/mp_assign_124/", file_list[i], sep = ""))
 282 |   return(unique(mp_assign$spot_type_meta_new))
 283 | }))))
 284 | 
 285 | max_win_size <- 15
 286 | pairs <- combn(sort(gen_clusters),2)
 287 | pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
 288 | rand_num <- 500
 289 | 
 290 | # load data
 291 | spots_positions_orign <- read.csv(paste("general/GBM_data/", sname , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE)
 292 | row.names(spots_positions_orign) <- spots_positions_orign$V1
 293 | 
 294 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sname, ".rds", sep = ""))
 295 | spots_clusters <- na.omit(spots_clusters)
 296 | colnames(spots_clusters) <- c("barcodes", "spot_type")
 297 | row.names(spots_clusters)<- spots_clusters$barcodes  
 298 | 
 299 | all_rand <- lapply(c(1:rand_num),function(j){
 300 |   new_pos_all <- sample(spots_positions_orign$V1[spots_positions_orign$V2 != 0], length(spots_positions_orign$V1[spots_positions_orign$V2 != 0]), replace = FALSE)
 301 |   spots_positions <- spots_positions_orign
 302 |   spots_positions$V1[spots_positions$V2 != 0] <- new_pos_all
 303 |   
 304 |   neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)
 305 |   
 306 |   all_spots <- spots_clusters$barcodes
 307 |   
 308 |   
 309 |   all_pval_windows <- sapply(c(1:max_win_size), function(win_size){
 310 |     proximity <- t(sapply(all_spots, function(spot){
 311 |       win_spots <- c(spot)
 312 |       sapply(c(1:win_size), function(i){
 313 |         win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
 314 |       })
 315 |       win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]))
 316 |       return(win_abund)
 317 |     }))
 318 |     
 319 |     old_clusters <- colnames(proximity)
 320 |     add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
 321 |     
 322 |     if (length(add_clusters) != 0){
 323 |       sapply(add_clusters,function(clust){
 324 |         proximity <<- cbind(proximity,rep(0,dim(proximity)[1]))
 325 |       })
 326 |       colnames(proximity)[c((length(old_clusters)+1):dim(proximity)[2])] <- add_clusters
 327 |     }
 328 |     
 329 |     proximity <- proximity[,sort(colnames(proximity))]
 330 |     
 331 |     is_one <- apply(proximity,1,function(ro){
 332 |       return(1 %in% ro )
 333 |     })
 334 |     
 335 |     proximity <- proximity[!is_one,]
 336 |     
 337 |     all_cor <- sapply(c(1:dim(pairs)[2]), function(j){
 338 |       pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]])
 339 |       return(pair_cor)
 340 |     })
 341 |     
 342 |     all_cor <- data.frame(pair_cors = all_cor)
 343 |     row.names(all_cor) <- pairs_names
 344 |     return(all_cor)
 345 |     
 346 |   })
 347 |   sample_proximity <- as.data.frame(all_pval_windows)
 348 |   row.names(sample_proximity) <- pairs_names
 349 |   return(sample_proximity)
 350 | })
 351 | sample_mean_rand_prox <- Reduce("+", all_rand) / length(all_rand)
 352 | 
 353 | sample_sd_rand_prox <- round(apply(array(unlist(all_rand), c(length(pairs_names), max_win_size, rand_num)), c(1,2), sd),4)
 354 | 
 355 | 
 356 | 
 357 | # regional comp downstream ------------------------------------------------
 358 | 
 359 | file_list <- list.files("MP/mp_assign_124/")
 360 | gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
 361 |   mp_assign <- readRDS(paste("MP/mp_assign_124/", file_list[i], sep = ""))
 362 |   return(unique(mp_assign$spot_type_meta_new))
 363 | }))))
 364 | 
 365 | samp_list <- list.files("MP/mp_assign_124/")
 366 | 
 367 | pairs <- combn(sort(gen_clusters),2)
 368 | pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
 369 | all_zones <- readRDS("Spatial_coh_zones/spatial_zonesv3.rds")
 370 | 
 371 | all_proximity_list <- list.files("Spatial_coh_zones/proximity_samples/")
 372 | 
 373 | all_proximity <- lapply(all_proximity_list, function(prox){
 374 |   samp_prox <- readRDS(paste("Spatial_coh_zones/proximity_samples/", prox, sep = ""))
 375 |   return(samp_prox)
 376 | })
 377 | names(all_proximity) <- sapply(str_split(all_proximity_list, "_"), function(x){return(x[1])})
 378 | 
 379 | all_proximity_rand_list <- list.files("Spatial_coh_zones/proximity_rand_samples/")
 380 | 
 381 | all_proximity_rand <- lapply(all_proximity_rand_list, function(prox){
 382 |   samp_prox <- readRDS(paste("Spatial_coh_zones/proximity_rand_samples/", prox, sep = ""))
 383 |   return(samp_prox)
 384 | })
 385 | names(all_proximity_rand) <- sapply(str_split(all_proximity_rand_list, "_"), function(x){return(x[1])})
 386 | 
 387 | 
 388 | combined_proximity <- t(sapply(c(1:length(pairs_names)), function(i){
 389 |   pair_prox <- sapply(all_proximity, function(x){
 390 |     pair_sample_df <- as.data.frame(x[i,c(1:15)])
 391 |     colnames(pair_sample_df) <- c(as.character(c(1:15)))
 392 |     return(pair_sample_df)
 393 |   })
 394 |   return(apply(pair_prox,1,function(x){mean(na.omit(as.numeric(x)))}))
 395 | }))
 396 | 
 397 | row.names(combined_proximity) <- row.names(all_proximity[[1]])
 398 | 
 399 | 
 400 | 
 401 | Heatmap(na.omit(combined_proximity), cluster_columns = FALSE, column_title = "Metaprograms Proximity", 
 402 |         row_names_gp = grid::gpar(fontsize = 5), name = "proximity", show_row_names = T, show_row_dend = F)
 403 | # regional comp downstream significant ---------------------------------------------------
 404 | 
 405 | spots_numv1 <-sapply(samp_list, function(smp){
 406 |   samp_df <- readRDS(paste("MP/mp_assign_124/", smp, sep = ""))
 407 |   return(nrow(samp_df))
 408 | })
 409 | names(spots_numv1) <- sapply(str_split(names(spots_numv1), "\\."), function(x){return(x[1])})
 410 | 
 411 | spots_num <-sapply(samp_list, function(smp){
 412 |   samp_df <- readRDS(paste("MP/mp_assign_124/", smp, sep = ""))
 413 |   pairs_n <- sapply(c(1:ncol(pairs)),function(p){
 414 |     p_table <- samp_df[samp_df$spot_type_meta_new %in% c(pairs[,p]),]
 415 |     return(nrow(p_table))
 416 |   })
 417 |   return(pairs_n)
 418 | })
 419 | colnames(spots_num) <- sapply(str_split(colnames(spots_num), "\\."), function(x){return(x[1])})
 420 | 
 421 | 
 422 | 
 423 | proximity_bin <- lapply(c(1:26),function(i){
 424 |   r1 = all_proximity[[i]]
 425 |   r2 = all_proximity_rand[[i]]
 426 |   
 427 |   n_df = as.data.frame(spots_num[,names(all_proximity)[i]])
 428 |   n_df <- cbind(n_df, rep(n_df,14))
 429 |   
 430 |   fisher = ((0.5*log((1+r1)/(1-r1)))-(0.5*log((1+r2)/(1-r2))))/((1/(n_df-3))+(1/(n_df-3)))^0.5
 431 |   
 432 |   p.value = (2*(1-pnorm(abs(as.matrix(fisher)))))
 433 |   colnames(p.value) <- as.character(1:15)
 434 |   bin_pval <- ifelse(p.value < 0.0000000001,1,NA)
 435 |   #  bin_pval <- ifelse(p.value < 0.00001,1,0)
 436 |   rbin <- r1*bin_pval
 437 |   return(rbin)
 438 | })
 439 | names(proximity_bin) <- names(all_proximity)
 440 | 
 441 | ######## Functions ---------------------------------------------------------------
 442 | 
 443 | 
 444 | 
 445 | # # co-localization functions  ---------------------------------------------
 446 | # Count interactions between pairs 
 447 | colocal_mat <- function(x) {
 448 |   stopifnot(
 449 |     is.matrix(x), is.numeric(x),
 450 |     all(dim(x) > 0), ncol(x) > 1)
 451 |   
 452 |   if (is.null(colnames(x))) {
 453 |     colnames(x) <- seq_len(ncol(x))
 454 |   }
 455 |   df <- calc_pairs(x)
 456 |   df <- compute_interactions(x, df)
 457 |   return(df)
 458 | }
 459 | 
 460 | calc_pairs <- function(x) {
 461 |   x <- x > 0
 462 |   ab <- combn(colnames(x), 2)
 463 |   y <- apply(ab, 2, function(.) sum(matrixStats::rowAlls(x[, ., drop = FALSE])))
 464 |   df <- data.frame(t(ab), y)
 465 |   names(df) <- c("pair1", "pair2", "n")
 466 |   return(df)
 467 | }
 468 | 
 469 | compute_interactions <- function(x, df) {
 470 |   y <- colnames(x)
 471 |   df$a <- factor(df$pair1, levels = y)
 472 |   df$b <- factor(df$pair2, levels = rev(y))
 473 |   t <- colSums(x > 0)
 474 |   a <- match(df$pair1, y)
 475 |   b <- match(df$pair2, y)
 476 |   df$ta <- t[a]
 477 |   df$tb <- t[b]
 478 |   df$pa <- df$n / df$ta
 479 |   df$pb <- df$n / df$tb
 480 |   df$ab_mean <- (df$pa + df$pb) / 2
 481 |   df$ab_comb <- df$n / (df$ta + df$tb)
 482 |   df$ab_comb2 <- df$n / (df$ta * df$tb)
 483 |   df$pairs <- paste(df$a, df$b)
 484 |   return(df)
 485 | }
 486 | 
 487 | # Adjacency Helper Functions -------------------------------------------
 488 | 
 489 | 
 490 | .neighbors_table_func <- function(metadata, spot_class = "MPid", spot_name = "Key") {
 491 |   if(is.factor(metadata[[spot_class]])) {metadata[[spot_class]] <- as.character(metadata[[spot_class]])}
 492 |   neighbors_table <- sapply(metadata[[spot_name]], function(spot) {
 493 |     spots_row = metadata$array_row[metadata[[spot_name]] == spot]
 494 |     spots_col = metadata$array_col[metadata[[spot_name]] == spot]
 495 |     
 496 |     n1_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) - 1 & metadata$array_col == as.numeric(spots_col) - 1]
 497 |     if(length(n1_temp) == 0) {
 498 |       n1 = NA
 499 |     } else {
 500 |       n1 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n1_temp])
 501 |     }
 502 |     
 503 |     n2_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) - 1 & metadata$array_col == as.numeric(spots_col) + 1]
 504 |     if (length(n2_temp) == 0) {
 505 |       n2 = NA
 506 |     } else {
 507 |       n2 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n2_temp])
 508 |     }
 509 |     
 510 |     n3_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) & metadata$array_col == as.numeric(spots_col) - 2]
 511 |     if (length(n3_temp) == 0) {
 512 |       n3 = NA
 513 |     } else {
 514 |       n3 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n3_temp])
 515 |     }
 516 |     
 517 |     n4_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) & metadata$array_col == as.numeric(spots_col) + 2]
 518 |     if (length(n4_temp) == 0) {
 519 |       n4 = NA
 520 |     } else {        
 521 |       n4 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n4_temp])
 522 |     }
 523 |     
 524 |     n5_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) + 1 & metadata$array_col == as.numeric(spots_col) - 1]
 525 |     if (length(n5_temp) == 0) {
 526 |       n5 = NA
 527 |     } else {
 528 |       n5 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n5_temp])
 529 |     }
 530 |     
 531 |     n6_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) + 1 & metadata$array_col == as.numeric(spots_col) + 1]
 532 |     if (length(n6_temp) == 0) {
 533 |       n6 = NA
 534 |     } else {
 535 |       n6 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n6_temp])
 536 |     }
 537 |     
 538 |     return(c(n1, n2, n3, n4, n5, n6))
 539 |   })
 540 |   
 541 |   neighbors_table = t(neighbors_table)
 542 |   rownames(neighbors_table) = metadata[[spot_name]]
 543 |   return(neighbors_table)
 544 | }
 545 | 
 546 | 
 547 | .prog_connectivity_score <- function(program_neighbors, state) {
 548 |   state_neighbors_bin <- ifelse(program_neighbors == state, 0, 1)
 549 |   if(is.null(dim(state_neighbors_bin))) {
 550 |     prog_connect <- 1
 551 |   } else {
 552 |     prog_connect <- length(which(apply(state_neighbors_bin, 1, function(x) {sum(na.omit(x))}) > 0))
 553 |   }
 554 |   return(prog_connect)
 555 | }
 556 | 
 557 | 
 558 | .calc_adj_mat <- function(neighbored_state, spots_states, state_neighbors_table, spot_class = "MPid") {
 559 |   if(!(neighbored_state %in% spots_states[[spot_class]])) {
 560 |     return(0)
 561 |   } else {
 562 |     state_neighbors_bin <- ifelse(state_neighbors_table == neighbored_state, 1, 0)
 563 |     if(is.null(dim(state_neighbors_bin))) {
 564 |       state_neighbors_sum <- sum(na.omit(state_neighbors_bin))
 565 |     } else {
 566 |       state_neighbors_sum <- sum(apply(state_neighbors_bin, 1, function(x) {sum(na.omit(x))}))
 567 |     }
 568 |     return(state_neighbors_sum)
 569 |   }
 570 | }
 571 | 
 572 | 
 573 | 
 574 | # Adjacency Bootstrapping Method ---------------------------------------
 575 | 
 576 | 
 577 | # The metadata supplied to the function must contain spatial coordinate information!
 578 | calc_spatial_neighborhood <- function(metadata, 
 579 |                                       spot_class = "MPid", 
 580 |                                       spot_name = "Key", 
 581 |                                       samples = "all", 
 582 |                                       iter = 20) {
 583 |   
 584 |   # Check inputs
 585 |   if(is.factor(metadata[[spot_class]])) {
 586 |     all_labels <- na.omit(levels(metadata[[spot_class]]))
 587 |   } else if(is.character(metadata[[spot_class]])) {
 588 |     all_labels <- na.omit(unique(metadata[[spot_class]]))
 589 |     metadata[[spot_class]] <- as.factor(metadata[[spot_class]])
 590 |   } else {stop("spot_class should either be a factor or character vector!")}
 591 |   stopifnot("Error: metadata must contain a column specifying sample name." = 
 592 |               any(grepl("sample*", colnames(metadata), ignore.case = TRUE)))
 593 |   metadata$enumerate <- metadata[[grep("sample*", colnames(metadata), ignore.case = TRUE)]]
 594 |   if(length(samples) == 1 && samples == "all") {
 595 |     samples <- unique(metadata$enumerate)
 596 |   } else if((length(samples) > 1 || samples != "all") & all(samples %in% unique(metadata$enumerate))) {
 597 |     samples <- samples
 598 |   } else {stop("samples supplied do not match the samples present in the metadata!")}
 599 |   
 600 |   message("This proccess may take a while...")
 601 |   connectivity <- lapply(seq_along(samples), function(i) {
 602 |     message(paste0("Start proccessing sample: ", samples[[i]]))
 603 |     samp_meta <- metadata[metadata$enumerate == samples[[i]], ]
 604 |     
 605 |     ## Construct Neighbors tables
 606 |     # Randomized spot ids & position table
 607 |     rand_neighbors_table <- lapply(seq_len(iter), function(j) {
 608 |       shuffled_spots <- sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = FALSE)
 609 |       shuffled_meta <- samp_meta %>% dplyr::mutate({{spot_name}} := shuffled_spots)
 610 |       neighbors_table <- .neighbors_table_func(shuffled_meta, spot_class = spot_class, spot_name = spot_name)
 611 |       return(neighbors_table)
 612 |     })
 613 |     
 614 |     # Bootstrapped spots class identity
 615 |     boots_neighbors_table <- lapply(seq_len(iter), function(j) {
 616 |       new_spots <- unique(sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = TRUE))
 617 |       bootsteped_meta <- samp_meta[samp_meta[[spot_name]] %in% new_spots, ]
 618 |       neighbors_table <- .neighbors_table_func(bootsteped_meta, spot_class = spot_class, spot_name = spot_name)
 619 |       return(neighbors_table)
 620 |     })
 621 |     
 622 |     # Actual (observed) neighborhood table
 623 |     neighbors_table <- .neighbors_table_func(samp_meta, spot_class = spot_class, spot_name = spot_name)
 624 |     
 625 |     # Calculate connectivity scores (spatial coherence)
 626 |     programs_connectivity_score <- sapply(sort(all_labels), function(cluster) {
 627 |       if (!(cluster %in% samp_meta[[spot_class]])) {
 628 |         prog_score <- NaN
 629 |       } else {
 630 |         program_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ]
 631 |         prog_score <- .prog_connectivity_score(program_neighbors_table, cluster)
 632 |       }
 633 |       return(prog_score)
 634 |     })
 635 |     
 636 |     
 637 |     ## Connectivity
 638 |     boots_adj_mat <- lapply(boots_neighbors_table, function(b_table) {
 639 |       obs_adj_mat <- sapply(sort(all_labels), function(cluster) {
 640 |         if (!(cluster %in% samp_meta[[spot_class]])) {
 641 |           zero_neigh <- rep(0, length(all_labels)) 
 642 |           names(zero_neigh) <- all_labels
 643 |           return(zero_neigh)
 644 |         } else {
 645 |           cluster_neighbors_table = b_table[rownames(b_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ]
 646 |           num_of_neighbores = sapply(sort(as.character(all_labels)), function(neighbored_cluster) {
 647 |             num <- .calc_adj_mat(neighbored_cluster, samp_meta, cluster_neighbors_table)
 648 |             return(num)
 649 |           })
 650 |           return(num_of_neighbores)
 651 |         }})
 652 |       
 653 |       diag(obs_adj_mat) <- 0
 654 |       weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)})
 655 |       
 656 |       comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)]
 657 |       comp4mat[is.na(comp4mat)] <- 0
 658 |       weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){
 659 |         new_comp <- comp4mat
 660 |         new_comp[prog] <- 0
 661 |         new_comp <- new_comp / sum(new_comp)
 662 |         return(new_comp)
 663 |       })
 664 |       
 665 |       norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2
 666 |       
 667 |       upper_mat <- (norm_adj_mat[upper.tri(norm_adj_mat)] + t(norm_adj_mat)[upper.tri(t(norm_adj_mat))]) / 2
 668 |       lower_mat <- rep(NaN, length(upper_mat))
 669 |       avg_mat <- norm_adj_mat
 670 |       avg_mat[upper.tri(avg_mat)] <- upper_mat
 671 |       avg_mat[lower.tri(avg_mat)] <- lower_mat
 672 |       
 673 |       rownames(avg_mat) <- colnames(avg_mat)
 674 |       avg_mat <- t(avg_mat)
 675 |       avg_mat <- as.data.frame(avg_mat)
 676 |       avg_mat$pair2 <- rownames(avg_mat)
 677 |       long <- reshape2::melt(data.table::setDT(avg_mat), id.vars = c("pair2"), variable.name = "pair1")
 678 |       return(long)
 679 |     })
 680 |     
 681 |     
 682 |     # Random connectivity 
 683 |     rand_adj_mat <- lapply(rand_neighbors_table, function(b_table) {
 684 |       obs_adj_mat <- sapply(sort(all_labels), function(cluster) {
 685 |         if (!(cluster %in% samp_meta[[spot_class]])) {
 686 |           return(rep(0, length(all_labels)))
 687 |         } else {
 688 |           cluster_neighbors_table = b_table[row.names(b_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ]
 689 |           num_of_neighbores = sapply(sort(as.character(all_labels)), function(neighbored_cluster) {
 690 |             num <- .calc_adj_mat(neighbored_cluster, samp_meta, cluster_neighbors_table)
 691 |             return(num)
 692 |           })
 693 |           return(num_of_neighbores)
 694 |         }})
 695 |       
 696 |       diag(obs_adj_mat) <- 0
 697 |       weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)})
 698 |       
 699 |       comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)]
 700 |       comp4mat[is.na(comp4mat)] <- 0
 701 |       weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog) {
 702 |         new_comp <- comp4mat
 703 |         new_comp[prog] <- 0
 704 |         new_comp <- new_comp / sum(new_comp)
 705 |         return(new_comp)
 706 |       })
 707 |       
 708 |       norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2
 709 |       
 710 |       upper_mat <- (norm_adj_mat[upper.tri(norm_adj_mat)] + t(norm_adj_mat)[upper.tri(t(norm_adj_mat))]) / 2
 711 |       lower_mat <- rep(NaN, length(upper_mat))
 712 |       avg_mat <- norm_adj_mat
 713 |       avg_mat[upper.tri(avg_mat)] <- upper_mat
 714 |       avg_mat[lower.tri(avg_mat)] <- lower_mat
 715 |       
 716 |       rownames(avg_mat) <- colnames(avg_mat)
 717 |       avg_mat <- t(avg_mat)
 718 |       avg_mat <- as.data.frame(avg_mat)
 719 |       avg_mat$pair2 <- rownames(avg_mat)
 720 |       long <- reshape2::melt(data.table::setDT(avg_mat), id.vars = c("pair2"), variable.name = "pair1")
 721 |       return(long)
 722 |     })
 723 |     
 724 |     final_adj_mat <- data.frame(pair1 = boots_adj_mat[[1]]$pair1,
 725 |                                 pair2 = boots_adj_mat[[1]]$pair2,
 726 |                                 connectivity = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}),
 727 |                                 effect_size = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}) / apply(sapply(rand_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}),
 728 |                                 sd = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {sd(na.omit(k))}))
 729 |     
 730 |     
 731 |     ## Add p-value 
 732 |     pval <- sapply(seq_len(nrow(final_adj_mat)), function(j1) {
 733 |       obs <- na.omit(as.numeric(sapply(seq_len(iter), function(j2) {
 734 |         return(boots_adj_mat[[j2]][j1, "value"])
 735 |       })))
 736 |       exp <- na.omit(as.numeric(sapply(seq_len(iter), function(j2) {
 737 |         return(rand_adj_mat[[j2]][j1, "value"])
 738 |       })))
 739 |       if (length(obs) == 0) {
 740 |         return(NA)
 741 |       } else {
 742 |         t.res <- t.test(obs, exp, alternative = "two.sided", var.equal = FALSE)
 743 |         return(t.res$p.value)
 744 |       }
 745 |     })
 746 |     
 747 |     final_adj_mat$pval <- pval
 748 |     return(final_adj_mat)
 749 |   })
 750 |   names(connectivity) <- samples
 751 |   return(connectivity)
 752 | }
 753 | 
 754 | 
 755 | 
 756 | 
 757 | 
 758 | # Adjacency Permutation Method ----------------------------------------------------
 759 | 
 760 | # The metadata supplied to the function must contain spatial coordinate information!
 761 | neighbor_spot_props <- function(metadata, 
 762 |                                 zone = "All", 
 763 |                                 #site = "All", 
 764 |                                 samples = "All",
 765 |                                 spot_class = "MPid", 
 766 |                                 spot_name = "Key", 
 767 |                                 #zone_by = "EpiStroma", 
 768 |                                 n_cores = 10, 
 769 |                                 n_perm = 1000, 
 770 |                                 signif_val = 0.01, 
 771 |                                 plot_perm_distr = TRUE, 
 772 |                                 filter_signif = TRUE, 
 773 |                                 zscore_thresh = 1) {
 774 |   # Load variables
 775 |   all_states <- as.character(unique(unlist(sapply(c(1:length(sample_ls)), function(i){
 776 |     mp_assign <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = ""))
 777 |     return(unique(mp_assign$spot_type_meta_new))
 778 |   }))))
 779 |   #samples_metadata <- readRDS(file = here("Analysis/Metadata/samples_metadata.rds"))
 780 |   neighbs_stats_ls <- list()
 781 |   signif_neighbs_ls <- list()
 782 |   distr_plots_ls <- list()
 783 |   
 784 |   # Filter metadata input by selected site and zone
 785 |   stopifnot("Error: metadata must contain a column specifying sample name." = 
 786 |               any(grepl("sample*", colnames(metadata), ignore.case = TRUE)))
 787 |   # if(site != "All") {
 788 |   #   stopifnot("Error: Site argument must be one of the following: 'Laryngeal' / 'Oral' / 'Oropharynx'." = 
 789 |   #               site %in% unique(samples_metadata$Site))
 790 |   #   metadata <- metadata[metadata$Sample %in% samples_metadata$Sample[samples_metadata$Site == site], ]
 791 |   # }
 792 |   if(zone != "All") {
 793 |     #stopifnot("Error: Argument `zone_by` must be either 'EpiStroma' (for separating Epithelial, Stromal or Mixed spots) or 'Zone' (for separating Epithelial zonation)." = 
 794 |     #           zone_by %in% c("EpiStroma", "Zone") & length(zone_by) == 1)
 795 |     # if(zone_by == "EpiStroma") {
 796 |     #   stopifnot("Error: Argument `zone` should specify on which tumor region neighboring states will be computed - 'Epithelial' / 'Stroma' / 'Mixed'." = 
 797 |     #               zone %in% unique(metadata$EpiStroma) & length(zone) == 1)
 798 |     #   metadata <- metadata[metadata$EpiStroma == zone, ]
 799 |     # }
 800 |     # if(zone_by == "Zone") {
 801 |     #   stopifnot("Error: The variable `Zone` is not found in the metadata. Run classify_zones function first." = 
 802 |     #               any(colnames(metadata) %in% "Zone"))
 803 |     #  stopifnot("Error: Argument `zone` should specify on which Epithelial zone neighboring states will be computed - 'Zone_1' / 'Zone_2' / 'Zone_3'." = 
 804 |     #               zone %in% unique(metadata$Zone) & length(zone) == 1)
 805 |     metadata <- metadata[metadata$Zone == zone, ]
 806 |     # }
 807 |   }
 808 |   metadata$enumerate <- metadata[[grep("sample*", colnames(metadata), ignore.case = TRUE)]]
 809 |   if(length(samples) == 1 && samples == "All") {
 810 |     samples <- unique(metadata$enumerate)
 811 |   } else if((length(samples) > 1 || samples != "All") & all(samples %in% unique(metadata$enumerate))) {
 812 |     samples <- samples
 813 |   } else {stop("samples supplied do not match the samples present in the metadata!")}
 814 |   
 815 |   for(samp in samples) {
 816 |     message(paste0("Processing sample: ", samp))
 817 |     
 818 |     ### ============ Actual state pairs connectivity values ============
 819 |     # Construct neighboring table for the sample
 820 |     samp_meta <- metadata[metadata$Sample == samp, ]
 821 |     neighbors_table <- .neighbors_table_func(samp_meta, spot_class = spot_class, spot_name = spot_name)
 822 |     
 823 |     # Calculate connectivity scores (spatial coherence)
 824 |     programs_connectivity_score <- sapply(sort(all_states), function(state) {
 825 |       if (!(state %in% samp_meta[[spot_class]])) {
 826 |         prog_score <- NaN
 827 |       } else {
 828 |         program_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ]
 829 |         prog_score <- .prog_connectivity_score(program_neighbors_table, state)
 830 |       }
 831 |       return(prog_score)
 832 |     })
 833 |     
 834 |     # Count neighboring states for each spot (number of free (non-coherent) X state classified spots that neighbor free Y reference-state)
 835 |     obs_adj_mat <- sapply(sort(all_states), function(state) {
 836 |       if (!(state %in% samp_meta[[spot_class]])) {
 837 |         zero_neigh <- rep(0, length(all_states)) 
 838 |         names(zero_neigh) <- all_states
 839 |         return(zero_neigh)
 840 |       } else {
 841 |         state_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ]
 842 |         num_of_neighbors = sapply(sort(as.character(all_states)), function(neighbored_state) {
 843 |           num <- .calc_adj_mat(neighbored_state, samp_meta, state_neighbors_table)
 844 |           return(num)
 845 |         })
 846 |         return(num_of_neighbors)
 847 |       }})
 848 |     diag(obs_adj_mat) <- 0
 849 |     weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)})
 850 |     
 851 |     # Calculate corrected proportion of neighboring spots
 852 |     comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)]
 853 |     comp4mat[is.na(comp4mat)] <- 0
 854 |     weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){
 855 |       new_comp <- comp4mat
 856 |       new_comp[prog] <- 0
 857 |       new_comp <- new_comp / sum(new_comp)
 858 |       return(new_comp)
 859 |     })
 860 |     
 861 |     norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2
 862 |     baseline_stat <- Melt(norm_adj_mat)
 863 |     
 864 |     ### ============ Permuted state pairs connectivity values ============
 865 |     # Permute neighbor table `n_perm` times, to create a permuted sampling distribution which will serve as the null distribution
 866 |     permute_neighbs <- parallel::mclapply(1:n_perm, function(x) {
 867 |       shuffled_spots <- sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = FALSE)
 868 |       shuffled_meta <- samp_meta %>% dplyr::mutate({{spot_name}} := shuffled_spots)
 869 |       neighbors_table <- .neighbors_table_func(shuffled_meta, spot_class = spot_class, spot_name = spot_name)
 870 |       neighbors_table <- neighbors_table[match(samp_meta[[spot_name]], rownames(neighbors_table)), ]
 871 |       
 872 |       obs_adj_mat <- sapply(sort(all_states), function(state) {
 873 |         if (!(state %in% samp_meta[[spot_class]])) {
 874 |           zero_neigh <- rep(0, length(all_states)) 
 875 |           names(zero_neigh) <- all_states
 876 |           return(zero_neigh)
 877 |         } else {
 878 |           state_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ]
 879 |           num_of_neighbors = sapply(sort(as.character(all_states)), function(neighbored_state) {
 880 |             num <- .calc_adj_mat(neighbored_state, samp_meta, state_neighbors_table)
 881 |             return(num)
 882 |           })
 883 |           return(num_of_neighbors)
 884 |         }})
 885 |       diag(obs_adj_mat) <- 0
 886 |       weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)})
 887 |       
 888 |       comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)]
 889 |       comp4mat[is.na(comp4mat)] <- 0
 890 |       weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){
 891 |         new_comp <- comp4mat
 892 |         new_comp[prog] <- 0
 893 |         new_comp <- new_comp / sum(new_comp)
 894 |         return(new_comp)
 895 |       })
 896 |       
 897 |       norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2
 898 |       perm_stat <- Melt(norm_adj_mat)
 899 |     }, mc.cores = n_cores)
 900 |     
 901 |     perm_stats <- cbind.data.frame(permute_neighbs[[1]]$rows, permute_neighbs[[1]]$cols, do.call(cbind.data.frame, lapply(permute_neighbs, function(y) y$vals))) %>% 
 902 |       magrittr::set_colnames(c("rows", "cols", paste0("perm_", seq(n_perm))))
 903 |     
 904 |     
 905 |     
 906 |     # Compare observed neighboring state proportion to the permuted null distribution - extract Neighbor-proportion, P-value & Permuted distribution statistics
 907 |     # merged_df <- merge(baseline_stat, perm_stats, by = c("rows", "cols"))
 908 |     merged_df <- na.omit(merge(baseline_stat, perm_stats, by = c("rows", "cols")))
 909 |     scaled_vals <- merged_df %>% dplyr::select(-c("rows", "cols")) %>% t() %>% scale() %>% t() %>% 
 910 |       as.data.frame() %>% dplyr::mutate(rows = merged_df$rows, cols = merged_df$cols, .before = 1) %>% dplyr::pull(vals)
 911 |     neighbs_stats <- lapply(seq_len(nrow(merged_df)), function(i) {
 912 |       summary_stats <- summary(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]))
 913 |       if(merged_df[i, "vals"] == 0) {
 914 |         pvals <- 1
 915 |         tail_direction <- NA
 916 |       } else {
 917 |         Rtail_pvals <- (sum(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]) >= merged_df[i, "vals"]) + 1) / (n_perm + 1)
 918 |         Ltail_pvals <- (n_perm - sum(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]) > merged_df[i, "vals"]) + 1) / (n_perm + 1)
 919 |         tail_direction <- Rtail_pvals < Ltail_pvals
 920 |         pvals <- Rtail_pvals * tail_direction + Ltail_pvals * !(tail_direction)
 921 |       }
 922 |       sds <- sd(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]))
 923 |       out <- data.frame(Neighb_State = merged_df$rows[i], Ref_State = merged_df$cols[i], Prop_Neighb = merged_df$vals[i], Z_Score = scaled_vals[i],
 924 |                         Perm_Min = summary_stats[["Min."]], Perm_Max = summary_stats[["Max."]], Perm_Mean = summary_stats[["Mean"]], Perm_Median = summary_stats[["Median"]],
 925 |                         P_val = pvals, Interaction_Type = ifelse(is.na(tail_direction), NA, 
 926 |                                                                  ifelse(tail_direction,"Drawn","Repelled")), SD = sds)
 927 |     })
 928 |     neighbs_stats <- do.call(rbind.data.frame, neighbs_stats)
 929 |     neighbs_stats$Significant <- neighbs_stats$P_val < signif_val
 930 |     neighbs_stats_ls[[samp]] <- neighbs_stats
 931 |     
 932 |     if(filter_signif) {
 933 |       top_neighbs_df <- neighbs_stats %>% 
 934 |         dplyr::filter(Neighb_State != Ref_State) %>%
 935 |         dplyr::mutate(State_Pair = paste0(Ref_State, "_", Neighb_State)) %>%
 936 |         dplyr::filter(Significant == TRUE) %>% 
 937 |         mutate(Signif_Pair = unname(sapply(.$State_Pair, function(x) {
 938 |           ifelse(paste0(scalop::substri(x, pos = 2, sep = "_"), "_", scalop::substri(x, pos = 1, sep = "_")) %in% .$State_Pair,
 939 |                  yes = "Yes", no = "No")
 940 |         }))) %>% dplyr::filter(abs(Z_Score) >= zscore_thresh, Signif_Pair == "Yes") %>% dplyr::arrange(desc(Z_Score))
 941 |       signif_neighbs_ls[[samp]] <- top_neighbs_df
 942 |     }
 943 |     
 944 |     # Generate plots
 945 |     if(plot_perm_distr) {
 946 |       plot_df <- reshape2::melt(perm_stats)
 947 |       distr_plot <- ggplot(plot_df, aes(x = value)) +
 948 |         facet_grid(rows ~ cols) +
 949 |         geom_histogram() +
 950 |         geom_vline(data = baseline_stat, aes(xintercept = vals), color = "red")
 951 |       distr_plots_ls[[samp]] <- distr_plot
 952 |     }
 953 |   }
 954 |   
 955 |   if(isTRUE(plot_perm_distr) & isFALSE(filter_signif)) {return(list(Neighbor_Stats = neighbs_stats_ls, Distr_plots = distr_plots_ls))}
 956 |   else if(isTRUE(filter_signif) & isFALSE(plot_perm_distr)) {return(list(Neighbs_Stats = neighbs_stats_ls, Top_Neighbs = signif_neighbs_ls))}
 957 |   else if(isTRUE(plot_perm_distr) & isTRUE(filter_signif)) {return(list(Neighbs_Stats = neighbs_stats_ls, Top_Neighbs = signif_neighbs_ls, Distr_plots = distr_plots_ls))}
 958 |   else {return(neighbs_stats_ls)}
 959 | }
 960 | 
 961 | 
 962 | 
 963 | 
 964 | Melt <- function(x) {
 965 |   if (!is.data.frame(x = x)) {
 966 |     x <- as.data.frame(x = x)
 967 |   }
 968 |   return(data.frame(
 969 |     rows = rep.int(x = rownames(x = x), times = ncol(x = x)),
 970 |     cols = unlist(x = lapply(X = colnames(x = x), FUN = rep.int, times = nrow(x = x))),
 971 |     vals = unlist(x = x, use.names = FALSE)
 972 |   ))
 973 | }
 974 | 
 975 | 
 976 | 
 977 | # regional comp functions  ------------------------------------------------
 978 | sample_programs_composition <- function(spots_clusters, gen_clusters){
 979 |   composition <- table(spots_clusters$spot_type)
 980 |   old_clusters <- names(composition)
 981 |   add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
 982 |   sapply(add_clusters,function(clust){
 983 |     composition <<- c(composition, clust = 0)
 984 |   })
 985 |   
 986 |   names(composition) <- c(old_clusters, add_clusters)
 987 |   final_composition <- composition[sort(names(composition))]/sum(composition)
 988 |   return(final_composition)
 989 | }
 990 | 
 991 | obs_program_spatial_score <- function(program_neighbors, cluster){
 992 |   cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 1, 0)
 993 |   if(is.null(dim(program_neighbors))){
 994 |     cluster_neighbors_sum <- sum(cluster_neighbors_bin)
 995 |   } else {
 996 |     cluster_neighbors_sum <- apply(cluster_neighbors_bin,1,function(rx){sum(na.omit(rx))})
 997 |   }
 998 |   obs <- mean(cluster_neighbors_sum)
 999 |   return(obs)
1000 | }
1001 | 
1002 | one_val <- function(spots_num){
1003 |   a <- sqrt((4*spots_num)/(6*sqrt(3)))
1004 |   oneval <- (6*spots_num-12*a-6)/spots_num
1005 |   return(oneval)
1006 | }
1007 | 
1008 | zero_val <- function(rand_table, spots_clusters, cluster){
1009 |   all_zeroval <- sapply(rand_table, function(neighbors_rand_table){
1010 |     program_rand_neighbors_table = neighbors_rand_table[row.names(neighbors_rand_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster],]
1011 |     rand_obs <- obs_program_spatial_score(program_rand_neighbors_table, cluster)
1012 |     return(rand_obs)
1013 |   })
1014 |   zeroval <- mean(all_zeroval)
1015 |   return(zeroval)
1016 | }
1017 | 
1018 | 
1019 | calc_adj_mat <- function(neighbored_cluster, spots_clusters, cluster_neighbors_table){
1020 |   if (!(neighbored_cluster %in% spots_clusters$spot_type)) {
1021 |     return(0)
1022 |   } else {
1023 |     cluster_neighbors_bin <- ifelse(cluster_neighbors_table == neighbored_cluster, 1, 0)
1024 |     
1025 |     if (is.null(dim(cluster_neighbors_bin))){
1026 |       cluster_neighbors_sum <- sum(na.omit(cluster_neighbors_bin))
1027 |     } else {
1028 |       cluster_neighbors_sum <- sum(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))}))
1029 |     }
1030 |     return(cluster_neighbors_sum)
1031 |   }
1032 | }
1033 | 
1034 | 
1035 | prog_connectivity_score <- function(program_neighbors, cluster){
1036 |   cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 0, 1)
1037 |   if(is.null(dim(program_neighbors))){
1038 |     prog_connect <- 1
1039 |   } else {
1040 |     prog_connect <- length(which(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))}) >0))
1041 |   }
1042 |   return(prog_connect)
1043 | }
1044 | 
1045 | neighbors_table_func <- function(spots_positions,spots_clusters){
1046 |   neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
1047 |     spots_row = spots_positions[spots_positions$V1 == spot, 3]
1048 |     spots_col = spots_positions[spots_positions$V1 == spot, 4]
1049 |     
1050 |     if (spots_col == 0 | spots_row == 0) {
1051 |       c1 = NaN
1052 |     } else {
1053 |       n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
1054 |       if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){
1055 |         c1 = NaN
1056 |       } else {
1057 |         c1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1])
1058 |       }
1059 |     }
1060 |     
1061 |     if (spots_col == 127 | spots_row == 0) {
1062 |       c2 = NaN
1063 |     } else {
1064 |       n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
1065 |       if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){
1066 |         c2 = NaN
1067 |       } else {
1068 |         c2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2])
1069 |       }
1070 |     }
1071 |     
1072 |     if (spots_col == 0 | spots_col == 1) {
1073 |       c3 = NaN
1074 |     } else {
1075 |       n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
1076 |       if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){
1077 |         c3 = NaN
1078 |       } else {
1079 |         c3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3])
1080 |       }
1081 |     }
1082 |     
1083 |     if (spots_col == 126 | spots_col == 127) {
1084 |       c4 = NaN
1085 |     } else {
1086 |       n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
1087 |       if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){
1088 |         c4 = NaN
1089 |       } else {
1090 |         c4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4])
1091 |       }
1092 |     }
1093 |     
1094 |     if (spots_col == 0 | spots_row == 77) {
1095 |       c5 = NaN
1096 |     } else {
1097 |       n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
1098 |       if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){
1099 |         c5 = NaN
1100 |       } else {
1101 |         c5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5])
1102 |       }
1103 |     }
1104 |     
1105 |     if (spots_col == 127 | spots_row == 77) {
1106 |       c6 = NaN
1107 |     } else {
1108 |       n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
1109 |       if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){
1110 |         c6 = NaN
1111 |       } else {
1112 |         c6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6])
1113 |       }
1114 |     }
1115 |     
1116 |     
1117 |     return(c(c1,c2,c3,c4,c5,c6))
1118 |     
1119 |   })
1120 |   
1121 |   neighbors_table = t(neighbors_table)
1122 |   row.names(neighbors_table) = spots_clusters$barcodes
1123 |   
1124 |   return(neighbors_table)
1125 | }
1126 | 
1127 | neighbors_table_funcV2 <- function(spots_positions,spots_clusters){
1128 |   neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
1129 |     spots_row = spots_positions[spots_positions$V1 == spot, 3]
1130 |     spots_col = spots_positions[spots_positions$V1 == spot, 4]
1131 |     
1132 |     if (spots_col == 0 | spots_row == 0) {
1133 |       c1 = NaN
1134 |     } else {
1135 |       n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
1136 |       if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){
1137 |         c1 = NaN
1138 |       } else {
1139 |         c1 = as.character(n1)
1140 |       }
1141 |     }
1142 |     
1143 |     if (spots_col == 127 | spots_row == 0) {
1144 |       c2 = NaN
1145 |     } else {
1146 |       n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
1147 |       if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){
1148 |         c2 = NaN
1149 |       } else {
1150 |         c2 = as.character(n2)
1151 |       }
1152 |     }
1153 |     
1154 |     if (spots_col == 0 | spots_col == 1) {
1155 |       c3 = NaN
1156 |     } else {
1157 |       n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
1158 |       if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){
1159 |         c3 = NaN
1160 |       } else {
1161 |         c3 = as.character(n3)
1162 |       }
1163 |     }
1164 |     
1165 |     if (spots_col == 126 | spots_col == 127) {
1166 |       c4 = NaN
1167 |     } else {
1168 |       n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
1169 |       if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){
1170 |         c4 = NaN
1171 |       } else {
1172 |         c4 = as.character(n4)
1173 |       }
1174 |     }
1175 |     
1176 |     if (spots_col == 0 | spots_row == 77) {
1177 |       c5 = NaN
1178 |     } else {
1179 |       n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
1180 |       if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){
1181 |         c5 = NaN
1182 |       } else {
1183 |         c5 = as.character(n5)
1184 |       }
1185 |     }
1186 |     
1187 |     if (spots_col == 127 | spots_row == 77) {
1188 |       c6 = NaN
1189 |     } else {
1190 |       n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
1191 |       if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){
1192 |         c6 = NaN
1193 |       } else {
1194 |         c6 = as.character(n6)
1195 |       }
1196 |     }
1197 |     
1198 |     
1199 |     return(c(c1,c2,c3,c4,c5,c6))
1200 |     
1201 |   })
1202 |   
1203 |   neighbors_table = t(neighbors_table)
1204 |   row.names(neighbors_table) = spots_clusters$barcodes
1205 |   
1206 |   return(neighbors_table)
1207 | }
1208 | 
1209 | 
1210 | win_prox_neighbors_table_func <- function(spots_positions,spots_clusters){
1211 |   neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
1212 |     spots_row = spots_positions[spots_positions$V1 == spot, 3]
1213 |     spots_col = spots_positions[spots_positions$V1 == spot, 4]
1214 |     
1215 |     if (spots_col == 0 | spots_row == 0) {
1216 |       n1 = NA
1217 |     } else {
1218 |       n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
1219 |       if (length(n1_temp) == 0) {
1220 |         n1 = NA
1221 |       } else if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){
1222 |         n1 = NA
1223 |       } else {
1224 |         n1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1_temp])
1225 |       }
1226 |     }
1227 |     
1228 |     if (spots_col == 127 | spots_row == 0) {
1229 |       n2 = NA
1230 |     } else {
1231 |       n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
1232 |       if (length(n2_temp) == 0) {
1233 |         n2 = NA
1234 |       } else if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){
1235 |         n2 = NA
1236 |       } else {
1237 |         n2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2_temp])
1238 |       }
1239 |     }
1240 |     
1241 |     if (spots_col == 0 | spots_col == 1) {
1242 |       n3 = NA
1243 |     } else {
1244 |       n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
1245 |       if (length(n3_temp) == 0) {
1246 |         n3 = NA
1247 |       } else if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){
1248 |         n3 = NA
1249 |       } else {
1250 |         n3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3_temp])
1251 |       }
1252 |     }
1253 |     
1254 |     if (spots_col == 126 | spots_col == 127) {
1255 |       n4 = NA
1256 |     } else {
1257 |       n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
1258 |       if (length(n4_temp) == 0) {
1259 |         n4 = NA
1260 |       } else if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){
1261 |         n4 = NA
1262 |       } else {
1263 |         n4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4_temp])
1264 |       }
1265 |     }
1266 |     
1267 |     if (spots_col == 0 | spots_row == 77) {
1268 |       n5 = NA
1269 |     } else {
1270 |       n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
1271 |       if (length(n5_temp) == 0) {
1272 |         n5 = NA
1273 |       } else if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){
1274 |         n5 = NA
1275 |       } else {
1276 |         n5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5_temp])
1277 |       }
1278 |     }
1279 |     
1280 |     if (spots_col == 127 | spots_row == 77) {
1281 |       n6 = NA
1282 |     } else {
1283 |       n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
1284 |       if (length(n6_temp) == 0) {
1285 |         n6 = NA
1286 |       } else if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){
1287 |         n6 = NA
1288 |       } else {
1289 |         n6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6_temp])
1290 |       }
1291 |     }
1292 |     
1293 |     
1294 |     return(c(n1,n2,n3,n4,n5,n6))
1295 |     
1296 |   })
1297 |   
1298 |   neighbors_table = t(neighbors_table)
1299 |   row.names(neighbors_table) = spots_clusters$barcodes
1300 |   
1301 |   return(neighbors_table)
1302 | }
1303 | 
1304 | 
1305 | prox_neighbors_table_func <- function(spots_positions,spots_clusters){
1306 |   neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
1307 |     spots_row = spots_positions[spots_positions$V1 == spot, 3]
1308 |     spots_col = spots_positions[spots_positions$V1 == spot, 4]
1309 |     
1310 |     if (spots_col == 0 | spots_row == 0) {
1311 |       n1 = NA
1312 |     } else {
1313 |       n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
1314 |       if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){
1315 |         n1 = NA
1316 |       } else {
1317 |         n1 = n1_temp
1318 |       }
1319 |     }
1320 |     
1321 |     if (spots_col == 127 | spots_row == 0) {
1322 |       n2 = NA
1323 |     } else {
1324 |       n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
1325 |       if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){
1326 |         n2 = NA
1327 |       } else {
1328 |         n2 = n2_temp
1329 |       }
1330 |     }
1331 |     
1332 |     if (spots_col == 0 | spots_col == 1) {
1333 |       n3 = NA
1334 |     } else {
1335 |       n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
1336 |       if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){
1337 |         n3 = NA
1338 |       } else {
1339 |         n3 = n3_temp
1340 |       }
1341 |     }
1342 |     
1343 |     if (spots_col == 126 | spots_col == 127) {
1344 |       n4 = NA
1345 |     } else {
1346 |       n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
1347 |       if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){
1348 |         n4 = NA
1349 |       } else {
1350 |         n4 = n4_temp
1351 |       }
1352 |     }
1353 |     
1354 |     if (spots_col == 0 | spots_row == 77) {
1355 |       n5 = NA
1356 |     } else {
1357 |       n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
1358 |       if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){
1359 |         n5 = NA
1360 |       } else {
1361 |         n5 = n5_temp
1362 |       }
1363 |     }
1364 |     
1365 |     if (spots_col == 127 | spots_row == 77) {
1366 |       n6 = NA
1367 |     } else {
1368 |       n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
1369 |       if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){
1370 |         n6 = NA
1371 |       } else {
1372 |         n6 = n6_temp
1373 |       }
1374 |     }
1375 |     
1376 |     
1377 |     return(c(n1,n2,n3,n4,n5,n6))
1378 |     
1379 |   })
1380 |   
1381 |   neighbors_table = t(neighbors_table)
1382 |   row.names(neighbors_table) = spots_clusters$barcodes
1383 |   
1384 |   return(neighbors_table)
1385 | }
1386 | 


--------------------------------------------------------------------------------
/Module4_consensus_interactions.R:
--------------------------------------------------------------------------------
  1 | library(ggvenn)
  2 | 
  3 | # Load data ---------------------------------------------------------------
  4 | 
  5 | 
  6 | file_list <- list.files("MP/mp_assign_124/")
  7 | 
  8 | gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
  9 |   mp_assign <- readRDS(paste("MP/mp_assign_124/", file_list[i], sep = ""))
 10 |   return(unique(mp_assign$spot_type_meta_new))
 11 | }))))
 12 | 
 13 | pairs <- combn(sort(gen_clusters),2)
 14 | pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
 15 | 
 16 | st_samp <-c("UKF243","UKF248","UKF255","UKF259","UKF260","UKF266","UKF269","UKF275","UKF304","UKF313","UKF334","ZH1019inf","ZH881T1","ZH916bulk","ZH916inf","ZH916T1","ZH1007nec","ZH1019T1","ZH8811Abulk","ZH8811Bbulk")
 17 | dis_samp <-c("MGH258","UKF251","UKF259","UKF260","UKF275","UKF296","UKF313","ZH1019inf","ZH881inf","ZH916inf","ZH1007inf","ZH1019T1", "ZH8812bulk")
 18 | 
 19 | 
 20 | # structured ----------------------------------------------------------------------
 21 | 
 22 | coloc_st <- readRDS("Summary_tables/colocal_structured_summary.rds")
 23 | conn_st <- readRDS("Summary_tables/conn_st.rds")
 24 | prox_st <- readRDS("Summary_tables/prox_st.rds")
 25 | (sum(coloc_st$enriched)+sum(conn_st$enr)+sum(prox_st[,5]$enr))/(length(pairs_names)*length(st_samp)*3)
 26 | 
 27 | coloc_st$mean_scaled <- ifelse(coloc_st$mean_enrichment < 1,
 28 |                                ((coloc_st$mean_enrichment - min(coloc_st$mean_enrichment))/(max(coloc_st$mean_enrichment)-min(coloc_st$mean_enrichment))) - 1,
 29 |                                ((coloc_st$mean_enrichment - min(coloc_st$mean_enrichment))/(max(coloc_st$mean_enrichment)-min(coloc_st$mean_enrichment)))) 
 30 | conn_st$mean_scaled <- ifelse(conn_st$mean_score < 0,
 31 |                               ((conn_st$mean_score - min(na.omit(conn_st$mean_score)))/(max(na.omit(conn_st$mean_score))-min(na.omit(conn_st$mean_score)))) - 1,
 32 |                               ((conn_st$mean_score - min(na.omit(conn_st$mean_score)))/(max(na.omit(conn_st$mean_score))-min(na.omit(conn_st$mean_score)))))
 33 | 
 34 | 
 35 | summary_coloc <- data.frame(pair = coloc_st$pair,
 36 |                             analysis = rep("coloc", 91),
 37 |                             prop = coloc_st$proportion_sig,
 38 |                             mean_scaled = coloc_st$mean_scaled)
 39 | 
 40 | new_pair <- as.character(sapply(summary_coloc$pair, function(p1){
 41 |   splt <- strsplit(p1, " ")[[1]]
 42 |   new_name <- sapply(row.names(conn_st),function(p2){
 43 |     splt2 <- strsplit(p2, " ")[[1]]
 44 |     if(splt[1] %in% splt2 & splt[2] %in% splt2) {
 45 |       return(p2)
 46 |     } else
 47 |       return(NA)
 48 |   })
 49 |   return(na.omit(new_name))
 50 | }))
 51 | 
 52 | summary_coloc$pair <- new_pair
 53 | 
 54 | 
 55 | summary_adj <- data.frame(pair = row.names(conn_st),
 56 |                           analysis = rep("adj", 91),
 57 |                           prop = conn_st$prop,
 58 |                           mean_scaled = conn_st$mean_scaled)
 59 | 
 60 | summary_prox5 <- data.frame(pair = row.names(conn_st),
 61 |                             analysis = rep("prox5", 91),
 62 |                             prop = prox_st[,5]$prop,
 63 |                             mean_scaled = prox_st[,5]$mean_score)
 64 | 
 65 | summary_prox5$mean_scaled <- ifelse(summary_prox5$mean_scaled < 0,
 66 |                                     ((summary_prox5$mean_scaled - min(na.omit(summary_prox5$mean_scaled)))/(max(na.omit(summary_prox5$mean_scaled))-min(na.omit(summary_prox5$mean_scaled)))) - 1,
 67 |                                     ((summary_prox5$mean_scaled - min(na.omit(summary_prox5$mean_scaled)))/(max(na.omit(summary_prox5$mean_scaled))-min(na.omit(summary_prox5$mean_scaled)))))
 68 | 
 69 | 
 70 | summary_prox8 <- data.frame(pair = row.names(conn_st),
 71 |                             analysis = rep("prox8", 91),
 72 |                             prop = prox_st[,8]$prop,
 73 |                             mean_scaled = prox_st[,8]$mean_score)
 74 | 
 75 | summary_prox8$mean_scaled <- ifelse(summary_prox8$mean_scaled < 0,
 76 |                                     ((summary_prox8$mean_scaled - min(na.omit(summary_prox8$mean_scaled)))/(max(na.omit(summary_prox8$mean_scaled))-min(na.omit(summary_prox8$mean_scaled)))) - 1,
 77 |                                     ((summary_prox8$mean_scaled - min(na.omit(summary_prox8$mean_scaled)))/(max(na.omit(summary_prox8$mean_scaled))-min(na.omit(summary_prox8$mean_scaled)))))
 78 | 
 79 | summary_prox15 <- data.frame(pair = row.names(conn_st),
 80 |                              analysis = rep("prox15", 91),
 81 |                              prop = prox_st[,15]$prop,
 82 |                              mean_scaled = prox_st[,15]$mean_score)
 83 | 
 84 | summary_prox15$mean_scaled <- ifelse(summary_prox15$mean_scaled < 0,
 85 |                                      ((summary_prox15$mean_scaled - min(na.omit(summary_prox15$mean_scaled)))/(max(na.omit(summary_prox15$mean_scaled))-min(na.omit(summary_prox15$mean_scaled)))) - 1,
 86 |                                      ((summary_prox15$mean_scaled - min(na.omit(summary_prox15$mean_scaled)))/(max(na.omit(summary_prox15$mean_scaled))-min(na.omit(summary_prox15$mean_scaled)))))
 87 | 
 88 | 
 89 | summary_df <- rbind(summary_coloc,summary_adj,summary_prox5,summary_prox8,summary_prox15)
 90 | 
 91 | pairs_mean <- sapply(unique(summary_df$pair),function(p){
 92 |   p_df <- summary_df[summary_df$pair == p,]
 93 |   return(mean(na.omit(p_df$mean_scaled)))
 94 | })
 95 | 
 96 | summary_df$pair_mean <- sapply(summary_df$pair, function(p){
 97 |   return(pairs_mean[p])
 98 | })
 99 | 
100 | summary_df$pattern <- ifelse(summary_df$pair_mean > 0.35, "connected",
101 |                              ifelse(summary_df$pair_mean < -0.35, "dis connected","mix"))
102 | st_summary <- summary_df
103 | 
104 | 
105 | # dis-organized  ---------------------------------------------------------------------
106 | 
107 | 
108 | coloc_dis <- readRDS("Summary_tables/colocal_summary_dis.rds")
109 | conn_dis <- readRDS("Summary_tables/conn_dis.rds")
110 | prox_dis <- readRDS("Summary_tables/prox_dis.rds")
111 | round((sum(coloc_dis$enriched)+sum(conn_dis$enr)+sum(prox_dis[,5]$enr))/(length(pairs_names)*length(dis_samp)*3),2)
112 | 
113 | coloc_dis$mean_scaled <- ifelse(coloc_dis$mean_enrichment < 1,
114 |                                 ((coloc_dis$mean_enrichment - min(coloc_dis$mean_enrichment))/(max(coloc_dis$mean_enrichment)-min(coloc_dis$mean_enrichment))) - 1,
115 |                                 ((coloc_dis$mean_enrichment - min(coloc_dis$mean_enrichment))/(max(coloc_dis$mean_enrichment)-min(coloc_dis$mean_enrichment)))) 
116 | conn_dis$mean_scaled <- ifelse(conn_dis$mean_score < 0,
117 |                                ((conn_dis$mean_score - min(na.omit(conn_dis$mean_score)))/(max(na.omit(conn_dis$mean_score))-min(na.omit(conn_dis$mean_score)))) - 1,
118 |                                ((conn_dis$mean_score - min(na.omit(conn_dis$mean_score)))/(max(na.omit(conn_dis$mean_score))-min(na.omit(conn_dis$mean_score)))))
119 | 
120 | 
121 | summary_coloc <- data.frame(pair = coloc_dis$pairs,
122 |                             analysis = rep("coloc", 91),
123 |                             prop = coloc_dis$prop_sig,
124 |                             mean_scaled = coloc_dis$mean_scaled)
125 | 
126 | new_pair <- as.character(sapply(summary_coloc$pair, function(p1){
127 |   splt <- strsplit(p1, " ")[[1]]
128 |   new_name <- sapply(row.names(conn_dis),function(p2){
129 |     splt2 <- strsplit(p2, " ")[[1]]
130 |     if(splt[1] %in% splt2 & splt[2] %in% splt2) {
131 |       return(p2)
132 |     } else
133 |       return(NA)
134 |   })
135 |   return(na.omit(new_name))
136 | }))
137 | 
138 | summary_coloc$pair <- new_pair
139 | 
140 | 
141 | summary_adj <- data.frame(pair = row.names(conn_dis),
142 |                           analysis = rep("adj", 91),
143 |                           prop = conn_dis$prop,
144 |                           mean_scaled = conn_dis$mean_scaled)
145 | 
146 | summary_prox5 <- data.frame(pair = row.names(conn_dis),
147 |                             analysis = rep("prox5", 91),
148 |                             prop = prox_dis[,5]$prop,
149 |                             mean_scaled = prox_dis[,5]$mean_score)
150 | 
151 | summary_prox5$mean_scaled <- ifelse(summary_prox5$mean_scaled < 0,
152 |                                     ((summary_prox5$mean_scaled - min(na.omit(summary_prox5$mean_scaled)))/(max(na.omit(summary_prox5$mean_scaled))-min(na.omit(summary_prox5$mean_scaled)))) - 1,
153 |                                     ((summary_prox5$mean_scaled - min(na.omit(summary_prox5$mean_scaled)))/(max(na.omit(summary_prox5$mean_scaled))-min(na.omit(summary_prox5$mean_scaled)))))
154 | 
155 | 
156 | summary_prox8 <- data.frame(pair = row.names(conn_dis),
157 |                             analysis = rep("prox8", 91),
158 |                             prop = prox_dis[,8]$prop,
159 |                             mean_scaled = prox_dis[,8]$mean_score)
160 | 
161 | summary_prox8$mean_scaled <- ifelse(summary_prox8$mean_scaled < 0,
162 |                                     ((summary_prox8$mean_scaled - min(na.omit(summary_prox8$mean_scaled)))/(max(na.omit(summary_prox8$mean_scaled))-min(na.omit(summary_prox8$mean_scaled)))) - 1,
163 |                                     ((summary_prox8$mean_scaled - min(na.omit(summary_prox8$mean_scaled)))/(max(na.omit(summary_prox8$mean_scaled))-min(na.omit(summary_prox8$mean_scaled)))))
164 | 
165 | summary_prox15 <- data.frame(pair = row.names(conn_dis),
166 |                              analysis = rep("prox15", 91),
167 |                              prop = prox_dis[,15]$prop,
168 |                              mean_scaled = prox_dis[,15]$mean_score)
169 | 
170 | summary_prox15$mean_scaled <- ifelse(summary_prox15$mean_scaled < 0,
171 |                                      ((summary_prox15$mean_scaled - min(na.omit(summary_prox15$mean_scaled)))/(max(na.omit(summary_prox15$mean_scaled))-min(na.omit(summary_prox15$mean_scaled)))) - 1,
172 |                                      ((summary_prox15$mean_scaled - min(na.omit(summary_prox15$mean_scaled)))/(max(na.omit(summary_prox15$mean_scaled))-min(na.omit(summary_prox15$mean_scaled)))))
173 | 
174 | 
175 | summary_df <- rbind(summary_coloc,summary_adj,summary_prox5,summary_prox8,summary_prox15)
176 | 
177 | pairs_mean <- sapply(unique(summary_df$pair),function(p){
178 |   p_df <- summary_df[summary_df$pair == p,]
179 |   return(mean(na.omit(p_df$mean_scaled)))
180 | })
181 | 
182 | summary_df$pair_mean <- sapply(summary_df$pair, function(p){
183 |   return(pairs_mean[p])
184 | })
185 | 
186 | summary_df$pattern <- ifelse(summary_df$pair_mean > 0.35, "connected",
187 |                              ifelse(summary_df$pair_mean < -0.35, "dis connected","mix"))
188 | 
189 | dis_summary <- summary_df
190 | 
191 | # st vs dis strongest couplings ---------------------------------------------------------------
192 | 
193 | 
194 | dim(st_summary[st_summary$pattern == "connected" & st_summary$analysis %in% c("coloc", "adj", "prox5") & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2,])
195 | t_st <- table(as.character(st_summary$pair[st_summary$pattern == "connected" & st_summary$analysis %in% c("coloc", "adj") & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2]))
196 | colocOadj_st <- names(t_st)
197 | colocPadj_st <- names(t_st[t_st > 1])
198 | t_st2 <- table(as.character(st_summary$pair[st_summary$pattern == "connected" & st_summary$analysis %in% c("prox5","prox8","prox15") & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2]))
199 | prox_st <- names(t_st2)
200 | st_pairs <- unique(c(colocPadj_st, intersect(colocOadj_st,prox_st)))
201 | 
202 | 
203 | dim(dis_summary[dis_summary$pattern == "connected" & dis_summary$analysis %in% c("coloc", "adj", "prox5") & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2,])
204 | t_dis <- table(as.character(dis_summary$pair[dis_summary$pattern == "connected" & dis_summary$analysis %in% c("coloc", "adj") & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2]))
205 | colocOadj_dis <- names(t_dis)
206 | colocPadj_dis <- names(t_dis[t_dis > 1])
207 | t_dis2 <- table(as.character(dis_summary$pair[dis_summary$pattern == "connected" & dis_summary$analysis %in% c("prox5","prox8","prox15") & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2]))
208 | prox_dis <- names(t_dis2)
209 | dis_pairs <- unique(c(colocPadj_dis, intersect(colocOadj_dis,prox_dis)))
210 | 
211 | 
212 | 
213 | 
214 | # st vs dis strong coupling by scale  -------------------------------------
215 | 
216 | 
217 | prox_st <- readRDS("Summary_tables/prox_st.rds")
218 | summary_prox10 <- data.frame(pair = pairs_names,
219 |                              analysis = rep("prox10", 91),
220 |                              prop = prox_st[,10]$prop,
221 |                              mean_scaled = prox_st[,10]$mean_score)
222 | 
223 | summary_prox10$mean_scaled <- ifelse(summary_prox10$mean_scaled < 0,
224 |                                      ((summary_prox10$mean_scaled - min(na.omit(summary_prox10$mean_scaled)))/(max(na.omit(summary_prox10$mean_scaled))-min(na.omit(summary_prox10$mean_scaled)))) - 1,
225 |                                      ((summary_prox10$mean_scaled - min(na.omit(summary_prox10$mean_scaled)))/(max(na.omit(summary_prox10$mean_scaled))-min(na.omit(summary_prox10$mean_scaled)))))
226 | 
227 | 
228 | summary_prox12 <- data.frame(pair = pairs_names,
229 |                              analysis = rep("prox12", 91),
230 |                              prop = prox_st[,12]$prop,
231 |                              mean_scaled = prox_st[,12]$mean_score)
232 | 
233 | summary_prox12$mean_scaled <- ifelse(summary_prox12$mean_scaled < 0,
234 |                                      ((summary_prox12$mean_scaled - min(na.omit(summary_prox12$mean_scaled)))/(max(na.omit(summary_prox12$mean_scaled))-min(na.omit(summary_prox12$mean_scaled)))) - 1,
235 |                                      ((summary_prox12$mean_scaled - min(na.omit(summary_prox12$mean_scaled)))/(max(na.omit(summary_prox12$mean_scaled))-min(na.omit(summary_prox12$mean_scaled)))))
236 | 
237 | st_coloc  <- length(st_summary$pair[st_summary$analysis == "coloc" & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2])
238 | st_adj  <- length(st_summary$pair[st_summary$analysis == "adj" & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2])
239 | st_p8  <- length(st_summary$pair[st_summary$analysis == "prox8" & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2])
240 | st_p10  <- length(summary_prox10$pair[summary_prox10$analysis == "prox10" & summary_prox10$mean_scaled > 0.35 & summary_prox10$prop >= 0.2])
241 | st_p12  <- length(summary_prox12$pair[summary_prox12$analysis == "prox12" & summary_prox12$mean_scaled > 0.35 & summary_prox12$prop >= 0.2])
242 | st_p15  <- length(st_summary$pair[st_summary$analysis == "prox15" & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2])
243 | 
244 | 
245 | prox_dis <- readRDS("Summary_tables/prox_dis.rds")
246 | summary_prox10 <- data.frame(pair = pairs_names,
247 |                              analysis = rep("prox10", 91),
248 |                              prop = prox_dis[,10]$prop,
249 |                              mean_scaled = prox_dis[,10]$mean_score)
250 | 
251 | summary_prox10$mean_scaled <- ifelse(summary_prox10$mean_scaled < 0,
252 |                                      ((summary_prox10$mean_scaled - min(na.omit(summary_prox10$mean_scaled)))/(max(na.omit(summary_prox10$mean_scaled))-min(na.omit(summary_prox10$mean_scaled)))) - 1,
253 |                                      ((summary_prox10$mean_scaled - min(na.omit(summary_prox10$mean_scaled)))/(max(na.omit(summary_prox10$mean_scaled))-min(na.omit(summary_prox10$mean_scaled)))))
254 | 
255 | 
256 | summary_prox12 <- data.frame(pair = pairs_names,
257 |                              analysis = rep("prox12", 91),
258 |                              prop = prox_dis[,12]$prop,
259 |                              mean_scaled = prox_dis[,12]$mean_score)
260 | 
261 | summary_prox12$mean_scaled <- ifelse(summary_prox12$mean_scaled < 0,
262 |                                      ((summary_prox12$mean_scaled - min(na.omit(summary_prox12$mean_scaled)))/(max(na.omit(summary_prox12$mean_scaled))-min(na.omit(summary_prox12$mean_scaled)))) - 1,
263 |                                      ((summary_prox12$mean_scaled - min(na.omit(summary_prox12$mean_scaled)))/(max(na.omit(summary_prox12$mean_scaled))-min(na.omit(summary_prox12$mean_scaled)))))
264 | 
265 | 
266 | dis_coloc  <- length(dis_summary$pair[dis_summary$analysis == "coloc" & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2])
267 | dis_adj  <- length(dis_summary$pair[dis_summary$analysis == "adj" & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2])
268 | dis_p8  <- length(dis_summary$pair[dis_summary$analysis == "prox8" & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2])
269 | dis_p10  <- length(summary_prox10$pair[summary_prox10$analysis == "prox10" & summary_prox10$mean_scaled > 0.35 & summary_prox10$prop >= 0.2])
270 | dis_p12  <- length(summary_prox12$pair[summary_prox12$analysis == "prox12" & summary_prox12$mean_scaled > 0.35 & summary_prox12$prop >= 0.2])
271 | dis_p15  <- length(dis_summary$pair[dis_summary$analysis == "prox15" & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2])
272 | 
273 | coupling_by_scale <- data.frame(num_coup = c(st_coloc,st_adj,st_p8,
274 |                                              dis_coloc,dis_adj,dis_p8),
275 |                                 region = c(rep("st",3),rep("dis",3)),
276 |                                 analysis = rep(c("coloc","adj","prox8"),2))
277 | 
278 | 
279 | coupling_by_scale$analysis <- factor(coupling_by_scale$analysis, levels = c("coloc","adj","prox8"))
280 | 
281 | ggplot(coupling_by_scale, aes(x=analysis, y=num_coup, group=region)) +
282 |   geom_line(aes(color=region))+
283 |   geom_point(aes(color=region))
284 | 
285 | st_p8_pairs  <- unique(st_summary$pair[st_summary$analysis %in% c("prox5","prox8","prox15") & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2])
286 | dis_p8_pairs  <- unique(dis_summary$pair[dis_summary$analysis %in% c("prox5","prox8","prox15") & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2])
287 | 
288 | 
289 | x <- list(
290 |   struct = st_p8_pairs, 
291 |   disorganised = dis_p8_pairs
292 | )
293 | 
294 | ggvenn(
295 |   x, 
296 |   fill_color = c("white", "white"),
297 |   stroke_size = 0.5, set_name_size = 4
298 | )
299 | 
300 | stdis_int <- intersect(st_p8_pairs,dis_p8_pairs)
301 | only_st <- st_p8_pairs[!(st_p8_pairs %in% stdis_int)]
302 | only_dis <- dis_p8_pairs[!(dis_p8_pairs %in% stdis_int)]
303 | 
304 | 
305 | 
306 | 


--------------------------------------------------------------------------------
/Module5_CNA.R:
--------------------------------------------------------------------------------
  1 | library(tidyverse)
  2 | library(scalop)
  3 | library(patchwork)
  4 | library(colorspace)
  5 | library(ggpubr)
  6 | library(Matrix)
  7 | library(biomaRt)
  8 | 
  9 | # Run functions first
 10 | 
 11 | # load data ---------------------------------------------------------------
 12 | 
 13 | 
 14 | # Load Data (counts mat exported from Seurat post-spot filtering:
 15 | 
 16 | samples_names <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1
 17 | 
 18 | norm_brain_ref1<-readRDS("CNA/normal_brain_ref/UKF256_C.rds")
 19 | colnames(norm_brain_ref1) <- paste(colnames(norm_brain_ref1), "_ref1", sep="") 
 20 | 
 21 | norm_brain_ref2<-readRDS("CNA/normal_brain_ref/UKF265_C.rds")
 22 | colnames(norm_brain_ref2) <- paste(colnames(norm_brain_ref2), "_ref2", sep="") 
 23 | 
 24 | 
 25 | samples_regions <- read.delim("CNA/samples_regions.txt", header = TRUE)
 26 | 
 27 | 
 28 | # all cna + metadata --------------------------------------------------------
 29 | 
 30 | samples_num <- c(1:26)
 31 | all_cna <- mclapply(samples_num, all_cna_fx, mc.cores = 26)
 32 | 
 33 | all_cna_fx <- function(i){
 34 |   m1 <- readRDS(paste("general/exp_mats_GBM/", samples_names[i], "counts.rds", sep = ""))
 35 |   m <- cbind(m1,norm_brain_ref1,norm_brain_ref2)
 36 |   
 37 |   scaling_factor <- 1000000/colSums(m) #tumor mat processing
 38 |   m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*")
 39 |   m_loged <- log2(1 + (m_CPM/10))
 40 |   var_filter <- apply(m_loged, 1, var) # removing genes with zero variance across all cells
 41 |   m_proc <- m_loged[var_filter != 0, ]
 42 |   exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)] # filtering out lowly expressed genes
 43 |   m_proc <- m_proc[exp_genes, ]
 44 |   
 45 |   ref <-list(colnames(norm_brain_ref1), colnames(norm_brain_ref2))
 46 |   query <- colnames(m_proc)[!is.element(colnames(m_proc), unname(unlist(ref)))]
 47 |   
 48 |   # create metadata table 
 49 |   metadata <- tibble(CellID = colnames(m_proc), 
 50 |                      org_ref = ifelse(colnames(m_proc) %in% unname(unlist(ref)), "reference", "not reference"))
 51 |   
 52 |   metadata$sample <- "control"
 53 |   
 54 |   metadata$sample <- ifelse(metadata$org_ref == "not reference", samples_names[i], metadata$sample)
 55 |   
 56 |   
 57 |   # calc CNA
 58 |   hg19<- readRDS("CNA/hg38.rds")
 59 |   cna_score_mat <- calc_cna(matrix = m_proc, query = query, ref = ref, genome = hg19, range = c(-3,3), window = 150, noise = 0.15, isLog = TRUE, per_chr = TRUE, scale = NULL, top_genes = NULL, verbose = TRUE)
 60 |   sig_and_cor <- cna_sig_cor(cna_score_mat, epi_cells = query, ref_cells = ref, cna_sig = "abs", top_region = 1/3, top_cells = 1/3)
 61 |   metadata$CNAsig_org <- sig_and_cor$cna_signal[match(metadata$CellID, names(sig_and_cor$cna_signal))]
 62 |   metadata$CNAcor_org <- sig_and_cor$cna_correlation[match(metadata$CellID, names(sig_and_cor$cna_correlation))]
 63 |   
 64 |   # update reference with combined external ref + tumor defined by cna sig / corr thresholds
 65 |   metadata$new_ref <- ifelse((metadata$CNAcor_org <= 0.25 & metadata$CNAsig_org <= 0.13) | (metadata$org_ref == "reference"), "reference", "query")
 66 |   
 67 |   ref <- metadata$CellID[metadata$new_ref == "reference"]
 68 |   query <- colnames(m_proc)[!is.element(colnames(m_proc), unname(unlist(ref)))]
 69 |   
 70 |   # calc CNA with updated ref
 71 |   cna_score_mat <- calc_cna(matrix = m_proc, query = query, ref = ref, genome = hg19, range = c(-3,3), window = 150, noise = 0.15, isLog = TRUE, per_chr = TRUE, scale = NULL, top_genes = NULL, verbose = TRUE)
 72 |   sig_and_cor <- cna_sig_cor(cna_score_mat, epi_cells = query, ref_cells = ref, cna_sig = "abs", top_region = 1/3, top_cells = 1/3)
 73 |   metadata$CNAsig <- sig_and_cor$cna_signal[match(metadata$CellID, names(sig_and_cor$cna_signal))]
 74 |   metadata$CNAcor <- sig_and_cor$cna_correlation[match(metadata$CellID, names(sig_and_cor$cna_correlation))]
 75 |   
 76 |   metadata$CNAtot <- metadata$CNAcor + (metadata$CNAsig * max(na.omit(metadata$CNAcor)) / max(na.omit(metadata$CNAsig)))
 77 |   
 78 |   fin_cna <- cna_score_mat[,metadata$CellID[metadata$org_ref == "not reference"]]
 79 |   md <- metadata[metadata$org_ref == "not reference",]
 80 |   return(list(md,fin_cna))
 81 |   
 82 | }
 83 | 
 84 | 
 85 | # plot combined CNA ----------------------------------------------------------------
 86 | 
 87 | 
 88 | c1 <- as.data.frame(all_cna[[1]][[2]])
 89 | c2 <- as.data.frame(all_cna[[2]][[2]])
 90 | 
 91 | genes_cna <- intersect(row.names(c1),row.names(c2))
 92 | 
 93 | sapply(c(3:26), function(i){
 94 |   ci <- as.data.frame(all_cna[[i]][[2]])
 95 |   genes_cna <<- intersect(genes_cna, row.names(ci))
 96 | })
 97 | 
 98 | set.seed(50)
 99 | samp_spots <- sapply(c(1:26), function(i){
100 |   m <- all_cna[[i]][[1]]
101 |   return(sample(m$CellID[m$sample == samples_names[i]],700, replace = F))
102 | })
103 | 
104 | cna_comb <- c1[genes_cna,as.character(samp_spots[,1])]
105 | colnames(cna_comb) <- paste(samples_names[1], colnames(cna_comb), sep = "_")
106 | 
107 | sapply(c(2:26), function(i){
108 |   ci_temp <- as.data.frame(all_cna[[i]][[2]])
109 |   ci <- ci_temp[genes_cna,as.character(samp_spots[,i])]
110 |   colnames(ci) <- paste(samples_names[i], colnames(ci), sep = "_")
111 |   cna_comb <<- cbind(cna_comb, ci)
112 | })
113 | 
114 | 
115 | spots_list <- lapply(c(1:26), function(i){
116 |   change_spots <- paste(samples_names[i], samp_spots[,i], sep = "_")
117 |   return(change_spots)
118 | })
119 | 
120 | names(spots_list) <- samples_names
121 | 
122 | #find_chr <- genome_break(row.names(cna_comb))
123 | sig7 <- apply(cna_comb[1772:2006,],2,mean)
124 | sig10 <- apply(cna_comb[2363:2538,],2,mean)
125 | 
126 | spots_ord <- sapply(spots_list, function(sp){
127 |   return(mean(sig7[sp])+abs(mean(sig10[sp])))
128 | })
129 | names(spots_ord) <- names(spots_list)
130 | 
131 | spots_list <- spots_list[names(sort(spots_ord))]
132 | 
133 | cna_pl <- infercna::cnaPlot(cna_comb, limit=c(-0.7,0.7), order.cells = spots_list, ratio = NULL)   # plot cna - important that the ratio is set to NULL!
134 | 
135 | cna_pl$p + theme(legend.position = "right")
136 | 
137 | 
138 | 
139 | # cna merged ---------------------------------------------------------------
140 | 
141 | samples_names <- (read.delim("CNA/cna_samples.txt", header = FALSE))$V1
142 | 
143 | samples_num <- c(15:18)
144 | merge_cna <- mclapply(samples_num, merge_cna_md_fx, mc.cores = 4)
145 | 
146 | merge_cna_md_fx <- function(i){
147 |   m1 <- readRDS(paste("general/exp_mats_GBM/", samples_names[i], "counts.rds", sep = ""))
148 |   m <- cbind(m1,norm_brain_ref1,norm_brain_ref2)
149 |   
150 |   scaling_factor <- 1000000/colSums(m) #tumor mat processing
151 |   m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*")
152 |   m_loged <- log2(1 + (m_CPM/10))
153 |   var_filter <- apply(m_loged, 1, var) # removing genes with zero variance across all cells
154 |   m_proc <- m_loged[var_filter != 0, ]
155 |   exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)] # filtering out lowly expressed genes
156 |   m_proc <- m_proc[exp_genes, ]
157 |   
158 |   # create normal ref
159 |   ref <-list(colnames(norm_brain_ref1), colnames(norm_brain_ref2))
160 |   query <- colnames(m_proc)[!is.element(colnames(m_proc), unname(unlist(ref)))]
161 |   
162 |   # create metadata table 
163 |   
164 |   
165 |   metadata <- tibble(CellID = colnames(m_proc), 
166 |                      org_ref = ifelse(colnames(m_proc) %in% unname(unlist(ref)), "reference", "not reference"))
167 |   
168 |   metadata$sample <- "control"
169 |   metadata$region <- "control"
170 |   
171 |   if (str_detect(samples_names[i], "merge")) {
172 |     sapply(c(1:length(samples_regions$sample[samples_regions$cna_samples_name == samples_names[i]])), function(s){
173 |       s_name <- samples_regions$sample[samples_regions$cna_samples_name == samples_names[i]][s]
174 |       s_region <- samples_regions$region[samples_regions$cna_samples_name == samples_names[i]][s]
175 |       metadata$sample <<- ifelse(str_detect(metadata$CellID, s_name), s_name, metadata$sample)
176 |       metadata$region <<- ifelse(str_detect(metadata$CellID, s_name), s_region, metadata$region)
177 |     })
178 |   } else {
179 |     metadata$sample <- ifelse(metadata$org_ref == "not reference", samples_regions$sample[samples_regions$cna_samples_name == samples_names[i]], metadata$sample)
180 |     metadata$region <- ifelse(metadata$org_ref == "not reference", samples_regions$region[samples_regions$cna_samples_name == samples_names[i]], metadata$region)
181 |     
182 |   }
183 |   
184 |   
185 |   # calc CNA
186 |   hg19<- readRDS("CNA/hg38.rds")
187 |   cna_score_mat <- calc_cna(matrix = m_proc, query = query, ref = ref, genome = hg19, range = c(-3,3), window = 150, noise = 0.15, isLog = TRUE, per_chr = TRUE, scale = NULL, top_genes = NULL, verbose = TRUE)
188 |   sig_and_cor <- cna_sig_cor(cna_score_mat, epi_cells = query, ref_cells = ref, cna_sig = "abs", top_region = 1/3, top_cells = 1/3)
189 |   metadata$CNAsig_org <- sig_and_cor$cna_signal[match(metadata$CellID, names(sig_and_cor$cna_signal))]
190 |   metadata$CNAcor_org <- sig_and_cor$cna_correlation[match(metadata$CellID, names(sig_and_cor$cna_correlation))]
191 |   
192 |   # update reference with combined external ref + tumor defined by cna sig / corr thresholds
193 |   metadata$new_ref <- ifelse((metadata$CNAcor_org <= 0.25 & metadata$CNAsig_org <= 0.13) | (metadata$org_ref == "reference"), "reference", "query")
194 |   
195 |   ref <- metadata$CellID[metadata$new_ref == "reference"]
196 |   query <- colnames(m_proc)[!is.element(colnames(m_proc), unname(unlist(ref)))]
197 |   
198 |   # calc CNA with updated ref
199 |   cna_score_mat <- calc_cna(matrix = m_proc, query = query, ref = ref, genome = hg19, range = c(-3,3), window = 150, noise = 0.15, isLog = TRUE, per_chr = TRUE, scale = NULL, top_genes = NULL, verbose = TRUE)
200 |   sig_and_cor <- cna_sig_cor(cna_score_mat, epi_cells = query, ref_cells = ref, cna_sig = "abs", top_region = 1/3, top_cells = 1/3)
201 |   metadata$CNAsig <- sig_and_cor$cna_signal[match(metadata$CellID, names(sig_and_cor$cna_signal))]
202 |   metadata$CNAcor <- sig_and_cor$cna_correlation[match(metadata$CellID, names(sig_and_cor$cna_correlation))]
203 |   
204 |   metadata$CNAtot <- metadata$CNAcor + (metadata$CNAsig * max(metadata$CNAcor) / max(metadata$CNAsig))
205 |   
206 |   fin_cna <- cna_score_mat[,metadata$CellID[metadata$org_ref == "not reference"]]
207 |   md <- metadata[metadata$org_ref == "not reference",]
208 |   
209 |   return(list(md,fin_cna))
210 | }
211 | # malignancy level and binning ------------------------------------------------------------------
212 | 
213 | 
214 | all_tot <- sapply(c(1:14), function(i){
215 |   md <- all_cna[[i]][[1]]
216 |   return(md$CNAtot)
217 | })
218 | names(all_tot) <- samples_names[1:14]
219 | 
220 | all_tot_merge <- sapply(c(1:4), function(i){
221 |   md <- merge_cna[[i]][[1]]
222 |   return(md$CNAtot)
223 | })
224 | names(all_tot_merge) <- samples_names[15:18]
225 | 
226 | hist(c(unlist(all_tot), unlist(all_tot_merge)), breaks = 150)
227 | 
228 | all_tot_split_tmp <- sapply(names(all_tot_merge), function(m){
229 |   tmp <- all_tot_merge[[m]]
230 |   split_samps <- samples_regions$sample[samples_regions$cna_samples_name == m]
231 |   split_scores <- sapply(split_samps, function(s){
232 |     tmp_split <- tmp[grepl(s,names(tmp))]
233 |     names(tmp_split) <- sapply(strsplit(names(tmp_split),"_"),function(ns){ns[2]}) 
234 |     return(tmp_split)
235 |   })
236 |   return(split_scores)
237 | })
238 | 
239 | all_tot_split <- unlist(all_tot_split_tmp, recursive = F)
240 | names(all_tot_split) <- sapply(strsplit(names(all_tot_split),"\\."),function(ns){ns[2]})
241 | 
242 | sample_ls <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1
243 | mal_lev_score <- c(all_tot,all_tot_split)
244 | mal_lev_score <- mal_lev_score[sample_ls]
245 | 
246 | min_vl <- min(unlist(mal_lev_score))
247 | max_vl <- max(unlist(mal_lev_score))
248 | 
249 | mal_lev_score_scaled <- sapply(mal_lev_score, function(p){
250 |   p_sc <- (p - min_vl)/(max_vl-min_vl)
251 |   return(p_sc)
252 | })
253 | 
254 | 
255 | # malignancy binning ------------------------------------
256 | 
257 | hist(c(mal_lev_score_scaled[[15]], mal_lev_score_scaled[[23]]), breaks = 100)
258 | mal_lev_th <- quantile(c(mal_lev_score_scaled[[15]], mal_lev_score_scaled[[23]]))
259 | 
260 | malignancy_bin <- sapply(mal_lev_score_scaled, function(p){
261 |   bin <- ifelse(p < mal_lev_th[2] , "non_malignant", 
262 |                 ifelse(p >= mal_lev_th[2] & p < mal_lev_th[3], "mix_low", 
263 |                        ifelse(p >= mal_lev_th[3]  & p < mal_lev_th[4],"mix_high","malignant")))
264 |   return(bin)
265 | })
266 | 
267 | 
268 | 
269 | 
270 | ######## Functions ---------------------------------------------------------------
271 | # CNA Score Calculation ---------------------------------------------------
272 | 
273 | 
274 | #### calc_cna - calculate CNA scores per cell/spot given a gene x cell matrix, query cells and reference cell list/vector
275 | # NOTE: matrix should NOT be row(gene)-centered
276 | calc_cna <- function(matrix,
277 |                      query,
278 |                      ref,
279 |                      top_genes = NULL,
280 |                      window = 100,
281 |                      range = c(-3, 3),
282 |                      per_chr = TRUE,
283 |                      scale = 0.05,
284 |                      noise = NULL,
285 |                      isLog = FALSE,
286 |                      genome = NULL,
287 |                      min_ref_leng = 10,
288 |                      verbose = FALSE){
289 |   
290 |   if(all(round(range(rowMeans(matrix)), 3) == 0)) {
291 |     stop(print("Matrix is row-centered. Please provide non-centered data."))
292 |   }
293 |   if(is.list(ref)){
294 |     # Remove small references
295 |     ref_leng <- lapply(ref, length)
296 |     ref <- ref[ref_leng >= min_ref_leng]
297 |     # Prepare CNA matrix to work on
298 |     cna_mat <- matrix[, c(query, unlist(ref))]
299 |   }
300 |   else if(!is.list(ref)){
301 |     if(length(ref) < min_ref_leng){
302 |       stop(print("There are not enough reference cells!"))
303 |     }
304 |     cna_mat <- matrix[, c(query, ref)]
305 |   }
306 |   
307 |   # Order chromosomes
308 |   if (verbose) message("Ordering the genes by their genomic position.")
309 |   order_genes <- genome_sort(rownames(cna_mat), genome = genome)
310 |   genes <- order_genes$cna_genes
311 |   chr_breaks <- order_genes$chr_breaks
312 |   
313 |   # Optional list of top expressed genes
314 |   if(!is.null(top_genes)){
315 |     if(verbose) message("Filtering the expression matrix to include only top ", length(top_genes), "genes...")
316 |     if(isTRUE(isLog)){
317 |       cna_mat <- un_log(cna_mat)}
318 |     cna_mat <- cna_mat[genes, ]
319 |     cna_mat <- apply(cna_mat, 1, mean)
320 |     cna_mat <- names(tail(sort(cna_mat), n = top_genes))
321 |     order_genes <- genome_sort(cna_mat, genome = genome)
322 |     genes <- order_genes$cna_genes
323 |     chr_breaks <- order_genes$chr_breaks
324 |   }  
325 |   
326 |   # Reorder
327 |   ordered_mat <- cna_mat[genes, ]
328 |   # Log before first centering
329 |   if(isFALSE(isLog)){
330 |     ordered_mat <- log_norm(ordered_mat)}
331 |   # First row centering step
332 |   if (verbose) message("Performing mean-centering of the genes.")
333 |   avg <- apply(ordered_mat, 1, mean)
334 |   ordered_mat <- sweep(ordered_mat, 1, avg)
335 |   # Set 3 and -3 as extreme values (as set by the argument "range")
336 |   if (verbose) message("Restricting expression matrix values to between ", range[[1]], " and ", range[[2]], ".")
337 |   ordered_mat <- apply(ordered_mat, 2, function(x) pmax(x, range[1]))
338 |   ordered_mat <- apply(ordered_mat, 2, function(x) pmin(x, range[2]))
339 |   # Unlog to CPM/TPM again for moving average
340 |   ordered_mat <- un_log(ordered_mat)
341 |   
342 |   # Calculate moving average per chromosome by window of 100 (as set by the argument "window")
343 |   if(isTRUE(per_chr)){
344 |     if (verbose) message("Calculating rolling means with a window size of ", window, " genes, on each chromosome in turn.")
345 |     num <- seq(1:(length(chr_breaks) -1))
346 |     perchr <- lapply(num, function(y){
347 |       if(y == length(num)){
348 |         end <- nrow(ordered_mat)
349 |       }
350 |       if(y != length(num)){
351 |         end <- chr_breaks[y + 1] - 1
352 |       }
353 |       chr <- ordered_mat[chr_breaks[y]:end, ]
354 |       chr_mat <- apply(chr, 2, function(x) caTools::runmean(x, k = window, endrule = "mean"))
355 |     })
356 |     calc_mat <- do.call(rbind, perchr)  
357 |   }
358 |   # Calculate moving average for all genes as one chromosome
359 |   if(isFALSE(per_chr)){
360 |     if (verbose) message("Calculating rolling means with a window size of ", window, " genes, on all genes as one chromosome.")
361 |     calc_mat <- apply(ordered_mat, 2, function(x) caTools::runmean(x, k = window, endrule = "mean"))
362 |   }
363 |   
364 |   # Log before second centering
365 |   if (verbose) message("Converting CNA score values to log(2) space.")
366 |   calc_mat <- log_norm(calc_mat)
367 |   # Substract median per cell
368 |   if (verbose) message("Performing median-centering of the cells.")
369 |   cell_med <- apply(calc_mat, 2, median)
370 |   calc_mat <- sweep(calc_mat, 2, cell_med)
371 |   # Unlog to CPM/TPM again for reference removal
372 |   calc_mat <- un_log(calc_mat)
373 |   
374 |   # Create max/min values per gene from reference cells
375 |   if(is.list(ref)){
376 |     ref_leng <- seq(1, length(ref))
377 |     mean_mat <- lapply(ref_leng, function(x){
378 |       idx_ref <- ref[[x]]
379 |       m1 <- apply(calc_mat[, idx_ref], 1, mean)
380 |       m1
381 |     })
382 |     ref_mat <- do.call(cbind, mean_mat)
383 |     # Log references
384 |     ref_mat <- log_norm(ref_mat)
385 |     ref_max <- apply(ref_mat, 1, max)
386 |     ref_min <- apply(ref_mat, 1, min)
387 |   }
388 |   else if(!is.list(ref)){
389 |     ref_mat <- apply(calc_mat[, ref], 1, mean)
390 |     ref_mat <- log_norm(ref_mat)
391 |     ref_max <- ref_mat
392 |     ref_min <- ref_mat
393 |   }
394 |   
395 |   # Expand reference boundaries by scaling percentage
396 |   if(!is.null(scale)){
397 |     rmax <- ref_max + scale * abs(ref_max)
398 |     rmin <- ref_min - scale * abs(ref_min)
399 |   }
400 |   # Or expand by fixed noise factor
401 |   if(!is.null(noise)){
402 |     rmax <- ref_max + noise
403 |     rmin <- ref_min - noise 
404 |   }
405 |   
406 |   # Log CNA matrix
407 |   calc_mat <- log_norm(calc_mat)
408 |   # Centre by reference
409 |   if (verbose) message("Correcting CNA profiles using CNA values from reference cells.")
410 |   score_mat <- ifelse(calc_mat > rmax, calc_mat - rmax,
411 |                       ifelse(calc_mat < rmin, calc_mat - rmin, 0))
412 |   rownames(score_mat) <- rownames(ordered_mat)
413 |   
414 |   if (verbose) message("Done!")
415 |   return(score_mat)
416 | }
417 | 
418 | 
419 | 
420 | # CNA Matrix Subclones ----------------------------------------------------
421 | 
422 | 
423 | #### Dived the CNA matrix to subclones by reducing feature dimension and clustering with Louvain:
424 | spatial_subclones <- function(cna_matrix,
425 |                               epi_cells,
426 |                               separate = c("arm", "chr"),
427 |                               genome = NULL,
428 |                               genecut = 10,
429 |                               top_region = 1/3,
430 |                               top_method = c("abs", "sd"),
431 |                               reduction_dims = 30,
432 |                               cluster_method = c("louvain", "dbscan"),
433 |                               cluster_k = 10,
434 |                               dbs_minpts = NULL,
435 |                               dbs_ptscale = NULL,
436 |                               diffcut = 10){
437 |   
438 |   ## Subset by chromosome
439 |   breaks <- genome_break(genes = rownames(cna_matrix), separate = separate,
440 |                          genecut = genecut, genome = genome)
441 |   break_idx <- breaks$break_idx
442 |   labels <- breaks$breaks_df$labels
443 |   dispose <- breaks$dispose
444 |   if(length(dispose) != 0) {
445 |     matrix <- cna_matrix[-dispose, ]
446 |   } else {matrix <- cna_matrix}
447 |   break_num <- unique(break_idx)
448 |   
449 |   ## Set working matrix
450 |   matrix <- matrix[, epi_cells] 
451 |   names(break_idx) <- rownames(matrix)
452 |   
453 |   ## Select the most CNA-rich regions
454 |   if(top_method == "abs"){
455 |     abs_vals <- apply(matrix, 1, function(x) mean(abs(x)))
456 |     mat <- matrix[abs_vals > quantile(abs_vals, probs = 1 - top_region), ]
457 |   }
458 |   ## or alternatively - Select the most variant gene regions  
459 |   if(top_method == "sd"){
460 |     stand_dev <- apply(matrix, 1, sd)
461 |     mat <- matrix[stand_dev > quantile(stand_dev, probs = 1 - top_region), ]
462 |   }
463 |   transpose_mat <- as.matrix(t(mat))
464 |   
465 |   # Dimred
466 |   pca <- prcomp(transpose_mat, center = FALSE, scale. = FALSE)
467 |   dim_red_mat <- pca$x[, 1:reduction_dims]
468 |   rownames(dim_red_mat) <- colnames(matrix)
469 |   
470 |   ## Clustering methods:
471 |   if(cluster_method == "louvain"){
472 |     clusters <- cluster_coord(dim_red_mat, method = "louvain", louvain = cluster_k)
473 |   }
474 |   if(cluster_method == "dbscan"){
475 |     if(is.null(dbs_minpts) & is.null(dbs_ptscale)) {minpts <- log(nrow(trans_mat3))}
476 |     if(!is.null(dbs_ptscale)) {minpts <- dbs_ptscale * log(nrow(trans_mat3))}
477 |     if(!is.null(dbs_minpts)) {minpts <- dbs_minpts}
478 |     clusters <- cluster_coord(dim_red_mat, method = "dbscan", min_pts = minpts, diffcut = diffcut)
479 |     
480 |     clusters <- dbscan.cluster(trans_mat3, min_pts = minpts, diffcut = diffcut)
481 |   }
482 |   
483 |   names(clusters) <- colnames(matrix)
484 |   return(clusters)
485 | }
486 | 
487 | 
488 | 
489 | # Arrange Genes by Chromosomes --------------------------------------------
490 | 
491 | 
492 | #### genome_sort - with gene list as input, reorder genes by chromosome  position and get breaks for chromosomes and arms
493 | genome_sort <- function(genes, genome = NULL, attributes = c("hgnc_symbol", "start_position", "chromosome_name", "band"), downURL = "https://uswest.ensembl.org"){
494 |   
495 |   # Choose which species to use and server to download from
496 |   if(is.null(genome)){
497 |     mart <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl", host = downURL, port = 80)
498 |     #Get attributes for gene list
499 |     cna_attr <- biomaRt::getBM(attributes = attributes, filters = "hgnc_symbol", values = genes, mart = mart, uniqueRows = TRUE)
500 |   }
501 |   if(!is.null(genome)){
502 |     cna_attr <- as.data.frame(genome)
503 |     rownames(cna_attr) <- cna_attr[, 1]
504 |     cna_attr <- cna_attr[rownames(cna_attr) %in% genes, ]
505 |     colnames(cna_attr)[1] <- "hgnc_symbol"
506 |   }
507 |   
508 |   # Remove genes not mapped to proper chromosomes
509 |   chr_names <- c(seq(1:22), "X", "Y")
510 |   genes2rm <- setdiff(unique(cna_attr$chromosome_name), chr_names)
511 |   rmgenes <- lapply(genes2rm, function(x){
512 |     gene_drop <- cna_attr[grep(x, cna_attr$chromosome_name), ]
513 |   })
514 |   gene2rm <- do.call(rbind, rmgenes)
515 |   cna_attr <- cna_attr[!is.element(rownames(cna_attr), rownames(gene2rm)), ]
516 |   # Remove doubles
517 |   cna_attr <- cna_attr[!duplicated(cna_attr$hgnc_symbol), ]
518 |   # Remove NA's
519 |   cna_attr <- na.omit(cna_attr)
520 |   
521 |   # Change X and Y chromosomes to numbers for ordering
522 |   cna_attr$chromosome_name <- gsub("X", 23, cna_attr$chromosome_name)
523 |   cna_attr$chromosome_name <- gsub("Y", 24, cna_attr$chromosome_name)
524 |   cna_attr$chromosome_name <- as.numeric(cna_attr$chromosome_name)
525 |   # Order
526 |   cna_attr <- cna_attr[order(cna_attr$chromosome_name, cna_attr$start_position, decreasing = FALSE), ]
527 |   # Chromosome number as sorting vector
528 |   chr_names <- as.character(unique(cna_attr$chromosome_name))
529 |   # Chromosome length
530 |   chr_length <- lapply(chr_names, function(x){
531 |     l <- nrow(subset(cna_attr, cna_attr$chromosome_name == x))
532 |   })
533 |   chr_length <- do.call(rbind, chr_length)
534 |   # Break vector at each chromosome end
535 |   chr_breaks <- cumsum(chr_length)
536 |   # Set all genes as p or q
537 |   cna_attr$band <- gsub("[[:digit:]]", "", cna_attr$band)
538 |   cna_attr$band <- gsub("p.", "p", cna_attr$band)
539 |   cna_attr$band <- gsub("q.", "q", cna_attr$band)
540 |   # Chromosome arm lenghts
541 |   arm_breaks <- integer(length = length(chr_breaks))
542 |   for(i in seq_along(chr_breaks)){
543 |     starting <- chr_breaks[i]
544 |     ending <- nrow(cna_attr[cna_attr$chromosome_name == i & cna_attr$band == "q", ])
545 |     gap_length <- starting - ending
546 |     arm_breaks[i] <- round(gap_length, digits = 0)
547 |   }
548 |   
549 |   # Breaks and labels for arms and full chromosomes
550 |   full_breaks <- sort(c(1, chr_breaks, arm_breaks))
551 |   # Add p and q labels
552 |   q2q <- paste(seq(1, length(chr_breaks)), "q", sep = "")
553 |   p2p <- paste(seq(1, length(chr_breaks)), "p", sep = "")
554 |   full_labels <- sort(c(seq(1, length(chr_breaks)), seq(1, length(chr_breaks))))
555 |   full_labels[seq(2, length(full_labels), 2)] <- q2q
556 |   full_labels[seq(1, length(full_labels), 2)] <- p2p
557 |   # Empty label at end of Y chromosome
558 |   full_labels <- c(full_labels, " ")
559 |   # Name X and Y chromosomes
560 |   full_labels[45:48] <- c("Xp", "Xq", "Yp", "Yq")
561 |   chr_breaks <- c(1, chr_breaks)
562 |   chr_names[23:25] <- c("X", "Y", " ")
563 |   
564 |   output <- list(cna_genes = cna_attr$hgnc_symbol,
565 |                  chr_breaks = chr_breaks,
566 |                  chr_names = chr_names,
567 |                  arm_breaks = arm_breaks,
568 |                  full_breaks = full_breaks,
569 |                  full_labels = full_labels,
570 |                  all = cna_attr)
571 |   return(output)
572 | }
573 | 
574 | 
575 | #### Index and label genes by chromosomal location - chromosome number and arm
576 | 
577 | genome_break <- function(genes, genecut = 10, separate = "chr", genome = NULL){
578 |   
579 |   chr_sort <- genome_sort(genes, genome = genome)
580 |   genes <- chr_sort$cna_genes
581 |   
582 |   if(separate == "chr") {breaks = chr_sort$chr_breaks
583 |   labels = chr_sort$chr_names}
584 |   if(separate == "arm") {breaks = chr_sort$full_breaks
585 |   labels = chr_sort$full_labels}
586 |   if(length(breaks) != length(labels)) {labels = labels[1:length(breaks)]}
587 |   
588 |   # Breaks as factor
589 |   break_idx <- character(length = length(genes))
590 |   for(i in 1:(length(breaks) - 1)){
591 |     if(i == (length(breaks) - 1)) {end = length(genes)}
592 |     if(i != (length(breaks) - 1)) {end = breaks[i + 1]}
593 |     chr_leng <- end - breaks[i]
594 |     if(chr_leng < genecut & chr_leng > 0 & i != 1) {break_idx[breaks[i] + 1:end] = NA}
595 |     if(chr_leng < genecut & chr_leng > 0 & i == 1) {break_idx[breaks[i]:end] = NA}
596 |     if(chr_leng == 0 & i != 1) {break_idx[breaks[i] + 1] = NA}
597 |     if(chr_leng == 0 & i == 1) {break_idx[breaks[i]] = NA}
598 |     if(chr_leng > genecut & i != 1) {break_idx[breaks[i] + 1:end] = i}
599 |     if(chr_leng > genecut & i == 1) {break_idx[breaks[i]:end] = i}
600 |   }
601 |   
602 |   # Remove genes on chromosomes with too few genes
603 |   dispose <- which(is.na(break_idx))
604 |   break_idx <- na.omit(break_idx)
605 |   break_idx <- sort(as.numeric(break_idx))
606 |   genes <- chr_sort$cna_genes[-dispose]
607 |   names(break_idx) <- genes
608 |   
609 |   # Keep only labels with chromosomes that have enough genes
610 |   breaks_df <- cbind.data.frame(breaks, labels)
611 |   rownames(breaks_df) <- seq(1, nrow(breaks_df))
612 |   breaks_df <- breaks_df[unique(break_idx), ]
613 |   
614 |   out <- list(break_idx = break_idx, breaks_df = breaks_df, dispose = dispose)
615 |   return(out)
616 | }
617 | 
618 | 
619 | 
620 | 
621 | # Utility functions -------------------------------------------------------
622 | 
623 | 
624 | #### log and un_log functions:
625 | log_norm <- function(x, centre = FALSE){
626 |   logged <- log2(1 + (x/10))
627 |   if(isTRUE(centre)){
628 |     avg <- rowMeans(logged)
629 |     logged <- sweep(logged, 1, avg)
630 |   }
631 |   return(logged)
632 | }
633 | 
634 | un_log <- function(x){
635 |   tpmed <- 10 * (2^x - 1)
636 |   return(tpmed)
637 | }
638 | 
639 | 
640 | #### Clustering function and methods
641 | ## General cluster coordinate function
642 | cluster_coord <- function(matrix, method = c("dbscan", "louvain"), louvain = 25, ...){
643 |   if(method == "dbscan"){
644 |     clusters <- cluster_dbscan(matrix, ...)
645 |   }
646 |   if(method == "louvain"){
647 |     clusters <- cluster_louvain(matrix, k = louvain)
648 |   }
649 |   
650 |   rename_clusts <- function(x){paste("cluster", x, sep = "")}
651 |   clusters <- rename_clusts(clusters)
652 |   names(clusters) <- rownames(matrix)
653 |   return(clusters)
654 | }
655 | 
656 | ## Louvain clustering
657 | cluster_louvain <- function(data, k) {
658 |   knn <- FNN::get.knn(as.matrix(data), k = k)
659 |   knn <- data.frame(from = rep(1:nrow(knn$nn.index), k), to = as.vector(knn$nn.index), weight = 1 / (1 + as.vector(knn$nn.dist)))
660 |   simple_igraph <- igraph::simplify(igraph::graph_from_data_frame(knn, directed = FALSE))
661 |   louvain_clusts <- igraph::cluster_louvain(simple_igraph)
662 |   clusters <- igraph::membership(louvain_clusts)
663 |   return (clusters)
664 | }
665 | 
666 | ## DBScan clustering
667 | cluster_dbscan <- function(matrix, min_pts = log(nrow(matrix)), probs = 3/4, diff_cut = 10){
668 |   dist <- dbscan::kNNdist(matrix, min_pts)
669 |   dist_cutoff <- quantile(sort(dist), probs = probs)
670 |   dist <- dist[dist > dist_cutoff]
671 |   dist <- dist[order(dist)]
672 |   dist <- dist / max(dist)
673 |   der_dist <- diff(dist) / (1 / length(dist))
674 |   knee <- dist[length(der_dist) - length(der_dist[der_dist > diff_cut])] + dist_cutoff
675 |   db_run <- dbscan::dbscan(matrix, eps = knee, minPts = min_pts)
676 |   clusters <- as.factor(db_run$cluster)
677 |   return(clusters)
678 | }
679 | 
680 | 
681 | #Dor Simkin
682 | #11:18 AM (0 minutes ago)
683 | #to me
684 | 
685 | #### Calculate CNAsignal & CNAcorrelation scores
686 | 
687 | cna_sig_cor <- function(cna_matrix,
688 |                         
689 |                         epi_cells,
690 |                         
691 |                         ref_cells,
692 |                         
693 |                         top_region = 1/3,
694 |                         
695 |                         top_cells = 1/3,
696 |                         
697 |                         cna_sig = "abs"){
698 |   
699 |   
700 |   
701 |   if(is.list(ref_cells)){ref_cells <- unlist(ref_cells)}
702 |   ref_cells <- ref_cells[ref_cells %in% colnames(cna_matrix)]
703 |   
704 |   if(!is.null(intersect(ref_cells, epi_cells))){
705 |     
706 |     rmcells <- intersect(ref_cells, epi_cells)
707 |     
708 |     ref_cells <- setdiff(ref_cells, rmcells)
709 |     
710 |   } 
711 |   
712 |   cna_matrix <- cna_matrix[, c(epi_cells, ref_cells)]
713 |   
714 |   all_cells <- colnames(cna_matrix)
715 |   
716 |   epi_matrix <- cna_matrix[, epi_cells]
717 |   
718 |   
719 |   
720 |   # Absolute CNA value per gene for all epithelial cells
721 |   
722 |   absvals <- apply(epi_matrix, 1, function(x) mean(abs(x)))
723 |   
724 |   # Top one-third of genes with highest absolute value (as set by the argument "top_region")
725 |   
726 |   top_abs <- ifelse(absvals > quantile(absvals, probs = 1 - top_region), absvals, 0)
727 |   
728 |   # Define matrices where to calculate cell averages - only the regions with most CNA
729 |   
730 |   score_cna <- cna_matrix[top_abs > 0, ]
731 |   
732 |   # CNA score for each tumor cell across relevant region
733 |   
734 |   if(cna_sig == "abs"){cna_score <- apply(score_cna, 2, function(x) mean(abs(x)))}
735 |   
736 |   if(cna_sig == "sqrt"){cna_score <- apply(score_cna, 2, function(x) mean(sqrt(x^2)))}
737 |   
738 |   if(cna_sig == "square"){cna_score <- apply(score_cna, 2, function(x) mean(x^2))}
739 |   
740 |   # Calculate correlation only with epithelial cells with strongest signal (defined by a cutoff set by the argument "top_cells")
741 |   
742 |   cellcut <- quantile(cna_score, probs = 1 - top_cells)
743 |   
744 |   epi_cna <- epi_matrix[top_abs > 0, cna_score[epi_cells] > cellcut]
745 |   
746 |   # Correlation vector
747 |   
748 |   cor_vec <- cor(score_cna, epi_cna)
749 |   
750 |   cna_cor <- apply(cor_vec, 1, mean)
751 |   
752 |   
753 |   
754 |   out <- list(cna_signal = cna_score,
755 |               
756 |               cna_correlation = cna_cor)
757 |   
758 |   return(out)
759 |   
760 | }
761 | 
762 | ## plotting function
763 | 
764 | 
765 | ggbar = function(colVar,
766 |                  legend_title = NULL,
767 |                  obs=NULL,
768 |                  dir=c('h','v'),
769 |                  cols = c('blue','red','orange','green','magenta')) {
770 |   
771 |   L = list(i = obs,col = colVar)
772 |   lens = sapply(L, length, simplify = F)
773 |   lens = lens[lens!=0]
774 |   lens = unlist(lens)
775 |   
776 |   stopifnot(length(lens)>0 & length(unique(lens))==1)
777 |   
778 |   len = unique(unlist(lens))
779 |   if (is.null(obs)) obs = 1:len
780 |   if (is.null(colVar)) colVar = rep('', len)
781 |   
782 |   dir = match.arg(dir)
783 |   d = data.frame(id = obs,
784 |                  colVar=colVar,
785 |                  stringsAsFactors=F)
786 |   
787 |   d = d %>% dplyr::mutate(id = factor(id, levels = unique(id)))
788 |   
789 |   if (dir=='h') {G = ggplot(d, aes(y=1,x=id,fill=colVar))}
790 |   else {G = ggplot(d, aes(y=id,x=1,fill=colVar))}
791 |   
792 |   
793 |   G + geom_tile() +
794 |     scale_fill_manual(values=cols, name = legend_title, guide = guide_legend(override.aes = list(size = 10))) +
795 |     theme_void() +
796 |     theme(legend.position='top',
797 |           plot.margin=margin(0,0,0,0,'cm')) 
798 | }


--------------------------------------------------------------------------------
/Module6_CODEX.R:
--------------------------------------------------------------------------------
  1 | # CODEX related code
  2 | # Load libraries, custom functions and levels  -----------------------------------------
  3 | ## Load libraries
  4 | library(ComplexHeatmap)
  5 | library(circlize)
  6 | library(viridis)
  7 | library(RColorBrewer)
  8 | library(ggraph)
  9 | library(scales)
 10 | library(ggpubr)
 11 | library(tidyverse)
 12 | #library(ggvoronoi)
 13 | 
 14 | ## Load custom functions and levels
 15 | Count_cell_type <- function(cell_type = cell_type_figure) {
 16 |   cells %>% count({{cell_type}}) %>% print(n=Inf)
 17 | }
 18 | 
 19 | `%ni%` <- Negate(`%in%`)
 20 | 
 21 | Create_aligned_pseudospots <- function(sample_codex="ZH916_INF", sample_visium="ZH916inf") {
 22 |   
 23 |   tmp_codex <- cells_aligned %>% 
 24 |     filter(sample %in% c({{sample_codex}})) %>% 
 25 |     filter(cell_type_tidy %ni% c("T_cell", "B_cell", "excluded"))
 26 |   
 27 |   # Create aligned pseudospots codex
 28 |   spot_distance <- scalef %>% 
 29 |     filter(sample == {{sample_visium}}) %>% 
 30 |     pull(scalef_hires)
 31 |   
 32 |   spot_diameter <- spot_distance*0.55
 33 |   
 34 |   ## Import spot grid
 35 |   spots <- visium_positions %>% 
 36 |     filter(sample == {{sample_visium}}) 
 37 |   
 38 |   ## Calculate distances between tmp and spot centers
 39 |   distances <- spots %>%
 40 |     expand_grid(cell_name = tmp_codex$cell_name) %>%
 41 |     left_join(tmp_codex, by = "cell_name") %>%
 42 |     mutate(distance = sqrt((spot_x - aligned_x)^2 + (spot_y - aligned_y)^2))
 43 |   
 44 |   ## Assign tmp_codex to spots based on distances
 45 |   assigned_spots <- distances %>%
 46 |     filter(distance <= spot_diameter / 2) %>%
 47 |     group_by(cell_name) %>%
 48 |     arrange(distance) %>%
 49 |     slice_head(n = 1)
 50 |   
 51 |   rm(distances)
 52 |   gc()
 53 |   
 54 |   ## Append spot names to the original tibble
 55 |   tmp_final <- tmp_codex %>%
 56 |     left_join(assigned_spots %>% dplyr::select(cell_name, spot_id), by = "cell_name") %>%
 57 |     mutate(spot_id = ifelse(is.na(spot_id), NA, spot_id))
 58 |   
 59 |   ## Calculate and plot dominant cell_type per spot
 60 |   tmp_dom <- tmp_final %>%
 61 |     filter(cell_type_tidy %ni% c("excluded")) %>%
 62 |     group_by(spot_id) %>% 
 63 |     count(cell_type_tidy) %>% 
 64 |     arrange(spot_id, desc(n)) %>% 
 65 |     mutate(n_total = sum(n)) %>% 
 66 |     mutate(fraction = n/sum(n)) %>% 
 67 |     filter(row_number()==1) %>% 
 68 |     left_join(spots %>% select(spot_x, spot_y, spot_id), by = c("spot_id")) %>%
 69 |     ungroup() %>% 
 70 |     distinct(.)
 71 |   
 72 |   return(tmp_dom)
 73 | }
 74 | 
 75 | Run_DEPs <- function(sample) {
 76 |   nmat_sample <- nmat[, cells %>% 
 77 |                         filter(sample %in% c({{sample}})) %>% 
 78 |                         filter(cell_type_figure %ni% c("excluded")) %>%
 79 |                         pull(cell_name)
 80 |   ]
 81 |   
 82 |   cells_sample <- cells %>% 
 83 |     filter(sample %in% c({{sample}})) %>%  
 84 |     filter(cell_type_figure %ni% c("excluded"))
 85 |   
 86 |   DEPs_hm_sample <-
 87 |     tibble(dim(nmat_sample)[1]:dim(nmat_sample)[1]) # Create df in which the DEG for each cluster will be inserted
 88 |   
 89 |   for (i in sort(unique(cells_sample$cell_type_figure))) {
 90 |     # i iterates all clusters
 91 |     top_DEP_value <-
 92 |       (rowMeans(nmat_sample[, which(i == cells_sample$cell_type_figure)]) - rowMeans(nmat_sample[, which(i != cells_sample$cell_type_figure)]))
 93 |     DEPs_hm_sample <-
 94 |       cbind(DEPs_hm_sample, top_DEP_value)   # select top gene names
 95 |   }
 96 |   
 97 |   DEPs_hm_sample <-
 98 |     DEPs_hm_sample[, -1] # Delete redundant first column
 99 |   colnames(DEPs_hm_sample) <-
100 |     paste0(sort(unique(cells_sample$cell_type_figure)), "_", {
101 |       {
102 |         sample
103 |       }
104 |     })
105 |   return(DEPs_hm_sample)
106 | }
107 | 
108 | Run_DEPs_hm <- function(sample, cell_types, marker) {
109 |   nmat <- nmat[{{marker}} ,cells %>% 
110 |                  filter(sample %in% {{sample}}) %>% 
111 |                  filter(cell_type_figure %in% {{cell_types}}) %>% 
112 |                  pull(cell_name) ]
113 |   
114 |   cells <- cells %>%
115 |     filter(sample %in% {{sample}}) %>%
116 |     filter(cell_type_figure %in% {{cell_types}})
117 |   
118 |   
119 |   DEPs_hm <- tibble(dim(nmat)[1]:dim(nmat)[1]) # Create df in which the DEG for each cluster will be inserted
120 |   
121 |   for (i in sort(unique(cells$cell_type_figure))) {    # i iterates all clusters
122 |     top_DEP_value <- (rowMeans(nmat[,which(i == cells$cell_type_figure) ]) - rowMeans(nmat[,which(i != cells$cell_type_figure)]))
123 |     DEPs_hm <- cbind(DEPs_hm, top_DEP_value)   # select top gene names
124 |   }
125 |   
126 |   DEPs_hm <- DEPs_hm[,-1] # Delete redundant first column
127 |   colnames(DEPs_hm) <- rep( x = paste(sort(unique(cells$cell_type_figure))))
128 |   
129 |   return(DEPs_hm)
130 | }
131 | 
132 | outersect <- function(x, y) {
133 |   sort(c(setdiff(x, y),
134 |          setdiff(y, x)))
135 | }
136 | 
137 | `%ni%` <- Negate(`%in%`)
138 | 
139 | Plot_tiles_9 <- function(sample = "ZH916T1") {
140 |   
141 |   mp_cons <- tibble(mp_cons = vspots$mp_cons %>% unique()) %>% 
142 |     drop_na()
143 |   
144 |   vis <- spots_visium_aligned %>% 
145 |     filter(spot_id_sample %in% intersect(spots_codex_aligned$spot_id_sample, spots_visium_aligned$spot_id_sample)) %>% 
146 |     filter(sample == {{sample}}) %>% 
147 |     select(spot_id, mp_cons, spot_x, spot_y)
148 |   
149 |   cod <- spots_codex_aligned %>% 
150 |     filter(spot_id_sample %in% intersect(spots_codex_aligned$spot_id_sample, spots_visium_aligned$spot_id_sample)) %>% 
151 |     filter(sample == {{sample}}) %>% 
152 |     mutate(mp_cons = mp_cons) %>%
153 |     select(spot_id, mp_cons, spot_x, spot_y)
154 |   
155 |   # Calculate the range of x and y coordinates
156 |   x_range <- range(vis$spot_x)
157 |   y_range <- range(vis$spot_y)
158 |   
159 |   # Define the size of the tiles (one-third of the range)
160 |   tile_size_x <- (x_range[2] - x_range[1]) / 3
161 |   tile_size_y <- (y_range[2] - y_range[1]) / 3
162 |   
163 |   # Initialize a list to store the 3x3 tiles
164 |   tiles_vis <- vector("list", length = 9)
165 |   
166 |   # Loop through each tile
167 |   for (i in 1:3) {
168 |     for (j in 1:3) {
169 |       # Define the boundaries of the current tile
170 |       x_min <- x_range[1] + (i - 1) * tile_size_x
171 |       x_max <- x_range[1] + i * tile_size_x
172 |       y_min <- y_range[1] + (j - 1) * tile_size_y
173 |       y_max <- y_range[1] + j * tile_size_y
174 |     }
175 |   }
176 |   
177 |   ggpubr::ggarrange(
178 |     ggplot(vis, aes(x=spot_x, y=spot_y, color = mp_cons)) + 
179 |       geom_point(size = 1, alpha = 1, shape = 16) + 
180 |       guides(colour = guide_legend(override.aes = list(size=2, alpha = 1))) +
181 |       scale_colour_manual(values = an_cols_mp) +
182 |       labs(color = "mp_cons", x = "", y = "") +
183 |       theme_classic() + 
184 |       geom_vline(xintercept = seq(x_range[1], x_range[2], length.out = 4)) +
185 |       geom_hline(yintercept = seq(y_range[1], y_range[2], length.out = 4)) +
186 |       scale_y_reverse() +
187 |       guides(color="none") +
188 |       theme(axis.text.x = element_text(angle = 45, hjust = 1))
189 |     ,
190 |     ggplot(cod, aes(x=spot_x, y=spot_y, color=mp_cons)) +
191 |       geom_point(size = 1, alpha=1, shape = 16) +
192 |       scale_colour_manual(values = an_cols_mp) +
193 |       guides(colour = guide_legend(override.aes = list(size=2, alpha = 1))) +
194 |       labs(color = "mp_cons", x = "", y = "") +
195 |       geom_vline(xintercept = seq(x_range[1], x_range[2], length.out = 4)) +
196 |       geom_hline(yintercept = seq(y_range[1], y_range[2], length.out = 4)) +
197 |       theme_classic() +
198 |       scale_y_reverse() +
199 |       guides(color="none") +
200 |       theme(axis.text.x = element_text(angle = 45, hjust = 1))
201 |     ,
202 |     nrow = 2
203 |   )
204 | }
205 | 
206 | Get_prop_9 <- function(sample="ZH1019inf", min_spots_per_tile = 50) {
207 |   
208 |   mp_cons <- tibble(mp_cons = vspots$mp_cons %>% unique()) %>% 
209 |     drop_na()
210 |   
211 |   vis <- spots_visium_aligned %>% 
212 |     filter(spot_id_sample %in% intersect(spots_codex_aligned$spot_id_sample, spots_visium_aligned$spot_id_sample)) %>% 
213 |     filter(sample == {{sample}}) %>% 
214 |     select(spot_id, mp_cons, spot_x, spot_y)
215 |   
216 |   cod <- spots_codex_aligned %>% 
217 |     filter(spot_id_sample %in% intersect(spots_codex_aligned$spot_id_sample, spots_visium_aligned$spot_id_sample)) %>% 
218 |     filter(sample == {{sample}}) %>% 
219 |     mutate(mp_cons = mp_cons) %>%
220 |     select(spot_id, mp_cons, spot_x, spot_y)
221 |   
222 |   ## For vis
223 |   # Calculate the range of x and y coordinates
224 |   x_range <- range(vis$spot_x)
225 |   y_range <- range(vis$spot_y)
226 |   
227 |   # Define the size of the tiles (one-third of the range)
228 |   tile_size_x <- (x_range[2] - x_range[1]) / 3
229 |   tile_size_y <- (y_range[2] - y_range[1]) / 3
230 |   
231 |   # Initialize a list to store the 3x3 tiles
232 |   tiles_vis <- vector("list", length = 9)
233 |   
234 |   # Loop through each tile
235 |   for (i in 1:3) {
236 |     for (j in 1:3) {
237 |       # Define the boundaries of the current tile
238 |       x_min <- x_range[1] + (i - 1) * tile_size_x
239 |       x_max <- x_range[1] + i * tile_size_x
240 |       y_min <- y_range[1] + (j - 1) * tile_size_y
241 |       y_max <- y_range[1] + j * tile_size_y
242 |       
243 |       # Extract cells within the current tile
244 |       current_tile <- vis[
245 |         vis$spot_x >= x_min & vis$spot_x <= x_max &
246 |           vis$spot_y >= y_min & vis$spot_y <= y_max, ]
247 |       
248 |       # Store the current tile in the list
249 |       tiles_vis[[(i - 1) * 3 + j]] <- current_tile
250 |     }
251 |   }
252 |   
253 |   names(tiles_vis) <- paste0("vis_", 1:9)
254 |   
255 |   ## For cod
256 |   # Calculate the range of x and y coordinates
257 |   x_range <- range(cod$spot_x)
258 |   y_range <- range(cod$spot_y)
259 |   
260 |   # Define the size of the tiles (one-third of the range)
261 |   tile_size_x <- (x_range[2] - x_range[1]) / 3
262 |   tile_size_y <- (y_range[2] - y_range[1]) / 3
263 |   
264 |   # Initialize a list to store the 3x3 tiles
265 |   tiles_cod <- vector("list", length = 9)
266 |   
267 |   # Loop through each tile
268 |   for (i in 1:3) {
269 |     for (j in 1:3) {
270 |       # Define the boundaries of the current tile
271 |       x_min <- x_range[1] + (i - 1) * tile_size_x
272 |       x_max <- x_range[1] + i * tile_size_x
273 |       y_min <- y_range[1] + (j - 1) * tile_size_y
274 |       y_max <- y_range[1] + j * tile_size_y
275 |       
276 |       # Extract cells within the current tile
277 |       current_tile <- cod[
278 |         cod$spot_x >= x_min & cod$spot_x <= x_max &
279 |           cod$spot_y >= y_min & cod$spot_y <= y_max, ]
280 |       
281 |       # Store the current tile in the list
282 |       tiles_cod[[(i - 1) * 3 + j]] <- current_tile
283 |     }
284 |   }
285 |   
286 |   names(tiles_cod) <- paste0("cod_", 1:9)
287 |   
288 |   list_prop <- lapply(1:9, function(x) {
289 |     x <- left_join(
290 |       tiles_vis[[x]] %>% 
291 |         count(mp_cons) %>% 
292 |         mutate(prop_vis = n/sum(n))
293 |       ,
294 |       tiles_cod[[x]] %>% 
295 |         count(mp_cons) %>% 
296 |         mutate(prop_cod = n/sum(n))
297 |       ,
298 |       by = "mp_cons") %>% 
299 |       right_join(., mp_cons, by = "mp_cons") %>% 
300 |       replace_na(list(prop_cod = 0, prop_vis = 0, n.x = 0, n.y = 0))
301 |     
302 |   })
303 |   
304 |   
305 |   # Only use quadrants that cover > 150 spots
306 |   filtered_list_prop <- lapply(list_prop, function(df) {
307 |     if(sum(df$n.x) > min_spots_per_tile) {
308 |       return(df)
309 |     }
310 |   })
311 |   
312 |   prop_final <- do.call(rbind, filtered_list_prop)
313 |   
314 |   return(prop_final)
315 | }
316 | 
317 | ## Min distance to cell type with distribution
318 | 
319 | Min_dist_to_nth_cell_type_distribution <- function(cell_type = "Vasc", 
320 |                                                    sample = "ZH916_INF", 
321 |                                                    n = 1,
322 |                                                    cell_type_column = "cell_type_figure") {
323 |   # Create distance matrix
324 |   cells_clean <- cells %>% filter(cell_type_figure %ni% c("excluded"))
325 |   
326 |   dist <- cells_clean %>% 
327 |     filter(sample == {{sample}}) %>% 
328 |     select(c(centroid_x,centroid_y)) %>% 
329 |     dist() %>% 
330 |     as.matrix()
331 |   
332 |   # Find the index of the closest spot of cell_type "endothelial
333 |   ix_endothelial <- cells_clean %>% 
334 |     filter(sample == {{sample}}) %>% 
335 |     pull({{cell_type_column}}) == {{cell_type}}
336 |   
337 |   
338 |   # Calculate min distance for each spot to the closest spot of cell type {{cell_type}}
339 |   min_dist_to_endothelial <- apply(dist, 1, function(x) sort(x[ix_endothelial])[n])
340 |   
341 |   
342 |   
343 |   # Calculate average distance per cell type to closest endothelial cell
344 |   cells_clean <- cells_clean %>% 
345 |     filter(sample == {{sample}}) %>% 
346 |     select(cell_name, {{cell_type_column}}, sample) %>% 
347 |     cbind(min_dist_to_endothelial) %>% 
348 |     group_by(!!sym(cell_type_column)) #%>% 
349 |   # summarise(mean_dist = mean(min_dist_to_endothelial), 
350 |   #           median_dist = median(min_dist_to_endothelial)) %>% 
351 |   # arrange(desc(mean_dist))
352 |   
353 |   rm(dist)
354 |   gc()
355 |   
356 |   return(cells_clean)
357 | }
358 | 
359 | ## Test neighborhood with shuffling
360 | 
361 | Test_nhood <- function(sample = "ZH1019_INF", 
362 |                        radius = 27.5, 
363 |                        cell_type_figure_column = "cell_type_figure", 
364 |                        fraction_coherance = 0.8,
365 |                        iter = 10,
366 |                        workers = 10) {
367 |   
368 |   # Observed part  
369 |   cells_clean <- cells %>% 
370 |     filter(cell_type_figure %ni% c("excluded")) %>% 
371 |     filter(sample %in% c({{sample}}))
372 |   
373 |   spe_clean <- SpatialExperiment::SpatialExperiment(
374 |     sample_id = cells_clean$sample,
375 |     spatialCoords = as.matrix(cells_clean %>% select(c(centroid_x, centroid_y))),
376 |     rowData = marker,
377 |     colData = list(cell_name = cells_clean$cell_name,
378 |                    cell_type_figure = as.factor(cells_clean[[cell_type_figure_column]]),
379 |                    centroid_x = cells_clean$centroid_x,
380 |                    centroid_y = cells_clean$centroid_y,
381 |                    img_id = cells_clean$sample,
382 |                    all = rep("all", dim(cells_clean)[1]))
383 |   )
384 |   
385 |   spe_clean <- imcRtools::buildSpatialGraph(spe_clean, 
386 |                                             img_id = "sample_id", 
387 |                                             type = "expansion", 
388 |                                             threshold = radius, 
389 |                                             coords = c("centroid_x", "centroid_y"),
390 |                                             name = "expansion_graph")
391 |   
392 |   
393 |   spe_clean <- imcRtools::aggregateNeighbors(spe_clean, 
394 |                                              colPairName = "expansion_graph", 
395 |                                              aggregate_by = "metadata", 
396 |                                              count_by = "cell_type_figure",
397 |                                              name = "nhood_mat", 
398 |                                              proportions = F)
399 |   
400 |   
401 |   nhood_mat <- cbind((SingleCellExperiment::colData(spe_clean)[["nhood_mat"]]), 
402 |                      from_cell_type_figure = cells_clean[[cell_type_figure_column]]) %>% as_tibble
403 |   
404 |   
405 |   nhood_mat_num <- nhood_mat %>% select_if(is.numeric)
406 |   
407 |   row_sums <- nhood_mat_num %>% rowSums()
408 |   fraction_coherance_mat <- nhood_mat_num/row_sums
409 |   drop_rows <- apply(fraction_coherance_mat, 1, function(x) any(x > fraction_coherance))
410 |   nhood_norm_coherant <- nhood_mat[!drop_rows,]
411 |   nhood_norm_coherant <- nhood_norm_coherant %>% drop_na()
412 |   
413 |   ## Summarise interactions
414 |   
415 |   nhood_sum <- nhood_norm_coherant %>% 
416 |     group_by(from_cell_type_figure) %>% 
417 |     summarise(across(where(is.numeric), sum))
418 |   
419 |   ## Calc rowSums and subtract self-pairs
420 |   nhood_rowSums <- nhood_sum %>% 
421 |     select_if(is.numeric) %>% 
422 |     rowSums()
423 |   
424 |   ## Pivot_long and extract self-pairs
425 |   self_pair_count <- nhood_sum %>% 
426 |     pivot_longer(!from_cell_type_figure, names_to = "to_cell_type_figure", values_to = "count") %>% 
427 |     filter(from_cell_type_figure == to_cell_type_figure) %>% 
428 |     pull(count)
429 |   
430 |   ## Normalize nhood_sum by difference rowSums and self_pair_count
431 |   
432 |   nhood_norm <- sweep(nhood_sum %>% select_if(is.numeric), 1, (nhood_rowSums-self_pair_count), "/") %>% 
433 |     cbind(from_cell_type_figure = nhood_sum$from_cell_type_figure, .) %>% as_tibble()
434 |   
435 |   nhood_norm_obs <- nhood_norm %>% 
436 |     pivot_longer(!from_cell_type_figure, names_to = "to_cell_type_figure", values_to = paste0("obs_count_" ,{{sample}}))
437 |   
438 |   # Shuffeled part 
439 |   
440 |   Tmp <- function(sample. = sample, 
441 |                   radius. = radius, 
442 |                   cell_type_figure_column. = cell_type_figure_column, 
443 |                   fraction_coherance. = fraction_coherance,
444 |                   iter. = iter,
445 |                   workers. = workers) 
446 |   {
447 |     
448 |     shuffled_counts <- BiocParallel::bplapply(1:iter., 
449 |                                               BPPARAM = BiocParallel::MulticoreParam(workers = workers., progressbar = T),
450 |                                               function(i) {
451 |                                                 
452 |                                                 cells_clean <- cells %>% 
453 |                                                   filter(cell_type_figure %ni% c("excluded")) %>% 
454 |                                                   filter(sample %in% c(sample.))
455 |                                                 
456 |                                                 cells_clean[[cell_type_figure_column.]] <- sample(cells_clean[[cell_type_figure_column.]])
457 |                                                 
458 |                                                 spe_clean <- SpatialExperiment::SpatialExperiment(
459 |                                                   sample_id = cells_clean$sample,
460 |                                                   spatialCoords = as.matrix(cells_clean %>% select(c(centroid_x, centroid_y))),
461 |                                                   rowData = marker,
462 |                                                   colData = list(cell_name = cells_clean$cell_name,
463 |                                                                  cell_type_figure = as.factor(cells_clean[[cell_type_figure_column.]]),
464 |                                                                  centroid_x = cells_clean$centroid_x,
465 |                                                                  centroid_y = cells_clean$centroid_y,
466 |                                                                  img_id = cells_clean$sample,
467 |                                                                  all = rep("all", dim(cells_clean)[1]))
468 |                                                 )
469 |                                                 
470 |                                                 spe_clean <- imcRtools::buildSpatialGraph(spe_clean, 
471 |                                                                                           img_id = "sample_id", 
472 |                                                                                           type = "expansion", 
473 |                                                                                           threshold = radius., 
474 |                                                                                           coords = c("centroid_x", "centroid_y"),
475 |                                                                                           name = "expansion_graph")
476 |                                                 
477 |                                                 
478 |                                                 spe_clean <- imcRtools::aggregateNeighbors(spe_clean, 
479 |                                                                                            colPairName = "expansion_graph", 
480 |                                                                                            aggregate_by = "metadata", 
481 |                                                                                            count_by = "cell_type_figure",
482 |                                                                                            name = "nhood_mat", 
483 |                                                                                            proportions = F)
484 |                                                 
485 |                                                 
486 |                                                 nhood_mat <- cbind((SingleCellExperiment::colData(spe_clean)[["nhood_mat"]]), 
487 |                                                                    from_cell_type_figure = cells_clean[[cell_type_figure_column.]]) %>% as_tibble
488 |                                                 
489 |                                                 
490 |                                                 nhood_mat_num <- nhood_mat %>% select_if(is.numeric)
491 |                                                 
492 |                                                 row_sums <- nhood_mat_num %>% rowSums()
493 |                                                 fraction_coherance_mat <- nhood_mat_num/row_sums
494 |                                                 drop_rows <- apply(fraction_coherance_mat, 1, function(x) any(x > fraction_coherance.))
495 |                                                 nhood_norm_coherant <- nhood_mat[!drop_rows,]
496 |                                                 nhood_norm_coherant <- nhood_norm_coherant %>% drop_na()
497 |                                                 
498 |                                                 ## Summarise interactions
499 |                                                 
500 |                                                 nhood_sum <- nhood_norm_coherant %>% 
501 |                                                   group_by(from_cell_type_figure) %>% 
502 |                                                   summarise(across(where(is.numeric), sum))
503 |                                                 
504 |                                                 ## Calc rowSums and subtract self-pairs
505 |                                                 nhood_rowSums <- nhood_sum %>% 
506 |                                                   select_if(is.numeric) %>% 
507 |                                                   rowSums()
508 |                                                 
509 |                                                 ## Pivot_long and extract self-pairs
510 |                                                 self_pair_count <- nhood_sum %>% 
511 |                                                   pivot_longer(!from_cell_type_figure, names_to = "to_cell_type_figure", values_to = "count") %>% 
512 |                                                   filter(from_cell_type_figure == to_cell_type_figure) %>% 
513 |                                                   pull(count)
514 |                                                 
515 |                                                 ## Normalize nhood_sum by difference rowSums and self_pair_count
516 |                                                 
517 |                                                 nhood_norm <- sweep(nhood_sum %>% select_if(is.numeric), 1, (nhood_rowSums-self_pair_count), "/") %>% 
518 |                                                   cbind(from_cell_type_figure = nhood_sum$from_cell_type_figure, .) %>% as_tibble()
519 |                                                 
520 |                                                 nhood_norm_shuff <- nhood_norm %>% 
521 |                                                   pivot_longer(!from_cell_type_figure, names_to = "to_cell_type_figure", values_to = paste0("shuff_", i)) #%>% 
522 |                                                 #select(paste0("shuff_", i))
523 |                                                 
524 |                                                 return(nhood_norm_shuff)
525 |                                               }
526 |     )
527 |     
528 |   }
529 |   
530 |   list_nhood_norm_shuff <- Tmp()
531 |   
532 |   nhood_norm_shuff <-
533 |     left_join(nhood_norm_obs,
534 |               reduce(
535 |                 list_nhood_norm_shuff,
536 |                 left_join,
537 |                 by = c("from_cell_type_figure", "to_cell_type_figure")
538 |               )) %>% as_tibble()
539 |   
540 |   nhood_scaled <-
541 |     nhood_norm_shuff %>% select_if(is.numeric) %>% t %>%  scale(center = T, scale = T) %>% t
542 |   
543 |   nhood_scaled <-
544 |     cbind(
545 |       nhood_norm_shuff %>% select(from_cell_type_figure, to_cell_type_figure),
546 |       nhood_scaled
547 |     ) %>% as_tibble
548 |   
549 |   
550 |   return(nhood_scaled)
551 |   
552 | }
553 | 
554 | ## Summarize nhood 
555 | 
556 | Summarise_nhood <- function(sample) {
557 |   
558 |   tmp <- sample
559 |   
560 |   tmp$z_score <- tmp[[3]]
561 |   tmp$perm_min <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, min)
562 |   tmp$perm_max <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, max)
563 |   tmp$perm_mean <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, mean)
564 |   tmp$perm_median <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, median)
565 |   tmp$perm_sd <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, sd)
566 |   
567 |   tmp$count_larger <- rowSums(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")) >  tmp[[3]])
568 |   tmp$count_smaller <- rowSums(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")) <  tmp[[3]])
569 |   
570 |   tmp$interaction_type <- if_else(tmp[[3]] >0, true = "attraction", false = "avoidance")
571 |   tmp$p_value <- if_else(tmp[[3]]>0, true = 1/tmp$count_smaller, false = 1/tmp$count_larger)
572 |   tmp$significant <- if_else(tmp$p_value < 0.05, true = TRUE, false = FALSE)
573 |   tmp$pair <- paste0(tmp$from_cell_type_figure, "_", tmp$to_cell_type_figure)
574 |   
575 |   tmp <- tmp %>% select(from_cell_type_figure, 
576 |                         to_cell_type_figure, 
577 |                         z_score, 
578 |                         interaction_type, 
579 |                         p_value, 
580 |                         significant, 
581 |                         perm_mean, 
582 |                         perm_median, 
583 |                         perm_min, 
584 |                         perm_max, 
585 |                         perm_sd, 
586 |                         count_larger, 
587 |                         count_smaller, 
588 |                         pair)
589 |   
590 |   return(tmp)
591 |   
592 | }
593 | 
594 | 
595 | ## Plot summary
596 | 
597 | Plot_summary <- function(list_nhood = list_nhood, cell_type_figure_column) {
598 |   
599 |   tmp <- BiocParallel::bplapply(list_nhood, 
600 |                                 FUN = function(x) Summarise_nhood(sample = x), 
601 |                                 BPPARAM = BiocParallel::MulticoreParam(workers = 6, progressbar = T))
602 |   
603 |   nhood_summary <- do.call(rbind.data.frame, tmp)
604 |   nhood_summary <- nhood_summary %>% select(from_cell_type_figure, to_cell_type_figure, interaction_type, z_score)
605 |   nhood_summary <- nhood_summary %>% 
606 |     group_by(from_cell_type_figure, to_cell_type_figure, interaction_type) %>% 
607 |     summarise(mean = mean(z_score), n = n()) %>% 
608 |     group_by(from_cell_type_figure, to_cell_type_figure) %>% 
609 |     arrange(-n) %>% 
610 |     slice(1) %>% 
611 |     ungroup() %>%
612 |     complete(from_cell_type_figure = cells %>% filter({{cell_type_figure_column}} %ni% c("excluded")) %>% pull({{cell_type_figure_column}}) %>% unique(), 
613 |              to_cell_type_figure= cells %>% filter({{cell_type_figure_column}} %ni% c("excluded")) %>% pull({{cell_type_figure_column}}) %>% unique(), 
614 |              fill = list(mean = NA, n = 0))
615 |   
616 |   nhood_summary$percent_samples <- (nhood_summary$n/12)
617 |   
618 |   return(nhood_summary)
619 | }
620 | 
621 | 
622 | levels_samples_structured <- c(
623 |   "ZH916_T1_B", 
624 |   "ZH1007_NEC_B",
625 |   "ZH916_2_B",
626 |   "ZH1019_T1_A",
627 |   "ZH1041_T1_B",
628 |   "ZH881_T1_v6",
629 |   "ZH881_INF_v6",
630 |   "ZH881_1_B",
631 |   "ZH1007_INF"
632 | )
633 | 
634 | levels_samples_disorganised <- c(
635 |   "MGH258_v6",
636 |   "ZH1019_INF",
637 |   "ZH916_INF")
638 | 
639 | levels_samples <-
640 |   c(
641 |     "ZH916_INF",
642 |     "ZH881_INF_v6",
643 |     "ZH1007_INF",
644 |     "ZH1019_INF",
645 |     "ZH916_T1_B",
646 |     "ZH881_T1_v6",
647 |     "ZH1007_NEC_B",
648 |     "ZH1019_T1_A",
649 |     "ZH916_2_B",
650 |     "ZH881_1_B",
651 |     "ZH1041_T1_B",
652 |     "MGH258_v6"
653 |   )
654 | 
655 | key_sample <-
656 |   tibble(
657 |     codex = c(
658 |       "ZH916_INF",
659 |       "ZH881_INF_v6",
660 |       "ZH1007_INF",
661 |       "ZH1019_INF",
662 |       "ZH916_T1_B",
663 |       "ZH881_T1_v6",
664 |       "ZH1007_NEC_B",
665 |       "ZH1019_T1_A",
666 |       "ZH881_1_B",
667 |       "MGH258_v6"
668 |     ),
669 |     visium = c(
670 |       "ZH916inf",
671 |       "ZH881inf",
672 |       "ZH1007inf",
673 |       "ZH1019inf",
674 |       "ZH916T1",
675 |       "ZH881T1",
676 |       "ZH1007nec",
677 |       "ZH1019T1",
678 |       "ZH8811Bbulk",
679 |       "MGH258"
680 |     )
681 |   )
682 | 
683 | # Import data -------------------------------------------------------------
684 | ##Import log-transformed and normalized matrix and cell_table
685 | nmat <- readRDS("Codex/mat_norm.RDS")
686 | cells <- readRDS("Codex/cells_table.RDS")
687 | marker <- readRDS("Codex/marker.RDS")
688 | 
689 | vspots <- readRDS("Codex/visium_spots.RDS")
690 | visium_positions <- readRDS("Codex/visium_spot_positions_all.RDS")
691 | spots_visium_aligned <- readRDS("Codex/spots_aligned_visium.RDS")
692 | spots_codex_aligned <- readRDS("Codex/pseudospots_aligned_codex.RDS")
693 | cells_aligned <- readRDS("Codex/cells_aligend_codex.RDS")
694 | scalef <- readRDS("Codex/scale_factors_hires.RDS")
695 | 
696 | spot_dom <- readRDS("Codex/CODEX_pseudospot_dominant.RDS")
697 | spot_comp <- readRDS("Codex/CODEX_pseudospot_composition.RDS")
698 | 
699 | 
700 | # Calculate cell densities per cell type -----------------------------------
701 | ## Create SpatialExperiment (spe)
702 | cells_clean <- cells %>% filter(cell_type_figure %ni% c("excluded"))
703 | 
704 | spe <- SpatialExperiment::SpatialExperiment(assays = list(logcounts = nmat[, which(cells$cell_type_figure %ni% c("excluded"))]),
705 |                                             sample_id = cells_clean$sample,
706 |                                             spatialCoords = as.matrix(cells_clean %>% select(c(centroid_x, centroid_y))),
707 |                                             rowData = marker,
708 |                                             colData = list(cell_name = cells_clean$cell_name,
709 |                                                            cell_type_figure = as.factor(cells_clean$cell_type_figure),
710 |                                                            ivygap = as.factor(cells_clean$ivygap),
711 |                                                            region = as.factor(cells_clean$region),
712 |                                                            centroid_x = cells_clean$centroid_x,
713 |                                                            centroid_y = cells_clean$centroid_y,
714 |                                                            img_id = cells_clean$sample,
715 |                                                            all = rep("all", dim(nmat[, which(cells$cell_type_figure %ni% c("excluded"))])[2]))
716 | )
717 | 
718 | ## Create spatial graph by centroid expansion and add to spe (colPair)
719 | spe <- imcRtools::buildSpatialGraph(spe, 
720 |                                     img_id = "sample_id", 
721 |                                     type = "expansion", 
722 |                                     threshold = 27.5, 
723 |                                     coords = c("centroid_x", "centroid_y"),
724 |                                     name = "expansion_graph")
725 | 
726 | SingleCellExperiment::colPairNames(spe) #shows constructed graph
727 | colPair(spe, "expansion_graph") # prints summary of constructed graph
728 | colPair(spe, "expansion_graph") %>% class #
729 | 
730 | # The graph is stored in form of a SelfHits object in colPair(object, name). This object can be regarded as an edgelist and coerced to an igraph object via:
731 | 
732 | igraph::graph_from_edgelist(as.matrix(colPair(spe, "expansion_graph")))
733 | 
734 | ## Aggregate neighbors and create neighborhood matrix
735 | spe <- imcRtools::aggregateNeighbors(spe, 
736 |                                      colPairName = "expansion_graph", 
737 |                                      aggregate_by = "metadata", 
738 |                                      count_by = "cell_type_figure",
739 |                                      name = "nhood_mat")
740 | 
741 | as_tibble(SingleCellExperiment::colData(spe)[["nhood_mat"]]) %>% dim
742 | as_tibble(SingleCellExperiment::colData(spe)[["nhood_mat"]])
743 | 
744 | nhood <- as_tibble(SingleCellExperiment::colData(spe)[["nhood_mat"]])
745 | 
746 | ## Aggregate neighbors for cell density plots
747 | spe <- imcRtools::aggregateNeighbors(spe, 
748 |                                      colPairName = "expansion_graph", 
749 |                                      aggregate_by = "metadata",
750 |                                      count_by = "cell_type_figure",
751 |                                      name = "nhood_abs_mat", 
752 |                                      proportions = F)
753 | 
754 | nhood_abs <- SingleCellExperiment::colData(spe)[["nhood_abs_mat"]] %>% as_tibble()
755 | nhood_abs <- nhood_abs %>% 
756 |   mutate(count = rowSums(nhood_abs)) %>% 
757 |   mutate(cell_type_figure = cells_clean$cell_type_figure) %>% 
758 |   mutate(ivygap = cells_clean$ivygap)
759 | 
760 | 
761 | ## Add malignant column for later plots
762 | nhood_abs <- nhood_abs %>% 
763 |   mutate(malignant = if_else(cell_type_figure %in% c("Chromatin-Reg","MES-Hyp", "MES", "AC", "OPC", "NPC"), true = "malignant", false = cell_type_figure)) %>% 
764 |   mutate(malignant = if_else(cell_type_figure %in% c("Mac", "Inflammatory-Mac", "T-cell", "B-cell", "Vasc"), true = "immune", false = malignant)) %>% 
765 |   mutate(malignant = if_else(cell_type_figure %in% c("Reactive-Ast", "Oligo", "Neuron"), true = "normal", false = malignant))
766 | 
767 | # Plot boxplot per cell type
768 | ggplot(nhood_abs, aes(x=fct_reorder(cell_type, count, .desc = F), y=count, fill=cell_type)) +
769 |   stat_boxplot(geom = "errorbar", color = "black") +
770 |   geom_boxplot(color = "black", outlier.shape = NA, notch = T) +
771 |   scale_fill_manual(values = an_cols_cell_type) +
772 |   theme_classic() +
773 |   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
774 |         legend.position = "none",
775 |         axis.text = element_text(size = 20, color = "black"), 
776 |         axis.title = element_text(size = 25)) +
777 |   labs(x=NULL, y="cell count per spot-radius") +
778 |   ylim(0,30)
779 | 
780 | 
781 | # Co-localisation analysis ------------------------------------------------
782 | ## 1. See custom function: Test neighborhood
783 | ## 2. Run function for 1 visium spot resolution (r=27.5) for all samples:
784 | 
785 | list_nhood <- lapply(cells$sample %>% unique(), function(x) Test_nhood(sample = x, 
786 |                                                                        iter = 500, 
787 |                                                                        workers = 100, 
788 |                                                                        cell_type_figure_column = "cell_type_tidy", 
789 |                                                                        radius = 27.5))
790 | 
791 | names(list_nhood) <- cells$sample %>% unique()
792 | 
793 | ## 3. Summarize and plot the results
794 | 
795 | levels_layers <-
796 |   c(
797 |     "Neuron",
798 |     "NPC",
799 |     "Oligo",
800 |     "Reactive_Ast",
801 |     "OPC",
802 |     "AC",
803 |     "Mac",
804 |     "Vasc",
805 |     "T_cell",
806 |     "B_cell",
807 |     "MES",
808 |     "Inflammatory_Mac",
809 |     "MES_Hyp",
810 |     "Chromatin_Reg"
811 |   )
812 | 
813 | 
814 | tmp1 <- Plot_summary(list_nhood = list_nhood[levels_samples_structured], cell_type_figure_column = "cell_type_tidy")
815 | 
816 | tmp2 <- Plot_summary(list_nhood = list_nhood[levels_samples_disorganised], cell_type_figure_column = "cell_type_tidy")
817 | 
818 | 
819 | #plot 10*22
820 | ggpubr::ggarrange(
821 |   
822 |   
823 |   ggplot(tmp1 %>% 
824 |            filter(from_cell_type_figure != to_cell_type_figure) %>% 
825 |            drop_na(),
826 |          aes(factor(to_cell_type_figure, levels = levels_layers), factor(from_cell_type_figure, levels = levels_layers), col=mean, size=percent_samples)) +
827 |     geom_tile(col = "black", size=0, fill = "white") +
828 |     geom_point(shape = 19) + 
829 |     scale_color_gradient2(low = "white", high = "#b2182b", midpoint=0, na.value = "white") +
830 |     scale_size(range=c(9,16),limits = c(0.1,1)) +
831 |     theme_classic() +
832 |     guides(size = guide_legend(override.aes = list(color = "black"))) +
833 |     theme(axis.text.x = element_text(angle = 45, vjust = 0.9, hjust = 1,size=24)) + 
834 |     theme(axis.text.y =element_text(size=24)) +
835 |     labs(x=NULL, y=NULL, size = "significant in \nfraction of samples") +
836 |     ggtitle("CODEX structured - spot 1")
837 |   
838 |   ,
839 |   
840 |   ggplot(tmp2 %>% 
841 |            filter(from_cell_type_figure != to_cell_type_figure) %>% 
842 |            drop_na(),
843 |          aes(factor(to_cell_type_figure, levels = levels_layers), factor(from_cell_type_figure, levels = levels_layers), col=mean, size=percent_samples)) +
844 |     geom_tile(col = "black", size=0, fill = "white") +
845 |     geom_point(shape = 19) + 
846 |     scale_color_gradient2(low = "white", high = "#b2182b", midpoint=0, na.value = "white") +
847 |     scale_size(range=c(9,16),limits = c(0.1,1)) +
848 |     theme_classic() +
849 |     guides(size = guide_legend(override.aes = list(color = "black"))) +
850 |     theme(axis.text.x = element_text(angle = 45, vjust = 0.9, hjust = 1,size=24)) + 
851 |     theme(axis.text.y =element_text(size=24)) +
852 |     labs(x=NULL, y=NULL, size = "significant in \nfraction of samples") +
853 |     ggtitle("CODEX disorganised - spot 1")
854 | )
855 | 
856 | 
857 | 
858 | ## Structured only
859 | ggplot(tmp1 %>% 
860 |          filter(from_cell_type_figure != to_cell_type_figure) %>% 
861 |          drop_na(),
862 |        aes(factor(to_cell_type_figure, levels = levels_layers), factor(from_cell_type_figure, levels = levels_layers), col=mean, size=percent_samples)) +
863 |   geom_tile(col = "white", size=0, fill = "white") +
864 |   geom_point(shape = 19) + 
865 |   scale_color_gradient2(
866 |     low = "white",
867 |     high = "#b2182b",
868 |     midpoint = 0,
869 |     na.value = "white",
870 |     limit = c(0, 20),
871 |     oob = scales::squish, 
872 |     guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")) +
873 |   scale_size(range=c(5,12),limits = c(0.1,1)) +
874 |   theme_classic() +
875 |   theme(axis.text.x = element_text(
876 |     angle = 40,
877 |     vjust = 1,
878 |     hjust = 1,
879 |     size = 20)) + 
880 |   theme(axis.text.y = element_text(size = 20)) + 
881 |   theme(legend.text = element_text(size = 20)) + 
882 |   theme(legend.title = element_text(size = 20)) +
883 |   labs(x=NULL, y=NULL, size = "significant in \nfraction of samples")
884 | 
885 | ## 4. Calculate mean of each reciprocal pair for network graph
886 | 
887 | pair_struct_mean <- tmp1 %>% 
888 |   rowwise() %>% 
889 |   mutate(pair = paste0(sort(c(from_cell_type_figure, to_cell_type_figure)), collapse = ".")) %>% 
890 |   arrange(pair) %>% 
891 |   filter(from_cell_type_figure != to_cell_type_figure) %>% 
892 |   filter(from_cell_type_figure %ni% c("excluded")) %>% 
893 |   filter(to_cell_type_figure %ni% c("excluded")) 
894 | 
895 | pair_struct_mean <- pair_struct_mean %>% 
896 |   group_by(pair) %>% 
897 |   summarise(pair_mean = mean(mean),
898 |             pair_n = mean(n),
899 |             pair_percent_samples = mean(percent_samples))
900 | 
901 | pair_struct_mean <- pair_struct_mean %>% separate(col = pair, into = c("from", "to"), sep = "\\.", remove = F)
902 | 
903 | write_tsv(pair_struct_mean %>% select(-pair), file = "pair_struct_mean.tsv")
904 | 
905 | 
906 | ## 5. Calculate max of each pair for network graph (To not loose assymetric pairs like MES-Hyp + MES)
907 | pair_struct_max <- tmp1 %>% 
908 |   rowwise() %>% 
909 |   mutate(pair = paste0(sort(c(from_cell_type_figure, to_cell_type_figure)), collapse = ".")) %>% 
910 |   arrange(pair) %>% 
911 |   filter(from_cell_type_figure != to_cell_type_figure) %>% 
912 |   filter(from_cell_type_figure %ni% c("excluded")) %>% 
913 |   filter(to_cell_type_figure %ni% c("excluded")) 
914 | 
915 | pair_struct_max <- pair_struct_max %>% 
916 |   arrange(pair, desc(mean)) %>% 
917 |   group_by(pair) %>% 
918 |   filter(row_number() == 1) %>% 
919 |   ungroup() %>% 
920 |   arrange(desc(mean))
921 | 
922 | 
923 | # Create aligned pseudospots ----------------------------------------------
924 | ## Create list of df with per sample CODEX pseudospots
925 | list_spots_aligned <- map2(.x = key_sample$codex,
926 |                            .y = key_sample$visium,
927 |                            .f = ~Create_aligned_pseudospots(sample_codex = .x, sample_visium = .y))
928 | 
929 | names(list_spots_aligned) <- key_sample$visium
930 | 
931 | ## Flatten list do df
932 | list_spots_aligned <- map(names(list_spots_aligned), 
933 |                           function(x) list_spots_aligned[[x]] %>% mutate(sample = x)
934 | )
935 | 
936 | spots_codex_aligned <- do.call(rbind, list_spots_aligned) %>% 
937 |   mutate(spot_id_sample = paste0(sample, "_", spot_id)) %>% 
938 |   rename(mp_cons = cell_type_tidy) %>% 
939 |   drop_na()
940 | 
941 | ## Filter aligned visium spots
942 | spots_visium_aligned <- vspots %>% 
943 |   filter(sample %in% key_sample$visium) %>% 
944 |   mutate(spot_id_sample = paste0(sample, "_", spot_id))
945 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Spatial_Glioma
 2 | 
 3 | This resource provides the R code to reproduce key results described in Greenwald, Galili Darnell, Hoefflin, et al. "Integrative spatial analysis reveals a multi-layered organization of glioblastoma".
 4 | 
 5 | The analyses are divided into 6 main modules:
 6 | 
 7 | Module 1: Per sample clustering, metaprogram generation, and spot annotation
 8 | 
 9 | Module 2: Spatial coherence and organizational zones
10 | 
11 | Module 3: Measures of spatial associations 
12 | 
13 | Module 4: defining consensus interactions
14 | 
15 | Module 5: Spatial CNA inference
16 | 
17 | Module 6: CODEX analysis
18 | 
19 | ## Getting started
20 | 1. Clone Github repository.
21 | 2. Download and extract the data provided in [Inputs.zip](https://drive.google.com/file/d/1YFOwEDFLpyxG6WX243FNA7lnrVSaiVL-/view?usp=sharing)
22 | 3. Set the working directory to Inputs.
23 | 4. Run one of the 6 code modules in R.
24 | 
25 | ## General notes
26 | 
27 | Please note results of modules 1-3 might slightly differ dependending on the version of R/R packages used.
28 | 
29 | Each code module can be run independently.
30 | 
31 | The code uploaded here is the working code being used throughout the work on the project. 
32 | 
33 | The MP generation approach is based on our earlier work described in Kinker et al. 2020 and Gavish et al. 2023.  Further documentation can be found [here](https://github.com/gabrielakinker/CCLE_heterogeneity) and [here](https://github.com/tiroshlab/3ca).
34 | 
35 | We have updated Module 2 to include the code used to generate Figures 4A-C and clarified which code was used for different panels.   
36 | 
37 | The Inputs file contains an additional README regarding alignment of Visium and CODEX samples. 
38 | 
39 | You can find the Visium H&E [here](https://drive.google.com/file/d/19ROs5wKtDH-RLqELFFBeEexKScW20FJL/view?usp=drive_link). 
40 | 
41 | Requirements
42 | R (tested in version 4.1.1).
43 | 


--------------------------------------------------------------------------------