├── Module1_MP.R ├── Module2_spatial_coherence_org_zones.R ├── Module3_spatial_associations.R ├── Module4_consensus_interactions.R ├── Module5_CNA.R ├── Module6_CODEX.R └── README.md /Module1_MP.R: -------------------------------------------------------------------------------- 1 | 2 | Sys.setenv(RETICULATE_PYTHON = "/.conda/envs/leiden/bin/python") 3 | Sys.setenv(RETICULATE_PYTHON = "/.conda/envs/leiden/bin/python") 4 | 5 | library(reticulate) 6 | library(Seurat) 7 | library(scalop) 8 | library(leiden) 9 | library(ggplot2) 10 | library(RColorBrewer) 11 | library(viridis) 12 | library(scales) 13 | library(reshape2) 14 | library(reshape2) 15 | library(scales) 16 | library(NMF) 17 | library(MetBrewer) 18 | library(colorspace) 19 | library(tibble) 20 | library(dplyr) 21 | library(data.table) 22 | library(stringr) 23 | library(readr) 24 | library(Matrix) 25 | library(bigmemory) 26 | library(doMC) 27 | library(patchwork) 28 | 29 | ###### Per sample Leiden clustering############################## 30 | 31 | samples_names <- (read.delim("general/GBM_samples.txt", header = FALSE, sep = "\t"))$V1 32 | 33 | # set parameters 34 | complexity_filter <- 1000 35 | mitochondrial_filter <- 20 36 | genes_filter <- 7000 37 | n_dim <- 20 38 | dim_filter <- 5^(-15) 39 | res_param <- 1 40 | sig_th <- .005 41 | mp_num <- 13 42 | distinct16_pal<-c("#11A579","#F2B701","#66C5CC","#80BA5A","#F6CF71","#7F3C8D","#CF1C90","#3969AC","#f97b72","#E73F74","#4b4b8f","#ACA4E2","#31C53F","#B95FBB","#D4D915","#28CECA") 43 | dim2use_list <- c(8,20,19,9,12,14,16,10,10,14,8,10,12,16,7,5,14,16,4,12,6,6,8,12,13,10) # set by jackstraw 44 | 45 | leiden_clustering <- sapply(c(1:length(samples_names)), function(i){ 46 | print(samples_names[i]) 47 | # load spatial data 48 | unfilt_obj<-Load10X_Spatial(data.dir = paste("general/GBM_data/",samples_names[i],"/outs", sep = ""), 49 | filename = "filtered_feature_bc_matrix.h5", 50 | assay = "Spatial", 51 | slice = "detected_tissue_image.jpg", 52 | filter.matrix = TRUE, 53 | to.upper = FALSE 54 | ) 55 | 56 | # filtering 57 | exp_obj <- subset(unfilt_obj, subset = nCount_Spatial > complexity_filter) # filter out spots with less than #complexity_filter UMIs 58 | exp_obj[["percent.mt"]] <- PercentageFeatureSet(exp_obj, pattern = "^MT-") # filter spots with high percent (mitochondrial_filter) mitochondrial genes 59 | exp_obj <- subset(exp_obj, subset = percent.mt= 0.35] <- 0.35 168 | p1 <- ggplot(data = jac_plot, aes(x = Var1, y = Var2, fill = value)) + geom_raster() + 169 | scale_fill_gradientn(colours = colorspace::sequential_hcl(9, palette="YlOrRd", rev=T), limits = c(0,0.35), name = "Jaccard\nIndex") + 170 | theme(axis.ticks = element_blank(), panel.background = element_rect(fill = "white"), axis.line = element_blank(), legend.text.align = 0.5) + 171 | scale_x_discrete(name = "\nPrograms", labels = element_blank(), breaks = element_blank()) + scale_y_discrete(name = "\nPrograms", labels = element_blank(), breaks = element_blank()) 172 | 173 | 174 | metaprog_df <- data.frame(row.names = prog_ordered, 175 | cluster = as.factor(clusterCut[prog_ordered]), 176 | samples = stringr::str_replace(prog_ordered, "_.*",""), 177 | cells = prog_ordered) 178 | 179 | samples_annotation_plot <- ggplot(metaprog_df, aes(x = factor(cells, levels = cells), y = 1)) + 180 | geom_raster(aes(fill = samples)) + theme(legend.position = "none",axis.ticks = element_blank(), panel.background = element_rect(fill = "white"), axis.text = element_blank(), axis.line = element_blank(), axis.text.x = element_blank()) + 181 | labs(x = "", y = "samples", fill = "") 182 | metaprog_annotation_plot <- ggplot(metaprog_df, aes(x = factor(cells, levels = cells), y = 1)) + 183 | geom_raster(aes(fill = cluster)) + theme(legend.position = "none", axis.ticks = element_blank(), panel.background = element_rect(fill = "white"), axis.text = element_blank(), axis.line = element_blank(), axis.text.x = element_blank()) + 184 | labs(x = "", y = "", fill = "") 185 | MP_plot <- egg::ggarrange(p1, metaprog_annotation_plot, samples_annotation_plot, ncol = 1, nrow = 3, heights = c(40, 2, 1)) 186 | 187 | 188 | #Define meta-programs from program clusters, Sort by frequency across programs in cluster (>= 3) 189 | cut_off <- table(metaprog_df$cluster)*0.25 190 | cut_off[cut_off < 3] <- 3 191 | 192 | mp_freqs = sapply(clusters, function(k) sort(table(unlist(genes_list[k])),decreasing = T),simplify = F) 193 | metaprograms = sapply(names(mp_freqs), function(c) head(names(mp_freqs[[c]])[mp_freqs[[c]] >= as.numeric(cut_off[c])], 50), simplify = F) 194 | names(metaprograms)<-c("vascular","neuron","hypoxia","hypoxia.immune","oligo","AC.immune","low.quality","NPC2","macrophage","MES.stromal","OPC.AC","metabolism","MES1.RGL") 195 | 196 | 197 | metaprog_df$samp_cluster <- str_split(metaprog_df$cells, "_", simplify = T)[,2] 198 | metaprog_df$anno_cluster <- metaprog_df$cluster 199 | class(metaprog_df$anno_cluster) 200 | levels(metaprog_df$anno_cluster) <- c("vascular","MES.stromal","OPC.AC","metabolism","MES1.RGL","neuron","hypoxia","hypoxia.immune","oligo","AC.immune","low.quality","NPC2","macrophage") 201 | 202 | sapply(c(1:26), function(i){ 203 | print(samples_names[i]) 204 | spots_gen_assign <- as.data.frame(leiden_clustering[3,i][[1]]) 205 | spots_gen_assign$generalized <- apply(spots_gen_assign,1,function(r){ 206 | return(metaprog_df$anno_cluster[metaprog_df$samples == samples_names[i] & metaprog_df$samp_cluster == r]) 207 | }) 208 | spots_gen_assign$barcodes <- row.names(spots_gen_assign) 209 | spots_gen_assign <- spots_gen_assign[,c("barcodes", "generalized")] 210 | }) 211 | 212 | 213 | ######generating input matrices for NMF 214 | # vector of file paths 215 | file_paths <- list.files(path ="/general/exp_mats_GBM/", pattern = "\\.rds", full.names = TRUE)#directory with counts mats 216 | sample_ls <- gsub(pattern = "\\.rds$", replacement = "", x = basename(file_paths)) 217 | per_sample_mat <- lapply(file_paths, readRDS) 218 | 219 | for (i in seq_along(per_sample_mat)){ 220 | m <- as.matrix(per_sample_mat[[i]]) 221 | if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]} 222 | scaling_factor <- 1000000/colSums(m) 223 | m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*") 224 | m_loged <- log2(1 + (m_CPM/10)) 225 | 226 | # removing genes with zero variance across all cells 227 | var_filter <- apply(m_loged, 1, var) 228 | m_proc <- m_loged[var_filter != 0, ] 229 | # filtering out lowly expressed genes 230 | exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)] 231 | m_proc <- m_proc[exp_genes, ] 232 | 233 | # centering data gene-wise 234 | count_mat_cent<- m_proc - rowMeans(m_proc) 235 | #nmf preprocessing 236 | count_mat_nmf<- count_mat_cent 237 | count_mat_nmf[count_mat_nmf<0] <- 0 # negative values should be set to 0 in initial matrix 238 | 239 | # output to a list of gene expression profiles (GEP) 240 | per_sample_mat[[i]] <- count_mat_nmf 241 | names(per_sample_mat)[i] <- sample_ls[[i]] 242 | rm(m,m_loged, var_filter, exp_genes, m_proc,count_mat_cent) 243 | #saveRDS(count_mat_nmf, paste("MP/NMF/NMF_mats_GBM/", samples_names[i], ".rds", sep ="")) 244 | } 245 | 246 | 247 | ## Extract sample names for sample list file: 248 | #samples <- list.files("MP/NMF/NMF_mats_GBM/") %>% substri(., pos = 1) %>% str_c(., collapse = "\n") 249 | #write_lines(samples, ("/samples.txt"), sep = "\n") 250 | 251 | 252 | 253 | 254 | ###### NMF (Previously run by server. Possible to run per sample below)--------------------------------------------------------------------- 255 | 256 | #!/usr/bin/env Rscript 257 | #args = commandArgs(trailingOnly = TRUE) 258 | 259 | 260 | sname <- "MGH258"# enter sample name here 261 | rank_lb <- 2 262 | rank_ub <- 11 263 | 264 | m <- readRDS(paste0("MP/NMF/NMF_mats_GBM/", sname, ".rds")) 265 | m <- as.matrix(m) 266 | res <- NMF::nmf(x = m, rank = rank_lb:rank_ub, nrun = 5, method = "snmf/r", .opt = list(debug=F, parallel=F, shared.memory=F, verbose=T)) 267 | 268 | 269 | ###### Generation of NMF + Leiden GBM spatial metaprograms######### 270 | 271 | # 272 | 273 | #robust_nmf_programs function 274 | 275 | robust_nmf_programs <- function(nmf_programs, intra_min = 30, intra_max = 10, inter_filter=T, inter_min = 10) { 276 | 277 | # Select NMF programs based on the minimum overlap with other NMF programs from the same sample 278 | intra_intersect <- lapply(nmf_programs, function(z) apply(z, 2, function(x) apply(z, 2, function(y) length(intersect(x,y))))) 279 | intra_intersect_max <- lapply(intra_intersect, function(x) apply(x, 2, function(y) sort(y, decreasing = T)[2])) 280 | nmf_sel <- lapply(names(nmf_programs), function(x) nmf_programs[[x]][,intra_intersect_max[[x]]>=intra_min]) 281 | names(nmf_sel) <- names(nmf_programs) 282 | 283 | # Select NMF programs based on i) the maximum overlap with other NMF programs from the same sample and 284 | # ii) the minimum overlap with programs from another sample 285 | nmf_sel_unlist <- do.call(cbind, nmf_sel) 286 | inter_intersect <- apply(nmf_sel_unlist , 2, function(x) apply(nmf_sel_unlist , 2, function(y) length(intersect(x,y)))) ## calculating intersection between all programs 287 | 288 | final_filter <- NULL 289 | for(i in names(nmf_sel)) { 290 | a <- inter_intersect[grep(i, colnames(inter_intersect), invert = T),grep(i, colnames(inter_intersect))] 291 | b <- sort(apply(a, 2, max), decreasing = T) # for each sample, ranks programs based on their maximum overlap with programs of other samples 292 | if(inter_filter==T) b <- b[b>=inter_min] # selects programs with a maximum intersection of at least 10 293 | if(length(b) > 1) { 294 | c <- names(b[1]) 295 | for(y in 2:length(b)) { 296 | if(max(inter_intersect[c,names(b[y])]) <= intra_max) c <- c(c,names(b[y])) # selects programs iteratively from top-down. Only selects programs that have a intersection smaller than 10 with a previously selected programs 297 | } 298 | final_filter <- c(final_filter, c) 299 | } else { 300 | final_filter <- c(final_filter, names(b)) 301 | } 302 | } 303 | return(final_filter) 304 | } 305 | 306 | 307 | # Custom color palette 308 | 309 | custom_magma <- c(colorRampPalette(c("white", rev(magma(323, begin = 0.15))[1]))(10), rev(magma(323, begin = 0.18))) 310 | 311 | ## Create list of NMF matrics where each sample is an entry 312 | path <- "MP/NMF/out_dir/" 313 | sample_ls <- list.files(path) 314 | 315 | ## Create list of NMF matrics where each sample is an entry 316 | prog_genes_ls <- list() 317 | for(i in seq_along(sample_ls)) { 318 | nmf_obj <- readRDS(paste(path, sample_ls[[i]], sep = "/")) 319 | samp_name <- stringr::str_split(sample_ls[[i]], pattern = "_")[[1]][[1]] 320 | nmf_mats <- c() 321 | for(j in names(nmf_obj$fit)) { 322 | get_basis <- basis(nmf_obj$fit[[j]]) 323 | colnames(get_basis) <- paste0(samp_name, ".", j, ".", 1:j) 324 | nmf_mats <- cbind(nmf_mats, get_basis) 325 | } 326 | prog_genes_ls[[i]] <- nmf_mats 327 | names(prog_genes_ls)[i] <- paste0(samp_name) 328 | rm(nmf_obj, nmf_mats, get_basis) 329 | } 330 | Genes_nmf_w_basis <- do.call(c, list(prog_genes_ls)) 331 | 332 | # Find robust NMFs 333 | # get gene programs (top 50 genes by NMF score) 334 | nmf_programs_sig <- lapply(prog_genes_ls, function(x) apply(x, 2, function(y) names(sort(y, decreasing = T))[1:50])) 335 | 336 | # for each sample, select robust NMF programs (i.e. observed using different ranks in the same sample), remove redundancy due to multiple ranks, and apply a filter based on the similarity to programs from other samples. 337 | nmf_filter_all <- robust_nmf_programs(nmf_programs_sig, intra_min = 35, intra_max = 10, inter_filter=T, inter_min = 10) 338 | nmf_programs_sig <- lapply(nmf_programs_sig, function(x) x[, is.element(colnames(x), nmf_filter_all),drop=F]) 339 | nmf_programs_sig <- do.call(cbind, nmf_programs_sig) 340 | 341 | #leiden clusters 342 | all_leiden_programs<-readRDS("MP/all_leiden_programs_v2.rds") 343 | all_leiden_programs <- do.call(cbind, all_leiden_programs) 344 | nmf_programs_sig<-cbind(nmf_programs_sig,all_leiden_programs) 345 | 346 | # calculate similarity between programs 347 | nmf_intersect <- apply(nmf_programs_sig , 2, function(x) apply(nmf_programs_sig , 2, function(y) length(intersect(x,y)))) 348 | 349 | # hierarchical clustering of the similarity matrix 350 | nmf_intersect_hc_all <- hclust(as.dist(50-nmf_intersect), method="average") 351 | nmf_intersect_hc_all <- reorder(as.dendrogram(nmf_intersect_hc_all), colMeans(nmf_intersect)) 352 | nmf_intersect <- nmf_intersect[order.dendrogram(nmf_intersect_hc_all), order.dendrogram(nmf_intersect_hc_all)] 353 | 354 | nmf_intersect<-readRDS("MP/NMF/nmf_intersect_124.RDS") 355 | nmf_programs_sig<-readRDS("MP/NMF/nmf_programs_sig_124.RDS") 356 | 357 | 358 | ### use a clustering approach that updates MPs in each iteration 359 | 360 | nmf_intersect_KEEP <- nmf_intersect 361 | nmf_programs_sig_KEEP <- nmf_programs_sig 362 | 363 | 364 | ### Parameters (later change to function form)v1-keep! 365 | 366 | Min_intersect_initial <- 12 # the minimal intersection cutoff for defining the Founder NMF program of a cluster 367 | Min_intersect_cluster <- 12 # the minimal intersection cuttof for adding a new NMF to the forming cluster 368 | Min_group_size <- 4 # the minimal group size to consider for defining the Founder_NMF of a MP 369 | 370 | Sorted_intersection <- sort(apply(nmf_intersect , 2, function(x) (length(which(x>=Min_intersect_initial))-1) ) , decreasing = TRUE) 371 | 372 | Cluster_list <- list() ### Every entry contains the NMFs of a chosec cluster 373 | k <- 1 374 | Curr_cluster <- c() 375 | MP_list <- list() 376 | 377 | while (Sorted_intersection[1]>Min_group_size) { 378 | 379 | Curr_cluster <- c(Curr_cluster , names(Sorted_intersection[1])) 380 | 381 | ### intersection between all remaining NMFs and Genes in MP 382 | Genes_MP <- nmf_programs_sig[,names(Sorted_intersection[1])] # initial genes are those in the first NMF. Genes_MP always has only 50 genes consisting of the current MP 383 | nmf_programs_sig <- nmf_programs_sig[,-match(names(Sorted_intersection[1]) , colnames(nmf_programs_sig))] # remove selected NMF 384 | Intersection_with_Genes_MP <- sort(apply(nmf_programs_sig, 2, function(x) length(intersect(Genes_MP,x))) , decreasing = TRUE) # intersection between all other NMFs and Genes_MP 385 | NMF_history <- Genes_MP # has all genes in all NMFs in the current cluster, for newly defining Genes_MP after adding a new NMF 386 | 387 | ### Create gene list - composed of intersecting genes in descending order. Update Curr_cluster each time. 388 | 389 | while ( Intersection_with_Genes_MP[1] >= Min_intersect_cluster) { ### Define current cluster 390 | 391 | Curr_cluster <- c(Curr_cluster , names(Intersection_with_Genes_MP)[1]) 392 | 393 | Genes_MP_temp <- sort(table(c(NMF_history , nmf_programs_sig[,names(Intersection_with_Genes_MP)[1]])), decreasing = TRUE) ## Genes_MP is newly defined each time according to all NMFs in the current cluster 394 | Genes_at_border <- Genes_MP_temp[which(Genes_MP_temp == Genes_MP_temp[50])] ### genes with overlap equal to the 50th gene 395 | 396 | if (length(Genes_at_border)>1){ 397 | ### Sort last genes in Genes_at_border according to maximal NMF gene scores 398 | ### Run over all NMF programs in Curr_cluster and extract NMF scores for each gene 399 | Genes_curr_NMF_score <- c() 400 | for (i in Curr_cluster) { 401 | curr_study <- strsplit(i, "[.]")[[1]][[1]] 402 | Q <- Genes_nmf_w_basis[[curr_study]][ match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]])))[!is.na(match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]]))))] ,i] 403 | #names(Q) <- names(Genes_at_border[!is.na(match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]]))))]) ### sometimes when adding genes the names do not appear 404 | Genes_curr_NMF_score <- c(Genes_curr_NMF_score, Q ) 405 | } 406 | Genes_curr_NMF_score_sort <- sort(Genes_curr_NMF_score , decreasing = TRUE) 407 | Genes_curr_NMF_score_sort <- Genes_curr_NMF_score_sort[unique(names(Genes_curr_NMF_score_sort))] ## take only the maximal score of each gene - which is the first entry after sorting 408 | 409 | Genes_MP_temp <- c(names(Genes_MP_temp[which(Genes_MP_temp > Genes_MP_temp[50])]) , names(Genes_curr_NMF_score_sort)) 410 | 411 | } else { 412 | Genes_MP_temp <- names(Genes_MP_temp)[1:50] 413 | } 414 | 415 | NMF_history <- c(NMF_history , nmf_programs_sig[,names(Intersection_with_Genes_MP)[1]]) 416 | Genes_MP <- Genes_MP_temp[1:50] 417 | 418 | nmf_programs_sig <- nmf_programs_sig[,-match(names(Intersection_with_Genes_MP)[1] , colnames(nmf_programs_sig))] # remove selected NMF 419 | 420 | Intersection_with_Genes_MP <- sort(apply(nmf_programs_sig, 2, function(x) length(intersect(Genes_MP,x))) , decreasing = TRUE) # intersection between all other NMFs and Genes_MP 421 | 422 | } 423 | 424 | Cluster_list[[paste0("Cluster_",k)]] <- Curr_cluster 425 | MP_list[[paste0("MP_",k)]] <- Genes_MP 426 | k <- k+1 427 | 428 | 429 | nmf_intersect <- nmf_intersect[-match(Curr_cluster,rownames(nmf_intersect) ) , -match(Curr_cluster,colnames(nmf_intersect) ) ] # remove current chosen cluster 430 | 431 | Sorted_intersection <- sort(apply(nmf_intersect , 2, function(x) (length(which(x>=Min_intersect_initial))-1) ) , decreasing = TRUE) ### Sort intersection of remaining NMFs not included in any of the previous clusters 432 | 433 | Curr_cluster <- c() 434 | print(dim(nmf_intersect)[2]) 435 | } 436 | 437 | #### ***** Sort Jaccard similarity plot according to new clusters: 438 | 439 | inds_sorted <- c() 440 | 441 | for (j in 1:length(Cluster_list)){ 442 | 443 | inds_sorted <- c(inds_sorted , match(Cluster_list[[j]] , colnames(nmf_intersect_KEEP))) 444 | 445 | } 446 | inds_new <- c(inds_sorted , which(is.na( match(1:dim(nmf_intersect_KEEP)[2],inds_sorted)))) ### combine inds in clusters with inds without clusters 447 | 448 | 449 | # plot re-ordered similarity matrix heatmap 450 | nmf_intersect_meltI_NEW <- reshape2::melt(nmf_intersect_KEEP[inds_new,inds_new]) 451 | 452 | custom_magma <- c(colorRampPalette(c("white", rev(magma(323, begin = 0.15))[1]))(10), rev(magma(323, begin = 0.18))) 453 | 454 | 455 | ggplot(data = nmf_intersect_meltI_NEW, aes(x=Var1, y=Var2, fill=100*value/(100-value), color=100*value/(100-value))) + 456 | geom_tile() + 457 | scale_color_gradient2(limits=c(2,25), low=custom_magma[1:111], mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 13.5, oob=squish, name="Similarity\n(Jaccard index)") + 458 | scale_fill_gradient2(limits=c(2,25), low=custom_magma[1:111], mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 13.5, oob=squish, name="Similarity\n(Jaccard index)") + 459 | theme( axis.ticks = element_blank(), panel.border = element_rect(fill=F), panel.background = element_blank(), axis.line = element_blank(), axis.text = element_text(size = 11), axis.title = element_text(size = 12), legend.title = element_text(size=11), legend.text = element_text(size = 10), legend.text.align = 0.5, legend.justification = "bottom") + 460 | theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + 461 | theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + 462 | guides(fill = guide_colourbar(barheight = 4, barwidth = 1)) 463 | 464 | ggplot(data = nmf_intersect_meltI_NEW, aes(x=Var1, y=Var2, fill=100*value/(100-value), color=100*value/(100-value))) + 465 | geom_tile() + 466 | scale_color_gradient2(limits=c(2,30), low=custom_magma[1:111], mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 16, oob=squish, name="Similarity\n(Jaccard index)") + 467 | scale_fill_gradient2(limits=c(2,30), low=custom_magma[1:111], mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 16, oob=squish, name="Similarity\n(Jaccard index)") + 468 | theme( axis.ticks = element_blank(), panel.border = element_rect(fill=F), panel.background = element_blank(), axis.line = element_blank(), axis.text = element_text(size = 11), axis.title = element_text(size = 12), legend.title = element_text(size=11), legend.text = element_text(size = 10), legend.text.align = 0.5, legend.justification = "bottom") + 469 | theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + 470 | theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + 471 | guides(fill = guide_colourbar(barheight = 4, barwidth = 1)) 472 | 473 | 474 | MP <- do.call(cbind, MP_list) 475 | 476 | names(MP_list)=c("Neuron","Vasc","MES.Hyp","Mac","OPC.AC","Oligo","LQ.Chromatin.reg","MES","Prolif.Metab","MES.Ast","Reactive.Ast","NPC","Inflammatory.Mac") 477 | 478 | 479 | ###### RECLUSTERING/HEIRARCHICAL CLUSTERING, EXTENDED MPs, AND SUBCLUSTER PROGRAMS ################ 480 | programs_ls<-readRDS("MP/NMF/programs_ls_124.rds") #this is "nmf_programs_sig" 481 | Cluster_list<-readRDS("MP/NMF/Cluster_list_124.rds") 482 | str(Cluster_list) 483 | MP_list<-readRDS("MP/NMF/combined_gbm_metaprograms_raw_124.rds") 484 | 485 | 486 | #reclustering OPC.AC into 2 clusters 487 | opc.ac <- Cluster_list[[5]] 488 | 489 | mat2list <- function(x) { 490 | stopifnot(is.matrix(x)) 491 | lapply(seq_len(ncol(x)), function(i) x[, i]) 492 | } 493 | program_ls<-mat2list(programs_ls) 494 | names(program_ls)<-colnames(programs_ls) 495 | opc.ac_mat <- program_ls[names(program_ls) %in% opc.ac] 496 | 497 | jac_mat <- scalop::jaccard(opc.ac_mat) 498 | hc <- hclust(dist(1 - jac_mat), method = "average") 499 | jac_plot <- melt(jac_mat[hc$order, hc$order]) 500 | #summary(jac_plot$value) 501 | 502 | p1 <- ggplot(data = jac_plot, aes(x = Var1, y = Var2, fill = value)) + geom_raster() + 503 | scale_fill_gradient2(limits = c(0, 0.4), low = rev(magma(323, begin = 0.15)), mid = "antiquewhite", high = rev(magma(323, begin = 0.18)), midpoint = 0 , oob = squish, name = "Pearson\nCorrelation") + 504 | theme(axis.ticks = element_blank(), panel.background = element_rect(fill = "white"), axis.line = element_blank(),axis.title = element_text(size=24), legend.text.align = 0.5) + 505 | scale_x_discrete(name = "\nPrograms", breaks = colnames(jac_mat)[hc$order][seq(5, ncol(jac_mat), by = 5)], labels = seq(5, ncol(jac_mat), by = 5)) + 506 | scale_y_discrete(name = "\nPrograms", breaks = colnames(jac_mat)[hc$order][seq(5, ncol(jac_mat), by = 5)], labels = seq(5, ncol(jac_mat), by = 5)) 507 | 508 | prog_ordered <- colnames(jac_mat[hc$order, hc$order]) 509 | plot(hc) 510 | rect.hclust(hc , k = 2, border = 2:6) 511 | clusterCut <- stats::cutree(tree = hc, k = 2) 512 | 513 | metaprog_df <- data.frame(row.names = prog_ordered) 514 | metaprog_df$cluster <- (clusterCut[prog_ordered]) 515 | metaprog_df$cells <- prog_ordered 516 | metaprog_df$cluster <- as.factor(metaprog_df$cluster) 517 | 518 | metaprog_annotation_plot <- ggplot(metaprog_df, aes(x = factor(cells, levels = cells), y = 1)) + 519 | geom_raster(aes(fill = cluster)) + theme(legend.position = "bottom", axis.ticks = element_blank(),axis.title = element_text(size=24), panel.background = element_rect(fill = "white"), axis.text = element_blank(), axis.line = element_blank(), axis.text.x = element_blank()) + 520 | labs(x = "", y = "", fill = "") 521 | 522 | egg::ggarrange(p1, metaprog_annotation_plot, ncol = 1, nrow = 2, heights = c(40, 2)) 523 | 524 | clust_list_ac_opc <- list(names(clusterCut[clusterCut == 1]), names(clusterCut[clusterCut == 2])) # use this ordering to reorder cluster 5 in main MP heatmap 525 | mp_freqs <- sapply(clust_list_ac_opc, function(k) sort(table(unlist(opc.ac_mat[k])), decreasing = T), simplify = F) 526 | opc.ac_metaprograms <- sapply(mp_freqs, function(tab) head(names(tab)[tab >= 3], 50), simplify = F) 527 | names(opc.ac_metaprograms)<-c("AC","OPC") 528 | 529 | 530 | #dividing low quality from chromatin.reg cluster 531 | chromatin <- Cluster_list[[7]] 532 | mat2list <- function(x) { 533 | stopifnot(is.matrix(x)) 534 | lapply(seq_len(ncol(x)), function(i) x[, i]) 535 | } 536 | program_ls<-mat2list(programs_ls) 537 | names(program_ls)<-colnames(programs_ls) 538 | chromatin_mat <- program_ls[names(program_ls) %in% chromatin] 539 | 540 | jac_mat <- scalop::jaccard(chromatin_mat) 541 | hc <- hclust(dist(1 - jac_mat), method = "average") 542 | jac_plot <- melt(jac_mat[hc$order, hc$order]) 543 | #summary(jac_plot$value) 544 | 545 | p1 <- ggplot(data = jac_plot, aes(x = Var1, y = Var2, fill = value)) + geom_raster() + 546 | scale_fill_gradient2(limits = c(0, 0.4), low = rev(magma(323, begin = 0.15)), mid = "antiquewhite", high = rev(magma(323, begin = 0.18)), midpoint = 0 , oob = squish, name = "Pearson\nCorrelation") + 547 | theme(axis.ticks = element_blank(), panel.background = element_rect(fill = "white"), axis.line = element_blank(),axis.title = element_text(size=24), legend.text.align = 0.5) + 548 | scale_x_discrete(name = "\nPrograms", breaks = colnames(jac_mat)[hc$order][seq(5, ncol(jac_mat), by = 5)], labels = seq(5, ncol(jac_mat), by = 5)) + 549 | scale_y_discrete(name = "\nPrograms", breaks = colnames(jac_mat)[hc$order][seq(5, ncol(jac_mat), by = 5)], labels = seq(5, ncol(jac_mat), by = 5)) 550 | 551 | 552 | prog_ordered <- colnames(jac_mat[hc$order, hc$order]) 553 | plot(hc) 554 | rect.hclust(hc , k = 3, border = 2:6) 555 | clusterCut <- stats::cutree(tree = hc, k = 3) 556 | 557 | metaprog_df <- data.frame(row.names = prog_ordered) 558 | metaprog_df$cluster <- (clusterCut[prog_ordered]) 559 | metaprog_df$cells <- prog_ordered 560 | metaprog_df$cluster <- as.factor(metaprog_df$cluster) 561 | 562 | metaprog_annotation_plot <- ggplot(metaprog_df, aes(x = factor(cells, levels = cells), y = 1)) + 563 | geom_raster(aes(fill = cluster)) + theme(legend.position = "bottom", axis.ticks = element_blank(),panel.background = element_rect(fill = "white"), axis.text = element_blank(), axis.line = element_blank(), axis.text.x = element_blank(),axis.title = element_text(size=16)) + 564 | labs(x = "", y = "", fill = "") 565 | 566 | egg::ggarrange(p1, metaprog_annotation_plot, ncol = 1, nrow = 2, heights = c(40, 2)) 567 | 568 | clust_list <- list(names(clusterCut[clusterCut == 1]), names(clusterCut[clusterCut == 2]),names(clusterCut[clusterCut == 3])) 569 | mp_freqs <- sapply(clust_list, function(k) sort(table(unlist(chromatin_mat[k])), decreasing = T), simplify = F) 570 | chromatin_metaprograms <- sapply(mp_freqs, function(tab) head(names(tab)[tab >= 3], 50), simplify = F) 571 | names(chromatin_metaprograms)<-c("LQ1","chromatin_clean","LQ2") 572 | chromatin_metaprogram_clean<-chromatin_metaprograms$chromatin_clean 573 | 574 | 575 | clust_list2 <- list(names(clusterCut[clusterCut == 1]), names(clusterCut[clusterCut == 3]),names(clusterCut[clusterCut == 2])) 576 | #this is the order to use for the final MP heatmap 577 | clust_list_chromatin <- c(names(clusterCut[clusterCut == 1]), names(clusterCut[clusterCut == 2]),names(clusterCut[clusterCut == 3])) 578 | 579 | ###### MPs after OP/AC split and LQ/chromatin.reg split####### 580 | MP_list2<-MP_list[-c(5,7)] 581 | 582 | MP_list2[["chromatin.reg"]] <- chromatin_metaprogram_clean 583 | MP_list2[["OPC"]] <- opc.ac_metaprograms$OPC 584 | MP_list2[["AC"]] <- opc.ac_metaprograms$AC 585 | 586 | 587 | ###### SPOT ASSIGNMENTS TO CLEANED METAPROGRAMS########## 588 | 589 | # generate normalized exp matrices 590 | file_paths <- list.files(path ="general/exp_mats_GBM", pattern = "\\.rds", full.names = TRUE) 591 | sample_ls <- gsub(pattern = "\\.rds$", replacement = "", x = basename(file_paths)) 592 | sample_ls->samples_names 593 | sample_ls->samples 594 | per_sample_mat <- lapply(file_paths, readRDS) 595 | 596 | for (i in seq_along(per_sample_mat)){ 597 | m <- as.matrix(per_sample_mat[[i]]) 598 | m <- m[-grep("^MT-|^RPL|^RPS", rownames(m)), ] 599 | if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]} 600 | scaling_factor <- 1000000/colSums(m) 601 | m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*") 602 | m_loged <- log2(1 + (m_CPM/10)) 603 | 604 | # removing genes with zero variance across all cells 605 | var_filter <- apply(m_loged, 1, var) 606 | m_proc <- m_loged[var_filter != 0, ] 607 | # filtering out lowly expressed genes 608 | exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)] 609 | m_proc <- m_proc[exp_genes, ] 610 | 611 | # output to a list of gene expression profiles (GEP) 612 | per_sample_mat[[i]] <- m_proc 613 | names(per_sample_mat)[i] <- sample_ls[[i]] 614 | rm(m,m_loged, var_filter, exp_genes, m_proc) 615 | } 616 | 617 | #generate a list with the filtered exp matrix for each sample, i.e. m_proc<-(per_sample_mat[[1]]) 618 | score_mat <- lapply(c(1:length(per_sample_mat)), function(i){ 619 | m_proc<-per_sample_mat[[i]] 620 | metaprograms_gene_list <- readRDS("MP/clean_spatial_gbm_metaprograms_124.rds") 621 | signatures <- scalop::sigScores(m_proc, metaprograms_gene_list, expr.center = TRUE, conserved.genes = 0.5) 622 | 623 | spot_scores <- data.frame(spot_names = rownames(signatures)) 624 | spot_scores$Neuron <- signatures$Neuron 625 | spot_scores$Vasc<- signatures$Vasc 626 | spot_scores$MES.Hyp <- signatures$MES.Hyp 627 | spot_scores$Mac <- signatures$Mac 628 | spot_scores$Oligo <- signatures$Oligo 629 | spot_scores$MES <- signatures$MES 630 | spot_scores$Prolif.Metab <- signatures$Prolif.Metab 631 | spot_scores$MES.Ast <- signatures$MES.Ast 632 | spot_scores$Reactive.Ast <- signatures$Reactive.Ast 633 | spot_scores$NPC <- signatures$NPC 634 | spot_scores$Inflammatory.Mac <- signatures$Inflammatory.Mac 635 | spot_scores$chromatin.reg <- signatures$chromatin.reg 636 | spot_scores$OPC <- signatures$OPC 637 | spot_scores$AC <- signatures$AC 638 | return(spot_scores) 639 | }) 640 | 641 | names(score_mat) <- sample_ls 642 | 643 | for (i in seq_along(score_mat)){ 644 | score_df <- as.data.frame(score_mat[[i]]) 645 | score_df <-column_to_rownames(score_df, 'spot_names') 646 | maxcol_meta<-maxcol_strict(score_df) 647 | maxcol_meta<-stack(maxcol_meta) 648 | setnames(maxcol_meta,2,"spot_type_meta_new") 649 | setnames(maxcol_meta,1,"SpotID") 650 | } 651 | -------------------------------------------------------------------------------- /Module2_spatial_coherence_org_zones.R: -------------------------------------------------------------------------------- 1 | library(patchwork) 2 | library(parallel) 3 | 4 | #!!! Run Functions section first 5 | 6 | # load data --------------------------------------------------------------- 7 | 8 | sample_ls <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1 9 | 10 | gen_clusters <- as.character(unique(unlist(sapply(c(1:length(sample_ls)), function(i){ 11 | mp_assign <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = "")) 12 | return(unique(mp_assign$spot_type_meta_new)) 13 | })))) 14 | 15 | 16 | 17 | # calculate spatial coherence (Figures 4A-C) -------- 18 | 19 | rand_num <- 100 20 | 21 | sapply(c(1:length(sample_ls)), function(i){ 22 | 23 | print(sample_ls[i]) 24 | 25 | # load data 26 | spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE) 27 | row.names(spots_positions) <- spots_positions$V1 28 | 29 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = "")) 30 | spots_clusters <- na.omit(spots_clusters) 31 | colnames(spots_clusters) <- c("barcodes", "spot_type") 32 | row.names(spots_clusters)<- spots_clusters$barcodes 33 | 34 | # abundance 35 | 36 | programs_comp <- sample_programs_composition(spots_clusters,gen_clusters) 37 | 38 | # neighbors tables 39 | 40 | neighbors_table <- neighbors_table_func(spots_positions,spots_clusters) 41 | 42 | rand_neighbors_table <- lapply(c(1:rand_num), function(i){ 43 | new_pos <- sample(spots_positions$V1, length(spots_positions$V1), replace = FALSE) 44 | pos_table <- spots_positions 45 | pos_table$V1 <- new_pos 46 | pos_table$V2 <- spots_positions[new_pos, "V2"] 47 | 48 | neighbors_table <- neighbors_table_func(pos_table,spots_clusters) 49 | return(neighbors_table) 50 | }) 51 | 52 | 53 | # spatial coherence 54 | 55 | programs_spatial_score <- sapply(sort(gen_clusters), function(cluster){ 56 | if (!(cluster %in% spots_clusters$spot_type)) { 57 | prog_score <- NaN 58 | } else { 59 | program_neighbors_table = neighbors_table[row.names(neighbors_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster],] 60 | obs <- obs_program_spatial_score(program_neighbors_table, cluster) 61 | one_spatial_score <- one_val(dim(program_neighbors_table)[1]) 62 | zero_spatial_score <- zero_val(rand_neighbors_table, spots_clusters, cluster) 63 | if (obs>one_spatial_score){obs <- one_spatial_score} 64 | if (obs= 0.6) { 281 | if (obs>one_spatial_score){obs <- one_spatial_score} 282 | return(obs/one_spatial_score) 283 | } else { 284 | if (obs>one_spatial_score){obs <- one_spatial_score} 285 | if (obs 0 & unlist(all_scores) < 1], breaks = 100, main = "radius 8: spatial coherence socres", xlab = "spatial coherence score") 312 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5]) 313 | 314 | all_zones10 <- sapply(c(1:length(sample_ls)), function(i){ 315 | set_zones <- ifelse(all_scores[[i]] <= th1, "dis", "other") 316 | print(table(set_zones)) 317 | return(set_zones) 318 | }) 319 | 320 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win11v3.rds") 321 | hist(unlist(all_scores)[unlist(all_scores) > 0 & unlist(all_scores) < 1], breaks = 100, main = "radius 11: spatial coherence socres", xlab = "spatial coherence score") 322 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5]) 323 | 324 | all_zones15 <- sapply(c(1:length(sample_ls)), function(i){ 325 | set_zones <- ifelse(all_scores[[i]] <= th1, "dis", "other") 326 | print(table(set_zones)) 327 | return(set_zones) 328 | }) 329 | 330 | 331 | 332 | zones_intersect <- sapply(c(1:26), function(x){ 333 | inter1 <- intersect(names(all_zones5[[x]])[all_zones5[[x]] == "dis"],names(all_zones10[[x]])[all_zones10[[x]] == "dis"]) 334 | inter2 <- intersect(inter1,names(all_zones15[[x]])[all_zones15[[x]] == "dis"]) 335 | return(inter2) 336 | }) 337 | 338 | 339 | all_zones <- sapply(c(1:26),function(i){ 340 | set_zones <- ifelse(names(all_scores[[i]]) %in% zones_intersect[[i]],"dis","other") 341 | names(set_zones) <- names(all_scores[[i]]) 342 | return(set_zones) 343 | }) 344 | 345 | win_size <- 4 346 | 347 | all_smooth <- sapply(c(1:26),function(i){ 348 | print(sample_ls[i]) 349 | spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE) 350 | row.names(spots_positions) <- spots_positions$V1 351 | 352 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = "")) 353 | spots_clusters <- na.omit(spots_clusters) 354 | colnames(spots_clusters) <- c("barcodes", "spot_type") 355 | row.names(spots_clusters)<- spots_clusters$barcodes 356 | 357 | neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters) 358 | 359 | all_spots <- spots_clusters$barcodes 360 | 361 | smoothing <- sapply(all_spots, function(spot){ 362 | win_spots <- c(spot) 363 | sapply(c(1:win_size), function(i){ 364 | win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,]))))) 365 | }) 366 | win_zones <- all_zones[[i]][names(all_zones[[i]]) %in% win_spots] 367 | if (length(win_zones[win_zones == "dis"])/length(win_zones) >= 0.5) {return("dis")} 368 | else {return("other")} 369 | }) 370 | return(smoothing) 371 | }) 372 | 373 | 374 | # define struct zones ----------------------------------------------------- 375 | 376 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win5v3.rds") 377 | hist(unlist(all_scores), breaks = 100) 378 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5]) 379 | 380 | all_zones5 <- sapply(c(1:length(sample_ls)), function(i){ 381 | set_zones <- ifelse(all_scores[[i]] >= th1, "struct","other") 382 | print(table(set_zones)) 383 | return(set_zones) 384 | }) 385 | 386 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win8v3.rds") 387 | hist(unlist(all_scores)[unlist(all_scores) > 0 & unlist(all_scores) < 1], breaks = 100) 388 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5]) 389 | 390 | all_zones10 <- sapply(c(1:length(sample_ls)), function(i){ 391 | set_zones <- ifelse(all_scores[[i]] >= th1, "struct", "other") 392 | print(table(set_zones)) 393 | return(set_zones) 394 | }) 395 | 396 | all_scores <- readRDS("Spatial_coh_zones/sc_windows/spatial_win11v3.rds") 397 | hist(unlist(all_scores)[unlist(all_scores) > 0 & unlist(all_scores) < 1], breaks = 100) 398 | th1 <- as.numeric(quantile(na.omit(unlist(all_scores)),probs = (seq(0,1,0.1)))[5]) 399 | 400 | all_zones15 <- sapply(c(1:length(sample_ls)), function(i){ 401 | set_zones <- ifelse(all_scores[[i]] >= th1, "struct", "other") 402 | print(table(set_zones)) 403 | return(set_zones) 404 | }) 405 | 406 | 407 | 408 | zones_intersect <- sapply(c(1:26), function(x){ 409 | inter1 <- intersect(names(all_zones5[[x]])[all_zones5[[x]] == "struct"],names(all_zones10[[x]])[all_zones10[[x]] == "struct"]) 410 | inter2 <- intersect(inter1,names(all_zones15[[x]])[all_zones15[[x]] == "struct"]) 411 | return(inter2) 412 | }) 413 | 414 | 415 | all_zones <- sapply(c(1:26),function(i){ 416 | set_zones <- ifelse(names(all_scores[[i]]) %in% zones_intersect[[i]],"struct","other") 417 | names(set_zones) <- names(all_scores[[i]]) 418 | return(set_zones) 419 | }) 420 | 421 | win_size <- 4 422 | all_smooth_struct <- sapply(c(1:26),function(i){ 423 | print(sample_ls[i]) 424 | spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE) 425 | row.names(spots_positions) <- spots_positions$V1 426 | 427 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = "")) 428 | spots_clusters <- na.omit(spots_clusters) 429 | colnames(spots_clusters) <- c("barcodes", "spot_type") 430 | row.names(spots_clusters)<- spots_clusters$barcodes 431 | 432 | neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters) 433 | 434 | all_spots <- spots_clusters$barcodes 435 | 436 | smoothing <- sapply(all_spots, function(spot){ 437 | win_spots <- c(spot) 438 | sapply(c(1:win_size), function(i){ 439 | win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,]))))) 440 | }) 441 | win_zones <- all_zones[[i]][names(all_zones[[i]]) %in% win_spots] 442 | if (length(win_zones[win_zones == "struct"])/length(win_zones) >= 0.5) {return("struct")} 443 | else {return("other")} 444 | }) 445 | return(smoothing) 446 | }) 447 | 448 | 449 | # final zones ------------------------------------------------------------- 450 | 451 | all_zones <- sapply(c(1:26),function(i){ 452 | sapply(names(all_smooth[[i]]), function(s){ 453 | if (all_smooth[[i]][s] == "dis" & all_smooth_struct[[i]][s] == "struct") { 454 | return("Intermediate") 455 | } else if (all_smooth[[i]][s] == "dis") { 456 | return("dis") 457 | } else if (all_smooth_struct[[i]][s] == "struct") { 458 | return("struct") 459 | } else { 460 | return("Intermediate") 461 | } 462 | }) 463 | }) 464 | 465 | names(all_zones) <- sample_ls 466 | 467 | is_small <- sapply(all_zones, function(x){ 468 | tx <- table(x) 469 | 470 | old_clusters <- names(tx) 471 | add_clusters <- c("dis","Intermediate", "struct")[!c("dis","Intermediate", "struct") %in% old_clusters] 472 | if (length(add_clusters) > 0) { 473 | tx <- c(as.numeric(tx),0) 474 | names(tx) <- c(old_clusters, add_clusters) 475 | } 476 | tx <- tx[sort(names(tx))] 477 | tx10 <- sum(tx)*0.12 478 | print(tx) 479 | return(c(tx[1]one_spatial_score){obs <- one_spatial_score} 534 | if (obs= 0.5) { 579 | return("dis_mal") 580 | } else if (all_zones[[i]][s] == "struct" & purity_score_scaled[[i]][names(all_zones[[i]][s])] < 0.4) { 581 | return("st_norm") 582 | } else if (all_zones[[i]][s] == "struct" & purity_score_scaled[[i]][names(all_zones[[i]][s])] >= 0.4) { 583 | return("st_mal") 584 | } else { 585 | return(all_zones[[i]][s]) 586 | } 587 | }) 588 | names(v2) <- names(all_zones[[i]]) 589 | 590 | return(v2) 591 | }) 592 | names(zones_class_fin) <- sample_ls 593 | 594 | 595 | zone.colors <- c(structured_mal = "#cf5e4e",structured_norm = "#a82203", disorganized_mal = "#208cc0", disorganized_norm = "#003967",intermediate ="#f1af3a") 596 | sapply(c(1:26), function(i){ 597 | print(sample_ls[[i]]) 598 | spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[[i]] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE) 599 | row.names(spots_positions) <- spots_positions$V1 600 | 601 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[[i]], ".rds", sep = "")) 602 | spots_clusters <- na.omit(spots_clusters) 603 | colnames(spots_clusters) <- c("barcodes", "spot_type") 604 | row.names(spots_clusters)<- spots_clusters$barcodes 605 | 606 | spots_filt = spots_positions[spots_positions$V1 %in% spots_clusters$barcodes,] 607 | row2plot1 <- spots_clusters$barcodes[spots_clusters$barcodes %in% names(zones_class_fin[[i]])[zones_class_fin[[i]] == "st_mal"]] 608 | row2plot2 <- spots_clusters$barcodes[spots_clusters$barcodes %in% names(zones_class_fin[[i]])[zones_class_fin[[i]] == "st_norm"]] 609 | row2plot3 <- spots_clusters$barcodes[spots_clusters$barcodes %in% names(zones_class_fin[[i]])[zones_class_fin[[i]] == "dis_mal"]] 610 | row2plot4 <- spots_clusters$barcodes[spots_clusters$barcodes %in% names(zones_class_fin[[i]])[zones_class_fin[[i]] == "dis_norm"]] 611 | spots_filt$plot <- factor(ifelse(spots_filt$V1 %in% row2plot1, "structured_mal", 612 | ifelse(spots_filt$V1 %in% row2plot2, "structured_norm", 613 | ifelse(spots_filt$V1 %in% row2plot3, "disorganized_mal", 614 | ifelse(spots_filt$V1 %in% row2plot4, "disorganized_norm","intermediate")))), levels = c("disorganized_mal","disorganized_norm", "structured_mal", "structured_norm","intermediate")) 615 | spots_filt$V3_ops = -(spots_filt$V3) 616 | 617 | 618 | gg = ggplot(spots_filt, aes(x=V4, y=V3_ops)) + 619 | geom_point(aes(col=plot), size=2) + 620 | labs(title=paste(sample_ls[[i]], "spatial coherence zones"), y="pos y", x="pos x") + 621 | scale_color_manual(values = zone.colors, name = "zone") + 622 | theme_void() 623 | print(gg) 624 | 625 | }) 626 | 627 | zones_cat <- c("dis_mal","dis_norm","Intermediate","st_mal","st_norm") 628 | zone_abund <- sapply(zones_class_fin, function(z){ 629 | spots_zone <- as.data.frame(z) 630 | spots_zone <- na.omit(spots_zone) 631 | spots_zone$barcodes <- row.names(spots_zone) 632 | colnames(spots_zone) <- c("spot_type","barcodes") 633 | 634 | # abundance 635 | 636 | zones_comp <- sample_programs_composition(spots_zone,zones_cat) 637 | return(zones_comp) 638 | }) 639 | 640 | 641 | 642 | ######## Functions -------------------------------------------------------------- 643 | 644 | 645 | sample_programs_composition <- function(spots_clusters, gen_clusters){ 646 | composition <- table(spots_clusters$spot_type) 647 | old_clusters <- names(composition) 648 | add_clusters <- gen_clusters[!gen_clusters %in% old_clusters] 649 | sapply(add_clusters,function(clust){ 650 | composition <<- c(composition, clust = 0) 651 | }) 652 | 653 | names(composition) <- c(old_clusters, add_clusters) 654 | final_composition <- composition[sort(names(composition))]/sum(composition) 655 | return(final_composition) 656 | } 657 | 658 | obs_program_spatial_score <- function(program_neighbors, cluster){ 659 | cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 1, 0) 660 | if(is.null(dim(program_neighbors))){ 661 | cluster_neighbors_sum <- sum(cluster_neighbors_bin) 662 | } else { 663 | cluster_neighbors_sum <- apply(cluster_neighbors_bin,1,function(rx){sum(na.omit(rx))}) 664 | } 665 | obs <- mean(cluster_neighbors_sum) 666 | return(obs) 667 | } 668 | 669 | one_val <- function(spots_num){ 670 | a <- sqrt((4*spots_num)/(6*sqrt(3))) 671 | oneval <- (6*spots_num-12*a-6)/spots_num 672 | return(oneval) 673 | } 674 | 675 | zero_val <- function(rand_table, spots_clusters, cluster){ 676 | all_zeroval <- sapply(rand_table, function(neighbors_rand_table){ 677 | program_rand_neighbors_table = neighbors_rand_table[row.names(neighbors_rand_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster],] 678 | rand_obs <- obs_program_spatial_score(program_rand_neighbors_table, cluster) 679 | return(rand_obs) 680 | }) 681 | zeroval <- mean(all_zeroval) 682 | return(zeroval) 683 | } 684 | 685 | 686 | calc_adj_mat <- function(neighbored_cluster, spots_clusters, cluster_neighbors_table){ 687 | if (!(neighbored_cluster %in% spots_clusters$spot_type)) { 688 | return(0) 689 | } else { 690 | cluster_neighbors_bin <- ifelse(cluster_neighbors_table == neighbored_cluster, 1, 0) 691 | 692 | if (is.null(dim(cluster_neighbors_bin))){ 693 | cluster_neighbors_sum <- sum(na.omit(cluster_neighbors_bin)) 694 | } else { 695 | cluster_neighbors_sum <- sum(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))})) 696 | } 697 | return(cluster_neighbors_sum) 698 | } 699 | } 700 | 701 | 702 | prog_connectivity_score <- function(program_neighbors, cluster){ 703 | cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 0, 1) 704 | if(is.null(dim(program_neighbors))){ 705 | prog_connect <- 1 706 | } else { 707 | prog_connect <- length(which(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))}) >0)) 708 | } 709 | return(prog_connect) 710 | } 711 | 712 | neighbors_table_func <- function(spots_positions,spots_clusters){ 713 | neighbors_table <- sapply(spots_clusters$barcodes, function(spot){ 714 | spots_row = spots_positions[spots_positions$V1 == spot, 3] 715 | spots_col = spots_positions[spots_positions$V1 == spot, 4] 716 | 717 | if (spots_col == 0 | spots_row == 0) { 718 | c1 = NaN 719 | } else { 720 | n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1] 721 | if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){ 722 | c1 = NaN 723 | } else { 724 | c1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1]) 725 | } 726 | } 727 | 728 | if (spots_col == 127 | spots_row == 0) { 729 | c2 = NaN 730 | } else { 731 | n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1] 732 | if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){ 733 | c2 = NaN 734 | } else { 735 | c2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2]) 736 | } 737 | } 738 | 739 | if (spots_col == 0 | spots_col == 1) { 740 | c3 = NaN 741 | } else { 742 | n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2] 743 | if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){ 744 | c3 = NaN 745 | } else { 746 | c3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3]) 747 | } 748 | } 749 | 750 | if (spots_col == 126 | spots_col == 127) { 751 | c4 = NaN 752 | } else { 753 | n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2] 754 | if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){ 755 | c4 = NaN 756 | } else { 757 | c4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4]) 758 | } 759 | } 760 | 761 | if (spots_col == 0 | spots_row == 77) { 762 | c5 = NaN 763 | } else { 764 | n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1] 765 | if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){ 766 | c5 = NaN 767 | } else { 768 | c5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5]) 769 | } 770 | } 771 | 772 | if (spots_col == 127 | spots_row == 77) { 773 | c6 = NaN 774 | } else { 775 | n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1] 776 | if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){ 777 | c6 = NaN 778 | } else { 779 | c6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6]) 780 | } 781 | } 782 | 783 | 784 | return(c(c1,c2,c3,c4,c5,c6)) 785 | 786 | }) 787 | 788 | neighbors_table = t(neighbors_table) 789 | row.names(neighbors_table) = spots_clusters$barcodes 790 | 791 | return(neighbors_table) 792 | } 793 | 794 | neighbors_table_funcV2 <- function(spots_positions,spots_clusters){ 795 | neighbors_table <- sapply(spots_clusters$barcodes, function(spot){ 796 | spots_row = spots_positions[spots_positions$V1 == spot, 3] 797 | spots_col = spots_positions[spots_positions$V1 == spot, 4] 798 | 799 | if (spots_col == 0 | spots_row == 0) { 800 | c1 = NaN 801 | } else { 802 | n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1] 803 | if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){ 804 | c1 = NaN 805 | } else { 806 | c1 = as.character(n1) 807 | } 808 | } 809 | 810 | if (spots_col == 127 | spots_row == 0) { 811 | c2 = NaN 812 | } else { 813 | n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1] 814 | if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){ 815 | c2 = NaN 816 | } else { 817 | c2 = as.character(n2) 818 | } 819 | } 820 | 821 | if (spots_col == 0 | spots_col == 1) { 822 | c3 = NaN 823 | } else { 824 | n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2] 825 | if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){ 826 | c3 = NaN 827 | } else { 828 | c3 = as.character(n3) 829 | } 830 | } 831 | 832 | if (spots_col == 126 | spots_col == 127) { 833 | c4 = NaN 834 | } else { 835 | n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2] 836 | if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){ 837 | c4 = NaN 838 | } else { 839 | c4 = as.character(n4) 840 | } 841 | } 842 | 843 | if (spots_col == 0 | spots_row == 77) { 844 | c5 = NaN 845 | } else { 846 | n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1] 847 | if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){ 848 | c5 = NaN 849 | } else { 850 | c5 = as.character(n5) 851 | } 852 | } 853 | 854 | if (spots_col == 127 | spots_row == 77) { 855 | c6 = NaN 856 | } else { 857 | n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1] 858 | if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){ 859 | c6 = NaN 860 | } else { 861 | c6 = as.character(n6) 862 | } 863 | } 864 | 865 | 866 | return(c(c1,c2,c3,c4,c5,c6)) 867 | 868 | }) 869 | 870 | neighbors_table = t(neighbors_table) 871 | row.names(neighbors_table) = spots_clusters$barcodes 872 | 873 | return(neighbors_table) 874 | } 875 | 876 | 877 | win_prox_neighbors_table_func <- function(spots_positions,spots_clusters){ 878 | neighbors_table <- sapply(spots_clusters$barcodes, function(spot){ 879 | spots_row = spots_positions[spots_positions$V1 == spot, 3] 880 | spots_col = spots_positions[spots_positions$V1 == spot, 4] 881 | 882 | if (spots_col == 0 | spots_row == 0) { 883 | n1 = NA 884 | } else { 885 | n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1] 886 | if (length(n1_temp) == 0) { 887 | n1 = NA 888 | } else if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){ 889 | n1 = NA 890 | } else { 891 | n1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1_temp]) 892 | } 893 | } 894 | 895 | if (spots_col == 127 | spots_row == 0) { 896 | n2 = NA 897 | } else { 898 | n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1] 899 | if (length(n2_temp) == 0) { 900 | n2 = NA 901 | } else if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){ 902 | n2 = NA 903 | } else { 904 | n2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2_temp]) 905 | } 906 | } 907 | 908 | if (spots_col == 0 | spots_col == 1) { 909 | n3 = NA 910 | } else { 911 | n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2] 912 | if (length(n3_temp) == 0) { 913 | n3 = NA 914 | } else if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){ 915 | n3 = NA 916 | } else { 917 | n3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3_temp]) 918 | } 919 | } 920 | 921 | if (spots_col == 126 | spots_col == 127) { 922 | n4 = NA 923 | } else { 924 | n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2] 925 | if (length(n4_temp) == 0) { 926 | n4 = NA 927 | } else if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){ 928 | n4 = NA 929 | } else { 930 | n4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4_temp]) 931 | } 932 | } 933 | 934 | if (spots_col == 0 | spots_row == 77) { 935 | n5 = NA 936 | } else { 937 | n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1] 938 | if (length(n5_temp) == 0) { 939 | n5 = NA 940 | } else if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){ 941 | n5 = NA 942 | } else { 943 | n5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5_temp]) 944 | } 945 | } 946 | 947 | if (spots_col == 127 | spots_row == 77) { 948 | n6 = NA 949 | } else { 950 | n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1] 951 | if (length(n6_temp) == 0) { 952 | n6 = NA 953 | } else if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){ 954 | n6 = NA 955 | } else { 956 | n6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6_temp]) 957 | } 958 | } 959 | 960 | 961 | return(c(n1,n2,n3,n4,n5,n6)) 962 | 963 | }) 964 | 965 | neighbors_table = t(neighbors_table) 966 | row.names(neighbors_table) = spots_clusters$barcodes 967 | 968 | return(neighbors_table) 969 | } 970 | 971 | prox_neighbors_table_func <- function(spots_positions,spots_clusters){ 972 | neighbors_table <- sapply(spots_clusters$barcodes, function(spot){ 973 | spots_row = spots_positions[spots_positions$V1 == spot, 3] 974 | spots_col = spots_positions[spots_positions$V1 == spot, 4] 975 | 976 | if (spots_col == 0 | spots_row == 0) { 977 | n1 = NA 978 | } else { 979 | n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1] 980 | if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){ 981 | n1 = NA 982 | } else { 983 | n1 = n1_temp 984 | } 985 | } 986 | 987 | if (spots_col == 127 | spots_row == 0) { 988 | n2 = NA 989 | } else { 990 | n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1] 991 | if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){ 992 | n2 = NA 993 | } else { 994 | n2 = n2_temp 995 | } 996 | } 997 | 998 | if (spots_col == 0 | spots_col == 1) { 999 | n3 = NA 1000 | } else { 1001 | n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2] 1002 | if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){ 1003 | n3 = NA 1004 | } else { 1005 | n3 = n3_temp 1006 | } 1007 | } 1008 | 1009 | if (spots_col == 126 | spots_col == 127) { 1010 | n4 = NA 1011 | } else { 1012 | n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2] 1013 | if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){ 1014 | n4 = NA 1015 | } else { 1016 | n4 = n4_temp 1017 | } 1018 | } 1019 | 1020 | if (spots_col == 0 | spots_row == 77) { 1021 | n5 = NA 1022 | } else { 1023 | n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1] 1024 | if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){ 1025 | n5 = NA 1026 | } else { 1027 | n5 = n5_temp 1028 | } 1029 | } 1030 | 1031 | if (spots_col == 127 | spots_row == 77) { 1032 | n6 = NA 1033 | } else { 1034 | n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1] 1035 | if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){ 1036 | n6 = NA 1037 | } else { 1038 | n6 = n6_temp 1039 | } 1040 | } 1041 | 1042 | 1043 | return(c(n1,n2,n3,n4,n5,n6)) 1044 | 1045 | }) 1046 | 1047 | neighbors_table = t(neighbors_table) 1048 | row.names(neighbors_table) = spots_clusters$barcodes 1049 | 1050 | return(neighbors_table) 1051 | } 1052 | -------------------------------------------------------------------------------- /Module3_spatial_associations.R: -------------------------------------------------------------------------------- 1 | library(stringr) 2 | library(cocor) 3 | library(dplyr) 4 | library(data.table) 5 | library(stats) 6 | library(scalop) 7 | library(tibble) 8 | library(ggrepel) 9 | library(tidyr) 10 | library(scales) 11 | library(matrixStats) 12 | 13 | # Run functions first 14 | 15 | # co-localization --------------------------------------------------------- 16 | #### log-transform expression matrices 17 | 18 | sample_ls <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1 19 | 20 | gen_clusters <- unique(unlist(sapply(c(1:length(sample_ls)), function(i){ 21 | mp_assign <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = "")) 22 | return(unique(mp_assign$spot_type_meta_new)) 23 | }))) 24 | mp_names <- sort(as.character(gen_clusters)) 25 | 26 | 27 | per_sample_mat <- list() 28 | for(i in seq_along(sample_ls)) { 29 | m <- readRDS(paste("general/exp_mats_GBM/", sample_ls[i], "counts.rds", sep = "")) 30 | m <- m[-grep("^MT-|^RPL|^RPS", rownames(m)), ] 31 | if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]} 32 | scaling_factor <- 1000000/colSums(m) 33 | m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*") 34 | m_loged <- log2(1 + (m_CPM/10)) 35 | # removing genes with zero variance across all cells 36 | var_filter <- apply(m_loged, 1, var) 37 | m_proc <- m_loged[var_filter != 0, ] 38 | # filtering out lowly expressed genes 39 | exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)] 40 | m_proc <- m_proc[exp_genes, ] 41 | # output to a list 42 | per_sample_mat[[i]] <- m_proc 43 | names(per_sample_mat)[i] <- sample_ls[[i]] 44 | rm(path, files, m, m_CPM, m_loged, var_filter, exp_genes, m_proc) 45 | } 46 | 47 | #### scoring based matrices 48 | 49 | metaprograms_gene_list <- readRDS("MP/clean_spatial_gbm_metaprograms_124.rds") 50 | 51 | all_decon <- lapply(c(1:length(per_sample_mat)), function(i){ 52 | m_proc<-per_sample_mat[[i]] 53 | signatures <- scalop::sigScores(m_proc, metaprograms_gene_list, expr.center = TRUE, conserved.genes = 0.5) 54 | score_vecs<-t(signatures) 55 | scored_filt <- apply(score_vecs, 2, function(x) { 56 | if(length(x[x > 0]) == 0) {prop_vec <- setNames(rep(0, nrow(score_vecs)), rownames(score_vecs))} 57 | get_positives <- x[x > 0.1] #all scores >=0.1 - consider cell state present in that spot 58 | prop_vec <- setNames(rep(0, nrow(score_vecs)), rownames(score_vecs)) 59 | prop_vec <- setNames(ifelse(rownames(score_vecs) %in% names(get_positives), yes = get_positives[match(names(prop_vec), names( get_positives))], no = 0), rownames(score_vecs)) 60 | return(prop_vec) 61 | }) 62 | #convert to df 63 | new_decon_df <- as.data.frame(t(scored_filt)) %>% rownames_to_column(var = "barcodes") 64 | new_decon_mtrx <- new_decon_df %>% 65 | tibble::column_to_rownames("barcodes") %>% 66 | as.matrix() 67 | return(new_decon_mtrx) 68 | }) 69 | names(all_decon) <- sample_ls 70 | 71 | #### co-localiztion 72 | results<-list() 73 | for(i in seq_along(names(all_decon))) { 74 | decon <- all_decon[[sample_ls[i]]] 75 | spot_list <- lapply(c(1:500),function(x){ #num. of shuffled matrices 76 | all_row <- sapply(c(1:dim(decon)[2]),function(c){ 77 | new_row <- sample(c(1:length(row.names(decon))), length(row.names(decon)), replace = FALSE) 78 | return(new_row) 79 | }) 80 | new_mat <- sapply(c(1:dim(decon)[2]),function(c){ 81 | sh_row <- decon[,c][all_row[,c]]#generate many deconv matrices by shuffling cell types 82 | return(sh_row) 83 | }) 84 | colnames(new_mat) <- colnames(decon) 85 | return(new_mat) 86 | }) 87 | 88 | #run colocal_mat_fun -- perform colocalization on the shuffled decon matrices 89 | mats_list<- lapply(spot_list, function(new_decon){ 90 | colocal_mats<-colocal_mat(new_decon) 91 | return(colocal_mats) 92 | }) 93 | mean_mat <- sapply(mats_list,function(x){ #expected colocalization calculated from shuffled decon matrices 94 | return(x[,"ab_comb"]) 95 | }) 96 | pairs_shuf_mean <- apply(mean_mat,1,mean) 97 | names(pairs_shuf_mean) <- mats_list[[1]][,"pairs"] 98 | rownames(mean_mat) <- mats_list[[1]][,"pairs"] 99 | 100 | obs_coloc <- colocal_mat(decon) 101 | 102 | r1 = obs_coloc$ab_comb #observed colocal 103 | r2 = pairs_shuf_mean #expected colocal 104 | 105 | n = dim(decon)[1] #num total spots 106 | 107 | fisher = ((0.5*log((1+r1)/(1-r1)))-(0.5*log((1+r2)/(1-r2))))/((1/(n-3))+(1/(n-3)))^0.5 108 | 109 | p.value = (2*(1-pnorm(abs(as.matrix(fisher))))) 110 | effect_size <- r1/r2 #observed/expected 111 | results[[i]]<-data.frame(pvalue = p.value, 112 | effect_size = effect_size) 113 | 114 | } 115 | names(results) <- sample_ls 116 | 117 | results2<-lapply(results, dplyr::add_rownames, 'pairs') 118 | results2<- lapply(results2,function(x){ 119 | as.matrix(x) 120 | }) 121 | results2 <- lapply(results2, function(x){rownames(x) <- x[,1]; x}) 122 | df <- do.call("rbind", results2) 123 | 124 | ###volcano plot (all pairs, all samples)### 125 | df->results_comb2 126 | results_comb2<-as.data.frame(results_comb2) 127 | results_comb2$pvalue<-as.numeric(results_comb2$pvalue) 128 | results_comb2$effect_size<-as.numeric(results_comb2$effect_size) 129 | results_comb2$sig <- "NO" 130 | results_comb2$sig[results_comb2$effect_size >= 1.3 & results_comb2$pvalue <= 0.01] <- "enriched" 131 | results_comb2$sig[results_comb2$effect_size <= 0.7 & results_comb2$pvalue <= 0.01] <- "depleted" 132 | results_comb2$label <- NA 133 | results_comb2$label[results_comb2$sig != "NO"] <- results_comb2$pairs[results_comb2$sig != "NO"] 134 | 135 | ggplot(data=results_comb2, aes(x=effect_size, y=-log10(pvalue), col=sig, label=label)) + 136 | geom_point() + 137 | theme_minimal() + 138 | geom_text_repel()+ 139 | theme(text = element_text(size = 20)) 140 | 141 | # Adjacency -------------------------------------------------------------- 142 | 143 | sample_ls <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1 144 | 145 | gen_clusters <- as.character(unique(unlist(sapply(c(1:length(sample_ls)), function(i){ 146 | mp_assign <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = "")) 147 | return(unique(mp_assign$spot_type_meta_new)) 148 | })))) 149 | 150 | all_zones <- readRDS("Spatial_coh_zones/final_zones.rds") 151 | 152 | extend_metadata <- tibble() 153 | generate_metadata <- sapply(c(1:length(sample_ls)), function(i){ 154 | print(sample_ls[i]) 155 | 156 | # load data 157 | spots_positions <- read.csv(paste("general/GBM_data/", sample_ls[i] , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE) 158 | row.names(spots_positions) <- spots_positions$V1 159 | 160 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = "")) 161 | spots_clusters <- na.omit(spots_clusters) 162 | colnames(spots_clusters) <- c("barcodes", "spot_type") 163 | row.names(spots_clusters)<- spots_clusters$barcodes 164 | 165 | 166 | metadata <- tibble(Key = paste(sample_ls[i],spots_clusters$barcodes, sep="_"), 167 | SpotID = spots_clusters$barcodes, 168 | Sample = rep(sample_ls[i], nrow(spots_clusters)), 169 | MPid = spots_clusters$spot_type, 170 | array_row = spots_positions[spots_clusters$barcodes,"V3"], 171 | array_col = spots_positions[spots_clusters$barcodes, "V4"], 172 | pxl_in_rows = spots_positions[spots_clusters$barcodes, "V5"], 173 | pxl_in_cols = spots_positions[spots_clusters$barcodes, "V6"], 174 | Zone = as.character(all_zones[[i]][spots_clusters$barcodes])) 175 | 176 | extend_metadata <<- rbind.data.frame(extend_metadata, metadata) 177 | }) 178 | 179 | set.seed(50) 180 | neighbs_stats <- neighbor_spot_props(metadata = extend_metadata, 181 | zone = "All", 182 | #site = "All", 183 | samples = "All", 184 | #zone_by = "EpiStroma", 185 | n_cores = 30, 186 | plot_perm_distr = TRUE, 187 | n_perm = 10000, 188 | filter_signif = TRUE, 189 | zscore_thresh = 1) 190 | 191 | 192 | 193 | 194 | # regional comp (Previously run by server. Possible to run per sample below)---------------------------------------------------------- 195 | 196 | #!/usr/bin/env Rscript 197 | #args = commandArgs(trailingOnly = TRUE) 198 | 199 | #sname <- as.character(args[1]) 200 | #sname <- str_replace(sname, "\r", "") 201 | #print(paste("I got the samp right", sname)) 202 | 203 | sname <- "" #insert sample name 204 | file_list <- list.files("MP/mp_assign_124/") 205 | gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){ 206 | mp_assign <- readRDS(paste("MP/mp_assign_124/", file_list[i], sep = "")) 207 | return(unique(mp_assign$spot_type_meta_new)) 208 | })))) 209 | 210 | max_win_size <- 15 211 | pairs <- combn(sort(gen_clusters),2) 212 | pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))}) 213 | 214 | # load data 215 | spots_positions <- read.csv(paste("general/GBM_data/", sname , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE) 216 | row.names(spots_positions) <- spots_positions$V1 217 | 218 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sname, ".rds", sep = "")) 219 | spots_clusters <- na.omit(spots_clusters) 220 | colnames(spots_clusters) <- c("barcodes", "spot_type") 221 | row.names(spots_clusters)<- spots_clusters$barcodes 222 | 223 | neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters) 224 | 225 | all_spots <- spots_clusters$barcodes 226 | 227 | all_pval_windows <- sapply(c(1:max_win_size), function(win_size){ 228 | proximity <- t(sapply(all_spots, function(spot){ 229 | win_spots <- c(spot) 230 | sapply(c(1:win_size), function(i){ 231 | win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,]))))) 232 | }) 233 | win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])) 234 | return(win_abund) 235 | })) 236 | 237 | old_clusters <- colnames(proximity) 238 | add_clusters <- gen_clusters[!gen_clusters %in% old_clusters] 239 | 240 | if (length(add_clusters) != 0){ 241 | sapply(add_clusters,function(clust){ 242 | proximity <<- cbind(proximity,rep(0,dim(proximity)[1])) 243 | }) 244 | colnames(proximity)[c((length(old_clusters)+1):dim(proximity)[2])] <- add_clusters 245 | } 246 | 247 | proximity <- proximity[,sort(colnames(proximity))] 248 | 249 | is_one <- apply(proximity,1,function(ro){ 250 | return(1 %in% ro ) 251 | }) 252 | 253 | proximity <- proximity[!is_one,] 254 | 255 | all_cor <- sapply(c(1:dim(pairs)[2]), function(j){ 256 | pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]]) 257 | return(pair_cor) 258 | }) 259 | 260 | all_cor <- data.frame(pair_cors = all_cor) 261 | row.names(all_cor) <- pairs_names 262 | return(all_cor) 263 | 264 | }) 265 | sample_proximity <- as.data.frame(all_pval_windows) 266 | row.names(sample_proximity) <- pairs_names 267 | 268 | 269 | 270 | # regional comp random (Previously run by server. Possible to run per sample below) --------------------------------------------------- 271 | 272 | #!/usr/bin/env Rscript 273 | #args = commandArgs(trailingOnly = TRUE) 274 | 275 | #sname <- as.character(args[1]) 276 | #sname <- str_replace(sname, "\r", "") 277 | 278 | sname <- "" #insert sample name 279 | file_list <- list.files("MP/mp_assign_124/") 280 | gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){ 281 | mp_assign <- readRDS(paste("MP/mp_assign_124/", file_list[i], sep = "")) 282 | return(unique(mp_assign$spot_type_meta_new)) 283 | })))) 284 | 285 | max_win_size <- 15 286 | pairs <- combn(sort(gen_clusters),2) 287 | pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))}) 288 | rand_num <- 500 289 | 290 | # load data 291 | spots_positions_orign <- read.csv(paste("general/GBM_data/", sname , "/outs/spatial/tissue_positions_list.csv", sep = ""), header = FALSE, stringsAsFactors = FALSE) 292 | row.names(spots_positions_orign) <- spots_positions_orign$V1 293 | 294 | spots_clusters <- readRDS(paste("MP/mp_assign_124/", sname, ".rds", sep = "")) 295 | spots_clusters <- na.omit(spots_clusters) 296 | colnames(spots_clusters) <- c("barcodes", "spot_type") 297 | row.names(spots_clusters)<- spots_clusters$barcodes 298 | 299 | all_rand <- lapply(c(1:rand_num),function(j){ 300 | new_pos_all <- sample(spots_positions_orign$V1[spots_positions_orign$V2 != 0], length(spots_positions_orign$V1[spots_positions_orign$V2 != 0]), replace = FALSE) 301 | spots_positions <- spots_positions_orign 302 | spots_positions$V1[spots_positions$V2 != 0] <- new_pos_all 303 | 304 | neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters) 305 | 306 | all_spots <- spots_clusters$barcodes 307 | 308 | 309 | all_pval_windows <- sapply(c(1:max_win_size), function(win_size){ 310 | proximity <- t(sapply(all_spots, function(spot){ 311 | win_spots <- c(spot) 312 | sapply(c(1:win_size), function(i){ 313 | win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,]))))) 314 | }) 315 | win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])) 316 | return(win_abund) 317 | })) 318 | 319 | old_clusters <- colnames(proximity) 320 | add_clusters <- gen_clusters[!gen_clusters %in% old_clusters] 321 | 322 | if (length(add_clusters) != 0){ 323 | sapply(add_clusters,function(clust){ 324 | proximity <<- cbind(proximity,rep(0,dim(proximity)[1])) 325 | }) 326 | colnames(proximity)[c((length(old_clusters)+1):dim(proximity)[2])] <- add_clusters 327 | } 328 | 329 | proximity <- proximity[,sort(colnames(proximity))] 330 | 331 | is_one <- apply(proximity,1,function(ro){ 332 | return(1 %in% ro ) 333 | }) 334 | 335 | proximity <- proximity[!is_one,] 336 | 337 | all_cor <- sapply(c(1:dim(pairs)[2]), function(j){ 338 | pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]]) 339 | return(pair_cor) 340 | }) 341 | 342 | all_cor <- data.frame(pair_cors = all_cor) 343 | row.names(all_cor) <- pairs_names 344 | return(all_cor) 345 | 346 | }) 347 | sample_proximity <- as.data.frame(all_pval_windows) 348 | row.names(sample_proximity) <- pairs_names 349 | return(sample_proximity) 350 | }) 351 | sample_mean_rand_prox <- Reduce("+", all_rand) / length(all_rand) 352 | 353 | sample_sd_rand_prox <- round(apply(array(unlist(all_rand), c(length(pairs_names), max_win_size, rand_num)), c(1,2), sd),4) 354 | 355 | 356 | 357 | # regional comp downstream ------------------------------------------------ 358 | 359 | file_list <- list.files("MP/mp_assign_124/") 360 | gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){ 361 | mp_assign <- readRDS(paste("MP/mp_assign_124/", file_list[i], sep = "")) 362 | return(unique(mp_assign$spot_type_meta_new)) 363 | })))) 364 | 365 | samp_list <- list.files("MP/mp_assign_124/") 366 | 367 | pairs <- combn(sort(gen_clusters),2) 368 | pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))}) 369 | all_zones <- readRDS("Spatial_coh_zones/spatial_zonesv3.rds") 370 | 371 | all_proximity_list <- list.files("Spatial_coh_zones/proximity_samples/") 372 | 373 | all_proximity <- lapply(all_proximity_list, function(prox){ 374 | samp_prox <- readRDS(paste("Spatial_coh_zones/proximity_samples/", prox, sep = "")) 375 | return(samp_prox) 376 | }) 377 | names(all_proximity) <- sapply(str_split(all_proximity_list, "_"), function(x){return(x[1])}) 378 | 379 | all_proximity_rand_list <- list.files("Spatial_coh_zones/proximity_rand_samples/") 380 | 381 | all_proximity_rand <- lapply(all_proximity_rand_list, function(prox){ 382 | samp_prox <- readRDS(paste("Spatial_coh_zones/proximity_rand_samples/", prox, sep = "")) 383 | return(samp_prox) 384 | }) 385 | names(all_proximity_rand) <- sapply(str_split(all_proximity_rand_list, "_"), function(x){return(x[1])}) 386 | 387 | 388 | combined_proximity <- t(sapply(c(1:length(pairs_names)), function(i){ 389 | pair_prox <- sapply(all_proximity, function(x){ 390 | pair_sample_df <- as.data.frame(x[i,c(1:15)]) 391 | colnames(pair_sample_df) <- c(as.character(c(1:15))) 392 | return(pair_sample_df) 393 | }) 394 | return(apply(pair_prox,1,function(x){mean(na.omit(as.numeric(x)))})) 395 | })) 396 | 397 | row.names(combined_proximity) <- row.names(all_proximity[[1]]) 398 | 399 | 400 | 401 | Heatmap(na.omit(combined_proximity), cluster_columns = FALSE, column_title = "Metaprograms Proximity", 402 | row_names_gp = grid::gpar(fontsize = 5), name = "proximity", show_row_names = T, show_row_dend = F) 403 | # regional comp downstream significant --------------------------------------------------- 404 | 405 | spots_numv1 <-sapply(samp_list, function(smp){ 406 | samp_df <- readRDS(paste("MP/mp_assign_124/", smp, sep = "")) 407 | return(nrow(samp_df)) 408 | }) 409 | names(spots_numv1) <- sapply(str_split(names(spots_numv1), "\\."), function(x){return(x[1])}) 410 | 411 | spots_num <-sapply(samp_list, function(smp){ 412 | samp_df <- readRDS(paste("MP/mp_assign_124/", smp, sep = "")) 413 | pairs_n <- sapply(c(1:ncol(pairs)),function(p){ 414 | p_table <- samp_df[samp_df$spot_type_meta_new %in% c(pairs[,p]),] 415 | return(nrow(p_table)) 416 | }) 417 | return(pairs_n) 418 | }) 419 | colnames(spots_num) <- sapply(str_split(colnames(spots_num), "\\."), function(x){return(x[1])}) 420 | 421 | 422 | 423 | proximity_bin <- lapply(c(1:26),function(i){ 424 | r1 = all_proximity[[i]] 425 | r2 = all_proximity_rand[[i]] 426 | 427 | n_df = as.data.frame(spots_num[,names(all_proximity)[i]]) 428 | n_df <- cbind(n_df, rep(n_df,14)) 429 | 430 | fisher = ((0.5*log((1+r1)/(1-r1)))-(0.5*log((1+r2)/(1-r2))))/((1/(n_df-3))+(1/(n_df-3)))^0.5 431 | 432 | p.value = (2*(1-pnorm(abs(as.matrix(fisher))))) 433 | colnames(p.value) <- as.character(1:15) 434 | bin_pval <- ifelse(p.value < 0.0000000001,1,NA) 435 | # bin_pval <- ifelse(p.value < 0.00001,1,0) 436 | rbin <- r1*bin_pval 437 | return(rbin) 438 | }) 439 | names(proximity_bin) <- names(all_proximity) 440 | 441 | ######## Functions --------------------------------------------------------------- 442 | 443 | 444 | 445 | # # co-localization functions --------------------------------------------- 446 | # Count interactions between pairs 447 | colocal_mat <- function(x) { 448 | stopifnot( 449 | is.matrix(x), is.numeric(x), 450 | all(dim(x) > 0), ncol(x) > 1) 451 | 452 | if (is.null(colnames(x))) { 453 | colnames(x) <- seq_len(ncol(x)) 454 | } 455 | df <- calc_pairs(x) 456 | df <- compute_interactions(x, df) 457 | return(df) 458 | } 459 | 460 | calc_pairs <- function(x) { 461 | x <- x > 0 462 | ab <- combn(colnames(x), 2) 463 | y <- apply(ab, 2, function(.) sum(matrixStats::rowAlls(x[, ., drop = FALSE]))) 464 | df <- data.frame(t(ab), y) 465 | names(df) <- c("pair1", "pair2", "n") 466 | return(df) 467 | } 468 | 469 | compute_interactions <- function(x, df) { 470 | y <- colnames(x) 471 | df$a <- factor(df$pair1, levels = y) 472 | df$b <- factor(df$pair2, levels = rev(y)) 473 | t <- colSums(x > 0) 474 | a <- match(df$pair1, y) 475 | b <- match(df$pair2, y) 476 | df$ta <- t[a] 477 | df$tb <- t[b] 478 | df$pa <- df$n / df$ta 479 | df$pb <- df$n / df$tb 480 | df$ab_mean <- (df$pa + df$pb) / 2 481 | df$ab_comb <- df$n / (df$ta + df$tb) 482 | df$ab_comb2 <- df$n / (df$ta * df$tb) 483 | df$pairs <- paste(df$a, df$b) 484 | return(df) 485 | } 486 | 487 | # Adjacency Helper Functions ------------------------------------------- 488 | 489 | 490 | .neighbors_table_func <- function(metadata, spot_class = "MPid", spot_name = "Key") { 491 | if(is.factor(metadata[[spot_class]])) {metadata[[spot_class]] <- as.character(metadata[[spot_class]])} 492 | neighbors_table <- sapply(metadata[[spot_name]], function(spot) { 493 | spots_row = metadata$array_row[metadata[[spot_name]] == spot] 494 | spots_col = metadata$array_col[metadata[[spot_name]] == spot] 495 | 496 | n1_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) - 1 & metadata$array_col == as.numeric(spots_col) - 1] 497 | if(length(n1_temp) == 0) { 498 | n1 = NA 499 | } else { 500 | n1 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n1_temp]) 501 | } 502 | 503 | n2_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) - 1 & metadata$array_col == as.numeric(spots_col) + 1] 504 | if (length(n2_temp) == 0) { 505 | n2 = NA 506 | } else { 507 | n2 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n2_temp]) 508 | } 509 | 510 | n3_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) & metadata$array_col == as.numeric(spots_col) - 2] 511 | if (length(n3_temp) == 0) { 512 | n3 = NA 513 | } else { 514 | n3 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n3_temp]) 515 | } 516 | 517 | n4_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) & metadata$array_col == as.numeric(spots_col) + 2] 518 | if (length(n4_temp) == 0) { 519 | n4 = NA 520 | } else { 521 | n4 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n4_temp]) 522 | } 523 | 524 | n5_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) + 1 & metadata$array_col == as.numeric(spots_col) - 1] 525 | if (length(n5_temp) == 0) { 526 | n5 = NA 527 | } else { 528 | n5 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n5_temp]) 529 | } 530 | 531 | n6_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) + 1 & metadata$array_col == as.numeric(spots_col) + 1] 532 | if (length(n6_temp) == 0) { 533 | n6 = NA 534 | } else { 535 | n6 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n6_temp]) 536 | } 537 | 538 | return(c(n1, n2, n3, n4, n5, n6)) 539 | }) 540 | 541 | neighbors_table = t(neighbors_table) 542 | rownames(neighbors_table) = metadata[[spot_name]] 543 | return(neighbors_table) 544 | } 545 | 546 | 547 | .prog_connectivity_score <- function(program_neighbors, state) { 548 | state_neighbors_bin <- ifelse(program_neighbors == state, 0, 1) 549 | if(is.null(dim(state_neighbors_bin))) { 550 | prog_connect <- 1 551 | } else { 552 | prog_connect <- length(which(apply(state_neighbors_bin, 1, function(x) {sum(na.omit(x))}) > 0)) 553 | } 554 | return(prog_connect) 555 | } 556 | 557 | 558 | .calc_adj_mat <- function(neighbored_state, spots_states, state_neighbors_table, spot_class = "MPid") { 559 | if(!(neighbored_state %in% spots_states[[spot_class]])) { 560 | return(0) 561 | } else { 562 | state_neighbors_bin <- ifelse(state_neighbors_table == neighbored_state, 1, 0) 563 | if(is.null(dim(state_neighbors_bin))) { 564 | state_neighbors_sum <- sum(na.omit(state_neighbors_bin)) 565 | } else { 566 | state_neighbors_sum <- sum(apply(state_neighbors_bin, 1, function(x) {sum(na.omit(x))})) 567 | } 568 | return(state_neighbors_sum) 569 | } 570 | } 571 | 572 | 573 | 574 | # Adjacency Bootstrapping Method --------------------------------------- 575 | 576 | 577 | # The metadata supplied to the function must contain spatial coordinate information! 578 | calc_spatial_neighborhood <- function(metadata, 579 | spot_class = "MPid", 580 | spot_name = "Key", 581 | samples = "all", 582 | iter = 20) { 583 | 584 | # Check inputs 585 | if(is.factor(metadata[[spot_class]])) { 586 | all_labels <- na.omit(levels(metadata[[spot_class]])) 587 | } else if(is.character(metadata[[spot_class]])) { 588 | all_labels <- na.omit(unique(metadata[[spot_class]])) 589 | metadata[[spot_class]] <- as.factor(metadata[[spot_class]]) 590 | } else {stop("spot_class should either be a factor or character vector!")} 591 | stopifnot("Error: metadata must contain a column specifying sample name." = 592 | any(grepl("sample*", colnames(metadata), ignore.case = TRUE))) 593 | metadata$enumerate <- metadata[[grep("sample*", colnames(metadata), ignore.case = TRUE)]] 594 | if(length(samples) == 1 && samples == "all") { 595 | samples <- unique(metadata$enumerate) 596 | } else if((length(samples) > 1 || samples != "all") & all(samples %in% unique(metadata$enumerate))) { 597 | samples <- samples 598 | } else {stop("samples supplied do not match the samples present in the metadata!")} 599 | 600 | message("This proccess may take a while...") 601 | connectivity <- lapply(seq_along(samples), function(i) { 602 | message(paste0("Start proccessing sample: ", samples[[i]])) 603 | samp_meta <- metadata[metadata$enumerate == samples[[i]], ] 604 | 605 | ## Construct Neighbors tables 606 | # Randomized spot ids & position table 607 | rand_neighbors_table <- lapply(seq_len(iter), function(j) { 608 | shuffled_spots <- sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = FALSE) 609 | shuffled_meta <- samp_meta %>% dplyr::mutate({{spot_name}} := shuffled_spots) 610 | neighbors_table <- .neighbors_table_func(shuffled_meta, spot_class = spot_class, spot_name = spot_name) 611 | return(neighbors_table) 612 | }) 613 | 614 | # Bootstrapped spots class identity 615 | boots_neighbors_table <- lapply(seq_len(iter), function(j) { 616 | new_spots <- unique(sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = TRUE)) 617 | bootsteped_meta <- samp_meta[samp_meta[[spot_name]] %in% new_spots, ] 618 | neighbors_table <- .neighbors_table_func(bootsteped_meta, spot_class = spot_class, spot_name = spot_name) 619 | return(neighbors_table) 620 | }) 621 | 622 | # Actual (observed) neighborhood table 623 | neighbors_table <- .neighbors_table_func(samp_meta, spot_class = spot_class, spot_name = spot_name) 624 | 625 | # Calculate connectivity scores (spatial coherence) 626 | programs_connectivity_score <- sapply(sort(all_labels), function(cluster) { 627 | if (!(cluster %in% samp_meta[[spot_class]])) { 628 | prog_score <- NaN 629 | } else { 630 | program_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ] 631 | prog_score <- .prog_connectivity_score(program_neighbors_table, cluster) 632 | } 633 | return(prog_score) 634 | }) 635 | 636 | 637 | ## Connectivity 638 | boots_adj_mat <- lapply(boots_neighbors_table, function(b_table) { 639 | obs_adj_mat <- sapply(sort(all_labels), function(cluster) { 640 | if (!(cluster %in% samp_meta[[spot_class]])) { 641 | zero_neigh <- rep(0, length(all_labels)) 642 | names(zero_neigh) <- all_labels 643 | return(zero_neigh) 644 | } else { 645 | cluster_neighbors_table = b_table[rownames(b_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ] 646 | num_of_neighbores = sapply(sort(as.character(all_labels)), function(neighbored_cluster) { 647 | num <- .calc_adj_mat(neighbored_cluster, samp_meta, cluster_neighbors_table) 648 | return(num) 649 | }) 650 | return(num_of_neighbores) 651 | }}) 652 | 653 | diag(obs_adj_mat) <- 0 654 | weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)}) 655 | 656 | comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)] 657 | comp4mat[is.na(comp4mat)] <- 0 658 | weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){ 659 | new_comp <- comp4mat 660 | new_comp[prog] <- 0 661 | new_comp <- new_comp / sum(new_comp) 662 | return(new_comp) 663 | }) 664 | 665 | norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2 666 | 667 | upper_mat <- (norm_adj_mat[upper.tri(norm_adj_mat)] + t(norm_adj_mat)[upper.tri(t(norm_adj_mat))]) / 2 668 | lower_mat <- rep(NaN, length(upper_mat)) 669 | avg_mat <- norm_adj_mat 670 | avg_mat[upper.tri(avg_mat)] <- upper_mat 671 | avg_mat[lower.tri(avg_mat)] <- lower_mat 672 | 673 | rownames(avg_mat) <- colnames(avg_mat) 674 | avg_mat <- t(avg_mat) 675 | avg_mat <- as.data.frame(avg_mat) 676 | avg_mat$pair2 <- rownames(avg_mat) 677 | long <- reshape2::melt(data.table::setDT(avg_mat), id.vars = c("pair2"), variable.name = "pair1") 678 | return(long) 679 | }) 680 | 681 | 682 | # Random connectivity 683 | rand_adj_mat <- lapply(rand_neighbors_table, function(b_table) { 684 | obs_adj_mat <- sapply(sort(all_labels), function(cluster) { 685 | if (!(cluster %in% samp_meta[[spot_class]])) { 686 | return(rep(0, length(all_labels))) 687 | } else { 688 | cluster_neighbors_table = b_table[row.names(b_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ] 689 | num_of_neighbores = sapply(sort(as.character(all_labels)), function(neighbored_cluster) { 690 | num <- .calc_adj_mat(neighbored_cluster, samp_meta, cluster_neighbors_table) 691 | return(num) 692 | }) 693 | return(num_of_neighbores) 694 | }}) 695 | 696 | diag(obs_adj_mat) <- 0 697 | weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)}) 698 | 699 | comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)] 700 | comp4mat[is.na(comp4mat)] <- 0 701 | weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog) { 702 | new_comp <- comp4mat 703 | new_comp[prog] <- 0 704 | new_comp <- new_comp / sum(new_comp) 705 | return(new_comp) 706 | }) 707 | 708 | norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2 709 | 710 | upper_mat <- (norm_adj_mat[upper.tri(norm_adj_mat)] + t(norm_adj_mat)[upper.tri(t(norm_adj_mat))]) / 2 711 | lower_mat <- rep(NaN, length(upper_mat)) 712 | avg_mat <- norm_adj_mat 713 | avg_mat[upper.tri(avg_mat)] <- upper_mat 714 | avg_mat[lower.tri(avg_mat)] <- lower_mat 715 | 716 | rownames(avg_mat) <- colnames(avg_mat) 717 | avg_mat <- t(avg_mat) 718 | avg_mat <- as.data.frame(avg_mat) 719 | avg_mat$pair2 <- rownames(avg_mat) 720 | long <- reshape2::melt(data.table::setDT(avg_mat), id.vars = c("pair2"), variable.name = "pair1") 721 | return(long) 722 | }) 723 | 724 | final_adj_mat <- data.frame(pair1 = boots_adj_mat[[1]]$pair1, 725 | pair2 = boots_adj_mat[[1]]$pair2, 726 | connectivity = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}), 727 | effect_size = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}) / apply(sapply(rand_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}), 728 | sd = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {sd(na.omit(k))})) 729 | 730 | 731 | ## Add p-value 732 | pval <- sapply(seq_len(nrow(final_adj_mat)), function(j1) { 733 | obs <- na.omit(as.numeric(sapply(seq_len(iter), function(j2) { 734 | return(boots_adj_mat[[j2]][j1, "value"]) 735 | }))) 736 | exp <- na.omit(as.numeric(sapply(seq_len(iter), function(j2) { 737 | return(rand_adj_mat[[j2]][j1, "value"]) 738 | }))) 739 | if (length(obs) == 0) { 740 | return(NA) 741 | } else { 742 | t.res <- t.test(obs, exp, alternative = "two.sided", var.equal = FALSE) 743 | return(t.res$p.value) 744 | } 745 | }) 746 | 747 | final_adj_mat$pval <- pval 748 | return(final_adj_mat) 749 | }) 750 | names(connectivity) <- samples 751 | return(connectivity) 752 | } 753 | 754 | 755 | 756 | 757 | 758 | # Adjacency Permutation Method ---------------------------------------------------- 759 | 760 | # The metadata supplied to the function must contain spatial coordinate information! 761 | neighbor_spot_props <- function(metadata, 762 | zone = "All", 763 | #site = "All", 764 | samples = "All", 765 | spot_class = "MPid", 766 | spot_name = "Key", 767 | #zone_by = "EpiStroma", 768 | n_cores = 10, 769 | n_perm = 1000, 770 | signif_val = 0.01, 771 | plot_perm_distr = TRUE, 772 | filter_signif = TRUE, 773 | zscore_thresh = 1) { 774 | # Load variables 775 | all_states <- as.character(unique(unlist(sapply(c(1:length(sample_ls)), function(i){ 776 | mp_assign <- readRDS(paste("MP/mp_assign_124/", sample_ls[i], ".rds", sep = "")) 777 | return(unique(mp_assign$spot_type_meta_new)) 778 | })))) 779 | #samples_metadata <- readRDS(file = here("Analysis/Metadata/samples_metadata.rds")) 780 | neighbs_stats_ls <- list() 781 | signif_neighbs_ls <- list() 782 | distr_plots_ls <- list() 783 | 784 | # Filter metadata input by selected site and zone 785 | stopifnot("Error: metadata must contain a column specifying sample name." = 786 | any(grepl("sample*", colnames(metadata), ignore.case = TRUE))) 787 | # if(site != "All") { 788 | # stopifnot("Error: Site argument must be one of the following: 'Laryngeal' / 'Oral' / 'Oropharynx'." = 789 | # site %in% unique(samples_metadata$Site)) 790 | # metadata <- metadata[metadata$Sample %in% samples_metadata$Sample[samples_metadata$Site == site], ] 791 | # } 792 | if(zone != "All") { 793 | #stopifnot("Error: Argument `zone_by` must be either 'EpiStroma' (for separating Epithelial, Stromal or Mixed spots) or 'Zone' (for separating Epithelial zonation)." = 794 | # zone_by %in% c("EpiStroma", "Zone") & length(zone_by) == 1) 795 | # if(zone_by == "EpiStroma") { 796 | # stopifnot("Error: Argument `zone` should specify on which tumor region neighboring states will be computed - 'Epithelial' / 'Stroma' / 'Mixed'." = 797 | # zone %in% unique(metadata$EpiStroma) & length(zone) == 1) 798 | # metadata <- metadata[metadata$EpiStroma == zone, ] 799 | # } 800 | # if(zone_by == "Zone") { 801 | # stopifnot("Error: The variable `Zone` is not found in the metadata. Run classify_zones function first." = 802 | # any(colnames(metadata) %in% "Zone")) 803 | # stopifnot("Error: Argument `zone` should specify on which Epithelial zone neighboring states will be computed - 'Zone_1' / 'Zone_2' / 'Zone_3'." = 804 | # zone %in% unique(metadata$Zone) & length(zone) == 1) 805 | metadata <- metadata[metadata$Zone == zone, ] 806 | # } 807 | } 808 | metadata$enumerate <- metadata[[grep("sample*", colnames(metadata), ignore.case = TRUE)]] 809 | if(length(samples) == 1 && samples == "All") { 810 | samples <- unique(metadata$enumerate) 811 | } else if((length(samples) > 1 || samples != "All") & all(samples %in% unique(metadata$enumerate))) { 812 | samples <- samples 813 | } else {stop("samples supplied do not match the samples present in the metadata!")} 814 | 815 | for(samp in samples) { 816 | message(paste0("Processing sample: ", samp)) 817 | 818 | ### ============ Actual state pairs connectivity values ============ 819 | # Construct neighboring table for the sample 820 | samp_meta <- metadata[metadata$Sample == samp, ] 821 | neighbors_table <- .neighbors_table_func(samp_meta, spot_class = spot_class, spot_name = spot_name) 822 | 823 | # Calculate connectivity scores (spatial coherence) 824 | programs_connectivity_score <- sapply(sort(all_states), function(state) { 825 | if (!(state %in% samp_meta[[spot_class]])) { 826 | prog_score <- NaN 827 | } else { 828 | program_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ] 829 | prog_score <- .prog_connectivity_score(program_neighbors_table, state) 830 | } 831 | return(prog_score) 832 | }) 833 | 834 | # Count neighboring states for each spot (number of free (non-coherent) X state classified spots that neighbor free Y reference-state) 835 | obs_adj_mat <- sapply(sort(all_states), function(state) { 836 | if (!(state %in% samp_meta[[spot_class]])) { 837 | zero_neigh <- rep(0, length(all_states)) 838 | names(zero_neigh) <- all_states 839 | return(zero_neigh) 840 | } else { 841 | state_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ] 842 | num_of_neighbors = sapply(sort(as.character(all_states)), function(neighbored_state) { 843 | num <- .calc_adj_mat(neighbored_state, samp_meta, state_neighbors_table) 844 | return(num) 845 | }) 846 | return(num_of_neighbors) 847 | }}) 848 | diag(obs_adj_mat) <- 0 849 | weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)}) 850 | 851 | # Calculate corrected proportion of neighboring spots 852 | comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)] 853 | comp4mat[is.na(comp4mat)] <- 0 854 | weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){ 855 | new_comp <- comp4mat 856 | new_comp[prog] <- 0 857 | new_comp <- new_comp / sum(new_comp) 858 | return(new_comp) 859 | }) 860 | 861 | norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2 862 | baseline_stat <- Melt(norm_adj_mat) 863 | 864 | ### ============ Permuted state pairs connectivity values ============ 865 | # Permute neighbor table `n_perm` times, to create a permuted sampling distribution which will serve as the null distribution 866 | permute_neighbs <- parallel::mclapply(1:n_perm, function(x) { 867 | shuffled_spots <- sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = FALSE) 868 | shuffled_meta <- samp_meta %>% dplyr::mutate({{spot_name}} := shuffled_spots) 869 | neighbors_table <- .neighbors_table_func(shuffled_meta, spot_class = spot_class, spot_name = spot_name) 870 | neighbors_table <- neighbors_table[match(samp_meta[[spot_name]], rownames(neighbors_table)), ] 871 | 872 | obs_adj_mat <- sapply(sort(all_states), function(state) { 873 | if (!(state %in% samp_meta[[spot_class]])) { 874 | zero_neigh <- rep(0, length(all_states)) 875 | names(zero_neigh) <- all_states 876 | return(zero_neigh) 877 | } else { 878 | state_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ] 879 | num_of_neighbors = sapply(sort(as.character(all_states)), function(neighbored_state) { 880 | num <- .calc_adj_mat(neighbored_state, samp_meta, state_neighbors_table) 881 | return(num) 882 | }) 883 | return(num_of_neighbors) 884 | }}) 885 | diag(obs_adj_mat) <- 0 886 | weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)}) 887 | 888 | comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)] 889 | comp4mat[is.na(comp4mat)] <- 0 890 | weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){ 891 | new_comp <- comp4mat 892 | new_comp[prog] <- 0 893 | new_comp <- new_comp / sum(new_comp) 894 | return(new_comp) 895 | }) 896 | 897 | norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2 898 | perm_stat <- Melt(norm_adj_mat) 899 | }, mc.cores = n_cores) 900 | 901 | perm_stats <- cbind.data.frame(permute_neighbs[[1]]$rows, permute_neighbs[[1]]$cols, do.call(cbind.data.frame, lapply(permute_neighbs, function(y) y$vals))) %>% 902 | magrittr::set_colnames(c("rows", "cols", paste0("perm_", seq(n_perm)))) 903 | 904 | 905 | 906 | # Compare observed neighboring state proportion to the permuted null distribution - extract Neighbor-proportion, P-value & Permuted distribution statistics 907 | # merged_df <- merge(baseline_stat, perm_stats, by = c("rows", "cols")) 908 | merged_df <- na.omit(merge(baseline_stat, perm_stats, by = c("rows", "cols"))) 909 | scaled_vals <- merged_df %>% dplyr::select(-c("rows", "cols")) %>% t() %>% scale() %>% t() %>% 910 | as.data.frame() %>% dplyr::mutate(rows = merged_df$rows, cols = merged_df$cols, .before = 1) %>% dplyr::pull(vals) 911 | neighbs_stats <- lapply(seq_len(nrow(merged_df)), function(i) { 912 | summary_stats <- summary(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))])) 913 | if(merged_df[i, "vals"] == 0) { 914 | pvals <- 1 915 | tail_direction <- NA 916 | } else { 917 | Rtail_pvals <- (sum(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]) >= merged_df[i, "vals"]) + 1) / (n_perm + 1) 918 | Ltail_pvals <- (n_perm - sum(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]) > merged_df[i, "vals"]) + 1) / (n_perm + 1) 919 | tail_direction <- Rtail_pvals < Ltail_pvals 920 | pvals <- Rtail_pvals * tail_direction + Ltail_pvals * !(tail_direction) 921 | } 922 | sds <- sd(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))])) 923 | out <- data.frame(Neighb_State = merged_df$rows[i], Ref_State = merged_df$cols[i], Prop_Neighb = merged_df$vals[i], Z_Score = scaled_vals[i], 924 | Perm_Min = summary_stats[["Min."]], Perm_Max = summary_stats[["Max."]], Perm_Mean = summary_stats[["Mean"]], Perm_Median = summary_stats[["Median"]], 925 | P_val = pvals, Interaction_Type = ifelse(is.na(tail_direction), NA, 926 | ifelse(tail_direction,"Drawn","Repelled")), SD = sds) 927 | }) 928 | neighbs_stats <- do.call(rbind.data.frame, neighbs_stats) 929 | neighbs_stats$Significant <- neighbs_stats$P_val < signif_val 930 | neighbs_stats_ls[[samp]] <- neighbs_stats 931 | 932 | if(filter_signif) { 933 | top_neighbs_df <- neighbs_stats %>% 934 | dplyr::filter(Neighb_State != Ref_State) %>% 935 | dplyr::mutate(State_Pair = paste0(Ref_State, "_", Neighb_State)) %>% 936 | dplyr::filter(Significant == TRUE) %>% 937 | mutate(Signif_Pair = unname(sapply(.$State_Pair, function(x) { 938 | ifelse(paste0(scalop::substri(x, pos = 2, sep = "_"), "_", scalop::substri(x, pos = 1, sep = "_")) %in% .$State_Pair, 939 | yes = "Yes", no = "No") 940 | }))) %>% dplyr::filter(abs(Z_Score) >= zscore_thresh, Signif_Pair == "Yes") %>% dplyr::arrange(desc(Z_Score)) 941 | signif_neighbs_ls[[samp]] <- top_neighbs_df 942 | } 943 | 944 | # Generate plots 945 | if(plot_perm_distr) { 946 | plot_df <- reshape2::melt(perm_stats) 947 | distr_plot <- ggplot(plot_df, aes(x = value)) + 948 | facet_grid(rows ~ cols) + 949 | geom_histogram() + 950 | geom_vline(data = baseline_stat, aes(xintercept = vals), color = "red") 951 | distr_plots_ls[[samp]] <- distr_plot 952 | } 953 | } 954 | 955 | if(isTRUE(plot_perm_distr) & isFALSE(filter_signif)) {return(list(Neighbor_Stats = neighbs_stats_ls, Distr_plots = distr_plots_ls))} 956 | else if(isTRUE(filter_signif) & isFALSE(plot_perm_distr)) {return(list(Neighbs_Stats = neighbs_stats_ls, Top_Neighbs = signif_neighbs_ls))} 957 | else if(isTRUE(plot_perm_distr) & isTRUE(filter_signif)) {return(list(Neighbs_Stats = neighbs_stats_ls, Top_Neighbs = signif_neighbs_ls, Distr_plots = distr_plots_ls))} 958 | else {return(neighbs_stats_ls)} 959 | } 960 | 961 | 962 | 963 | 964 | Melt <- function(x) { 965 | if (!is.data.frame(x = x)) { 966 | x <- as.data.frame(x = x) 967 | } 968 | return(data.frame( 969 | rows = rep.int(x = rownames(x = x), times = ncol(x = x)), 970 | cols = unlist(x = lapply(X = colnames(x = x), FUN = rep.int, times = nrow(x = x))), 971 | vals = unlist(x = x, use.names = FALSE) 972 | )) 973 | } 974 | 975 | 976 | 977 | # regional comp functions ------------------------------------------------ 978 | sample_programs_composition <- function(spots_clusters, gen_clusters){ 979 | composition <- table(spots_clusters$spot_type) 980 | old_clusters <- names(composition) 981 | add_clusters <- gen_clusters[!gen_clusters %in% old_clusters] 982 | sapply(add_clusters,function(clust){ 983 | composition <<- c(composition, clust = 0) 984 | }) 985 | 986 | names(composition) <- c(old_clusters, add_clusters) 987 | final_composition <- composition[sort(names(composition))]/sum(composition) 988 | return(final_composition) 989 | } 990 | 991 | obs_program_spatial_score <- function(program_neighbors, cluster){ 992 | cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 1, 0) 993 | if(is.null(dim(program_neighbors))){ 994 | cluster_neighbors_sum <- sum(cluster_neighbors_bin) 995 | } else { 996 | cluster_neighbors_sum <- apply(cluster_neighbors_bin,1,function(rx){sum(na.omit(rx))}) 997 | } 998 | obs <- mean(cluster_neighbors_sum) 999 | return(obs) 1000 | } 1001 | 1002 | one_val <- function(spots_num){ 1003 | a <- sqrt((4*spots_num)/(6*sqrt(3))) 1004 | oneval <- (6*spots_num-12*a-6)/spots_num 1005 | return(oneval) 1006 | } 1007 | 1008 | zero_val <- function(rand_table, spots_clusters, cluster){ 1009 | all_zeroval <- sapply(rand_table, function(neighbors_rand_table){ 1010 | program_rand_neighbors_table = neighbors_rand_table[row.names(neighbors_rand_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster],] 1011 | rand_obs <- obs_program_spatial_score(program_rand_neighbors_table, cluster) 1012 | return(rand_obs) 1013 | }) 1014 | zeroval <- mean(all_zeroval) 1015 | return(zeroval) 1016 | } 1017 | 1018 | 1019 | calc_adj_mat <- function(neighbored_cluster, spots_clusters, cluster_neighbors_table){ 1020 | if (!(neighbored_cluster %in% spots_clusters$spot_type)) { 1021 | return(0) 1022 | } else { 1023 | cluster_neighbors_bin <- ifelse(cluster_neighbors_table == neighbored_cluster, 1, 0) 1024 | 1025 | if (is.null(dim(cluster_neighbors_bin))){ 1026 | cluster_neighbors_sum <- sum(na.omit(cluster_neighbors_bin)) 1027 | } else { 1028 | cluster_neighbors_sum <- sum(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))})) 1029 | } 1030 | return(cluster_neighbors_sum) 1031 | } 1032 | } 1033 | 1034 | 1035 | prog_connectivity_score <- function(program_neighbors, cluster){ 1036 | cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 0, 1) 1037 | if(is.null(dim(program_neighbors))){ 1038 | prog_connect <- 1 1039 | } else { 1040 | prog_connect <- length(which(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))}) >0)) 1041 | } 1042 | return(prog_connect) 1043 | } 1044 | 1045 | neighbors_table_func <- function(spots_positions,spots_clusters){ 1046 | neighbors_table <- sapply(spots_clusters$barcodes, function(spot){ 1047 | spots_row = spots_positions[spots_positions$V1 == spot, 3] 1048 | spots_col = spots_positions[spots_positions$V1 == spot, 4] 1049 | 1050 | if (spots_col == 0 | spots_row == 0) { 1051 | c1 = NaN 1052 | } else { 1053 | n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1] 1054 | if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){ 1055 | c1 = NaN 1056 | } else { 1057 | c1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1]) 1058 | } 1059 | } 1060 | 1061 | if (spots_col == 127 | spots_row == 0) { 1062 | c2 = NaN 1063 | } else { 1064 | n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1] 1065 | if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){ 1066 | c2 = NaN 1067 | } else { 1068 | c2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2]) 1069 | } 1070 | } 1071 | 1072 | if (spots_col == 0 | spots_col == 1) { 1073 | c3 = NaN 1074 | } else { 1075 | n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2] 1076 | if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){ 1077 | c3 = NaN 1078 | } else { 1079 | c3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3]) 1080 | } 1081 | } 1082 | 1083 | if (spots_col == 126 | spots_col == 127) { 1084 | c4 = NaN 1085 | } else { 1086 | n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2] 1087 | if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){ 1088 | c4 = NaN 1089 | } else { 1090 | c4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4]) 1091 | } 1092 | } 1093 | 1094 | if (spots_col == 0 | spots_row == 77) { 1095 | c5 = NaN 1096 | } else { 1097 | n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1] 1098 | if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){ 1099 | c5 = NaN 1100 | } else { 1101 | c5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5]) 1102 | } 1103 | } 1104 | 1105 | if (spots_col == 127 | spots_row == 77) { 1106 | c6 = NaN 1107 | } else { 1108 | n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1] 1109 | if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){ 1110 | c6 = NaN 1111 | } else { 1112 | c6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6]) 1113 | } 1114 | } 1115 | 1116 | 1117 | return(c(c1,c2,c3,c4,c5,c6)) 1118 | 1119 | }) 1120 | 1121 | neighbors_table = t(neighbors_table) 1122 | row.names(neighbors_table) = spots_clusters$barcodes 1123 | 1124 | return(neighbors_table) 1125 | } 1126 | 1127 | neighbors_table_funcV2 <- function(spots_positions,spots_clusters){ 1128 | neighbors_table <- sapply(spots_clusters$barcodes, function(spot){ 1129 | spots_row = spots_positions[spots_positions$V1 == spot, 3] 1130 | spots_col = spots_positions[spots_positions$V1 == spot, 4] 1131 | 1132 | if (spots_col == 0 | spots_row == 0) { 1133 | c1 = NaN 1134 | } else { 1135 | n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1] 1136 | if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){ 1137 | c1 = NaN 1138 | } else { 1139 | c1 = as.character(n1) 1140 | } 1141 | } 1142 | 1143 | if (spots_col == 127 | spots_row == 0) { 1144 | c2 = NaN 1145 | } else { 1146 | n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1] 1147 | if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){ 1148 | c2 = NaN 1149 | } else { 1150 | c2 = as.character(n2) 1151 | } 1152 | } 1153 | 1154 | if (spots_col == 0 | spots_col == 1) { 1155 | c3 = NaN 1156 | } else { 1157 | n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2] 1158 | if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){ 1159 | c3 = NaN 1160 | } else { 1161 | c3 = as.character(n3) 1162 | } 1163 | } 1164 | 1165 | if (spots_col == 126 | spots_col == 127) { 1166 | c4 = NaN 1167 | } else { 1168 | n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2] 1169 | if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){ 1170 | c4 = NaN 1171 | } else { 1172 | c4 = as.character(n4) 1173 | } 1174 | } 1175 | 1176 | if (spots_col == 0 | spots_row == 77) { 1177 | c5 = NaN 1178 | } else { 1179 | n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1] 1180 | if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){ 1181 | c5 = NaN 1182 | } else { 1183 | c5 = as.character(n5) 1184 | } 1185 | } 1186 | 1187 | if (spots_col == 127 | spots_row == 77) { 1188 | c6 = NaN 1189 | } else { 1190 | n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1] 1191 | if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){ 1192 | c6 = NaN 1193 | } else { 1194 | c6 = as.character(n6) 1195 | } 1196 | } 1197 | 1198 | 1199 | return(c(c1,c2,c3,c4,c5,c6)) 1200 | 1201 | }) 1202 | 1203 | neighbors_table = t(neighbors_table) 1204 | row.names(neighbors_table) = spots_clusters$barcodes 1205 | 1206 | return(neighbors_table) 1207 | } 1208 | 1209 | 1210 | win_prox_neighbors_table_func <- function(spots_positions,spots_clusters){ 1211 | neighbors_table <- sapply(spots_clusters$barcodes, function(spot){ 1212 | spots_row = spots_positions[spots_positions$V1 == spot, 3] 1213 | spots_col = spots_positions[spots_positions$V1 == spot, 4] 1214 | 1215 | if (spots_col == 0 | spots_row == 0) { 1216 | n1 = NA 1217 | } else { 1218 | n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1] 1219 | if (length(n1_temp) == 0) { 1220 | n1 = NA 1221 | } else if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){ 1222 | n1 = NA 1223 | } else { 1224 | n1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1_temp]) 1225 | } 1226 | } 1227 | 1228 | if (spots_col == 127 | spots_row == 0) { 1229 | n2 = NA 1230 | } else { 1231 | n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1] 1232 | if (length(n2_temp) == 0) { 1233 | n2 = NA 1234 | } else if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){ 1235 | n2 = NA 1236 | } else { 1237 | n2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2_temp]) 1238 | } 1239 | } 1240 | 1241 | if (spots_col == 0 | spots_col == 1) { 1242 | n3 = NA 1243 | } else { 1244 | n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2] 1245 | if (length(n3_temp) == 0) { 1246 | n3 = NA 1247 | } else if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){ 1248 | n3 = NA 1249 | } else { 1250 | n3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3_temp]) 1251 | } 1252 | } 1253 | 1254 | if (spots_col == 126 | spots_col == 127) { 1255 | n4 = NA 1256 | } else { 1257 | n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2] 1258 | if (length(n4_temp) == 0) { 1259 | n4 = NA 1260 | } else if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){ 1261 | n4 = NA 1262 | } else { 1263 | n4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4_temp]) 1264 | } 1265 | } 1266 | 1267 | if (spots_col == 0 | spots_row == 77) { 1268 | n5 = NA 1269 | } else { 1270 | n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1] 1271 | if (length(n5_temp) == 0) { 1272 | n5 = NA 1273 | } else if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){ 1274 | n5 = NA 1275 | } else { 1276 | n5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5_temp]) 1277 | } 1278 | } 1279 | 1280 | if (spots_col == 127 | spots_row == 77) { 1281 | n6 = NA 1282 | } else { 1283 | n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1] 1284 | if (length(n6_temp) == 0) { 1285 | n6 = NA 1286 | } else if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){ 1287 | n6 = NA 1288 | } else { 1289 | n6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6_temp]) 1290 | } 1291 | } 1292 | 1293 | 1294 | return(c(n1,n2,n3,n4,n5,n6)) 1295 | 1296 | }) 1297 | 1298 | neighbors_table = t(neighbors_table) 1299 | row.names(neighbors_table) = spots_clusters$barcodes 1300 | 1301 | return(neighbors_table) 1302 | } 1303 | 1304 | 1305 | prox_neighbors_table_func <- function(spots_positions,spots_clusters){ 1306 | neighbors_table <- sapply(spots_clusters$barcodes, function(spot){ 1307 | spots_row = spots_positions[spots_positions$V1 == spot, 3] 1308 | spots_col = spots_positions[spots_positions$V1 == spot, 4] 1309 | 1310 | if (spots_col == 0 | spots_row == 0) { 1311 | n1 = NA 1312 | } else { 1313 | n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1] 1314 | if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){ 1315 | n1 = NA 1316 | } else { 1317 | n1 = n1_temp 1318 | } 1319 | } 1320 | 1321 | if (spots_col == 127 | spots_row == 0) { 1322 | n2 = NA 1323 | } else { 1324 | n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1] 1325 | if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){ 1326 | n2 = NA 1327 | } else { 1328 | n2 = n2_temp 1329 | } 1330 | } 1331 | 1332 | if (spots_col == 0 | spots_col == 1) { 1333 | n3 = NA 1334 | } else { 1335 | n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2] 1336 | if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){ 1337 | n3 = NA 1338 | } else { 1339 | n3 = n3_temp 1340 | } 1341 | } 1342 | 1343 | if (spots_col == 126 | spots_col == 127) { 1344 | n4 = NA 1345 | } else { 1346 | n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2] 1347 | if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){ 1348 | n4 = NA 1349 | } else { 1350 | n4 = n4_temp 1351 | } 1352 | } 1353 | 1354 | if (spots_col == 0 | spots_row == 77) { 1355 | n5 = NA 1356 | } else { 1357 | n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1] 1358 | if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){ 1359 | n5 = NA 1360 | } else { 1361 | n5 = n5_temp 1362 | } 1363 | } 1364 | 1365 | if (spots_col == 127 | spots_row == 77) { 1366 | n6 = NA 1367 | } else { 1368 | n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1] 1369 | if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){ 1370 | n6 = NA 1371 | } else { 1372 | n6 = n6_temp 1373 | } 1374 | } 1375 | 1376 | 1377 | return(c(n1,n2,n3,n4,n5,n6)) 1378 | 1379 | }) 1380 | 1381 | neighbors_table = t(neighbors_table) 1382 | row.names(neighbors_table) = spots_clusters$barcodes 1383 | 1384 | return(neighbors_table) 1385 | } 1386 | -------------------------------------------------------------------------------- /Module4_consensus_interactions.R: -------------------------------------------------------------------------------- 1 | library(ggvenn) 2 | 3 | # Load data --------------------------------------------------------------- 4 | 5 | 6 | file_list <- list.files("MP/mp_assign_124/") 7 | 8 | gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){ 9 | mp_assign <- readRDS(paste("MP/mp_assign_124/", file_list[i], sep = "")) 10 | return(unique(mp_assign$spot_type_meta_new)) 11 | })))) 12 | 13 | pairs <- combn(sort(gen_clusters),2) 14 | pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))}) 15 | 16 | st_samp <-c("UKF243","UKF248","UKF255","UKF259","UKF260","UKF266","UKF269","UKF275","UKF304","UKF313","UKF334","ZH1019inf","ZH881T1","ZH916bulk","ZH916inf","ZH916T1","ZH1007nec","ZH1019T1","ZH8811Abulk","ZH8811Bbulk") 17 | dis_samp <-c("MGH258","UKF251","UKF259","UKF260","UKF275","UKF296","UKF313","ZH1019inf","ZH881inf","ZH916inf","ZH1007inf","ZH1019T1", "ZH8812bulk") 18 | 19 | 20 | # structured ---------------------------------------------------------------------- 21 | 22 | coloc_st <- readRDS("Summary_tables/colocal_structured_summary.rds") 23 | conn_st <- readRDS("Summary_tables/conn_st.rds") 24 | prox_st <- readRDS("Summary_tables/prox_st.rds") 25 | (sum(coloc_st$enriched)+sum(conn_st$enr)+sum(prox_st[,5]$enr))/(length(pairs_names)*length(st_samp)*3) 26 | 27 | coloc_st$mean_scaled <- ifelse(coloc_st$mean_enrichment < 1, 28 | ((coloc_st$mean_enrichment - min(coloc_st$mean_enrichment))/(max(coloc_st$mean_enrichment)-min(coloc_st$mean_enrichment))) - 1, 29 | ((coloc_st$mean_enrichment - min(coloc_st$mean_enrichment))/(max(coloc_st$mean_enrichment)-min(coloc_st$mean_enrichment)))) 30 | conn_st$mean_scaled <- ifelse(conn_st$mean_score < 0, 31 | ((conn_st$mean_score - min(na.omit(conn_st$mean_score)))/(max(na.omit(conn_st$mean_score))-min(na.omit(conn_st$mean_score)))) - 1, 32 | ((conn_st$mean_score - min(na.omit(conn_st$mean_score)))/(max(na.omit(conn_st$mean_score))-min(na.omit(conn_st$mean_score))))) 33 | 34 | 35 | summary_coloc <- data.frame(pair = coloc_st$pair, 36 | analysis = rep("coloc", 91), 37 | prop = coloc_st$proportion_sig, 38 | mean_scaled = coloc_st$mean_scaled) 39 | 40 | new_pair <- as.character(sapply(summary_coloc$pair, function(p1){ 41 | splt <- strsplit(p1, " ")[[1]] 42 | new_name <- sapply(row.names(conn_st),function(p2){ 43 | splt2 <- strsplit(p2, " ")[[1]] 44 | if(splt[1] %in% splt2 & splt[2] %in% splt2) { 45 | return(p2) 46 | } else 47 | return(NA) 48 | }) 49 | return(na.omit(new_name)) 50 | })) 51 | 52 | summary_coloc$pair <- new_pair 53 | 54 | 55 | summary_adj <- data.frame(pair = row.names(conn_st), 56 | analysis = rep("adj", 91), 57 | prop = conn_st$prop, 58 | mean_scaled = conn_st$mean_scaled) 59 | 60 | summary_prox5 <- data.frame(pair = row.names(conn_st), 61 | analysis = rep("prox5", 91), 62 | prop = prox_st[,5]$prop, 63 | mean_scaled = prox_st[,5]$mean_score) 64 | 65 | summary_prox5$mean_scaled <- ifelse(summary_prox5$mean_scaled < 0, 66 | ((summary_prox5$mean_scaled - min(na.omit(summary_prox5$mean_scaled)))/(max(na.omit(summary_prox5$mean_scaled))-min(na.omit(summary_prox5$mean_scaled)))) - 1, 67 | ((summary_prox5$mean_scaled - min(na.omit(summary_prox5$mean_scaled)))/(max(na.omit(summary_prox5$mean_scaled))-min(na.omit(summary_prox5$mean_scaled))))) 68 | 69 | 70 | summary_prox8 <- data.frame(pair = row.names(conn_st), 71 | analysis = rep("prox8", 91), 72 | prop = prox_st[,8]$prop, 73 | mean_scaled = prox_st[,8]$mean_score) 74 | 75 | summary_prox8$mean_scaled <- ifelse(summary_prox8$mean_scaled < 0, 76 | ((summary_prox8$mean_scaled - min(na.omit(summary_prox8$mean_scaled)))/(max(na.omit(summary_prox8$mean_scaled))-min(na.omit(summary_prox8$mean_scaled)))) - 1, 77 | ((summary_prox8$mean_scaled - min(na.omit(summary_prox8$mean_scaled)))/(max(na.omit(summary_prox8$mean_scaled))-min(na.omit(summary_prox8$mean_scaled))))) 78 | 79 | summary_prox15 <- data.frame(pair = row.names(conn_st), 80 | analysis = rep("prox15", 91), 81 | prop = prox_st[,15]$prop, 82 | mean_scaled = prox_st[,15]$mean_score) 83 | 84 | summary_prox15$mean_scaled <- ifelse(summary_prox15$mean_scaled < 0, 85 | ((summary_prox15$mean_scaled - min(na.omit(summary_prox15$mean_scaled)))/(max(na.omit(summary_prox15$mean_scaled))-min(na.omit(summary_prox15$mean_scaled)))) - 1, 86 | ((summary_prox15$mean_scaled - min(na.omit(summary_prox15$mean_scaled)))/(max(na.omit(summary_prox15$mean_scaled))-min(na.omit(summary_prox15$mean_scaled))))) 87 | 88 | 89 | summary_df <- rbind(summary_coloc,summary_adj,summary_prox5,summary_prox8,summary_prox15) 90 | 91 | pairs_mean <- sapply(unique(summary_df$pair),function(p){ 92 | p_df <- summary_df[summary_df$pair == p,] 93 | return(mean(na.omit(p_df$mean_scaled))) 94 | }) 95 | 96 | summary_df$pair_mean <- sapply(summary_df$pair, function(p){ 97 | return(pairs_mean[p]) 98 | }) 99 | 100 | summary_df$pattern <- ifelse(summary_df$pair_mean > 0.35, "connected", 101 | ifelse(summary_df$pair_mean < -0.35, "dis connected","mix")) 102 | st_summary <- summary_df 103 | 104 | 105 | # dis-organized --------------------------------------------------------------------- 106 | 107 | 108 | coloc_dis <- readRDS("Summary_tables/colocal_summary_dis.rds") 109 | conn_dis <- readRDS("Summary_tables/conn_dis.rds") 110 | prox_dis <- readRDS("Summary_tables/prox_dis.rds") 111 | round((sum(coloc_dis$enriched)+sum(conn_dis$enr)+sum(prox_dis[,5]$enr))/(length(pairs_names)*length(dis_samp)*3),2) 112 | 113 | coloc_dis$mean_scaled <- ifelse(coloc_dis$mean_enrichment < 1, 114 | ((coloc_dis$mean_enrichment - min(coloc_dis$mean_enrichment))/(max(coloc_dis$mean_enrichment)-min(coloc_dis$mean_enrichment))) - 1, 115 | ((coloc_dis$mean_enrichment - min(coloc_dis$mean_enrichment))/(max(coloc_dis$mean_enrichment)-min(coloc_dis$mean_enrichment)))) 116 | conn_dis$mean_scaled <- ifelse(conn_dis$mean_score < 0, 117 | ((conn_dis$mean_score - min(na.omit(conn_dis$mean_score)))/(max(na.omit(conn_dis$mean_score))-min(na.omit(conn_dis$mean_score)))) - 1, 118 | ((conn_dis$mean_score - min(na.omit(conn_dis$mean_score)))/(max(na.omit(conn_dis$mean_score))-min(na.omit(conn_dis$mean_score))))) 119 | 120 | 121 | summary_coloc <- data.frame(pair = coloc_dis$pairs, 122 | analysis = rep("coloc", 91), 123 | prop = coloc_dis$prop_sig, 124 | mean_scaled = coloc_dis$mean_scaled) 125 | 126 | new_pair <- as.character(sapply(summary_coloc$pair, function(p1){ 127 | splt <- strsplit(p1, " ")[[1]] 128 | new_name <- sapply(row.names(conn_dis),function(p2){ 129 | splt2 <- strsplit(p2, " ")[[1]] 130 | if(splt[1] %in% splt2 & splt[2] %in% splt2) { 131 | return(p2) 132 | } else 133 | return(NA) 134 | }) 135 | return(na.omit(new_name)) 136 | })) 137 | 138 | summary_coloc$pair <- new_pair 139 | 140 | 141 | summary_adj <- data.frame(pair = row.names(conn_dis), 142 | analysis = rep("adj", 91), 143 | prop = conn_dis$prop, 144 | mean_scaled = conn_dis$mean_scaled) 145 | 146 | summary_prox5 <- data.frame(pair = row.names(conn_dis), 147 | analysis = rep("prox5", 91), 148 | prop = prox_dis[,5]$prop, 149 | mean_scaled = prox_dis[,5]$mean_score) 150 | 151 | summary_prox5$mean_scaled <- ifelse(summary_prox5$mean_scaled < 0, 152 | ((summary_prox5$mean_scaled - min(na.omit(summary_prox5$mean_scaled)))/(max(na.omit(summary_prox5$mean_scaled))-min(na.omit(summary_prox5$mean_scaled)))) - 1, 153 | ((summary_prox5$mean_scaled - min(na.omit(summary_prox5$mean_scaled)))/(max(na.omit(summary_prox5$mean_scaled))-min(na.omit(summary_prox5$mean_scaled))))) 154 | 155 | 156 | summary_prox8 <- data.frame(pair = row.names(conn_dis), 157 | analysis = rep("prox8", 91), 158 | prop = prox_dis[,8]$prop, 159 | mean_scaled = prox_dis[,8]$mean_score) 160 | 161 | summary_prox8$mean_scaled <- ifelse(summary_prox8$mean_scaled < 0, 162 | ((summary_prox8$mean_scaled - min(na.omit(summary_prox8$mean_scaled)))/(max(na.omit(summary_prox8$mean_scaled))-min(na.omit(summary_prox8$mean_scaled)))) - 1, 163 | ((summary_prox8$mean_scaled - min(na.omit(summary_prox8$mean_scaled)))/(max(na.omit(summary_prox8$mean_scaled))-min(na.omit(summary_prox8$mean_scaled))))) 164 | 165 | summary_prox15 <- data.frame(pair = row.names(conn_dis), 166 | analysis = rep("prox15", 91), 167 | prop = prox_dis[,15]$prop, 168 | mean_scaled = prox_dis[,15]$mean_score) 169 | 170 | summary_prox15$mean_scaled <- ifelse(summary_prox15$mean_scaled < 0, 171 | ((summary_prox15$mean_scaled - min(na.omit(summary_prox15$mean_scaled)))/(max(na.omit(summary_prox15$mean_scaled))-min(na.omit(summary_prox15$mean_scaled)))) - 1, 172 | ((summary_prox15$mean_scaled - min(na.omit(summary_prox15$mean_scaled)))/(max(na.omit(summary_prox15$mean_scaled))-min(na.omit(summary_prox15$mean_scaled))))) 173 | 174 | 175 | summary_df <- rbind(summary_coloc,summary_adj,summary_prox5,summary_prox8,summary_prox15) 176 | 177 | pairs_mean <- sapply(unique(summary_df$pair),function(p){ 178 | p_df <- summary_df[summary_df$pair == p,] 179 | return(mean(na.omit(p_df$mean_scaled))) 180 | }) 181 | 182 | summary_df$pair_mean <- sapply(summary_df$pair, function(p){ 183 | return(pairs_mean[p]) 184 | }) 185 | 186 | summary_df$pattern <- ifelse(summary_df$pair_mean > 0.35, "connected", 187 | ifelse(summary_df$pair_mean < -0.35, "dis connected","mix")) 188 | 189 | dis_summary <- summary_df 190 | 191 | # st vs dis strongest couplings --------------------------------------------------------------- 192 | 193 | 194 | dim(st_summary[st_summary$pattern == "connected" & st_summary$analysis %in% c("coloc", "adj", "prox5") & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2,]) 195 | t_st <- table(as.character(st_summary$pair[st_summary$pattern == "connected" & st_summary$analysis %in% c("coloc", "adj") & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2])) 196 | colocOadj_st <- names(t_st) 197 | colocPadj_st <- names(t_st[t_st > 1]) 198 | t_st2 <- table(as.character(st_summary$pair[st_summary$pattern == "connected" & st_summary$analysis %in% c("prox5","prox8","prox15") & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2])) 199 | prox_st <- names(t_st2) 200 | st_pairs <- unique(c(colocPadj_st, intersect(colocOadj_st,prox_st))) 201 | 202 | 203 | dim(dis_summary[dis_summary$pattern == "connected" & dis_summary$analysis %in% c("coloc", "adj", "prox5") & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2,]) 204 | t_dis <- table(as.character(dis_summary$pair[dis_summary$pattern == "connected" & dis_summary$analysis %in% c("coloc", "adj") & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2])) 205 | colocOadj_dis <- names(t_dis) 206 | colocPadj_dis <- names(t_dis[t_dis > 1]) 207 | t_dis2 <- table(as.character(dis_summary$pair[dis_summary$pattern == "connected" & dis_summary$analysis %in% c("prox5","prox8","prox15") & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2])) 208 | prox_dis <- names(t_dis2) 209 | dis_pairs <- unique(c(colocPadj_dis, intersect(colocOadj_dis,prox_dis))) 210 | 211 | 212 | 213 | 214 | # st vs dis strong coupling by scale ------------------------------------- 215 | 216 | 217 | prox_st <- readRDS("Summary_tables/prox_st.rds") 218 | summary_prox10 <- data.frame(pair = pairs_names, 219 | analysis = rep("prox10", 91), 220 | prop = prox_st[,10]$prop, 221 | mean_scaled = prox_st[,10]$mean_score) 222 | 223 | summary_prox10$mean_scaled <- ifelse(summary_prox10$mean_scaled < 0, 224 | ((summary_prox10$mean_scaled - min(na.omit(summary_prox10$mean_scaled)))/(max(na.omit(summary_prox10$mean_scaled))-min(na.omit(summary_prox10$mean_scaled)))) - 1, 225 | ((summary_prox10$mean_scaled - min(na.omit(summary_prox10$mean_scaled)))/(max(na.omit(summary_prox10$mean_scaled))-min(na.omit(summary_prox10$mean_scaled))))) 226 | 227 | 228 | summary_prox12 <- data.frame(pair = pairs_names, 229 | analysis = rep("prox12", 91), 230 | prop = prox_st[,12]$prop, 231 | mean_scaled = prox_st[,12]$mean_score) 232 | 233 | summary_prox12$mean_scaled <- ifelse(summary_prox12$mean_scaled < 0, 234 | ((summary_prox12$mean_scaled - min(na.omit(summary_prox12$mean_scaled)))/(max(na.omit(summary_prox12$mean_scaled))-min(na.omit(summary_prox12$mean_scaled)))) - 1, 235 | ((summary_prox12$mean_scaled - min(na.omit(summary_prox12$mean_scaled)))/(max(na.omit(summary_prox12$mean_scaled))-min(na.omit(summary_prox12$mean_scaled))))) 236 | 237 | st_coloc <- length(st_summary$pair[st_summary$analysis == "coloc" & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2]) 238 | st_adj <- length(st_summary$pair[st_summary$analysis == "adj" & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2]) 239 | st_p8 <- length(st_summary$pair[st_summary$analysis == "prox8" & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2]) 240 | st_p10 <- length(summary_prox10$pair[summary_prox10$analysis == "prox10" & summary_prox10$mean_scaled > 0.35 & summary_prox10$prop >= 0.2]) 241 | st_p12 <- length(summary_prox12$pair[summary_prox12$analysis == "prox12" & summary_prox12$mean_scaled > 0.35 & summary_prox12$prop >= 0.2]) 242 | st_p15 <- length(st_summary$pair[st_summary$analysis == "prox15" & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2]) 243 | 244 | 245 | prox_dis <- readRDS("Summary_tables/prox_dis.rds") 246 | summary_prox10 <- data.frame(pair = pairs_names, 247 | analysis = rep("prox10", 91), 248 | prop = prox_dis[,10]$prop, 249 | mean_scaled = prox_dis[,10]$mean_score) 250 | 251 | summary_prox10$mean_scaled <- ifelse(summary_prox10$mean_scaled < 0, 252 | ((summary_prox10$mean_scaled - min(na.omit(summary_prox10$mean_scaled)))/(max(na.omit(summary_prox10$mean_scaled))-min(na.omit(summary_prox10$mean_scaled)))) - 1, 253 | ((summary_prox10$mean_scaled - min(na.omit(summary_prox10$mean_scaled)))/(max(na.omit(summary_prox10$mean_scaled))-min(na.omit(summary_prox10$mean_scaled))))) 254 | 255 | 256 | summary_prox12 <- data.frame(pair = pairs_names, 257 | analysis = rep("prox12", 91), 258 | prop = prox_dis[,12]$prop, 259 | mean_scaled = prox_dis[,12]$mean_score) 260 | 261 | summary_prox12$mean_scaled <- ifelse(summary_prox12$mean_scaled < 0, 262 | ((summary_prox12$mean_scaled - min(na.omit(summary_prox12$mean_scaled)))/(max(na.omit(summary_prox12$mean_scaled))-min(na.omit(summary_prox12$mean_scaled)))) - 1, 263 | ((summary_prox12$mean_scaled - min(na.omit(summary_prox12$mean_scaled)))/(max(na.omit(summary_prox12$mean_scaled))-min(na.omit(summary_prox12$mean_scaled))))) 264 | 265 | 266 | dis_coloc <- length(dis_summary$pair[dis_summary$analysis == "coloc" & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2]) 267 | dis_adj <- length(dis_summary$pair[dis_summary$analysis == "adj" & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2]) 268 | dis_p8 <- length(dis_summary$pair[dis_summary$analysis == "prox8" & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2]) 269 | dis_p10 <- length(summary_prox10$pair[summary_prox10$analysis == "prox10" & summary_prox10$mean_scaled > 0.35 & summary_prox10$prop >= 0.2]) 270 | dis_p12 <- length(summary_prox12$pair[summary_prox12$analysis == "prox12" & summary_prox12$mean_scaled > 0.35 & summary_prox12$prop >= 0.2]) 271 | dis_p15 <- length(dis_summary$pair[dis_summary$analysis == "prox15" & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2]) 272 | 273 | coupling_by_scale <- data.frame(num_coup = c(st_coloc,st_adj,st_p8, 274 | dis_coloc,dis_adj,dis_p8), 275 | region = c(rep("st",3),rep("dis",3)), 276 | analysis = rep(c("coloc","adj","prox8"),2)) 277 | 278 | 279 | coupling_by_scale$analysis <- factor(coupling_by_scale$analysis, levels = c("coloc","adj","prox8")) 280 | 281 | ggplot(coupling_by_scale, aes(x=analysis, y=num_coup, group=region)) + 282 | geom_line(aes(color=region))+ 283 | geom_point(aes(color=region)) 284 | 285 | st_p8_pairs <- unique(st_summary$pair[st_summary$analysis %in% c("prox5","prox8","prox15") & st_summary$mean_scaled > 0.35 & st_summary$prop >= 0.2]) 286 | dis_p8_pairs <- unique(dis_summary$pair[dis_summary$analysis %in% c("prox5","prox8","prox15") & dis_summary$mean_scaled > 0.35 & dis_summary$prop >= 0.2]) 287 | 288 | 289 | x <- list( 290 | struct = st_p8_pairs, 291 | disorganised = dis_p8_pairs 292 | ) 293 | 294 | ggvenn( 295 | x, 296 | fill_color = c("white", "white"), 297 | stroke_size = 0.5, set_name_size = 4 298 | ) 299 | 300 | stdis_int <- intersect(st_p8_pairs,dis_p8_pairs) 301 | only_st <- st_p8_pairs[!(st_p8_pairs %in% stdis_int)] 302 | only_dis <- dis_p8_pairs[!(dis_p8_pairs %in% stdis_int)] 303 | 304 | 305 | 306 | -------------------------------------------------------------------------------- /Module5_CNA.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(scalop) 3 | library(patchwork) 4 | library(colorspace) 5 | library(ggpubr) 6 | library(Matrix) 7 | library(biomaRt) 8 | 9 | # Run functions first 10 | 11 | # load data --------------------------------------------------------------- 12 | 13 | 14 | # Load Data (counts mat exported from Seurat post-spot filtering: 15 | 16 | samples_names <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1 17 | 18 | norm_brain_ref1<-readRDS("CNA/normal_brain_ref/UKF256_C.rds") 19 | colnames(norm_brain_ref1) <- paste(colnames(norm_brain_ref1), "_ref1", sep="") 20 | 21 | norm_brain_ref2<-readRDS("CNA/normal_brain_ref/UKF265_C.rds") 22 | colnames(norm_brain_ref2) <- paste(colnames(norm_brain_ref2), "_ref2", sep="") 23 | 24 | 25 | samples_regions <- read.delim("CNA/samples_regions.txt", header = TRUE) 26 | 27 | 28 | # all cna + metadata -------------------------------------------------------- 29 | 30 | samples_num <- c(1:26) 31 | all_cna <- mclapply(samples_num, all_cna_fx, mc.cores = 26) 32 | 33 | all_cna_fx <- function(i){ 34 | m1 <- readRDS(paste("general/exp_mats_GBM/", samples_names[i], "counts.rds", sep = "")) 35 | m <- cbind(m1,norm_brain_ref1,norm_brain_ref2) 36 | 37 | scaling_factor <- 1000000/colSums(m) #tumor mat processing 38 | m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*") 39 | m_loged <- log2(1 + (m_CPM/10)) 40 | var_filter <- apply(m_loged, 1, var) # removing genes with zero variance across all cells 41 | m_proc <- m_loged[var_filter != 0, ] 42 | exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)] # filtering out lowly expressed genes 43 | m_proc <- m_proc[exp_genes, ] 44 | 45 | ref <-list(colnames(norm_brain_ref1), colnames(norm_brain_ref2)) 46 | query <- colnames(m_proc)[!is.element(colnames(m_proc), unname(unlist(ref)))] 47 | 48 | # create metadata table 49 | metadata <- tibble(CellID = colnames(m_proc), 50 | org_ref = ifelse(colnames(m_proc) %in% unname(unlist(ref)), "reference", "not reference")) 51 | 52 | metadata$sample <- "control" 53 | 54 | metadata$sample <- ifelse(metadata$org_ref == "not reference", samples_names[i], metadata$sample) 55 | 56 | 57 | # calc CNA 58 | hg19<- readRDS("CNA/hg38.rds") 59 | cna_score_mat <- calc_cna(matrix = m_proc, query = query, ref = ref, genome = hg19, range = c(-3,3), window = 150, noise = 0.15, isLog = TRUE, per_chr = TRUE, scale = NULL, top_genes = NULL, verbose = TRUE) 60 | sig_and_cor <- cna_sig_cor(cna_score_mat, epi_cells = query, ref_cells = ref, cna_sig = "abs", top_region = 1/3, top_cells = 1/3) 61 | metadata$CNAsig_org <- sig_and_cor$cna_signal[match(metadata$CellID, names(sig_and_cor$cna_signal))] 62 | metadata$CNAcor_org <- sig_and_cor$cna_correlation[match(metadata$CellID, names(sig_and_cor$cna_correlation))] 63 | 64 | # update reference with combined external ref + tumor defined by cna sig / corr thresholds 65 | metadata$new_ref <- ifelse((metadata$CNAcor_org <= 0.25 & metadata$CNAsig_org <= 0.13) | (metadata$org_ref == "reference"), "reference", "query") 66 | 67 | ref <- metadata$CellID[metadata$new_ref == "reference"] 68 | query <- colnames(m_proc)[!is.element(colnames(m_proc), unname(unlist(ref)))] 69 | 70 | # calc CNA with updated ref 71 | cna_score_mat <- calc_cna(matrix = m_proc, query = query, ref = ref, genome = hg19, range = c(-3,3), window = 150, noise = 0.15, isLog = TRUE, per_chr = TRUE, scale = NULL, top_genes = NULL, verbose = TRUE) 72 | sig_and_cor <- cna_sig_cor(cna_score_mat, epi_cells = query, ref_cells = ref, cna_sig = "abs", top_region = 1/3, top_cells = 1/3) 73 | metadata$CNAsig <- sig_and_cor$cna_signal[match(metadata$CellID, names(sig_and_cor$cna_signal))] 74 | metadata$CNAcor <- sig_and_cor$cna_correlation[match(metadata$CellID, names(sig_and_cor$cna_correlation))] 75 | 76 | metadata$CNAtot <- metadata$CNAcor + (metadata$CNAsig * max(na.omit(metadata$CNAcor)) / max(na.omit(metadata$CNAsig))) 77 | 78 | fin_cna <- cna_score_mat[,metadata$CellID[metadata$org_ref == "not reference"]] 79 | md <- metadata[metadata$org_ref == "not reference",] 80 | return(list(md,fin_cna)) 81 | 82 | } 83 | 84 | 85 | # plot combined CNA ---------------------------------------------------------------- 86 | 87 | 88 | c1 <- as.data.frame(all_cna[[1]][[2]]) 89 | c2 <- as.data.frame(all_cna[[2]][[2]]) 90 | 91 | genes_cna <- intersect(row.names(c1),row.names(c2)) 92 | 93 | sapply(c(3:26), function(i){ 94 | ci <- as.data.frame(all_cna[[i]][[2]]) 95 | genes_cna <<- intersect(genes_cna, row.names(ci)) 96 | }) 97 | 98 | set.seed(50) 99 | samp_spots <- sapply(c(1:26), function(i){ 100 | m <- all_cna[[i]][[1]] 101 | return(sample(m$CellID[m$sample == samples_names[i]],700, replace = F)) 102 | }) 103 | 104 | cna_comb <- c1[genes_cna,as.character(samp_spots[,1])] 105 | colnames(cna_comb) <- paste(samples_names[1], colnames(cna_comb), sep = "_") 106 | 107 | sapply(c(2:26), function(i){ 108 | ci_temp <- as.data.frame(all_cna[[i]][[2]]) 109 | ci <- ci_temp[genes_cna,as.character(samp_spots[,i])] 110 | colnames(ci) <- paste(samples_names[i], colnames(ci), sep = "_") 111 | cna_comb <<- cbind(cna_comb, ci) 112 | }) 113 | 114 | 115 | spots_list <- lapply(c(1:26), function(i){ 116 | change_spots <- paste(samples_names[i], samp_spots[,i], sep = "_") 117 | return(change_spots) 118 | }) 119 | 120 | names(spots_list) <- samples_names 121 | 122 | #find_chr <- genome_break(row.names(cna_comb)) 123 | sig7 <- apply(cna_comb[1772:2006,],2,mean) 124 | sig10 <- apply(cna_comb[2363:2538,],2,mean) 125 | 126 | spots_ord <- sapply(spots_list, function(sp){ 127 | return(mean(sig7[sp])+abs(mean(sig10[sp]))) 128 | }) 129 | names(spots_ord) <- names(spots_list) 130 | 131 | spots_list <- spots_list[names(sort(spots_ord))] 132 | 133 | cna_pl <- infercna::cnaPlot(cna_comb, limit=c(-0.7,0.7), order.cells = spots_list, ratio = NULL) # plot cna - important that the ratio is set to NULL! 134 | 135 | cna_pl$p + theme(legend.position = "right") 136 | 137 | 138 | 139 | # cna merged --------------------------------------------------------------- 140 | 141 | samples_names <- (read.delim("CNA/cna_samples.txt", header = FALSE))$V1 142 | 143 | samples_num <- c(15:18) 144 | merge_cna <- mclapply(samples_num, merge_cna_md_fx, mc.cores = 4) 145 | 146 | merge_cna_md_fx <- function(i){ 147 | m1 <- readRDS(paste("general/exp_mats_GBM/", samples_names[i], "counts.rds", sep = "")) 148 | m <- cbind(m1,norm_brain_ref1,norm_brain_ref2) 149 | 150 | scaling_factor <- 1000000/colSums(m) #tumor mat processing 151 | m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*") 152 | m_loged <- log2(1 + (m_CPM/10)) 153 | var_filter <- apply(m_loged, 1, var) # removing genes with zero variance across all cells 154 | m_proc <- m_loged[var_filter != 0, ] 155 | exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)] # filtering out lowly expressed genes 156 | m_proc <- m_proc[exp_genes, ] 157 | 158 | # create normal ref 159 | ref <-list(colnames(norm_brain_ref1), colnames(norm_brain_ref2)) 160 | query <- colnames(m_proc)[!is.element(colnames(m_proc), unname(unlist(ref)))] 161 | 162 | # create metadata table 163 | 164 | 165 | metadata <- tibble(CellID = colnames(m_proc), 166 | org_ref = ifelse(colnames(m_proc) %in% unname(unlist(ref)), "reference", "not reference")) 167 | 168 | metadata$sample <- "control" 169 | metadata$region <- "control" 170 | 171 | if (str_detect(samples_names[i], "merge")) { 172 | sapply(c(1:length(samples_regions$sample[samples_regions$cna_samples_name == samples_names[i]])), function(s){ 173 | s_name <- samples_regions$sample[samples_regions$cna_samples_name == samples_names[i]][s] 174 | s_region <- samples_regions$region[samples_regions$cna_samples_name == samples_names[i]][s] 175 | metadata$sample <<- ifelse(str_detect(metadata$CellID, s_name), s_name, metadata$sample) 176 | metadata$region <<- ifelse(str_detect(metadata$CellID, s_name), s_region, metadata$region) 177 | }) 178 | } else { 179 | metadata$sample <- ifelse(metadata$org_ref == "not reference", samples_regions$sample[samples_regions$cna_samples_name == samples_names[i]], metadata$sample) 180 | metadata$region <- ifelse(metadata$org_ref == "not reference", samples_regions$region[samples_regions$cna_samples_name == samples_names[i]], metadata$region) 181 | 182 | } 183 | 184 | 185 | # calc CNA 186 | hg19<- readRDS("CNA/hg38.rds") 187 | cna_score_mat <- calc_cna(matrix = m_proc, query = query, ref = ref, genome = hg19, range = c(-3,3), window = 150, noise = 0.15, isLog = TRUE, per_chr = TRUE, scale = NULL, top_genes = NULL, verbose = TRUE) 188 | sig_and_cor <- cna_sig_cor(cna_score_mat, epi_cells = query, ref_cells = ref, cna_sig = "abs", top_region = 1/3, top_cells = 1/3) 189 | metadata$CNAsig_org <- sig_and_cor$cna_signal[match(metadata$CellID, names(sig_and_cor$cna_signal))] 190 | metadata$CNAcor_org <- sig_and_cor$cna_correlation[match(metadata$CellID, names(sig_and_cor$cna_correlation))] 191 | 192 | # update reference with combined external ref + tumor defined by cna sig / corr thresholds 193 | metadata$new_ref <- ifelse((metadata$CNAcor_org <= 0.25 & metadata$CNAsig_org <= 0.13) | (metadata$org_ref == "reference"), "reference", "query") 194 | 195 | ref <- metadata$CellID[metadata$new_ref == "reference"] 196 | query <- colnames(m_proc)[!is.element(colnames(m_proc), unname(unlist(ref)))] 197 | 198 | # calc CNA with updated ref 199 | cna_score_mat <- calc_cna(matrix = m_proc, query = query, ref = ref, genome = hg19, range = c(-3,3), window = 150, noise = 0.15, isLog = TRUE, per_chr = TRUE, scale = NULL, top_genes = NULL, verbose = TRUE) 200 | sig_and_cor <- cna_sig_cor(cna_score_mat, epi_cells = query, ref_cells = ref, cna_sig = "abs", top_region = 1/3, top_cells = 1/3) 201 | metadata$CNAsig <- sig_and_cor$cna_signal[match(metadata$CellID, names(sig_and_cor$cna_signal))] 202 | metadata$CNAcor <- sig_and_cor$cna_correlation[match(metadata$CellID, names(sig_and_cor$cna_correlation))] 203 | 204 | metadata$CNAtot <- metadata$CNAcor + (metadata$CNAsig * max(metadata$CNAcor) / max(metadata$CNAsig)) 205 | 206 | fin_cna <- cna_score_mat[,metadata$CellID[metadata$org_ref == "not reference"]] 207 | md <- metadata[metadata$org_ref == "not reference",] 208 | 209 | return(list(md,fin_cna)) 210 | } 211 | # malignancy level and binning ------------------------------------------------------------------ 212 | 213 | 214 | all_tot <- sapply(c(1:14), function(i){ 215 | md <- all_cna[[i]][[1]] 216 | return(md$CNAtot) 217 | }) 218 | names(all_tot) <- samples_names[1:14] 219 | 220 | all_tot_merge <- sapply(c(1:4), function(i){ 221 | md <- merge_cna[[i]][[1]] 222 | return(md$CNAtot) 223 | }) 224 | names(all_tot_merge) <- samples_names[15:18] 225 | 226 | hist(c(unlist(all_tot), unlist(all_tot_merge)), breaks = 150) 227 | 228 | all_tot_split_tmp <- sapply(names(all_tot_merge), function(m){ 229 | tmp <- all_tot_merge[[m]] 230 | split_samps <- samples_regions$sample[samples_regions$cna_samples_name == m] 231 | split_scores <- sapply(split_samps, function(s){ 232 | tmp_split <- tmp[grepl(s,names(tmp))] 233 | names(tmp_split) <- sapply(strsplit(names(tmp_split),"_"),function(ns){ns[2]}) 234 | return(tmp_split) 235 | }) 236 | return(split_scores) 237 | }) 238 | 239 | all_tot_split <- unlist(all_tot_split_tmp, recursive = F) 240 | names(all_tot_split) <- sapply(strsplit(names(all_tot_split),"\\."),function(ns){ns[2]}) 241 | 242 | sample_ls <- (read.delim("general/GBM_samples.txt", header = FALSE))$V1 243 | mal_lev_score <- c(all_tot,all_tot_split) 244 | mal_lev_score <- mal_lev_score[sample_ls] 245 | 246 | min_vl <- min(unlist(mal_lev_score)) 247 | max_vl <- max(unlist(mal_lev_score)) 248 | 249 | mal_lev_score_scaled <- sapply(mal_lev_score, function(p){ 250 | p_sc <- (p - min_vl)/(max_vl-min_vl) 251 | return(p_sc) 252 | }) 253 | 254 | 255 | # malignancy binning ------------------------------------ 256 | 257 | hist(c(mal_lev_score_scaled[[15]], mal_lev_score_scaled[[23]]), breaks = 100) 258 | mal_lev_th <- quantile(c(mal_lev_score_scaled[[15]], mal_lev_score_scaled[[23]])) 259 | 260 | malignancy_bin <- sapply(mal_lev_score_scaled, function(p){ 261 | bin <- ifelse(p < mal_lev_th[2] , "non_malignant", 262 | ifelse(p >= mal_lev_th[2] & p < mal_lev_th[3], "mix_low", 263 | ifelse(p >= mal_lev_th[3] & p < mal_lev_th[4],"mix_high","malignant"))) 264 | return(bin) 265 | }) 266 | 267 | 268 | 269 | 270 | ######## Functions --------------------------------------------------------------- 271 | # CNA Score Calculation --------------------------------------------------- 272 | 273 | 274 | #### calc_cna - calculate CNA scores per cell/spot given a gene x cell matrix, query cells and reference cell list/vector 275 | # NOTE: matrix should NOT be row(gene)-centered 276 | calc_cna <- function(matrix, 277 | query, 278 | ref, 279 | top_genes = NULL, 280 | window = 100, 281 | range = c(-3, 3), 282 | per_chr = TRUE, 283 | scale = 0.05, 284 | noise = NULL, 285 | isLog = FALSE, 286 | genome = NULL, 287 | min_ref_leng = 10, 288 | verbose = FALSE){ 289 | 290 | if(all(round(range(rowMeans(matrix)), 3) == 0)) { 291 | stop(print("Matrix is row-centered. Please provide non-centered data.")) 292 | } 293 | if(is.list(ref)){ 294 | # Remove small references 295 | ref_leng <- lapply(ref, length) 296 | ref <- ref[ref_leng >= min_ref_leng] 297 | # Prepare CNA matrix to work on 298 | cna_mat <- matrix[, c(query, unlist(ref))] 299 | } 300 | else if(!is.list(ref)){ 301 | if(length(ref) < min_ref_leng){ 302 | stop(print("There are not enough reference cells!")) 303 | } 304 | cna_mat <- matrix[, c(query, ref)] 305 | } 306 | 307 | # Order chromosomes 308 | if (verbose) message("Ordering the genes by their genomic position.") 309 | order_genes <- genome_sort(rownames(cna_mat), genome = genome) 310 | genes <- order_genes$cna_genes 311 | chr_breaks <- order_genes$chr_breaks 312 | 313 | # Optional list of top expressed genes 314 | if(!is.null(top_genes)){ 315 | if(verbose) message("Filtering the expression matrix to include only top ", length(top_genes), "genes...") 316 | if(isTRUE(isLog)){ 317 | cna_mat <- un_log(cna_mat)} 318 | cna_mat <- cna_mat[genes, ] 319 | cna_mat <- apply(cna_mat, 1, mean) 320 | cna_mat <- names(tail(sort(cna_mat), n = top_genes)) 321 | order_genes <- genome_sort(cna_mat, genome = genome) 322 | genes <- order_genes$cna_genes 323 | chr_breaks <- order_genes$chr_breaks 324 | } 325 | 326 | # Reorder 327 | ordered_mat <- cna_mat[genes, ] 328 | # Log before first centering 329 | if(isFALSE(isLog)){ 330 | ordered_mat <- log_norm(ordered_mat)} 331 | # First row centering step 332 | if (verbose) message("Performing mean-centering of the genes.") 333 | avg <- apply(ordered_mat, 1, mean) 334 | ordered_mat <- sweep(ordered_mat, 1, avg) 335 | # Set 3 and -3 as extreme values (as set by the argument "range") 336 | if (verbose) message("Restricting expression matrix values to between ", range[[1]], " and ", range[[2]], ".") 337 | ordered_mat <- apply(ordered_mat, 2, function(x) pmax(x, range[1])) 338 | ordered_mat <- apply(ordered_mat, 2, function(x) pmin(x, range[2])) 339 | # Unlog to CPM/TPM again for moving average 340 | ordered_mat <- un_log(ordered_mat) 341 | 342 | # Calculate moving average per chromosome by window of 100 (as set by the argument "window") 343 | if(isTRUE(per_chr)){ 344 | if (verbose) message("Calculating rolling means with a window size of ", window, " genes, on each chromosome in turn.") 345 | num <- seq(1:(length(chr_breaks) -1)) 346 | perchr <- lapply(num, function(y){ 347 | if(y == length(num)){ 348 | end <- nrow(ordered_mat) 349 | } 350 | if(y != length(num)){ 351 | end <- chr_breaks[y + 1] - 1 352 | } 353 | chr <- ordered_mat[chr_breaks[y]:end, ] 354 | chr_mat <- apply(chr, 2, function(x) caTools::runmean(x, k = window, endrule = "mean")) 355 | }) 356 | calc_mat <- do.call(rbind, perchr) 357 | } 358 | # Calculate moving average for all genes as one chromosome 359 | if(isFALSE(per_chr)){ 360 | if (verbose) message("Calculating rolling means with a window size of ", window, " genes, on all genes as one chromosome.") 361 | calc_mat <- apply(ordered_mat, 2, function(x) caTools::runmean(x, k = window, endrule = "mean")) 362 | } 363 | 364 | # Log before second centering 365 | if (verbose) message("Converting CNA score values to log(2) space.") 366 | calc_mat <- log_norm(calc_mat) 367 | # Substract median per cell 368 | if (verbose) message("Performing median-centering of the cells.") 369 | cell_med <- apply(calc_mat, 2, median) 370 | calc_mat <- sweep(calc_mat, 2, cell_med) 371 | # Unlog to CPM/TPM again for reference removal 372 | calc_mat <- un_log(calc_mat) 373 | 374 | # Create max/min values per gene from reference cells 375 | if(is.list(ref)){ 376 | ref_leng <- seq(1, length(ref)) 377 | mean_mat <- lapply(ref_leng, function(x){ 378 | idx_ref <- ref[[x]] 379 | m1 <- apply(calc_mat[, idx_ref], 1, mean) 380 | m1 381 | }) 382 | ref_mat <- do.call(cbind, mean_mat) 383 | # Log references 384 | ref_mat <- log_norm(ref_mat) 385 | ref_max <- apply(ref_mat, 1, max) 386 | ref_min <- apply(ref_mat, 1, min) 387 | } 388 | else if(!is.list(ref)){ 389 | ref_mat <- apply(calc_mat[, ref], 1, mean) 390 | ref_mat <- log_norm(ref_mat) 391 | ref_max <- ref_mat 392 | ref_min <- ref_mat 393 | } 394 | 395 | # Expand reference boundaries by scaling percentage 396 | if(!is.null(scale)){ 397 | rmax <- ref_max + scale * abs(ref_max) 398 | rmin <- ref_min - scale * abs(ref_min) 399 | } 400 | # Or expand by fixed noise factor 401 | if(!is.null(noise)){ 402 | rmax <- ref_max + noise 403 | rmin <- ref_min - noise 404 | } 405 | 406 | # Log CNA matrix 407 | calc_mat <- log_norm(calc_mat) 408 | # Centre by reference 409 | if (verbose) message("Correcting CNA profiles using CNA values from reference cells.") 410 | score_mat <- ifelse(calc_mat > rmax, calc_mat - rmax, 411 | ifelse(calc_mat < rmin, calc_mat - rmin, 0)) 412 | rownames(score_mat) <- rownames(ordered_mat) 413 | 414 | if (verbose) message("Done!") 415 | return(score_mat) 416 | } 417 | 418 | 419 | 420 | # CNA Matrix Subclones ---------------------------------------------------- 421 | 422 | 423 | #### Dived the CNA matrix to subclones by reducing feature dimension and clustering with Louvain: 424 | spatial_subclones <- function(cna_matrix, 425 | epi_cells, 426 | separate = c("arm", "chr"), 427 | genome = NULL, 428 | genecut = 10, 429 | top_region = 1/3, 430 | top_method = c("abs", "sd"), 431 | reduction_dims = 30, 432 | cluster_method = c("louvain", "dbscan"), 433 | cluster_k = 10, 434 | dbs_minpts = NULL, 435 | dbs_ptscale = NULL, 436 | diffcut = 10){ 437 | 438 | ## Subset by chromosome 439 | breaks <- genome_break(genes = rownames(cna_matrix), separate = separate, 440 | genecut = genecut, genome = genome) 441 | break_idx <- breaks$break_idx 442 | labels <- breaks$breaks_df$labels 443 | dispose <- breaks$dispose 444 | if(length(dispose) != 0) { 445 | matrix <- cna_matrix[-dispose, ] 446 | } else {matrix <- cna_matrix} 447 | break_num <- unique(break_idx) 448 | 449 | ## Set working matrix 450 | matrix <- matrix[, epi_cells] 451 | names(break_idx) <- rownames(matrix) 452 | 453 | ## Select the most CNA-rich regions 454 | if(top_method == "abs"){ 455 | abs_vals <- apply(matrix, 1, function(x) mean(abs(x))) 456 | mat <- matrix[abs_vals > quantile(abs_vals, probs = 1 - top_region), ] 457 | } 458 | ## or alternatively - Select the most variant gene regions 459 | if(top_method == "sd"){ 460 | stand_dev <- apply(matrix, 1, sd) 461 | mat <- matrix[stand_dev > quantile(stand_dev, probs = 1 - top_region), ] 462 | } 463 | transpose_mat <- as.matrix(t(mat)) 464 | 465 | # Dimred 466 | pca <- prcomp(transpose_mat, center = FALSE, scale. = FALSE) 467 | dim_red_mat <- pca$x[, 1:reduction_dims] 468 | rownames(dim_red_mat) <- colnames(matrix) 469 | 470 | ## Clustering methods: 471 | if(cluster_method == "louvain"){ 472 | clusters <- cluster_coord(dim_red_mat, method = "louvain", louvain = cluster_k) 473 | } 474 | if(cluster_method == "dbscan"){ 475 | if(is.null(dbs_minpts) & is.null(dbs_ptscale)) {minpts <- log(nrow(trans_mat3))} 476 | if(!is.null(dbs_ptscale)) {minpts <- dbs_ptscale * log(nrow(trans_mat3))} 477 | if(!is.null(dbs_minpts)) {minpts <- dbs_minpts} 478 | clusters <- cluster_coord(dim_red_mat, method = "dbscan", min_pts = minpts, diffcut = diffcut) 479 | 480 | clusters <- dbscan.cluster(trans_mat3, min_pts = minpts, diffcut = diffcut) 481 | } 482 | 483 | names(clusters) <- colnames(matrix) 484 | return(clusters) 485 | } 486 | 487 | 488 | 489 | # Arrange Genes by Chromosomes -------------------------------------------- 490 | 491 | 492 | #### genome_sort - with gene list as input, reorder genes by chromosome position and get breaks for chromosomes and arms 493 | genome_sort <- function(genes, genome = NULL, attributes = c("hgnc_symbol", "start_position", "chromosome_name", "band"), downURL = "https://uswest.ensembl.org"){ 494 | 495 | # Choose which species to use and server to download from 496 | if(is.null(genome)){ 497 | mart <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl", host = downURL, port = 80) 498 | #Get attributes for gene list 499 | cna_attr <- biomaRt::getBM(attributes = attributes, filters = "hgnc_symbol", values = genes, mart = mart, uniqueRows = TRUE) 500 | } 501 | if(!is.null(genome)){ 502 | cna_attr <- as.data.frame(genome) 503 | rownames(cna_attr) <- cna_attr[, 1] 504 | cna_attr <- cna_attr[rownames(cna_attr) %in% genes, ] 505 | colnames(cna_attr)[1] <- "hgnc_symbol" 506 | } 507 | 508 | # Remove genes not mapped to proper chromosomes 509 | chr_names <- c(seq(1:22), "X", "Y") 510 | genes2rm <- setdiff(unique(cna_attr$chromosome_name), chr_names) 511 | rmgenes <- lapply(genes2rm, function(x){ 512 | gene_drop <- cna_attr[grep(x, cna_attr$chromosome_name), ] 513 | }) 514 | gene2rm <- do.call(rbind, rmgenes) 515 | cna_attr <- cna_attr[!is.element(rownames(cna_attr), rownames(gene2rm)), ] 516 | # Remove doubles 517 | cna_attr <- cna_attr[!duplicated(cna_attr$hgnc_symbol), ] 518 | # Remove NA's 519 | cna_attr <- na.omit(cna_attr) 520 | 521 | # Change X and Y chromosomes to numbers for ordering 522 | cna_attr$chromosome_name <- gsub("X", 23, cna_attr$chromosome_name) 523 | cna_attr$chromosome_name <- gsub("Y", 24, cna_attr$chromosome_name) 524 | cna_attr$chromosome_name <- as.numeric(cna_attr$chromosome_name) 525 | # Order 526 | cna_attr <- cna_attr[order(cna_attr$chromosome_name, cna_attr$start_position, decreasing = FALSE), ] 527 | # Chromosome number as sorting vector 528 | chr_names <- as.character(unique(cna_attr$chromosome_name)) 529 | # Chromosome length 530 | chr_length <- lapply(chr_names, function(x){ 531 | l <- nrow(subset(cna_attr, cna_attr$chromosome_name == x)) 532 | }) 533 | chr_length <- do.call(rbind, chr_length) 534 | # Break vector at each chromosome end 535 | chr_breaks <- cumsum(chr_length) 536 | # Set all genes as p or q 537 | cna_attr$band <- gsub("[[:digit:]]", "", cna_attr$band) 538 | cna_attr$band <- gsub("p.", "p", cna_attr$band) 539 | cna_attr$band <- gsub("q.", "q", cna_attr$band) 540 | # Chromosome arm lenghts 541 | arm_breaks <- integer(length = length(chr_breaks)) 542 | for(i in seq_along(chr_breaks)){ 543 | starting <- chr_breaks[i] 544 | ending <- nrow(cna_attr[cna_attr$chromosome_name == i & cna_attr$band == "q", ]) 545 | gap_length <- starting - ending 546 | arm_breaks[i] <- round(gap_length, digits = 0) 547 | } 548 | 549 | # Breaks and labels for arms and full chromosomes 550 | full_breaks <- sort(c(1, chr_breaks, arm_breaks)) 551 | # Add p and q labels 552 | q2q <- paste(seq(1, length(chr_breaks)), "q", sep = "") 553 | p2p <- paste(seq(1, length(chr_breaks)), "p", sep = "") 554 | full_labels <- sort(c(seq(1, length(chr_breaks)), seq(1, length(chr_breaks)))) 555 | full_labels[seq(2, length(full_labels), 2)] <- q2q 556 | full_labels[seq(1, length(full_labels), 2)] <- p2p 557 | # Empty label at end of Y chromosome 558 | full_labels <- c(full_labels, " ") 559 | # Name X and Y chromosomes 560 | full_labels[45:48] <- c("Xp", "Xq", "Yp", "Yq") 561 | chr_breaks <- c(1, chr_breaks) 562 | chr_names[23:25] <- c("X", "Y", " ") 563 | 564 | output <- list(cna_genes = cna_attr$hgnc_symbol, 565 | chr_breaks = chr_breaks, 566 | chr_names = chr_names, 567 | arm_breaks = arm_breaks, 568 | full_breaks = full_breaks, 569 | full_labels = full_labels, 570 | all = cna_attr) 571 | return(output) 572 | } 573 | 574 | 575 | #### Index and label genes by chromosomal location - chromosome number and arm 576 | 577 | genome_break <- function(genes, genecut = 10, separate = "chr", genome = NULL){ 578 | 579 | chr_sort <- genome_sort(genes, genome = genome) 580 | genes <- chr_sort$cna_genes 581 | 582 | if(separate == "chr") {breaks = chr_sort$chr_breaks 583 | labels = chr_sort$chr_names} 584 | if(separate == "arm") {breaks = chr_sort$full_breaks 585 | labels = chr_sort$full_labels} 586 | if(length(breaks) != length(labels)) {labels = labels[1:length(breaks)]} 587 | 588 | # Breaks as factor 589 | break_idx <- character(length = length(genes)) 590 | for(i in 1:(length(breaks) - 1)){ 591 | if(i == (length(breaks) - 1)) {end = length(genes)} 592 | if(i != (length(breaks) - 1)) {end = breaks[i + 1]} 593 | chr_leng <- end - breaks[i] 594 | if(chr_leng < genecut & chr_leng > 0 & i != 1) {break_idx[breaks[i] + 1:end] = NA} 595 | if(chr_leng < genecut & chr_leng > 0 & i == 1) {break_idx[breaks[i]:end] = NA} 596 | if(chr_leng == 0 & i != 1) {break_idx[breaks[i] + 1] = NA} 597 | if(chr_leng == 0 & i == 1) {break_idx[breaks[i]] = NA} 598 | if(chr_leng > genecut & i != 1) {break_idx[breaks[i] + 1:end] = i} 599 | if(chr_leng > genecut & i == 1) {break_idx[breaks[i]:end] = i} 600 | } 601 | 602 | # Remove genes on chromosomes with too few genes 603 | dispose <- which(is.na(break_idx)) 604 | break_idx <- na.omit(break_idx) 605 | break_idx <- sort(as.numeric(break_idx)) 606 | genes <- chr_sort$cna_genes[-dispose] 607 | names(break_idx) <- genes 608 | 609 | # Keep only labels with chromosomes that have enough genes 610 | breaks_df <- cbind.data.frame(breaks, labels) 611 | rownames(breaks_df) <- seq(1, nrow(breaks_df)) 612 | breaks_df <- breaks_df[unique(break_idx), ] 613 | 614 | out <- list(break_idx = break_idx, breaks_df = breaks_df, dispose = dispose) 615 | return(out) 616 | } 617 | 618 | 619 | 620 | 621 | # Utility functions ------------------------------------------------------- 622 | 623 | 624 | #### log and un_log functions: 625 | log_norm <- function(x, centre = FALSE){ 626 | logged <- log2(1 + (x/10)) 627 | if(isTRUE(centre)){ 628 | avg <- rowMeans(logged) 629 | logged <- sweep(logged, 1, avg) 630 | } 631 | return(logged) 632 | } 633 | 634 | un_log <- function(x){ 635 | tpmed <- 10 * (2^x - 1) 636 | return(tpmed) 637 | } 638 | 639 | 640 | #### Clustering function and methods 641 | ## General cluster coordinate function 642 | cluster_coord <- function(matrix, method = c("dbscan", "louvain"), louvain = 25, ...){ 643 | if(method == "dbscan"){ 644 | clusters <- cluster_dbscan(matrix, ...) 645 | } 646 | if(method == "louvain"){ 647 | clusters <- cluster_louvain(matrix, k = louvain) 648 | } 649 | 650 | rename_clusts <- function(x){paste("cluster", x, sep = "")} 651 | clusters <- rename_clusts(clusters) 652 | names(clusters) <- rownames(matrix) 653 | return(clusters) 654 | } 655 | 656 | ## Louvain clustering 657 | cluster_louvain <- function(data, k) { 658 | knn <- FNN::get.knn(as.matrix(data), k = k) 659 | knn <- data.frame(from = rep(1:nrow(knn$nn.index), k), to = as.vector(knn$nn.index), weight = 1 / (1 + as.vector(knn$nn.dist))) 660 | simple_igraph <- igraph::simplify(igraph::graph_from_data_frame(knn, directed = FALSE)) 661 | louvain_clusts <- igraph::cluster_louvain(simple_igraph) 662 | clusters <- igraph::membership(louvain_clusts) 663 | return (clusters) 664 | } 665 | 666 | ## DBScan clustering 667 | cluster_dbscan <- function(matrix, min_pts = log(nrow(matrix)), probs = 3/4, diff_cut = 10){ 668 | dist <- dbscan::kNNdist(matrix, min_pts) 669 | dist_cutoff <- quantile(sort(dist), probs = probs) 670 | dist <- dist[dist > dist_cutoff] 671 | dist <- dist[order(dist)] 672 | dist <- dist / max(dist) 673 | der_dist <- diff(dist) / (1 / length(dist)) 674 | knee <- dist[length(der_dist) - length(der_dist[der_dist > diff_cut])] + dist_cutoff 675 | db_run <- dbscan::dbscan(matrix, eps = knee, minPts = min_pts) 676 | clusters <- as.factor(db_run$cluster) 677 | return(clusters) 678 | } 679 | 680 | 681 | #Dor Simkin 682 | #11:18 AM (0 minutes ago) 683 | #to me 684 | 685 | #### Calculate CNAsignal & CNAcorrelation scores 686 | 687 | cna_sig_cor <- function(cna_matrix, 688 | 689 | epi_cells, 690 | 691 | ref_cells, 692 | 693 | top_region = 1/3, 694 | 695 | top_cells = 1/3, 696 | 697 | cna_sig = "abs"){ 698 | 699 | 700 | 701 | if(is.list(ref_cells)){ref_cells <- unlist(ref_cells)} 702 | ref_cells <- ref_cells[ref_cells %in% colnames(cna_matrix)] 703 | 704 | if(!is.null(intersect(ref_cells, epi_cells))){ 705 | 706 | rmcells <- intersect(ref_cells, epi_cells) 707 | 708 | ref_cells <- setdiff(ref_cells, rmcells) 709 | 710 | } 711 | 712 | cna_matrix <- cna_matrix[, c(epi_cells, ref_cells)] 713 | 714 | all_cells <- colnames(cna_matrix) 715 | 716 | epi_matrix <- cna_matrix[, epi_cells] 717 | 718 | 719 | 720 | # Absolute CNA value per gene for all epithelial cells 721 | 722 | absvals <- apply(epi_matrix, 1, function(x) mean(abs(x))) 723 | 724 | # Top one-third of genes with highest absolute value (as set by the argument "top_region") 725 | 726 | top_abs <- ifelse(absvals > quantile(absvals, probs = 1 - top_region), absvals, 0) 727 | 728 | # Define matrices where to calculate cell averages - only the regions with most CNA 729 | 730 | score_cna <- cna_matrix[top_abs > 0, ] 731 | 732 | # CNA score for each tumor cell across relevant region 733 | 734 | if(cna_sig == "abs"){cna_score <- apply(score_cna, 2, function(x) mean(abs(x)))} 735 | 736 | if(cna_sig == "sqrt"){cna_score <- apply(score_cna, 2, function(x) mean(sqrt(x^2)))} 737 | 738 | if(cna_sig == "square"){cna_score <- apply(score_cna, 2, function(x) mean(x^2))} 739 | 740 | # Calculate correlation only with epithelial cells with strongest signal (defined by a cutoff set by the argument "top_cells") 741 | 742 | cellcut <- quantile(cna_score, probs = 1 - top_cells) 743 | 744 | epi_cna <- epi_matrix[top_abs > 0, cna_score[epi_cells] > cellcut] 745 | 746 | # Correlation vector 747 | 748 | cor_vec <- cor(score_cna, epi_cna) 749 | 750 | cna_cor <- apply(cor_vec, 1, mean) 751 | 752 | 753 | 754 | out <- list(cna_signal = cna_score, 755 | 756 | cna_correlation = cna_cor) 757 | 758 | return(out) 759 | 760 | } 761 | 762 | ## plotting function 763 | 764 | 765 | ggbar = function(colVar, 766 | legend_title = NULL, 767 | obs=NULL, 768 | dir=c('h','v'), 769 | cols = c('blue','red','orange','green','magenta')) { 770 | 771 | L = list(i = obs,col = colVar) 772 | lens = sapply(L, length, simplify = F) 773 | lens = lens[lens!=0] 774 | lens = unlist(lens) 775 | 776 | stopifnot(length(lens)>0 & length(unique(lens))==1) 777 | 778 | len = unique(unlist(lens)) 779 | if (is.null(obs)) obs = 1:len 780 | if (is.null(colVar)) colVar = rep('', len) 781 | 782 | dir = match.arg(dir) 783 | d = data.frame(id = obs, 784 | colVar=colVar, 785 | stringsAsFactors=F) 786 | 787 | d = d %>% dplyr::mutate(id = factor(id, levels = unique(id))) 788 | 789 | if (dir=='h') {G = ggplot(d, aes(y=1,x=id,fill=colVar))} 790 | else {G = ggplot(d, aes(y=id,x=1,fill=colVar))} 791 | 792 | 793 | G + geom_tile() + 794 | scale_fill_manual(values=cols, name = legend_title, guide = guide_legend(override.aes = list(size = 10))) + 795 | theme_void() + 796 | theme(legend.position='top', 797 | plot.margin=margin(0,0,0,0,'cm')) 798 | } -------------------------------------------------------------------------------- /Module6_CODEX.R: -------------------------------------------------------------------------------- 1 | # CODEX related code 2 | # Load libraries, custom functions and levels ----------------------------------------- 3 | ## Load libraries 4 | library(ComplexHeatmap) 5 | library(circlize) 6 | library(viridis) 7 | library(RColorBrewer) 8 | library(ggraph) 9 | library(scales) 10 | library(ggpubr) 11 | library(tidyverse) 12 | #library(ggvoronoi) 13 | 14 | ## Load custom functions and levels 15 | Count_cell_type <- function(cell_type = cell_type_figure) { 16 | cells %>% count({{cell_type}}) %>% print(n=Inf) 17 | } 18 | 19 | `%ni%` <- Negate(`%in%`) 20 | 21 | Create_aligned_pseudospots <- function(sample_codex="ZH916_INF", sample_visium="ZH916inf") { 22 | 23 | tmp_codex <- cells_aligned %>% 24 | filter(sample %in% c({{sample_codex}})) %>% 25 | filter(cell_type_tidy %ni% c("T_cell", "B_cell", "excluded")) 26 | 27 | # Create aligned pseudospots codex 28 | spot_distance <- scalef %>% 29 | filter(sample == {{sample_visium}}) %>% 30 | pull(scalef_hires) 31 | 32 | spot_diameter <- spot_distance*0.55 33 | 34 | ## Import spot grid 35 | spots <- visium_positions %>% 36 | filter(sample == {{sample_visium}}) 37 | 38 | ## Calculate distances between tmp and spot centers 39 | distances <- spots %>% 40 | expand_grid(cell_name = tmp_codex$cell_name) %>% 41 | left_join(tmp_codex, by = "cell_name") %>% 42 | mutate(distance = sqrt((spot_x - aligned_x)^2 + (spot_y - aligned_y)^2)) 43 | 44 | ## Assign tmp_codex to spots based on distances 45 | assigned_spots <- distances %>% 46 | filter(distance <= spot_diameter / 2) %>% 47 | group_by(cell_name) %>% 48 | arrange(distance) %>% 49 | slice_head(n = 1) 50 | 51 | rm(distances) 52 | gc() 53 | 54 | ## Append spot names to the original tibble 55 | tmp_final <- tmp_codex %>% 56 | left_join(assigned_spots %>% dplyr::select(cell_name, spot_id), by = "cell_name") %>% 57 | mutate(spot_id = ifelse(is.na(spot_id), NA, spot_id)) 58 | 59 | ## Calculate and plot dominant cell_type per spot 60 | tmp_dom <- tmp_final %>% 61 | filter(cell_type_tidy %ni% c("excluded")) %>% 62 | group_by(spot_id) %>% 63 | count(cell_type_tidy) %>% 64 | arrange(spot_id, desc(n)) %>% 65 | mutate(n_total = sum(n)) %>% 66 | mutate(fraction = n/sum(n)) %>% 67 | filter(row_number()==1) %>% 68 | left_join(spots %>% select(spot_x, spot_y, spot_id), by = c("spot_id")) %>% 69 | ungroup() %>% 70 | distinct(.) 71 | 72 | return(tmp_dom) 73 | } 74 | 75 | Run_DEPs <- function(sample) { 76 | nmat_sample <- nmat[, cells %>% 77 | filter(sample %in% c({{sample}})) %>% 78 | filter(cell_type_figure %ni% c("excluded")) %>% 79 | pull(cell_name) 80 | ] 81 | 82 | cells_sample <- cells %>% 83 | filter(sample %in% c({{sample}})) %>% 84 | filter(cell_type_figure %ni% c("excluded")) 85 | 86 | DEPs_hm_sample <- 87 | tibble(dim(nmat_sample)[1]:dim(nmat_sample)[1]) # Create df in which the DEG for each cluster will be inserted 88 | 89 | for (i in sort(unique(cells_sample$cell_type_figure))) { 90 | # i iterates all clusters 91 | top_DEP_value <- 92 | (rowMeans(nmat_sample[, which(i == cells_sample$cell_type_figure)]) - rowMeans(nmat_sample[, which(i != cells_sample$cell_type_figure)])) 93 | DEPs_hm_sample <- 94 | cbind(DEPs_hm_sample, top_DEP_value) # select top gene names 95 | } 96 | 97 | DEPs_hm_sample <- 98 | DEPs_hm_sample[, -1] # Delete redundant first column 99 | colnames(DEPs_hm_sample) <- 100 | paste0(sort(unique(cells_sample$cell_type_figure)), "_", { 101 | { 102 | sample 103 | } 104 | }) 105 | return(DEPs_hm_sample) 106 | } 107 | 108 | Run_DEPs_hm <- function(sample, cell_types, marker) { 109 | nmat <- nmat[{{marker}} ,cells %>% 110 | filter(sample %in% {{sample}}) %>% 111 | filter(cell_type_figure %in% {{cell_types}}) %>% 112 | pull(cell_name) ] 113 | 114 | cells <- cells %>% 115 | filter(sample %in% {{sample}}) %>% 116 | filter(cell_type_figure %in% {{cell_types}}) 117 | 118 | 119 | DEPs_hm <- tibble(dim(nmat)[1]:dim(nmat)[1]) # Create df in which the DEG for each cluster will be inserted 120 | 121 | for (i in sort(unique(cells$cell_type_figure))) { # i iterates all clusters 122 | top_DEP_value <- (rowMeans(nmat[,which(i == cells$cell_type_figure) ]) - rowMeans(nmat[,which(i != cells$cell_type_figure)])) 123 | DEPs_hm <- cbind(DEPs_hm, top_DEP_value) # select top gene names 124 | } 125 | 126 | DEPs_hm <- DEPs_hm[,-1] # Delete redundant first column 127 | colnames(DEPs_hm) <- rep( x = paste(sort(unique(cells$cell_type_figure)))) 128 | 129 | return(DEPs_hm) 130 | } 131 | 132 | outersect <- function(x, y) { 133 | sort(c(setdiff(x, y), 134 | setdiff(y, x))) 135 | } 136 | 137 | `%ni%` <- Negate(`%in%`) 138 | 139 | Plot_tiles_9 <- function(sample = "ZH916T1") { 140 | 141 | mp_cons <- tibble(mp_cons = vspots$mp_cons %>% unique()) %>% 142 | drop_na() 143 | 144 | vis <- spots_visium_aligned %>% 145 | filter(spot_id_sample %in% intersect(spots_codex_aligned$spot_id_sample, spots_visium_aligned$spot_id_sample)) %>% 146 | filter(sample == {{sample}}) %>% 147 | select(spot_id, mp_cons, spot_x, spot_y) 148 | 149 | cod <- spots_codex_aligned %>% 150 | filter(spot_id_sample %in% intersect(spots_codex_aligned$spot_id_sample, spots_visium_aligned$spot_id_sample)) %>% 151 | filter(sample == {{sample}}) %>% 152 | mutate(mp_cons = mp_cons) %>% 153 | select(spot_id, mp_cons, spot_x, spot_y) 154 | 155 | # Calculate the range of x and y coordinates 156 | x_range <- range(vis$spot_x) 157 | y_range <- range(vis$spot_y) 158 | 159 | # Define the size of the tiles (one-third of the range) 160 | tile_size_x <- (x_range[2] - x_range[1]) / 3 161 | tile_size_y <- (y_range[2] - y_range[1]) / 3 162 | 163 | # Initialize a list to store the 3x3 tiles 164 | tiles_vis <- vector("list", length = 9) 165 | 166 | # Loop through each tile 167 | for (i in 1:3) { 168 | for (j in 1:3) { 169 | # Define the boundaries of the current tile 170 | x_min <- x_range[1] + (i - 1) * tile_size_x 171 | x_max <- x_range[1] + i * tile_size_x 172 | y_min <- y_range[1] + (j - 1) * tile_size_y 173 | y_max <- y_range[1] + j * tile_size_y 174 | } 175 | } 176 | 177 | ggpubr::ggarrange( 178 | ggplot(vis, aes(x=spot_x, y=spot_y, color = mp_cons)) + 179 | geom_point(size = 1, alpha = 1, shape = 16) + 180 | guides(colour = guide_legend(override.aes = list(size=2, alpha = 1))) + 181 | scale_colour_manual(values = an_cols_mp) + 182 | labs(color = "mp_cons", x = "", y = "") + 183 | theme_classic() + 184 | geom_vline(xintercept = seq(x_range[1], x_range[2], length.out = 4)) + 185 | geom_hline(yintercept = seq(y_range[1], y_range[2], length.out = 4)) + 186 | scale_y_reverse() + 187 | guides(color="none") + 188 | theme(axis.text.x = element_text(angle = 45, hjust = 1)) 189 | , 190 | ggplot(cod, aes(x=spot_x, y=spot_y, color=mp_cons)) + 191 | geom_point(size = 1, alpha=1, shape = 16) + 192 | scale_colour_manual(values = an_cols_mp) + 193 | guides(colour = guide_legend(override.aes = list(size=2, alpha = 1))) + 194 | labs(color = "mp_cons", x = "", y = "") + 195 | geom_vline(xintercept = seq(x_range[1], x_range[2], length.out = 4)) + 196 | geom_hline(yintercept = seq(y_range[1], y_range[2], length.out = 4)) + 197 | theme_classic() + 198 | scale_y_reverse() + 199 | guides(color="none") + 200 | theme(axis.text.x = element_text(angle = 45, hjust = 1)) 201 | , 202 | nrow = 2 203 | ) 204 | } 205 | 206 | Get_prop_9 <- function(sample="ZH1019inf", min_spots_per_tile = 50) { 207 | 208 | mp_cons <- tibble(mp_cons = vspots$mp_cons %>% unique()) %>% 209 | drop_na() 210 | 211 | vis <- spots_visium_aligned %>% 212 | filter(spot_id_sample %in% intersect(spots_codex_aligned$spot_id_sample, spots_visium_aligned$spot_id_sample)) %>% 213 | filter(sample == {{sample}}) %>% 214 | select(spot_id, mp_cons, spot_x, spot_y) 215 | 216 | cod <- spots_codex_aligned %>% 217 | filter(spot_id_sample %in% intersect(spots_codex_aligned$spot_id_sample, spots_visium_aligned$spot_id_sample)) %>% 218 | filter(sample == {{sample}}) %>% 219 | mutate(mp_cons = mp_cons) %>% 220 | select(spot_id, mp_cons, spot_x, spot_y) 221 | 222 | ## For vis 223 | # Calculate the range of x and y coordinates 224 | x_range <- range(vis$spot_x) 225 | y_range <- range(vis$spot_y) 226 | 227 | # Define the size of the tiles (one-third of the range) 228 | tile_size_x <- (x_range[2] - x_range[1]) / 3 229 | tile_size_y <- (y_range[2] - y_range[1]) / 3 230 | 231 | # Initialize a list to store the 3x3 tiles 232 | tiles_vis <- vector("list", length = 9) 233 | 234 | # Loop through each tile 235 | for (i in 1:3) { 236 | for (j in 1:3) { 237 | # Define the boundaries of the current tile 238 | x_min <- x_range[1] + (i - 1) * tile_size_x 239 | x_max <- x_range[1] + i * tile_size_x 240 | y_min <- y_range[1] + (j - 1) * tile_size_y 241 | y_max <- y_range[1] + j * tile_size_y 242 | 243 | # Extract cells within the current tile 244 | current_tile <- vis[ 245 | vis$spot_x >= x_min & vis$spot_x <= x_max & 246 | vis$spot_y >= y_min & vis$spot_y <= y_max, ] 247 | 248 | # Store the current tile in the list 249 | tiles_vis[[(i - 1) * 3 + j]] <- current_tile 250 | } 251 | } 252 | 253 | names(tiles_vis) <- paste0("vis_", 1:9) 254 | 255 | ## For cod 256 | # Calculate the range of x and y coordinates 257 | x_range <- range(cod$spot_x) 258 | y_range <- range(cod$spot_y) 259 | 260 | # Define the size of the tiles (one-third of the range) 261 | tile_size_x <- (x_range[2] - x_range[1]) / 3 262 | tile_size_y <- (y_range[2] - y_range[1]) / 3 263 | 264 | # Initialize a list to store the 3x3 tiles 265 | tiles_cod <- vector("list", length = 9) 266 | 267 | # Loop through each tile 268 | for (i in 1:3) { 269 | for (j in 1:3) { 270 | # Define the boundaries of the current tile 271 | x_min <- x_range[1] + (i - 1) * tile_size_x 272 | x_max <- x_range[1] + i * tile_size_x 273 | y_min <- y_range[1] + (j - 1) * tile_size_y 274 | y_max <- y_range[1] + j * tile_size_y 275 | 276 | # Extract cells within the current tile 277 | current_tile <- cod[ 278 | cod$spot_x >= x_min & cod$spot_x <= x_max & 279 | cod$spot_y >= y_min & cod$spot_y <= y_max, ] 280 | 281 | # Store the current tile in the list 282 | tiles_cod[[(i - 1) * 3 + j]] <- current_tile 283 | } 284 | } 285 | 286 | names(tiles_cod) <- paste0("cod_", 1:9) 287 | 288 | list_prop <- lapply(1:9, function(x) { 289 | x <- left_join( 290 | tiles_vis[[x]] %>% 291 | count(mp_cons) %>% 292 | mutate(prop_vis = n/sum(n)) 293 | , 294 | tiles_cod[[x]] %>% 295 | count(mp_cons) %>% 296 | mutate(prop_cod = n/sum(n)) 297 | , 298 | by = "mp_cons") %>% 299 | right_join(., mp_cons, by = "mp_cons") %>% 300 | replace_na(list(prop_cod = 0, prop_vis = 0, n.x = 0, n.y = 0)) 301 | 302 | }) 303 | 304 | 305 | # Only use quadrants that cover > 150 spots 306 | filtered_list_prop <- lapply(list_prop, function(df) { 307 | if(sum(df$n.x) > min_spots_per_tile) { 308 | return(df) 309 | } 310 | }) 311 | 312 | prop_final <- do.call(rbind, filtered_list_prop) 313 | 314 | return(prop_final) 315 | } 316 | 317 | ## Min distance to cell type with distribution 318 | 319 | Min_dist_to_nth_cell_type_distribution <- function(cell_type = "Vasc", 320 | sample = "ZH916_INF", 321 | n = 1, 322 | cell_type_column = "cell_type_figure") { 323 | # Create distance matrix 324 | cells_clean <- cells %>% filter(cell_type_figure %ni% c("excluded")) 325 | 326 | dist <- cells_clean %>% 327 | filter(sample == {{sample}}) %>% 328 | select(c(centroid_x,centroid_y)) %>% 329 | dist() %>% 330 | as.matrix() 331 | 332 | # Find the index of the closest spot of cell_type "endothelial 333 | ix_endothelial <- cells_clean %>% 334 | filter(sample == {{sample}}) %>% 335 | pull({{cell_type_column}}) == {{cell_type}} 336 | 337 | 338 | # Calculate min distance for each spot to the closest spot of cell type {{cell_type}} 339 | min_dist_to_endothelial <- apply(dist, 1, function(x) sort(x[ix_endothelial])[n]) 340 | 341 | 342 | 343 | # Calculate average distance per cell type to closest endothelial cell 344 | cells_clean <- cells_clean %>% 345 | filter(sample == {{sample}}) %>% 346 | select(cell_name, {{cell_type_column}}, sample) %>% 347 | cbind(min_dist_to_endothelial) %>% 348 | group_by(!!sym(cell_type_column)) #%>% 349 | # summarise(mean_dist = mean(min_dist_to_endothelial), 350 | # median_dist = median(min_dist_to_endothelial)) %>% 351 | # arrange(desc(mean_dist)) 352 | 353 | rm(dist) 354 | gc() 355 | 356 | return(cells_clean) 357 | } 358 | 359 | ## Test neighborhood with shuffling 360 | 361 | Test_nhood <- function(sample = "ZH1019_INF", 362 | radius = 27.5, 363 | cell_type_figure_column = "cell_type_figure", 364 | fraction_coherance = 0.8, 365 | iter = 10, 366 | workers = 10) { 367 | 368 | # Observed part 369 | cells_clean <- cells %>% 370 | filter(cell_type_figure %ni% c("excluded")) %>% 371 | filter(sample %in% c({{sample}})) 372 | 373 | spe_clean <- SpatialExperiment::SpatialExperiment( 374 | sample_id = cells_clean$sample, 375 | spatialCoords = as.matrix(cells_clean %>% select(c(centroid_x, centroid_y))), 376 | rowData = marker, 377 | colData = list(cell_name = cells_clean$cell_name, 378 | cell_type_figure = as.factor(cells_clean[[cell_type_figure_column]]), 379 | centroid_x = cells_clean$centroid_x, 380 | centroid_y = cells_clean$centroid_y, 381 | img_id = cells_clean$sample, 382 | all = rep("all", dim(cells_clean)[1])) 383 | ) 384 | 385 | spe_clean <- imcRtools::buildSpatialGraph(spe_clean, 386 | img_id = "sample_id", 387 | type = "expansion", 388 | threshold = radius, 389 | coords = c("centroid_x", "centroid_y"), 390 | name = "expansion_graph") 391 | 392 | 393 | spe_clean <- imcRtools::aggregateNeighbors(spe_clean, 394 | colPairName = "expansion_graph", 395 | aggregate_by = "metadata", 396 | count_by = "cell_type_figure", 397 | name = "nhood_mat", 398 | proportions = F) 399 | 400 | 401 | nhood_mat <- cbind((SingleCellExperiment::colData(spe_clean)[["nhood_mat"]]), 402 | from_cell_type_figure = cells_clean[[cell_type_figure_column]]) %>% as_tibble 403 | 404 | 405 | nhood_mat_num <- nhood_mat %>% select_if(is.numeric) 406 | 407 | row_sums <- nhood_mat_num %>% rowSums() 408 | fraction_coherance_mat <- nhood_mat_num/row_sums 409 | drop_rows <- apply(fraction_coherance_mat, 1, function(x) any(x > fraction_coherance)) 410 | nhood_norm_coherant <- nhood_mat[!drop_rows,] 411 | nhood_norm_coherant <- nhood_norm_coherant %>% drop_na() 412 | 413 | ## Summarise interactions 414 | 415 | nhood_sum <- nhood_norm_coherant %>% 416 | group_by(from_cell_type_figure) %>% 417 | summarise(across(where(is.numeric), sum)) 418 | 419 | ## Calc rowSums and subtract self-pairs 420 | nhood_rowSums <- nhood_sum %>% 421 | select_if(is.numeric) %>% 422 | rowSums() 423 | 424 | ## Pivot_long and extract self-pairs 425 | self_pair_count <- nhood_sum %>% 426 | pivot_longer(!from_cell_type_figure, names_to = "to_cell_type_figure", values_to = "count") %>% 427 | filter(from_cell_type_figure == to_cell_type_figure) %>% 428 | pull(count) 429 | 430 | ## Normalize nhood_sum by difference rowSums and self_pair_count 431 | 432 | nhood_norm <- sweep(nhood_sum %>% select_if(is.numeric), 1, (nhood_rowSums-self_pair_count), "/") %>% 433 | cbind(from_cell_type_figure = nhood_sum$from_cell_type_figure, .) %>% as_tibble() 434 | 435 | nhood_norm_obs <- nhood_norm %>% 436 | pivot_longer(!from_cell_type_figure, names_to = "to_cell_type_figure", values_to = paste0("obs_count_" ,{{sample}})) 437 | 438 | # Shuffeled part 439 | 440 | Tmp <- function(sample. = sample, 441 | radius. = radius, 442 | cell_type_figure_column. = cell_type_figure_column, 443 | fraction_coherance. = fraction_coherance, 444 | iter. = iter, 445 | workers. = workers) 446 | { 447 | 448 | shuffled_counts <- BiocParallel::bplapply(1:iter., 449 | BPPARAM = BiocParallel::MulticoreParam(workers = workers., progressbar = T), 450 | function(i) { 451 | 452 | cells_clean <- cells %>% 453 | filter(cell_type_figure %ni% c("excluded")) %>% 454 | filter(sample %in% c(sample.)) 455 | 456 | cells_clean[[cell_type_figure_column.]] <- sample(cells_clean[[cell_type_figure_column.]]) 457 | 458 | spe_clean <- SpatialExperiment::SpatialExperiment( 459 | sample_id = cells_clean$sample, 460 | spatialCoords = as.matrix(cells_clean %>% select(c(centroid_x, centroid_y))), 461 | rowData = marker, 462 | colData = list(cell_name = cells_clean$cell_name, 463 | cell_type_figure = as.factor(cells_clean[[cell_type_figure_column.]]), 464 | centroid_x = cells_clean$centroid_x, 465 | centroid_y = cells_clean$centroid_y, 466 | img_id = cells_clean$sample, 467 | all = rep("all", dim(cells_clean)[1])) 468 | ) 469 | 470 | spe_clean <- imcRtools::buildSpatialGraph(spe_clean, 471 | img_id = "sample_id", 472 | type = "expansion", 473 | threshold = radius., 474 | coords = c("centroid_x", "centroid_y"), 475 | name = "expansion_graph") 476 | 477 | 478 | spe_clean <- imcRtools::aggregateNeighbors(spe_clean, 479 | colPairName = "expansion_graph", 480 | aggregate_by = "metadata", 481 | count_by = "cell_type_figure", 482 | name = "nhood_mat", 483 | proportions = F) 484 | 485 | 486 | nhood_mat <- cbind((SingleCellExperiment::colData(spe_clean)[["nhood_mat"]]), 487 | from_cell_type_figure = cells_clean[[cell_type_figure_column.]]) %>% as_tibble 488 | 489 | 490 | nhood_mat_num <- nhood_mat %>% select_if(is.numeric) 491 | 492 | row_sums <- nhood_mat_num %>% rowSums() 493 | fraction_coherance_mat <- nhood_mat_num/row_sums 494 | drop_rows <- apply(fraction_coherance_mat, 1, function(x) any(x > fraction_coherance.)) 495 | nhood_norm_coherant <- nhood_mat[!drop_rows,] 496 | nhood_norm_coherant <- nhood_norm_coherant %>% drop_na() 497 | 498 | ## Summarise interactions 499 | 500 | nhood_sum <- nhood_norm_coherant %>% 501 | group_by(from_cell_type_figure) %>% 502 | summarise(across(where(is.numeric), sum)) 503 | 504 | ## Calc rowSums and subtract self-pairs 505 | nhood_rowSums <- nhood_sum %>% 506 | select_if(is.numeric) %>% 507 | rowSums() 508 | 509 | ## Pivot_long and extract self-pairs 510 | self_pair_count <- nhood_sum %>% 511 | pivot_longer(!from_cell_type_figure, names_to = "to_cell_type_figure", values_to = "count") %>% 512 | filter(from_cell_type_figure == to_cell_type_figure) %>% 513 | pull(count) 514 | 515 | ## Normalize nhood_sum by difference rowSums and self_pair_count 516 | 517 | nhood_norm <- sweep(nhood_sum %>% select_if(is.numeric), 1, (nhood_rowSums-self_pair_count), "/") %>% 518 | cbind(from_cell_type_figure = nhood_sum$from_cell_type_figure, .) %>% as_tibble() 519 | 520 | nhood_norm_shuff <- nhood_norm %>% 521 | pivot_longer(!from_cell_type_figure, names_to = "to_cell_type_figure", values_to = paste0("shuff_", i)) #%>% 522 | #select(paste0("shuff_", i)) 523 | 524 | return(nhood_norm_shuff) 525 | } 526 | ) 527 | 528 | } 529 | 530 | list_nhood_norm_shuff <- Tmp() 531 | 532 | nhood_norm_shuff <- 533 | left_join(nhood_norm_obs, 534 | reduce( 535 | list_nhood_norm_shuff, 536 | left_join, 537 | by = c("from_cell_type_figure", "to_cell_type_figure") 538 | )) %>% as_tibble() 539 | 540 | nhood_scaled <- 541 | nhood_norm_shuff %>% select_if(is.numeric) %>% t %>% scale(center = T, scale = T) %>% t 542 | 543 | nhood_scaled <- 544 | cbind( 545 | nhood_norm_shuff %>% select(from_cell_type_figure, to_cell_type_figure), 546 | nhood_scaled 547 | ) %>% as_tibble 548 | 549 | 550 | return(nhood_scaled) 551 | 552 | } 553 | 554 | ## Summarize nhood 555 | 556 | Summarise_nhood <- function(sample) { 557 | 558 | tmp <- sample 559 | 560 | tmp$z_score <- tmp[[3]] 561 | tmp$perm_min <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, min) 562 | tmp$perm_max <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, max) 563 | tmp$perm_mean <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, mean) 564 | tmp$perm_median <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, median) 565 | tmp$perm_sd <- apply(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")), 1, sd) 566 | 567 | tmp$count_larger <- rowSums(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")) > tmp[[3]]) 568 | tmp$count_smaller <- rowSums(tmp %>% select_if(str_detect(colnames(.), pattern = "shuff")) < tmp[[3]]) 569 | 570 | tmp$interaction_type <- if_else(tmp[[3]] >0, true = "attraction", false = "avoidance") 571 | tmp$p_value <- if_else(tmp[[3]]>0, true = 1/tmp$count_smaller, false = 1/tmp$count_larger) 572 | tmp$significant <- if_else(tmp$p_value < 0.05, true = TRUE, false = FALSE) 573 | tmp$pair <- paste0(tmp$from_cell_type_figure, "_", tmp$to_cell_type_figure) 574 | 575 | tmp <- tmp %>% select(from_cell_type_figure, 576 | to_cell_type_figure, 577 | z_score, 578 | interaction_type, 579 | p_value, 580 | significant, 581 | perm_mean, 582 | perm_median, 583 | perm_min, 584 | perm_max, 585 | perm_sd, 586 | count_larger, 587 | count_smaller, 588 | pair) 589 | 590 | return(tmp) 591 | 592 | } 593 | 594 | 595 | ## Plot summary 596 | 597 | Plot_summary <- function(list_nhood = list_nhood, cell_type_figure_column) { 598 | 599 | tmp <- BiocParallel::bplapply(list_nhood, 600 | FUN = function(x) Summarise_nhood(sample = x), 601 | BPPARAM = BiocParallel::MulticoreParam(workers = 6, progressbar = T)) 602 | 603 | nhood_summary <- do.call(rbind.data.frame, tmp) 604 | nhood_summary <- nhood_summary %>% select(from_cell_type_figure, to_cell_type_figure, interaction_type, z_score) 605 | nhood_summary <- nhood_summary %>% 606 | group_by(from_cell_type_figure, to_cell_type_figure, interaction_type) %>% 607 | summarise(mean = mean(z_score), n = n()) %>% 608 | group_by(from_cell_type_figure, to_cell_type_figure) %>% 609 | arrange(-n) %>% 610 | slice(1) %>% 611 | ungroup() %>% 612 | complete(from_cell_type_figure = cells %>% filter({{cell_type_figure_column}} %ni% c("excluded")) %>% pull({{cell_type_figure_column}}) %>% unique(), 613 | to_cell_type_figure= cells %>% filter({{cell_type_figure_column}} %ni% c("excluded")) %>% pull({{cell_type_figure_column}}) %>% unique(), 614 | fill = list(mean = NA, n = 0)) 615 | 616 | nhood_summary$percent_samples <- (nhood_summary$n/12) 617 | 618 | return(nhood_summary) 619 | } 620 | 621 | 622 | levels_samples_structured <- c( 623 | "ZH916_T1_B", 624 | "ZH1007_NEC_B", 625 | "ZH916_2_B", 626 | "ZH1019_T1_A", 627 | "ZH1041_T1_B", 628 | "ZH881_T1_v6", 629 | "ZH881_INF_v6", 630 | "ZH881_1_B", 631 | "ZH1007_INF" 632 | ) 633 | 634 | levels_samples_disorganised <- c( 635 | "MGH258_v6", 636 | "ZH1019_INF", 637 | "ZH916_INF") 638 | 639 | levels_samples <- 640 | c( 641 | "ZH916_INF", 642 | "ZH881_INF_v6", 643 | "ZH1007_INF", 644 | "ZH1019_INF", 645 | "ZH916_T1_B", 646 | "ZH881_T1_v6", 647 | "ZH1007_NEC_B", 648 | "ZH1019_T1_A", 649 | "ZH916_2_B", 650 | "ZH881_1_B", 651 | "ZH1041_T1_B", 652 | "MGH258_v6" 653 | ) 654 | 655 | key_sample <- 656 | tibble( 657 | codex = c( 658 | "ZH916_INF", 659 | "ZH881_INF_v6", 660 | "ZH1007_INF", 661 | "ZH1019_INF", 662 | "ZH916_T1_B", 663 | "ZH881_T1_v6", 664 | "ZH1007_NEC_B", 665 | "ZH1019_T1_A", 666 | "ZH881_1_B", 667 | "MGH258_v6" 668 | ), 669 | visium = c( 670 | "ZH916inf", 671 | "ZH881inf", 672 | "ZH1007inf", 673 | "ZH1019inf", 674 | "ZH916T1", 675 | "ZH881T1", 676 | "ZH1007nec", 677 | "ZH1019T1", 678 | "ZH8811Bbulk", 679 | "MGH258" 680 | ) 681 | ) 682 | 683 | # Import data ------------------------------------------------------------- 684 | ##Import log-transformed and normalized matrix and cell_table 685 | nmat <- readRDS("Codex/mat_norm.RDS") 686 | cells <- readRDS("Codex/cells_table.RDS") 687 | marker <- readRDS("Codex/marker.RDS") 688 | 689 | vspots <- readRDS("Codex/visium_spots.RDS") 690 | visium_positions <- readRDS("Codex/visium_spot_positions_all.RDS") 691 | spots_visium_aligned <- readRDS("Codex/spots_aligned_visium.RDS") 692 | spots_codex_aligned <- readRDS("Codex/pseudospots_aligned_codex.RDS") 693 | cells_aligned <- readRDS("Codex/cells_aligend_codex.RDS") 694 | scalef <- readRDS("Codex/scale_factors_hires.RDS") 695 | 696 | spot_dom <- readRDS("Codex/CODEX_pseudospot_dominant.RDS") 697 | spot_comp <- readRDS("Codex/CODEX_pseudospot_composition.RDS") 698 | 699 | 700 | # Calculate cell densities per cell type ----------------------------------- 701 | ## Create SpatialExperiment (spe) 702 | cells_clean <- cells %>% filter(cell_type_figure %ni% c("excluded")) 703 | 704 | spe <- SpatialExperiment::SpatialExperiment(assays = list(logcounts = nmat[, which(cells$cell_type_figure %ni% c("excluded"))]), 705 | sample_id = cells_clean$sample, 706 | spatialCoords = as.matrix(cells_clean %>% select(c(centroid_x, centroid_y))), 707 | rowData = marker, 708 | colData = list(cell_name = cells_clean$cell_name, 709 | cell_type_figure = as.factor(cells_clean$cell_type_figure), 710 | ivygap = as.factor(cells_clean$ivygap), 711 | region = as.factor(cells_clean$region), 712 | centroid_x = cells_clean$centroid_x, 713 | centroid_y = cells_clean$centroid_y, 714 | img_id = cells_clean$sample, 715 | all = rep("all", dim(nmat[, which(cells$cell_type_figure %ni% c("excluded"))])[2])) 716 | ) 717 | 718 | ## Create spatial graph by centroid expansion and add to spe (colPair) 719 | spe <- imcRtools::buildSpatialGraph(spe, 720 | img_id = "sample_id", 721 | type = "expansion", 722 | threshold = 27.5, 723 | coords = c("centroid_x", "centroid_y"), 724 | name = "expansion_graph") 725 | 726 | SingleCellExperiment::colPairNames(spe) #shows constructed graph 727 | colPair(spe, "expansion_graph") # prints summary of constructed graph 728 | colPair(spe, "expansion_graph") %>% class # 729 | 730 | # The graph is stored in form of a SelfHits object in colPair(object, name). This object can be regarded as an edgelist and coerced to an igraph object via: 731 | 732 | igraph::graph_from_edgelist(as.matrix(colPair(spe, "expansion_graph"))) 733 | 734 | ## Aggregate neighbors and create neighborhood matrix 735 | spe <- imcRtools::aggregateNeighbors(spe, 736 | colPairName = "expansion_graph", 737 | aggregate_by = "metadata", 738 | count_by = "cell_type_figure", 739 | name = "nhood_mat") 740 | 741 | as_tibble(SingleCellExperiment::colData(spe)[["nhood_mat"]]) %>% dim 742 | as_tibble(SingleCellExperiment::colData(spe)[["nhood_mat"]]) 743 | 744 | nhood <- as_tibble(SingleCellExperiment::colData(spe)[["nhood_mat"]]) 745 | 746 | ## Aggregate neighbors for cell density plots 747 | spe <- imcRtools::aggregateNeighbors(spe, 748 | colPairName = "expansion_graph", 749 | aggregate_by = "metadata", 750 | count_by = "cell_type_figure", 751 | name = "nhood_abs_mat", 752 | proportions = F) 753 | 754 | nhood_abs <- SingleCellExperiment::colData(spe)[["nhood_abs_mat"]] %>% as_tibble() 755 | nhood_abs <- nhood_abs %>% 756 | mutate(count = rowSums(nhood_abs)) %>% 757 | mutate(cell_type_figure = cells_clean$cell_type_figure) %>% 758 | mutate(ivygap = cells_clean$ivygap) 759 | 760 | 761 | ## Add malignant column for later plots 762 | nhood_abs <- nhood_abs %>% 763 | mutate(malignant = if_else(cell_type_figure %in% c("Chromatin-Reg","MES-Hyp", "MES", "AC", "OPC", "NPC"), true = "malignant", false = cell_type_figure)) %>% 764 | mutate(malignant = if_else(cell_type_figure %in% c("Mac", "Inflammatory-Mac", "T-cell", "B-cell", "Vasc"), true = "immune", false = malignant)) %>% 765 | mutate(malignant = if_else(cell_type_figure %in% c("Reactive-Ast", "Oligo", "Neuron"), true = "normal", false = malignant)) 766 | 767 | # Plot boxplot per cell type 768 | ggplot(nhood_abs, aes(x=fct_reorder(cell_type, count, .desc = F), y=count, fill=cell_type)) + 769 | stat_boxplot(geom = "errorbar", color = "black") + 770 | geom_boxplot(color = "black", outlier.shape = NA, notch = T) + 771 | scale_fill_manual(values = an_cols_cell_type) + 772 | theme_classic() + 773 | theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), 774 | legend.position = "none", 775 | axis.text = element_text(size = 20, color = "black"), 776 | axis.title = element_text(size = 25)) + 777 | labs(x=NULL, y="cell count per spot-radius") + 778 | ylim(0,30) 779 | 780 | 781 | # Co-localisation analysis ------------------------------------------------ 782 | ## 1. See custom function: Test neighborhood 783 | ## 2. Run function for 1 visium spot resolution (r=27.5) for all samples: 784 | 785 | list_nhood <- lapply(cells$sample %>% unique(), function(x) Test_nhood(sample = x, 786 | iter = 500, 787 | workers = 100, 788 | cell_type_figure_column = "cell_type_tidy", 789 | radius = 27.5)) 790 | 791 | names(list_nhood) <- cells$sample %>% unique() 792 | 793 | ## 3. Summarize and plot the results 794 | 795 | levels_layers <- 796 | c( 797 | "Neuron", 798 | "NPC", 799 | "Oligo", 800 | "Reactive_Ast", 801 | "OPC", 802 | "AC", 803 | "Mac", 804 | "Vasc", 805 | "T_cell", 806 | "B_cell", 807 | "MES", 808 | "Inflammatory_Mac", 809 | "MES_Hyp", 810 | "Chromatin_Reg" 811 | ) 812 | 813 | 814 | tmp1 <- Plot_summary(list_nhood = list_nhood[levels_samples_structured], cell_type_figure_column = "cell_type_tidy") 815 | 816 | tmp2 <- Plot_summary(list_nhood = list_nhood[levels_samples_disorganised], cell_type_figure_column = "cell_type_tidy") 817 | 818 | 819 | #plot 10*22 820 | ggpubr::ggarrange( 821 | 822 | 823 | ggplot(tmp1 %>% 824 | filter(from_cell_type_figure != to_cell_type_figure) %>% 825 | drop_na(), 826 | aes(factor(to_cell_type_figure, levels = levels_layers), factor(from_cell_type_figure, levels = levels_layers), col=mean, size=percent_samples)) + 827 | geom_tile(col = "black", size=0, fill = "white") + 828 | geom_point(shape = 19) + 829 | scale_color_gradient2(low = "white", high = "#b2182b", midpoint=0, na.value = "white") + 830 | scale_size(range=c(9,16),limits = c(0.1,1)) + 831 | theme_classic() + 832 | guides(size = guide_legend(override.aes = list(color = "black"))) + 833 | theme(axis.text.x = element_text(angle = 45, vjust = 0.9, hjust = 1,size=24)) + 834 | theme(axis.text.y =element_text(size=24)) + 835 | labs(x=NULL, y=NULL, size = "significant in \nfraction of samples") + 836 | ggtitle("CODEX structured - spot 1") 837 | 838 | , 839 | 840 | ggplot(tmp2 %>% 841 | filter(from_cell_type_figure != to_cell_type_figure) %>% 842 | drop_na(), 843 | aes(factor(to_cell_type_figure, levels = levels_layers), factor(from_cell_type_figure, levels = levels_layers), col=mean, size=percent_samples)) + 844 | geom_tile(col = "black", size=0, fill = "white") + 845 | geom_point(shape = 19) + 846 | scale_color_gradient2(low = "white", high = "#b2182b", midpoint=0, na.value = "white") + 847 | scale_size(range=c(9,16),limits = c(0.1,1)) + 848 | theme_classic() + 849 | guides(size = guide_legend(override.aes = list(color = "black"))) + 850 | theme(axis.text.x = element_text(angle = 45, vjust = 0.9, hjust = 1,size=24)) + 851 | theme(axis.text.y =element_text(size=24)) + 852 | labs(x=NULL, y=NULL, size = "significant in \nfraction of samples") + 853 | ggtitle("CODEX disorganised - spot 1") 854 | ) 855 | 856 | 857 | 858 | ## Structured only 859 | ggplot(tmp1 %>% 860 | filter(from_cell_type_figure != to_cell_type_figure) %>% 861 | drop_na(), 862 | aes(factor(to_cell_type_figure, levels = levels_layers), factor(from_cell_type_figure, levels = levels_layers), col=mean, size=percent_samples)) + 863 | geom_tile(col = "white", size=0, fill = "white") + 864 | geom_point(shape = 19) + 865 | scale_color_gradient2( 866 | low = "white", 867 | high = "#b2182b", 868 | midpoint = 0, 869 | na.value = "white", 870 | limit = c(0, 20), 871 | oob = scales::squish, 872 | guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")) + 873 | scale_size(range=c(5,12),limits = c(0.1,1)) + 874 | theme_classic() + 875 | theme(axis.text.x = element_text( 876 | angle = 40, 877 | vjust = 1, 878 | hjust = 1, 879 | size = 20)) + 880 | theme(axis.text.y = element_text(size = 20)) + 881 | theme(legend.text = element_text(size = 20)) + 882 | theme(legend.title = element_text(size = 20)) + 883 | labs(x=NULL, y=NULL, size = "significant in \nfraction of samples") 884 | 885 | ## 4. Calculate mean of each reciprocal pair for network graph 886 | 887 | pair_struct_mean <- tmp1 %>% 888 | rowwise() %>% 889 | mutate(pair = paste0(sort(c(from_cell_type_figure, to_cell_type_figure)), collapse = ".")) %>% 890 | arrange(pair) %>% 891 | filter(from_cell_type_figure != to_cell_type_figure) %>% 892 | filter(from_cell_type_figure %ni% c("excluded")) %>% 893 | filter(to_cell_type_figure %ni% c("excluded")) 894 | 895 | pair_struct_mean <- pair_struct_mean %>% 896 | group_by(pair) %>% 897 | summarise(pair_mean = mean(mean), 898 | pair_n = mean(n), 899 | pair_percent_samples = mean(percent_samples)) 900 | 901 | pair_struct_mean <- pair_struct_mean %>% separate(col = pair, into = c("from", "to"), sep = "\\.", remove = F) 902 | 903 | write_tsv(pair_struct_mean %>% select(-pair), file = "pair_struct_mean.tsv") 904 | 905 | 906 | ## 5. Calculate max of each pair for network graph (To not loose assymetric pairs like MES-Hyp + MES) 907 | pair_struct_max <- tmp1 %>% 908 | rowwise() %>% 909 | mutate(pair = paste0(sort(c(from_cell_type_figure, to_cell_type_figure)), collapse = ".")) %>% 910 | arrange(pair) %>% 911 | filter(from_cell_type_figure != to_cell_type_figure) %>% 912 | filter(from_cell_type_figure %ni% c("excluded")) %>% 913 | filter(to_cell_type_figure %ni% c("excluded")) 914 | 915 | pair_struct_max <- pair_struct_max %>% 916 | arrange(pair, desc(mean)) %>% 917 | group_by(pair) %>% 918 | filter(row_number() == 1) %>% 919 | ungroup() %>% 920 | arrange(desc(mean)) 921 | 922 | 923 | # Create aligned pseudospots ---------------------------------------------- 924 | ## Create list of df with per sample CODEX pseudospots 925 | list_spots_aligned <- map2(.x = key_sample$codex, 926 | .y = key_sample$visium, 927 | .f = ~Create_aligned_pseudospots(sample_codex = .x, sample_visium = .y)) 928 | 929 | names(list_spots_aligned) <- key_sample$visium 930 | 931 | ## Flatten list do df 932 | list_spots_aligned <- map(names(list_spots_aligned), 933 | function(x) list_spots_aligned[[x]] %>% mutate(sample = x) 934 | ) 935 | 936 | spots_codex_aligned <- do.call(rbind, list_spots_aligned) %>% 937 | mutate(spot_id_sample = paste0(sample, "_", spot_id)) %>% 938 | rename(mp_cons = cell_type_tidy) %>% 939 | drop_na() 940 | 941 | ## Filter aligned visium spots 942 | spots_visium_aligned <- vspots %>% 943 | filter(sample %in% key_sample$visium) %>% 944 | mutate(spot_id_sample = paste0(sample, "_", spot_id)) 945 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spatial_Glioma 2 | 3 | This resource provides the R code to reproduce key results described in Greenwald, Galili Darnell, Hoefflin, et al. "Integrative spatial analysis reveals a multi-layered organization of glioblastoma". 4 | 5 | The analyses are divided into 6 main modules: 6 | 7 | Module 1: Per sample clustering, metaprogram generation, and spot annotation 8 | 9 | Module 2: Spatial coherence and organizational zones 10 | 11 | Module 3: Measures of spatial associations 12 | 13 | Module 4: defining consensus interactions 14 | 15 | Module 5: Spatial CNA inference 16 | 17 | Module 6: CODEX analysis 18 | 19 | ## Getting started 20 | 1. Clone Github repository. 21 | 2. Download and extract the data provided in [Inputs.zip](https://drive.google.com/file/d/1YFOwEDFLpyxG6WX243FNA7lnrVSaiVL-/view?usp=sharing) 22 | 3. Set the working directory to Inputs. 23 | 4. Run one of the 6 code modules in R. 24 | 25 | ## General notes 26 | 27 | Please note results of modules 1-3 might slightly differ dependending on the version of R/R packages used. 28 | 29 | Each code module can be run independently. 30 | 31 | The code uploaded here is the working code being used throughout the work on the project. 32 | 33 | The MP generation approach is based on our earlier work described in Kinker et al. 2020 and Gavish et al. 2023. Further documentation can be found [here](https://github.com/gabrielakinker/CCLE_heterogeneity) and [here](https://github.com/tiroshlab/3ca). 34 | 35 | We have updated Module 2 to include the code used to generate Figures 4A-C and clarified which code was used for different panels. 36 | 37 | The Inputs file contains an additional README regarding alignment of Visium and CODEX samples. 38 | 39 | You can find the Visium H&E [here](https://drive.google.com/file/d/19ROs5wKtDH-RLqELFFBeEexKScW20FJL/view?usp=drive_link). 40 | 41 | Requirements 42 | R (tested in version 4.1.1). 43 | --------------------------------------------------------------------------------