# ├── Finding.R
# ├── MergingOurs.R
# ├── MergingPaired.R
# ├── MergingPrimary.R
# ├── Scoring.R
# ├── Visium.R
# ├── experiments.R
# ├── part1.R
# ├── part1.sh
# ├── part2.R
# ├── part2.sh
# └── seurat_functions_public.R
#
# /Finding.R:
# --------------------------------------------------------------------------------
#!/usr/bin/env Rscript
# Finding.R: builds per-sample gene modules from the NMF results, links genes
# that co-occur in modules into a graph, clusters that graph into consensus
# modules and saves them. Plots go to Finding.pdf, printed output to Finding.txt.
# Helpers such as loadRData, NMFToModules and BuildEnrichmentMatrix are not
# defined here; presumably they come from the sourced seurat_functions.R.
source('~/Documents/R/seurat_functions.R')
setwd('~/Documents/Analysis/Tumors/')

pdf('Finding.pdf', height = 10, width = 12)
sink('Finding.txt')

# Provides (at least) files.primary, corr, v_min and s_min used below.
load('parameters.RData')

## Data ##

files = files.primary

res.list = lapply(files, function(f){
  loadRData(paste0(f,'res.malignant.RData'))
})

srt.list = lapply(files, function(f){
  loadRData(paste0(f,'srt.malignant.RData'))
})
# Gene universe: every gene present in at least one sample object.
genes.all = sort(unique(unlist(lapply(srt.list, rownames))))
names(genes.all) = genes.all

types = c('GO','KEGG','REACTOME','HALLMARK','MOTIF')
enrichment.matrix.list = lapply(types, function(type){
  BuildEnrichmentMatrix(genes = genes.all, type = type)
})
names(enrichment.matrix.list) = types

# PanglaoDB markers grouped by cell type, germ layer and organ.
ref = read.delim("PanglaoDB_markers_27_Mar_2020.tsv.gz")
ref$germ.layer = gsub('mesoderm','Mesoderm',ref$germ.layer)
byct = lapply(split(ref$official.gene.symbol, ref$cell.type), unique)
bygl = lapply(split(ref$official.gene.symbol, ref$germ.layer), unique)
byo = lapply(split(ref$official.gene.symbol, ref$organ), unique)
enrichment.matrix.list[['Cell.type']] = BuildEnrichmentMatrix(genes = genes.all, db = byct)
enrichment.matrix.list[['Germ.layer']] = BuildEnrichmentMatrix(genes = genes.all, db = bygl)
enrichment.matrix.list[['Organ']] = BuildEnrichmentMatrix(genes = genes.all, db = byo)

# Published signature tables (<set>.csv, one column per signature, padded with
# empty strings). Each set is also kept as a variable named after the set
# (gbm, hn, sk), as before.
signature.prefix = c(gbm = 'Neftel_', hn = 'Puram_', sk = 'Ji_')
for (set in names(signature.prefix)){
  sig = read.csv(paste0(set, '.csv'), header = TRUE, stringsAsFactors = FALSE)
  sig = lapply(as.list(sig), function(x){x[!x=='']})
  names(sig) = paste0(signature.prefix[[set]], names(sig))
  assign(set, sig)
  enrichment.matrix.list[[set]] = BuildEnrichmentMatrix(genes = genes.all, db = sig)
}
types = names(enrichment.matrix.list)

# Cancer type = first four characters of the sample name.
cancer = factor(substr(names(res.list), 1, 4))
system = factor(corr[as.character(cancer),'system'])

## Modules from graph ##

modules.list = lapply(res.list, NMFToModules)

# Filter non-overlapping modules: keep a module only if at least three modules
# (itself included) share a Jaccard index above 0.05 with it. Repeated five
# times because each removal can leave other modules under the threshold.
for (i in 1:5){
  all = unlist(modules.list, recursive = FALSE, use.names = TRUE)
  all = lapply(all, intersect, genes.all)
  sim = sapply(all, function(x){
    sapply(all, function(y){
      length(intersect(x,y))/length(union(x,y))
    })
  })
  # na.rm: a module left empty by the intersection has a 0/0 self-similarity;
  # without it that NaN turned into an NA entry in `keep`.
  keep = rownames(sim)[rowSums(sim > 0.05, na.rm = TRUE) >= 3]
  all = all[keep]
  modules.list = lapply(names(modules.list), function(x){
    li = modules.list[[x]]
    li[names(li)[paste(x,names(li),sep='.') %in% keep]]
  })
  names(modules.list) = names(res.list)
  ta = table(unlist(all))
  genes.use = names(ta)[ta > 1]
  print(length(all))
}

# Adjacency matrix, list by cancer: entry [x,y] counts the modules in which
# genes x and y occur together.
adj = matrix(0, nrow = length(genes.use), ncol = length(genes.use),
             dimnames = list(genes.use, genes.use))
adj.list = list()
for (can in levels(cancer)){
  sub = matrix(0, nrow = length(genes.use), ncol = length(genes.use),
               dimnames = list(genes.use, genes.use))
  for (s in names(modules.list)[cancer == can]){
    for (mod in modules.list[[s]]){
      mod = intersect(mod, genes.use)
      # Block update; same counts as looping over every gene pair.
      sub[mod, mod] = sub[mod, mod] + 1
    }
  }
  diag(sub) = 0
  adj.list[[can]] = sub
  #adj = adj + (sub > 0)
  adj = adj + sub
}
adj_keep = adj

# Remove low connections: binarise at v_min, then repeatedly drop genes with
# fewer than s_min remaining neighbours.
adj = adj_keep
adj[] = (adj >= v_min)
#adj[adj <= 1] = 0
for (i in 1:5){
  keep = names(which(rowSums(adj) >= s_min))
  adj = adj[keep, keep, drop = FALSE]
  print(dim(adj))
}

# Cluster
g = graph_from_adjacency_matrix(adj, diag = FALSE, mode = 'undirected', weighted = TRUE)
modules = communities(cluster_infomap(g, nb.trials = 100))
names(modules) = paste0('m_', sapply(modules, '[', 1))
# Each community is identified by its first member gene and mapped to a label.
module.anchors = c(Cycle = 'm_ACYP1', Stress = 'm_ABHD3', Interferon = 'm_ACTN1',
                   Hypoxia = 'm_ADM', Oxphos = 'm_ATOX1', Metal = 'm_FKBP5',
                   Mesenchymal = 'm_AQP1', pEMT = 'm_ACTB',
                   Alveolar = 'm_AGER', Basal = 'm_ALDH3A1', Squamous = 'm_AKR1B10',
                   Glandular = 'm_AGR2', Ciliated = 'm_AC007906.2',
                   AC = 'm_AGT', OPC = 'm_ASCL1', NPC = 'm_ARL4D')
# No seed is set before cluster_infomap, so an anchor can be absent in a given
# run; fail loudly instead of silently carrying NULL modules under these labels.
missing.anchors = setdiff(module.anchors, names(modules))
if (length(missing.anchors) > 0){
  stop('Expected communities not found: ', paste(missing.anchors, collapse = ', '))
}
modules = modules[unname(module.anchors)]
names(modules) = names(module.anchors)
modules
save(modules, file = 'modules.RData')

# The last three modules (AC, OPC, NPC) get fixed thistle shades.
colors.module = c(hue_pal()(length(modules)-3),
                  'thistle3',
                  'thistle2',
                  'thistle1')
names(colors.module) = names(modules)
save(colors.module, file = 'colors.module.RData')

# First six modules are labelled 'Process', the rest 'Type'.
types.module = c(rep('Process',6), rep('Type',length(modules)-6))
names(types.module) = names(modules)
save(types.module, file = 'types.module.RData')

# Export the modules as CSV, one column per module, padded with empty strings.
fi = data.frame(sapply(modules,'[', 1:max(sapply(modules, length))), stringsAsFactors = FALSE)
fi[is.na(fi)] = ''
write.table(fi, file = 'modules.csv', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)

## Enrichment ##

# Label every row (gene) of a DE table with the module containing it, or 'Other'.
AssignModule = function(data){
  data$type = 'Other'
  for (m in names(modules)){
    data[intersect(rownames(data),modules[[m]]), 'type'] = m
  }
  data
}

# Volcano plots of a DE table coloured by module (all module genes labelled,
# then one page per module), followed by a heatmap of mHG p-values for each
# module at either end of the fold-change ranking.
#   data   : DE table with gene row names and a 'type' column (see AssignModule)
#   fc, pv : names of the fold-change and p-value columns
#   groups : the two heatmap row labels (ascending ranking, descending ranking)
#   xlim   : optional x-axis limits
PlotModuleDE = function(data, fc, pv, groups, xlim = NULL){
  x = data[[fc]]
  y = -log10(data[[pv]])
  volcano = function(sel, ...){
    plot(x, y, xlim = xlim, ylim = c(0,50), pch = 20, cex = 0.5, col = colors.module[data$type], xlab = 'LogFC', ylab = 'p-value (-log10)', ...)
    abline(v = 0, lty = 2)
    # text() fails on empty labels; skip explicitly rather than swallowing any error.
    if (any(sel)){
      text(x[sel], y[sel], labels = rownames(data)[sel], cex = 1, col = colors.module[data$type[sel]])
    }
  }
  volcano(data$type %in% names(modules))
  for (m in names(modules)){
    volcano(data$type == m, main = m)
  }
  pvals = sapply(names(modules), function(m){
    hit = data$type == m
    a = mHG.test(hit[order(x, decreasing = FALSE)])$p.value
    b = mHG.test(hit[order(x, decreasing = TRUE)])$p.value
    -log10(c(a,b))
  })
  rownames(pvals) = groups
  h = Heatmap(pvals, name = 'mHG p-pvalue',
              row_names_gp = gpar(cex = 0.3), column_names_gp = gpar(cex = 0.3, col = colors.module),
              cluster_rows = FALSE, cluster_columns = FALSE,
              show_row_names = TRUE, show_column_names = TRUE,
              breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
  print(h)
  invisible(pvals)
}

# LUAD vs LUSC

data = read.csv('SCCvsAC.csv', row.names=1)
data$PValue.max = apply(data[,c('PValue.x', 'PValue.y')], 1, max)
data$logFC.mean = apply(data[,c('logFC.x', 'logFC.y')], 1, mean)
data = AssignModule(data)
pvals = PlotModuleDE(data, fc = 'logFC.mean', pv = 'PValue.max', groups = c('Squamous','Adenocarcinoma'))

# Primary vs Met

data.list = lapply(c('5K','10K','30K','100K'), function(x){
  data = read.csv(paste0("~/Documents/Analysis/Tumors/PrimaryMet",x,".csv"))
  data = data[!duplicated(data[,1]),]
  rownames(data) = data[,1]
  colnames(data) = paste0(colnames(data),'.',x)
  return(data)
})
# Keep genes present in all four tables; use the worst p-value and mean fold change.
keep = sort(Reduce(intersect, lapply(data.list, rownames)))
data = Reduce(cbind, lapply(data.list, function(x){x[keep,]}))
data$pvalues.max = apply(data[,grep('pvalues',colnames(data))], 1, max)
data$log2fc.mean = apply(data[,grep('log2fc',colnames(data))], 1, mean)
data = data[data$pvalues.max < 0.01,]
data = AssignModule(data)
pvals = PlotModuleDE(data, fc = 'log2fc.mean', pv = 'pvalues.max', groups = c('Primary','Metastasis'), xlim = c(-2,2))

# Enrichment

# For every annotation type: terms x modules matrix of -log10 enrichment p-values.
enrichment = lapply(types, function(type){
  enrichment.matrix = enrichment.matrix.list[[type]]
  e = sapply(modules, function(mod){
    Enrichment(mod, enrichment.matrix = enrichment.matrix)
  })
  e = -log10(e)
  e[is.infinite(e)] = max(e[is.finite(e)])
  e = e[!apply(is.na(e), 1, any), , drop = FALSE]
  rownames(e) = gsub(paste0(type,'_'), '', rownames(e))
  rownames(e) = gsub('_', ' ', rownames(e))
  rownames(e) = gsub('^ ', '', rownames(e))
  return(e)
})
names(enrichment) = types

# Rows of `e` that rank among the n.top strongest terms of at least one module.
# head() avoids the NA row names that order(...)[1:n.top] produced when fewer
# than n.top terms exist; drop = FALSE keeps a matrix when one term remains.
TopTerms = function(e, n.top){
  top = lapply(seq_len(ncol(e)), function(j){
    head(order(e[, j], decreasing = TRUE), n.top)
  })
  keep = unique(rownames(e)[unlist(top)])
  e[keep, , drop = FALSE]
}

# Top five terms per module, rows clustered.
for (e in enrichment){
  e = TopTerms(e, 5)
  h = Heatmap(e, name = 'Enrichment p-pvalue',
              row_names_gp = gpar(cex = 0.6), column_names_gp = gpar(cex = 0.6, col = colors.module),
              cluster_rows = TRUE, cluster_columns = FALSE, row_dend_reorder = seq_len(nrow(e)),
              show_row_names = TRUE, show_column_names = TRUE,
              breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
  print(h)
}

# Top five, two and one term(s) per module, rows in module order.
for (n.top in c(5, 2, 1)){
  for (e in enrichment){
    e = TopTerms(e, n.top)
    h = Heatmap(e, name = 'Enrichment p-pvalue',
                row_names_gp = gpar(cex = 0.6), column_names_gp = gpar(cex = 0.6, col = colors.module),
                cluster_rows = FALSE, cluster_columns = FALSE,
                show_row_names = TRUE, show_column_names = TRUE,
                breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
    print(h)
  }
}

## Graph plot ##

adj = adj_keep
adj = adj[unlist(modules),unlist(modules)]
adj[adj <= 1] = 0
g = graph_from_adjacency_matrix(adj, diag = FALSE, mode = 'undirected', weighted = TRUE)

# Layouts are computed before styling, as before; styling and labels do not
# depend on the layout and are prepared once.
layouts = list(layout.kamada.kawai(g), layout.fruchterman.reingold(g))
E(g)$width = E(g)$weight
col = rep(colors.module, times = sapply(modules, length))
names(col) = unlist(modules)
col = col[!duplicated(names(col))]
V(g)$color = col[names(V(g))]
E(g)$color = 'lightgrey'
genes.label = c('TOP2A','PCNA',
                'JUN', 'FOS',
                'IFIT1', 'STAT1','HLA-A','HLA-DRA',
                'HILDPA','VEGFA',
                'ATP5H','LAMTOR2',
                'MT1G','MT1E',
                'COL1A1','FN1',
                'LAMC2','VIM',
                'KRT5','KRT15',
                'KLK10','LY6D',
                'CLU','MUC5B','TFF1','CEACAM6',
                'AGER','CAV1',
                'FOXJ1','PIFO',
                'ALDOC','APOE',
                'OLIG1','OLIG2','SOX8',
                'DLX1','DLX5','SOX11')
lab = names(V(g))
lab[!lab %in% genes.label] = ''
for (coords in layouts){
  # Every gene as a small coloured label, no vertex symbol.
  plot(g,
       layout = coords,
       vertex.label.color = V(g)$color, vertex.label.cex = 0.5,
       vertex.shape = 'none', vertex.size = 0,
       frame=FALSE)
  # Vertices as dots, only the selected genes labelled.
  plot(g,
       layout = coords,
       vertex.label.color = V(g)$color, vertex.label.cex = 1, vertex.label = lab,
       vertex.shape = 'circle', vertex.size = 1, vertex.frame.color = NA,
       frame=FALSE)
}

## Overlap plots ##

# Similarity
# Rebuild the filtered per-sample modules (same filter as used for the graph).
modules.list = lapply(res.list, NMFToModules)
for (i in 1:5){
  all = unlist(modules.list, recursive = FALSE, use.names = TRUE)
  all = lapply(all, intersect, genes.all)
  sim = sapply(all, function(x){
    sapply(all, function(y){
      length(intersect(x,y))/length(union(x,y))
    })
  })
  # na.rm: an empty module has a 0/0 self-similarity, which otherwise put NA in `keep`.
  keep = rownames(sim)[rowSums(sim > 0.05, na.rm = TRUE) >= 3]
  all = all[keep]
  modules.list = lapply(names(modules.list), function(x){
    li = modules.list[[x]]
    li[names(li)[paste(x,names(li),sep='.') %in% keep]]
  })
  names(modules.list) = names(res.list)
  ta = table(unlist(all))
  genes.use = names(ta)[ta > 1]
  print(length(all))
}
# Hypergeometric overlap of each sample module with each consensus module,
# against a universe of 2*10^4 genes. phyper(q, lower.tail = FALSE) is
# P(X > q), so q is the overlap minus one to test for "at least this overlap".
sim = sapply(all, function(x){
  sapply(modules, function(y){
    pval = phyper(length(intersect(x, y)) - 1, length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE)
    return(-log10(pval))
  })
})
sim[is.infinite(sim)] = max(sim[is.finite(sim)])
sim = sim[, apply(sim, 2, function(x){any(x > 3)}), drop = FALSE]
# Row names of df are the 'sample.module' column names of sim.
df = data.frame('sample' = sapply(colnames(sim), function(x){
  strsplit(x, '.', fixed = TRUE)[[1]][1]
}))
df$cancer = substr(as.character(df$sample), 1, 4)
df$system = factor(corr[as.character(df$cancer),'system'])
df$top = apply(sim, 2, which.max)
df$top = factor(names(modules)[df$top], levels = names(modules))
# Is a consensus module over-represented in one system? Same P(X >= k) convention.
ta = table(df[c('top','system')])
pval = sapply(colnames(ta), function(y){
  sapply(rownames(ta), function(x){
    phyper(ta[x,y] - 1, sum(ta[,y]), sum(ta) - sum(ta[,y]), sum(ta[x,]), lower.tail = FALSE)
  })
})
pval = -log10(pval)
pval[is.infinite(pval)] = max(pval[is.finite(pval)])
# Threshold above 3 and above every module's second-best system, so at most
# one system per module can pass.
pval = (pval > max(3,max(apply(pval, 1, function(x){sort(x, decreasing = TRUE)[2]}))))
category = factor(apply(pval, 1, function(x){
  if (any(x)){
    return(names(which(x)))
  } else {
    return('Universal')
  }
}))
# category follows the level order of df$top, so indexing by the factor codes lines up.
df$category = category[df$top]
top_ann = HeatmapAnnotation(df = df[, c('sample','category','top')],
                            col = list('sample' = colors, 'category' = colors.system, 'top' = colors.module),
                            which = 'column')
side_ann = HeatmapAnnotation(df = df[, c('sample','category','top')],
                             col = list('sample' = colors, 'category' = colors.system, 'top' = colors.module),
                             which = 'row')
h = Heatmap(name = 'Module overlap p-value', sim,
            top_annotation = top_ann,
            cluster_columns = FALSE, cluster_rows = FALSE,
            column_order = order(df$top),
            show_row_names = TRUE, row_names_gp = gpar(col = colors.module),
            breaks = seq(0, 6, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
print(h)
# Grid over the similarity heatmap: vertical lines between groups of columns
# that share a top module, horizontal lines between rows.
decorate_heatmap_body('Module overlap p-value', {
  l = c(0,cumsum(table(df$top)))
  for (k in seq_along(l)){
    i = unit(l[k]/ncol(sim), 'npc')
    # (k-1)/nrow runs 0..1; k/nrow skipped the border at 0 and drew one line past 1.
    j = unit((k-1)/nrow(sim), 'npc')
    grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
    grid.lines(c(0,1),j, gp = gpar(lwd = 1, col = 'black'))
  }
})
# Overlap
# Pairwise hypergeometric overlap between sample modules. phyper(q,
# lower.tail = FALSE) is P(X > q), hence overlap - 1 for "at least this overlap".
ovlp = sapply(all, function(x){
  sapply(all, function(y){
    pval = phyper(length(intersect(x, y)) - 1, length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE)
    return(-log10(pval))
  })
})
ovlp[is.infinite(ovlp)] = max(ovlp[is.finite(ovlp)])
ovlp = ovlp[colnames(sim),colnames(sim)]
h = Heatmap(name = 'Module overlap p-value', ovlp,
            top_annotation = top_ann,
            is.symmetric = TRUE, row_order = order(df$top),
            #show_row_names = TRUE, row_names_gp = gpar(cex = 0.4),
            breaks = seq(0, 6, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) + side_ann
print(h)
# decorate_heatmap_body('Module overlap p-value', {
#   l = c(0,cumsum(table(df$top))[1:length(table(df$top))])
#   for (k in 1:length(l)){
#     i = unit(l[k]/ncol(ovlp), 'npc')
#     j = unit(1-l[k]/nrow(ovlp), 'npc')
#     grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
#     grid.lines(c(0,1), j, gp = gpar(lwd = 1, col = 'black'))
#   }
# })
# Box around each diagonal block of modules sharing a top consensus module.
decorate_heatmap_body('Module overlap p-value', {
  l = c(0,cumsum(table(df$top)))
  # One box per consecutive pair of boundaries; the old loop also read l[k+1]
  # past the end of l on its last pass.
  for (k in seq_len(length(l) - 1)){
    i = unit(l[k:(k+1)]/ncol(ovlp), 'npc')
    j = unit(1-l[k:(k+1)]/nrow(ovlp), 'npc')
    grid.lines(i, j[1], gp = gpar(lwd = 1, col = 'black'))
    grid.lines(i, j[2], gp = gpar(lwd = 1, col = 'black'))
    grid.lines(i[1], j, gp = gpar(lwd = 1, col = 'black'))
    grid.lines(i[2], j, gp = gpar(lwd = 1, col = 'black'))
  }
})

# Schematic
# Per sample: scaled expression of the sample's module genes, with up to five
# genes per module marked that are shared with the module's top consensus module.
col = rep(colors.module, times = sapply(modules, length))
names(col) = unlist(modules)
col = col[!duplicated(names(col))]
for (s in names(srt.list)){
  mod = modules.list[[s]]
  srt = srt.list[[s]]
  mod = lapply(mod, sort)
  labels = unlist(lapply(names(mod), function(m){
    # Sample modules dropped from df (no overlap above threshold) have no top module.
    top = as.character(df[paste(s, m, sep = '.'),'top'])
    a = if (is.na(top)) character(0) else intersect(mod[[m]], modules[[top]])
    if (length(a) > 5){
      a = sample(a, size = 5, replace = FALSE)
    }
    return(a)
  }))
  data = GetData(srt, slot = 'scale.data')[unlist(mod), ]
  h = Heatmap(name = s, data,
              cluster_rows = FALSE,
              cluster_columns = TRUE, show_column_dend = FALSE,
              breaks = c(-3,0,3), colors = c('blue','white','red'))
  # Take label text from the marked rows themselves: `labels` is in
  # construction order (shuffled by sample()) and can differ in order and
  # length from the matching row indices.
  at = which(rownames(data) %in% labels)
  if (length(at) > 0){
    r = rowAnnotation(link = anno_mark(at = at, labels = rownames(data)[at],
                                       labels_gp = gpar(cex = 1.5, col = col[rownames(data)[at]])))
    print(h+r)
  } else {
    print(h)
  }
  decorate_heatmap_body(s, {
    # Horizontal separators between modules (rows are drawn top to bottom) plus side borders.
    i = c(0,cumsum(sapply(rev(mod), length)))
    for (k in i/max(i)){
      grid.lines(c(0,1), c(k,k), gp = gpar(lwd = 1, col = 'black'))
    }
    grid.lines(c(0,0), c(0,1), gp = gpar(lwd = 1, col = 'black'))
    grid.lines(c(1,1), c(0,1), gp = gpar(lwd = 1, col = 'black'))
  })
}

# Scenic ##

regulons.list = lapply(files, function(f){
  loadRData(paste0(f,'regulons.malignant.RData'))
})
regulons = unlist(regulons.list, recursive = FALSE, use.names = TRUE)
# Overlap of every regulon with the consensus modules, first without Stress
# and Cycle, then with all modules. After the loop sim/df/top_ann/h describe
# the all-modules run.
for (modules.use in list(modules[setdiff(names(modules), c('Stress','Cycle'))], modules)){
  # Similarity (P(X >= overlap), see the overlap section above)
  sim = sapply(regulons, function(x){
    sapply(modules.use, function(y){
      pval = phyper(length(intersect(x, y)) - 1, length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE)
      return(-log10(pval))
    })
  })
  sim[is.infinite(sim)] = max(sim[is.finite(sim)])
  sim = sim[, apply(sim, 2, function(x){any(x > 3)}), drop = FALSE]
  # Column names are split on '.' into sample (first field) and TF (second field).
  df = data.frame('sample' = sapply(colnames(sim), function(x){
    strsplit(x, '.', fixed = TRUE)[[1]][1]
  }))
  df$tf = sapply(colnames(sim), function(x){
    strsplit(x, '.', fixed = TRUE)[[1]][2]
  })
  df$cancer = substr(as.character(df$sample), 1, 4)
  df$system = factor(corr[as.character(df$cancer),'system'])
  df$top = apply(sim, 2, which.max)
  df$top = factor(rownames(sim)[df$top], levels = rownames(sim))
  ta = table(df[c('top','system')])
  top_ann = HeatmapAnnotation(df = df[, c('sample','top')],
                              col = list('sample' = colors, 'top' = colors.module),
                              which = 'column')
  # Colours are looked up by row name so they stay aligned when modules are excluded.
  h = Heatmap(name = 'Scenic overlap p-value', sim,
              top_annotation = top_ann,
              cluster_columns = FALSE, cluster_rows = FALSE,
              column_order = order(df$top),
              show_row_names = TRUE, row_names_gp = gpar(col = colors.module[rownames(sim)]),
              breaks = seq(0, 6, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
  print(h)
  decorate_heatmap_body('Scenic overlap p-value', {
    l = c(0,cumsum(table(df$top)))
    for (k in seq_along(l)){
      i = unit(l[k]/ncol(sim), 'npc')
      j = unit((k-1)/nrow(sim), 'npc')
      grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
      grid.lines(c(0,1),j, gp = gpar(lwd = 1, col = 'black'))
    }
  })
  # TFs whose regulons best match each module, most frequent first.
  print(sapply(split(df$tf, df$top), function(tf){sort(table(tf), decreasing = TRUE)}))
}

sink()
dev.off()

# --------------------------------------------------------------------------------
# /MergingOurs.R:
# --------------------------------------------------------------------------------
#!/usr/bin/env Rscript
source('~/Documents/R/seurat_functions.R')
setwd('~/Documents/Analysis/Tumors/')

load('parameters.RData')

#### Ours ####

pdf('MergingOurs.pdf', height = 10, width = 12)

# Consensus modules with their colours and types; AC, OPC and NPC are left out here.
modules = loadRData('modules.RData')
colors.module = loadRData('colors.module.RData')
types.module = loadRData('types.module.RData')
modules = modules[!names(modules) %in% c('AC','OPC','NPC')]
colors.module = colors.module[names(modules)]
types.module = types.module[names(modules)]

#### Malignant ####

## Srt

srt.list.ours = lapply(files.ours, function(path){
  loadRData(paste0(path,'srt.scored.RData'))
})
# Cancer type = first four characters of the sample name.
cancer = factor(vapply(strsplit(names(files.ours), ''), function(chars){
  paste0(chars[1:4], collapse = '')
}, character(1)))
system = factor(corr[as.character(cancer),'system'])
germlayer = factor(corr[as.character(cancer),'germlayer'])

# One FeaturePlot per module score; extra arguments are passed to FeaturePlot.
ModulePanels = function(object, pt.size, ...){
  lapply(names(modules), function(m){
    FeaturePlot(object, features = m, pt.size = pt.size, cols = c('grey',colors.module[m]), ...) +
      NoAxes() + NoLegend() +
      theme(plot.title = element_text(size = 10))
  })
}

# Per cell, the module with the highest score, as a factor over all modules.
TopState = function(object){
  scores = object@meta.data[,names(modules)]
  best = apply(scores, 1, function(x){
    names(x)[which.max(x)]
  })
  factor(best, levels = names(modules))
}

for (s in names(srt.list.ours)){
  srt = srt.list.ours[[s]]
  print(DimPlot(srt, group.by = 'orig.ident', cols = colors))
  print(CombinePlots(ModulePanels(srt, pt.size = 1), ncol = 4))
  # Best effort: if the t-SNE or the split plot fails, the sample simply has no such page.
  tryCatch(expr = {
    srt = RunTSNE(srt, dims = 1:10)
    print(CombinePlots(ModulePanels(srt, pt.size = 1, split.by = 'cat', reduction = 'tsne'), ncol = 4))
  }, error = function(e){c()})
  srt$state = TopState(srt)
  print(DimPlot(srt, group.by = 'state', cols = colors.module))
  # Number of modules scoring above 0.5 in each cell.
  srt$num = rowSums(srt@meta.data[,names(modules)] > 0.5)
  print(FeaturePlot(srt, 'num', cols = c('grey','darkred')))
  srt.list.ours[[s]] = srt
}

# Sample-identity plots, module score panels and top-state plot of a merged
# object; returns the object with `state` set.
PlotMerged = function(object, pt.size){
  print(DimPlot(object, group.by = 'orig.ident', cols = colors, label = TRUE))
  print(DimPlot(object, group.by = 'orig.ident', cols = colors))
  print(CombinePlots(ModulePanels(object, pt.size = pt.size), ncol = 4))
  object$state = TopState(object)
  print(DimPlot(object, group.by = 'state', cols = colors.module))
  object
}

# Merge all samples and embed.
srt.ours = Reduce(merge2, srt.list.ours)
srt.ours = RunPCA(srt.ours)
srt.ours = RunUMAP(srt.ours, dims = 1:10)
srt.ours = PlotMerged(srt.ours, pt.size = 1)

save(srt.ours, file = 'srt.ours.RData')

# Same plots after SCTransform of the merged object.
srt.ours = SCTransform(srt.ours, return.only.var.genes = FALSE)
srt.ours = RunPCA(srt.ours)
srt.ours = RunUMAP(srt.ours, dims = 1:10)
#srt.ours = GeneToEnrichment(srt.ours, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
srt.ours = PlotMerged(srt.ours, pt.size = 0.5)

## TSNE

# t-SNE on the module scores of all cells (rows follow the order of srt.list.ours).
meta = Reduce(rbind, lapply(srt.list.ours, function(srt){
  srt@meta.data[, names(modules)]
}))
ts = tsne(meta, max_iter = max_iter, perplexity = perplexity)

ts = data.frame(ts)
colnames(ts) = c('x','y')
ts$state = unlist(lapply(srt.list.ours, function(srt){srt$state}))
ts$num = unlist(lapply(srt.list.ours, function(srt){srt$num}))
ts$orig.ident = unlist(lapply(srt.list.ours, function(srt){srt$orig.ident}))
for (m in names(modules)){
  ts[,m] = as.numeric(unlist(lapply(srt.list.ours, function(srt){srt@meta.data[,m]})))
}

# NOTE(review): dist(t(dist(...))) is not the plain pairwise distance between
# cells in the embedding; as.matrix(dist(ts[,c('x','y')])) would be. Left
# unchanged because it alters the neighbourhoods -- confirm which was intended.
distances = as.matrix(dist(t(dist(ts[,c('x','y')])), method = 'euclidean'))
# The 20 nearest cells (self included) of every cell, computed once.
neighbours = lapply(seq_len(nrow(ts)), function(c){
  head(order(distances[,c]), 20)
})
# sum(p * log(p)) over the non-zero proportions.
# NOTE(review): this is the negative of the Shannon entropy (no leading minus);
# left unchanged because later code may rely on the sign -- confirm.
SumPLogP = function(counts){
  p = counts/sum(counts)
  p = p[!p == 0]
  sum(p*log(p))
}
# Mixing of samples in each neighbourhood.
ts$entropy = sapply(neighbours, function(nn){
  SumPLogP(table(ts$orig.ident[nn]))
})
# Mixing of modules scoring above 0.5 in each neighbourhood.
ts$mentropy = sapply(neighbours, function(nn){
  SumPLogP(colSums(ts[nn,names(modules)] > 0.5))
})

# Shared look of all t-SNE panels: framed, no axes, no legend.
theme.tsne = theme_classic() +
  theme(
    panel.border = element_rect(fill = NA),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    axis.line.x = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())

# t-SNE coloured by a discrete column of ts; optionally one facet per sample.
PlotTsneDiscrete = function(group, palette, facet.ncol = NULL){
  p = ggplot(ts, aes_string(x = 'x', y = 'y', fill = group, color = group)) +
    geom_point() +
    scale_fill_manual(values = palette) +
    scale_color_manual(values = palette)
  if (!is.null(facet.ncol)){
    p = p + facet_wrap(~ orig.ident, ncol = facet.ncol)
  }
  p + theme.tsne
}

# t-SNE coloured by a numeric column of ts on a lightgrey -> `high` gradient,
# titled with the column name.
PlotTsneContinuous = function(feature, high, size = NULL){
  points = if (is.null(size)) geom_point() else geom_point(size = size)
  ggplot(ts, aes_string(x = 'x', y = 'y', color = feature)) +
    points +
    scale_color_gradientn(colors = c("lightgrey", high)) +
    ggtitle(feature) +
    theme.tsne
}

p = PlotTsneDiscrete('state', colors.module)
print(p)
p = PlotTsneDiscrete('orig.ident', colors)
print(p)
p = PlotTsneContinuous('entropy', "slateblue4")
print(p)
# This panel was titled 'entropy' (copy-paste); it shows `num`.
p = PlotTsneContinuous('num', "darkred")
print(p)
p = PlotTsneDiscrete('orig.ident', colors, facet.ncol = 5)
print(p)
p = PlotTsneDiscrete('state', colors.module, facet.ncol = 6)
print(p)
plot.list = lapply(names(modules), function(m){
  PlotTsneContinuous(m, colors.module[m], size = 0.25)
})
print(CombinePlots(plot.list, ncol = 4))

##
## Stats

# Cancer type of a sample: the first four characters of its name, which is
# the key used to index the rows of `corr`.
CancerOf = function(sample){
  substr(as.character(sample), 1, 4)
}

stats.list.ours = lapply(files.ours, function(f){
  stats = loadRData(paste0(f,'stats.RData'))
  stats[,names(modules)]
})

# Append three per-sample rows computed on the merged object: fraction of
# cells scoring > 0.5 per module, mean scaled expression of the module genes,
# and the mean of the per-cell `num` column.
for (s in names(stats.list.ours)){
  stats = stats.list.ours[[s]]
  srt = srt.ours[, srt.ours$orig.ident == s]
  scores = srt@meta.data[,names(modules)]
  data = GetData(srt, slot = 'scale.data')
  fraction = colMeans(scores > 0.5, na.rm = TRUE)
  average = sapply(modules, function(mod){
    mean(data[intersect(mod,rownames(data)),])
  })
  num = mean(srt$num, na.rm = TRUE)
  # Row names of the added rows come from the variable names
  stats = rbind(stats, fraction, average, num)
  stats.list.ours[[s]] = stats
}

stats.ours = melt(lapply(stats.list.ours, melt))
names(stats.ours) = c('measure','module','variable','value','sample')
stats.ours$cancer = factor(CancerOf(stats.ours$sample))
stats.ours$system = factor(corr[as.character(stats.ours$cancer),'system'])
stats.ours$germlayer = factor(corr[as.character(stats.ours$cancer),'germlayer'])
save(stats.ours, file = 'stats.ours.RData')

# One module x sample heatmap per measure
for (measure in levels(stats.ours$measure)){

  mat = stats.ours[stats.ours$measure == measure, c('value','module','sample')]
  mat = reshape(mat, timevar = 'sample', idvar = 'module', direction = 'wide')
  rownames(mat) = mat[,1]
  mat = mat[,-1]
  # reshape() prefixes every value column with 'value.'
  colnames(mat) = sub('^value\\.', '', colnames(mat))

  df = data.frame('subsample' = colnames(mat), stringsAsFactors = FALSE)
  df$sample = sapply(strsplit(df$subsample, split = '.', fixed = TRUE), '[', 1)
  df$cancer = CancerOf(df$sample)
  df$system = factor(corr[as.character(df$cancer),'system'])
  df$germlayer = factor(corr[as.character(df$cancer),'germlayer'])
  top_ann = HeatmapAnnotation(df = df[,c('sample','system','germlayer')],
                              col = list('sample' = colors, 'system' = colors.system, 'germlayer' = colors.germlayer),
                              which = 'column')
  # Colour scale spans the 10%-90% quantile range of the values
  limits = quantile(unlist(mat), c(0.1, 0.9), na.rm = TRUE)
  h = Heatmap(mat, name = measure,
              top_annotation = top_ann,
              cluster_rows = FALSE, cluster_columns = FALSE,
              show_row_names = TRUE, show_column_names = TRUE,
              row_names_gp = gpar(col = colors.module[rownames(mat)]), column_names_gp = gpar(col = colors[colnames(mat)]),
              breaks = seq(limits['10%'], limits['90%'], length.out = 11), colors = viridis_pal(begin = 0, end = 1, option = 'magma')(11))
  print(h)
  # Black grid separating every cell of the heatmap body
  decorate_heatmap_body(measure, {
    #l = c(0,cumsum(rle(as.character(cancer))$lengths))
    for (x in (0:ncol(mat))/ncol(mat)){
      grid.lines(unit(x, 'npc'), c(0,1), gp = gpar(lwd = 1, col = 'black'))
    }
    for (y in (0:nrow(mat))/nrow(mat)){
      grid.lines(c(0,1), unit(y, 'npc'), gp = gpar(lwd = 1, col = 'black'))
    }
  })

}

## Co-occurence

# Signed hypergeometric score for the overlap of two logical per-cell vectors:
# -log10(p) when the upper-tail p-value is <= 0.5, log10(1 - p) (negative)
# otherwise.
SignedOverlapScore = function(a, b){
  pval = phyper(
    sum(a * b),
    sum(b),
    length(b) - sum(b),
    sum(a),
    lower.tail = FALSE)
  if (pval <= 0.5) -log10(pval) else log10(1-pval)
}

# Draw a module co-occurrence matrix as a circular graph. Scores in
# (-0.5, 0.5) are zeroed (no edge) and the rest clipped to [-10, 10]; edges are
# coloured by signed score and vertices coloured by module.
#   relative = TRUE : widths and sizes scaled to the largest edge / vertex
#   relative = FALSE: fixed scaling (width = |score|/5, size = 200*occ)
# A failure to build or draw the graph is reported and skipped, so that one
# degenerate sample does not abort the whole report.
PlotCooccurrenceGraph = function(coocc, occ, main, relative = FALSE){
  coocc[coocc > -0.5 & coocc < 0.5] = 0
  coocc[coocc > 10] = 10
  coocc[coocc < -10] = -10
  tryCatch(expr = {
    g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
    V(g)$type = (types.module[names(V(g))] == 'Type')
    coords = layout.circle(g)
    E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
    if (relative){
      # Normalise by the largest absolute score: dividing by max(weight)
      # gave negative or oversized widths when the strongest edge was negative
      E(g)$width = 5*abs(E(g)$weight)/max(abs(E(g)$weight))
      V(g)$size = 50*occ/max(occ)
    } else {
      E(g)$width = abs(E(g)$weight)/5
      V(g)$size = occ*200
    }
    V(g)$color = colors.module[names(V(g))]
    plot(g, main = main,
         layout = coords,
         vertex.shape = 'circle',
         vertex.label.size = 1, vertex.label.color = 'black',
         frame=FALSE)
  }, error = function(e){
    message('Co-occurrence graph skipped for ', main, ': ', conditionMessage(e))
  })
  invisible(NULL)
}

# Median co-occurrence graph over the samples selected by `group` (any valid
# index into coocc.list / occ.list), restricted to modules whose median
# frequency is at least 0.05.
PlotGroupCooccurrence = function(group, main){
  coocc = SummarizeMatrices(coocc.list[group], median)
  occ = sapply(names(modules), function(m){median(sapply(occ.list[group], '[', m))})
  keep = names(which(occ >= 0.05))
  PlotCooccurrenceGraph(coocc[keep, keep], occ[keep], main = main)
}

par(mfrow = c(2,2))
coocc.list = list()
occ.list = list()
for (s in names(srt.list.ours)){

  srt = srt.list.ours[[s]]
  stats = stats.list.ours[[s]]
  active = srt@meta.data[, names(modules)] > 0.5

  occ = stats['frequency',]

  coocc = sapply(names(modules), function(m1){
    sapply(names(modules), function(m2){
      SignedOverlapScore(active[, m1], active[, m2])
    })
  })

  # p-values of exactly 0 or 1 give infinite scores: cap at the finite extremes
  coocc[is.infinite(coocc) & coocc > 0] = max(coocc[is.finite(coocc)])
  coocc[is.infinite(coocc) & coocc < 0] = min(coocc[is.finite(coocc)])

  occ.list[[s]] = occ
  coocc.list[[s]] = coocc

  keep = names(which(stats['presence',] > 0.5 & stats['frequency',] > 0.05))
  PlotCooccurrenceGraph(coocc[keep, keep], occ[keep], main = s, relative = TRUE)
}

for (can in levels(cancer)){
  PlotGroupCooccurrence(cancer == can, can)
}

for (sys in levels(system)){
  PlotGroupCooccurrence(system == sys, sys)
}

PlotGroupCooccurrence(seq_along(coocc.list), 'All')

par(mfrow = c(1,1))


#### All ####

srt.list.ours.all = lapply(files.ours, function(f){
  loadRData(paste0(f,'srt.all.RData'))
})

srt.ours.all = Reduce(merge2, srt.list.ours.all)
srt.ours.all = RunPCA(srt.ours.all)
srt.ours.all = RunUMAP(srt.ours.all, dims = 1:10)
save(srt.ours.all, file = 'srt.ours.all.RData')

colors.pop = hue_pal()(nlevels(srt.ours.all$pop))
names(colors.pop) = levels(srt.ours.all$pop)
colors.pop['Malignant'] = 'black'
save(colors.pop, file = 'colors.pop.ours.RData')

# Random cell order passed to DimPlot
o = sample(seq_len(ncol(srt.ours.all)), size = ncol(srt.ours.all), replace = FALSE)
# Each grouping is drawn twice: with and without cluster labels
palettes = list('orig.ident' = colors, 'type' = colors.type, 'pop' = colors.pop)
for (group in names(palettes)){
  for (label in c(TRUE, FALSE)){
    h = DimPlot(srt.ours.all, group.by = group, cols = palettes[[group]], order = o, pt.size = 1, label = label)
    print(h)
  }
}

srt.ours.tme = srt.ours.all[, !srt.ours.all$pop == 'Malignant']
srt.ours.tme = RunPCA(srt.ours.tme)
srt.ours.tme = RunUMAP(srt.ours.tme, dims = 1:10)
save(srt.ours.tme, file = 'srt.ours.tme.RData')

# Frequency regression

# Per sample: fraction of malignant cells scoring > 0.5 for each module
freq = t(sapply(levels(srt.ours$orig.ident), function(or){
  colMeans(srt.ours@meta.data[srt.ours$orig.ident == or, names(modules)] > 0.5, na.rm = TRUE)
}))
# Per sample: composition of the non-malignant compartment
tme = table(srt.ours.tme$orig.ident, srt.ours.tme$pop)
exclude = c('Malignant','Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell',
            'CMP','Erythroblast','GMP','Gametocytes','HSC_CD34+','Pre-B_cell_CD34-','Pro-B_cell_CD34+','Pro-Myelocyte')
tme = tme[, setdiff(colnames(tme), exclude)]
tme = tme/rowSums(tme)

# lm() pairs rows by position: make sure both tables list the same samples in
# the same order before regressing one on the other.
samples = intersect(rownames(freq), rownames(tme))
freq = freq[samples, , drop = FALSE]
tme = tme[samples, , drop = FALSE]

# Multiple regression
# Signed -log10(p) of every coefficient; values below the threshold are zeroed
test = sapply(names(modules), function(m){
  fit = summary(lm(freq[,m] ~ tme))$coefficients
  # Coefficient names are 'tme<population>': strip the prefix only
  populations = sub('^tme', '', rownames(fit))
  pval = -log10(fit[,4])
  pval[pval < -log10(pval_thresh)] = 0
  names(pval) = populations
  est = fit[,1]
  names(est) = populations
  pval*sign(est)
})
h = Heatmap(test,
            show_row_names = TRUE, show_column_names = TRUE,
            cluster_rows = FALSE, cluster_columns = FALSE,
            breaks = seq(-3, 3, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
print(h)

# Plot hits
par(mfrow = c(4,4))
for (ct in colnames(tme)){
  for (m in names(modules)){
    fit = summary(lm(freq[,m] ~ tme[,ct]))$coefficients
    # The slope row is missing when the predictor is constant
    has.slope = nrow(fit) >= 2
    pval = if (has.slope) -log10(fit[2,4]) else NA_real_
    plot(freq[,m] ~ tme[,ct], col = colors[rownames(tme)], pch = 20, xlab = ct, ylab = m, main = round(pval, digits = 2))
    if (isTRUE(pval > 0.5)){
      abline(a = fit[1,1], b = fit[2,1], lty = 2)
    }
  }
}
par(mfrow = c(1,1))

dev.off()

--------------------------------------------------------------------------------
/MergingPaired.R:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript
source('~/Documents/R/seurat_functions.R')
setwd('~/Documents/Analysis/Tumors/')

load('parameters.RData')

#### Paired ####

pdf('MergingPaired.pdf', height = 10, width = 20)

modules = loadRData('modules.RData')
colors.module = loadRData('colors.module.RData')
types.module = loadRData('types.module.RData')
# These three modules are left out of the paired analysis
modules = modules[!names(modules) %in% c('AC','OPC','NPC')]
colors.module = colors.module[names(modules)]
types.module = types.module[names(modules)]

# Grid (4 columns) of one FeaturePlot per module, each on a grey -> module
# colour scale; `...` is forwarded to FeaturePlot (e.g. split.by, reduction).
ModuleFeaturePlots = function(srt, pt.size = 1, ...){
  plot.list = lapply(names(modules), function(m){
    FeaturePlot(srt, features = m, pt.size = pt.size, cols = c('grey',colors.module[m]), ...) +
      NoAxes() + NoLegend() +
      theme(plot.title = element_text(size = 10))
  })
  CombinePlots(plot.list, ncol = 4)
}

# State of each cell = name of its highest-scoring module, as a factor over
# all modules. Cells whose scores are all NA get NA instead of breaking the
# shape of the result.
TopModule = function(srt){
  top = apply(srt@meta.data[,names(modules)], 1, function(x){
    i = which.max(x)
    if (length(i) == 0) NA_character_ else names(x)[i]
  })
  factor(top, levels = names(modules))
}

#### Malignant ####

## Srt

srt.list.paired = lapply(files.paired.list, function(f){
  loadRData(paste0(f[['normal']],'Paired/srt.scored.RData'))
})
# First four characters of the sample name identify the cancer type
cancer = factor(substr(names(files.paired.list), 1, 4))
system = factor(corr[as.character(cancer),'system'])
germlayer = factor(corr[as.character(cancer),'germlayer'])

for (s in names(srt.list.paired)){
  srt = srt.list.paired[[s]]
  h = DimPlot(srt, group.by = 'orig.ident', cols = colors)
  print(h)
  h = DimPlot(srt, group.by = 'cat', cols = colors.cat)
  print(h)
  print(ModuleFeaturePlots(srt, pt.size = 1, split.by = 'cat'))
  # The tSNE view is optional: report the failure and carry on with the
  # remaining plots of this sample.
  tryCatch(expr = {
    srt = RunTSNE(srt, dims = 1:10)
    print(ModuleFeaturePlots(srt, pt.size = 1, split.by = 'cat', reduction = 'tsne'))
  }, error = function(e){
    message('tSNE plots skipped for ', s, ': ', conditionMessage(e))
  })
  srt$state = TopModule(srt)
  # Number of modules scoring > 0.5 in each cell
  srt$num = rowSums(srt@meta.data[,names(modules)] > 0.5)
  h = FeaturePlot(srt, 'num', cols = c('grey','darkred'))
  print(h)
  h = VlnPlot(srt, 'num', group.by = 'cat', cols = colors.cat, pt.size = 0)
  print(h)
  h = DimPlot(srt, group.by = 'state', cols = colors.module)
  print(h)
  srt.list.paired[[s]] = srt
}

srt.paired = Reduce(merge2, srt.list.paired)
srt.paired = RunPCA(srt.paired)
srt.paired = RunUMAP(srt.paired, dims = 1:10)

h = DimPlot(srt.paired, group.by = 'orig.ident', cols = colors, label = TRUE)
print(h)
h = DimPlot(srt.paired, group.by = 'orig.ident', cols = colors)
print(h)
print(ModuleFeaturePlots(srt.paired, pt.size = 0.5))
srt.paired$state = TopModule(srt.paired)
h = DimPlot(srt.paired, group.by = 'state', cols = colors.module)
print(h)
h = FeaturePlot(srt.paired, 'num', cols = c('grey','darkred'))
print(h)
h = DimPlot(srt.paired, group.by = 'cat', cols = colors.cat)
print(h)

save(srt.paired, file = 'srt.paired.RData')

# Renormalise the merged object, re-score the modules on it and redo the plots
srt.paired = SCTransform(srt.paired, return.only.var.genes = FALSE)
srt.paired = RunPCA(srt.paired)
srt.paired = RunUMAP(srt.paired, dims = 1:10)
srt.paired = GeneToEnrichment(srt.paired, db = modules, method = 'rand', nrand = nrand, nbin = nbin)

h = DimPlot(srt.paired, group.by = 'orig.ident', cols = colors, label = TRUE)
print(h)
h = DimPlot(srt.paired, group.by = 'orig.ident', cols = colors)
print(h)
h = DimPlot(srt.paired, group.by = 'cat', cols = colors.cat)
print(h)
print(ModuleFeaturePlots(srt.paired, pt.size = 1))
srt.paired$state = TopModule(srt.paired)
h = DimPlot(srt.paired, group.by = 'state', cols = colors.module)
print(h)

# ## TSNE
#
# meta = Reduce(rbind, lapply(srt.list.paired, function(srt){
# srt@meta.data[, names(modules)]
# }))
# ts = tsne(meta, max_iter = max_iter, perplexity = perplexity)
#
# ts = data.frame(ts)
# colnames(ts) = c('x','y')
# ts$state = unlist(lapply(srt.list.paired, function(srt){srt$state}))
# ts$num = unlist(lapply(srt.list.paired, function(srt){srt$num}))
# ts$orig.ident = unlist(lapply(srt.list.paired, function(srt){srt$orig.ident}))
# for (m in names(modules)){
# ts[,m] = as.numeric(unlist(lapply(srt.list.paired, function(srt){srt@meta.data[,m]})))
# }
#
# distances = as.matrix(dist(t(dist(ts[,c('x','y')])), method = 'euclidean'))
# ts$entropy = sapply(1:nrow(ts), function(c){
# nn = order(distances[,c])[1:20]
# ta = table(ts$orig.ident[nn])
# ta = ta/sum(ta)
# ta = ta[!ta == 0]
# return(sum(ta*log(ta)))
# })
# ts$mentropy = sapply(1:nrow(ts), function(c){
# nn = order(distances[,c])[1:20]
# ta = colSums(ts[nn,names(modules)] > 0.5)
# ta = ta/sum(ta)
# ta = ta[!ta == 0]
# return(sum(ta*log(ta)))
# })
#
# p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +
# geom_point() +
| # scale_fill_manual(values = colors.module) + 153 | # scale_color_manual(values = colors.module) + 154 | # theme_classic() + 155 | # theme( 156 | # panel.border = element_rect(fill = NA), 157 | # legend.position = "none", 158 | # plot.title = element_text(hjust = 0.5), 159 | # axis.line.x = element_blank(), 160 | # axis.line.y = element_blank(), 161 | # axis.ticks.x = element_blank(), 162 | # axis.ticks.y = element_blank(), 163 | # axis.text.x = element_blank(), 164 | # axis.text.y = element_blank(), 165 | # axis.title.x = element_blank(), 166 | # axis.title.y = element_blank()) 167 | # print(p) 168 | # p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) + 169 | # geom_point() + 170 | # scale_fill_manual(values = colors) + 171 | # scale_color_manual(values = colors) + 172 | # theme_classic() + 173 | # theme( 174 | # panel.border = element_rect(fill = NA), 175 | # legend.position = "none", 176 | # plot.title = element_text(hjust = 0.5), 177 | # axis.line.x = element_blank(), 178 | # axis.line.y = element_blank(), 179 | # axis.ticks.x = element_blank(), 180 | # axis.ticks.y = element_blank(), 181 | # axis.text.x = element_blank(), 182 | # axis.text.y = element_blank(), 183 | # axis.title.x = element_blank(), 184 | # axis.title.y = element_blank()) 185 | # print(p) 186 | # p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'entropy')) + 187 | # geom_point() + 188 | # scale_color_gradientn(colors = c("lightgrey", "slateblue4")) + 189 | # ggtitle('entropy') + 190 | # theme_classic() + 191 | # theme( 192 | # panel.border = element_rect(fill = NA), 193 | # legend.position = "none", 194 | # plot.title = element_text(hjust = 0.5), 195 | # axis.line.x = element_blank(), 196 | # axis.line.y = element_blank(), 197 | # axis.ticks.x = element_blank(), 198 | # axis.ticks.y = element_blank(), 199 | # axis.text.x = element_blank(), 200 | # axis.text.y = element_blank(), 201 | # axis.title.x = element_blank(), 202 | # axis.title.y = element_blank()) 203 | # 
print(p) 204 | # p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'num')) + 205 | # geom_point() + 206 | # scale_color_gradientn(colors = c("lightgrey", "darkred")) + 207 | # ggtitle('entropy') + 208 | # theme_classic() + 209 | # theme( 210 | # panel.border = element_rect(fill = NA), 211 | # legend.position = "none", 212 | # plot.title = element_text(hjust = 0.5), 213 | # axis.line.x = element_blank(), 214 | # axis.line.y = element_blank(), 215 | # axis.ticks.x = element_blank(), 216 | # axis.ticks.y = element_blank(), 217 | # axis.text.x = element_blank(), 218 | # axis.text.y = element_blank(), 219 | # axis.title.x = element_blank(), 220 | # axis.title.y = element_blank()) 221 | # print(p) 222 | # p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) + 223 | # geom_point() + 224 | # scale_fill_manual(values = colors) + 225 | # scale_color_manual(values = colors) + 226 | # facet_wrap(~ orig.ident, ncol = 6) + 227 | # theme_classic() + 228 | # theme( 229 | # panel.border = element_rect(fill = NA), 230 | # legend.position = "none", 231 | # plot.title = element_text(hjust = 0.5), 232 | # axis.line.x = element_blank(), 233 | # axis.line.y = element_blank(), 234 | # axis.ticks.x = element_blank(), 235 | # axis.ticks.y = element_blank(), 236 | # axis.text.x = element_blank(), 237 | # axis.text.y = element_blank(), 238 | # axis.title.x = element_blank(), 239 | # axis.title.y = element_blank()) 240 | # print(p) 241 | # p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) + 242 | # geom_point() + 243 | # scale_fill_manual(values = colors.module) + 244 | # scale_color_manual(values = colors.module) + 245 | # facet_wrap(~ orig.ident, ncol = 6) + 246 | # theme_classic() + 247 | # theme( 248 | # panel.border = element_rect(fill = NA), 249 | # legend.position = "none", 250 | # plot.title = element_text(hjust = 0.5), 251 | # axis.line.x = element_blank(), 252 | # axis.line.y = element_blank(), 253 | # axis.ticks.x = element_blank(), 254 | # 
# axis.ticks.y = element_blank(),
# axis.text.x = element_blank(),
# axis.text.y = element_blank(),
# axis.title.x = element_blank(),
# axis.title.y = element_blank())
# print(p)
# plot.list = lapply(names(modules), function(m){
# p = ggplot(ts, aes_string(x = 'x', y = 'y', color = m)) +
# geom_point(size = 0.25) +
# scale_color_gradientn(colors = c("lightgrey", colors.module[m])) +
# ggtitle(m) +
# theme_classic() +
# theme(
# panel.border = element_rect(fill = NA),
# legend.position = "none",
# plot.title = element_text(hjust = 0.5),
# axis.line.x = element_blank(),
# axis.line.y = element_blank(),
# axis.ticks.x = element_blank(),
# axis.ticks.y = element_blank(),
# axis.text.x = element_blank(),
# axis.text.y = element_blank(),
# axis.title.x = element_blank(),
# axis.title.y = element_blank())
# })
# print(CombinePlots(plot.list, ncol = 4))

## Stats

stats.list.paired = lapply(files.paired.list, function(f){
  stats = loadRData(paste0(f[['normal']],'Paired/stats.RData'))
  stats[,names(modules)]
})

# Append, separately for the normal and tumor cells of each sample: fraction
# of cells scoring > 0.5 per module, mean scaled expression of the module
# genes, and the mean of the per-cell `num` column.
for (s in names(stats.list.paired)){
  stats = stats.list.paired[[s]]
  srt = srt.paired[, srt.paired$orig.ident == s]
  scores = srt@meta.data[,names(modules)]
  data = GetData(srt, slot = 'scale.data')
  is.normal = srt$cat == 'normal'
  is.tumor = srt$cat == 'tumor'
  fraction.normal = colMeans(scores[is.normal,] > 0.5, na.rm = TRUE)
  fraction.tumor = colMeans(scores[is.tumor,] > 0.5, na.rm = TRUE)
  average.normal = sapply(modules, function(mod){
    mean(data[intersect(mod,rownames(data)),is.normal])
  })
  average.tumor = sapply(modules, function(mod){
    mean(data[intersect(mod,rownames(data)),is.tumor])
  })
  num.normal = mean(srt@meta.data[is.normal,'num'], na.rm = TRUE)
  num.tumor = mean(srt@meta.data[is.tumor,'num'], na.rm = TRUE)
  # Row names of the added rows ('<measure>.<cat>') come from the variable
  # names and are split back into measure and cat below
  stats = rbind(stats, fraction.normal, fraction.tumor, average.normal, average.tumor, num.normal, num.tumor)
  stats.list.paired[[s]] = stats
}

print(stats.list.paired$PDACHs1[c('num.normal','num.tumor'),1])
print(stats.list.paired$PDACHs2[c('num.normal','num.tumor'),1])
print(stats.list.paired$PDACHs3[c('num.normal','num.tumor'),1])

stats.paired = melt(lapply(stats.list.paired, melt))
names(stats.paired) = c('measure','module','variable','value','sample')
# Split '<measure>.<cat>' row names into their two parts
measure.parts = strsplit(as.character(stats.paired$measure), '.', fixed = TRUE)
stats.paired$cat = factor(sapply(measure.parts, '[', 2), levels = names(colors.cat))
stats.paired$measure = factor(sapply(measure.parts, '[', 1))
# First four characters of the sample name identify the cancer type
stats.paired$cancer = factor(substr(stats.paired$sample, 1, 4))
stats.paired$system = factor(corr[as.character(stats.paired$cancer),'system'])
stats.paired$germlayer = factor(corr[as.character(stats.paired$cancer),'germlayer'])
stats.paired$sample.cat = paste(stats.paired$sample, stats.paired$cat, sep = '.')
save(stats.paired, file = 'stats.paired.RData')

# Grid of paired normal-vs-tumor plots of `measure`, one per module in
# module.names, faceted by cancer type and laid out on `ncol` columns.
PairedModulePlots = function(measure, module.names, ncol){
  p.list = lapply(module.names, function(m){
    sub = stats.paired[stats.paired$measure == measure & stats.paired$module == m,]
    sub = sub[order(sub$cat), ]
    ggpaired(sub, x = 'cat', y = 'value',
             color = 'cat', line.color = 'grey', line.size = 0.4, facet.by = 'cancer') +
      stat_compare_means(paired = TRUE) +
      scale_color_manual(values = colors.cat) +
      ggtitle(m) +
      theme_classic() +
      theme(
        plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(size = 10),
        axis.title.x = element_text(size = 0),
        axis.title.y = element_text(size = 0),
        legend.position = 'none')
  })
  # ncol belongs to CombinePlots: it used to be passed to print(), which
  # ignored it
  CombinePlots(p.list, ncol = ncol)
}

for (measure in levels(stats.paired$measure)){

  mat = stats.paired[stats.paired$measure == measure, c('value','module','sample.cat')]
  mat = reshape(mat, timevar = 'sample.cat', idvar = 'module', direction = 'wide')
  rownames(mat) = mat[,1]
  mat = mat[,-1]
  # reshape() prefixes every value column with 'value.'
  colnames(mat) = sub('^value\\.', '', colnames(mat))

  df = data.frame('subsample' = colnames(mat), stringsAsFactors = FALSE)
  subsample.parts = strsplit(df$subsample, split = '.', fixed = TRUE)
  df$sample = sapply(subsample.parts, '[', 1)
  df$cat = sapply(subsample.parts, '[', 2)
  df$cancer = substr(df$sample, 1, 4)
  df$system = factor(corr[as.character(df$cancer),'system'])
  df$germlayer = factor(corr[as.character(df$cancer),'germlayer'])
  top_ann = HeatmapAnnotation(df = df[,c('sample','cat')],
                              col = list('sample' = colors, 'cat' = colors.cat),
                              which = 'column')
  # Colour scale spans the 10%-90% quantile range of the values
  limits = quantile(unlist(mat), c(0.1, 0.9), na.rm = TRUE)
  # Columns are named '<sample>.<cat>' while `colors` is used with sample
  # names (see the annotation above): colour the labels by sample
  h = Heatmap(mat, name = measure,
              top_annotation = top_ann,
              cluster_rows = FALSE, cluster_columns = FALSE,
              show_row_names = TRUE, show_column_names = TRUE,
              row_names_gp = gpar(col = colors.module[rownames(mat)]), column_names_gp = gpar(col = colors[df$sample]),
              breaks = seq(limits['10%'], limits['90%'], length.out = 11), colors = viridis_pal(begin = 0, end = 1, option = 'magma')(11))
  print(h)
  # Black grid separating every cell of the heatmap body
  decorate_heatmap_body(measure, {
    #l = c(0,cumsum(rle(as.character(cancer))$lengths))
    for (x in (0:ncol(mat))/ncol(mat)){
      grid.lines(unit(x, 'npc'), c(0,1), gp = gpar(lwd = 1, col = 'black'))
    }
    for (y in (0:nrow(mat))/nrow(mat)){
      grid.lines(c(0,1), unit(y, 'npc'), gp = gpar(lwd = 1, col = 'black'))
    }
  })

  print(PairedModulePlots(measure, c('Squamous','Glandular','pEMT','Interferon'), ncol = 4))

  print(PairedModulePlots(measure, c('Cycle','Stress','Hypoxia',
                                     'Oxphos','Metal','Mesenchymal',
                                     'Alveolar','Basal','Ciliated'), ncol = 3))

}


# #### All ####
#
# srt.list.paired.all = lapply(files.paired, function(f){
# loadRData(paste0(f,'srt.all.RData'))
# })
#
# srt.paired.all = Reduce(merge2, srt.list.paired.all)
# srt.paired.all = RunPCA(srt.paired.all)
# srt.paired.all = RunUMAP(srt.paired.all, dims = 1:10)
# save(srt.paired.all, file = 'srt.paired.all.RData')
#
# colors.pop = hue_pal()(nlevels(srt.paired.all$pop))
# names(colors.pop) = levels(srt.paired.all$pop)
# colors.pop['Malignant'] = 'black'
# save(colors.pop, file = 'colors.pop.paired.RData')
#
# o = sample(1:ncol(srt.paired.all), size = ncol(srt.paired.all), replace = FALSE)
# h = DimPlot(srt.paired.all, group.by = 'orig.ident', cols = colors, order = o, pt.size = 1, label = TRUE)
# print(h)
# h = DimPlot(srt.paired.all, group.by = 'orig.ident', cols = colors, order = o, pt.size = 1)
# print(h)
# h = DimPlot(srt.paired.all, group.by = 'type', cols = colors.type, order = o, pt.size = 1, label = TRUE)
# print(h)
# h =
# DimPlot(srt.paired.all, group.by = 'type', cols = colors.type, order = o, pt.size = 1)
# print(h)
# h = DimPlot(srt.paired.all, group.by = 'pop', cols = colors.pop, order = o, pt.size = 1, label = TRUE)
# print(h)
# h = DimPlot(srt.paired.all, group.by = 'pop', cols = colors.pop, order = o, pt.size = 1, label = FALSE)
# print(h)
#
# srt.paired.tme = srt.paired.all[, !srt.paired.all$pop == 'Malignant']
# srt.paired.tme = RunPCA(srt.paired.tme)
# srt.paired.tme = RunUMAP(srt.paired.tme, dims = 1:10)
# save(srt.paired.tme, file = 'srt.paired.tme.RData')


dev.off()
--------------------------------------------------------------------------------
/MergingPrimary.R:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript
# Merges the scored malignant objects of all primary samples, plots module
# scores per sample and on the merged object, aggregates per-sample module
# statistics into heatmaps, and draws module co-occurrence graphs.
# Helper functions such as loadRData, merge2, GetData, SummarizeMatrices and
# the Heatmap wrapper come from seurat_functions.R; colours, file lists, `corr`
# and thresholds are expected in parameters.RData (not visible here).
source('~/Documents/R/seurat_functions.R')
setwd('~/Documents/Analysis/Tumors/')

load('parameters.RData')

#### Primary ####

pdf('MergingPrimary.pdf', height = 10, width = 12)

modules = loadRData('modules.RData')
colors.module = loadRData('colors.module.RData')
types.module = loadRData('types.module.RData')

#### Helpers ####

# Label every cell with the module that has the highest score in its metadata
# and store the result in srt$state as a factor over names(modules).
AssignState = function(srt){
  srt$state = apply(srt@meta.data[,names(modules)], 1, function(x){
    names(x)[which.max(x)]
  })
  srt$state = factor(srt$state, levels = names(modules))
  return(srt)
}

# Print one FeaturePlot per module (4 per row) on the current device.
# Extra arguments (e.g. split.by, reduction) are forwarded to FeaturePlot.
PlotModuleScores = function(srt, pt.size = 1, ...){
  plot.list = lapply(names(modules), function(m){
    FeaturePlot(srt, features = m, pt.size = pt.size, cols = c('grey',colors.module[m]), ...) +
      NoAxes() + NoLegend() +
      theme(plot.title = element_text(size = 10))
  })
  print(CombinePlots(plot.list, ncol = 4))
  invisible(NULL)
}

# Draw a circular module co-occurrence graph.
#   coocc    signed co-occurrence scores (modules x modules)
#   occ      per-module frequency, same modules and order as coocc
#   title    plot title, also used in the message emitted on failure
#   relative TRUE: edge widths / vertex sizes are scaled to the largest value in
#            this graph; FALSE: fixed scaling (|weight|/5 and frequency*200)
# Scores with absolute value below 0.5 are dropped and the rest are capped at
# +/-10. Plotting stays best-effort: a failure is reported and skipped so that
# one sample cannot abort the whole report.
PlotCooccurrenceGraph = function(coocc, occ, title, relative = FALSE){
  coocc[coocc > -0.5 & coocc < 0.5] = 0
  coocc[coocc > 10] = 10
  coocc[coocc < -10] = -10
  tryCatch(expr = {
    g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
    V(g)$type = (types.module[names(V(g))] == 'Type')
    coords = layout.circle(g)
    E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
    if (relative){
      # Normalise by the largest absolute weight: dividing by max(weight)
      # gives negative widths when every edge is negative.
      E(g)$width = 5*abs(E(g)$weight)/max(abs(E(g)$weight))
      V(g)$size = 50*occ/max(occ)
    } else {
      E(g)$width = abs(E(g)$weight)/5
      V(g)$size = occ*200
    }
    V(g)$color = colors.module[names(V(g))]
    # Plot graph
    plot(g, main = title,
         layout = coords,
         vertex.shape = 'circle',
         vertex.label.size = 1, vertex.label.color = 'black',
         frame = FALSE)
  }, error = function(e){
    message('Co-occurrence graph skipped for ', title, ': ', conditionMessage(e))
  })
  invisible(NULL)
}

# Median co-occurrence graph over a subset of samples. `selected` indexes
# coocc.list / occ.list (filled in the Co-occurrence section below); modules
# with a median frequency below 0.05 are left out.
PlotMedianCooccurrence = function(selected, title){
  coocc = SummarizeMatrices(coocc.list[selected], median)
  occ = sapply(names(modules), function(m){median(sapply(occ.list[selected], '[', m))})
  keep = names(which(occ >= 0.05))
  PlotCooccurrenceGraph(coocc[keep, keep], occ[keep], title)
}

#### Malignant ####

## Srt

srt.list.primary = lapply(files.primary, function(f){
  loadRData(paste0(f,'srt.scored.RData'))
})
# Sample names start with a 4-letter cancer code that indexes `corr`
cancer = factor(substr(names(files.primary), 1, 4))
system = factor(corr[as.character(cancer),'system'])
germlayer = factor(corr[as.character(cancer),'germlayer'])

for (s in names(srt.list.primary)){
  srt = srt.list.primary[[s]]
  h = DimPlot(srt, group.by = 'orig.ident', cols = colors)
  print(h)
  PlotModuleScores(srt, pt.size = 1)
  # The t-SNE view split by 'cat' is optional; report why it was skipped
  # instead of discarding the error.
  tryCatch(expr = {
    srt = RunTSNE(srt, dims = 1:10)
    PlotModuleScores(srt, pt.size = 1, split.by = 'cat', reduction = 'tsne')
  }, error = function(e){
    message('t-SNE module plots skipped for ', s, ': ', conditionMessage(e))
  })
  srt = AssignState(srt)
  h = DimPlot(srt, group.by = 'state', cols = colors.module)
  print(h)
  # Number of modules scoring above 0.5 in each cell
  srt$num = rowSums(srt@meta.data[,names(modules)] > 0.5)
  h = FeaturePlot(srt, 'num', cols = c('grey','darkred'))
  print(h)
  srt.list.primary[[s]] = srt
}

srt.primary = Reduce(merge2, srt.list.primary)
srt.primary = RunPCA(srt.primary)
srt.primary = RunUMAP(srt.primary, dims = 1:10)

h = DimPlot(srt.primary, group.by = 'orig.ident', cols = colors, label = TRUE)
print(h)
h = DimPlot(srt.primary, group.by = 'orig.ident', cols = colors)
print(h)
PlotModuleScores(srt.primary, pt.size = 0.5)
srt.primary = AssignState(srt.primary)
h = DimPlot(srt.primary, group.by = 'state', cols = colors.module)
print(h)

save(srt.primary, file = 'srt.primary.RData')

# Same plots after re-normalising the merged object with SCTransform
srt.primary = SCTransform(srt.primary, return.only.var.genes = FALSE)
srt.primary = RunPCA(srt.primary)
srt.primary = RunUMAP(srt.primary, dims = 1:10)
#srt.primary = GeneToEnrichment(srt.primary, db = modules, method = 'rand', nrand = nrand, nbin = nbin)

h = DimPlot(srt.primary, group.by = 'orig.ident', cols = colors, label = TRUE)
print(h)
h = DimPlot(srt.primary, group.by = 'orig.ident', cols = colors)
print(h)
PlotModuleScores(srt.primary, pt.size = 1)
srt.primary = AssignState(srt.primary)
h = DimPlot(srt.primary, group.by = 'state', cols = colors.module)
print(h)

# ## TSNE (disabled; kept for reference, shared theme factored into theme.blank)
#
# meta = Reduce(rbind, lapply(srt.list.primary, function(srt){
#   srt@meta.data[, names(modules)]
# }))
# ts = tsne(meta, max_iter = max_iter, perplexity = perplexity)
#
# ts = data.frame(ts)
# colnames(ts) = c('x','y')
# ts$state = unlist(lapply(srt.list.primary, function(srt){srt$state}))
# ts$num = unlist(lapply(srt.list.primary, function(srt){srt$num}))
# ts$orig.ident = unlist(lapply(srt.list.primary, function(srt){srt$orig.ident}))
# for (m in names(modules)){
#   ts[,m] = as.numeric(unlist(lapply(srt.list.primary, function(srt){srt@meta.data[,m]})))
# }
#
# distances = as.matrix(dist(t(dist(ts[,c('x','y')])), method = 'euclidean'))
# ts$entropy = sapply(1:nrow(ts), function(c){
#   nn = order(distances[,c])[1:20]
#   ta = table(ts$orig.ident[nn])
#   ta = ta/sum(ta)
#   ta = ta[!ta == 0]
#   return(sum(ta*log(ta)))
# })
# ts$mentropy = sapply(1:nrow(ts), function(c){
#   nn = order(distances[,c])[1:20]
#   ta = colSums(ts[nn,names(modules)] > 0.5)
#   ta = ta/sum(ta)
#   ta = ta[!ta == 0]
#   return(sum(ta*log(ta)))
# })
#
# theme.blank = theme_classic() +
#   theme(
#     panel.border = element_rect(fill = NA),
#     legend.position = "none",
#     plot.title = element_text(hjust = 0.5),
#     axis.line.x = element_blank(),
#     axis.line.y = element_blank(),
#     axis.ticks.x = element_blank(),
#     axis.ticks.y = element_blank(),
#     axis.text.x = element_blank(),
#     axis.text.y = element_blank(),
#     axis.title.x = element_blank(),
#     axis.title.y = element_blank())
#
# p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +
#   geom_point() +
#   scale_fill_manual(values = colors.module) +
#   scale_color_manual(values = colors.module) +
#   theme.blank
# print(p)
# p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) +
#   geom_point() +
#   scale_fill_manual(values = colors) +
#   scale_color_manual(values = colors) +
#   theme.blank
# print(p)
# p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'entropy')) +
#   geom_point() +
#   scale_color_gradientn(colors = c("lightgrey", "slateblue4")) +
#   ggtitle('entropy') +
#   theme.blank
# print(p)
# p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'num')) +
#   geom_point() +
#   scale_color_gradientn(colors = c("lightgrey", "darkred")) +
#   ggtitle('num') +
#   theme.blank
# print(p)
# p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) +
#   geom_point() +
#   scale_fill_manual(values = colors) +
#   scale_color_manual(values = colors) +
#   facet_wrap(~ orig.ident, ncol = 6) +
#   theme.blank
# print(p)
# p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +
#   geom_point() +
#   scale_fill_manual(values = colors.module) +
#   scale_color_manual(values = colors.module) +
#   facet_wrap(~ orig.ident, ncol = 6) +
#   theme.blank
# print(p)
# plot.list = lapply(names(modules), function(m){
#   p = ggplot(ts, aes_string(x = 'x', y = 'y', color = m)) +
#     geom_point(size = 0.25) +
#     scale_color_gradientn(colors = c("lightgrey", colors.module[m])) +
#     ggtitle(m) +
#     theme.blank
# })
# print(CombinePlots(plot.list, ncol = 4))

## Stats

stats.list.primary = lapply(files.primary, function(f){
  stats = loadRData(paste0(f,'stats.RData'))
  stats = stats[,names(modules)]
  return(stats)
})

for (s in names(stats.list.primary)){
  stats = stats.list.primary[[s]]
  srt = srt.primary[, srt.primary$orig.ident == s]
  scores = srt@meta.data[,names(modules)]
  fraction = colMeans(scores > 0.5, na.rm = TRUE)
  data = GetData(srt, slot = 'scale.data')
  average = sapply(modules, function(mod){
    mean(data[intersect(mod,rownames(data)),])
  })
  num = mean(srt$num, na.rm = TRUE)
  # rbind names the new rows after these variables ('fraction', 'average',
  # 'num'); they become levels of stats.primary$measure, so do not rename them.
  stats = rbind(stats, fraction, average, num)
  stats.list.primary[[s]] = stats
}

stats.primary = melt(lapply(stats.list.primary, function(stats){
  melt(stats)
}))
names(stats.primary) = c('measure','module','variable','value','sample')
stats.primary$cancer = factor(substr(stats.primary$sample, 1, 4))
stats.primary$system = factor(corr[as.character(stats.primary$cancer),'system'])
stats.primary$germlayer = factor(corr[as.character(stats.primary$cancer),'germlayer'])
save(stats.primary, file = 'stats.primary.RData')

# One modules x samples heatmap per measure
for (measure in levels(stats.primary$measure)){

  mat = stats.primary[stats.primary$measure == measure, c('value','module','sample')]
  mat = reshape(mat, timevar = 'sample', idvar = 'module', direction = 'wide')
  rownames(mat) = mat[,1]
  mat = mat[,-1]
  colnames(mat) = gsub('value.', '', colnames(mat), fixed = TRUE)

  df = data.frame('subsample' = colnames(mat), stringsAsFactors = FALSE)
  df$sample = sapply(strsplit(df$subsample, split = '.', fixed = TRUE), '[', 1)
  df$cancer = substr(df$sample, 1, 4)
  df$system = factor(corr[as.character(df$cancer),'system'])
  df$germlayer = factor(corr[as.character(df$cancer),'germlayer'])
  top_ann = HeatmapAnnotation(df = df[,c('sample','system','germlayer')],
                              col = list('sample' = colors, 'system' = colors.system, 'germlayer' = colors.germlayer),
                              which = 'column')
  # Colour scale spans the 10%-90% quantile range of the matrix
  q = quantile(unlist(mat), c(0.1, 0.9), na.rm = TRUE)
  h = Heatmap(mat, name = measure,
              top_annotation = top_ann,
              cluster_rows = FALSE, cluster_columns = FALSE,
              show_row_names = TRUE, show_column_names = TRUE,
              row_names_gp = gpar(col = colors.module[rownames(mat)]), column_names_gp = gpar(col = colors[colnames(mat)]),
              breaks = seq(q['10%'], q['90%'], length = 11), colors = viridis_pal(begin = 0, end = 1, option = 'magma')(11))
  print(h)
  # Overlay a black grid separating every column and every row
  decorate_heatmap_body(measure, {
    #l = c(0,cumsum(rle(as.character(cancer))$lengths))
    l = 0:ncol(mat)
    for (k in seq_along(l)){
      i = unit(l[k]/max(l), 'npc')
      grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
    }
    l = 0:nrow(mat)
    for (k in seq_along(l)){
      i = unit(l[k]/max(l), 'npc')
      grid.lines(c(0,1), i, gp = gpar(lwd = 1, col = 'black'))
    }
  })

}

## Co-occurrence

par(mfrow = c(2,2))
coocc.list = list()
occ.list = list()
for (s in names(srt.list.primary)){

  srt = srt.list.primary[[s]]
  stats = stats.list.primary[[s]]
  meta = srt@meta.data[, names(modules)]
  active = meta > 0.5
  n.cells = nrow(active)

  occ = stats['frequency',]

  # Signed hypergeometric score for every module pair: -log10(p) when the
  # upper tail is at most 0.5, log10(1 - p) otherwise.
  coocc = sapply(names(modules), function(m1){
    sapply(names(modules), function(m2){
      pval = phyper(
        sum(active[, m1] & active[, m2]),
        sum(active[, m2]),
        n.cells - sum(active[, m2]),
        sum(active[, m1]),
        lower.tail = FALSE)
      if (pval <= 0.5) -log10(pval) else log10(1-pval)
    })
  })

  # Replace infinite scores (p of exactly 0 or 1) by the most extreme finite one
  coocc[is.infinite(coocc) & coocc > 0] = max(coocc[is.finite(coocc)])
  coocc[is.infinite(coocc) & coocc < 0] = min(coocc[is.finite(coocc)])

  occ.list[[s]] = occ
  coocc.list[[s]] = coocc

  keep = names(which(stats['presence',] > 0.5 & stats['frequency',] > 0.05))
  PlotCooccurrenceGraph(coocc[keep, keep], occ[keep], s, relative = TRUE)
}

for (can in levels(cancer)){
  PlotMedianCooccurrence(cancer == can, can)
}

for (sys in levels(system)){
  PlotMedianCooccurrence(system == sys, sys)
}

PlotMedianCooccurrence(seq_along(coocc.list), 'All')

par(mfrow = c(1,1))


#### All ####

# Samples without an srt.all.RData file are skipped (and reported)
srt.list.primary.all = lapply(files.primary, function(f){
  tryCatch(expr = {
    loadRData(paste0(f,'srt.all.RData'))
  }, error = function(e){
    message('srt.all.RData not loaded for ', f, ': ', conditionMessage(e))
    NULL
  })
})
srt.list.primary.all = srt.list.primary.all[sapply(srt.list.primary.all, length) > 0]

# Merge the complete (malignant + microenvironment) objects of the primary samples
srt.primary.all = Reduce(merge2, srt.list.primary.all)
srt.primary.all = RunPCA(srt.primary.all)
srt.primary.all = RunUMAP(srt.primary.all, dims = 1:10)
save(srt.primary.all, file = 'srt.primary.all.RData')

colors.pop = hue_pal()(nlevels(srt.primary.all$pop))
names(colors.pop) = levels(srt.primary.all$pop)
colors.pop['Malignant'] = 'black'
save(colors.pop, file = 'colors.pop.primary.RData')

# UMAP coloured by sample, type and population, each with and without labels
o = sample(1:ncol(srt.primary.all), size = ncol(srt.primary.all), replace = FALSE)
group.colors = list('orig.ident' = colors, 'type' = colors.type, 'pop' = colors.pop)
for (group in names(group.colors)){
  for (label in c(TRUE, FALSE)){
    h = DimPlot(srt.primary.all, group.by = group, cols = group.colors[[group]], order = o, pt.size = 1, label = label)
    print(h)
  }
}

srt.primary.tme = srt.primary.all[, !srt.primary.all$pop == 'Malignant']
srt.primary.tme = RunPCA(srt.primary.tme)
srt.primary.tme = RunUMAP(srt.primary.tme, dims = 1:10)
save(srt.primary.tme, file = 'srt.primary.tme.RData')

# Frequency regression

# Per-sample fraction of malignant cells scoring above 0.5 for each module
freq = t(sapply(levels(srt.primary$orig.ident), function(or){
  colMeans(srt.primary@meta.data[srt.primary$orig.ident == or, names(modules)] > 0.5, na.rm = TRUE)
}))
# Per-sample composition of the microenvironment
tme = table(srt.primary.tme$orig.ident, srt.primary.tme$pop)
exclude = c('Malignant','Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell',
            'CMP','Erythroblast','GMP','Gametocytes','HSC_CD34+','Pre-B_cell_CD34-','Pro-B_cell_CD34+','Pro-Myelocyte')
tme = tme[, setdiff(colnames(tme), exclude)]
tme = tme/rowSums(tme)

# Samples without srt.all.RData are missing from tme, so match the two tables
# by sample name before regressing one on the other.
common = intersect(rownames(freq), rownames(tme))
freq = freq[common, , drop = FALSE]
tme = tme[common, , drop = FALSE]

# Multiple regression
test = sapply(names(modules), function(m){
  fit = summary(lm(freq[,m] ~ tme))$coefficients
  pval = -log10(fit[,4])
  pval[pval < -log10(pval_thresh)] = 0
  names(pval) = gsub('tme','',names(pval))
  est = fit[,1]
  names(est) = gsub('tme','',names(est))
  val = pval*sign(est)
  return(val)
})
h = Heatmap(test,
            show_row_names = TRUE, show_column_names = TRUE,
            cluster_rows = FALSE, cluster_columns = FALSE,
            breaks = seq(-3, 3, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
print(h)

# Plot hits
par(mfrow = c(4,4))
for (ct in colnames(tme)){
  for (m in names(modules)){
    fit = summary(lm(freq[,m] ~ tme[,ct]))$coefficients
    pval = -log10(fit[2,4])
    plot(freq[,m] ~ tme[,ct], col = colors[rownames(tme)], pch = 20, xlab = ct, ylab = m, main = round(pval, digits = 2))
    # pval is on the -log10 scale, so compare it with the threshold on the same
    # scale, as the multiple regression above does.
    if (pval > -log10(pval_thresh)){
      abline(a = fit[1,1], b = fit[2,1], lty = 2)
    }
  }
}
par(mfrow = c(1,1))

dev.off()

--------------------------------------------------------------------------------
/Scoring.R:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript
# Scores every module in the malignant cells of each sample (and of each
# normal/tumor pair), then writes the scored object, a PDF report and the
# per-module statistics into the sample directory.
source('~/Documents/R/seurat_functions.R')
setwd('~/Documents/Analysis/Tumors/')

load('parameters.RData')
modules = loadRData('modules.RData')
colors.module = loadRData('colors.module.RData')

## Helpers

# Add one metadata column per module to srt (initialised to 0, then filled by
# GeneToEnrichment) and label every cell with its top-scoring module in
# srt$state. Scoring is best-effort per module: on error the module keeps its
# zero scores and the reason is reported.
ScoreCells = function(srt, modules_rand){
  ini = matrix(0,nrow = ncol(srt), ncol = length(modules))
  rownames(ini) = colnames(srt)
  colnames(ini) = names(modules)
  srt@meta.data[,names(modules)] = as.data.frame(ini)

  for (m in names(modules)){
    tryCatch(expr = {
      srt = GeneToEnrichment(srt, db = modules[m], method = 'rand', db_rand = modules_rand[m])
    }, error = function(e){
      message('Module ', m, ' not scored: ', conditionMessage(e))
    })
  }

  srt$state = apply(srt@meta.data[,names(modules)], 1, function(x){
    names(x)[which.max(x)]
  })
  srt$state = factor(srt$state, levels = names(modules))
  return(srt)
}

# Draw the module co-occurrence graph for a cells x modules score table.
# A module counts as active in a cell when its score exceeds 0.5. Edge weights
# are signed hypergeometric scores: -log10(p) when the upper tail is at most
# 0.5, log10(1 - p) otherwise.
PlotCooccurrence = function(scores){
  active = scores > 0.5
  n.cells = nrow(active)
  occ = colMeans(active)
  coocc = sapply(names(modules), function(m1){
    sapply(names(modules), function(m2){
      pval = phyper(
        sum(active[, m1] & active[, m2]),
        sum(active[, m2]),
        n.cells - sum(active[, m2]),
        sum(active[, m1]),
        lower.tail = FALSE)
      if (pval <= 0.5) -log10(pval) else log10(1-pval)
    })
  })
  # Replace infinite scores (p of exactly 0 or 1) by the most extreme finite one
  coocc[is.infinite(coocc) & coocc > 0] = max(coocc[is.finite(coocc)])
  coocc[is.infinite(coocc) & coocc < 0] = min(coocc[is.finite(coocc)])
  # Ignore modules active in at most 1% of the cells
  rare = which(occ <= 0.01)
  coocc[rare,] = 0
  coocc[, rare] = 0
  # Drop weak associations. The previous test (> thresh & < -thresh) could
  # never be true, so nothing was filtered.
  # NOTE(review): pval_thresh comes from parameters.RData; MergingPrimary.R
  # compares scores with -log10(pval_thresh) - confirm which scale is meant here.
  coocc[abs(coocc) < pval_thresh] = 0
  coocc[coocc > 10] = 10
  coocc[coocc < -10] = -10
  g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
  coords = layout.circle(g)
  E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
  E(g)$width = abs(E(g)$weight)/5
  V(g)$color = colors.module[names(V(g))]
  V(g)$size = occ*200
  plot(g,
       layout = coords,
       vertex.shape = 'circle',
       vertex.label.size = 1, vertex.label.color = 'black',
       frame = FALSE)
  invisible(NULL)
}

## All

mclapply(files.all, function(f){
  # Work inside the sample directory and always return to the previous one,
  # even on error, so the next sample handled by this worker starts from the
  # expected place.
  old.wd = setwd(f)
  on.exit(setwd(old.wd), add = TRUE)
  srt = loadRData('srt.malignant.RData')

  modules_rand = MakeRand(srt, db = modules, nrand = nrand, nbin = nbin)
  srt = ScoreCells(srt, modules_rand)

  scores = srt@meta.data[,names(modules)]
  frequency = colMeans(scores > 0.5, na.rm = TRUE)
  save(srt, file = 'srt.scored.RData')

  pdf('scoring.pdf')
  plot.list = lapply(names(modules), function(m){
    FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m])) +
      NoAxes() + NoLegend() +
      theme(plot.title = element_text(size = 10))
  })
  print(CombinePlots(plot.list, ncol = 4))
  h = DimPlot(srt, group.by = 'state', cols = colors.module)
  print(h)
  # Co-occurrence
  PlotCooccurrence(scores)
  dev.off()

  stats = ScoreModule(srt, db = modules, method = 'rand', db_rand = modules_rand)
  # rbind names the new row 'frequency' after the variable; MergingPrimary.R
  # reads stats['frequency',], so keep the name.
  stats = rbind(stats, frequency)
  save(stats, file = 'stats.RData')

  return(NULL)
}, mc.cores = ncores)


## Paired

mclapply(files.paired.list, function(f){

  old.wd = setwd(paste0(f[['normal']],'Paired'))
  on.exit(setwd(old.wd), add = TRUE)

  srt.normal = loadRData(paste0(f[['normal']],'srt.malignant.RData'))
  srt.tumor = loadRData(paste0(f[['tumor']],'srt.malignant.RData'))

  stats.normal = ScoreModule(srt.normal, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
  rownames(stats.normal) = paste0(rownames(stats.normal),'.normal')
  stats.tumor = ScoreModule(srt.tumor, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
  rownames(stats.tumor) = paste0(rownames(stats.tumor),'.tumor')

  # merge() puts the normal cells first, so they occupy the leading columns
  srt = merge(srt.normal, srt.tumor)
  srt$cat = 'tumor'
  srt$cat[seq_len(ncol(srt.normal))] = 'normal'

  srt = SCTransform(srt, return.only.var.genes = FALSE)
  srt = RunPCA(srt)
  srt = RunUMAP(srt, dims = 1:10)

  modules_rand = MakeRand(srt, db = modules, nrand = nrand, nbin = nbin)
  srt = ScoreCells(srt, modules_rand)

  scores = srt@meta.data[,names(modules)]
  frequency.normal = colMeans(scores[srt$cat == 'normal',] > 0.5, na.rm = TRUE)
  frequency.tumor = colMeans(scores[srt$cat == 'tumor',] > 0.5, na.rm = TRUE)
  save(srt, file = 'srt.scored.RData')

  pdf('scoring.pdf')
  plot.list = lapply(names(modules), function(m){
    FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m]), split.by = 'cat') +
      NoAxes() + NoLegend() +
      theme(plot.title = element_text(size = 10))
  })
  print(CombinePlots(plot.list, ncol = 4))
  h = DimPlot(srt, group.by = 'state', cols = colors.module, split.by = 'cat')
  print(h)
  h = DimPlot(srt, group.by = 'cat', cols = colors.cat)
  print(h)
  # Co-occurrence, separately for the normal and the tumor cells
  for (ca in c('normal','tumor')){
    PlotCooccurrence(scores[srt$cat == ca,])
  }
  dev.off()

  # The frequency rows are named after the variables ('frequency.normal',
  # 'frequency.tumor') by rbind.
  stats = rbind(stats.normal, stats.tumor, frequency.normal, frequency.tumor)
  save(stats, file = 'stats.RData')

  return(NULL)
}, mc.cores = ncores)


--------------------------------------------------------------------------------
/Visium.R:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript
source('~/Documents/R/seurat_functions.R')
setwd('~/Documents/Analysis/Visium/')

load('~/Documents/Analysis/Tumors/parameters.RData')
w = which(names(colors) == 'LIHCHs1')
l = colors[['LIHCHs1']]
colors = c(colors, 'LIHCHs1' = l, 'LIHCHs1A' = l, 'LIHCHs1B' = l)
colors = colors[-w]
nrand = 2
nbin = 10
cats = c('Malignant','Both','Normal')
colors.cat = brewer_pal(palette = 'RdYlGn')(8)[c(2,5,7)]
names(colors.cat) = cats
method = 'score'
pval_thresh = 1

for (prox in c('inverse')){

  modules = loadRData('../Tumors/modules.RData')
  colors.module = loadRData('../Tumors/colors.module.RData')
  types.module = loadRData('../Tumors/types.module.RData')
  modules = modules[!names(modules) %in% c('AC','OPC','NPC')]
  colors.module = colors.module[names(modules)]
types.module = types.module[names(modules)] 26 | ma = names(modules) 27 | 28 | colors.pop = loadRData('../Tumors/colors.pop.ours.RData') 29 | 30 | colors.bin = rep(Seurat:::SpatialColors(n = 3)[c(1,3)],2) 31 | names(colors.bin) = c('FALSE','TRUE','0','1') 32 | 33 | srt.tme = loadRData('~/Documents/Analysis/Tumors/srt.ours.tme.RData') 34 | exclude = c('Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell') 35 | srt.tme = srt.tme[, !srt.tme$pop %in% exclude] 36 | rm = (srt.tme$pop == 'B_cell' & !colSums(GetData(srt.tme, slot = 'data')[c('CD4','GZMB'),]) == 0) # rm errors in B cell annotations 37 | srt.tme = srt.tme[, !rm] 38 | # keep = sample(1:ncol(srt.tme), size = 5000, replace = FALSE) 39 | # keep = unlist(lapply(levels(srt.tme$pop), function(ct){ 40 | # x = sample(colnames(srt.tme)[srt.tme$pop == ct]) 41 | # if (length(x) > 1000){ 42 | # x = sample(x, size = 1000, replace = FALSE) 43 | # } 44 | # return(x) 45 | # })) 46 | # srt.tme = srt.tme[,keep] 47 | 48 | pdf('macrophages.pdf') 49 | #srt.mac = srt.tme 50 | srt.mac = loadRData('~/Documents/Tumors/OVCAHs/OVCAHs1/srt.normal.RData') 51 | srt.mac = srt.mac[, srt.mac$pop == 'Macrophage'] 52 | srt.mac = SCTransform(srt.mac, return.only.var.genes = FALSE) 53 | srt.mac = RunPCA(srt.mac) 54 | srt.mac = RunUMAP(srt.mac, dims = 1:10) 55 | srt.mac = FindNeighbors(srt.mac) 56 | srt.mac = FindClusters(srt.mac) 57 | srt.mac$mac = factor(srt.mac$seurat_clusters, labels = c('M1','M2')) 58 | h = DimPlot(srt.mac, group.by = 'mac', cols = c('darkorange','darkred')) 59 | print(h) 60 | markers = FindAllMarkers2(srt.mac, group.by = 'mac', do.print = FALSE, do.enrichment = FALSE, do.plot = TRUE) 61 | db_mac = split(markers$genes_100$gene, markers$genes_100$cluster) 62 | print(db_mac) 63 | data = GetData(srt.mac, slot = 'scale.data')[unlist(db_mac),] 64 | top_ann = HeatmapAnnotation(df = data.frame('mac' = 
srt.mac$mac), 65 | col = list('mac' = c('M1' = 'darkorange','M2' = 'darkred')), 66 | which = 'column') 67 | h = Heatmap(data, top_annotation = top_ann, 68 | cluster_rows = FALSE, cluster_columns = FALSE, 69 | breaks = c(-2,0,2), colors = c('blue','white','red'), 70 | column_split = srt.mac$mac, 71 | row_split = unlist(lapply(names(db_mac), function(x){rep(x,length(db_mac[[x]]))}))) 72 | labels = c('IFIT1','TNF','IL1B','C1QA','CD14','CD163','APOE','HLA-DRA','MS4A4A') 73 | r = rowAnnotation(link = anno_mark(at = which(rownames(data) %in% labels), labels = labels)) 74 | print(h+r) 75 | dev.off() 76 | 77 | files = c( 78 | 'OVCAHs/OVCAHs1', 79 | #'OVCAHs/OVCAHs2', 80 | 'OVCAHs/OVCAHs3', 81 | 'BRCAHs/BRCAHs0', 82 | 'BRCAHs/BRCAHs1', 83 | 'BRCAHs/BRCAHs2', 84 | 'UCECHs/UCECHs3', 85 | 'LIHCHs/LIHCHs1', 86 | #'LIHCHs/LIHCHs1A', 87 | #'LIHCHs/LIHCHs1B', 88 | 'PDACHs/PDACHs1', 89 | #'PDACHs/PDACHs2', 90 | 'GISTHs/GISTHs1', 91 | 'GISTHs/GISTHs2') 92 | names(files) = sapply(strsplit(files, '/', fixed = TRUE), '[', 2) 93 | 94 | meta.list = mclapply(names(files), function(fa){ 95 | 96 | pdf(paste0(fa,'.',prox,'.pdf'), height = 10, width = 15) 97 | f = files[[fa]] 98 | meta = c() 99 | 100 | #### Loading data #### 101 | 102 | # ST 103 | 104 | st = loadRData(paste0('~/Documents/Visium/',f,'/st.RData')) 105 | 106 | res = loadRData(paste0('~/Documents/Visium/',f,'/res.RData')) 107 | modules_st = NMFToModules(res, gmin = 5) 108 | nmf = names(modules_st) 109 | 110 | st = AddMetaData(st, metadata = st@images$slice1@coordinates[,c('row','col')], col.name = c('row','col')) 111 | st$axis = st$row 112 | if (fa == 'OVCAHs1'){ 113 | st$axis = st$row 114 | } 115 | if (fa == 'OVCAHs2'){ 116 | st$axis = st$col 117 | } 118 | if (fa == 'OVCAHs3'){ 119 | st$axis = sqrt((st$row - 0)^2 + (st$col - 70)^2) 120 | } 121 | if (fa == 'BRCAHs0'){ 122 | st$axis = st$row - 1/3*st$col 123 | } 124 | if (fa == 'BRCAHs1'){ 125 | st$axis = st$row 126 | } 127 | if (fa == 'BRCAHs2'){ 128 | st$axis = - sqrt((st$col - 
80)^2 + ((st$row - 25)*1.5)^2) 129 | } 130 | if (fa == 'UCECHs3'){ 131 | st$axis = st$col - 1.2*st$row 132 | } 133 | if (fa == 'LIHCHs1'){ 134 | st$axis = st$row 135 | } 136 | if (fa == 'PDACHs1'){ 137 | st$axis = st$col - st$row 138 | } 139 | if (fa == 'PDACHs2'){ 140 | st$axis = st$col 141 | } 142 | if (fa == 'GISTHs1'){ 143 | st$axis = - st$row - 3/4*st$col 144 | } 145 | if (fa == 'GISTHs2'){ 146 | st$axis = st$row + 1/3*st$col 147 | } 148 | st$axis = (st$axis - min(st$axis))/(max(st$axis) - min(st$axis)) 149 | h = SpatialFeaturePlot(st, 'axis') 150 | print(h) 151 | 152 | h = SpatialFeaturePlot(st, c('nCount_Spatial','nCount_SCT','nFeature_Spatial','nFeature_SCT')) 153 | print(h) 154 | 155 | colors.clone = brewer_pal(palette = 'Accent')(nlevels(st$clone)) 156 | names(colors.clone) = levels(st$clone) 157 | 158 | h = SpatialDimPlot(st, group.by = 'clone', cols = colors.clone) 159 | print(h) 160 | 161 | # Single cell 162 | 163 | if (fa %in% c('LIHCHs1A','LIHCHs1B')){ 164 | srt.malignant = loadRData(paste0('~/Documents/Tumors/LIHCHs/LIHCHs1/srt.malignant.RData')) 165 | } else { 166 | srt.malignant = loadRData(paste0('~/Documents/Tumors/',f,'/srt.malignant.RData')) 167 | } 168 | if (!all(sapply(ma, function(x){x %in% names(srt.malignant@meta.data)}))){ 169 | srt.malignant = GeneToEnrichment(srt.malignant, db = modules, method = 'rand', nrand = nrand, nbin = nbin) 170 | } 171 | ma_bin = paste0(ma, '_bin') 172 | scores = srt.malignant@meta.data[,ma] 173 | scores_bin = scores 174 | scores_bin[] = as.numeric(scores_bin > 0.5) 175 | srt.malignant = AddMetaData(srt.malignant, metadata = scores_bin, col.name = ma_bin) 176 | 177 | if (fa %in% c('LIHCHs1A','LIHCHs1B')){ 178 | srt.normal = loadRData(paste0('~/Documents/Tumors/LIHCHs/LIHCHs1/srt.normal.RData')) 179 | } else { 180 | srt.normal = loadRData(paste0('~/Documents/Tumors/',f,'/srt.normal.RData')) 181 | } 182 | if (f %in% c('GISTHs/GISTHs1')){ 183 | exclude = 
c('Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell') 184 | } else { 185 | exclude = c('Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell') 186 | } 187 | srt.normal = srt.normal[, !srt.normal$pop %in% exclude] 188 | ta = table(srt.normal$pop) 189 | spec = names(ta)[ta > 20] 190 | gen = setdiff(levels(srt.tme$pop), spec) 191 | srt.normal = merge2(srt.normal[,srt.normal$pop %in% spec], srt.tme[,srt.tme$pop %in% gen]) 192 | srt.normal = SCTransform(srt.normal, return.only.var.genes = FALSE) 193 | 194 | srt = merge2(srt.malignant, srt.normal) 195 | srt = SCTransform(srt, return.only.var.genes = FALSE) 196 | srt$pop = as.character(srt$pop) 197 | srt$pop = factor(srt$pop, levels = intersect(names(colors.pop), srt$pop)) 198 | srt$pop = relevel(srt$pop, ref = 'Malignant') 199 | Idents(srt) = 'pop' 200 | srt = SCTransform(srt, return.only.var.genes = FALSE) 201 | srt = RunPCA(srt) 202 | srt = RunUMAP(srt, dims = 1:10) 203 | srt = RunTSNE(srt, dims = 1:10) 204 | h = DimPlot(srt, reduction = 'umap', group.by = 'pop', cols = colors.pop) 205 | print(h) 206 | h = DimPlot(srt, reduction = 'tsne', group.by = 'pop', cols = colors.pop) 207 | print(h) 208 | 209 | markers = FindAllMarkers2(srt, group.by = 'pop') 210 | genes.srt = markers$genes_100$gene 211 | 212 | #### NMF #### 213 | 214 | plot.list = lapply(nmf, function(x){ 215 | h = SpatialFeaturePlot(st, x) 216 | }) 217 | print(CombinePlots(plot.list)) 218 | 219 | #### Anchoring #### 220 | 221 | # Find pure cell types from integration labels 222 | 223 | options(future.globals.maxSize = 5000*1024^2) 224 | object.list = list('SC' = srt, 'ST' = st) 225 | genes.use = intersect(SpatiallyVariableFeatures(st), VariableFeatures(srt)) 226 | #genes.use = intersect(SpatiallyVariableFeatures(st), 
genes.srt) 227 | object.list = PrepSCTIntegration(object.list = object.list, 228 | anchor.features = genes.use, 229 | verbose = FALSE) 230 | anchors = FindTransferAnchors(reference = object.list$SC, query = object.list$ST, 231 | normalization.method = 'SCT', 232 | features = genes.use, 233 | verbose = FALSE) 234 | predictions = TransferData(anchorset = anchors, refdata = srt$pop) 235 | predictions = predictions[,!colnames(predictions) %in% c('predicted.id','prediction.score.max')] 236 | colnames(predictions) = gsub('prediction.score','pred',colnames(predictions)) 237 | pred = colnames(predictions) 238 | st = AddMetaData(st, metadata = predictions, col.name = pred) 239 | plot.list = lapply(pred, function(x){ 240 | h = SpatialFeaturePlot(st, features = x) 241 | }) 242 | print(CombinePlots(plot.list)) 243 | # From here on `pred` keeps only non-constant scores, sorted, with 'pred.Malignant' first 244 | pred = colnames(predictions)[apply(predictions, 2, var) > 0] 245 | pred = pred[order(pred)] 246 | pred = c('pred.Malignant', setdiff(pred, 'pred.Malignant')) 247 | 248 | # Binarize 249 | # Subset/reorder to `pred` first so the `pred_bin` names match the columns one-to-one 250 | predictions_bin = predictions[, pred, drop = FALSE] 251 | predictions_bin[] = as.numeric(predictions_bin > 0.9) 252 | pred_bin = paste0(pred, '_bin') 253 | st = AddMetaData(st, predictions_bin, col.name = pred_bin) 254 | plot.list = lapply(pred_bin, function(x){ 255 | h = SpatialDimPlot(st, x, cols = colors.bin) 256 | }) 257 | print(CombinePlots(plot.list)) 258 | 259 | #### NNLS from single-cell #### 260 | 261 | # Data 262 | 263 | genes.use = intersect(SpatiallyVariableFeatures(st), VariableFeatures(srt)) 264 | #genes.use = intersect(SpatiallyVariableFeatures(st), genes.srt) 265 | prof = AverageExpression(srt, assay = 'SCT', slot = 'data')$SCT[genes.use,] 266 | data = as.matrix(GetAssayData(st, assay = 'SCT', slot = 'data'))[genes.use,] 267 | 268 | # Regression 269 | 270 | coef = t(apply(data, 2, function(y){ 271 | coef(nnls(as.matrix(prof), y)) 272 | })) 273 | colnames(coef) = colnames(prof) 274 | nnls = colnames(coef)[colSums(coef > 0) >= 20] 275 | prof = prof[,nnls] 276 | coef = 
t(apply(data, 2, function(y){ 277 | coef(nnls(as.matrix(prof), y)) 278 | })) 279 | colnames(coef) = nnls 280 | st = AddMetaData(st, coef, col.name = nnls) 281 | plot.list = lapply(nnls, function(x){ 282 | h = SpatialFeaturePlot(st, x) 283 | }) 284 | print(CombinePlots(plot.list)) 285 | plot.list = lapply(nnls, function(x){ 286 | h = SpatialFeaturePlot(st, x) 287 | }) 288 | print(CombinePlots(plot.list)) 289 | 290 | # Scaling from null from pure cell types 291 | 292 | coef_scaled = sapply(nnls, function(x){ 293 | vec = coef[,x] 294 | y = paste0('pred.',x) 295 | spots.use = colnames(st)[st@meta.data[,y] == 0] 296 | #spots.use = colnames(st)[st@meta.data[,y] == 0 & rowSums(predictions_bin) == 1] 297 | if (length(spots.use) < 5){ 298 | spots.use = colnames(st)[order(st@meta.data[,y])[1:5]] 299 | } 300 | data_rand = Reduce(cbind, lapply(1:10^nrand, function(i){ 301 | t(apply(data[,spots.use], 1, function(expr){ 302 | sample(expr, length(expr), replace = FALSE) 303 | })) 304 | })) 305 | coef_rand = t(apply(data_rand, 2, function(y){ 306 | coef(nnls(as.matrix(prof), y)) 307 | })) 308 | colnames(coef_rand) = nnls 309 | if (sd(coef_rand[,x]) == 0){ 310 | return((coef[,x] - mean(coef_rand[,x]))/min(coef[coef[,x] > 0,x])) 311 | } else { 312 | return((coef[,x] - mean(coef_rand[,x]))/sd(coef_rand[,x])) 313 | } 314 | }) 315 | nnls_scaled = paste0(nnls, '_scaled') 316 | st = AddMetaData(st, coef_scaled, col.name = nnls_scaled) 317 | plot.list = lapply(nnls_scaled, function(x){ 318 | h = SpatialFeaturePlot(st, x) 319 | }) 320 | print(CombinePlots(plot.list)) 321 | 322 | # Binarizing 323 | 324 | coef_bin = coef_scaled 325 | coef_bin[] = (coef_bin > 2) 326 | # for (x in rownames(coef_bin)){ 327 | # if (sum(coef_bin[x,]) == 0){ 328 | # y = coef_scaled[x,] 329 | # y = sort(y, decreasing = TRUE) 330 | # if (y[1] > 1 & y[1] - y[2] > 1){ 331 | # coef_bin[x,names(y)[1]] = 1 332 | # } 333 | # } 334 | # } 335 | 336 | nnls_bin = paste0(nnls, '_bin') 337 | st = AddMetaData(st, coef_bin, 
col.name = nnls_bin) 338 | plot.list = lapply(nnls_bin, function(x){ 339 | h = SpatialDimPlot(st, x, cols = colors.bin) 340 | }) 341 | print(CombinePlots(plot.list)) 342 | 343 | nnls = setdiff(nnls, 'Epithelial_cells') 344 | nnls_scaled = paste0(nnls, '_scaled') 345 | nnls_bin = paste0(nnls, '_bin') 346 | 347 | # M1 vs M2 348 | # Score M1/M2 gene sets on the spots flagged Macrophage_bin == 1, then copy the scores back to `st` 349 | st.mac = st[, st$Macrophage_bin == 1] 350 | st.mac = SCTransform(st.mac, return.only.var.genes = FALSE, assay = 'Spatial') 351 | st.mac = GeneToEnrichment(st.mac, db = db_mac, method = 'score', nbin = nbin) 352 | st.mac$M1M2 = st.mac$M1 - st.mac$M2 353 | st.mac$M1_bin = as.numeric(st.mac$M1 > st.mac$M2) 354 | st.mac$M2_bin = as.numeric(st.mac$M2 > st.mac$M1) 355 | st.mac$M1M2_bin = c('M2','M1')[1 + (st.mac$M1 > st.mac$M2)] # TRUE -> index 2 must be 'M1' to agree with M1_bin 356 | 357 | h = SpatialFeaturePlot(st.mac, c('M1','M2','M1M2')) 358 | print(h) 359 | 360 | srt = GeneToEnrichment(srt, db = db_mac, method = 'score', nbin = nbin) 361 | srt$M1M2 = srt$M1 - srt$M2 362 | h = VlnPlot(srt, 'M1M2', group.by = 'pop', cols = colors.pop) 363 | print(h) 364 | # Spots outside st.mac get NA scores and 0 for the binary flags 365 | st@meta.data[,c('M1','M2','M1M2','M1M2_bin')] = NA 366 | st@meta.data[,c('M1_bin','M2_bin')] = 0 367 | st@meta.data[colnames(st.mac),c('M1','M2','M1M2','M1M2_bin','M1_bin','M2_bin')] = st.mac@meta.data[,c('M1','M2','M1M2','M1M2_bin','M1_bin','M2_bin')] 368 | 369 | nnls = c(nnls, 'M1', 'M2') 370 | nnls_scaled = paste0(nnls, '_scaled') 371 | nnls_bin = paste0(nnls, '_bin') 372 | 373 | nnls1 = setdiff(nnls, c('M1', 'M2')) 374 | nnls1_scaled = paste0(nnls1, '_scaled') 375 | nnls1_bin = paste0(nnls1, '_bin') 376 | 377 | nnls2 = setdiff(nnls, 'Macrophage') 378 | nnls2_scaled = paste0(nnls2, '_scaled') 379 | nnls2_bin = paste0(nnls2, '_bin') 380 | 381 | # Average neighborhood coefficient 382 | 383 | nei = FindSTNeighbors(st, d_min = 0, d_max = 1.5) 384 | coef_nei = sapply(nnls_bin, function(x){ 385 | sapply(nei, function(spot){ 386 | y = st@meta.data[spot,x] 387 | y[y < 0] = 0 388 | mean(y, na.rm = TRUE) 389 | }) 390 | }) 391 | 
colnames(coef_nei) = nnls 392 | nnls_nei = paste0(nnls, '_nei') 393 | nnls1_nei = paste0(nnls1, '_nei') 394 | nnls2_nei = paste0(nnls2, '_nei') 395 | st = AddMetaData(st, coef_nei, col.name = nnls_nei) 396 | h = SpatialFeaturePlot(st, nnls_nei) 397 | print(h) 398 | 399 | # M1 M2 400 | 401 | M1M2_nei = sapply(nei, function(spot){ 402 | y = st@meta.data[spot,'M1M2'] 403 | mean(y, na.rm = TRUE) 404 | }) 405 | M1M2_nei[is.nan(M1M2_nei)] = NA 406 | st = AddMetaData(st, M1M2_nei, col.name = 'M1M2_nei') 407 | h = SpatialFeaturePlot(st, c('M1','M2','M1M2','M1_nei','M2_nei','M1M2_nei')) 408 | print(h) 409 | 410 | #### Distances #### 411 | 412 | # Add distance to cell types 413 | 414 | coord = st@images$slice1@coordinates[,c('imagerow','imagecol')] 415 | distances = as.matrix(dist(coord)) 416 | distances = distances/min(distances[distances > 0]) 417 | #distances = round(distances, digits = 1) 418 | coef_dist = sapply(nnls_bin, function(x){ 419 | w = colnames(st)[as.logical(st@meta.data[,x])] 420 | w = w[!is.na(w)] 421 | if (length(w) == 1){ 422 | mi = as.numeric(distances[,w]) 423 | } else { 424 | mi = apply(distances[,w], 1, function(x){min(as.numeric(x), na.rm = TRUE)}) 425 | } 426 | return(mi) 427 | }) 428 | if (prox == 'inverse'){ 429 | coef_dist = 1/(1+coef_dist) 430 | } 431 | if (prox == 'opposite'){ 432 | coef_dist = -coef_dist 433 | } 434 | colnames(coef_dist) = nnls 435 | nnls_dist = paste0(nnls, '_dist') 436 | nnls1_dist = paste0(nnls1, '_dist') 437 | nnls2_dist = paste0(nnls2, '_dist') 438 | st = AddMetaData(st, coef_dist, col.name = nnls_dist) 439 | plot.list = lapply(nnls_dist, function(x){ 440 | h = SpatialFeaturePlot(st, x) 441 | }) 442 | print(CombinePlots(plot.list)) 443 | 444 | # Malignant 445 | 446 | depth = apply(coef_dist[,!colnames(coef_dist)=='Malignant'], 1, min) 447 | if (prox == 'inverse'){ 448 | depth = 1/(1+depth) 449 | } 450 | if (prox == 'opposite'){ 451 | depth = -depth 452 | } 453 | st = AddMetaData(st, depth, col.name = 'Depth') 454 | h = 
SpatialFeaturePlot(st, 'Depth') 455 | print(h) 456 | 457 | # M1 M2 458 | 459 | st$M1M2_dist = st$M1_dist - st$M2_dist 460 | h = SpatialFeaturePlot(st, c('M1_dist','M2_dist','M1M2_dist')) 461 | print(h) 462 | 463 | #### Splitting #### 464 | 465 | # Categories 466 | 467 | st$cat = apply(st@meta.data[,nnls_bin], 1, function(x){ 468 | if (x['Malignant_bin']){ 469 | if (sum(x) == 1){ 470 | return('Malignant') 471 | } else { 472 | return('Both') 473 | } 474 | } else { 475 | if (sum(x) == 0){ 476 | return(NA) 477 | } else { 478 | return('Normal') 479 | } 480 | } 481 | }) 482 | st$cat = factor(st$cat, levels = cats) 483 | h = SpatialDimPlot(st, 'cat', pt.size.factor = 1, stroke = 0) 484 | print(h + scale_fill_manual(values = colors.cat)) 485 | print(table(st$cat)) 486 | 487 | # Compare all metrics 488 | 489 | h = VlnPlot(st, nmf, group.by = 'cat', pt.size = 0) 490 | print(h) 491 | 492 | h = VlnPlot(st, pred, group.by = 'cat', pt.size = 0) 493 | print(h) 494 | 495 | h = VlnPlot(st, nnls1_scaled, group.by = 'cat', pt.size = 0) 496 | print(h) 497 | 498 | plot.list = lapply(nmf, function(x){ 499 | h = FeatureScatter(st, 'axis', x, group.by = 'cat', cols = colors.cat, span = 0.3, plot.cor = FALSE) 500 | }) 501 | print(CombinePlots(plot.list)) 502 | 503 | plot.list = lapply(pred, function(x){ 504 | h = FeatureScatter(st, 'axis', x, group.by = 'cat', cols = colors.cat, span = 0.3, plot.cor = FALSE) 505 | }) 506 | print(CombinePlots(plot.list)) 507 | 508 | plot.list = lapply(nnls1_scaled, function(x){ 509 | h = FeatureScatter(st, 'axis', x, group.by = 'cat', cols = colors.cat, span = 0.3, plot.cor = FALSE) 510 | }) 511 | print(CombinePlots(plot.list)) 512 | 513 | tryCatch(expr = { 514 | co = sapply(nmf, function(x){ 515 | sapply(pred, function(y){ 516 | cor(st@meta.data[,x], st@meta.data[,y]) 517 | }) 518 | }) 519 | h = Heatmap(co, name = 'Correlation', 520 | row_dend_reorder = 1:nrow(co), column_dend_reorder = 1:ncol(co), 521 | show_row_names = TRUE, show_column_names = TRUE, 522 
| breaks = seq(-0.5, 0.5, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11)) 523 | print(h) 524 | }, error = function(e){c()}) 525 | 526 | tryCatch(expr = { 527 | co = sapply(pred, function(x){ 528 | sapply(nnls1_scaled, function(y){ 529 | cor(st@meta.data[,x], st@meta.data[,y]) 530 | }) 531 | }) 532 | h = Heatmap(co, name = 'Correlation', 533 | row_dend_reorder = 1:nrow(co), column_dend_reorder = 1:ncol(co), 534 | show_row_names = TRUE, show_column_names = TRUE, 535 | breaks = seq(-1, 1, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11)) 536 | print(h) 537 | }, error = function(e){c()}) 538 | 539 | tryCatch(expr = { 540 | co = sapply(nmf, function(x){ 541 | sapply(nnls1_scaled, function(y){ 542 | cor(st@meta.data[,x], st@meta.data[,y]) 543 | }) 544 | }) 545 | h = Heatmap(co, name = 'Correlation', 546 | row_dend_reorder = 1:nrow(co), column_dend_reorder = 1:ncol(co), 547 | show_row_names = TRUE, show_column_names = TRUE, 548 | breaks = seq(-0.5, 0.5, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11)) 549 | print(h) 550 | }, error = function(e){c()}) 551 | 552 | # Entropy 553 | 554 | ent = apply(st@meta.data[,nnls_bin], 1, function(x){ 555 | x = x/sum(x) 556 | x = x[!x==0] 557 | return(sum(x*log(x))) 558 | }) 559 | st = AddMetaData(st, ent, col.name = 'ent') 560 | h = SpatialFeaturePlot(st, 'ent', min.cutoff = -3, max.cutoff = 0) 561 | print(h) 562 | 563 | # Split 564 | 565 | st.list = SplitObject(st[,!is.na(st$cat)], 'cat') 566 | st.list = st.list[cats] 567 | st.malignant = st.list$Malignant 568 | st.malignant = SCTransform(st.malignant, return.only.var.genes = FALSE, assay = 'Spatial') 569 | # st.malignant = FindSpatiallyVariableFeatures(st.malignant, selection.method = 'markvariogram') 570 | st.normal = st.list$Normal 571 | st.normal = SCTransform(st.normal, return.only.var.genes = FALSE, assay = 'Spatial') 572 | # st.normal = FindSpatiallyVariableFeatures(st.normal, selection.method = 
'markvariogram') 573 | 574 | h = SpatialFeaturePlot(st.malignant, nnls_nei) 575 | print(h) 576 | 577 | h = SpatialFeaturePlot(st.malignant, nnls_dist) 578 | print(h) 579 | 580 | 581 | #### Modules #### 582 | 583 | # Add modules 584 | 585 | st.malignant = GeneToEnrichment(st.malignant, db = modules, method = method, nrand = nrand, nbin = nbin) 586 | st.normal = GeneToEnrichment(st.normal, db = modules, method = method, nrand = nrand, nbin = nbin) 587 | 588 | plot.list = lapply(ma, function(m){ 589 | h = SpatialFeaturePlot(st.malignant, m) 590 | }) 591 | print(CombinePlots(plot.list)) 592 | plot.list = lapply(ma, function(x){ 593 | h = FeatureScatter(st.malignant, 'axis', x, group.by = 'orig.ident', cols = colors, span = 0.3, plot.cor = FALSE) + NoLegend() 594 | }) 595 | print(CombinePlots(plot.list)) 596 | 597 | h = Heatmap(cor(st.malignant@meta.data[,ma]), 598 | show_row_names = TRUE, is.symmetric = TRUE, 599 | breaks = seq(-1,1, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) 600 | print(h) 601 | h = Heatmap(cor(st.malignant@meta.data[,ma]), 602 | show_row_names = TRUE, cluster_rows = FALSE, cluster_columns = FALSE, 603 | breaks = seq(-1,1, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) 604 | print(h) 605 | 606 | ma_bin = paste0(ma, '_bin') 607 | scores_bin = st.malignant@meta.data[,ma] > 0 608 | scores_bin[] = as.numeric(scores_bin) 609 | st.malignant = AddMetaData(st.malignant, scores_bin, col.name = ma_bin) 610 | plot.list = lapply(ma_bin, function(m){ 611 | h = SpatialDimPlot(st.malignant, m, cols = colors.bin) 612 | }) 613 | print(CombinePlots(plot.list)) 614 | plot.list = lapply(ma_bin, function(x){ 615 | h = FeatureScatter(st.malignant, 'axis', x, group.by = 'orig.ident', cols = colors, span = 0.3, plot.cor = FALSE) + NoLegend() 616 | }) 617 | print(CombinePlots(plot.list)) 618 | 619 | st@meta.data[,ma] = NA 620 | st@meta.data[colnames(st.malignant),ma] = st.malignant@meta.data[,ma] 621 | 
st@meta.data[,ma_bin] = 0 622 | st@meta.data[colnames(st.malignant),ma_bin] = st.malignant@meta.data[,ma_bin] 623 | 624 | # Module neighborhood 625 | 626 | nei = FindSTNeighbors(st, d_min = 0, d_max = 1.5) 627 | coef_nei = sapply(ma, function(x){ 628 | sapply(nei, function(spot){ 629 | y = st@meta.data[spot,x] 630 | mean(y, na.rm = TRUE) 631 | }) 632 | }) 633 | colnames(coef_nei) = ma 634 | ma_nei = paste0(ma, '_nei') 635 | st = AddMetaData(st, coef_nei, col.name = ma_nei) 636 | 637 | su = st@meta.data[,c(ma_nei, nnls1_nei)] 638 | su = su[apply(su, 1, function(x){!any(is.na(x))}),] 639 | co = cor(su) 640 | h = Heatmap(co, is.symmetric = TRUE, 641 | show_row_names = TRUE, show_column_names = TRUE, 642 | breaks = seq(-0.3,0.3,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) 643 | print(h) 644 | co = cor(su)[ma_nei, nnls1_nei] 645 | h = Heatmap(co, 646 | show_row_names = TRUE, show_column_names = TRUE, 647 | breaks = seq(-0.3,0.3,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) 648 | print(h) 649 | 650 | su = st.malignant@meta.data[,c(ma, nnls1_nei)] 651 | su = su[apply(su, 1, function(x){!any(is.na(x))}),] 652 | co = cor(su) 653 | h = Heatmap(co, name = 'Correlation', is.symmetric = TRUE, 654 | show_row_names = TRUE, show_column_names = TRUE, 655 | breaks = seq(-0.3,0.3,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) 656 | groups = cutree(as.hclust(row_dend(h)), k = 5) 657 | groups = groups[row_order(h)] 658 | groups = factor(groups, levels = unique(groups)) 659 | h = Heatmap(co, name = 'Correlation', row_order = row_order(h), is.symmetric = TRUE, 660 | show_row_names = TRUE, show_column_names = TRUE, 661 | row_names_gp = gpar(col = c(colors.module, rep('black', length(nnls1_nei)))), 662 | breaks = seq(-0.3,0.3,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) 663 | print(h) 664 | decorate_heatmap_body('Correlation', { 665 | l = 
c(0,cumsum(table(groups))[1:length(table(groups))]) 666 | for (k in seq_len(length(l) - 1)){ # l holds one more boundary than there are groups; one box per group 667 | i = unit(l[k:(k+1)]/ncol(co), 'npc') 668 | j = unit(1-l[k:(k+1)]/nrow(co), 'npc') 669 | grid.lines(i, j[1], gp = gpar(lwd = 1, col = 'black')) 670 | grid.lines(i, j[2], gp = gpar(lwd = 1, col = 'black')) 671 | grid.lines(i[1], j, gp = gpar(lwd = 1, col = 'black')) 672 | grid.lines(i[2], j, gp = gpar(lwd = 1, col = 'black')) 673 | } 674 | }) 675 | 676 | co = cor(su)[ma, nnls1_nei] 677 | h = Heatmap(co, 678 | show_row_names = TRUE, show_column_names = TRUE, 679 | breaks = seq(-0.2,0.2,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) 680 | print(h) 681 | 682 | # Module distance 683 | # Pairwise spot distances, rescaled so the smallest non-zero distance equals 1 684 | coord = st@images$slice1@coordinates[,c('imagerow','imagecol')] 685 | distances = as.matrix(dist(coord)) 686 | distances = distances/min(distances[distances > 0]) 687 | #distances = round(distances, digits = 1) 688 | coef_dist = sapply(ma_bin, function(x){ 689 | w = colnames(st)[as.logical(st@meta.data[,x])] 690 | w = w[!is.na(w)] 691 | if (length(w) == 1){ 692 | mi = as.numeric(distances[,w]) 693 | } else { 694 | mi = apply(distances[,w], 1, function(x){min(as.numeric(x), na.rm = TRUE)}) 695 | } 696 | return(mi) 697 | }) 698 | coef_dist = -coef_dist 699 | colnames(coef_dist) = ma 700 | ma_dist = paste0(ma, '_dist') 701 | st = AddMetaData(st, coef_dist, col.name = ma_dist) 702 | plot.list = lapply(ma_dist, function(x){ 703 | h = SpatialFeaturePlot(st, x) 704 | }) 705 | print(CombinePlots(plot.list)) 706 | 707 | # Plot by axis 708 | 709 | plot.list = lapply(ma, function(m){ 710 | sub = st.malignant@meta.data 711 | model = lm(sub[,m] ~ sub[,'axis']) 712 | est = round(summary(model)$coefficients[2,1], digits = 1) 713 | pval = round(-log10(summary(model)$coefficients[2,4]), digits = 1) 714 | if (pval > pval_thresh){ 715 | p = ggplot(sub, aes_string(x = 'axis', y = m, color = 'orig.ident')) + 716 | geom_jitter(show.legend = FALSE, height = 0.05, width = 0) + 717 | 
#geom_point(show.legend = FALSE) + 718 | geom_smooth(method='lm', show.legend = FALSE, 719 | fullrange = TRUE) + 720 | annotate('text', label = paste('pval', pval), x = 0.5, y = 0.5) + 721 | scale_color_manual(values = colors.module[[m]], breaks = unique(st.malignant$orig.ident)) + 722 | ggtitle(m) + 723 | theme_classic() + 724 | theme( 725 | plot.title = element_text(hjust = 0.5), 726 | axis.text.x = element_text(size = 0), 727 | axis.text.y = element_text(angle = 90), 728 | axis.title.y = element_text(size = 0), 729 | axis.title.x = element_text(size = 0)) 730 | } else { 731 | p = ggplot(sub, aes_string(x = 'axis', y = m, color = 'orig.ident')) + 732 | geom_jitter(show.legend = FALSE, height = 0.05, width = 0) + 733 | #geom_point(show.legend = FALSE) + 734 | annotate('text', label = 'NS', x = 0.5, y = 0.5) + 735 | scale_color_manual(values = colors.module[[m]], breaks = unique(st.malignant$orig.ident)) + 736 | ggtitle(m) + 737 | theme_classic() + 738 | theme( 739 | plot.title = element_text(hjust = 0.5), 740 | axis.text.x = element_text(size = 0), 741 | axis.text.y = element_text(angle = 90), 742 | axis.title.y = element_text(size = 0), 743 | axis.title.x = element_text(size = 0)) 744 | } 745 | return(p) 746 | }) 747 | print(CombinePlots(plot.list, ncol = 4)) 748 | 749 | #### Integrating #### 750 | 751 | tryCatch(expr = { 752 | 753 | # Integrating 754 | 755 | options(future.globals.maxSize = 5000*1024^2) 756 | object.list = list('SC' = srt, 'ST' = st) 757 | genes.use = intersect(VariableFeatures(srt), SpatiallyVariableFeatures(st)) 758 | object.list = PrepSCTIntegration(object.list = object.list, 759 | anchor.features = genes.use, 760 | verbose = FALSE) 761 | anchors = FindIntegrationAnchors(object.list = object.list, reference = 1, 762 | normalization.method = 'SCT', 763 | anchor.features = genes.use, 764 | verbose = FALSE) 765 | integrated = IntegrateData(anchorset = anchors, 766 | normalization.method = "SCT", 767 | verbose = FALSE) 768 | integrated = 
RunPCA(integrated) 769 | integrated = RunUMAP(integrated, dims = 1:10) 770 | integrated = RunTSNE(integrated, dims = 1:10) 771 | 772 | # Plotting 773 | 774 | h = DimPlot(integrated, reduction = 'umap', group.by = c('pop'), label = TRUE, repel = TRUE, cols = colors.pop, pt.size = 2) 775 | print(h) 776 | h = DimPlot(integrated, reduction = 'umap', group.by = c('cat'), label = TRUE, repel = TRUE, cols = colors.cat, pt.size = 2) 777 | print(h) 778 | h = FeaturePlot(integrated, reduction = 'umap', features = 'axis', cols = brewer_pal(palette = 'Purples', direction = 1)(n = 9), pt.size = 2) 779 | print(h) 780 | plot.list = lapply(nnls, function(x){ 781 | h = FeaturePlot(integrated, reduction = 'umap', x) 782 | }) 783 | print(CombinePlots(plot.list)) 784 | plot.list = lapply(nnls_bin, function(x){ 785 | h = DimPlot(integrated, reduction = 'umap', group.by = x, cols = colors.bin) 786 | }) 787 | print(CombinePlots(plot.list)) 788 | plot.list = lapply(pred, function(x){ 789 | h = FeaturePlot(integrated, reduction = 'umap', features = x) 790 | }) 791 | print(CombinePlots(plot.list)) 792 | plot.list = lapply(pred_bin, function(x){ 793 | h = DimPlot(integrated, reduction = 'umap', group.by = x, cols = colors.bin) 794 | }) 795 | print(CombinePlots(plot.list)) 796 | plot.list = lapply(nmf, function(x){ 797 | h = FeaturePlot(integrated, reduction = 'umap', x) 798 | }) 799 | print(CombinePlots(plot.list)) 800 | for (m in ma){ 801 | h = FeaturePlot(integrated, reduction = 'umap', features = m, cols = c('lightgrey',colors.module[m]), split.by = 'technique', pt.size = 2) 802 | print(h) 803 | } 804 | 805 | h = DimPlot(integrated, reduction = 'tsne', group.by = c('pop'), label = TRUE, repel = TRUE, cols = colors.pop, pt.size = 2) 806 | print(h) 807 | h = DimPlot(integrated, reduction = 'tsne', group.by = c('cat'), label = TRUE, repel = TRUE, cols = colors.cat, pt.size = 2) 808 | print(h) 809 | h = FeaturePlot(integrated, reduction = 'tsne', features = 'axis', cols = brewer_pal(palette = 
'Purples', direction = 1)(n = 9), pt.size = 2) 810 | print(h) 811 | plot.list = lapply(nnls, function(x){ 812 | h = FeaturePlot(integrated, reduction = 'tsne', x) 813 | }) 814 | print(CombinePlots(plot.list)) 815 | plot.list = lapply(nnls_bin, function(x){ 816 | h = DimPlot(integrated, reduction = 'tsne', group.by = x, cols = colors.bin) 817 | }) 818 | print(CombinePlots(plot.list)) 819 | plot.list = lapply(pred, function(x){ 820 | h = FeaturePlot(integrated, reduction = 'tsne', features = x) 821 | }) 822 | print(CombinePlots(plot.list)) 823 | plot.list = lapply(pred_bin, function(x){ 824 | h = DimPlot(integrated, reduction = 'tsne', group.by = x, cols = colors.bin) 825 | }) 826 | print(CombinePlots(plot.list)) 827 | plot.list = lapply(nmf, function(x){ 828 | h = FeaturePlot(integrated, reduction = 'tsne', x) 829 | }) 830 | print(CombinePlots(plot.list)) 831 | for (m in ma){ 832 | h = FeaturePlot(integrated, reduction = 'tsne', features = m, cols = c('lightgrey',colors.module[m]), split.by = 'technique', pt.size = 2) 833 | print(h) 834 | } 835 | 836 | }, error = function(e){warning(e)}) 837 | 838 | dev.off() 839 | print(fa) 840 | meta = st@meta.data 841 | return(meta) 842 | 843 | }, mc.cores = length(files)) 844 | 845 | names(meta.list) = names(files) 846 | save(meta.list, file = paste0('meta.list.',prox,'.RData')) 847 | 848 | } 849 | 850 | for (prox in c('inverse')){ 851 | 852 | pdf(paste0('Visium.',prox,'.pdf'), height = 7, width = 7) 853 | 854 | load('~/Documents/Analysis/Tumors/parameters.RData') 855 | w = which(names(colors) == 'LIHCHs1') 856 | l = colors[['LIHCHs1']] 857 | colors = c(colors, 'LIHCHs1' = l, 'LIHCHs1A' = l, 'LIHCHs1B' = l) 858 | colors = colors[-w] 859 | nrand = 2 860 | nbin = 10 861 | cats = c('Malignant','Both','Normal') 862 | colors.cat = brewer_pal(palette = 'RdYlGn')(8)[c(2,5,7)] 863 | names(colors.cat) = cats 864 | method = 'score' 865 | pval_thresh = 1 866 | 867 | modules = loadRData('../Tumors/modules.RData') 868 | colors.module = 
loadRData('../Tumors/colors.module.RData') 869 | types.module = loadRData('../Tumors/types.module.RData') 870 | modules = modules[!names(modules) %in% c('AC','OPC','NPC')] 871 | colors.module = colors.module[names(modules)] 872 | types.module = types.module[names(modules)] 873 | ma = names(modules) 874 | ma_bin = paste0(ma, '_bin') 875 | ma_nei = paste0(ma, '_nei') 876 | ma_dist = paste0(ma, '_dist') 877 | 878 | load(paste0('meta.list.',prox,'.RData')) 879 | meta.list = meta.list[sapply(meta.list, length) > 0] 880 | meta.list = meta.list[!names(meta.list) %in% c('LIHCHs1A','LIHCHs1B')] 881 | 882 | # Make meta, adding NAs 883 | keep = Reduce(union, sapply(meta.list, colnames)) 884 | meta.list = lapply(meta.list, function(meta){ 885 | if (all(keep %in% colnames(meta))){ 886 | return(meta) 887 | } else { 888 | add = matrix(NA, nrow = nrow(meta), ncol = length(setdiff(keep, colnames(meta)))) 889 | colnames(add) = setdiff(keep, colnames(meta)) 890 | rownames(add) = rownames(meta) 891 | meta = cbind(meta, add) 892 | return(meta) 893 | } 894 | }) 895 | meta = Reduce(rbind, lapply(meta.list, function(meta){meta[, keep]})) 896 | meta = meta[!is.na(meta$cat),] 897 | meta$orig.ident = factor(meta$orig.ident, levels = intersect(names(colors), unique(meta$orig.ident))) 898 | meta$cat = factor(meta$cat, levels = cats) 899 | nnls_dist = setdiff(grep('_dist', colnames(meta), value = TRUE), ma_dist) 900 | nnls_nei = grep('_nei', colnames(meta), value = TRUE) 901 | nnls = gsub('_dist','',nnls_dist) 902 | 903 | nnls1 = setdiff(nnls, c('M1', 'M2', 'M1M2')) 904 | nnls1_dist = paste0(nnls1, '_dist') 905 | nnls1_nei = paste0(nnls1, '_nei') 906 | nnls1_bin = paste0(nnls1, '_bin') 907 | 908 | nnls2 = setdiff(nnls, 'Macrophage') 909 | nnls2_dist = paste0(nnls2, '_dist') 910 | nnls2_nei = paste0(nnls2, '_nei') 911 | nnls2_bin = paste0(nnls2, '_bin') 912 | 913 | # Sample composition 914 | 915 | for (o in levels(meta$orig.ident)){ 916 | b = meta[meta$orig.ident == o,] 917 | sp = 
t(sapply(c('Normal','Both'), function(s){ 918 | a = b[b$cat == s, nnls1_bin] 919 | a[a < 0] = 0 920 | a = colMeans(a, na.rm = TRUE) 921 | a = a[!is.nan(a)] 922 | a = a[!names(a) == 'Malignant_bin'] 923 | a = a/sum(a) 924 | })) 925 | colnames(sp) = gsub('_bin', '', colnames(sp)) 926 | sp = sp[,rev(colnames(sp))] 927 | connectedBarplot(t(rbind(sp['Normal',], sp['Both',])), space = 0.2, 928 | main = o, names.arg = c('',''), axes = FALSE, 929 | las = 2, beside = FALSE, col = colors.pop[colnames(sp)]) 930 | } 931 | 932 | # Per sample: non-malignant composition (fractions summing to 1) of 'Normal' and 'Both' spots 933 | comp.list = lapply(levels(meta$orig.ident), function(o){ 934 | b = meta[meta$orig.ident == o,] 935 | sp = t(sapply(c('Normal','Both'), function(s){ 936 | a = b[b$cat == s, nnls1_bin] 937 | a[a < 0] = 0 938 | a = colMeans(a, na.rm = TRUE) 939 | a = a[!is.nan(a)] 940 | a = a[!names(a) == 'Malignant_bin'] 941 | a = a/sum(a) 942 | })) 943 | colnames(sp) = gsub('_bin', '', colnames(sp)) 944 | return(sp) 945 | }) 946 | names(comp.list) = levels(meta$orig.ident) 947 | comp = melt(comp.list) 948 | names(comp) = c('cat','ct','value','orig.ident') 949 | p.list = lapply(levels(comp$ct), function(ct){ 950 | sub = comp[comp$ct == ct,] 951 | sub = sub[order(sub$cat), ] 952 | p = ggpaired(sub, x = 'cat', y = 'value', 953 | color = 'cat', line.color = 'grey', line.size = 0.4) + 954 | stat_compare_means(paired = TRUE) + 955 | scale_color_manual(values = colors.cat) + 956 | ggtitle(ct) + 957 | theme_classic() + 958 | theme( 959 | plot.title = element_text(hjust = 0.5), 960 | axis.text.x = element_text(size = 10), 961 | axis.title.x = element_text(size = 0), 962 | axis.title.y = element_text(size = 0), 963 | legend.position = 'none') 964 | }) 965 | print(CombinePlots(p.list, ncol = 4)) # ncol is a CombinePlots layout argument, not a print argument 966 | 967 | comp.list = lapply(levels(meta$orig.ident), function(o){ 968 | b = meta[meta$orig.ident == o,] 969 | sp = t(sapply(c('Normal','Both'), function(s){ 970 | a = b[b$cat == s, nnls1_bin] 971 | a[a < 0] = 0 972 | a = colMeans(a, na.rm = TRUE) 973 | a = a[!is.nan(a)] 974 | a = 
a[!names(a) == 'Malignant_bin'] 975 | a = a/sum(a) 976 | })) 977 | colnames(sp) = gsub('_bin', '', colnames(sp)) 978 | log2(sp['Both',]/sp['Normal',]) 979 | }) 980 | keep = Reduce(union, sapply(comp.list, names)) 981 | comp.list = lapply(comp.list, function(comp){ 982 | if (all(keep %in% names(comp))){ 983 | return(comp) 984 | } else { 985 | add = matrix(NA, nrow = 1, ncol = length(setdiff(keep, names(comp)))) 986 | names(add) = setdiff(keep, names(comp)) 987 | comp = c(comp, add) 988 | return(comp) 989 | } 990 | }) 991 | comp = Reduce(rbind, lapply(comp.list, function(comp){comp[keep]})) 992 | comp[is.infinite(comp)] = 5*sign(comp[is.infinite(comp)]) # clip +/-Inf log-ratios to +/-5, keeping the sign (-Inf = absent in 'Both') 993 | rownames(comp) = levels(meta$orig.ident) 994 | barplot(comp, beside = TRUE, col = colors[rownames(comp)], las = 2) 995 | 996 | # Modules by distance 997 | # Per module x cell type x sample: signed -log10(p) of the slope of module score on cell-type distance (malignant spots only) 998 | val = Reduce(rbind,lapply(ma, function(m){ 999 | Reduce(rbind,lapply(nnls1, function(ct){ 1000 | res = sapply(levels(meta$orig.ident), function(o){ 1001 | val = NA 1002 | tryCatch(expr = { 1003 | ct_dist = paste0(ct, '_dist') 1004 | sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',] 1005 | model = lm(sub[,m] ~ sub[,ct_dist]) 1006 | est = summary(model)$coefficients[2,1] 1007 | pval = -log10(summary(model)$coefficients[2,4]) 1008 | val = pval*sign(est) 1009 | return(val) 1010 | }, error = function(e){return(NA)}) 1011 | }) 1012 | #res[abs(res)= 0.5) & (abs(val$med) >= 0.75) 1025 | val$med[val$med > 4] = 4 1026 | val$med[val$med < -1] = -1 1027 | p = ggplot(val, aes(x=ct,y=module)) + 1028 | geom_point(shape=21,aes(size=amp,fill=med,colour=highlight)) + 1029 | scale_fill_gradient2(low = brewer_pal(palette = 'RdBu', direction = -1)(n = 3)[1], 1030 | mid = brewer_pal(palette = 'RdBu', direction = -1)(n = 3)[2], 1031 | high = brewer_pal(palette = 'RdBu', direction = -1)(n = 3)[3]) + 1032 | scale_colour_manual(breaks=c('FALSE','TRUE'),values=c('white','black')) + 1033 | theme_bw() + 1034 | theme(panel.grid.minor = element_blank(), 1035 | panel.grid.major = element_blank(), 1036 | 
axis.text = element_text(size=14, colour = "black"),
        axis.text.y = element_text(size=12, colour = "black"),
        axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
        axis.title=element_blank(),
        panel.border = element_rect(size = 0.7, linetype = "solid", colour = "black"))
print(p)

# One figure per entry of `nnls`: for every module in `ma` and every level of
# meta$orig.ident, fit lm(module ~ <ct>_dist) on the 'Malignant' rows and keep
# sign(slope) * -log10(p), with -log10(p) capped at 10. A failing fit gives NA.
for (ct in nnls){
  val = Reduce(rbind,lapply(ma, function(m){
    res = sapply(levels(meta$orig.ident), function(o){
      val = NA
      tryCatch(expr = {
        ct_dist = paste0(ct, '_dist')
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        model = lm(sub[,m] ~ sub[,ct_dist])
        est = summary(model)$coefficients[2,1]
        pval = -log10(summary(model)$coefficients[2,4])
        pval[pval > 10] = 10
        val = pval*sign(est)
        return(val)
      }, error = function(e){return(NA)})
    })
    return(data.frame('orig.ident' = levels(meta$orig.ident), 'module' = m, 'res' = res))
  }))
  val$module = factor(val$module, levels = ma)
  val$orig.ident = factor(val$orig.ident)
  p = ggplot(val, aes(x=module,y=res)) +
    geom_boxplot(coef = 0, outlier.shape = NA) +
    #geom_violin() +
    geom_jitter(aes(colour = orig.ident, size = 2), width = 0.25) +
    # `res` is signed, so p = 0.05 sits at both +/- (-log10(0.05)).
    # The second line used to repeat the negative threshold.
    geom_hline(yintercept = log10(0.05), linetype = 2) +
    geom_hline(yintercept = -log10(0.05), linetype = 2) +
    scale_colour_manual(values = colors, breaks = names(colors)) +
    ggtitle(ct) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          axis.text = element_text(size=14, colour = "black"),
          axis.text.y = element_text(size=12, colour = "black"),
          axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
          axis.title=element_blank()) + NoLegend()
  print(p)
}

# Transposed view: one figure per module in `ma`, one box per entry of `nnls1`.
for (m in ma){
  val = Reduce(rbind,lapply(nnls1, function(ct){
    res = sapply(levels(meta$orig.ident), function(o){
      val = NA
      tryCatch(expr = {
        ct_dist = paste0(ct, '_dist')
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        model = lm(sub[,m] ~ sub[,ct_dist])
        est = summary(model)$coefficients[2,1]
        pval = -log10(summary(model)$coefficients[2,4])
        pval[pval > 10] = 10
        val = pval*sign(est)
        return(val)
      }, error = function(e){return(NA)})
    })
    return(data.frame('orig.ident' = levels(meta$orig.ident), 'ct' = ct, 'res' = res))
  }))
  val$ct = factor(val$ct, levels = nnls)
  val$orig.ident = factor(val$orig.ident)
  p = ggplot(val, aes(x=ct,y=res)) +
    geom_boxplot(coef = 0, outlier.shape = NA) +
    #geom_violin() +
    geom_jitter(aes(colour = orig.ident, size = 2), width = 0.25) +
    geom_hline(yintercept = log10(0.05), linetype = 2) +
    geom_hline(yintercept = -log10(0.05), linetype = 2) +
    scale_colour_manual(values = colors, breaks = names(colors)) +
    ggtitle(m) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          axis.text = element_text(size=14, colour = "black"),
          axis.text.y = element_text(size=12, colour = "black"),
          axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
          axis.title=element_blank()) + NoLegend()
  print(p)
}

# Scatter plots of module value against <ct>_dist, one panel per level of
# meta$orig.ident. A regression line is added only when the rounded -log10(p)
# exceeds pval_thresh; a panel whose fit fails stays NULL.
for (ct in nnls){
  for (m in ma){

    ct_nei = paste0(ct, '_nei')
    ct_dist = paste0(ct, '_dist')

    plot.list = lapply(levels(meta$orig.ident), function(o){
      p = NULL
      tryCatch(expr = {
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        model = lm(sub[,m] ~ sub[,ct_dist])
        est = round(summary(model)$coefficients[2,1], digits = 1)
        pval = round(-log10(summary(model)$coefficients[2,4]), digits = 1)
        if (pval > pval_thresh){
          p = ggplot(sub,
aes_string(x = ct_dist, y = m, color = 'orig.ident')) + 1132 | geom_jitter(show.legend = FALSE, height = 0.05, width = 0) + 1133 | #geom_point(show.legend = FALSE) + 1134 | geom_smooth(method='lm', show.legend = FALSE, 1135 | fullrange = TRUE) + 1136 | annotate('text', label = paste('pval', pval), x = 0.05, y = 0.05) + 1137 | scale_color_manual(values = colors) + 1138 | ggtitle(o) + 1139 | theme_classic() + 1140 | theme( 1141 | plot.title = element_text(hjust = 0.5), 1142 | axis.text.x = element_text(size = 0), 1143 | axis.text.y = element_text(angle = 90), 1144 | axis.title.y = element_text(), 1145 | axis.title.x = element_text()) 1146 | } else { 1147 | p = ggplot(sub, aes_string(x = ct_dist, y = m, color = 'orig.ident')) + 1148 | geom_jitter(show.legend = FALSE, height = 0.05, width = 0) + 1149 | #geom_point(show.legend = FALSE) + 1150 | annotate('text', label = 'NS', x = 0.05, y = 0.05) + 1151 | scale_color_manual(values = colors) + 1152 | ggtitle(o) + 1153 | theme_classic() + 1154 | theme( 1155 | plot.title = element_text(hjust = 0.5), 1156 | axis.text.x = element_text(size = 0), 1157 | axis.text.y = element_text(angle = 90), 1158 | axis.title.y = element_text(), 1159 | axis.title.x = element_text()) 1160 | } 1161 | }, error = function(e){c()}) 1162 | return(p) 1163 | }) 1164 | print(CombinePlots(plot.list, ncol = 3)) 1165 | } 1166 | } 1167 | 1168 | # Modules by neighborhood 1169 | 1170 | val = Reduce(rbind,lapply(ma, function(m){ 1171 | Reduce(rbind,lapply(nnls1, function(ct){ 1172 | res = sapply(levels(meta$orig.ident), function(o){ 1173 | val = NA 1174 | tryCatch(expr = { 1175 | ct_nei = paste0(ct, '_nei') 1176 | sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',] 1177 | model = lm(sub[,m] ~ sub[,ct_nei]) 1178 | est = summary(model)$coefficients[2,1] 1179 | pval = -log10(summary(model)$coefficients[2,4]) 1180 | val = pval*sign(est) 1181 | return(val) 1182 | }, error = function(e){return(NA)}) 1183 | }) 1184 | #res[abs(res)= 0.5) & (abs(val$med) >= 
0.75) 1197 | val$med[val$med > 4] = 4 1198 | val$med[val$med < -1] = -1 1199 | p = ggplot(val, aes(x=ct,y=module)) + 1200 | geom_point(shape=21,aes(size=amp,fill=med,colour=highlight)) + 1201 | scale_fill_gradient2(low = brewer_pal(palette = 'RdBu', direction = -1)(n = 3)[1], 1202 | mid = brewer_pal(palette = 'RdBu', direction = -1)(n = 3)[2], 1203 | high = brewer_pal(palette = 'RdBu', direction = -1)(n = 3)[3]) + 1204 | scale_colour_manual(breaks=c('FALSE','TRUE'),values=c('white','black')) + 1205 | theme_bw() + 1206 | theme(panel.grid.minor = element_blank(), 1207 | panel.grid.major = element_blank(), 1208 | axis.text = element_text(size=14, colour = "black"), 1209 | axis.text.y = element_text(size=12, colour = "black"), 1210 | axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1), 1211 | axis.title=element_blank(), 1212 | panel.border = element_rect(size = 0.7, linetype = "solid", colour = "black")) 1213 | print(p) 1214 | 1215 | 1216 | for (ct in nnls){ 1217 | val = Reduce(rbind,lapply(ma, function(m){ 1218 | res = sapply(levels(meta$orig.ident), function(o){ 1219 | val = NA 1220 | tryCatch(expr = { 1221 | ct_nei = paste0(ct, '_nei') 1222 | sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',] 1223 | model = lm(sub[,m] ~ sub[,ct_nei]) 1224 | est = summary(model)$coefficients[2,1] 1225 | pval = -log10(summary(model)$coefficients[2,4]) 1226 | pval[pval > 10] = 10 1227 | val = pval*sign(est) 1228 | return(val) 1229 | }, error = function(e){return(NA)}) 1230 | }) 1231 | return(data.frame('orig.ident' = levels(meta$orig.ident), 'module' = m, 'res' = res)) 1232 | })) 1233 | val$module = factor(val$module, levels = ma) 1234 | val$orig.ident = factor(val$orig.ident) 1235 | p = ggplot(val, aes(x=module,y=res)) + 1236 | geom_boxplot(coef = 0, outlier.shape = NA) + 1237 | #geom_violin() + 1238 | geom_jitter(aes(colour = orig.ident, size = 2), width = 0.25) + 1239 | geom_hline(yintercept = log10(0.05), linetype = 2) + 1240 | 
geom_hline(yintercept = -log10(0.05), linetype = 2) +
    scale_colour_manual(values = colors, breaks = names(colors)) +
    ggtitle(ct) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          axis.text = element_text(size=14, colour = "black"),
          axis.text.y = element_text(size=12, colour = "black"),
          axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
          axis.title=element_blank()) + NoLegend()
  print(p)
}

# One figure per module in `ma`: for every entry of `nnls1` and every level of
# meta$orig.ident, fit lm(module ~ <ct>_nei) on the 'Malignant' rows and keep
# sign(slope) * -log10(p), with -log10(p) capped at 10. A failing fit gives NA.
for (m in ma){
  val = Reduce(rbind,lapply(nnls1, function(ct){
    res = sapply(levels(meta$orig.ident), function(o){
      val = NA
      tryCatch(expr = {
        ct_nei = paste0(ct, '_nei')
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        model = lm(sub[,m] ~ sub[,ct_nei])
        est = summary(model)$coefficients[2,1]
        pval = -log10(summary(model)$coefficients[2,4])
        pval[pval > 10] = 10
        val = pval*sign(est)
        return(val)
      }, error = function(e){return(NA)})
    })
    return(data.frame('orig.ident' = levels(meta$orig.ident), 'ct' = ct, 'res' = res))
  }))
  val$ct = factor(val$ct, levels = nnls)
  val$orig.ident = factor(val$orig.ident)
  p = ggplot(val, aes(x=ct,y=res)) +
    geom_boxplot(coef = 0, outlier.shape = NA) +
    #geom_violin() +
    geom_jitter(aes(colour = orig.ident, size = 2), width = 0.25) +
    # `res` is signed, so p = 0.05 sits at both +/- (-log10(0.05)).
    # The second line used to repeat the negative threshold.
    geom_hline(yintercept = log10(0.05), linetype = 2) +
    geom_hline(yintercept = -log10(0.05), linetype = 2) +
    scale_colour_manual(values = colors, breaks = names(colors)) +
    ggtitle(m) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          axis.text = element_text(size=14, colour = "black"),
          axis.text.y = element_text(size=12, colour = "black"),
          axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
          axis.title=element_blank()) + NoLegend()
  print(p)
}


# Scatter plots of module value against <ct>_nei, one panel per level of
# meta$orig.ident. A regression line is added only when the rounded -log10(p)
# exceeds pval_thresh; a panel whose fit fails stays NULL.
for (ct in nnls){
  for (m in ma){

    ct_nei = paste0(ct, '_nei')
    ct_dist = paste0(ct, '_dist')

    plot.list = lapply(levels(meta$orig.ident), function(o){
      # Initialised outside tryCatch (as in the distance version) so that
      # return(p) never depends on how far the guarded code got.
      p = NULL
      tryCatch(expr = {
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        model = lm(sub[,m] ~ sub[,ct_nei])
        est = round(summary(model)$coefficients[2,1], digits = 1)
        pval = round(-log10(summary(model)$coefficients[2,4]), digits = 1)
        if (pval > pval_thresh){
          p = ggplot(sub, aes_string(x = ct_nei, y = m, color = 'orig.ident')) +
            geom_jitter(show.legend = FALSE, height = 0.05, width = 0) +
            #geom_point(show.legend = FALSE) +
            geom_smooth(method='lm', show.legend = FALSE,
                        fullrange = TRUE) +
            annotate('text', label = paste('pval', pval), x = 0.05, y = 0.05) +
            scale_color_manual(values = colors) +
            ggtitle(o) +
            theme_classic() +
            theme(
              plot.title = element_text(hjust = 0.5),
              axis.text.x = element_text(size = 0),
              axis.text.y = element_text(angle = 90),
              axis.title.y = element_text(),
              axis.title.x = element_text())
        } else {
          p = ggplot(sub, aes_string(x = ct_nei, y = m, color = 'orig.ident')) +
            geom_jitter(show.legend = FALSE, height = 0.05, width = 0) +
            #geom_point(show.legend = FALSE) +
            annotate('text', label = 'NS', x = 0.05, y = 0.05) +
            scale_color_manual(values = colors) +
            ggtitle(o) +
            theme_classic() +
            theme(
              plot.title = element_text(hjust = 0.5),
              axis.text.x = element_text(size = 0),
              axis.text.y = element_text(angle = 90),
              axis.title.y = element_text(),
              axis.title.x = element_text())
        }
      }, error = function(e){c()})
      return(p)
    })
    print(CombinePlots(plot.list, ncol = 3))
  }
}

# M1 vs M2 distances

# Scatter of M1_dist against M2_dist per level of meta$orig.ident, coloured by `cat`
plot.list = lapply(levels(meta$orig.ident), function(o){
  sub = meta[meta$orig.ident == o,]
  p = ggplot(sub, aes_string(x = 'M1_dist', y = 'M2_dist', fill = 'cat', col = 'cat')) +
    geom_point() +
    geom_smooth(method = 'lm') +
    scale_fill_manual(values = colors.cat, breaks = cats) +
    scale_color_manual(values = colors.cat, breaks = cats) +
    ggtitle(o) +
    theme_classic() +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.text.y = element_text(angle = 90),
      axis.title.y = element_text(),
      axis.title.x = element_text())
})
print(CombinePlots(plot.list, ncol = 3))

# Box plot of M1M2_dist by orig.ident and cat, with a Wilcoxon test
# ('Malignant' vs 'Normal') per orig.ident shown as significance stars.
sub = meta[,c('orig.ident','cat','M1M2_dist')]
sub = melt(sub, id.vars = c('orig.ident','cat'), measure.vars = 'M1M2_dist')
p.values <- sapply(split(sub, sub$orig.ident), function(x){
  wilcox.test(value~cat, x, subset = cat %in% c('Malignant','Normal'))$p.value
})
labels <- symnum(p.values, corr = FALSE, cutpoints = c(0, .001,.01,.05, 1), symbols = c("***","**","*","n.s."))
# Label height: highest upper hinge (4th boxplot statistic) across categories, plus an offset
y.values <- sapply(split(sub, sub$orig.ident), function(x){
  max(sapply(split(x, x$cat), function(xx){boxplot(xx$value, plot = FALSE)$stats[4, ]}), na.rm = TRUE) + 0.5
})
p = ggplot(sub, aes_string(x = 'orig.ident', y = 'value', fill = 'cat')) +
  geom_boxplot(coef = 0, outlier.size = 0, outlier.shape = NA, varwidth = FALSE, show.legend = FALSE) +
  geom_signif(annotations = labels, xmin = seq_len(nlevels(sub$orig.ident))-0.2, xmax = seq_len(nlevels(sub$orig.ident))+0.2, y_position = y.values, tip_length = 0) +
  scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
  ggtitle('') +
  ylim(-2,4) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(angle = 90),
    axis.text.x = element_text(angle = 90),
    axis.title.y = element_text(size = 0),
    axis.title.x = element_text(size = 0))
print(p)


# M1 vs M2 neighborhood

# Same two figures for the M1_nei / M2_nei / M1M2_nei columns
plot.list = lapply(levels(meta$orig.ident), function(o){
  sub = meta[meta$orig.ident == o,]
  p = ggplot(sub, aes_string(x = 'M1_nei', y = 'M2_nei', fill = 'cat', col = 'cat')) +
    geom_point() +
    geom_smooth(method = 'lm') +
    scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
    scale_color_manual(values = colors.cat, breaks = names(colors.cat)) +
    ggtitle(o) +
    theme_classic() +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.text.y = element_text(angle = 90),
      axis.title.y = element_text(),
      axis.title.x = element_text())
})
print(CombinePlots(plot.list, ncol = 3))

sub = meta[,c('orig.ident','cat','M1M2_nei')]
sub = melt(sub, id.vars = c('orig.ident','cat'), measure.vars = 'M1M2_nei')
p.values <- sapply(split(sub, sub$orig.ident), function(x){
  wilcox.test(value~cat, x, subset = cat %in% c('Malignant','Normal'))$p.value
})
labels <- symnum(p.values, corr = FALSE, cutpoints = c(0, .001,.01,.05, 1), symbols = c("***","**","*","n.s."))
y.values <- sapply(split(sub, sub$orig.ident), function(x){
  max(sapply(split(x, x$cat), function(xx){boxplot(xx$value, plot = FALSE)$stats[4, ]}), na.rm = TRUE) + 0.05
})
p = ggplot(sub, aes_string(x = 'orig.ident', y = 'value', fill = 'cat')) +
  geom_boxplot(coef = 0, outlier.size = 0, outlier.shape = NA, varwidth = FALSE, show.legend = FALSE) +
  geom_signif(annotations = labels, xmin = seq_len(nlevels(sub$orig.ident))-0.2, xmax = seq_len(nlevels(sub$orig.ident))+0.2, y_position = y.values, tip_length = 0) +
  scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
  ggtitle('') +
  ylim(-0.2,0.4) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(angle = 90),
    axis.text.x = element_text(angle = 90),
    axis.title.y = element_text(size = 0),
    axis.title.x = element_text(size = 0))
print(p)

# M1 vs M2

# Same two figures for the M1 / M2 / M1M2 columns; here the box plot keeps only
# 'Normal' and 'Both' rows with a non-missing M1M2 value.
plot.list = lapply(levels(meta$orig.ident), function(o){
  sub = meta[meta$orig.ident == o,]
  p = ggplot(sub, aes_string(x = 'M1', y = 'M2', fill = 'cat', col = 'cat')) +
    geom_point() +
    geom_smooth(method = 'lm') +
    scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
    scale_color_manual(values = colors.cat, breaks = names(colors.cat)) +
    ggtitle(o) +
    theme_classic() +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.text.y = element_text(angle = 90),
      axis.title.y = element_text(),
      axis.title.x = element_text())
})
print(CombinePlots(plot.list, ncol = 3))

sub = meta[,c('orig.ident','cat','M1M2')]
sub = sub[sub$cat %in% c('Both','Normal') & !is.na(sub$M1M2),]
sub = melt(sub, id.vars = c('orig.ident','cat'), measure.vars = 'M1M2')
sub$cat = factor(as.character(sub$cat), levels = c('Normal','Both'))
p.values <- sapply(split(sub, sub$orig.ident), function(x){
  wilcox.test(value~cat, x, subset = cat %in% c('Normal','Both'))$p.value
})
labels <- symnum(p.values, corr = FALSE, cutpoints = c(0, .001,.01,.05, 1), symbols = c("***","**","*","n.s."))
y.values <- sapply(split(sub, sub$orig.ident), function(x){
  max(sapply(split(x, x$cat), function(xx){boxplot(xx$value, plot = FALSE)$stats[4, ]}), na.rm = TRUE) + 0.05
})
p = ggplot(sub, aes_string(x = 'orig.ident', y = 'value', fill = 'cat')) +
  geom_boxplot(coef = 0, outlier.size = 0, outlier.shape = NA, varwidth = FALSE, show.legend = FALSE) +
  geom_signif(annotations = labels, xmin = seq_len(nlevels(sub$orig.ident))-0.2, xmax = seq_len(nlevels(sub$orig.ident))+0.2, y_position = y.values, tip_length = 0) +
  scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
ggtitle('') + 1452 | ylim(-0.2,0.4) + 1453 | theme_classic() + 1454 | theme( 1455 | plot.title = element_text(hjust = 0.5), 1456 | axis.text.y = element_text(angle = 90), 1457 | axis.text.x = element_text(angle = 90), 1458 | axis.title.y = element_text(size = 0), 1459 | axis.title.x = element_text(size = 0)) 1460 | print(p) 1461 | 1462 | dev.off() 1463 | 1464 | } 1465 | -------------------------------------------------------------------------------- /experiments.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | source('~/Documents/R/seurat_functions.R') 3 | setwd('~/Documents/Analysis/Experiments/') 4 | pdf('Experiments.pdf', height = 20, width = 20) 5 | 6 | nrand = 3 7 | nbin = 25 8 | range = 8:12 9 | gmin = 5 10 | pval_thresh = 0.1 11 | 12 | dir.list = c('Adaptive/RagWT', 13 | 'Adaptive/RagKO', 14 | 'PancreasPeritoneum/Pancreas', 15 | 'PancreasPeritoneum/Peritoneum', 16 | 'LiverPeritoneum/Peritoneum', 17 | 'LiverPeritoneum/Liver') 18 | 19 | col_ec = c(brewer.pal(name = 'Purples', n = 3)[3], brewer.pal(name = 'Blues', n = 3)[3],brewer.pal(name = 'RdYlGn', n = 11)[c(1,4,7,10)]) 20 | names(col_ec) = c('Adaptive-RagWT','Adaptive-RagKO', 21 | 'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum', 22 | 'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver') 23 | 24 | col_experiment = col_ec[c(1,3,5)] 25 | names(col_experiment) = c('Adaptive','PancreasPeritoneum','LiverPeritoneum') 26 | 27 | col_eci = c(rep(col_ec[1], 4), rep(col_ec[2], 4), rep(col_ec[3], 2), rep(col_ec[4], 2), rep(col_ec[5], 2), rep(col_ec[6], 2)) 28 | names(col_eci) = c("Adaptive-RagWT-F1", "Adaptive-RagWT-F2", 29 | "Adaptive-RagWT-M1", "Adaptive-RagWT-M2", 30 | "Adaptive-RagKO-F1", "Adaptive-RagKO-F2", 31 | "Adaptive-RagKO-M1", "Adaptive-RagKO-M2", 32 | "PancreasPeritoneum-Pancreas-1", "PancreasPeritoneum-Pancreas-2", 33 | "PancreasPeritoneum-Peritoneum-1", "PancreasPeritoneum-Peritoneum-2", 34 | "LiverPeritoneum-Peritoneum-3", 
"LiverPeritoneum-Peritoneum-4", 35 | "LiverPeritoneum-Liver-5", "LiverPeritoneum-Liver-6" 36 | ) 37 | 38 | #### Part1 #### 39 | 40 | srt.list = lapply(dir.list, function(dir){ 41 | srt = loadRData(paste('~/Documents/Experiments', dir, 'srt.RData', sep = '/')) 42 | return(srt) 43 | }) 44 | save(srt.list, file = 'srt.list.RData') 45 | srt = Reduce(merge2, srt.list) 46 | srt = srt[,!is.na(srt$condition)] 47 | srt = srt[,!is.na(srt$experiment)] 48 | 49 | srt$eci = factor(srt$eci, levels = c( 50 | "Adaptive-RagWT-F1", "Adaptive-RagWT-F2", 51 | "Adaptive-RagWT-M1", "Adaptive-RagWT-M2", 52 | "Adaptive-RagKO-F1", "Adaptive-RagKO-F2", 53 | "Adaptive-RagKO-M1", "Adaptive-RagKO-M2", 54 | "PancreasPeritoneum-Pancreas-1", "PancreasPeritoneum-Pancreas-2", 55 | "PancreasPeritoneum-Peritoneum-1", "PancreasPeritoneum-Peritoneum-2", 56 | "LiverPeritoneum-Peritoneum-3", "LiverPeritoneum-Peritoneum-4", 57 | "LiverPeritoneum-Liver-5", "LiverPeritoneum-Liver-6")) 58 | srt$ec = factor(srt$ec, levels = c( 59 | 'Adaptive-RagWT','Adaptive-RagKO', 60 | 'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum', 61 | 'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver')) 62 | srt$experiment = factor(srt$experiment, levels = c( 63 | 'Adaptive','PancreasPeritoneum','LiverPeritoneum')) 64 | 65 | srt = RunPCA(srt) 66 | srt = RunUMAP(srt, dims = 1:10) 67 | 68 | srt = FindNeighbors(srt) 69 | srt = FindClusters(srt) 70 | srt$cluster = Idents(srt) 71 | col_cluster = hue_pal()(nlevels(srt$cluster)) 72 | names(col_cluster) = levels(srt$cluster) 73 | markers = FindAllMarkers2(srt, label = FALSE) 74 | write.table(markers$genes_10, file = 'markers.csv', sep = ',', col.names = TRUE, row.names = FALSE, quote = FALSE) 75 | 76 | save(srt, file = 'srt.RData') 77 | 78 | #### Part 2 #### 79 | 80 | load('srt.RData') 81 | col_cluster = hue_pal()(nlevels(srt$cluster)) 82 | names(col_cluster) = levels(srt$cluster) 83 | 84 | # Cell types 85 | srt$singler = 'Malignant' 86 | srt$singler[srt$cluster %in% c(7,8)] = 
'Macrophage' 87 | srt$singler[srt$cluster %in% c(12)] = 'DC' 88 | srt$singler[srt$cluster %in% c(15)] = 'TCell' 89 | srt$singler[srt$cluster %in% c(17)] = 'Endothelial' 90 | srt$singler[srt$cluster %in% c(18)] = 'Fibroblast' 91 | srt$singler[srt$cluster %in% c(19)] = 'BCell' 92 | srt$singler[srt$cluster %in% c(13)] = 'Neutrophil' 93 | srt$singler = factor(srt$singler) 94 | col_singler = c(brewer.pal(8, 'Dark2'), brewer.pal(8, 'Accent'))[1:nlevels(srt$singler)] 95 | names(col_singler) = levels(srt$singler) 96 | # Finding cells 97 | cluster.malignant = unique(as.character(srt$cluster)[srt$singler == 'Malignant']) 98 | cluster.normal = setdiff(levels(srt$cluster), cluster.malignant) 99 | cells.malignant = WhichCells(srt, expression = cluster %in% cluster.malignant) 100 | cells.normal = WhichCells(srt, expression = cluster %in% cluster.normal) 101 | cells.normal = setdiff(cells.normal, cells.malignant) 102 | # Filtering 103 | test = c('Ccl5','Ptprc','Apoe','Tyrobp','Lyz2') 104 | data = GetAssayData(srt, assay = 'RNA', slot = 'counts') 105 | for (g in test){ 106 | cells.malignant = cells.malignant[data[g, cells.malignant] < 1] 107 | } 108 | # Assigning 109 | srt$type = 'undetermined' 110 | srt@meta.data[cells.malignant, 'type'] = 'malignant' 111 | srt@meta.data[cells.normal, 'type'] = 'normal' 112 | srt$type = factor(srt$type) 113 | print(table(srt$type)) 114 | print(table(srt$singler, srt$cluster, srt$type)) 115 | col_type = c(brewer_pal(palette = 'Blues')(5)[4], brewer_pal(palette = 'Greens')(5)[4], 'grey')[1:nlevels(srt$type)] 116 | names(col_type) = levels(srt$type) 117 | # Pop 118 | srt$pop = as.character(srt$type) 119 | srt$pop[srt$type == 'normal'] = as.character(srt$singler[srt$type == 'normal']) 120 | srt$pop = gsub('malignant','Malignant',srt$pop) 121 | srt$pop = factor(srt$pop) 122 | srt$pop = relevel(srt$pop, ref = 'Malignant') 123 | col_pop = col_singler[intersect(levels(srt$pop),names(col_singler))] 124 | col_pop = c(col_pop, 'Malignant' = 'grey20') 125 | 
# Markers per `pop` group; the `genes_10` element is written out as CSV
markers = FindAllMarkers2(srt, label = FALSE, group.by = 'pop')
write.table(markers$genes_10, file = 'markers_singler.csv', sep = ',',
            quote = FALSE, row.names = FALSE, col.names = TRUE)

# Plotting
# Every annotation is drawn with its own palette; only the cluster panel is labelled.
dimplot.panels = list(
  list(group = 'type',       palette = col_type),
  list(group = 'cluster',    palette = col_cluster, label = TRUE),
  list(group = 'singler',    palette = col_singler),
  list(group = 'pop',        palette = col_pop),
  list(group = 'ec',         palette = col_ec),
  list(group = 'experiment', palette = col_experiment)
)
# Thin wrapper: `srt` and the current `o` are looked up when the call is made
draw.dimplot = function(group, palette, ...){
  DimPlot(srt, group.by = group, cols = palette, order = o, ...)
}

# Full-size embeddings
o = sample(1:ncol(srt), size = ncol(srt), replace = FALSE)
for (panel in dimplot.panels){
  h = do.call(draw.dimplot, c(panel, list(pt.size = 2)))
  print(h)
}

# Same panels, split by 'ec', with a fresh random order
o = sample(1:ncol(srt), size = ncol(srt), replace = FALSE)
for (panel in dimplot.panels){
  h = do.call(draw.dimplot, c(panel, list(pt.size = 1, split.by = 'ec')))
  print(h)
}

# Composition of each `eci` level across `pop`, as row-wise fractions,
# with the 'undetermined' column left out
exclude = c('undetermined')
tme = table(srt$eci, srt$pop)
tme = tme[c(
  "Adaptive-RagWT-F1", "Adaptive-RagWT-F2",
  "Adaptive-RagWT-M1", "Adaptive-RagWT-M2",
  "Adaptive-RagKO-F1", "Adaptive-RagKO-F2",
  "Adaptive-RagKO-M1", "Adaptive-RagKO-M2",
  "PancreasPeritoneum-Pancreas-1", "PancreasPeritoneum-Pancreas-2",
  "PancreasPeritoneum-Peritoneum-1", "PancreasPeritoneum-Peritoneum-2",
  "LiverPeritoneum-Peritoneum-3", "LiverPeritoneum-Peritoneum-4",
  "LiverPeritoneum-Liver-5", "LiverPeritoneum-Liver-6"),]
tme = tme[, setdiff(colnames(tme), exclude)]
tme = tme/rowSums(tme)

barplot(tme, col = col_eci[rownames(tme)], beside = TRUE)
barplot(t(tme), col = col_singler[colnames(tme)], beside = FALSE, las = 2)

# Splitting
# The annotated object is saved, then its 'malignant' and 'normal' columns
# are saved as separate objects.
save(srt, file = 'srt.RData')
srt.malignant = srt[, srt$type == 'malignant']
save(srt.malignant, file = 'srt.malignant.RData')
srt.normal = srt[, srt$type == 'normal']
save(srt.normal, file = 'srt.normal.RData')

#### NMF ####
#
# srt.malignant = loadRData('srt.malignant.RData')
# srt.malignant = srt.malignant[, srt.malignant$condition == 'RagWT']
# srt.malignant = SCTransform(srt.malignant, return.only.var.genes = FALSE)
# srt.malignant = RunPCA(srt.malignant)
# srt.malignant = RunUMAP(srt.malignant, dims = 1:10)
#
# data = as.matrix(GetAssayData(srt.malignant, slot = 'scale.data'))
# data = data[VariableFeatures(srt.malignant),]
# data[data < 0] = 0
# data = data[apply(data, 1, var) > 0, ]
# print(dim(data))
#
# res.list = mclapply(range, function(r){
#   nmf(data, rank = r, nrun = 1, seed = 'ica', method = 'nsNMF')
# }, mc.cores = ncores)
# names(res.list) = range
# modules.list = lapply(res.list, NMFToModules, gmin = gmin)
# print(sapply(modules.list,length))
# r = names(which(sapply(modules.list, length) == as.numeric(names(modules.list))))
# r = r[length(r)]
# print(r)
# res = res.list[[r]]
# save(res.list, file = 'res.list.RData')
# save(res, file = 'res.RData')
#
# modules = NMFToModules(res, gmin = gmin)
# print(modules)
# modules = c(modules, list('Stress' = sort(Reduce(union, modules[c('m_Egr1','m_Fos','m_Hspb1')]))))
# modules = modules[c('m_Hmgb2','Stress','m_H2_K1',
#                     'm_Mt1','m_Rbp4')]
# names(modules) = c('Cycle','Stress','Interferon',
#                    'Hypoxia','Glandular')
# ma = names(modules)
# save(modules, file = 'modules.Mm.RData')

#### Human mouse ####

srt.malignant = loadRData('srt.malignant.RData')
genes.mm = rownames(GetData(srt.malignant))

# mart = useMart("ensembl", dataset = 'mmusculus_gene_ensembl')
# attributes = c("external_gene_name", grep('hsapiens', listAttributes(mart)$name, value = TRUE))
# map = getBM(attributes = attributes,
#             filters = "external_gene_name", values = genes.mm, mart = mart, uniqueRows = TRUE)
# map = dplyr:::filter(map, external_gene_name %in% genes.mm & !duplicated(external_gene_name))
# conv = data.frame(map[, grep('gene_name', colnames(map), value = TRUE)[c(1,2)]], stringsAsFactors = FALSE)
# colnames(conv) = c('mm','hs')
# conv = conv[!(conv$hs == '' | duplicated(conv$hs)), ]
load('conv.RData')

# One enrichment matrix per database, built on the `hs` column of `conv`
types = c('GO','KEGG','REACTOME','HALLMARK')
enrichment.matrix.list = lapply(types, function(db.type){
  BuildEnrichmentMatrix(genes = conv$hs, type = db.type)
})
names(enrichment.matrix.list) = types

# Modules loaded from ../Tumors (the last four entries are dropped), then
# translated hs -> mm through `conv`; genes without a match are removed
hs_hs = loadRData('../Tumors/modules.RData')
hs_hs = hs_hs[1:(length(hs_hs) - 4)]
hs_mm = lapply(hs_hs, function(genes){
  mapped = conv[match(genes, conv$hs),'mm']
  mapped[!is.na(mapped)]
})

colors.hs = loadRData('../Tumors/colors.module.RData')
colors.hs = colors.hs[names(hs_hs)]

# Modules loaded from modules.Mm.RData, translated mm -> hs the same way
mm_mm = loadRData('modules.Mm.RData')
ma = names(mm_mm)
mm_hs = lapply(mm_mm, function(genes){
  mapped = conv[match(genes, conv$mm),'hs']
  mapped[!is.na(mapped)]
})
save(mm_hs, file = 'modules.Mm_Hs.RData')

colors.mm = colors.hs[names(mm_mm)]
save(colors.mm, file = 'colors.module.Mm.RData')

# Hypergeometric overlap between every mm_hs module (columns) and every hs_hs
# module (rows), with nrow(conv) as the gene universe. With lower.tail = FALSE,
# phyper(q, ...) returns P(X > q), so the probability of an overlap at least as
# large as the observed one needs q = overlap - 1.
ovlp = sapply(mm_hs, function(m1){
  sapply(hs_hs, function(m2){
    phyper(length(intersect(m1,m2)) - 1, length(m1), nrow(conv) - length(m1), length(m2), lower.tail = FALSE)
  })
})
ovlp = -log10(ovlp)
# p-values of exactly 0 give Inf; cap them at the largest finite score
ovlp[is.infinite(ovlp)] = max(ovlp[is.finite(ovlp)])
# Index of the best-scoring hs_hs module for each mm_hs module
df = data.frame('top' = apply(ovlp, 2, which.max))
rownames(df) = colnames(ovlp)

h = Heatmap(name = 'Module overlap pvalue', ovlp,
            cluster_rows = FALSE, cluster_columns = FALSE,
            show_row_names = TRUE, show_column_names = TRUE,
            row_names_gp = gpar(col = colors.hs), column_names_gp = gpar(col = colors.mm),
            breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Oranges')(n = 9))
print(h)

# Same heatmap restricted to the rows named like the mm_hs modules. The row
# label colours are indexed by those names so they follow the rows shown.
h = Heatmap(name = 'Module overlap pvalue', ovlp[names(mm_hs),],
            cluster_rows = FALSE, cluster_columns = FALSE,
            show_row_names = TRUE, show_column_names = TRUE,
            row_names_gp = gpar(col = colors.hs[names(mm_hs)]), column_names_gp = gpar(col = colors.mm),
            breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Oranges')(n = 9))
print(h)

# Venn diagrams
# One diagram per module name present in both lists; the module name is the file name
for (n in names(mm_hs)){
  if (n %in% names(hs_hs)){
    venn.diagram(x = list('mm' = mm_hs[[n]], 'hs' = hs_hs[[n]]), filename = n)
  }
}

##### Scoring cells #####

load('srt.malignant.RData')
modules = loadRData('modules.Mm.RData')
# Extra module: 'Interferon' without B2m, H2-D1 and H2-K1; it reuses the Interferon colour
modules = c(modules, list('InterferonRed' = setdiff(modules$Interferon, c('B2m','H2-D1','H2-K1'))))
ma = names(modules)
colors.module = loadRData('colors.module.Mm.RData' )
colors.module['InterferonRed'] = colors.module['Interferon']
colors.module = colors.module[ma]

# Score cells by experiment
srt.malignant.list = SplitObject(srt.malignant, 'experiment')
srt.malignant.list = mclapply(srt.malignant.list, function(srt){
  srt = SCTransform(srt, return.only.var.genes = FALSE)
  srt = GeneToEnrichment(srt, db = modules, method = 'rand', nrand = nrand)
}, mc.cores = ncores)
# Per-experiment embedding and module feature plots. This loop reassigns the
# global `srt`, and the objects stored in srt.malignant.list are not modified.
# NOTE(review): SCTransform is run again here although each object was already
# transformed in the mclapply above -- confirm this is intended.
for (srt in srt.malignant.list){
  srt = SCTransform(srt, return.only.var.genes = FALSE)
  srt = RunPCA(srt)
  srt = RunUMAP(srt, dims = 1:10)
  o = sample(colnames(srt), size = ncol(srt), replace = FALSE)
  h = DimPlot(srt, group.by = 'ec', cols = col_ec, order = o, pt.size = 2)
  print(h)
  h = DimPlot(srt, group.by = 'experiment', cols = col_experiment, order = o, pt.size = 2)
  print(h)
  plot.list = lapply(ma, function(n){
    FeaturePlot(srt, n, pt.size = 1, cols = c('grey',colors.module[n]))
  })
  print(CombinePlots(plot.list, ncol = 3))
  save(srt, file = paste0(unique(srt$experiment),'.malignant.RData'))
}
save(srt.malignant.list, file = 'srt.malignant.list.RData')
srt.malignant = Reduce(merge2, srt.malignant.list)

# Test modules by condition
# For each 'ec' subset and each module, a 2-component PCA is run on the module
# genes and on each gene set returned by MakeRand for that module.
#   dispersion   = -log10(fraction of those sets whose PC1 stdev >= the module's)
#   coordination = -log10(fraction of those sets whose PC1/PC2 stdev ratio >= the module's)
# A PCA that errors leaves c(NA, NA) in place (deliberate best-effort).
srt.malignant.list = SplitObject(srt.malignant, 'ec')
vals = mclapply(srt.malignant.list, function(srt){
  modules_rand = MakeRand(srt, db = modules, nrand = nrand, nbin = nbin)
  sapply(ma, function(m){
    ra = sapply(modules_rand[[m]], function(mod){
      res = c(NA,NA)
      tryCatch(expr = {
        srt_temp = RunPCA(srt, features = mod, verbose = FALSE, npcs = 2)
        res = srt_temp@reductions$pca@stdev
      }, error = function(e){})
      return(res)
    })
    re = c(NA,NA)
    tryCatch(expr = {
      srt_temp = RunPCA(srt, features = modules[[m]], verbose = FALSE, npcs = 2)
      re = srt_temp@reductions$pca@stdev
    }, error = function(e){})
    dispersion = -log10(mean(ra[1,] >= re[1]))
    coordination = -log10(mean(ra[1,]/ra[2,] >= re[1]/re[2]))
    return(c('dispersion' = dispersion, 'coordination' = coordination))
  })
}, mc.cores = ncores)
dispersion = sapply(vals, '[', 'dispersion', ma)
# NOTE(review): infinite -log10 scores are replaced by nrand itself (and the
# color breaks run from 0 to nrand) - confirm this scale is intended
dispersion[is.infinite(dispersion)] = nrand
top_ann = HeatmapAnnotation(df = data.frame('ec' = names(srt.malignant.list)),
                            col = list('ec' = col_ec),
                            which = 'column')
h = Heatmap(dispersion, name = 'Dispersion',
            top_annotation = top_ann,
            cluster_rows = FALSE, cluster_columns = FALSE,
            show_row_names = TRUE, show_column_names = TRUE,
            row_names_gp = gpar(col = colors.module[rownames(dispersion)]), column_names_gp = gpar(col = col_ec[colnames(dispersion)]),
            breaks = seq(0, nrand, length = 11), col = viridis_pal(begin = 0, end = 1, option = 'magma')(11))
print(h)
# Draw a black grid over the heatmap body: one line per column and row boundary
decorate_heatmap_body('Dispersion', {
  #l = c(0,cumsum(rle(as.character(cancer))$lengths))
  l = 0:ncol(dispersion)
  for (k in seq_along(l)){
    i = unit(l[k]/max(l), 'npc')
    grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
  }
  l = 0:nrow(dispersion)
  for (k in seq_along(l)){
    i = unit(l[k]/max(l), 'npc')
    grid.lines(c(0,1), i, gp = gpar(lwd = 1, col = 'black'))
  }
})
# Gene-gene correlation matrices within each module, per condition
cors = mclapply(srt.malignant.list, function(srt){
  data = GetData(srt, slot = 'data')
  sapply(ma, function(m){
    re = cor(t(as.matrix(data[modules[[m]],])))
    re[is.na(re)] = 0
    # Mask self-correlations
    diag(re) = NA
    return(re)
  })
}, mc.cores = ncores)
# Reorganize from condition -> module to module -> condition
cors = lapply(ma, function(m){
  a = sapply(cors, '[', m)
  names(a) = names(srt.malignant.list)
  return(a)
})
names(cors) = ma
for (m in ma){
  h.list = NULL
  for (s in names(srt.malignant.list)){
    h.list = h.list + Heatmap(cors[[m]][[s]], name = m,
                              cluster_rows = FALSE, cluster_columns = FALSE, show_row_names = TRUE,
                              breaks = seq(-1, 1, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
  }
  print(h.list)
}
par(mfrow = c(4,4))
for (m in ma){
  a = cors[[m]]
  a = as.vector(a)
  boxplot(a, main = m, col = col_ec[names(cors[[m]])], pch = 20, cex = 0.1, las = 2, ylim = c(-0.5,1), names = rep('', length(names(cors[[m]]))))
  #abline(h = 0)
}
par(mfrow = c(1,1))
# Per-condition embedding and plots (loop variable is a copy; nothing is stored back)
for (srt in srt.malignant.list){
  srt = SCTransform(srt, return.only.var.genes = FALSE)
  srt = RunPCA(srt)
  srt = RunUMAP(srt, dims = 1:10)
  o = sample(colnames(srt), size = ncol(srt), replace = FALSE)
  h = DimPlot(srt, group.by = 'ec', cols = col_ec, order = o, pt.size = 2)
  print(h)
  h = DimPlot(srt, group.by = 'experiment', cols = col_experiment, order = o, pt.size = 2)
  print(h)
  plot.list = lapply(ma, function(n){
    FeaturePlot(srt, n, pt.size = 1, cols = c('grey',colors.module[n]))
  })
  print(CombinePlots(plot.list, ncol = 3))
}
srt.malignant = Reduce(merge2, srt.malignant.list)

# Fix the display order of samples, conditions and experiments
srt.malignant$eci = factor(srt.malignant$eci, levels = c(
  "Adaptive-RagWT-F1", "Adaptive-RagWT-F2",
  "Adaptive-RagWT-M1", "Adaptive-RagWT-M2",
  "Adaptive-RagKO-F1", "Adaptive-RagKO-F2",
  "Adaptive-RagKO-M1", "Adaptive-RagKO-M2",
  "PancreasPeritoneum-Pancreas-1", "PancreasPeritoneum-Pancreas-2",
  "PancreasPeritoneum-Peritoneum-1", "PancreasPeritoneum-Peritoneum-2",
  "LiverPeritoneum-Peritoneum-3", "LiverPeritoneum-Peritoneum-4",
  "LiverPeritoneum-Liver-5", "LiverPeritoneum-Liver-6"
))
srt.malignant$ec = factor(srt.malignant$ec, levels = c(
  'Adaptive-RagWT','Adaptive-RagKO',
  'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
  'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver'))
srt.malignant$experiment = factor(srt.malignant$experiment, levels = c(
  'Adaptive','PancreasPeritoneum','LiverPeritoneum'))

srt.malignant = RunPCA(srt.malignant)
srt.malignant = RunUMAP(srt.malignant, dims = 1:10)

# Categorical version of each module score: '0' when the score is <= 0.5,
# otherwise the cell's condition (ec).
# Levels are given explicitly so the labels stay aligned even when a condition
# has no cell above the threshold for a module (factor() would otherwise fail
# because the number of labels would not match the number of observed values).
for (m in ma){
  n = paste(m, 'cat', sep = '_')
  srt.malignant@meta.data[,n] = factor((srt.malignant@meta.data[,m] > 0.5)*as.numeric(srt.malignant$ec),
                                       levels = 0:nlevels(srt.malignant$ec),
                                       labels = c(0, levels(srt.malignant$ec)))
}
col_cat_ec = c('lightgrey',col_ec)
names(col_cat_ec) = c('0',names(col_ec))

h = VlnPlot(srt.malignant, features = ma, group.by = 'eci', cols = col_eci, pt.size = 0, ncol = 1)
print(h)

# Merged object, all conditions on one embedding
o = sample(colnames(srt.malignant), size = ncol(srt.malignant), replace = FALSE)
h = DimPlot(srt.malignant, group.by = 'ec', cols = col_ec, order = o, pt.size = 2)
print(h)
h = DimPlot(srt.malignant, group.by = 'experiment', cols = col_experiment, order = o, pt.size = 2)
print(h)
plot.list = lapply(ma, function(n){
  FeaturePlot(srt.malignant, n, pt.size = 1, cols = c('grey',colors.module[n]))
})
print(CombinePlots(plot.list, ncol = 3))
plot.list = lapply(paste(ma, 'cat', sep = '_'), function(n){
  h = DimPlot(srt.malignant, group.by = n, cols = col_cat_ec, order = o, pt.size = 0.1)
})
print(CombinePlots(plot.list, ncol = 2))

# Merged object, one panel per condition
o = sample(1:ncol(srt.malignant), size = ncol(srt.malignant), replace = FALSE)
h = DimPlot(srt.malignant, group.by = 'ec', cols = col_ec, order = o, pt.size = 1, split.by = 'ec')
print(h)
h = DimPlot(srt.malignant, group.by = 'experiment', cols = col_experiment, order = o, pt.size = 1, split.by = 'ec')
print(h)
plot.list = lapply(ma, function(n){
  FeaturePlot(srt.malignant, n, split.by = 'ec', pt.size = 1, cols = c('grey',colors.module[n]))
})
print(CombinePlots(plot.list, ncol = 1))
plot.list = lapply(paste(ma, 'cat', sep = '_'), function(n){
  h = DimPlot(srt.malignant, group.by = n, cols = col_cat_ec, order = (srt.malignant@meta.data[,n] == 0), pt.size = 0.1, split.by = 'ec')
})
print(CombinePlots(plot.list, ncol = 1))

meta = srt.malignant@meta.data
save(meta, file = 'meta.malignant.RData')
save(srt.malignant, file = 'srt.malignant.RData')

#### Plotting ####

# Fraction of cells per sample (eci) with a module score above 0.5
mat = Reduce(cbind, lapply(ma, function(m){
  freq = (meta[,m] > 0.5)
  mat = data.frame('value' = as.matrix(by(freq, meta$eci, mean)))
  return(mat)
}))
colnames(mat) = ma
# Print as percentages (the scaling must happen inside print())
print(mat*100)

# Barplot

# One panel per module: mean fraction per condition (bar) and per sample (points)
plot.list = lapply(ma, function(m){
  freq = (meta[,m] > 0.5)
  mat = data.frame('value' = as.matrix(by(freq, meta$eci, mean)))
  mat$eci = rownames(mat)
  # eci is 'experiment-condition-individual'
  mat$experiment = factor(sapply(strsplit(mat$eci, '-'), '[', 1))
  mat$condition = factor(sapply(strsplit(mat$eci, '-'), '[', 2))
  mat$individual = factor(sapply(strsplit(mat$eci, '-'), '[', 3))
  mat$ec = factor(paste(mat$experiment, mat$condition, sep = '-'),
                  levels = c('Adaptive-RagWT','Adaptive-RagKO',
                             'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
                             'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver'))
  p = ggplot(mat, aes(x = ec, y = value, fill = ec, color = ec)) +
    stat_summary(fun.y = mean, fun.ymin = mean, fun.ymax = mean,
                 geom = 'bar', width = 0.5) +
    geom_point(color = 'black') +
    scale_color_manual(values = col_ec) +
    scale_fill_manual(values = col_ec) +
    geom_signif(comparisons = list(levels(mat$ec)[1:2]),
                test = 'wilcox.test', map_signif_level = TRUE, margin_top = -0.1) +
    geom_signif(comparisons = list(levels(mat$ec)[3:4],levels(mat$ec)[5:6]),
                test = 't.test', map_signif_level = TRUE, margin_top = -0.1) +
    ylim(0, max(mat$value)+0.1) +
    ggtitle(m) +
    theme_classic() +
    theme(
      legend.position = 'none',
      plot.title = element_text(hjust = 0.5),
      axis.text.x = element_text(size = 0),
      axis.title.x = element_text(size = 0),
      axis.title.y = element_text(size = 0))
  p
})
print(CombinePlots(plot.list, ncol = 3))

# Diagonal plots

# One row per module with mean / sd / se of the per-sample fractions in each
# condition. data.frame() converts the '-' in the column names to '.', which is
# why the aesthetics below use names such as mean_Adaptive.RagWT.
mat = data.frame(Reduce(rbind, lapply(ma, function(m){
  freq = (meta[,m] > 0.5)
  sam = data.frame('value' = as.matrix(by(freq, meta$eci, mean)))
  sam$eci = rownames(sam)
  sam$experiment = factor(sapply(strsplit(sam$eci, '-'), '[', 1))
  sam$condition = factor(sapply(strsplit(sam$eci, '-'), '[', 2))
  sam$individual = factor(sapply(strsplit(sam$eci, '-'), '[', 3))
  sam$ec = factor(paste(sam$experiment, sam$condition, sep = '-'),
                  levels = c('Adaptive-RagWT','Adaptive-RagKO',
                             'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
                             'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver'))
  stats_mean = as.matrix(by(sam$value, sam$ec, mean))
  rownames(stats_mean) = paste('mean', rownames(stats_mean), sep = '_')
  stats_sd = as.matrix(by(sam$value, sam$ec, sd))
  rownames(stats_sd) = paste('sd', rownames(stats_sd), sep = '_')
  # Standard error uses the average number of samples per condition
  stats_se = stats_sd/ sqrt(length(unique(sam$eci))/length(unique(sam$ec)))
  rownames(stats_se) = gsub('sd','se',rownames(stats_se))
  stats = t(rbind(stats_mean, stats_sd, stats_se))
  rownames(stats) = m
  return(stats)
})))
mat$module = rownames(mat)
plot.list = list()
plot.list[[1]] = ggplot(data = mat, aes(x = mean_Adaptive.RagWT, y = mean_Adaptive.RagKO, color = module)) +
  geom_point() +
  scale_fill_manual(values = colors.module) +
  scale_color_manual(values = colors.module) +
  geom_errorbarh(aes(xmax = mean_Adaptive.RagWT + se_Adaptive.RagWT, xmin = mean_Adaptive.RagWT - se_Adaptive.RagWT), height = 0.002) +
  geom_errorbar(aes(ymax = mean_Adaptive.RagKO + se_Adaptive.RagKO, ymin = mean_Adaptive.RagKO - se_Adaptive.RagKO), width = 0.002) +
  geom_text(aes(label = module), nudge_y = 0.01) +
  geom_abline(slope = 1, intercept = 0, color = 'grey') +
  ggtitle('') +
  xlim(0,0.5) +
  ylim(0,0.5) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 0, hjust = 1),
    axis.text.y = element_text(angle = 0, hjust = 1))
plot.list[[2]] = ggplot(data = mat, aes(x = mean_PancreasPeritoneum.Pancreas, y = mean_PancreasPeritoneum.Peritoneum, color = module)) +
  geom_point() +
  scale_fill_manual(values = colors.module) +
  scale_color_manual(values = colors.module) +
  geom_errorbarh(aes(xmax = mean_PancreasPeritoneum.Pancreas + se_PancreasPeritoneum.Pancreas, xmin = mean_PancreasPeritoneum.Pancreas - se_PancreasPeritoneum.Pancreas), height = 0.002) +
  geom_errorbar(aes(ymax = mean_PancreasPeritoneum.Peritoneum + se_PancreasPeritoneum.Peritoneum, ymin = mean_PancreasPeritoneum.Peritoneum - se_PancreasPeritoneum.Peritoneum), width = 0.002) +
  geom_text(aes(label = module), nudge_y = 0.01) +
  geom_abline(slope = 1, intercept = 0, color = 'grey') +
  ggtitle('') +
  xlim(0,0.5) +
  ylim(0,0.5) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 0, hjust = 1),
    axis.text.y = element_text(angle = 0, hjust = 1))
plot.list[[3]] = ggplot(data = mat, aes(x = mean_LiverPeritoneum.Peritoneum, y = mean_LiverPeritoneum.Liver, color = module)) +
  geom_point() +
  scale_fill_manual(values = colors.module) +
  scale_color_manual(values = colors.module) +
  geom_errorbarh(aes(xmax = mean_LiverPeritoneum.Peritoneum + se_LiverPeritoneum.Peritoneum, xmin = mean_LiverPeritoneum.Peritoneum - se_LiverPeritoneum.Peritoneum), height = 0.002) +
  geom_errorbar(aes(ymax = mean_LiverPeritoneum.Liver + se_LiverPeritoneum.Liver, ymin = mean_LiverPeritoneum.Liver - se_LiverPeritoneum.Liver), width = 0.002) +
  geom_text(aes(label = module), nudge_y = 0.01) +
  geom_abline(slope = 1, intercept = 0, color = 'grey') +
  ggtitle('') +
  xlim(0,0.5) +
  ylim(0,0.5) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 0, hjust = 1),
    axis.text.y = element_text(angle = 0, hjust = 1))
print(CombinePlots(plot.list, ncol = 2))

# # TME
#
# state = data.frame(Reduce(cbind, lapply(ma, function(m){
#   freq = (meta[,m] > 0.5)
#   sam = data.frame('value' = as.matrix(by(freq, meta$eci, mean)))
#   sam$eci = rownames(sam)
#   sam$experiment = factor(sapply(strsplit(sam$eci, '-'), '[', 1))
#   sam$condition = factor(sapply(strsplit(sam$eci, '-'), '[', 2))
#   sam$individual = factor(sapply(strsplit(sam$eci, '-'), '[', 3))
#   sam$ec = factor(paste(sam$experiment, sam$condition, sep = '-'),
#                   levels = c('Adaptive-RagWT','Adaptive-RagKO',
#                              'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
#                              'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver'))
#   stats_mean = as.matrix(by(sam$value, sam$eci, mean))
#   colnames(stats_mean) = m
#   return(stats_mean)
# })))
#
# # Multiple regression
# exclude = c('Malignant')
# tme = tme[, setdiff(colnames(tme), exclude)]
#
# test = sapply(ma, function(m){
#   fit = summary(lm(state[,m] ~ tme))$coefficients
#   pval = -log10(fit[,4])
#   pval[pval < pval_thresh] = 0
#   names(pval) = gsub('tme','',names(pval))
#   est = fit[,1]
#   names(est) = gsub('tme','',names(est))
#   val = pval*sign(est)
#   return(val)
# })
# h = Heatmap(test,
#             show_row_names = TRUE, show_column_names = TRUE,
#             cluster_rows = FALSE, cluster_columns = FALSE,
#             breaks = seq(-3, 3, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
# print(h)
#
# # Plot hits
# par(mfrow = c(4,4))
# for (ct in colnames(tme)){
#   for (m in ma){
#     if (!test[ct,m] == 0){
#       fit = summary(lm(state[,m] ~ tme[,ct]))$coefficients
#       pval = -log10(fit[2,4])
#       plot(state[,m] ~ tme[,ct], col = col_eci[rownames(state)], pch = 20, xlab = ct, ylab = m, main = paste(round(pval, digit = 2), round(test[ct,m], digit = 2)))
#       abline(a = fit[1,1], b = fit[2,1], lty = 2)
#     }
#   }
# }
# par(mfrow = c(1,1))
#
# # Plot hits, log
# par(mfrow = c(4,4))
# for (ct in colnames(tme)){
#   for (m in ma){
#     if (!test[ct,m] == 0){
#       fit = summary(lm(state[,m] ~ tme[,ct]))$coefficients
#       pval = -log10(fit[2,4])
#       plot(10^-4+tme[,ct], 10^-4+state[,m], col = col_eci[rownames(state)], pch = 20, xlab = ct, ylab = m, main = paste(round(pval, digit = 2), round(test[ct,m], digit = 2)), log = 'xy')
#     }
#   }
# }
# par(mfrow = c(1,1))

# Individual genes

# Average expression of Interferon-module genes in the Adaptive experiment,
# grouped by condition and by whether the cell's Interferon score exceeds 0.5
srt = loadRData('Adaptive.malignant.RData')
srt$combined = paste(srt$condition, srt$Interferon > 0.5, sep = '_')
Idents(srt) = 'combined'
avg = AverageExpression(srt, slot = 'data')$SCT

MHCI = c('B2m','H2-D1','H2-K1')

par(mfrow = c(5,5))

plot(avg[modules$Interferon, 'RagWT_TRUE'], avg[modules$Interferon, 'RagKO_TRUE'],
     pch = 20, xlab = 'RagWT', ylab = 'RagKO', main = 'Interferon', xlim = c(0,18), ylim = c(0,18))
text(avg[MHCI, 'RagWT_TRUE'], avg[MHCI, 'RagKO_TRUE'], MHCI, cex = 0.7, pos = 3, offset = 0.2)
abline(a = 0, b = 1, col = 'grey')

plot(avg[modules$Interferon, 'RagWT_FALSE'], avg[modules$Interferon, 'RagKO_FALSE'],
     pch = 20, xlab = 'RagWT', ylab = 'RagKO', main = 'Other', xlim = c(0,18), ylim = c(0,18))
text(avg[MHCI, 'RagWT_FALSE'], avg[MHCI, 'RagKO_FALSE'], MHCI, cex = 0.7, pos = 3, offset = 0.2)
abline(a = 0, b = 1, col = 'grey')

plot(avg[modules$Interferon, 'RagWT_FALSE'], avg[modules$Interferon, 'RagWT_TRUE'],
     pch = 20, xlab = 'Other', ylab = 'Interferon', main = 'RagWT', xlim = c(0,18), ylim = c(0,18))
text(avg[MHCI, 'RagWT_FALSE'], avg[MHCI, 'RagWT_TRUE'], MHCI, cex = 0.7, pos = 3, offset = 0.2)
abline(a = 0, b = 1, col = 'grey')

plot(avg[modules$Interferon, 'RagKO_FALSE'], avg[modules$Interferon, 'RagKO_TRUE'],
     pch = 20, xlab = 'Other', ylab = 'Interferon', main = 'RagKO', xlim = c(0,18), ylim = c(0,18))
text(avg[MHCI, 'RagKO_FALSE'], avg[MHCI, 'RagKO_TRUE'], MHCI, cex = 0.7, pos = 3, offset = 0.2)
abline(a = 0, b = 1, col = 'grey')

par(mfrow = c(5,1))

o = order(avg[modules$Interferon,'RagWT_TRUE'], decreasing = TRUE)
barplot(t(avg[modules$Interferon[o],]), beside = TRUE, col = col_ec[c(1,1,2,2)], las = 2)
# Overlay hatching on every second bar of each group
barplot(t(avg[modules$Interferon[o],]), beside = TRUE, col = colors.module['Interferon'], density = c(0,20,0,20), las = 2, add = TRUE)

par(mfrow = c(1,1))

dev.off()

# NOTE(review): the barplots below are drawn after dev.off(), so they are not
# written to the device closed above - confirm this is intended

# KS test between the two conditions of each experiment, pooling all cells
pvals = sapply(ma, function(m){
  sapply(levels(meta$experiment), function(e){
    sub = meta[meta$experiment == e,]
    sub$ec = factor(sub$ec)
    i = levels(sub$ec)[1]
    j = levels(sub$ec)[2]
    ks.test(sub[sub$ec == i,m], sub[sub$ec == j,m])$p.value
  })
})
pvals = -log10(pvals)
pvals[is.infinite(pvals)] = max(pvals[is.finite(pvals)])
barplot(pvals, beside = TRUE, las= 2, col = col_experiment)

# Same test between every pair of samples across the two conditions, keeping
# the largest (least significant) p-value. Each comparison must use the single
# sample ids of the current pair, not the whole vectors of sample ids.
pvals = sapply(ma, function(m){
  sapply(levels(meta$experiment), function(e){
    sub = meta[meta$experiment == e,]
    sub$ec = factor(sub$ec)
    i = levels(sub$ec)[1]
    j = levels(sub$ec)[2]
    li = as.character(unique(sub$eci[sub$ec == i]))
    lj = as.character(unique(sub$eci[sub$ec == j]))
    max(sapply(li, function(a){
      sapply(lj, function(b){
        ks.test(sub[sub$eci == a,m], sub[sub$eci == b,m])$p.value
      })
    }))
  })
})
pvals = -log10(pvals)
pvals[is.infinite(pvals)] = max(pvals[is.finite(pvals)])
barplot(pvals, beside = TRUE, las= 2, col = col_experiment)



--------------------------------------------------------------------------------
/part1.R:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript
# Per-sample Visium pipeline: load, normalize, embed, cluster, run NMF over a
# range of ranks, then write plots, heatmaps and enrichment tables.
# Usage: Rscript part1.R <cancer> <species> [<author>] <sample>
source('~/Documents/R/seurat_functions.R')

#### Arguments ####
args = commandArgs(trailingOnly = TRUE)
if (length(args) == 3){
  cancer = args[1]
  species = args[2]
  author = ''
  sample = args[3]
} else if (length(args) == 4){
  cancer = args[1]
  species = args[2]
  author = args[3]
  sample = args[4]
} else {
  # Fail early instead of hitting an undefined variable further down
  stop('Usage: part1.R <cancer> <species> [<author>] <sample>', call. = FALSE)
}
numi.min = -Inf
ngene.min = -Inf
mt.max = Inf
range = 2:25
gmin = 5

# Combining
orig.ident = paste0(cancer, species, author, sample)
print(orig.ident)

#### Load ####
st = Load10X_Spatial(getwd())
st$cancer = cancer
st$author = author
st$species = species
st$technique = 'Visium'
st$sample = as.character(sample)
st$orig.ident = orig.ident
Idents(st) = 'orig.ident'

#### Filter ####
pdf('filter.pdf')
#st = FilterCells(st, do.plot = TRUE, assay = 'Spatial',
#                 numi.min = numi.min, ngene.min = ngene.min, mt.max = mt.max)
st = FilterGenes(st)
st = SCTransform(st, assay = 'Spatial', return.only.var.genes = FALSE)
st = FindSpatiallyVariableFeatures(st, selection.method = 'markvariogram')
st = RunPCA(st, features = SpatiallyVariableFeatures(st))
st = RunUMAP(st, dims = 1:10)
dev.off()

#### CNV ####
if (file.exists('Clones/labels_Clones.csv')){
  cnv = read.csv('Clones/labels_Clones.csv')
  # NOTE(review): assumes the rows of the CSV are in the same order as the spots in st - confirm
  rownames(cnv) = colnames(st)
  st = AddMetaData(st, cnv[,2], col.name = 'clone')
} else {
  st$clone = 0
}
st$clone = as.factor(st$clone)
col_clone = brewer_pal(palette = 'Reds')(1+nlevels(st$clone))[-1]
names(col_clone) = levels(st$clone)

#### Cluster ####
st = FindNeighbors(st)
st = FindClusters(st)
st$cluster = Idents(st)
col_cluster = hue_pal()(nlevels(st$cluster))
names(col_cluster) = levels(st$cluster)

#### NMF ####
# Run NMF
data = as.matrix(GetAssayData(st, assay = 'SCT', slot = 'scale.data'))
data = data[SpatiallyVariableFeatures(st),]
# NMF needs a non-negative matrix without constant rows
data[data < 0] = 0
data = data[apply(data, 1, var) > 0, ]
print(dim(data))
res.list = mclapply(range, function(r){
  nmf(data, rank = r, nrun = 1, seed = 'ica', method = 'nsNMF')
}, mc.cores = ncores)
names(res.list) = range
save(res.list, file = 'res.list.RData')
# Select rank
# Keep the largest rank among those minimizing (rank - number of modules)
modules.list = lapply(res.list, NMFToModules, gmin = gmin)
print(sapply(modules.list,length))
comp = as.numeric(names(modules.list)) - sapply(modules.list, length)
mi = min(comp)
r = names(which(comp == mi))
r = r[length(r)]
print(r)
res = res.list[[r]]
save(res, file = 'res.RData')
# Process output
tryCatch(expr = {
  modules = NMFToModules(res, gmin = gmin)
  scores = basis(res)
  colnames(scores) = names(modules)
  coefs = coefficients(res)
  rownames(coefs) = names(modules)
  # Order modules
  h = Heatmap(coefs, clustering_distance_columns = 'euclidean')
  o = row_order(h)
  scores = scores[, o]
  coefs = coefs[o, ]
  modules = modules[o]
  print(modules)
  st = AddMetaData(st, t(coefs), col.name = rownames(coefs))
  # Cluster NMF output
  h = Heatmap(coefs, clustering_distance_columns = 'euclidean')
  hcl = as.hclust(column_dend(h))
  sig = cutree(hcl, k = length(modules))
  # Label each cluster of spots with the module of highest mean coefficient
  nmf = c(by(t(coefs), INDICES = sig, FUN = function(x){names(modules)[which.max(colMeans(x))]}))[sig]
  st$nmf = factor(nmf, levels = names(modules))
  col_nmf = c(brewer.pal(12, 'Set3'), brewer.pal(8, 'Set1'))[1:nlevels(st$nmf)]
  names(col_nmf) = levels(st$nmf)
}, error = function(e){
  # Still best-effort, but report why the NMF post-processing failed
  message('NMF post-processing failed: ', conditionMessage(e))
})
save(st, file = 'st.RData')

#### Saving ####

#### Plotting ####
pdf('dimplots.pdf', height = 15, width = 15)
for (reduction in c('pca','umap')){
  h = DimPlot(st, pt.size = 2, reduction = reduction, group.by = 'cluster', cols = col_cluster, label = TRUE)
  print(h)
  h = DimPlot(st, pt.size = 2, reduction = reduction, group.by = 'clone', cols = col_clone, label = FALSE)
  print(h)
  h = DimPlot(st, pt.size = 2, reduction = reduction, group.by = 'nmf', cols = col_nmf)
  print(h)
  h = FeaturePlot(st, reduction = reduction, features = names(modules))
  print(h)
}
h = SpatialDimPlot(st, pt.size = 2, group.by = 'cluster', cols = col_cluster)
print(h)
# Spatial view of the clones, mirroring the cluster / clone / nmf sequence of
# the DimPlot loop above (this call previously repeated the cluster plot)
h = SpatialDimPlot(st, pt.size = 2, group.by = 'clone', cols = col_clone)
print(h)
h = SpatialDimPlot(st, pt.size = 2, group.by = 'nmf', cols = col_nmf)
print(h)
h = SpatialFeaturePlot(st, features = names(modules))
print(h)
dev.off()
pdf('heatmaps.pdf', height = 20, width = 10)
sink('heatmaps.txt')
# Clusters
markers = FindAllMarkers2(st, do.print = TRUE, do.plot = TRUE, enrichment.type = 'GO', group.by = 'cluster', cols = col_cluster, print.bar = FALSE)
# NMF
markers = FindAllMarkers2(st, do.print = TRUE, do.plot = TRUE, enrichment.type = 'GO', group.by = 'nmf', cols = col_nmf, print.bar = FALSE)
# Modules
top_ann = HeatmapAnnotation(df = data.frame('cluster' = st$cluster,
                                            'nmf' = st$nmf),
                            col = list('cluster' = col_cluster,
                                       'nmf' = col_nmf), which = 'column')
h = Heatmap(coefs, name = 'coefs', top_ann = top_ann,
            show_row_names = TRUE,
            cluster_rows = FALSE,
            cluster_columns = FALSE, column_order = order(st$nmf, st$cluster),
            breaks = c(0,max(coefs)/2), colors = c('white','red'))
print(h)
h = Heatmap(scores[unlist(modules),], name = 'scores',
            show_row_names = TRUE, row_names_gp = gpar(cex = 0.5),
            cluster_rows = FALSE,
            cluster_columns = FALSE,
            split = factor(unlist(mapply(rep, names(modules), sapply(modules, length))), levels = names(modules)))
print(h)
print(modules)
# First ten GO terms per module after sorting the Enrichment() output in increasing order
enrichment.matrix = BuildEnrichmentMatrix(rownames(st), type = 'GO')
go_10 = sapply(modules, function(tbl){
  e = Enrichment(geneset = tbl, enrichment.matrix = enrichment.matrix)
  names(sort(e))[1:10]
})
colnames(go_10) = names(modules)
print(go_10)
sink()
dev.off()



--------------------------------------------------------------------------------
/part1.sh:
--------------------------------------------------------------------------------
#!/usr/bin/bash
#SBATCH --job-name=Gyn_part1 # Job name
#SBATCH --mail-type=END,FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL)
#SBATCH --mail-user=db3562@nyulangone.org
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=40
#SBATCH --mem=100gb # Job memory request
#SBATCH --time=10:00:00 # Time limit hrs:min:sec
#SBATCH --output=serial_test_%j.log # Standard output and error log
#SBATCH --partition cpu_short

source ~/.bashrc

if [ "$#" == 3 ]; then
    cd ~/Documents/STARCH
    conda activate STARCH
    python run_STARCH.py -i ~/Documents/Gyn/${1}${2}/${1}${2}${3}/Clones --output Clones --n_clusters 3 --outdir ~/Documents/Gyn/${1}${2}/${1}${2}${3}/Clones
    conda deactivate
    cd ~/Documents/Gyn/${1}${2}/${1}${2}${3}
    conda activate seurat_functions
    module load r/4.0.3
    Rscript ~/Documents/Gyn/Runs/part1.R ${1} ${2} ${3}

elif [ "$#" == 4 ]; then
    cd ~/Documents/STARCH
    conda activate STARCH
    python run_STARCH.py -i ~/Documents/Gyn/${1}${2}${3}/${1}${2}${3}${4}/Clones --output Clones --n_clusters 3 --outdir ~/Documents/Gyn/${1}${2}${3}/${1}${2}${3}${4}/Clones
    conda deactivate
    cd ~/Documents/Gyn/${1}${2}${3}/${1}${2}${3}${4}
    conda activate seurat_functions
    module load r/4.0.3
    Rscript ~/Documents/Gyn/Runs/part1.R ${1} ${2} ${3} ${4}

fi


-------------------------------------------------------------------------------- /part2.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | source('~/Documents/R/seurat_functions.R') 3 | load('srt.RData') 4 | 5 | ##### Arguments ##### 6 | args <- commandArgs(trailingOnly = TRUE) 7 | cluster.malignant = eval(parse(text = args[1])) 8 | clone.malignant = eval(parse(text = args[2])) 9 | k_obs_groups = 2 10 | #if (unique(srt$author) == ''){ 11 | range = 2:25 12 | #} else { 13 | # range = 5:35 14 | #} 15 | gmin = 5 16 | print(unique(srt$orig.ident)) 17 | 18 | ##### Determining type #### 19 | # Determining criteria 20 | singler.malignant = c('Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Chondrocytes','Fibroblasts','Smooth_muscle_cells','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell') 21 | cluster.normal = setdiff(levels(srt$cluster), cluster.malignant) 22 | clone.normal = setdiff(levels(srt$clone), clone.malignant) 23 | singler.normal = setdiff(levels(srt$singler), singler.malignant) 24 | # Finding cells 25 | cells.malignant = WhichCells(srt, expression = singler %in% singler.malignant 26 | & cluster %in% cluster.malignant 27 | & clone %in% clone.malignant) 28 | if (length(cells.malignant) == ncol(srt)){ 29 | donormal = FALSE 30 | } else { 31 | donormal = TRUE 32 | } 33 | if (donormal){ 34 | cells.normal = WhichCells(srt, expression = cluster %in% cluster.normal) 35 | cells.normal = setdiff(cells.normal, cells.malignant) 36 | } else { 37 | cells.normal = c() 38 | } 39 | # Filtering 40 | test = intersect(c('B_cell','DC','Macrophage','Monocyte','Neutrophils','NK_cell','T_cells'), levels(srt$singler)) 41 | print(length(cells.malignant)) 42 | for (i in test){ 43 | s = paste('scores',i ,sep = '.') 44 | thresh = min(srt@meta.data[srt$singler == i,s]) 45 | #thresh = quantile(srt@meta.data[srt$singler == i,s], seq(0,1,by=0.01))['5%'] 46 | cells.malignant = 
cells.malignant[srt@meta.data[cells.malignant, s] <= thresh] 47 | print(i) 48 | print(length(cells.malignant)) 49 | } 50 | Idents(srt) = 'singler' 51 | avg = AverageExpression(srt, assay = 'SCT', slot = 'data')$SCT 52 | data = GetData(srt, assay = 'SCT', slot = 'data')[rownames(avg),cells.malignant] 53 | for (i in test){ 54 | corr = apply(data, 2, function(x){ 55 | cor(x, avg[,i], method = 'pearson') 56 | }) 57 | cells.malignant = setdiff(cells.malignant, colnames(data)[corr > mean(corr) + 2*sd(corr)]) 58 | print(i) 59 | print(length(cells.malignant)) 60 | } 61 | if (unique(srt$author) == 'Puram'){ 62 | cells.malignant = cells.malignant[srt@meta.data[cells.malignant, 'malignant'] == 1] 63 | } 64 | # Assigning 65 | srt$type = 'undetermined' 66 | srt@meta.data[cells.malignant, 'type'] = 'malignant' 67 | srt@meta.data[cells.normal, 'type'] = 'normal' 68 | srt$type = factor(srt$type, levels = c('malignant','normal','undetermined')) 69 | print(table(srt$type)) 70 | print(table(srt$singler, srt$cluster, srt$type)) 71 | # Saving 72 | save(srt, file = 'srt.RData') 73 | 74 | ##### Removing undetermined ##### 75 | srt = srt[, srt$type %in% c('malignant','normal')] 76 | srt$type = factor(srt$type, levels = c('malignant','normal')) 77 | col_type = c(brewer_pal(palette = 'Blues')(5)[4], brewer_pal(palette = 'Greens')(5)[4], 'grey')[1:nlevels(srt$type)] 78 | names(col_type) = levels(srt$type) 79 | srt = SCTransform(srt, return.only.var.genes = FALSE) 80 | srt = RunPCA(srt, features = VariableFeatures(srt)) 81 | srt = RunUMAP(srt, dims = 1:10) 82 | # Cluster 83 | col_cluster = hue_pal()(nlevels(srt$cluster)) 84 | names(col_cluster) = levels(srt$cluster) 85 | # Singler 86 | col_singler = c(brewer.pal(8, 'Dark2'), brewer.pal(8, 'Accent'))[1:nlevels(srt$singler)] 87 | names(col_singler) = levels(srt$singler) 88 | # Pop 89 | srt$pop = as.character(srt$type) 90 | srt$pop[srt$type == 'normal'] = as.character(srt$singler[srt$type == 'normal']) 91 | srt$pop = 
gsub('malignant','Malignant',srt$pop) 92 | srt$pop = factor(srt$pop) 93 | srt$pop = relevel(srt$pop, ref = 'Malignant') 94 | col_pop = col_singler[intersect(levels(srt$pop),names(col_singler))] 95 | col_pop = c(col_pop, 'Malignant' = 'grey20') 96 | 97 | #### InferCNV #### 98 | # Finding subclones 99 | dir.create('Subclones', showWarnings = FALSE, recursive = TRUE, mode = "0777") 100 | setwd('Subclones') 101 | srt$subclone = 0 102 | srt$subclone = factor(srt$subclone) 103 | col_subclone = brewer_pal(palette = 'Purples')(1+nlevels(srt$subclone))[-1] 104 | names(col_subclone) = levels(srt$subclone) 105 | tryCatch(expr = { 106 | annotations_file = data.frame(srt@meta.data$type, row.names = rownames(srt@meta.data), stringsAsFactors = FALSE) 107 | raw_counts_matrix = as.matrix(GetAssayData(srt, assay = 'SCT', slot = 'counts')) 108 | if (unique(srt$species) == 'Hs'){ 109 | gene_order_file = '~/Documents/CNV/Hs/gene_order.tsv' 110 | } 111 | if (unique(srt$species) == 'Mm'){ 112 | gene_order_file = '~/Documents/CNV/Mm/gene_order.tsv' 113 | } 114 | if (donormal){ 115 | infercnv_obj = CreateInfercnvObject(raw_counts_matrix = raw_counts_matrix, 116 | annotations_file = annotations_file, 117 | gene_order_file = gene_order_file, 118 | delim = "\t", 119 | ref_group_names = 'normal', 120 | chr_exclude = NULL) 121 | } else { 122 | infercnv_obj = CreateInfercnvObject(raw_counts_matrix = raw_counts_matrix, 123 | annotations_file = annotations_file, 124 | gene_order_file = gene_order_file, 125 | delim = "\t", 126 | ref_group_names = NULL, 127 | chr_exclude = NULL) 128 | } 129 | 130 | infercnv_obj = infercnv::run(infercnv_obj, 131 | resume_mode = FALSE, 132 | out_dir = getwd(), 133 | cutoff = 0.1, 134 | cluster_by_groups = FALSE, 135 | analysis_mode = 'subclusters', 136 | tumor_subcluster_partition_method = 'qnorm', 137 | denoise = FALSE, 138 | HMM = FALSE, 139 | HMM_type = 'i6', 140 | num_threads = ncores, 141 | no_plot = TRUE, 142 | plot_steps = FALSE) 143 | save(infercnv_obj, file = 
'infer_cnv.RData')
  # NOTE: this section starts inside a tryCatch({ ... }) that is opened earlier
  # in the script; everything down to the `error =` handler is its body.

  # Cluster the observation cells on their inferCNV expression profiles
  # (Euclidean distance, Ward.D2 linkage) and cut the tree into k_obs_groups.
  obs_hcl = hclust(
    dist(t(infercnv_obj@expr.data[, unlist(infercnv_obj@observation_grouped_cell_indices)]), 'euclidean'),
    'ward.D2'
  )
  subclone = cutree(tree = as.hclust(obs_hcl), k = k_obs_groups)

  # Cells that were not part of the clustering keep subclone 0; when donormal
  # is TRUE that '0' group is used as the inferCNV reference below.
  srt$subclone = 0
  srt$subclone[names(subclone)] = subclone[names(subclone)]
  srt$subclone = factor(srt$subclone)

  # One more colour than there are levels, then drop the first colour.
  col_subclone = brewer_pal(palette = 'Purples')(1 + nlevels(srt$subclone))[-1]
  names(col_subclone) = levels(srt$subclone)

  # Plotting heatmap with subclones
  annotations_file = data.frame(srt$subclone, row.names = rownames(srt@meta.data))
  raw_counts_matrix = as.matrix(GetAssayData(srt, assay = 'SCT', slot = 'counts'))
  if (unique(srt$species) == 'Hs'){
    gene_order_file = '~/Documents/CNV/Hs/gene_order.tsv'
  } else if (unique(srt$species) == 'Mm'){
    gene_order_file = '~/Documents/CNV/Mm/gene_order.tsv'
  }

  # The two original branches differed only in ref_group_names: subclone '0'
  # is the reference when normal cells are processed, otherwise there is none.
  infercnv_obj = CreateInfercnvObject(raw_counts_matrix = raw_counts_matrix,
                                      annotations_file = annotations_file,
                                      gene_order_file = gene_order_file,
                                      delim = "\t",
                                      ref_group_names = if (donormal) '0' else NULL,
                                      chr_exclude = NULL)
  infercnv_obj = infercnv::run(infercnv_obj,
                               resume_mode = FALSE,
                               out_dir = getwd(),
                               cutoff = 0.1,
                               cluster_by_groups = TRUE,
                               k_obs_groups = k_obs_groups,
                               analysis_mode = 'subclusters',
                               tumor_subcluster_partition_method = 'qnorm',
                               denoise = TRUE,
                               sd_amplifier = 2, # sets midpoint for logistic
                               noise_logistic = TRUE, # turns gradient filtering on
                               HMM = FALSE,
                               HMM_type = 'i6',
                               num_threads = ncores,
                               no_plot = FALSE,
                               plot_steps = FALSE)
  infercnv_obj_medianfiltered = infercnv::apply_median_filtering(infercnv_obj)
  infercnv::plot_cnv(infercnv_obj_medianfiltered,
                     output_filename = 'infercnv.median_filtered',
                     x.range = c(0.9, 1.1),
                     x.center = 1,
                     title = "infercnv",
                     color_safe_pal = FALSE)
}, error = function(e){
  # Best-effort step: keep going as before, but report the failure on stderr
  # instead of discarding it silently.
  message('Subclone inferCNV step failed: ', conditionMessage(e))
  c()
})
save(infercnv_obj, file = 'infer_cnv.RData')
setwd('../')

#### Plotting ####
pdf('dimplots.all.pdf', height = 15, width = 15)
# Per-cell CNV signal: sum of squared inferCNV values over all genes.
sumsq = apply(infercnv_obj@expr.data, 2, function(prof){
  sum(prof^2)
})
srt$sumsq = sumsq
# Mean profile of the (up to) 10 cells with the highest signal. head() avoids
# NA column names when there are fewer than 10 cells, and drop = FALSE keeps a
# matrix even if a single cell is selected.
top = apply(infercnv_obj@expr.data[, head(names(sort(sumsq, decreasing = TRUE)), 10), drop = FALSE], 1, mean)
# Correlation of every cell's profile with that reference profile.
topcor = apply(infercnv_obj@expr.data, 2, function(prof){
  cor(prof, top)
})
srt$topcor = topcor
# Printed explicitly (like the plots in the loop below) so the pages are
# written even when the script is source()d rather than run via Rscript.
# `cols` is spelled out instead of relying on partial matching of `col`.
print(FeatureScatter(srt, feature1 = 'sumsq', feature2 = 'topcor', pt.size = 2, group.by = 'type', cols = col_type))
print(FeatureScatter(srt, feature1 = 'sumsq', feature2 = 'topcor', pt.size = 2, group.by = 'subclone', cols = col_subclone))
print(FeatureScatter(srt, feature1 = 'sumsq', feature2 = 'topcor', pt.size = 2, group.by = 'cluster', cols = col_cluster))
for (reduction in c('pca','umap')){
  h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'type', cols = col_type)
  print(h)
  h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'cluster', cols = col_cluster, label = TRUE)
  print(h)
  h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'singler', cols = col_singler)
  print(h)
  h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'pop', cols = col_pop)
  print(h)
  h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'subclone', cols = col_subclone)
  print(h)
  h = FeaturePlot(srt, pt.size = 2, reduction = reduction, features = 'sumsq')
  print(h)
  h = FeaturePlot(srt, pt.size = 2, reduction = reduction, features = 'topcor')
  print(h)
}
dev.off()

#### Spliting ####
# Save the full object, then the malignant subset and (optionally) the normal
# subset, selected on the `type` metadata column.
srt.all = srt
save(srt.all, file = 'srt.all.RData')
srt.malignant = srt[, srt$type == 'malignant']
save(srt.malignant, file = 'srt.malignant.RData')
if (donormal){
  srt.normal = srt[, srt$type == 'normal']
  save(srt.normal, file = 'srt.normal.RData')
}

#### Malignant ####
# Re-normalise and re-embed the malignant cells on their own.
srt = srt.malignant
srt = SCTransform(srt, return.only.var.genes = FALSE)
srt = RunPCA(srt, features = VariableFeatures(srt))
srt = RunUMAP(srt, dims = 1:10)
## NMF
# Run NMF
# Input: SCT scaled data restricted to variable features, negatives clipped
# to zero, zero-variance genes removed.
data = as.matrix(GetAssayData(srt, assay = 'SCT', slot = 'scale.data'))
#if (unique(srt$author) == ''){
data = data[VariableFeatures(srt),]
#}
data[data < 0] = 0
data = data[apply(data, 1, var) > 0, ]
print(dim(data))
# One factorisation per candidate rank in `range` (defined outside this section).
res.list = mclapply(range, function(r){
  nmf(data, rank = r, nrun = 1, seed = 'ica', method = 'nsNMF')
}, mc.cores = ncores)
names(res.list) = range
# Select rank
# comp = rank minus number of modules returned by NMFToModules; keep the last
# rank (in `range` order) among those that minimise it.
modules.list = lapply(res.list, NMFToModules, gmin = gmin)
print(sapply(modules.list,length))
comp = as.numeric(names(modules.list)) - sapply(modules.list, length)
mi = min(comp)
r = names(which(comp == mi))
r = r[length(r)]
print(r)
res = res.list[[r]]
# Process output
modules = NMFToModules(res, gmin = gmin)
scores = basis(res)
colnames(scores) = names(modules)
coefs = coefficients(res)
rownames(coefs) = names(modules)
# Order modules
# Reorder modules by the row order of a heatmap of the coefficients.
h = Heatmap(coefs, clustering_distance_columns = 'euclidean')
o = row_order(h)
scores = scores[, o]
coefs = coefs[o, ]
modules = modules[o]
print(modules)
srt = AddMetaData(srt, t(coefs), col.name = rownames(coefs))
# Cluster NMF output
# Cut the cell (column) dendrogram into as many groups as there are modules,
# label each group with the module that has the highest mean coefficient in
# it, and give every cell its group's label.
h = Heatmap(coefs, clustering_distance_columns = 'euclidean')
hcl = as.hclust(column_dend(h))
sig = cutree(hcl, k = length(modules))
nmf = c(by(t(coefs), INDICES = sig, FUN = function(x){names(modules)[which.max(colMeans(x))]}))[sig]
srt$nmf = factor(nmf, levels = names(modules))
# seq_len() instead of 1:n so that zero levels yield an empty palette.
col_nmf = c(brewer.pal(12, 'Set3'), brewer.pal(8, 'Set1'))[seq_len(nlevels(srt$nmf))]
names(col_nmf) = levels(srt$nmf)
## Saving
srt.malignant = srt
save(srt.malignant, file = 'srt.malignant.RData')
res.list.malignant = res.list
save(res.list.malignant, file = 'res.list.malignant.RData')
res.malignant = res
save(res.malignant, file = 'res.malignant.RData')
## Plotting
pdf('dimplots.malignant.pdf', height = 15, width = 15)
for (reduction in c('pca','umap')){
  h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'nmf', cols = col_nmf)
  print(h)
  h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'cluster', cols = col_cluster)
  print(h)
  h = FeaturePlot(srt, reduction = reduction, features = names(modules))
  print(h)
}
dev.off()
# The PDF device and text sink opened here stay open past the end of this
# section; they are closed further down the script.
pdf('heatmaps.malignant.pdf', height = 20, width = 10)
sink('heatmaps.malignant.txt')
# NMF
# Modules
top_ann = HeatmapAnnotation(df = data.frame('cluster' = srt$cluster, 'singler' = srt$singler,
                                            'nmf' = srt$nmf, 'subclone' = srt$subclone),
                            col = list('cluster' = col_cluster, 'singler' = col_singler,
                                       'nmf' = col_nmf, 'subclone' = col_subclone), which = 'column')
# NOTE(review): confirm that `top_ann`, `breaks` and `colors` are argument
# names accepted by the Heatmap() that is in scope here.
h = Heatmap(coefs, name = 'coefs', top_ann = top_ann,
            show_row_names = TRUE,
            cluster_rows = FALSE,
            cluster_columns = FALSE, column_order = order(srt$nmf, srt$cluster, srt$singler),
            breaks = c(0,max(coefs)/2), colors = c('white','red'))
print(h)
# Gene scores for the module genes, rows split by module.
h = Heatmap(scores[unlist(modules),], name = 'scores',
            show_row_names = TRUE, row_names_gp = gpar(cex = 0.5),
            cluster_rows = FALSE,
            cluster_columns = FALSE,
            split = factor(unlist(mapply(rep, names(modules), sapply(modules, length))), levels = names(modules)))
print(h)
print(modules)
# ---- End of part2.R (R code) ----
# Output still goes to the PDF device and text sink opened just above
# ('heatmaps.malignant.pdf' / 'heatmaps.malignant.txt'); both are closed here.
# For each module, print the names of the first 10 entries of the sorted
# enrichment result (ascending sort; presumably p-values, confirm against
# Enrichment()).
enrichment.matrix = BuildEnrichmentMatrix(rownames(srt), type = 'GO')
go_10 = sapply(modules, function(tbl){
  e = Enrichment(geneset = tbl, enrichment.matrix = enrichment.matrix)
  names(sort(e))[1:10]
})
colnames(go_10) = names(modules)
print(go_10)
sink()
dev.off()

#### Normal ####
# Re-normalise and re-embed the normal cells on their own, then overwrite the
# saved normal object with the processed one.
if (donormal){
  srt = srt.normal
  srt = SCTransform(srt, return.only.var.genes = FALSE)
  srt = RunPCA(srt, features = VariableFeatures(srt))
  srt = RunUMAP(srt, dims = 1:10)
  ## Saving
  srt.normal = srt
  save(srt.normal, file = 'srt.normal.RData')
}

--------------------------------------------------------------------------------
/part2.sh:
--------------------------------------------------------------------------------
#!/usr/bin/bash
#SBATCH --job-name=Tumors_part2 # Job name
#SBATCH --mail-type=END,FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL)
#SBATCH --mail-user=db3562@nyulangone.org
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=40
#SBATCH --mem=200gb # Job memory request
#SBATCH --time=20:00:00 # Time limit hrs:min:sec
#SBATCH --output=serial_test_%j.log # Standard output and error log
#SBATCH --partition cpu_short

source ~/.bashrc

# Run part2.R inside one sample directory and copy the inferCNV subclone
# images next to the results.
#   $1  sample directory to work in
#   $2  first argument forwarded to part2.R
#   $3  second argument forwarded to part2.R
run_part2() {
    local workdir="$1"
    # Stop the job if the directory is missing, instead of running the
    # analysis (and the cp commands) in the wrong place.
    cd "${workdir}" || { echo "part2.sh: cannot cd to ${workdir}" >&2; exit 1; }
    conda activate seurat_functions
    module load r/4.0.3
    Rscript ~/Documents/Tumors/Runs/part2.R "$2" "$3"
    cp Subclones/infercnv.png ./infercnv_subclones.png
    cp Subclones/infercnv.median_filtered.png ./infercnv_subclones_filtered.png
    conda deactivate
}

# With 5 arguments the sample path is built from the first three, with 6
# arguments from the first four; the last two always go to part2.R.
if [ "$#" -eq 5 ]; then
    run_part2 "${HOME}/Documents/Tumors/${1}${2}/${1}${2}${3}" "${4}" "${5}"
elif [ "$#" -eq 6 ]; then
    run_part2 "${HOME}/Documents/Tumors/${1}${2}${3}/${1}${2}${3}${4}" "${5}" "${6}"
else
    # As before, the part2.R step is skipped and the SCENIC steps still run in
    # the current directory; the skip is now reported.
    echo "part2.sh: expected 5 or 6 arguments, got $#; skipping part2.R" >&2
fi

# SCENIC, step 1 (R)
conda activate seurat_functions
module load r/4.0.3
Rscript ~/Documents/Tumors/Runs/scenic.malignant1.R
conda deactivate

# SCENIC, GRN inference (pyscenic), run from inside Scenic.Malignant
cd Scenic.Malignant || { echo "part2.sh: cannot cd to Scenic.Malignant" >&2; exit 1; }
module load python/cpu/3.6.5
cp 1.1_exprMatrix_filtered_t.txt 1.1_exprMatrix_filtered_t.tsv
pyscenic grn -o grnboost_output.csv 1.1_exprMatrix_filtered_t.tsv 1.1_inputTFs.txt
module unload python/cpu/3.6.5
cd ../

# SCENIC, step 2 (R)
conda activate seurat_functions
module load r/4.0.3
Rscript ~/Documents/Tumors/Runs/scenic.malignant2.R
conda deactivate

--------------------------------------------------------------------------------