├── Finding.R
├── MergingOurs.R
├── MergingPaired.R
├── MergingPrimary.R
├── Scoring.R
├── Visium.R
├── experiments.R
├── part1.R
├── part1.sh
├── part2.R
├── part2.sh
└── seurat_functions_public.R


/Finding.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | source('~/Documents/R/seurat_functions.R')
  3 | setwd('~/Documents/Analysis/Tumors/')
  4 | 
  5 | pdf('Finding.pdf', height = 10, width = 12)
  6 | sink('Finding.txt')
  7 | # From here on plots go to Finding.pdf and printed output to Finding.txt (both closed at the end of the script).
  8 | load('parameters.RData')
  9 | # NOTE(review): files.primary, corr, v_min, s_min, colors and colors.system are used below but never defined in this script; presumably supplied by parameters.RData -- confirm.
 10 | ## Data ##
 11 | 
 12 | files = files.primary
 13 | # Per-sample NMF results (loadRData and NMFToModules are helpers defined outside this script).
 14 | res.list = lapply(files, function(f){
 15 |   loadRData(paste0(f,'res.malignant.RData'))
 16 | })
 17 | modules.list = lapply(res.list, NMFToModules)
 18 | genes.all = sort(unique(unlist(modules.list)))  # NOTE(review): replaced on line 23 before it is ever read
 19 | # Per-sample malignant objects; the gene universe is the union of their row names.
 20 | srt.list = lapply(files, function(f){
 21 |   loadRData(paste0(f,'srt.malignant.RData'))
 22 | })
 23 | genes.all = sort(unique(unlist(lapply(srt.list, rownames))))
 24 | names(genes.all) = genes.all
 25 | # One enrichment matrix per annotation collection, built for genes.all by the external helper BuildEnrichmentMatrix.
 26 | types = c('GO','KEGG','REACTOME','HALLMARK','MOTIF')
 27 | enrichment.matrix.list = lapply(types, function(type){
 28 |   BuildEnrichmentMatrix(genes = genes.all, type = type)
 29 | })
 30 | names(enrichment.matrix.list) = types
 31 | # Additional gene-set collections from the PanglaoDB marker table, split by cell type, germ layer and organ.
 32 | ref = read.delim("PanglaoDB_markers_27_Mar_2020.tsv.gz")
 33 | ref$germ.layer = gsub('mesoderm','Mesoderm',ref$germ.layer)
 34 | byct = lapply(split(ref$official.gene.symbol, ref$cell.type), unique)
 35 | bygl = lapply(split(ref$official.gene.symbol, ref$germ.layer), unique)
 36 | byo = lapply(split(ref$official.gene.symbol, ref$organ), unique)
 37 | enrichment.matrix.list[['Cell.type']] = BuildEnrichmentMatrix(genes = genes.all, db = byct)
 38 | enrichment.matrix.list[['Germ.layer']] = BuildEnrichmentMatrix(genes = genes.all, db = bygl)
 39 | enrichment.matrix.list[['Organ']] = BuildEnrichmentMatrix(genes = genes.all, db = byo)
 40 | types = names(enrichment.matrix.list)
 41 | # Gene sets read column-wise from gbm.csv: empty cells are dropped and set names get the 'Neftel_' prefix.
 42 | gbm = read.csv('gbm.csv', header = TRUE, stringsAsFactors = FALSE)
 43 | gbm = as.list(gbm)
 44 | gbm = lapply(gbm, function(x){x[!x=='']})
 45 | names(gbm) = paste0('Neftel_',names(gbm))
 46 | enrichment.matrix.list[['gbm']] = BuildEnrichmentMatrix(genes = genes.all, db = gbm)
 47 | types = names(enrichment.matrix.list)
 48 | # Same treatment for hn.csv, with the 'Puram_' prefix.
 49 | hn = read.csv('hn.csv', header = TRUE, stringsAsFactors = FALSE)
 50 | hn = as.list(hn)
 51 | hn = lapply(hn, function(x){x[!x=='']})
 52 | names(hn) = paste0('Puram_',names(hn))
 53 | enrichment.matrix.list[['hn']] = BuildEnrichmentMatrix(genes = genes.all, db = hn)
 54 | types = names(enrichment.matrix.list)
 55 | # Same treatment for sk.csv, with the 'Ji_' prefix.
 56 | sk = read.csv('sk.csv', header = TRUE, stringsAsFactors = FALSE)
 57 | sk = as.list(sk)
 58 | sk = lapply(sk, function(x){x[!x=='']})
 59 | names(sk) = paste0('Ji_',names(sk))
 60 | enrichment.matrix.list[['sk']] = BuildEnrichmentMatrix(genes = genes.all, db = sk)
 61 | types = names(enrichment.matrix.list)
 62 | # Cancer code = first four characters of each sample name; system is looked up by that code in the corr table.
 63 | cancer = factor(apply(sapply(strsplit(names(res.list), ''), '[', 1:4), 2, paste0, collapse = ''))
 64 | system = factor(corr[as.character(cancer),'system'])
 65 | 
 66 | ## Modules from graph ##
 67 | # Per-sample gene modules derived from the NMF results, then filtered to modules that recur across the data set.
 68 | modules.list = lapply(res.list, NMFToModules)
 69 | all = unlist(modules.list, recursive = FALSE, use.names = FALSE)
 70 | names(all) = unlist(sapply(modules.list, names))
 71 | ta = table(unlist(all))
 72 | genes.use = names(ta)[ta > 1]  # genes present in more than one module
 73 | # Five passes; each keeps a module only if its Jaccard index exceeds 0.05 with at least 3 modules (the module itself is part of that count).
 74 | # Filter non-overlapping modules
 75 | for (i in 1:5){
 76 |   all = unlist(modules.list, recursive = FALSE, use.names = TRUE)  # names become '<sample>.<module>'
 77 |   all = lapply(all, intersect, genes.all)
 78 |   sim = sapply(all, function(x){
 79 |     sapply(all, function(y){
 80 |       length(intersect(x,y))/length(union(x,y))
 81 |     })
 82 |   })
 83 |   keep = rownames(sim)[apply(sim, 1, function(x){
 84 |     sum(x > 0.05) >= 3
 85 |   })]
 86 |   all = all[keep]
 87 |   modules.list = lapply(names(modules.list), function(x){
 88 |     li = modules.list[[x]]
 89 |     li[names(li)[paste(x,names(li),sep='.') %in% keep]]  # rebuild the '<sample>.<module>' key to match entries of keep
 90 |   })
 91 |   names(modules.list) = names(res.list)
 92 |   ta = table(unlist(all))
 93 |   genes.use = names(ta)[ta > 1] 
 94 |   print(length(all))  # number of modules surviving this pass
 95 | }
 96 | 
 97 | # Adjacency matrix, list by cancer: sub[x,y] counts the sample modules of one cancer containing both genes x and y; adj sums these counts over cancers
 98 | adj = matrix(0, nrow = length(genes.use), ncol = length(genes.use))
 99 | adj.list = list()
100 | for (can in levels(cancer)){
101 |   sub = matrix(0, nrow = length(genes.use), ncol = length(genes.use))
102 |   rownames(sub) = genes.use
103 |   colnames(sub) = genes.use
104 |   for (s in names(modules.list)[cancer == can]){
105 |     for (mod in modules.list[[s]]){
106 |       mod = intersect(mod, genes.use)
107 |       # intersect() yields unique genes, so a single block update adds 1 to
108 |       # every ordered pair (x,y) of the module -- the same result as visiting
109 |       # each pair in a nested loop, without the per-element indexing cost.
110 |       sub[mod,mod] = sub[mod,mod] + 1
111 |       # (self-pairs counted here are cleared by diag(sub) = 0 below)
112 |     }
113 |   }
114 |   diag(sub) = 0
115 |   adj.list[[can]] = sub
116 |   #adj = adj + (sub > 0)
117 |   adj = adj + sub  # adj picks up the gene dimnames from sub
118 | }
119 | adj_keep = adj  # untouched copy of the summed counts; the graph plot section starts again from it
120 | # (adj is reset from adj_keep right below, so no extra copy-back is needed here)
121 | 
122 | # Remove low connections: binarise at v_min, then drop genes with fewer than s_min remaining edges (5 passes)
123 | adj = adj_keep
124 | adj[] = (adj >= v_min)
125 | #adj[adj <= 1] = 0
126 | for (i in 1:5){
127 |   keep = names(which(rowSums(adj) >= s_min))
128 |   adj = adj[keep,keep, drop = FALSE]  # drop = FALSE keeps a matrix even if a single gene survives
129 |   print(dim(adj))
130 | }
131 | 
132 | # Cluster: Infomap communities on the thresholded gene graph; each community is provisionally named 'm_' + its first gene
133 | g = graph_from_adjacency_matrix(adj, diag = FALSE, mode = 'undirected', weighted = TRUE)
134 | modules = communities(cluster_infomap(g, nb.trials = 100))
135 | names(modules) = paste0('m_', sapply(modules, '[', 1))
136 | modules = modules[c('m_ACYP1','m_ABHD3','m_ACTN1',  # NOTE(review): selection relies on these exact provisional names being produced by the clustering run -- confirm it is reproducible
137 |                     'm_ADM','m_ATOX1','m_FKBP5',
138 |                     'm_AQP1','m_ACTB',
139 |                     'm_AGER','m_ALDH3A1','m_AKR1B10','m_AGR2','m_AC007906.2',
140 |                     'm_AGT','m_ASCL1','m_ARL4D')]
141 | names(modules) = c('Cycle','Stress','Interferon',  # final labels, positionally matched to the selection above
142 |                    'Hypoxia','Oxphos','Metal',
143 |                    'Mesenchymal','pEMT',
144 |                    'Alveolar','Basal','Squamous','Glandular','Ciliated',
145 |                    'AC','OPC','NPC')
146 | modules
147 | save(modules, file = 'modules.RData')
148 | # Colours: a hue palette for all but the last three modules, which get fixed 'thistle' shades.
149 | colors.module = c(hue_pal()(length(modules)-3),
150 |                   'thistle3',
151 |                   'thistle2',
152 |                   'thistle1')
153 | names(colors.module) = names(modules)
154 | save(colors.module, file = 'colors.module.RData')
155 | # The first 6 modules are labelled 'Process', the remaining ones 'Type'.
156 | types.module = c(rep('Process',6), rep('Type',length(modules)-6))
157 | names(types.module) = names(modules)
158 | save(types.module, file = 'types.module.RData')
159 | # Export the modules as CSV columns; shorter modules are padded with empty strings.
160 | fi = data.frame(sapply(modules,'[', 1:max(sapply(modules, length))), stringsAsFactors = FALSE)
161 | fi[is.na(fi)] = ''
162 | write.table(fi, file = 'modules.csv', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
163 | 
164 | ## Enrichment ##
165 | # Both comparisons below: tag genes with their module, draw volcano plots, then test each module at both ends of the fold-change ranking with mHG.test (defined outside this script).
166 | # LUAD vs LUSC
167 | # Input has two sets of statistics per gene (.x/.y); the larger p-value and the mean logFC are used.
168 | data = read.csv('SCCvsAC.csv', row.names=1)
169 | data$PValue.max = apply(data[,c('PValue.x', 'PValue.y')], 1, max)
170 | data$logFC.mean = apply(data[,c('logFC.x', 'logFC.y')], 1, mean)
171 | data$type = 'Other'
172 | for (m in names(modules)){  # a gene present in several modules keeps the label of the last one visited
173 |   data[intersect(rownames(data),modules[[m]]), 'type'] = m
174 | }
175 | plot(data$logFC.mean, -log10(data$PValue.max), ylim = c(0,50), pch = 20, cex = 0.5, col = colors.module[data$type], xlab = 'LogFC', ylab = 'p-value (-log10)')
176 | abline(v = 0, lty = 2)
177 | text(data$logFC.mean[data$type %in% names(modules)], -log10(data$PValue.max)[data$type %in% names(modules)], labels = rownames(data)[data$type %in% names(modules)], cex = 1, col = colors.module[data$type[data$type %in% names(modules)]])
178 | for (m in names(modules)){  # one volcano per module, labelling only that module's genes
179 |   plot(data$logFC.mean, -log10(data$PValue.max), ylim = c(0,50), pch = 20, cex = 0.5, col = colors.module[data$type], xlab = 'LogFC', ylab = 'p-value (-log10)', main = m)
180 |   abline(v = 0, lty = 2)
181 |   tryCatch(expr = {
182 |     text(data$logFC.mean[data$type == m], -log10(data$PValue.max)[data$type == m], labels = rownames(data)[data$type == m], cex = 1, col = colors.module[data$type[data$type == m]])
183 |   }, error = function(e){c()})  # labelling errors are deliberately ignored so the loop continues
184 | }
185 | pvals = sapply(names(modules), function(m){
186 |   a = mHG.test((data$type == m)[order(data$logFC.mean, decreasing = FALSE)])$p.value  # module membership ranked by increasing logFC
187 |   b = mHG.test((data$type == m)[order(data$logFC.mean, decreasing = TRUE)])$p.value  # ranked by decreasing logFC
188 |   return(-log10(c(a,b)))
189 | })
190 | rownames(pvals) = c('Squamous','Adenocarcinoma')
191 | h = Heatmap(pvals, name = 'mHG p-pvalue', 
192 |             row_names_gp = gpar(cex = 0.3), column_names_gp = gpar(cex = 0.3, col = colors.module),
193 |             cluster_rows = FALSE, cluster_columns = FALSE,
194 |             show_row_names = TRUE, show_column_names = TRUE,
195 |             breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
196 | print(h)
197 | 
198 | # Primary vs Met
199 | # Four result tables (5K/10K/30K/100K) are restricted to their shared genes and bound column-wise; columns get the table tag as suffix.
200 | data.list = lapply(c('5K','10K','30K','100K'), function(x){
201 |   data = read.csv(paste0("~/Documents/Analysis/Tumors/PrimaryMet",x,".csv"))
202 |   data = data[!duplicated(data[,1]),]
203 |   rownames(data) = data[,1]
204 |   colnames(data) = paste0(colnames(data),'.',x)
205 |   return(data)
206 | })
207 | keep = sort(Reduce(intersect, lapply(data.list, rownames)))
208 | data = Reduce(cbind, lapply(data.list, function(x){x[keep,]}))
209 | data$pvalues.max = apply(data[,grep('pvalues',colnames(data))], 1, max)
210 | data$log2fc.mean = apply(data[,grep('log2fc',colnames(data))], 1, mean)
211 | data = data[data$pvalues.max < 0.01,]  # only genes below 0.01 in every table are kept
212 | data$type = 'Other'
213 | for (m in names(modules)){
214 |   data[intersect(rownames(data),modules[[m]]), 'type'] = m
215 | }
216 | plot(data$log2fc.mean, -log10(data$pvalues.max), xlim = c(-2,2), ylim = c(0,50), pch = 20, cex = 0.5, col = colors.module[data$type], xlab = 'LogFC', ylab = 'p-value (-log10)')
217 | abline(v = 0, lty = 2)
218 | text(data$log2fc.mean[data$type %in% names(modules)], -log10(data$pvalues.max)[data$type %in% names(modules)], labels = rownames(data)[data$type %in% names(modules)], cex = 1, col = colors.module[data$type[data$type %in% names(modules)]])
219 | for (m in names(modules)){
220 |   plot(data$log2fc.mean, -log10(data$pvalues.max), xlim = c(-2,2), ylim = c(0,50), pch = 20, cex = 0.5, col = colors.module[data$type], xlab = 'LogFC', ylab = 'p-value (-log10)', main = m)
221 |   abline(v = 0, lty = 2)
222 |   tryCatch(expr = {
223 |     text(data$log2fc.mean[data$type == m], -log10(data$pvalues.max)[data$type == m], labels = rownames(data)[data$type == m], cex = 1, col = colors.module[data$type[data$type == m]])
224 |   }, error = function(e){c()})
225 | }
226 | pvals = sapply(names(modules), function(m){
227 |   a = mHG.test((data$type == m)[order(data$log2fc.mean, decreasing = FALSE)])$p.value
228 |   b = mHG.test((data$type == m)[order(data$log2fc.mean, decreasing = TRUE)])$p.value
229 |   return(-log10(c(a,b)))
230 | })
231 | rownames(pvals) = c('Primary','Metastasis')
232 | h = Heatmap(pvals, name = 'mHG p-pvalue', 
233 |             row_names_gp = gpar(cex = 0.3), column_names_gp = gpar(cex = 0.3, col = colors.module),
234 |             cluster_rows = FALSE, cluster_columns = FALSE,
235 |             show_row_names = TRUE, show_column_names = TRUE,
236 |             breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
237 | print(h)
238 | 
239 | # Enrichment
240 | # For every annotation collection: a matrix of -log10 enrichment p-values with one column per module, computed by the external helper Enrichment.
241 | enrichment = lapply(types, function(type){
242 |   enrichment.matrix = enrichment.matrix.list[[type]]
243 |   e = sapply(modules, function(mod){
244 |     Enrichment(mod, enrichment.matrix = enrichment.matrix)
245 |   })
246 |   e = -log10(e)
247 |   e[is.infinite(e)] = max(e[is.finite(e)])  # cap infinite scores at the largest finite one
248 |   e = e[!apply(is.na(e), 1, any), ]  # drop rows with any NA
249 |   rownames(e) = gsub(paste0(type,'_'), '', rownames(e))  # tidy row labels: strip the collection prefix, underscores and a leading space
250 |   rownames(e) = gsub('_', ' ', rownames(e))
251 |   rownames(e) = gsub('^ ', '', rownames(e))
252 |   return(e)
253 | })
254 | names(enrichment) = types
255 | # Heatmap of the 5 best-scoring rows per module, with row clustering.
256 | for (e in enrichment){
257 |   keep = unique(rownames(e)[apply(e, 2, function(x){order(x, decreasing = TRUE)[1:5]})])
258 |   e = e[keep, ]
259 |   h = Heatmap(e, name = 'Enrichment p-pvalue', 
260 |               row_names_gp = gpar(cex = 0.6), column_names_gp = gpar(cex = 0.6, col = colors.module),
261 |               cluster_rows = TRUE, cluster_columns = FALSE, row_dend_reorder = 1:length(keep),              
262 |               show_row_names = TRUE, show_column_names = TRUE,
263 |               breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
264 |   print(h)
265 | }
266 | # Same top-5 selection, rows left in selection order (no clustering).
267 | for (e in enrichment){
268 |   keep = unique(rownames(e)[apply(e, 2, function(x){order(x, decreasing = TRUE)[1:5]})])
269 |   e = e[keep, ]
270 |   h = Heatmap(e, name = 'Enrichment p-pvalue', 
271 |               row_names_gp = gpar(cex = 0.6), column_names_gp = gpar(cex = 0.6, col = colors.module),
272 |               cluster_rows = FALSE, cluster_columns = FALSE,         
273 |               show_row_names = TRUE, show_column_names = TRUE,
274 |               breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
275 |   print(h)
276 | }
277 | # Top 2 rows per module.
278 | for (e in enrichment){
279 |   keep = unique(rownames(e)[apply(e, 2, function(x){order(x, decreasing = TRUE)[1:2]})])
280 |   e = e[keep, ]
281 |   h = Heatmap(e, name = 'Enrichment p-pvalue', 
282 |               row_names_gp = gpar(cex = 0.6), column_names_gp = gpar(cex = 0.6, col = colors.module),
283 |               cluster_rows = FALSE, cluster_columns = FALSE,               
284 |               show_row_names = TRUE, show_column_names = TRUE,
285 |               breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
286 |   print(h)
287 | }
288 | # Single best row per module.
289 | for (e in enrichment){
290 |   keep = unique(rownames(e)[apply(e, 2, function(x){order(x, decreasing = TRUE)[1]})])
291 |   e = e[keep, ]
292 |   h = Heatmap(e, name = 'Enrichment p-pvalue', 
293 |               row_names_gp = gpar(cex = 0.6), column_names_gp = gpar(cex = 0.6, col = colors.module),
294 |               cluster_rows = FALSE, cluster_columns = FALSE,               
295 |               show_row_names = TRUE, show_column_names = TRUE,
296 |               breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Greens')(n = 9))
297 |   print(h)
298 | }
299 | 
300 | ## Graph plot ##
301 | # Gene graph restricted to the module genes, using the summed co-occurrence counts; entries <= 1 are removed.
302 | adj = adj_keep
303 | adj = adj[unlist(modules),unlist(modules)]
304 | adj[adj <= 1] = 0
305 | g = graph_from_adjacency_matrix(adj, diag = FALSE, mode = 'undirected', weighted = TRUE)
306 | # Two plots per layout: every gene name as text, then dots with labels only for a hand-picked gene list.
307 | for (coords in list(layout.kamada.kawai(g), layout.fruchterman.reingold(g))){
308 |   E(g)$width = E(g)$weight
309 |   col = colors.module[unlist(mapply(rep, 1:length(modules), sapply(modules, length)))]  # one module colour per gene, in unlist(modules) order
310 |   names(col) = unlist(modules)
311 |   col = col[!duplicated(names(col))]  # first occurrence wins for a gene listed more than once
312 |   V(g)$color = col[names(V(g))]
313 |   E(g)$color = 'lightgrey'
314 |   plot(g, 
315 |        layout = coords,
316 |        vertex.label.color = V(g)$color, vertex.label.cex = 0.5,
317 |        vertex.shape = 'none', vertex.size = 0,
318 |        frame=FALSE)
319 |   lab = sapply(names(V(g)), function(x){  # vertex label = gene name if in the list below, otherwise empty
320 |     if (x %in% c('TOP2A','PCNA',
321 |                  'JUN', 'FOS',
322 |                  'IFIT1', 'STAT1','HLA-A','HLA-DRA',
323 |                  'HILDPA','VEGFA',
324 |                  'ATP5H','LAMTOR2',
325 |                  'MT1G','MT1E',
326 |                  'COL1A1','FN1',
327 |                  'LAMC2','VIM',
328 |                  'KRT5','KRT15',
329 |                  'KLK10','LY6D',
330 |                  'CLU','MUC5B','TFF1','CEACAM6',
331 |                  'AGER','CAV1',
332 |                  'FOXJ1','PIFO',
333 |                  'ALDOC','APOE',
334 |                  'OLIG1','OLIG2','SOX8',
335 |                  'DLX1','DLX5','SOX11'
336 |                  )){
337 |       return(x)
338 |     } else {
339 |       return('')
340 |     }
341 |   })
342 |   plot(g, 
343 |        layout = coords,
344 |        vertex.label.color = V(g)$color, vertex.label.cex = 1, vertex.label = lab,
345 |        vertex.shape = 'circle', vertex.size = 1, vertex.frame.color = NA,
346 |        frame=FALSE)
347 | }
348 | 
349 | ## Overlap plots ##
350 | # Rebuilds the filtered per-sample modules (same procedure as in 'Modules from graph'), then scores each against the consensus modules.
351 | # Similarity
352 | modules.list = lapply(res.list, NMFToModules)
353 | all = unlist(modules.list, recursive = FALSE, use.names = FALSE)
354 | names(all) = unlist(sapply(modules.list, names))
355 | ta = table(unlist(all))
356 | genes.use = names(ta)[ta > 1]
357 | for (i in 1:5){
358 |   all = unlist(modules.list, recursive = FALSE, use.names = TRUE)
359 |   all = lapply(all, intersect, genes.all)
360 |   sim = sapply(all, function(x){
361 |     sapply(all, function(y){
362 |       length(intersect(x,y))/length(union(x,y))
363 |     })
364 |   })
365 |   keep = rownames(sim)[apply(sim, 1, function(x){
366 |     sum(x > 0.05) >= 3
367 |   })]
368 |   all = all[keep]
369 |   modules.list = lapply(names(modules.list), function(x){
370 |     li = modules.list[[x]]
371 |     li[names(li)[paste(x,names(li),sep='.') %in% keep]]
372 |   })
373 |   names(modules.list) = names(res.list)
374 |   ta = table(unlist(all))
375 |   genes.use = names(ta)[ta > 1] 
376 |   print(length(all))
377 | }
378 | sim = sapply(all, function(x){  # rows = consensus modules, columns = sample modules
379 |   sapply(modules, function(y){
380 |     pval = phyper(length(intersect(x, y)), length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE)  # NOTE(review): lower.tail = FALSE gives P(X > overlap), not P(X >= overlap); the universe is fixed at 2*10^4 genes -- confirm both are intended
381 |     return(-log10(pval))
382 |   })
383 | })
384 | sim[is.infinite(sim)] = max(sim[is.finite(sim)])
385 | sim = sim[,apply(sim, 2, function(x){any(x > 3)})]  # keep sample modules scoring above 3 for at least one consensus module
386 | df = data.frame('sample' = sapply(colnames(sim), function(x){  # sample = text before the first '.' of the column name
387 |   y = sapply(strsplit(x, '.', fixed = TRUE), '[', 1)
388 | }))
389 | df$cancer = sapply(as.character(df$sample), function(x){  # cancer = first four characters of the sample name
390 |   y = sapply(strsplit(x, ''), '[', 1:4)
391 |   paste0(y, collapse = '')
392 | })
393 | df$system = factor(corr[as.character(df$cancer),'system'])
394 | df$top = apply(sim, 2, which.max)  # best-matching consensus module per sample module
395 | df$top = factor(names(modules)[df$top], levels = names(modules))
396 | ta = table(df[c('top','system')])
397 | pval = sapply(colnames(ta), function(y){  # association between each top module (rows) and each system (columns)
398 |   sapply(rownames(ta), function(x){
399 |     phyper(ta[x,y], sum(ta[,y]), sum(ta) - sum(ta[,y]), sum(ta[x,]), lower.tail = FALSE)
400 |   })
401 | })
402 | pval = -log10(pval)
403 | pval[is.infinite(pval)] = max(pval[is.finite(pval)])
404 | pval = (pval > max(3,max(apply(pval, 1, function(x){sort(x, decreasing = TRUE)[2]}))))  # threshold is at least every row's second-best score, so at most one system per module passes
405 | category = factor(apply(pval, 1, function(x){  # the passing system, or 'Universal' when none passes
406 |   if (any(x)){
407 |     return(names(which(x)))
408 |   } else {
409 |     return('Universal')
410 |   }
411 | }))
412 | df$category = category[df$top]
413 | top_ann = HeatmapAnnotation(df = df[, c('sample','category','top')], 
414 |                             col = list('sample' = colors, 'category' = colors.system, 'top' = colors.module), 
415 |                             which = 'column')
416 | side_ann = HeatmapAnnotation(df = df[, c('sample','category','top')], 
417 |                              col = list('sample' = colors, 'category' = colors.system, 'top' = colors.module), 
418 |                              which = 'row')
419 | h = Heatmap(name = 'Module overlap p-value', sim, 
420 |             top_annotation = top_ann,
421 |             cluster_columns = FALSE, cluster_rows = FALSE,
422 |             column_order = order(df$top),
423 |             show_row_names = TRUE, row_names_gp = gpar(col = colors.module),
424 |             breaks = seq(0, 6, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
425 | print(h)
426 | decorate_heatmap_body('Module overlap p-value', {  # grid overlay: vertical lines at the cumulative group sizes, horizontal lines at multiples of 1/nrow(sim)
427 |   l = c(0,cumsum(table(df$top))[1:length(table(df$top))])
428 |   for (k in 1:length(l)){
429 |     i = unit(l[k]/ncol(sim), 'npc')
430 |     j = unit(k/nrow(sim), 'npc')
431 |     grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
432 |     grid.lines(c(0,1),j, gp = gpar(lwd = 1, col = 'black'))
433 |   }
434 | })
435 | # Overlap: pairwise hypergeometric overlap score (-log10 p) between the retained sample modules, rows ordered by best-matching consensus module
436 | ovlp = sapply(all, function(x){
437 |   sapply(all, function(y){
438 |     pval = phyper(length(intersect(x, y)), length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE)
439 |     return(-log10(pval))
440 |   })
441 | })
442 | ovlp[is.infinite(ovlp)] = max(ovlp[is.finite(ovlp)])  # cap infinite scores at the largest finite one
443 | ovlp = ovlp[colnames(sim),colnames(sim)]  # same module set and order as the similarity heatmap
444 | h = Heatmap(name = 'Module overlap p-value', ovlp, 
445 |             top_annotation = top_ann,
446 |             is.symmetric = TRUE, row_order = order(df$top),
447 |             #show_row_names = TRUE, row_names_gp = gpar(cex = 0.4),
448 |             breaks = seq(0, 6, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7)) + side_ann
449 | print(h)
450 | # decorate_heatmap_body('Module overlap p-value', {
451 | #   l = c(0,cumsum(table(df$top))[1:length(table(df$top))])
452 | #   for (k in 1:length(l)){
453 | #     i = unit(l[k]/ncol(ovlp), 'npc')
454 | #     j = unit(1-l[k]/nrow(ovlp), 'npc')
455 | #     grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
456 | #     grid.lines(c(0,1), j, gp = gpar(lwd = 1, col = 'black'))
457 | #   }
458 | # })
459 | decorate_heatmap_body('Module overlap p-value', {  # outline one diagonal box per group of modules sharing the same top consensus module
460 |   l = c(0,cumsum(table(df$top))[1:length(table(df$top))])  # group boundaries: length(l) - 1 groups
461 |   for (k in seq_len(length(l) - 1)){  # each box spans l[k]..l[k+1]; stopping at length(l) - 1 keeps l[k+1] defined
462 |     i = unit(l[k:(k+1)]/ncol(ovlp), 'npc')
463 |     j = unit(1-l[k:(k+1)]/nrow(ovlp), 'npc')
464 |     grid.lines(i, j[1], gp = gpar(lwd = 1, col = 'black'))
465 |     grid.lines(i, j[2], gp = gpar(lwd = 1, col = 'black'))
466 |     grid.lines(i[1], j, gp = gpar(lwd = 1, col = 'black'))
467 |     grid.lines(i[2], j, gp = gpar(lwd = 1, col = 'black'))
468 |   }
469 | })
470 | 
471 | # Schematic: per sample, heatmap of scaled expression for its module genes, marking up to 5 genes per module that are shared with the consensus module it matches best
472 | for (s in names(srt.list)){
473 |   mod = modules.list[[s]]
474 |   srt = srt.list[[s]]
475 |   mod = lapply(mod, sort)
476 |   labels = unlist(lapply(names(mod), function(m){
477 |     a = intersect(mod[[m]], modules[[df[paste(s, m, sep = '.'),'top']]])  # df rows are keyed '<sample>.<module>'
478 |     if (length(a) > 5){
479 |       a = sample(a, size = 5, replace = FALSE)  # random pick; no seed is set in this script
480 |     }
481 |     return(a)
482 |   }))
483 |   data = GetData(srt, slot = 'scale.data')[unlist(mod), ]  # rows follow the module order; GetData is not defined in this script
484 |   h = Heatmap(name = s, data,
485 |               cluster_rows = FALSE, cluster_columns = TRUE, show_column_dend = FALSE,
486 |               breaks = c(-3,0,3), colors = c('blue','white','red'))
487 |   col = colors.module[unlist(mapply(rep, 1:length(modules), sapply(modules, length)))]
488 |   names(col) = unlist(modules)
489 |   col = col[!duplicated(names(col))]
490 |   at = which(rownames(data) %in% labels)  # anno_mark pairs labels[i] with at[i]: labels is in sampled order and may differ in length from at, so the mark text is taken from the marked rows themselves
491 |   r = rowAnnotation(link = anno_mark(at = at, labels = rownames(data)[at],
492 |                                      labels_gp = gpar(cex = 1.5, col = col[rownames(data)[at]])))
493 |   print(h+r)
494 |   decorate_heatmap_body(s, {  # horizontal separators between modules plus left/right borders
495 |     i = c(0,cumsum(sapply(rev(mod), length)))
496 |     x = unit(i/max(i), 'npc')
497 |     for (k in x){
498 |       grid.lines(c(0,1), c(k,k), gp = gpar(lwd = 1, col = 'black'))
499 |     }
500 |     grid.lines(c(0,0), c(0,1), gp = gpar(lwd = 1, col = 'black'))
501 |     grid.lines(c(1,1), c(0,1), gp = gpar(lwd = 1, col = 'black'))
502 |   })
503 | }
504 | 
505 | ## Scenic ##
506 | # Overlap between per-sample regulons and the consensus modules, first leaving out 'Stress' and 'Cycle'.
507 | regulons.list = lapply(files, function(f){
508 |   loadRData(paste0(f,'regulons.malignant.RData'))
509 | })
510 | regulons = unlist(regulons.list, recursive = FALSE, use.names = TRUE)  # names become '<sample>.<regulon>'
511 | # Similarity
512 | sim = sapply(regulons, function(x){
513 |   sapply(modules[setdiff(names(modules), c('Stress','Cycle'))], function(y){
514 |     pval = phyper(length(intersect(x, y)), length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE)
515 |     return(-log10(pval))
516 |   })
517 | })
518 | sim[is.infinite(sim)] = max(sim[is.finite(sim)])
519 | sim = sim[,apply(sim, 2, function(x){any(x > 3)})]  # keep regulons scoring above 3 for at least one module
520 | df = data.frame('sample' = sapply(colnames(sim), function(x){
521 |   y = sapply(strsplit(x, '.', fixed = TRUE), '[', 1)
522 | }))
523 | df$tf = sapply(colnames(sim), function(x){  # second '.'-separated field of the regulon name
524 |   y = sapply(strsplit(x, '.', fixed = TRUE), '[', 2)
525 | })
526 | df$cancer = sapply(as.character(df$sample), function(x){
527 |   y = sapply(strsplit(x, ''), '[', 1:4)
528 |   paste0(y, collapse = '')
529 | })
530 | df$system = factor(corr[as.character(df$cancer),'system'])
531 | df$top = apply(sim, 2, which.max)
532 | df$top = factor(rownames(sim)[df$top], levels = rownames(sim))
533 | ta = table(df[c('top','system')])
534 | top_ann = HeatmapAnnotation(df = df[, c('sample','top')],
535 |                             col = list('sample' = colors, 'top' = colors.module),
536 |                             which = 'column')
537 | h = Heatmap(name = 'Scenic overlap p-value', sim,
538 |             top_annotation = top_ann,
539 |             cluster_columns = FALSE, cluster_rows = FALSE,
540 |             column_order = order(df$top),
541 |             show_row_names = TRUE, row_names_gp = gpar(col = colors.module[rownames(sim)]),  # colours are applied by row position, so index them by the rows actually shown (two modules are excluded here)
542 |             breaks = seq(0, 6, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
543 | print(h)
544 | decorate_heatmap_body('Scenic overlap p-value', {
545 |   l = c(0,cumsum(table(df$top))[1:length(table(df$top))])
546 |   for (k in 1:length(l)){
547 |     i = unit(l[k]/ncol(sim), 'npc')
548 |     j = unit(k/nrow(sim), 'npc')
549 |     grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
550 |     grid.lines(c(0,1),j, gp = gpar(lwd = 1, col = 'black'))
551 |   }
552 | })
553 | print(sapply(split(df$tf, df$top), function(tf){sort(table(tf), decreasing = TRUE)}))  # per module: TF names by decreasing frequency
554 | 
555 | regulons.list = lapply(files, function(f){  # same regulon-vs-module analysis as above, this time against every module; the regulon files are loaded again
556 |   loadRData(paste0(f,'regulons.malignant.RData'))
557 | })
558 | regulons = unlist(regulons.list, recursive = FALSE, use.names = TRUE)
559 | # Similarity
560 | sim = sapply(regulons, function(x){
561 |   sapply(modules, function(y){
562 |     pval = phyper(length(intersect(x, y)), length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE)
563 |     return(-log10(pval))
564 |   })
565 | })
566 | sim[is.infinite(sim)] = max(sim[is.finite(sim)])
567 | sim = sim[,apply(sim, 2, function(x){any(x > 3)})]
568 | df = data.frame('sample' = sapply(colnames(sim), function(x){
569 |   y = sapply(strsplit(x, '.', fixed = TRUE), '[', 1)
570 | }))
571 | df$tf = sapply(colnames(sim), function(x){
572 |   y = sapply(strsplit(x, '.', fixed = TRUE), '[', 2)
573 | })
574 | df$cancer = sapply(as.character(df$sample), function(x){
575 |   y = sapply(strsplit(x, ''), '[', 1:4)
576 |   paste0(y, collapse = '')
577 | })
578 | df$system = factor(corr[as.character(df$cancer),'system'])
579 | df$top = apply(sim, 2, which.max)  # best-matching module per regulon
580 | df$top = factor(rownames(sim)[df$top], levels = rownames(sim))
581 | ta = table(df[c('top','system')])
582 | top_ann = HeatmapAnnotation(df = df[, c('sample','top')],
583 |                             col = list('sample' = colors, 'top' = colors.module),
584 |                             which = 'column')
585 | h = Heatmap(name = 'Scenic overlap p-value', sim,
586 |             top_annotation = top_ann,
587 |             cluster_columns = FALSE, cluster_rows = FALSE,
588 |             column_order = order(df$top),
589 |             show_row_names = TRUE, row_names_gp = gpar(col = colors.module),
590 |             breaks = seq(0, 6, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
591 | print(h)
592 | decorate_heatmap_body('Scenic overlap p-value', {  # grid overlay: vertical lines at the cumulative group sizes, horizontal lines at multiples of 1/nrow(sim)
593 |   l = c(0,cumsum(table(df$top))[1:length(table(df$top))])
594 |   for (k in 1:length(l)){
595 |     i = unit(l[k]/ncol(sim), 'npc')
596 |     j = unit(k/nrow(sim), 'npc')
597 |     grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
598 |     grid.lines(c(0,1),j, gp = gpar(lwd = 1, col = 'black'))
599 |   }
600 | })
601 | print(sapply(split(df$tf, df$top), function(tf){sort(table(tf), decreasing = TRUE)}))
602 | # Close the text sink and the PDF device opened at the top of the script.
603 | sink()
604 | dev.off()
605 | 


--------------------------------------------------------------------------------
/MergingOurs.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | source('~/Documents/R/seurat_functions.R')
  3 | setwd('~/Documents/Analysis/Tumors/')
  4 | 
  5 | load('parameters.RData')
  6 | # NOTE(review): files.ours, corr, colors, max_iter and perplexity are used below but not defined in this script; presumably supplied by parameters.RData -- confirm.
  7 | #### Ours ####
  8 | 
  9 | pdf('MergingOurs.pdf', height = 10, width = 12)
 10 | # Load the saved consensus modules with their colours and types, dropping AC, OPC and NPC.
 11 | modules = loadRData('modules.RData')
 12 | colors.module = loadRData('colors.module.RData')
 13 | types.module = loadRData('types.module.RData')
 14 | modules = modules[!names(modules) %in% c('AC','OPC','NPC')]
 15 | colors.module = colors.module[names(modules)]
 16 | types.module = types.module[names(modules)]
 17 | 
 18 | #### Malignant ####
 19 | # Per-sample plots of the module scores stored as metadata columns named after the modules.
 20 | ## Srt
 21 | 
 22 | srt.list.ours = lapply(files.ours, function(f){
 23 |   loadRData(paste0(f,'srt.scored.RData'))
 24 | })
 25 | cancer = factor(apply(sapply(strsplit(names(files.ours), ''), '[', 1:4), 2, paste0, collapse = ''))  # first four characters of each sample name
 26 | system = factor(corr[as.character(cancer),'system'])
 27 | germlayer = factor(corr[as.character(cancer),'germlayer'])
 28 | 
 29 | for (s in names(srt.list.ours)){
 30 |   srt = srt.list.ours[[s]]
 31 |   h = DimPlot(srt, group.by = 'orig.ident', cols = colors)
 32 |   print(h)
 33 |   plot.list = lapply(names(modules), function(m){  # one feature plot per module score
 34 |     h = FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m])) +
 35 |       NoAxes() + NoLegend() +
 36 |       theme(plot.title = element_text(size = 10))
 37 |   })
 38 |   print(CombinePlots(plot.list, ncol = 4))
 39 |   tryCatch(expr = {  # best-effort t-SNE view; any error in this part is ignored
 40 |     srt = RunTSNE(srt, dims = 1:10)
 41 |     plot.list = lapply(names(modules), function(m){
 42 |       h = FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m]), split.by = 'cat', reduction = 'tsne') +
 43 |         NoAxes() + NoLegend() +
 44 |         theme(plot.title = element_text(size = 10))
 45 |     })
 46 |     print(CombinePlots(plot.list, ncol = 4))
 47 |   }, error = function(e){c()})
 48 |   srt$state = apply(srt@meta.data[,names(modules)], 1, function(x){  # state = module with the highest score in each cell
 49 |     top = which.max(x)
 50 |     return(names(x)[top])
 51 |   })
 52 |   srt$state = factor(srt$state, levels = names(modules))
 53 |   h = DimPlot(srt, group.by = 'state', cols = colors.module)
 54 |   print(h)
 55 |   srt$num = rowSums(srt@meta.data[,names(modules)] > 0.5)  # number of modules scoring above 0.5 in each cell
 56 |   h = FeaturePlot(srt, 'num', cols = c('grey','darkred'))
 57 |   print(h)
 58 |   srt.list.ours[[s]] = srt  # store the object back with state and num added
 59 | }
 60 | 
 61 | srt.ours = Reduce(merge2, srt.list.ours)  # merge all samples pairwise (merge2 is defined outside this script)
 62 | srt.ours = RunPCA(srt.ours)
 63 | srt.ours = RunUMAP(srt.ours, dims = 1:10)
 64 | 
 65 | h = DimPlot(srt.ours, group.by = 'orig.ident', cols = colors, label = TRUE)
 66 | print(h)
 67 | h = DimPlot(srt.ours, group.by = 'orig.ident', cols = colors)
 68 | print(h)
 69 | plot.list = lapply(names(modules), function(m){
 70 |   h = FeaturePlot(srt.ours, features = m, pt.size = 1, cols = c('grey',colors.module[m])) +
 71 |     NoAxes() + NoLegend() +
 72 |     theme(plot.title = element_text(size = 10))
 73 | })
 74 | print(CombinePlots(plot.list, ncol = 4))
 75 | srt.ours$state = apply(srt.ours@meta.data[,names(modules)], 1, function(x){  # state = module with the highest score in each cell
 76 |   top = which.max(x)
 77 |   return(names(x)[top])
 78 | })
 79 | srt.ours$state = factor(srt.ours$state, levels = names(modules))
 80 | h = DimPlot(srt.ours, group.by = 'state', cols = colors.module)
 81 | print(h)
 82 | 
 83 | save(srt.ours, file = 'srt.ours.RData')  # saved before the SCTransform pass below
 84 | # Second pass: the same plots after SCTransform, PCA and UMAP.
 85 | srt.ours = SCTransform(srt.ours, return.only.var.genes = FALSE)
 86 | srt.ours = RunPCA(srt.ours)
 87 | srt.ours = RunUMAP(srt.ours, dims = 1:10)
 88 | #srt.ours = GeneToEnrichment(srt.ours, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
 89 | # NOTE(review): with the re-scoring above commented out, the module score columns presumably keep their earlier values -- confirm.
 90 | h = DimPlot(srt.ours, group.by = 'orig.ident', cols = colors, label = TRUE)
 91 | print(h)
 92 | h = DimPlot(srt.ours, group.by = 'orig.ident', cols = colors)
 93 | print(h)
 94 | plot.list = lapply(names(modules), function(m){
 95 |   h = FeaturePlot(srt.ours, features = m, pt.size = 0.5, cols = c('grey',colors.module[m])) +
 96 |     NoAxes() + NoLegend() +
 97 |     theme(plot.title = element_text(size = 10))
 98 | })
 99 | print(CombinePlots(plot.list, ncol = 4))
100 | srt.ours$state = apply(srt.ours@meta.data[,names(modules)], 1, function(x){
101 |   top = which.max(x)
102 |   return(names(x)[top])
103 | })
104 | srt.ours$state = factor(srt.ours$state, levels = names(modules))
105 | h = DimPlot(srt.ours, group.by = 'state', cols = colors.module)
106 | print(h)
107 | 
108 | ## TSNE
109 | # Stack the module columns of every per-sample object (rows = cells) and embed them with tsne().
110 | meta = Reduce(rbind, lapply(srt.list.ours, function(srt){
111 |   srt@meta.data[, names(modules)]
112 | }))
113 | ts = tsne(meta, max_iter = max_iter, perplexity = perplexity)
114 | # Attach per-cell annotations, concatenated over srt.list.ours in the same list order used to build `meta`.
115 | ts = data.frame(ts)
116 | colnames(ts) = c('x','y')
117 | ts$state = unlist(lapply(srt.list.ours, function(srt){srt$state}))
118 | ts$num = unlist(lapply(srt.list.ours, function(srt){srt$num}))
119 | ts$orig.ident = unlist(lapply(srt.list.ours, function(srt){srt$orig.ident}))
120 | for (m in names(modules)){  # one numeric column per module
121 |   ts[,m] = as.numeric(unlist(lapply(srt.list.ours, function(srt){srt@meta.data[,m]})))
122 | }
123 | # Neighbourhood composition scores: for every cell, take the 20 smallest entries of its `distances` column and summarise them as sum(p*log(p)) (always <= 0).
124 | distances = as.matrix(dist(t(dist(ts[,c('x','y')])), method = 'euclidean'))  # NOTE(review): dist() is applied twice and `method` goes to as.matrix() - confirm intended
125 | ts$entropy = sapply(1:nrow(ts), function(cell){
126 |   neighbours = order(distances[,cell])[1:20]
127 |   prop = table(ts$orig.ident[neighbours])  # sample counts among the neighbours
128 |   prop = prop/sum(prop)
129 |   prop = prop[prop != 0]  # drop empty classes so log() stays finite
130 |   sum(prop*log(prop))
131 | })
132 | ts$mentropy = sapply(1:nrow(ts), function(cell){
133 |   neighbours = order(distances[,cell])[1:20]
134 |   hits = colSums(ts[neighbours,names(modules)] > 0.5)  # per module, neighbours scoring above 0.5
135 |   hits = hits/sum(hits)
136 |   hits = hits[hits != 0]
137 |   sum(hits*log(hits))
138 | })
139 | # t-SNE scatter plots with legend, axis lines, ticks, text and titles hidden: by top module, by sample, then by the `entropy` column.
140 | p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +
141 |   geom_point() +
142 |   scale_fill_manual(values = colors.module) +
143 |   scale_color_manual(values = colors.module) +
144 |   theme_classic() +
145 |   theme(
146 |     panel.border = element_rect(fill = NA),
147 |     legend.position = "none",
148 |     plot.title = element_text(hjust = 0.5),
149 |     axis.line.x = element_blank(),
150 |     axis.line.y = element_blank(),
151 |     axis.ticks.x = element_blank(),
152 |     axis.ticks.y = element_blank(),
153 |     axis.text.x = element_blank(),
154 |     axis.text.y = element_blank(),
155 |     axis.title.x = element_blank(),
156 |     axis.title.y = element_blank())
157 | print(p)
158 | p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) +  # same layout, coloured by sample
159 |   geom_point() +
160 |   scale_fill_manual(values = colors) +
161 |   scale_color_manual(values = colors) +
162 |   theme_classic() +
163 |   theme(
164 |     panel.border = element_rect(fill = NA),
165 |     legend.position = "none",
166 |     plot.title = element_text(hjust = 0.5),
167 |     axis.line.x = element_blank(),
168 |     axis.line.y = element_blank(),
169 |     axis.ticks.x = element_blank(),
170 |     axis.ticks.y = element_blank(),
171 |     axis.text.x = element_blank(),
172 |     axis.text.y = element_blank(),
173 |     axis.title.x = element_blank(),
174 |     axis.title.y = element_blank())
175 | print(p)
176 | p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'entropy')) +  # continuous colour scale on ts$entropy
177 |   geom_point() +
178 |   scale_color_gradientn(colors = c("lightgrey", "slateblue4")) +
179 |   ggtitle('entropy') +
180 |   theme_classic() +
181 |   theme(
182 |     panel.border = element_rect(fill = NA),
183 |     legend.position = "none",
184 |     plot.title = element_text(hjust = 0.5),
185 |     axis.line.x = element_blank(),
186 |     axis.line.y = element_blank(),
187 |     axis.ticks.x = element_blank(),
188 |     axis.ticks.y = element_blank(),
189 |     axis.text.x = element_blank(),
190 |     axis.text.y = element_blank(),
191 |     axis.title.x = element_blank(),
192 |     axis.title.y = element_blank())
193 | print(p)
194 | p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'num')) +  # colour by ts$num, the per-cell count of modules scoring above 0.5
195 |   geom_point() +
196 |   scale_color_gradientn(colors = c("lightgrey", "darkred")) +
197 |   ggtitle('num') +  # title now names the plotted column (it previously read 'entropy', copied from the plot before)
198 |   theme_classic() +
199 |   theme(
200 |     panel.border = element_rect(fill = NA),
201 |     legend.position = "none",
202 |     plot.title = element_text(hjust = 0.5),
203 |     axis.line.x = element_blank(),
204 |     axis.line.y = element_blank(),
205 |     axis.ticks.x = element_blank(),
206 |     axis.ticks.y = element_blank(),
207 |     axis.text.x = element_blank(),
208 |     axis.text.y = element_blank(),
209 |     axis.title.x = element_blank(),
210 |     axis.title.y = element_blank())
211 | print(p)
212 | p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) +  # one facet per sample (5 columns), coloured by sample
213 |   geom_point() +
214 |   scale_fill_manual(values = colors) +
215 |   scale_color_manual(values = colors) +
216 |   facet_wrap(~ orig.ident, ncol = 5) +
217 |   theme_classic() +
218 |   theme(
219 |     panel.border = element_rect(fill = NA),
220 |     legend.position = "none",
221 |     plot.title = element_text(hjust = 0.5),
222 |     axis.line.x = element_blank(),
223 |     axis.line.y = element_blank(),
224 |     axis.ticks.x = element_blank(),
225 |     axis.ticks.y = element_blank(),
226 |     axis.text.x = element_blank(),
227 |     axis.text.y = element_blank(),
228 |     axis.title.x = element_blank(),
229 |     axis.title.y = element_blank())
230 | print(p)
231 | p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +  # one facet per sample (6 columns), coloured by top module
232 |   geom_point() +
233 |   scale_fill_manual(values = colors.module) +
234 |   scale_color_manual(values = colors.module) +
235 |   facet_wrap(~ orig.ident, ncol = 6) +
236 |   theme_classic() +
237 |   theme(
238 |     panel.border = element_rect(fill = NA),
239 |     legend.position = "none",
240 |     plot.title = element_text(hjust = 0.5),
241 |     axis.line.x = element_blank(),
242 |     axis.line.y = element_blank(),
243 |     axis.ticks.x = element_blank(),
244 |     axis.ticks.y = element_blank(),
245 |     axis.text.x = element_blank(),
246 |     axis.text.y = element_blank(),
247 |     axis.title.x = element_blank(),
248 |     axis.title.y = element_blank())
249 | print(p)
250 | plot.list = lapply(names(modules), function(m){  # one small t-SNE panel per module, coloured by that module's column of ts
251 |   p = ggplot(ts, aes_string(x = 'x', y = 'y', color = m)) +
252 |     geom_point(size = 0.25) +
253 |     scale_color_gradientn(colors = c("lightgrey", colors.module[m])) +
254 |     ggtitle(m) +
255 |     theme_classic() +
256 |     theme(
257 |       panel.border = element_rect(fill = NA),
258 |       legend.position = "none",
259 |       plot.title = element_text(hjust = 0.5),
260 |       axis.line.x = element_blank(),
261 |       axis.line.y = element_blank(),
262 |       axis.ticks.x = element_blank(),
263 |       axis.ticks.y = element_blank(),
264 |       axis.text.x = element_blank(),
265 |       axis.text.y = element_blank(),
266 |       axis.title.x = element_blank(),
267 |       axis.title.y = element_blank())
268 | })
269 | print(CombinePlots(plot.list, ncol = 4))  # panels arranged in a 4-column grid
270 | 
271 | ## Stats
272 | # Load each sample's stats table (columns restricted to the modules), then append three summary rows computed from the merged object.
273 | stats.list.ours = lapply(files.ours, function(f){
274 |   stats = loadRData(paste0(f,'stats.RData'))
275 |   stats = stats[,names(modules)]
276 |   return(stats)
277 | })
278 | # The appended rows are labelled from the variable names passed to rbind() (assuming stats is a matrix - TODO confirm), so do not rename fraction / average / num.
279 | for (s in names(stats.list.ours)){
280 |   stats = stats.list.ours[[s]]
281 |   srt = srt.ours[, srt.ours$orig.ident == s]  # cells of sample s in the merged object
282 |   scores = srt@meta.data[,names(modules)]
283 |   fraction = colMeans(scores > 0.5, na.rm = TRUE)  # per module, share of cells scoring above 0.5
284 |   data = GetData(srt, slot = 'scale.data')
285 |   average = sapply(modules, function(mod){  # per module, mean of `data` over the module's genes present in it and all cells
286 |     mean(data[intersect(mod,rownames(data)),])
287 |   })
288 |   num = mean(srt$num, na.rm = TRUE)  # single value; rbind() recycles it across the module columns
289 |   stats = rbind(stats, fraction, average, num)
290 |   stats.list.ours[[s]] = stats
291 | }
292 | # Long-format table of all per-sample stats; cancer = first four characters of the sample name, looked up in `corr` for system and germ layer.
293 | stats.ours = melt(lapply(stats.list.ours, function(stats){
294 |   melt(stats)
295 | }))
296 | names(stats.ours) = c('measure','module','variable','value','sample')
297 | stats.ours$cancer = factor(apply(sapply(strsplit(stats.ours$sample, ''), '[', 1:4), 2, paste0, collapse = ''))
298 | stats.ours$system = factor(corr[as.character(stats.ours$cancer),'system'])
299 | stats.ours$germlayer = factor(corr[as.character(stats.ours$cancer),'germlayer'])
300 | save(stats.ours, file = 'stats.ours.RData')
301 | # One heatmap per measure: modules in rows, samples in columns, colour breaks spanning the 10% to 90% quantiles of the values.
302 | for (measure in levels(stats.ours$measure)){
303 |   # Reshape the long table of this measure into a module x sample table
304 |   mat = stats.ours[stats.ours$measure == measure, c('value','module','sample')]
305 |   mat = reshape(mat, timevar = 'sample', idvar = 'module', direction = 'wide')
306 |   rownames(mat) = mat[,1]
307 |   mat = mat[,-1]
308 |   colnames(mat) = gsub('value.', '', colnames(mat), fixed = TRUE)
309 |   # Column annotation: sample, system and germ layer (sample = column name up to the first '.')
310 |   df = data.frame('subsample' = colnames(mat), stringsAsFactors = FALSE)
311 |   df$sample = sapply(strsplit(df$subsample, split = '.', fixed = TRUE), '[', 1)
312 |   df$cancer = sapply(as.character(df$sample), function(x){
313 |     y = sapply(strsplit(x, ''), '[', 1:4)
314 |     paste0(y, collapse = '')
315 |   })
316 |   df$system = factor(corr[as.character(df$cancer),'system'])
317 |   df$germlayer = factor(corr[as.character(df$cancer),'germlayer'])
318 |   top_ann = HeatmapAnnotation(df = df[,c('sample','system','germlayer')],
319 |                               col = list('sample' = colors, 'system' = colors.system, 'germlayer' = colors.germlayer),
320 |                               which = 'column')
321 |   h = Heatmap(mat, name = measure,
322 |               top_annotation = top_ann,
323 |               cluster_rows = FALSE, cluster_columns = FALSE,
324 |               show_row_names = TRUE, show_column_names = TRUE,
325 |               row_names_gp = gpar(col = colors.module[rownames(mat)]), column_names_gp = gpar(col = colors[colnames(mat)]),
326 |               breaks = seq(quantile(unlist(mat), seq(1:10)/10, na.rm = TRUE)['10%'], quantile(unlist(mat), seq(1:10)/10, na.rm = TRUE)['90%'], length = 11), colors = viridis_pal(begin = 0, end = 1, option = 'magma')(11))
327 |   print(h)
328 |   decorate_heatmap_body(measure, {  # draw black lines at every column and row boundary of the heatmap body
329 |     #l = c(0,cumsum(rle(as.character(cancer))$lengths))
330 |     l = 0:ncol(mat)
331 |     for (k in 1:length(l)){
332 |       i = unit(l[k]/max(l), 'npc')
333 |       grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
334 |     }
335 |     l = 0:nrow(mat)
336 |     for (k in 1:length(l)){
337 |       i = unit(l[k]/max(l), 'npc')
338 |       grid.lines(c(0,1), i, gp = gpar(lwd = 1, col = 'black'))
339 |     }
340 |   })
341 |   
342 | }
343 | 
344 | ## Co-occurrence
345 | # Per sample: hypergeometric test for every pair of modules (cells scoring > 0.5), stored and drawn as a signed network.
346 | par(mfrow = c(2,2))
347 | coocc.list = list()
348 | occ.list = list()
349 | for (s in names(srt.list.ours)){
350 |   
351 |   srt = srt.list.ours[[s]]
352 |   stats = stats.list.ours[[s]]
353 |   meta = srt@meta.data[, names(modules)]
354 |   
355 |   occ = stats['frequency',]
356 |   # Signed score per module pair: -log10(p) when the upper-tail p is <= 0.5, log10(1-p) (negative) otherwise
357 |   coocc = sapply(names(modules), function(m1){
358 |     sapply(names(modules), function(m2){
359 |       pval = phyper(
360 |         sum((meta[, m1] > 0.5) * (meta[, m2] > 0.5)), 
361 |         sum(meta[, m2] > 0.5), 
362 |         dim(meta)[1] - sum(meta[, m2] > 0.5),
363 |         sum(meta[, m1] > 0.5), 
364 |         lower.tail = FALSE)
365 |       if (pval <= 0.5){
366 |         return(-log10(pval))
367 |       }
368 |       if (pval > 0.5){
369 |         return(log10(1-pval))
370 |       }
371 |     })
372 |   })
373 |   # Replace infinite scores by the most extreme finite score of the same sign
374 |   coocc[is.infinite(coocc) & coocc > 0] = max(coocc[is.finite(coocc)])
375 |   coocc[is.infinite(coocc) & coocc < 0] = min(coocc[is.finite(coocc)])
376 |   # Keep the unfiltered results; they are summarised across samples further down
377 |   occ.list[[s]] = occ
378 |   coocc.list[[s]] = coocc
379 |   # For the plot: keep modules with presence > 0.5 and frequency > 0.05, zero weak scores, clip to [-10, 10]
380 |   keep = names(which(stats['presence',] > 0.5 & stats['frequency',] > 0.05))
381 |   occ = occ[keep]
382 |   coocc = coocc[keep, keep]
383 |   coocc[ coocc > -0.5 & coocc < 0.5] = 0
384 |   coocc[coocc > 10] = 10
385 |   coocc[coocc < -10] = -10
386 |   # Best effort: a sample whose graph cannot be built or drawn is skipped, and the reason is reported on stderr
387 |   tryCatch(expr = {
388 |     g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
389 |     V(g)$type = (types.module[names(V(g))] == 'Type')
390 |     coords = layout.circle(g)
391 |     E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
392 |     E(g)$width = 5*abs(E(g)$weight)/max(abs(E(g)$weight))  # scale by the largest magnitude so widths stay in (0, 5] even when negative edges dominate
393 |     V(g)$color = colors.module[names(V(g))]
394 |     V(g)$size = 50*occ/max(occ)
395 |     # Plot graph
396 |     plot(g, main = s,
397 |          layout = coords,
398 |          vertex.shape = 'circle', 
399 |          vertex.label.size = 1, vertex.label.color = 'black',
400 |          frame=FALSE)
401 |   }, error = function(e){message('Co-occurrence graph skipped for ', s, ': ', conditionMessage(e))})
402 | }
403 | # Per level of `cancer`: median co-occurrence matrix and median frequency over the matching entries of coocc.list / occ.list.
404 | for (can in levels(cancer)){
405 |   coocc = SummarizeMatrices(coocc.list[cancer == can], median)
406 |   occ = sapply(names(modules), function(m){median(sapply(occ.list[cancer == can], '[', m))})
407 |   keep = (names(which(occ >= 0.05)))  # modules with median frequency of at least 0.05
408 |   occ = occ[keep]
409 |   coocc = coocc[keep, keep]
410 |   coocc[ coocc > -0.5 & coocc < 0.5] = 0  # drop weak scores, then clip to [-10, 10]
411 |   coocc[coocc > 10] = 10
412 |   coocc[coocc < -10] = -10
413 |   tryCatch(expr = {  # best effort: a failing graph is skipped and reported instead of silently ignored
414 |     g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
415 |     V(g)$type = (types.module[names(V(g))] == 'Type')
416 |     coords = layout.circle(g)
417 |     E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
418 |     E(g)$width = abs(E(g)$weight)/5
419 |     V(g)$color = colors.module[names(V(g))]
420 |     V(g)$size = occ*200
421 |     # Plot graph
422 |     plot(g, main = can,
423 |          layout = coords,
424 |          vertex.shape = 'circle', 
425 |          vertex.label.size = 1, vertex.label.color = 'black',
426 |          frame=FALSE)
427 |   }, error = function(e){message('Co-occurrence graph skipped for ', can, ': ', conditionMessage(e))})
428 | }
429 | # Per level of `system`: median co-occurrence matrix and median frequency over the matching entries of coocc.list / occ.list.
430 | for (sys in levels(system)){
431 |   coocc = SummarizeMatrices(coocc.list[system == sys], median)
432 |   occ = sapply(names(modules), function(m){median(sapply(occ.list[system == sys], '[', m))})
433 |   keep = (names(which(occ >= 0.05)))  # modules with median frequency of at least 0.05
434 |   occ = occ[keep]
435 |   coocc = coocc[keep, keep]
436 |   coocc[ coocc > -0.5 & coocc < 0.5] = 0  # drop weak scores, then clip to [-10, 10]
437 |   coocc[coocc > 10] = 10
438 |   coocc[coocc < -10] = -10
439 |   tryCatch(expr = {  # best effort: a failing graph is skipped and reported instead of silently ignored
440 |     g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
441 |     V(g)$type = (types.module[names(V(g))] == 'Type')
442 |     coords = layout.circle(g)
443 |     E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
444 |     E(g)$width = abs(E(g)$weight)/5
445 |     V(g)$color = colors.module[names(V(g))]
446 |     V(g)$size = occ*200
447 |     # Plot graph
448 |     plot(g, main = sys,
449 |          layout = coords,
450 |          vertex.shape = 'circle', 
451 |          vertex.label.size = 1, vertex.label.color = 'black',
452 |          frame=FALSE)
453 |   }, error = function(e){message('Co-occurrence graph skipped for ', sys, ': ', conditionMessage(e))})
454 | }
455 | # Across all samples: median co-occurrence matrix and median frequency, drawn as one graph titled 'All'.
456 | coocc = SummarizeMatrices(coocc.list, median)
457 | occ = sapply(names(modules), function(m){median(sapply(occ.list, '[', m))})
458 | keep = (names(which(occ >= 0.05)))  # modules with median frequency of at least 0.05
459 | occ = occ[keep]
460 | coocc = coocc[keep, keep]
461 | coocc[coocc > -0.5 & coocc < 0.5] = 0  # drop weak scores, then clip to [-10, 10]
462 | coocc[coocc > 10] = 10
463 | coocc[coocc < -10] = -10
464 | tryCatch(expr = {  # best effort: a failing graph is skipped and reported instead of silently ignored
465 |   g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
466 |   V(g)$type = (types.module[names(V(g))] == 'Type')
467 |   coords = layout.circle(g)
468 |   E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
469 |   E(g)$width = abs(E(g)$weight)/5
470 |   V(g)$color = colors.module[names(V(g))]
471 |   V(g)$size = occ*200
472 |   # Plot graph
473 |   plot(g, main = 'All',
474 |        layout = coords,
475 |        vertex.shape = 'circle', 
476 |        vertex.label.size = 1, vertex.label.color = 'black',
477 |        frame=FALSE)
478 | }, error = function(e){message('Co-occurrence graph skipped for All: ', conditionMessage(e))})
479 | # Back to one plot per page
480 | par(mfrow = c(1,1))
481 | 
482 | 
483 | #### All ####
484 | # Load each sample's 'srt.all.RData' object, merge them with merge2, compute PCA/UMAP (dims 1:10) and save the result.
485 | srt.list.ours.all = lapply(files.ours, function(f){
486 |   loadRData(paste0(f,'srt.all.RData'))
487 | })
488 | 
489 | srt.ours.all = Reduce(merge2, srt.list.ours.all)
490 | srt.ours.all = RunPCA(srt.ours.all)
491 | srt.ours.all = RunUMAP(srt.ours.all, dims = 1:10)
492 | save(srt.ours.all, file = 'srt.ours.all.RData')
493 | # One hue_pal() colour per level of `pop`, with the 'Malignant' entry set to black; the palette is saved to disk.
494 | colors.pop = hue_pal()(nlevels(srt.ours.all$pop))
495 | names(colors.pop) = levels(srt.ours.all$pop)
496 | colors.pop['Malignant'] = 'black'
497 | save(colors.pop, file = 'colors.pop.ours.RData')
498 | # UMAP plots by sample, `type` and `pop`, each with and without labels. NOTE(review): `o` is a random permutation of cell indices - confirm DimPlot's `order` accepts that.
499 | o = sample(1:ncol(srt.ours.all), size = ncol(srt.ours.all), replace = FALSE)
500 | h = DimPlot(srt.ours.all, group.by = 'orig.ident', cols = colors, order = o, pt.size = 1, label = TRUE)
501 | print(h)
502 | h = DimPlot(srt.ours.all, group.by = 'orig.ident', cols = colors, order = o, pt.size = 1)
503 | print(h)
504 | h = DimPlot(srt.ours.all, group.by = 'type', cols = colors.type, order = o, pt.size = 1, label = TRUE)
505 | print(h)
506 | h = DimPlot(srt.ours.all, group.by = 'type', cols = colors.type, order = o, pt.size = 1)
507 | print(h)
508 | h = DimPlot(srt.ours.all, group.by = 'pop', cols = colors.pop, order = o, pt.size = 1, label = TRUE)
509 | print(h)
510 | h = DimPlot(srt.ours.all, group.by = 'pop', cols = colors.pop, order = o, pt.size = 1, label = FALSE)
511 | print(h)
512 | # Keep every cell whose `pop` is not 'Malignant', recompute PCA/UMAP on that subset and save it.
513 | srt.ours.tme = srt.ours.all[, !srt.ours.all$pop == 'Malignant']
514 | srt.ours.tme = RunPCA(srt.ours.tme)
515 | srt.ours.tme = RunUMAP(srt.ours.tme, dims = 1:10)
516 | save(srt.ours.tme, file = 'srt.ours.tme.RData')
517 | 
518 | # Frequency regression
519 | # freq: per sample, share of cells scoring > 0.5 for each module. tme: per sample, proportions of the retained non-malignant populations (rows sum to 1).
520 | freq = t(sapply(levels(srt.ours$orig.ident), function(or){
521 |   colMeans(srt.ours@meta.data[srt.ours$orig.ident == or, names(modules)] > 0.5, na.rm = TRUE)
522 | }))
523 | tme = table(srt.ours.tme$orig.ident, srt.ours.tme$pop)  # NOTE(review): freq and tme rows are paired by position below - confirm same samples, same order
524 | exclude = c('Malignant','Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell',
525 |             'CMP','Erythroblast','GMP','Gametocytes','GMP','HSC_CD34+','Pre-B_cell_CD34-','Pro-B_cell_CD34+','Pro-Myelocyte')  # 'GMP' appears twice; setdiff() is unaffected
526 | tme = tme[, setdiff(colnames(tme), exclude)]
527 | tme = tme/rowSums(tme)
528 | 
529 | # Multiple regression: per module, regress its frequency on all tme columns at once and report sign(estimate) * -log10(p)
530 | test = sapply(names(modules), function(m){
531 |   fit = summary(lm(freq[,m] ~ tme))$coefficients
532 |   pval = -log10(fit[,4])  # column 4 of the coefficient table holds the p-values
533 |   pval[pval < -log10(pval_thresh)] = 0  # i.e. p above pval_thresh is reported as 0
534 |   names(pval) = gsub('tme','',names(pval))  # strip the 'tme' prefix lm() adds to the coefficient names
535 |   est = fit[,1]  # column 1 holds the estimates
536 |   names(est) = gsub('tme','',names(est))
537 |   val = pval*sign(est)
538 |   return(val)
539 | })
540 | h = Heatmap(test, 
541 |             show_row_names = TRUE, show_column_names = TRUE,
542 |             cluster_rows = FALSE, cluster_columns = FALSE,
543 |             breaks = seq(-3, 3, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
544 | print(h)
545 | 
546 | # Plot hits: one scatter per (population, module) pair from a single-predictor regression, 16 panels per page
547 | par(mfrow = c(4,4))
548 | for (ct in colnames(tme)){
549 |   for (m in names(modules)){
550 |     fit = summary(lm(freq[,m] ~ tme[,ct]))$coefficients
551 |     pval = -log10(fit[2,4])  # -log10 of the slope's p-value, shown rounded as the panel title
552 |     plot(freq[,m] ~ tme[,ct], col = colors[rownames(tme)], pch = 20, xlab = ct, ylab = m, main = round(pval, digits = 2))
553 |     if (pval > 0.5){  # NOTE(review): compared on the -log10 scale (p < ~0.32), not against pval_thresh - confirm intended
554 |       abline(a = fit[1,1], b = fit[2,1], lty = 2)
555 |     }
556 |   }
557 | }
558 | par(mfrow = c(1,1))
559 | # Close the graphics device
560 | dev.off()
561 | 
562 | 


--------------------------------------------------------------------------------
/MergingPaired.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | source('~/Documents/R/seurat_functions.R')
  3 | setwd('~/Documents/Analysis/Tumors/')
  4 | # Load saved parameters; names used below but not defined in this script (files.paired.list, corr, colors, ...) presumably come from here or the sourced helpers - TODO confirm.
  5 | load('parameters.RData')
  6 | 
  7 | #### Paired ####
  8 | # Open the PDF device for the plots below.
  9 | pdf('MergingPaired.pdf', height = 10, width = 20)
 10 | # Load the modules with their colours and types, dropping the modules named AC, OPC and NPC.
 11 | modules = loadRData('modules.RData')
 12 | colors.module = loadRData('colors.module.RData')
 13 | types.module = loadRData('types.module.RData')
 14 | modules = modules[!names(modules) %in% c('AC','OPC','NPC')]
 15 | colors.module = colors.module[names(modules)]
 16 | types.module = types.module[names(modules)]
 17 | 
 18 | #### Malignant ####
 19 | 
 20 | ## Srt
 21 | # Load one 'Paired/srt.scored.RData' object per entry, using the entry's 'normal' path; cancer = first four characters of the entry name, looked up in `corr`.
 22 | srt.list.paired = lapply(files.paired.list, function(f){
 23 |   loadRData(paste0(f[['normal']],'Paired/srt.scored.RData'))
 24 | })
 25 | cancer = factor(apply(sapply(strsplit(names(files.paired.list), ''), '[', 1:4), 2, paste0, collapse = ''))
 26 | system = factor(corr[as.character(cancer),'system'])
 27 | germlayer = factor(corr[as.character(cancer),'germlayer'])
 28 | # Per paired sample: plots by sample and `cat`, module panels split by `cat` (UMAP, then t-SNE if it can be computed), then `state` and `num` are added and the object is stored back.
 29 | for (s in names(srt.list.paired)){
 30 |   srt = srt.list.paired[[s]]
 31 |   h = DimPlot(srt, group.by = 'orig.ident', cols = colors)
 32 |   print(h)
 33 |   h = DimPlot(srt, group.by = 'cat', cols = colors.cat)
 34 |   print(h)
 35 |   plot.list = lapply(names(modules), function(m){
 36 |     h = FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m]), split.by = 'cat') +
 37 |       NoAxes() + NoLegend() +
 38 |       theme(plot.title = element_text(size = 10))
 39 |   })
 40 |   print(CombinePlots(plot.list, ncol = 4))
 41 |   tryCatch(expr = {  # best effort: any error in the t-SNE step is discarded and the loop continues
 42 |     srt = RunTSNE(srt, dims = 1:10)
 43 |     plot.list = lapply(names(modules), function(m){
 44 |     h = FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m]), split.by = 'cat', reduction = 'tsne') +
 45 |       NoAxes() + NoLegend() +
 46 |       theme(plot.title = element_text(size = 10))
 47 |   })
 48 |   print(CombinePlots(plot.list, ncol = 4))
 49 |   }, error = function(e){c()})
 50 |   srt$state = apply(srt@meta.data[,names(modules)], 1, function(x){  # per cell, name of the first maximal module column
 51 |     top = which.max(x)
 52 |     return(names(x)[top])
 53 |   })
 54 |   srt$state = factor(srt$state, levels = names(modules))
 55 |   srt$num = rowSums(srt@meta.data[,names(modules)] > 0.5)  # per cell, number of module columns above 0.5
 56 |   h = FeaturePlot(srt, 'num', cols = c('grey','darkred'))
 57 |   print(h)
 58 |   h = VlnPlot(srt, 'num', group.by = 'cat', cols = colors.cat, pt.size = 0)
 59 |   print(h)
 60 |   h = DimPlot(srt, group.by = 'state', cols = colors.module)
 61 |   print(h)
 62 |   srt.list.paired[[s]] = srt
 63 | }
 64 | # Merge the paired objects with merge2, compute PCA/UMAP (dims 1:10), plot by sample / module / state / num / cat, then save.
 65 | srt.paired = Reduce(merge2, srt.list.paired)
 66 | srt.paired = RunPCA(srt.paired)
 67 | srt.paired = RunUMAP(srt.paired, dims = 1:10)
 68 | 
 69 | h = DimPlot(srt.paired, group.by = 'orig.ident', cols = colors, label = TRUE)
 70 | print(h)
 71 | h = DimPlot(srt.paired, group.by = 'orig.ident', cols = colors)
 72 | print(h)
 73 | plot.list = lapply(names(modules), function(m){  # one panel per module, grey to the module's colour
 74 |   h = FeaturePlot(srt.paired, features = m, pt.size = 0.5, cols = c('grey',colors.module[m])) +
 75 |     NoAxes() + NoLegend() +
 76 |     theme(plot.title = element_text(size = 10))
 77 | })
 78 | print(CombinePlots(plot.list, ncol = 4))
 79 | srt.paired$state = apply(srt.paired@meta.data[,names(modules)], 1, function(x){  # per cell, name of the first maximal module column
 80 |   top = which.max(x)
 81 |   return(names(x)[top])
 82 | })
 83 | srt.paired$state = factor(srt.paired$state, levels = names(modules))
 84 | h = DimPlot(srt.paired, group.by = 'state', cols = colors.module)
 85 | print(h)
 86 | h = FeaturePlot(srt.paired, 'num', cols = c('grey','darkred'))
 87 | print(h)
 88 | h = DimPlot(srt.paired, group.by = 'cat', cols = colors.cat)
 89 | print(h)
 90 | 
 91 | save(srt.paired, file = 'srt.paired.RData')
 92 | # Run SCTransform on the merged object, recompute PCA/UMAP, call GeneToEnrichment with the modules, then redraw the overview plots.
 93 | srt.paired = SCTransform(srt.paired, return.only.var.genes = FALSE)
 94 | srt.paired = RunPCA(srt.paired)
 95 | srt.paired = RunUMAP(srt.paired, dims = 1:10)
 96 | srt.paired = GeneToEnrichment(srt.paired, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
 97 | 
 98 | h = DimPlot(srt.paired, group.by = 'orig.ident', cols = colors, label = TRUE)
 99 | print(h)
100 | h = DimPlot(srt.paired, group.by = 'orig.ident', cols = colors)
101 | print(h)
102 | h = DimPlot(srt.paired, group.by = 'cat', cols = colors.cat)
103 | print(h)
104 | plot.list = lapply(names(modules), function(m){  # one panel per module, grey to the module's colour
105 |   h = FeaturePlot(srt.paired, features = m, pt.size = 1, cols = c('grey',colors.module[m])) +
106 |     NoAxes() + NoLegend() +
107 |     theme(plot.title = element_text(size = 10))
108 | })
109 | print(CombinePlots(plot.list, ncol = 4))
110 | srt.paired$state = apply(srt.paired@meta.data[,names(modules)], 1, function(x){  # per cell, name of the first maximal module column
111 |   top = which.max(x)
112 |   return(names(x)[top])
113 | })
114 | srt.paired$state = factor(srt.paired$state, levels = names(modules))
115 | h = DimPlot(srt.paired, group.by = 'state', cols = colors.module)
116 | print(h)
117 | 
118 | # ## TSNE
119 | # 
120 | # meta = Reduce(rbind, lapply(srt.list.paired, function(srt){
121 | #   srt@meta.data[, names(modules)]
122 | # }))
123 | # ts = tsne(meta, max_iter = max_iter, perplexity = perplexity)
124 | # 
125 | # ts = data.frame(ts)
126 | # colnames(ts) = c('x','y')
127 | # ts$state = unlist(lapply(srt.list.paired, function(srt){srt$state}))
128 | # ts$num = unlist(lapply(srt.list.paired, function(srt){srt$num}))
129 | # ts$orig.ident = unlist(lapply(srt.list.paired, function(srt){srt$orig.ident}))
130 | # for (m in names(modules)){
131 | #   ts[,m] = as.numeric(unlist(lapply(srt.list.paired, function(srt){srt@meta.data[,m]})))
132 | # }
133 | # 
134 | # distances = as.matrix(dist(t(dist(ts[,c('x','y')])), method = 'euclidean'))
135 | # ts$entropy = sapply(1:nrow(ts), function(c){
136 | #   nn = order(distances[,c])[1:20]
137 | #   ta = table(ts$orig.ident[nn])
138 | #   ta = ta/sum(ta)
139 | #   ta = ta[!ta == 0]
140 | #   return(sum(ta*log(ta)))
141 | # })
142 | # ts$mentropy = sapply(1:nrow(ts), function(c){
143 | #   nn = order(distances[,c])[1:20]
144 | #   ta = colSums(ts[nn,names(modules)] > 0.5)
145 | #   ta = ta/sum(ta)
146 | #   ta = ta[!ta == 0]
147 | #   return(sum(ta*log(ta)))
148 | # })
149 | # 
150 | # p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +
151 | #   geom_point() +
152 | #   scale_fill_manual(values = colors.module) +
153 | #   scale_color_manual(values = colors.module) +
154 | #   theme_classic() +
155 | #   theme(
156 | #     panel.border = element_rect(fill = NA),
157 | #     legend.position = "none",
158 | #     plot.title = element_text(hjust = 0.5),
159 | #     axis.line.x = element_blank(),
160 | #     axis.line.y = element_blank(),
161 | #     axis.ticks.x = element_blank(),
162 | #     axis.ticks.y = element_blank(),
163 | #     axis.text.x = element_blank(),
164 | #     axis.text.y = element_blank(),
165 | #     axis.title.x = element_blank(),
166 | #     axis.title.y = element_blank())
167 | # print(p)
168 | # p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) +
169 | #   geom_point() +
170 | #   scale_fill_manual(values = colors) +
171 | #   scale_color_manual(values = colors) +
172 | #   theme_classic() +
173 | #   theme(
174 | #     panel.border = element_rect(fill = NA),
175 | #     legend.position = "none",
176 | #     plot.title = element_text(hjust = 0.5),
177 | #     axis.line.x = element_blank(),
178 | #     axis.line.y = element_blank(),
179 | #     axis.ticks.x = element_blank(),
180 | #     axis.ticks.y = element_blank(),
181 | #     axis.text.x = element_blank(),
182 | #     axis.text.y = element_blank(),
183 | #     axis.title.x = element_blank(),
184 | #     axis.title.y = element_blank())
185 | # print(p)
186 | # p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'entropy')) +
187 | #   geom_point() +
188 | #   scale_color_gradientn(colors = c("lightgrey", "slateblue4")) +
189 | #   ggtitle('entropy') +
190 | #   theme_classic() +
191 | #   theme(
192 | #     panel.border = element_rect(fill = NA),
193 | #     legend.position = "none",
194 | #     plot.title = element_text(hjust = 0.5),
195 | #     axis.line.x = element_blank(),
196 | #     axis.line.y = element_blank(),
197 | #     axis.ticks.x = element_blank(),
198 | #     axis.ticks.y = element_blank(),
199 | #     axis.text.x = element_blank(),
200 | #     axis.text.y = element_blank(),
201 | #     axis.title.x = element_blank(),
202 | #     axis.title.y = element_blank())
203 | # print(p)
204 | # p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'num')) +
205 | #   geom_point() +
206 | #   scale_color_gradientn(colors = c("lightgrey", "darkred")) +
207 | #   ggtitle('entropy') +
208 | #   theme_classic() +
209 | #   theme(
210 | #     panel.border = element_rect(fill = NA),
211 | #     legend.position = "none",
212 | #     plot.title = element_text(hjust = 0.5),
213 | #     axis.line.x = element_blank(),
214 | #     axis.line.y = element_blank(),
215 | #     axis.ticks.x = element_blank(),
216 | #     axis.ticks.y = element_blank(),
217 | #     axis.text.x = element_blank(),
218 | #     axis.text.y = element_blank(),
219 | #     axis.title.x = element_blank(),
220 | #     axis.title.y = element_blank())
221 | # print(p)
222 | # p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) +
223 | #   geom_point() +
224 | #   scale_fill_manual(values = colors) +
225 | #   scale_color_manual(values = colors) +
226 | #   facet_wrap(~ orig.ident, ncol = 6) +
227 | #   theme_classic() +
228 | #   theme(
229 | #     panel.border = element_rect(fill = NA),
230 | #     legend.position = "none",
231 | #     plot.title = element_text(hjust = 0.5),
232 | #     axis.line.x = element_blank(),
233 | #     axis.line.y = element_blank(),
234 | #     axis.ticks.x = element_blank(),
235 | #     axis.ticks.y = element_blank(),
236 | #     axis.text.x = element_blank(),
237 | #     axis.text.y = element_blank(),
238 | #     axis.title.x = element_blank(),
239 | #     axis.title.y = element_blank())
240 | # print(p)
241 | # p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +
242 | #   geom_point() +
243 | #   scale_fill_manual(values = colors.module) +
244 | #   scale_color_manual(values = colors.module) +
245 | #   facet_wrap(~ orig.ident, ncol = 6) +
246 | #   theme_classic() +
247 | #   theme(
248 | #     panel.border = element_rect(fill = NA),
249 | #     legend.position = "none",
250 | #     plot.title = element_text(hjust = 0.5),
251 | #     axis.line.x = element_blank(),
252 | #     axis.line.y = element_blank(),
253 | #     axis.ticks.x = element_blank(),
254 | #     axis.ticks.y = element_blank(),
255 | #     axis.text.x = element_blank(),
256 | #     axis.text.y = element_blank(),
257 | #     axis.title.x = element_blank(),
258 | #     axis.title.y = element_blank())
259 | # print(p)
260 | # plot.list = lapply(names(modules), function(m){
261 | #   p = ggplot(ts, aes_string(x = 'x', y = 'y', color = m)) +
262 | #     geom_point(size = 0.25) +
263 | #     scale_color_gradientn(colors = c("lightgrey", colors.module[m])) +
264 | #     ggtitle(m) +
265 | #     theme_classic() +
266 | #     theme(
267 | #       panel.border = element_rect(fill = NA),
268 | #       legend.position = "none",
269 | #       plot.title = element_text(hjust = 0.5),
270 | #       axis.line.x = element_blank(),
271 | #       axis.line.y = element_blank(),
272 | #       axis.ticks.x = element_blank(),
273 | #       axis.ticks.y = element_blank(),
274 | #       axis.text.x = element_blank(),
275 | #       axis.text.y = element_blank(),
276 | #       axis.title.x = element_blank(),
277 | #       axis.title.y = element_blank())
278 | # })
279 | # print(CombinePlots(plot.list, ncol = 4))
280 | 
281 | ## Stats
282 | # Load each paired sample's stats table (columns restricted to the modules), then append normal/tumor summary rows computed from the merged object.
283 | stats.list.paired = lapply(files.paired.list, function(f){
284 |   stats = loadRData(paste0(f[['normal']],'Paired/stats.RData'))
285 |   stats = stats[,names(modules)]
286 |   return(stats)
287 | })
288 | # The appended rows are labelled from the variable names passed to rbind() (assuming stats is a matrix - TODO confirm); the '<measure>.<cat>' naming is split on '.' further down.
289 | for (s in names(stats.list.paired)){
290 |   stats = stats.list.paired[[s]]
291 |   srt = srt.paired[, srt.paired$orig.ident == s]  # cells of sample s in the merged object
292 |   scores = srt@meta.data[,names(modules)]
293 |   fraction.normal = colMeans(scores[srt$cat == 'normal',] > 0.5, na.rm = TRUE)  # per module, share of 'normal' cells above 0.5
294 |   fraction.tumor = colMeans(scores[srt$cat == 'tumor',] > 0.5, na.rm = TRUE)
295 |   data = GetData(srt, slot = 'scale.data')
296 |   average.normal = sapply(modules, function(mod){  # per module, mean of `data` over the module's genes present in it, 'normal' cells only
297 |     mean(data[intersect(mod,rownames(data)),srt$cat == 'normal'])
298 |   })
299 |   average.tumor = sapply(modules, function(mod){
300 |     mean(data[intersect(mod,rownames(data)),srt$cat == 'tumor'])
301 |   })
302 |   num.normal = mean(srt@meta.data[srt$cat == 'normal','num'], na.rm = TRUE)  # single values; rbind() recycles them across the module columns
303 |   num.tumor = mean(srt@meta.data[srt$cat == 'tumor','num'], na.rm = TRUE)
304 |   stats = rbind(stats, fraction.normal, fraction.tumor, average.normal, average.tumor, num.normal, num.tumor)
305 |   stats.list.paired[[s]] = stats
306 | }
307 | 
308 | print(stats.list.paired$PDACHs1[c('num.normal','num.tumor'),1])
309 | print(stats.list.paired$PDACHs2[c('num.normal','num.tumor'),1])
310 | print(stats.list.paired$PDACHs3[c('num.normal','num.tumor'),1])
311 | 
312 | stats.paired = melt(lapply(stats.list.paired, function(stats){  # stack all per-sample tables into one long data frame
313 |   melt(stats)
314 | }))
315 | names(stats.paired) = c('measure','module','variable','value','sample')
316 | stats.paired$cat = factor(sapply(strsplit(as.character(stats.paired$measure), '.', fixed = TRUE), '[', 2), levels = names(colors.cat))  # second '.'-separated field of the row name (e.g. 'fraction.normal' -> 'normal'); anything not in names(colors.cat) becomes NA
317 | stats.paired$measure = factor(sapply(strsplit(as.character(stats.paired$measure), '.', fixed = TRUE), '[', 1))  # first '.'-separated field (e.g. 'fraction')
318 | stats.paired$cancer = factor(apply(sapply(strsplit(stats.paired$sample, ''), '[', 1:4), 2, paste0, collapse = ''))  # first four characters of the sample name
319 | stats.paired$system = factor(corr[as.character(stats.paired$cancer),'system'])  # corr is defined outside this chunk; indexed here by cancer code
320 | stats.paired$germlayer = factor(corr[as.character(stats.paired$cancer),'germlayer'])
321 | stats.paired$sample.cat = paste(stats.paired$sample, stats.paired$cat, sep = '.')  # '<sample>.<cat>' key used as the heatmap column id below
322 | save(stats.paired, file = 'stats.paired.RData')
323 | 
324 | for (measure in levels(stats.paired$measure)){
325 |   # Wide table for this measure: one row per module, one column per 'sample.cat'
326 |   mat = stats.paired[stats.paired$measure == measure, c('value','module','sample.cat')]
327 |   mat = reshape(mat, timevar = 'sample.cat', idvar = 'module', direction = 'wide')
328 |   rownames(mat) = mat[,1]
329 |   mat = mat[,-1]
330 |   colnames(mat) = gsub('value.', '', colnames(mat), fixed = TRUE)
331 |   # Column annotations parsed back out of the '<sample>.<cat>' column names
332 |   df = data.frame('subsample' = colnames(mat), stringsAsFactors = FALSE)
333 |   df$sample = sapply(strsplit(df$subsample, split = '.', fixed = TRUE), '[', 1)
334 |   df$cat = sapply(strsplit(df$subsample, split = '.', fixed = TRUE), '[', 2)
335 |   df$cancer = sapply(as.character(df$sample), function(x){
336 |     y = sapply(strsplit(x, ''), '[', 1:4)  # first four characters of the sample name
337 |     paste0(y, collapse = '')
338 |   })
339 |   df$system = factor(corr[as.character(df$cancer),'system'])
340 |   df$germlayer = factor(corr[as.character(df$cancer),'germlayer'])
341 |   top_ann = HeatmapAnnotation(df = df[,c('sample','cat')],
342 |                               col = list('sample' = colors, 'cat' = colors.cat),
343 |                               which = 'column')
344 |   h = Heatmap(mat, name = measure,
345 |               top_annotation = top_ann,
346 |               cluster_rows = FALSE, cluster_columns = FALSE,
347 |               show_row_names = TRUE, show_column_names = TRUE,
348 |               row_names_gp = gpar(col = colors.module[rownames(mat)]), column_names_gp = gpar(col = colors[colnames(mat)]),
349 |               breaks = seq(quantile(unlist(mat), seq(1:10)/10, na.rm = TRUE)['10%'], quantile(unlist(mat), seq(1:10)/10, na.rm = TRUE)['90%'], length = 11), colors = viridis_pal(begin = 0, end = 1, option = 'magma')(11))  # colour scale spans the 10%-90% quantiles of the matrix
350 |   print(h)
351 |   decorate_heatmap_body(measure, {  # overlay a black grid delimiting every heatmap cell
352 |     #l = c(0,cumsum(rle(as.character(cancer))$lengths))
353 |     l = 0:ncol(mat)
354 |     for (k in seq_along(l)){
355 |       i = unit(l[k]/max(l), 'npc')
356 |       grid.lines(i, c(0,1), gp = gpar(lwd = 1, col = 'black'))
357 |     }
358 |     l = 0:nrow(mat)
359 |     for (k in seq_along(l)){
360 |       i = unit(l[k]/max(l), 'npc')
361 |       grid.lines(c(0,1), i, gp = gpar(lwd = 1, col = 'black'))
362 |     }
363 |   })
364 |   # Paired normal-vs-tumor plots for four modules, faceted by cancer
365 |   p.list = lapply(c('Squamous','Glandular','pEMT','Interferon'), function(m){
366 |     sub = stats.paired[stats.paired$measure == measure & stats.paired$module == m,]
367 |     sub = sub[order(sub$cat), ]
368 |     p = ggpaired(sub, x = 'cat', y = 'value',
369 |                color = 'cat', line.color = 'grey', line.size = 0.4, facet.by = 'cancer') +
370 |       stat_compare_means(paired = TRUE) +
371 |       scale_color_manual(values = colors.cat) +
372 |       ggtitle(m) +
373 |       theme_classic() +
374 |       theme(
375 |         plot.title = element_text(hjust = 0.5),
376 |         axis.text.x = element_text(size = 10),
377 |         axis.title.x = element_text(size = 0),
378 |         axis.title.y = element_text(size = 0),
379 |         legend.position = 'none')
380 |   })
381 |   print(CombinePlots(p.list, ncol = 4))  # ncol is an argument of CombinePlots; passed to print() it had no effect
382 |   # Same paired plots for the remaining nine modules, laid out in three columns
383 |   p.list = lapply(c('Cycle','Stress','Hypoxia',
384 |                     'Oxphos','Metal','Mesenchymal',
385 |                     'Alveolar','Basal','Ciliated'), function(m){
386 |     sub = stats.paired[stats.paired$measure == measure & stats.paired$module == m,]
387 |     sub = sub[order(sub$cat), ]
388 |     p = ggpaired(sub, x = 'cat', y = 'value',
389 |                  color = 'cat', line.color = 'grey', line.size = 0.4, facet.by = 'cancer') +
390 |       stat_compare_means(paired = TRUE) +
391 |       scale_color_manual(values = colors.cat) +
392 |       ggtitle(m) +
393 |       theme_classic() +
394 |       theme(
395 |         plot.title = element_text(hjust = 0.5),
396 |         axis.text.x = element_text(size = 10),
397 |         axis.title.x = element_text(size = 0),
398 |         axis.title.y = element_text(size = 0),
399 |         legend.position = 'none')
400 |   })
401 |   print(CombinePlots(p.list, ncol = 3))  # ncol moved inside CombinePlots so the 3-column layout is actually applied
402 |   
403 | }
404 | 
405 | 
406 | # #### All ####
407 | # 
408 | # srt.list.paired.all = lapply(files.paired, function(f){
409 | #   loadRData(paste0(f,'srt.all.RData'))
410 | # })
411 | # 
412 | # srt.paired.all = Reduce(merge2, srt.list.paired.all)
413 | # srt.paired.all = RunPCA(srt.paired.all)
414 | # srt.paired.all = RunUMAP(srt.paired.all, dims = 1:10)
415 | # save(srt.paired.all, file = 'srt.paired.all.RData')
416 | # 
417 | # colors.pop = hue_pal()(nlevels(srt.paired.all$pop))
418 | # names(colors.pop) = levels(srt.paired.all$pop)
419 | # colors.pop['Malignant'] = 'black'
420 | # save(colors.pop, file = 'colors.pop.paired.RData')
421 | # 
422 | # o = sample(1:ncol(srt.paired.all), size = ncol(srt.paired.all), replace = FALSE)
423 | # h = DimPlot(srt.paired.all, group.by = 'orig.ident', cols = colors, order = o, pt.size = 1, label = TRUE)
424 | # print(h)
425 | # h = DimPlot(srt.paired.all, group.by = 'orig.ident', cols = colors, order = o, pt.size = 1)
426 | # print(h)
427 | # h = DimPlot(srt.paired.all, group.by = 'type', cols = colors.type, order = o, pt.size = 1, label = TRUE)
428 | # print(h)
429 | # h = DimPlot(srt.paired.all, group.by = 'type', cols = colors.type, order = o, pt.size = 1)
430 | # print(h)
431 | # h = DimPlot(srt.paired.all, group.by = 'pop', cols = colors.pop, order = o, pt.size = 1, label = TRUE)
432 | # print(h)
433 | # h = DimPlot(srt.paired.all, group.by = 'pop', cols = colors.pop, order = o, pt.size = 1, label = FALSE)
434 | # print(h)
435 | # 
436 | # srt.paired.tme = srt.paired.all[, !srt.paired.all$pop == 'Malignant']
437 | # srt.paired.tme = RunPCA(srt.paired.tme)
438 | # srt.paired.tme = RunUMAP(srt.paired.tme, dims = 1:10)
439 | # save(srt.paired.tme, file = 'srt.paired.tme.RData')
440 | 
441 | 
442 | dev.off()  # close the active graphics device (presumably the PDF opened earlier in this script)


--------------------------------------------------------------------------------
/MergingPrimary.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | source('~/Documents/R/seurat_functions.R')
  3 | setwd('~/Documents/Analysis/Tumors/')
  4 | 
  5 | load('parameters.RData')
  6 | 
  7 | #### Primary ####
  8 | 
  9 | pdf('MergingPrimary.pdf', height = 10, width = 12)  # plots below go to this PDF until the dev.off() at the end of the script
 10 | 
 11 | modules = loadRData('modules.RData')  # used below as a named list of gene vectors (names = module names)
 12 | colors.module = loadRData('colors.module.RData')  # indexed by module name to colour plots
 13 | types.module = loadRData('types.module.RData')  # indexed by module name; compared against 'Type' in the network plots
 14 | 
 15 | #### Malignant ####
 16 | 
 17 | ## Srt
 18 | 
 19 | srt.list.primary = lapply(files.primary, function(f){  # one scored object per primary sample, read from '<f>srt.scored.RData'
 20 |   loadRData(paste0(f,'srt.scored.RData'))
 21 | })
 22 | cancer = factor(apply(sapply(strsplit(names(files.primary), ''), '[', 1:4), 2, paste0, collapse = ''))  # first four characters of each sample name
 23 | system = factor(corr[as.character(cancer),'system'])  # corr is defined outside this chunk; indexed here by cancer code
 24 | germlayer = factor(corr[as.character(cancer),'germlayer'])
 25 | 
 26 | for (s in names(srt.list.primary)){  # per-sample plots, then add 'state' and 'num' to the metadata
 27 |   srt = srt.list.primary[[s]]
 28 |   h = DimPlot(srt, group.by = 'orig.ident', cols = colors)
 29 |   print(h)
 30 |   plot.list = lapply(names(modules), function(m){  # one FeaturePlot per module score
 31 |     h = FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m])) +
 32 |       NoAxes() + NoLegend() +
 33 |       theme(plot.title = element_text(size = 10))
 34 |   })
 35 |   print(CombinePlots(plot.list, ncol = 4))
 36 |   tryCatch(expr = {  # tSNE feature plots split by 'cat'
 37 |     srt = RunTSNE(srt, dims = 1:10)
 38 |     plot.list = lapply(names(modules), function(m){
 39 |       h = FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m]), split.by = 'cat', reduction = 'tsne') +
 40 |         NoAxes() + NoLegend() +
 41 |         theme(plot.title = element_text(size = 10))
 42 |     })
 43 |     print(CombinePlots(plot.list, ncol = 4))
 44 |   }, error = function(e){c()})  # any error raised above is discarded, so this page is simply skipped for that sample
 45 |   srt$state = apply(srt@meta.data[,names(modules)], 1, function(x){  # label each cell with its highest-scoring module
 46 |     top = which.max(x)
 47 |     return(names(x)[top])
 48 |   })
 49 |   srt$state = factor(srt$state, levels = names(modules))
 50 |   h = DimPlot(srt, group.by = 'state', cols = colors.module)
 51 |   print(h)
 52 |   srt$num = rowSums(srt@meta.data[,names(modules)] > 0.5)  # number of modules scoring above 0.5 in each cell
 53 |   h = FeaturePlot(srt, 'num', cols = c('grey','darkred'))
 54 |   print(h)
 55 |   srt.list.primary[[s]] = srt  # write the annotated object back into the list
 56 | }
 57 | 
 58 | srt.primary = Reduce(merge2, srt.list.primary)  # fold all samples into one object; merge2 is defined outside this chunk
 59 | srt.primary = RunPCA(srt.primary)
 60 | srt.primary = RunUMAP(srt.primary, dims = 1:10)
 61 | 
 62 | h = DimPlot(srt.primary, group.by = 'orig.ident', cols = colors, label = TRUE)
 63 | print(h)
 64 | h = DimPlot(srt.primary, group.by = 'orig.ident', cols = colors)
 65 | print(h)
 66 | plot.list = lapply(names(modules), function(m){  # one FeaturePlot per module score on the merged embedding
 67 |   h = FeaturePlot(srt.primary, features = m, pt.size = 0.5, cols = c('grey',colors.module[m])) +
 68 |     NoAxes() + NoLegend() +
 69 |     theme(plot.title = element_text(size = 10))
 70 | })
 71 | print(CombinePlots(plot.list, ncol = 4))
 72 | srt.primary$state = apply(srt.primary@meta.data[,names(modules)], 1, function(x){  # highest-scoring module per cell
 73 |   top = which.max(x)
 74 |   return(names(x)[top])
 75 | })
 76 | srt.primary$state = factor(srt.primary$state, levels = names(modules))
 77 | h = DimPlot(srt.primary, group.by = 'state', cols = colors.module)
 78 | print(h)
 79 | 
 80 | save(srt.primary, file = 'srt.primary.RData')  # saved before the SCTransform step below; the transformed object is not saved in this script
 81 | 
 82 | srt.primary = SCTransform(srt.primary, return.only.var.genes = FALSE)
 83 | srt.primary = RunPCA(srt.primary)
 84 | srt.primary = RunUMAP(srt.primary, dims = 1:10)
 85 | #srt.primary = GeneToEnrichment(srt.primary, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
 86 | 
 87 | h = DimPlot(srt.primary, group.by = 'orig.ident', cols = colors, label = TRUE)  # same plots as above, now on the post-SCTransform embedding
 88 | print(h)
 89 | h = DimPlot(srt.primary, group.by = 'orig.ident', cols = colors)
 90 | print(h)
 91 | plot.list = lapply(names(modules), function(m){
 92 |   h = FeaturePlot(srt.primary, features = m, pt.size = 1, cols = c('grey',colors.module[m])) +
 93 |     NoAxes() + NoLegend() +
 94 |     theme(plot.title = element_text(size = 10))
 95 | })
 96 | print(CombinePlots(plot.list, ncol = 4))
 97 | srt.primary$state = apply(srt.primary@meta.data[,names(modules)], 1, function(x){  # re-derived from the metadata scores (rescoring is commented out above)
 98 |   top = which.max(x)
 99 |   return(names(x)[top])
100 | })
101 | srt.primary$state = factor(srt.primary$state, levels = names(modules))
102 | h = DimPlot(srt.primary, group.by = 'state', cols = colors.module)
103 | print(h)
104 | 
105 | # ## TSNE
106 | # 
107 | # meta = Reduce(rbind, lapply(srt.list.primary, function(srt){
108 | #   srt@meta.data[, names(modules)]
109 | # }))
110 | # ts = tsne(meta, max_iter = max_iter, perplexity = perplexity)
111 | # 
112 | # ts = data.frame(ts)
113 | # colnames(ts) = c('x','y')
114 | # ts$state = unlist(lapply(srt.list.primary, function(srt){srt$state}))
115 | # ts$num = unlist(lapply(srt.list.primary, function(srt){srt$num}))
116 | # ts$orig.ident = unlist(lapply(srt.list.primary, function(srt){srt$orig.ident}))
117 | # for (m in names(modules)){
118 | #   ts[,m] = as.numeric(unlist(lapply(srt.list.primary, function(srt){srt@meta.data[,m]})))
119 | # }
120 | # 
121 | # distances = as.matrix(dist(t(dist(ts[,c('x','y')])), method = 'euclidean'))
122 | # ts$entropy = sapply(1:nrow(ts), function(c){
123 | #   nn = order(distances[,c])[1:20]
124 | #   ta = table(ts$orig.ident[nn])
125 | #   ta = ta/sum(ta)
126 | #   ta = ta[!ta == 0]
127 | #   return(sum(ta*log(ta)))
128 | # })
129 | # ts$mentropy = sapply(1:nrow(ts), function(c){
130 | #   nn = order(distances[,c])[1:20]
131 | #   ta = colSums(ts[nn,names(modules)] > 0.5)
132 | #   ta = ta/sum(ta)
133 | #   ta = ta[!ta == 0]
134 | #   return(sum(ta*log(ta)))
135 | # })
136 | # 
137 | # p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +
138 | #   geom_point() +
139 | #   scale_fill_manual(values = colors.module) +
140 | #   scale_color_manual(values = colors.module) +
141 | #   theme_classic() +
142 | #   theme(
143 | #     panel.border = element_rect(fill = NA),
144 | #     legend.position = "none",
145 | #     plot.title = element_text(hjust = 0.5),
146 | #     axis.line.x = element_blank(),
147 | #     axis.line.y = element_blank(),
148 | #     axis.ticks.x = element_blank(),
149 | #     axis.ticks.y = element_blank(),
150 | #     axis.text.x = element_blank(),
151 | #     axis.text.y = element_blank(),
152 | #     axis.title.x = element_blank(),
153 | #     axis.title.y = element_blank())
154 | # print(p)
155 | # p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) +
156 | #   geom_point() +
157 | #   scale_fill_manual(values = colors) +
158 | #   scale_color_manual(values = colors) +
159 | #   theme_classic() +
160 | #   theme(
161 | #     panel.border = element_rect(fill = NA),
162 | #     legend.position = "none",
163 | #     plot.title = element_text(hjust = 0.5),
164 | #     axis.line.x = element_blank(),
165 | #     axis.line.y = element_blank(),
166 | #     axis.ticks.x = element_blank(),
167 | #     axis.ticks.y = element_blank(),
168 | #     axis.text.x = element_blank(),
169 | #     axis.text.y = element_blank(),
170 | #     axis.title.x = element_blank(),
171 | #     axis.title.y = element_blank())
172 | # print(p)
173 | # p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'entropy')) +
174 | #   geom_point() +
175 | #   scale_color_gradientn(colors = c("lightgrey", "slateblue4")) +
176 | #   ggtitle('entropy') +
177 | #   theme_classic() +
178 | #   theme(
179 | #     panel.border = element_rect(fill = NA),
180 | #     legend.position = "none",
181 | #     plot.title = element_text(hjust = 0.5),
182 | #     axis.line.x = element_blank(),
183 | #     axis.line.y = element_blank(),
184 | #     axis.ticks.x = element_blank(),
185 | #     axis.ticks.y = element_blank(),
186 | #     axis.text.x = element_blank(),
187 | #     axis.text.y = element_blank(),
188 | #     axis.title.x = element_blank(),
189 | #     axis.title.y = element_blank())
190 | # print(p)
191 | # p = ggplot(ts, aes_string(x = 'x', y = 'y', color = 'num')) +
192 | #   geom_point() +
193 | #   scale_color_gradientn(colors = c("lightgrey", "darkred")) +
194 | #   ggtitle('entropy') +
195 | #   theme_classic() +
196 | #   theme(
197 | #     panel.border = element_rect(fill = NA),
198 | #     legend.position = "none",
199 | #     plot.title = element_text(hjust = 0.5),
200 | #     axis.line.x = element_blank(),
201 | #     axis.line.y = element_blank(),
202 | #     axis.ticks.x = element_blank(),
203 | #     axis.ticks.y = element_blank(),
204 | #     axis.text.x = element_blank(),
205 | #     axis.text.y = element_blank(),
206 | #     axis.title.x = element_blank(),
207 | #     axis.title.y = element_blank())
208 | # print(p)
209 | # p = ggplot(ts, aes(x = x, y = y, fill = orig.ident, color = orig.ident)) +
210 | #   geom_point() +
211 | #   scale_fill_manual(values = colors) +
212 | #   scale_color_manual(values = colors) +
213 | #   facet_wrap(~ orig.ident, ncol = 6) +
214 | #   theme_classic() +
215 | #   theme(
216 | #     panel.border = element_rect(fill = NA),
217 | #     legend.position = "none",
218 | #     plot.title = element_text(hjust = 0.5),
219 | #     axis.line.x = element_blank(),
220 | #     axis.line.y = element_blank(),
221 | #     axis.ticks.x = element_blank(),
222 | #     axis.ticks.y = element_blank(),
223 | #     axis.text.x = element_blank(),
224 | #     axis.text.y = element_blank(),
225 | #     axis.title.x = element_blank(),
226 | #     axis.title.y = element_blank())
227 | # print(p)
228 | # p = ggplot(ts, aes(x = x, y = y, fill = state, color = state)) +
229 | #   geom_point() +
230 | #   scale_fill_manual(values = colors.module) +
231 | #   scale_color_manual(values = colors.module) +
232 | #   facet_wrap(~ orig.ident, ncol = 6) +
233 | #   theme_classic() +
234 | #   theme(
235 | #     panel.border = element_rect(fill = NA),
236 | #     legend.position = "none",
237 | #     plot.title = element_text(hjust = 0.5),
238 | #     axis.line.x = element_blank(),
239 | #     axis.line.y = element_blank(),
240 | #     axis.ticks.x = element_blank(),
241 | #     axis.ticks.y = element_blank(),
242 | #     axis.text.x = element_blank(),
243 | #     axis.text.y = element_blank(),
244 | #     axis.title.x = element_blank(),
245 | #     axis.title.y = element_blank())
246 | # print(p)
247 | # plot.list = lapply(names(modules), function(m){
248 | #   p = ggplot(ts, aes_string(x = 'x', y = 'y', color = m)) +
249 | #     geom_point(size = 0.25) +
250 | #     scale_color_gradientn(colors = c("lightgrey", colors.module[m])) +
251 | #     ggtitle(m) +
252 | #     theme_classic() +
253 | #     theme(
254 | #       panel.border = element_rect(fill = NA),
255 | #       legend.position = "none",
256 | #       plot.title = element_text(hjust = 0.5),
257 | #       axis.line.x = element_blank(),
258 | #       axis.line.y = element_blank(),
259 | #       axis.ticks.x = element_blank(),
260 | #       axis.ticks.y = element_blank(),
261 | #       axis.text.x = element_blank(),
262 | #       axis.text.y = element_blank(),
263 | #       axis.title.x = element_blank(),
264 | #       axis.title.y = element_blank())
265 | # })
266 | # print(CombinePlots(plot.list, ncol = 4))
267 | 
268 | ## Stats
269 | 
270 | # Load each primary sample's stats.RData, keeping only the module columns.
271 | stats.list.primary = lapply(files.primary, function(prefix){
272 |   path = paste0(prefix, 'stats.RData')
273 |   loadRData(path)[, names(modules)]
274 | })
275 | 
276 | for (s in names(stats.list.primary)){  # append 'fraction', 'average' and 'num' rows to each sample's stats table
277 |   stats = stats.list.primary[[s]]
278 |   srt = srt.primary[, srt.primary$orig.ident == s]  # cells of sample s taken from the merged object
279 |   scores = srt@meta.data[,names(modules)]
280 |   fraction = colMeans(scores > 0.5, na.rm = TRUE)  # per module: share of cells scoring above 0.5
281 |   data = GetData(srt, slot = 'scale.data')  # GetData is defined outside this chunk; presumably the scaled expression matrix (genes x cells)
282 |   average = sapply(modules, function(mod){
283 |     mean(data[intersect(mod,rownames(data)),])  # mean over the module genes present in data, all cells
284 |   })
285 |   num = mean(srt$num, na.rm = TRUE)  # mean of the per-cell 'num' metadata column
286 |   stats = rbind(stats, fraction, average, num)  # new rows are named after the variables; the scalar num is repeated across columns
287 |   stats.list.primary[[s]] = stats
288 | }
289 | 
290 | stats.primary = melt(lapply(stats.list.primary, function(stats){  # stack all per-sample tables into one long data frame
291 |   melt(stats)
292 | }))
293 | names(stats.primary) = c('measure','module','variable','value','sample')
294 | stats.primary$cancer = factor(apply(sapply(strsplit(stats.primary$sample, ''), '[', 1:4), 2, paste0, collapse = ''))  # first four characters of the sample name
295 | stats.primary$system = factor(corr[as.character(stats.primary$cancer),'system'])
296 | stats.primary$germlayer = factor(corr[as.character(stats.primary$cancer),'germlayer'])
297 | save(stats.primary, file = 'stats.primary.RData')
298 | 
299 | for (measure in levels(stats.primary$measure)){
300 |   # Reshape the long stats table into a module x sample table for this measure.
301 |   long = stats.primary[stats.primary$measure == measure, c('value','module','sample')]
302 |   mat = reshape(long, timevar = 'sample', idvar = 'module', direction = 'wide')
303 |   rownames(mat) = mat[,1]
304 |   mat = mat[,-1]
305 |   colnames(mat) = gsub('value.', '', colnames(mat), fixed = TRUE)
306 |   # Column annotation table: one row per heatmap column.
307 |   df = data.frame('subsample' = colnames(mat), stringsAsFactors = FALSE)
308 |   df$sample = sapply(strsplit(df$subsample, split = '.', fixed = TRUE), '[', 1)
309 |   df$cancer = sapply(as.character(df$sample), function(x){
310 |     # cancer code = first four characters of the sample name
311 |     paste0(strsplit(x, '')[[1]][1:4], collapse = '')
312 |   })
313 |   df$system = factor(corr[as.character(df$cancer),'system'])
314 |   df$germlayer = factor(corr[as.character(df$cancer),'germlayer'])
315 |   top_ann = HeatmapAnnotation(df = df[,c('sample','system','germlayer')],
316 |                               col = list('sample' = colors, 'system' = colors.system, 'germlayer' = colors.germlayer),
317 |                               which = 'column')
318 |   deciles = quantile(unlist(mat), seq(1:10)/10, na.rm = TRUE)  # colour scale runs from the 10% to the 90% quantile
319 |   h = Heatmap(mat, name = measure, top_annotation = top_ann,
320 |               cluster_rows = FALSE, cluster_columns = FALSE,
321 |               show_row_names = TRUE, show_column_names = TRUE,
322 |               row_names_gp = gpar(col = colors.module[rownames(mat)]), column_names_gp = gpar(col = colors[colnames(mat)]),
323 |               breaks = seq(deciles['10%'], deciles['90%'], length = 11), colors = viridis_pal(begin = 0, end = 1, option = 'magma')(11))
324 |   print(h)
325 |   decorate_heatmap_body(measure, {
326 |     #l = c(0,cumsum(rle(as.character(cancer))$lengths))
327 |     # vertical cell borders, evenly spaced across the heatmap body
328 |     for (pos in (0:ncol(mat))/ncol(mat)){
329 |       grid.lines(unit(pos, 'npc'), c(0,1),
330 |                  gp = gpar(lwd = 1, col = 'black'))
331 |     }
332 |     # horizontal cell borders
333 |     for (pos in (0:nrow(mat))/nrow(mat)){
334 |       grid.lines(c(0,1), unit(pos, 'npc'),
335 |                  gp = gpar(lwd = 1, col = 'black'))
336 |     }
337 |   })
338 |   
339 | }
340 | 
341 | ## Co-occurrence
342 | 
343 | par(mfrow = c(2,2))  # four network plots per page
344 | coocc.list = list()
345 | occ.list = list()
346 | for (s in names(srt.list.primary)){
347 |   # Per-sample module co-occurrence, stored for the summaries below and drawn as a network
348 |   srt = srt.list.primary[[s]]
349 |   stats = stats.list.primary[[s]]
350 |   meta = srt@meta.data[, names(modules)]
351 |   # Module occurrence is read from the 'frequency' row of the stats table
352 |   occ = stats['frequency',]
353 |   # Signed score per module pair: positive = co-occur more than expected, negative = less (hypergeometric test on cells scoring > 0.5)
354 |   coocc = sapply(names(modules), function(m1){
355 |     sapply(names(modules), function(m2){
356 |       pval = phyper(
357 |         sum((meta[, m1] > 0.5) * (meta[, m2] > 0.5)),
358 |         sum(meta[, m2] > 0.5),
359 |         dim(meta)[1] - sum(meta[, m2] > 0.5),
360 |         sum(meta[, m1] > 0.5),
361 |         lower.tail = FALSE)
362 |       if (pval <= 0.5){
363 |         return(-log10(pval))
364 |       }
365 |       if (pval > 0.5){
366 |         return(log10(1-pval))
367 |       }
368 |     })
369 |   })
370 |   # Replace +Inf / -Inf by the largest / smallest finite score
371 |   coocc[is.infinite(coocc) & coocc > 0] = max(coocc[is.finite(coocc)])
372 |   coocc[is.infinite(coocc) & coocc < 0] = min(coocc[is.finite(coocc)])
373 |   
374 |   occ.list[[s]] = occ
375 |   coocc.list[[s]] = coocc
376 |   # For plotting keep modules with presence > 0.5 and frequency > 0.05; drop weak scores and clip to [-10, 10]
377 |   keep = names(which(stats['presence',] > 0.5 & stats['frequency',] > 0.05))
378 |   occ = occ[keep]
379 |   coocc = coocc[keep, keep]
380 |   coocc[ coocc > -0.5 & coocc < 0.5] = 0
381 |   coocc[coocc > 10] = 10
382 |   coocc[coocc < -10] = -10
383 |   
384 |   tryCatch(expr = {
385 |     g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
386 |     V(g)$type = (types.module[names(V(g))] == 'Type')
387 |     coords = layout.circle(g)
388 |     E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
389 |     E(g)$width = 5*abs(E(g)$weight)/max(abs(E(g)$weight))  # scale by the largest |weight|: widths stay in (0, 5] even when every edge is negative
390 |     V(g)$color = colors.module[names(V(g))]
391 |     V(g)$size = 50*occ/max(occ)
392 |     # Plot graph
393 |     plot(g, main = s,
394 |          layout = coords,
395 |          vertex.shape = 'circle',
396 |          vertex.label.size = 1, vertex.label.color = 'black',
397 |          frame=FALSE)
398 |   }, error = function(e){c()})  # best effort: a sample whose graph cannot be built or drawn is skipped
399 | }
400 | 
401 | for (can in levels(cancer)){  # one summary network per cancer type
402 |   coocc = SummarizeMatrices(coocc.list[cancer == can], median)  # SummarizeMatrices is defined outside this chunk; presumably an element-wise median across samples
403 |   occ = sapply(names(modules), function(m){median(sapply(occ.list[cancer == can], '[', m))})  # median occurrence of each module across the samples of this cancer
404 |   keep = (names(which(occ >= 0.05)))  # plot only modules with median occurrence >= 0.05
405 |   occ = occ[keep]
406 |   coocc = coocc[keep, keep]
407 |   coocc[ coocc > -0.5 & coocc < 0.5] = 0  # weak scores become 0 (no edge)
408 |   coocc[coocc > 10] = 10  # clip to [-10, 10], the range of the colour ramp
409 |   coocc[coocc < -10] = -10
410 |   tryCatch(expr = {
411 |     g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
412 |     V(g)$type = (types.module[names(V(g))] == 'Type')
413 |     coords = layout.circle(g)
414 |     E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
415 |     E(g)$width = abs(E(g)$weight)/5  # at most 2 after clipping
416 |     V(g)$color = colors.module[names(V(g))]
417 |     V(g)$size = occ*200  # vertex size proportional to median occurrence
418 |     # Plot graph
419 |     plot(g, main = can,
420 |          layout = coords,
421 |          vertex.shape = 'circle', 
422 |          vertex.label.size = 1, vertex.label.color = 'black',
423 |          frame=FALSE)
424 |   }, error = function(e){c()})  # errors are discarded, so a failing group produces no plot
425 | }
426 | 
427 | for (sys in levels(system)){  # one summary network per system
428 |   coocc = SummarizeMatrices(coocc.list[system == sys], median)  # SummarizeMatrices is defined outside this chunk; presumably an element-wise median across samples
429 |   occ = sapply(names(modules), function(m){median(sapply(occ.list[system == sys], '[', m))})  # median occurrence of each module across the samples of this system
430 |   keep = (names(which(occ >= 0.05)))  # plot only modules with median occurrence >= 0.05
431 |   occ = occ[keep]
432 |   coocc = coocc[keep, keep]
433 |   coocc[ coocc > -0.5 & coocc < 0.5] = 0  # weak scores become 0 (no edge)
434 |   coocc[coocc > 10] = 10  # clip to [-10, 10], the range of the colour ramp
435 |   coocc[coocc < -10] = -10
436 |   tryCatch(expr = {
437 |     g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
438 |     V(g)$type = (types.module[names(V(g))] == 'Type')
439 |     coords = layout.circle(g)
440 |     E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
441 |     E(g)$width = abs(E(g)$weight)/5  # at most 2 after clipping
442 |     V(g)$color = colors.module[names(V(g))]
443 |     V(g)$size = occ*200  # vertex size proportional to median occurrence
444 |     # Plot graph
445 |     plot(g, main = sys,
446 |          layout = coords,
447 |          vertex.shape = 'circle', 
448 |          vertex.label.size = 1, vertex.label.color = 'black',
449 |          frame=FALSE)
450 |   }, error = function(e){c()})  # errors are discarded, so a failing group produces no plot
451 | }
452 | 
453 | coocc = SummarizeMatrices(coocc.list, median)  # summary over all samples; SummarizeMatrices is defined outside this chunk
454 | occ = sapply(names(modules), function(m){median(sapply(occ.list, '[', m))})  # median occurrence of each module across all samples
455 | keep = (names(which(occ >= 0.05)))  # plot only modules with median occurrence >= 0.05
456 | occ = occ[keep]
457 | coocc = coocc[keep, keep]
458 | coocc[coocc > -0.5 & coocc < 0.5] = 0  # weak scores become 0 (no edge)
459 | coocc[coocc > 10] = 10  # clip to [-10, 10], the range of the colour ramp
460 | coocc[coocc < -10] = -10
461 | tryCatch(expr = {
462 |   g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')
463 |   V(g)$type = (types.module[names(V(g))] == 'Type')
464 |   coords = layout.circle(g)
465 |   E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
466 |   E(g)$width = abs(E(g)$weight)/5  # at most 2 after clipping
467 |   V(g)$color = colors.module[names(V(g))]
468 |   V(g)$size = occ*200  # vertex size proportional to median occurrence
469 |   # Plot graph
470 |   plot(g, main = 'All',
471 |        layout = coords,
472 |        vertex.shape = 'circle', 
473 |        vertex.label.size = 1, vertex.label.color = 'black',
474 |        frame=FALSE)
475 | }, error = function(e){c()})  # errors are discarded, so a failure produces no plot
476 | 
477 | par(mfrow = c(1,1))  # back to one plot per page
478 | 
479 | 
480 | #### All ####
481 | 
482 | srt.list.primary.all = lapply(files.primary, function(f){  # load '<f>srt.all.RData' for every sample, tolerating failures
483 |   tryCatch(expr = {
484 |     loadRData(paste0(f,'srt.all.RData'))
485 |   }, error = function(e){c()})  # a failed load yields NULL
486 | })
487 | srt.list.primary.all = srt.list.primary.all[sapply(srt.list.primary.all, length) > 0]  # drop the samples that failed to load
488 | 
489 | srt.primary.all = Reduce(merge2, srt.list.primary.all)  # merge2 is defined outside this chunk
490 | srt.primary.all = RunPCA(srt.primary.all)
491 | srt.primary.all = RunUMAP(srt.primary.all, dims = 1:10)
492 | save(srt.primary.all, file = 'srt.primary.all.RData')
493 | 
494 | colors.pop = hue_pal()(nlevels(srt.primary.all$pop))  # one hue per level of 'pop'
495 | names(colors.pop) = levels(srt.primary.all$pop)
496 | colors.pop['Malignant'] = 'black'  # the 'Malignant' population is always drawn in black
497 | save(colors.pop, file = 'colors.pop.primary.RData')
498 | 
499 | o = sample(1:ncol(srt.primary.all), size = ncol(srt.primary.all), replace = FALSE)  # random permutation of the cell indices, passed as 'order' below
500 | h = DimPlot(srt.primary.all, group.by = 'orig.ident', cols = colors, order = o, pt.size = 1, label = TRUE)
501 | print(h)
502 | h = DimPlot(srt.primary.all, group.by = 'orig.ident', cols = colors, order = o, pt.size = 1)
503 | print(h)
504 | h = DimPlot(srt.primary.all, group.by = 'type', cols = colors.type, order = o, pt.size = 1, label = TRUE)
505 | print(h)
506 | h = DimPlot(srt.primary.all, group.by = 'type', cols = colors.type, order = o, pt.size = 1)
507 | print(h)
508 | h = DimPlot(srt.primary.all, group.by = 'pop', cols = colors.pop, order = o, pt.size = 1, label = TRUE)
509 | print(h)
510 | h = DimPlot(srt.primary.all, group.by = 'pop', cols = colors.pop, order = o, pt.size = 1, label = FALSE)
511 | print(h)
512 | 
513 | srt.primary.tme = srt.primary.all[, !srt.primary.all$pop == 'Malignant']  # every cell whose 'pop' is not 'Malignant'
514 | srt.primary.tme = RunPCA(srt.primary.tme)
515 | srt.primary.tme = RunUMAP(srt.primary.tme, dims = 1:10)
516 | save(srt.primary.tme, file = 'srt.primary.tme.RData')
517 | 
518 | # Frequency regression
519 | 
520 | freq = t(sapply(levels(srt.primary$orig.ident), function(or){  # samples x modules: share of cells scoring above 0.5
521 |   colMeans(srt.primary@meta.data[srt.primary$orig.ident == or, names(modules)] > 0.5, na.rm = TRUE)
522 | }))
523 | tme = table(srt.primary.tme$orig.ident, srt.primary.tme$pop)  # samples x populations cell counts; NOTE(review): the regressions below assume these rows line up with the rows of freq - confirm
524 | exclude = c('Malignant','Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell',
525 |             'CMP','Erythroblast','GMP','Gametocytes','GMP','HSC_CD34+','Pre-B_cell_CD34-','Pro-B_cell_CD34+','Pro-Myelocyte')  # populations left out of the analysis ('GMP' is listed twice, which setdiff tolerates)
526 | tme = tme[, setdiff(colnames(tme), exclude)]
527 | tme = tme/rowSums(tme)  # convert counts to per-sample fractions of the retained populations
528 | 
529 | # Multiple regression
530 | test = sapply(names(modules), function(m){  # per module: signed -log10 p-value of every regression term
531 |   fit = summary(lm(freq[,m] ~ tme))$coefficients  # regress the module frequency on all population fractions at once
532 |   pval = -log10(fit[,4])  # column 4 of the coefficient table holds the p-values
533 |   pval[pval < -log10(pval_thresh)] = 0  # zero out terms that do not reach the threshold (pval_thresh is on the raw p-value scale)
534 |   names(pval) = gsub('tme','',names(pval))  # strip the 'tme' prefix from the term names
535 |   est = fit[,1]  # column 1 holds the coefficient estimates
536 |   names(est) = gsub('tme','',names(est))
537 |   val = pval*sign(est)  # sign follows the direction of the association
538 |   return(val)
539 | })
540 | h = Heatmap(test, 
541 |             show_row_names = TRUE, show_column_names = TRUE,
542 |             cluster_rows = FALSE, cluster_columns = FALSE,
543 |             breaks = seq(-3, 3, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
544 | print(h)
545 | 
546 | # Plot hits
547 | par(mfrow = c(4,4))  # sixteen scatter plots per page
548 | for (ct in colnames(tme)){
549 |   for (m in names(modules)){
550 |     fit = summary(lm(freq[,m] ~ tme[,ct]))$coefficients  # simple regression of module frequency on one population fraction
551 |     pval = -log10(fit[2,4])  # -log10 p-value of the slope, shown as the plot title
552 |     plot(freq[,m] ~ tme[,ct], col = colors[rownames(tme)], pch = 20, xlab = ct, ylab = m, main = round(pval, digits = 2))
553 |     if (pval > -log10(pval_thresh)){  # pval is on the -log10 scale, so compare it with the transformed threshold, as the multiple regression above does
554 |       abline(a = fit[1,1], b = fit[2,1], lty = 2)  # dashed fitted line for significant associations only
555 |     }
556 |   }
557 | }
558 | par(mfrow = c(1,1))  # back to one plot per page
559 | 
560 | dev.off()  # close the PDF opened at the top of the script
561 | 


--------------------------------------------------------------------------------
/Scoring.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | source('~/Documents/R/seurat_functions.R')
  3 | setwd('~/Documents/Analysis/Tumors/')
  4 | 
  5 | load('parameters.RData')
  6 | modules = loadRData('modules.RData')
  7 | colors.module = loadRData('colors.module.RData')
  8 | 
  9 | ## All: score every sample directory in files.all against the modules, then save the scored object, a PDF of plots and summary stats
 10 | 
 11 | mclapply(files.all, function(f){
 12 |   setwd(f)  # relative paths below resolve inside this sample's directory
 13 |   srt = loadRData('srt.malignant.RData')
 14 | 
 15 |   modules_rand = MakeRand(srt, db = modules, nrand = nrand, nbin = nbin)  # presumably the random control sets used by method = 'rand' — TODO confirm
 16 | 
 17 |   ini = matrix(0,nrow = ncol(srt), ncol = length(modules))  # pre-fill every module column with 0 so it exists even if scoring that module fails
 18 |   rownames(ini) = colnames(srt)
 19 |   colnames(ini) = names(modules)
 20 |   srt@meta.data[,names(modules)] = as.data.frame(ini)
 21 | 
 22 |   for (m in names(modules)){  # one module at a time, so a failure only affects that module
 23 |     tryCatch(expr = {
 24 |       srt = GeneToEnrichment(srt, db = modules[m], method = 'rand', db_rand = modules_rand[m])
 25 |     }, error = function(e){message('GeneToEnrichment failed for module ', m, ': ', conditionMessage(e))})  # best effort: report the failure and keep the 0 placeholder
 26 |   }
 27 | 
 28 |   scores = srt@meta.data[,names(modules)]
 29 |   frequency = colMeans(scores > 0.5, na.rm = TRUE)  # per module: fraction of cells scoring above 0.5
 30 |   srt$state = apply(srt@meta.data[,names(modules)], 1, function(x){  # label each cell with its highest-scoring module
 31 |     top = which.max(x)
 32 |     return(names(x)[top])
 33 |   })
 34 |   srt$state = factor(srt$state, levels = names(modules))
 35 |   save(srt, file = 'srt.scored.RData')
 36 | 
 37 |   pdf('scoring.pdf')
 38 |   plot.list = lapply(names(modules), function(m){  # one feature plot per module, coloured from grey to the module colour
 39 |     h = FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m])) +
 40 |       NoAxes() + NoLegend() +
 41 |       theme(plot.title = element_text(size = 10))
 42 |   })
 43 |   print(CombinePlots(plot.list, ncol = 4))
 44 |   h = DimPlot(srt, group.by = 'state', cols = colors.module)
 45 |   print(h)
 46 |   # Co-occurrence: hypergeometric test on the overlap of cells scoring > 0.5 for each pair of modules, kept as a signed -log10(p)
 47 |   occ = colMeans(scores > 0.5)
 48 |   coocc = sapply(names(modules), function(m1){
 49 |     sapply(names(modules), function(m2){
 50 |       pval = phyper(
 51 |         sum((scores[, m1] > 0.5) * (scores[, m2] > 0.5)),  # cells positive for both modules
 52 |         sum(scores[, m2] > 0.5),
 53 |         dim(scores)[1] - sum(scores[, m2] > 0.5),
 54 |         sum(scores[, m1] > 0.5),
 55 |         lower.tail = FALSE)
 56 |       if (pval <= 0.5){  # upper tail is the smaller one: positive value (co-occurrence)
 57 |         return(-log10(pval))
 58 |       }
 59 |       if (pval > 0.5){  # lower tail is the smaller one: negative value (mutual exclusion)
 60 |         return(log10(1-pval))
 61 |       }
 62 |     })
 63 |   })
 64 |   coocc[is.infinite(coocc) & coocc > 0] = max(coocc[is.finite(coocc)])  # replace +/-Inf by the most extreme finite value
 65 |   coocc[is.infinite(coocc) & coocc < 0] = min(coocc[is.finite(coocc)])
 66 |   coocc[which(occ <= 0.01),] = 0  # ignore modules present in at most 1% of cells
 67 |   coocc[, which(occ <= 0.01)] = 0
 68 |   coocc[abs(coocc) < pval_thresh] = 0  # drop non-significant pairs; the previous test (> thresh & < -thresh) could never match for a positive threshold
 69 |   coocc[coocc > 10] = 10  # clamp to the range of the colour scale below
 70 |   coocc[coocc < -10] = -10
 71 |   g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')  # entries set to 0 above produce no edge
 72 |   coords = layout.circle(g)
 73 |   E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
 74 |   E(g)$width = abs(E(g)$weight)/5
 75 |   V(g)$color = colors.module[names(V(g))]
 76 |   V(g)$size = occ*200  # node size proportional to the module's frequency
 77 |   plot(g,
 78 |        layout = coords,
 79 |        vertex.shape = 'circle',
 80 |        vertex.label.size = 1, vertex.label.color = 'black',
 81 |        frame=FALSE)
 82 |   dev.off()
 83 | 
 84 |   stats = ScoreModule(srt, db = modules, method = 'rand', db_rand = modules_rand)
 85 |   stats = rbind(stats, frequency)  # append the per-module frequency as an extra row
 86 |   save(stats, file = 'stats.RData')
 87 | 
 88 |   setwd('~/Documents/Analysis/Tumors/')
 89 |   return()
 90 | }, mc.cores = ncores)
 91 | 
 92 | 
 93 | ## Paired: score matched normal/tumor samples separately and after merging, then save the merged object, plots and stats
 94 | 
 95 | mclapply(files.paired.list, function(f){
 96 |   
 97 |   setwd(paste0(f[['normal']],'Paired'))  # outputs go to the 'Paired' location derived from the normal sample's path
 98 |   
 99 |   srt.normal = loadRData(paste0(f[['normal']],'srt.malignant.RData'))
100 |   srt.tumor = loadRData(paste0(f[['tumor']],'srt.malignant.RData'))
101 |   
102 |   stats.normal = ScoreModule(srt.normal, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
103 |   rownames(stats.normal) = paste0(rownames(stats.normal),'.normal')  # suffix rows so both samples can be stacked in one table
104 |   stats.tumor = ScoreModule(srt.tumor, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
105 |   rownames(stats.tumor) = paste0(rownames(stats.tumor),'.tumor')
106 | 
107 |   srt = merge(srt.normal, srt.tumor)
108 |   srt$cat = 'tumor'
109 |   srt$cat[seq_len(ncol(srt.normal))] = 'normal'  # assumes merge() keeps the normal cells first — TODO confirm
110 |   
111 |   srt = SCTransform(srt, return.only.var.genes = FALSE)
112 |   srt = RunPCA(srt)
113 |   srt = RunUMAP(srt, dims = 1:10)
114 |   
115 |   modules_rand = MakeRand(srt, db = modules, nrand = nrand, nbin = nbin)  # presumably the random control sets used by method = 'rand' — TODO confirm
116 |   
117 |   ini = matrix(0,nrow = ncol(srt), ncol = length(modules))  # pre-fill every module column with 0 so it exists even if scoring that module fails
118 |   rownames(ini) = colnames(srt)
119 |   colnames(ini) = names(modules)
120 |   srt@meta.data[,names(modules)] = as.data.frame(ini)
121 |   
122 |   for (m in names(modules)){  # one module at a time, so a failure only affects that module
123 |     tryCatch(expr = {
124 |       srt = GeneToEnrichment(srt, db = modules[m], method = 'rand', db_rand = modules_rand[m])
125 |     }, error = function(e){message('GeneToEnrichment failed for module ', m, ': ', conditionMessage(e))})  # best effort: report the failure and keep the 0 placeholder
126 |   }
127 |   
128 |   scores = srt@meta.data[,names(modules)]
129 |   frequency.normal = colMeans(scores[srt$cat == 'normal',] > 0.5, na.rm = TRUE)  # per module: fraction of cells scoring above 0.5, within each category
130 |   frequency.tumor = colMeans(scores[srt$cat == 'tumor',] > 0.5, na.rm = TRUE)
131 |   srt$state = apply(srt@meta.data[,names(modules)], 1, function(x){  # label each cell with its highest-scoring module
132 |     top = which.max(x)
133 |     return(names(x)[top])
134 |   })
135 |   srt$state = factor(srt$state, levels = names(modules))
136 |   save(srt, file = 'srt.scored.RData')
137 |   
138 |   pdf('scoring.pdf')
139 |   plot.list = lapply(names(modules), function(m){  # one feature plot per module, split by normal/tumor
140 |     h = FeaturePlot(srt, features = m, pt.size = 1, cols = c('grey',colors.module[m]), split.by = 'cat') +
141 |       NoAxes() + NoLegend() +
142 |       theme(plot.title = element_text(size = 10))
143 |   })
144 |   print(CombinePlots(plot.list, ncol = 4))
145 |   h = DimPlot(srt, group.by = 'state', cols = colors.module, split.by = 'cat')
146 |   print(h)
147 |   h = DimPlot(srt, group.by = 'cat', cols = colors.cat)
148 |   print(h)
149 |   # Co-occurrence: per category, hypergeometric test on the overlap of cells scoring > 0.5 for each pair of modules, kept as a signed -log10(p)
150 |   for (ca in c('normal','tumor')){
151 |     occ = colMeans(scores[srt$cat == ca,] > 0.5)
152 |     coocc = sapply(names(modules), function(m1){
153 |       sapply(names(modules), function(m2){
154 |         pval = phyper(
155 |           sum((scores[srt$cat == ca, m1] > 0.5) * (scores[srt$cat == ca, m2] > 0.5)),  # cells positive for both modules
156 |           sum(scores[srt$cat == ca, m2] > 0.5), 
157 |           dim(scores[srt$cat == ca,])[1] - sum(scores[srt$cat == ca, m2] > 0.5),
158 |           sum(scores[srt$cat == ca, m1] > 0.5), 
159 |           lower.tail = FALSE)
160 |         if (pval <= 0.5){  # upper tail is the smaller one: positive value (co-occurrence)
161 |           return(-log10(pval))
162 |         }
163 |         if (pval > 0.5){  # lower tail is the smaller one: negative value (mutual exclusion)
164 |           return(log10(1-pval))
165 |         }
166 |       })
167 |     })
168 |     coocc[is.infinite(coocc) & coocc > 0] = max(coocc[is.finite(coocc)])  # replace +/-Inf by the most extreme finite value
169 |     coocc[is.infinite(coocc) & coocc < 0] = min(coocc[is.finite(coocc)])
170 |     coocc[which(occ <= 0.01),] = 0  # ignore modules present in at most 1% of cells
171 |     coocc[, which(occ <= 0.01)] = 0
172 |     coocc[abs(coocc) < pval_thresh] = 0  # drop non-significant pairs; the previous test (> thresh & < -thresh) could never match for a positive threshold
173 |     coocc[coocc > 10] = 10  # clamp to the range of the colour scale below
174 |     coocc[coocc < -10] = -10
175 |     g = graph_from_adjacency_matrix(coocc, diag = FALSE, weighted = TRUE, mode = 'undirected')  # entries set to 0 above produce no edge
176 |     coords = layout.circle(g)
177 |     E(g)$color = colorRamp2(breaks = seq(-10, 10, length = 9), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 9))(E(g)$weight)
178 |     E(g)$width = abs(E(g)$weight)/5
179 |     V(g)$color = colors.module[names(V(g))]
180 |     V(g)$size = occ*200  # node size proportional to the module's frequency in this category
181 |     plot(g, 
182 |          layout = coords,
183 |          vertex.shape = 'circle', 
184 |          vertex.label.size = 1, vertex.label.color = 'black',
185 |          frame=FALSE) 
186 |   }
187 |   dev.off()
188 |   
189 |   stats = rbind(stats.normal, stats.tumor, frequency.normal, frequency.tumor)  # per-sample module stats plus the two frequency rows
190 |   save(stats, file = 'stats.RData')
191 |   
192 |   setwd('~/Documents/Analysis/Tumors/')
193 |   return()
194 | }, mc.cores = ncores)
195 | 
196 | 


--------------------------------------------------------------------------------
/Visium.R:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env Rscript
   2 | source('~/Documents/R/seurat_functions.R')  # project helper functions used throughout this script
   3 | setwd('~/Documents/Analysis/Visium/')
   4 | 
   5 | load('~/Documents/Analysis/Tumors/parameters.RData')  # presumably provides `colors` (and other shared parameters) — TODO confirm
   6 | w = which(names(colors) == 'LIHCHs1')  # position of the existing LIHCHs1 entry
   7 | l = colors[['LIHCHs1']]
   8 | colors = c(colors, 'LIHCHs1' = l, 'LIHCHs1A' = l,  'LIHCHs1B' = l)  # append LIHCHs1 plus two variants sharing its colour
   9 | colors = colors[-w]  # remove the original entry; the appended copy stays at the end
  10 | nrand = 2  # assigned after load(), so these values take precedence over same-named objects loaded above
  11 | nbin = 10
  12 | cats = c('Malignant','Both','Normal')
  13 | colors.cat = brewer_pal(palette = 'RdYlGn')(8)[c(2,5,7)]  # three colours picked from an 8-colour RdYlGn palette, one per category
  14 | names(colors.cat) = cats
  15 | method = 'score'
  16 | pval_thresh = 1
  18 | for (prox in c('inverse')){
  19 | 
  20 | modules = loadRData('../Tumors/modules.RData')
  21 | colors.module = loadRData('../Tumors/colors.module.RData')
  22 | types.module = loadRData('../Tumors/types.module.RData')
  23 | modules = modules[!names(modules) %in% c('AC','OPC','NPC')]  # drop the modules named AC, OPC and NPC
  24 | colors.module = colors.module[names(modules)]  # keep colours and types aligned with the remaining modules
  25 | types.module = types.module[names(modules)]
  26 | ma = names(modules)  # module names, reused as metadata column names further down
  27 | 
  28 | colors.pop = loadRData('../Tumors/colors.pop.ours.RData')
  29 | 
  30 | colors.bin = rep(Seurat:::SpatialColors(n = 3)[c(1,3)],2)  # first and third of three SpatialColors, repeated so logical and 0/1 labels share colours
  31 | names(colors.bin) = c('FALSE','TRUE','0','1')
  32 | 
  33 | srt.tme = loadRData('~/Documents/Analysis/Tumors/srt.ours.tme.RData')
  34 | exclude = c('Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell')  # `pop` labels removed from srt.tme
  35 | srt.tme = srt.tme[, !srt.tme$pop %in% exclude]
  36 | rm = (srt.tme$pop == 'B_cell' & !colSums(GetData(srt.tme, slot = 'data')[c('CD4','GZMB'),]) == 0) # flag B_cell-labelled cells whose summed CD4 + GZMB value is non-zero (treated as annotation errors); `!x == 0` parses as !(x == 0); note that `rm` shadows base::rm()
  37 | srt.tme = srt.tme[, !rm]
  38 | # keep = sample(1:ncol(srt.tme), size = 5000, replace = FALSE)
  39 | # keep = unlist(lapply(levels(srt.tme$pop), function(ct){
  40 | #   x = sample(colnames(srt.tme)[srt.tme$pop == ct])
  41 | #   if (length(x) > 1000){
  42 | #     x = sample(x, size = 1000, replace = FALSE)
  43 | #   }
  44 | #   return(x)
  45 | # }))
  46 | # srt.tme = srt.tme[,keep]
  47 | 
  48 | pdf('macrophages.pdf')  # build an M1/M2 marker database (db_mac) from the macrophages of one reference sample
  49 | #srt.mac = srt.tme
  50 | srt.mac = loadRData('~/Documents/Tumors/OVCAHs/OVCAHs1/srt.normal.RData')
  51 | srt.mac = srt.mac[, srt.mac$pop == 'Macrophage']
  52 | srt.mac = SCTransform(srt.mac, return.only.var.genes = FALSE)
  53 | srt.mac = RunPCA(srt.mac)
  54 | srt.mac = RunUMAP(srt.mac, dims = 1:10)
  55 | srt.mac = FindNeighbors(srt.mac)
  56 | srt.mac = FindClusters(srt.mac)
  57 | srt.mac$mac = factor(srt.mac$seurat_clusters, labels = c('M1','M2'))  # NOTE(review): factor() errors unless exactly two clusters were found, and M1/M2 are assigned by cluster order only — confirm
  58 | h = DimPlot(srt.mac, group.by = 'mac', cols = c('darkorange','darkred'))
  59 | print(h)
  60 | markers = FindAllMarkers2(srt.mac, group.by = 'mac', do.print = FALSE, do.enrichment = FALSE, do.plot = TRUE)
  61 | db_mac = split(markers$genes_100$gene, markers$genes_100$cluster)  # marker genes grouped by cluster label
  62 | print(db_mac)
  63 | data = GetData(srt.mac, slot = 'scale.data')[unlist(db_mac),]  # rows follow the order of db_mac, which row_split below relies on
  64 | top_ann = HeatmapAnnotation(df = data.frame('mac' = srt.mac$mac),
  65 |                             col = list('mac' = c('M1' = 'darkorange','M2' = 'darkred')),
  66 |                             which = 'column')
  67 | h = Heatmap(data, top_annotation = top_ann,
  68 |         cluster_rows = FALSE, cluster_columns = FALSE,
  69 |         breaks = c(-2,0,2), colors = c('blue','white','red'),
  70 |         column_split = srt.mac$mac,
  71 |         row_split = unlist(lapply(names(db_mac), function(x){rep(x,length(db_mac[[x]]))})))
  72 | labels = c('IFIT1','TNF','IL1B','C1QA','CD14','CD163','APOE','HLA-DRA','MS4A4A')  # genes to call out on the heatmap
  73 | r = rowAnnotation(link = anno_mark(at = which(rownames(data) %in% labels), labels = rownames(data)[rownames(data) %in% labels]))  # take labels from the marked rows so each mark carries its own gene name, even if some genes are absent or ordered differently
  74 | print(h+r)
  75 | dev.off()
  76 | 
  77 | files = c(  # sample paths of the form <group>/<sample>; commented-out entries are left out of the run
  78 |   'OVCAHs/OVCAHs1',
  79 |   #'OVCAHs/OVCAHs2',
  80 |   'OVCAHs/OVCAHs3',
  81 |   'BRCAHs/BRCAHs0',
  82 |   'BRCAHs/BRCAHs1',
  83 |   'BRCAHs/BRCAHs2',
  84 |   'UCECHs/UCECHs3',
  85 |   'LIHCHs/LIHCHs1',
  86 |   #'LIHCHs/LIHCHs1A',
  87 |   #'LIHCHs/LIHCHs1B',
  88 |   'PDACHs/PDACHs1',
  89 |   #'PDACHs/PDACHs2',
  90 |   'GISTHs/GISTHs1',
  91 |   'GISTHs/GISTHs2')
  92 | names(files) = sapply(strsplit(files, '/', fixed = TRUE), '[', 2)  # name each path by its second '/'-separated component (the sample id)
  93 | 
  94 | meta.list = mclapply(names(files), function(fa){
  95 |   
  96 |   pdf(paste0(fa,'.',prox,'.pdf'), height = 10, width = 15)
  97 |   f = files[[fa]]
  98 |   meta = c()
  99 |   
 100 |   #### Loading data ####
 101 |   
 102 |   # ST
 103 |   
 104 |   st = loadRData(paste0('~/Documents/Visium/',f,'/st.RData'))
 105 |   
 106 |   res = loadRData(paste0('~/Documents/Visium/',f,'/res.RData'))
 107 |   modules_st = NMFToModules(res, gmin = 5)
 108 |   nmf = names(modules_st)
 109 |   
 110 |   st = AddMetaData(st, metadata = st@images$slice1@coordinates[,c('row','col')], col.name = c('row','col'))
 111 |   st$axis = st$row
 112 |   if (fa == 'OVCAHs1'){
 113 |     st$axis = st$row
 114 |   }
 115 |   if (fa == 'OVCAHs2'){
 116 |     st$axis = st$col
 117 |   }
 118 |   if (fa == 'OVCAHs3'){
 119 |     st$axis = sqrt((st$row - 0)^2 + (st$col - 70)^2)
 120 |   }
 121 |   if (fa == 'BRCAHs0'){
 122 |     st$axis = st$row - 1/3*st$col
 123 |   }
 124 |   if (fa == 'BRCAHs1'){
 125 |     st$axis = st$row
 126 |   }
 127 |   if (fa == 'BRCAHs2'){
 128 |     st$axis = - sqrt((st$col - 80)^2 + ((st$row - 25)*1.5)^2)
 129 |   }
 130 |   if (fa == 'UCECHs3'){
 131 |     st$axis = st$col - 1.2*st$row
 132 |   }
 133 |   if (fa == 'LIHCHs1'){
 134 |     st$axis = st$row
 135 |   }
 136 |   if (fa == 'PDACHs1'){
 137 |     st$axis = st$col - st$row
 138 |   }
 139 |   if (fa == 'PDACHs2'){
 140 |     st$axis = st$col
 141 |   }
 142 |   if (fa == 'GISTHs1'){
 143 |     st$axis = - st$row - 3/4*st$col
 144 |   }
 145 |   if (fa == 'GISTHs2'){
 146 |     st$axis = st$row + 1/3*st$col
 147 |   }
 148 |   st$axis = (st$axis - min(st$axis))/(max(st$axis) - min(st$axis))
 149 |   h = SpatialFeaturePlot(st, 'axis')
 150 |   print(h)
 151 |   
 152 |   h = SpatialFeaturePlot(st, c('nCount_Spatial','nCount_SCT','nFeature_Spatial','nFeature_SCT'))
 153 |   print(h)
 154 |   
 155 |   colors.clone = brewer_pal(palette = 'Accent')(nlevels(st$clone))
 156 |   names(colors.clone) = levels(st$clone)
 157 |   
 158 |   h = SpatialDimPlot(st, group.by = 'clone', cols = colors.clone)
 159 |   print(h)
 160 |   
 161 |   # Single cell
 162 |   
 163 |   if (fa %in% c('LIHCHs1A','LIHCHs1B')){
 164 |     srt.malignant = loadRData(paste0('~/Documents/Tumors/LIHCHs/LIHCHs1/srt.malignant.RData'))
 165 |   } else {
 166 |     srt.malignant = loadRData(paste0('~/Documents/Tumors/',f,'/srt.malignant.RData'))
 167 |   }
 168 |   if (!all(sapply(ma, function(x){x %in% names(srt.malignant@meta.data)}))){
 169 |     srt.malignant = GeneToEnrichment(srt.malignant, db = modules, method = 'rand', nrand = nrand, nbin = nbin)
 170 |   }
 171 |   ma_bin = paste0(ma, '_bin')
 172 |   scores = srt.malignant@meta.data[,ma]
 173 |   scores_bin = scores
 174 |   scores_bin[] = as.numeric(scores_bin > 0.5)
 175 |   srt.malignant = AddMetaData(srt.malignant, metadata = scores_bin, col.name = ma_bin)
 176 |   
 177 |   if (fa %in% c('LIHCHs1A','LIHCHs1B')){
 178 |     srt.normal = loadRData(paste0('~/Documents/Tumors/LIHCHs/LIHCHs1/srt.normal.RData'))
 179 |   } else {
 180 |     srt.normal = loadRData(paste0('~/Documents/Tumors/',f,'/srt.normal.RData'))
 181 |   }
 182 |   if (f %in% c('GISTHs/GISTHs1')){
 183 |     exclude = c('Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell')
 184 |   } else {
 185 |     exclude = c('Embryonic_stem_cells','iPS_cells','Tissue_stem_cells','Smooth_muscle_cells','Chondrocytes','Epithelial_cells','Hepatocytes','Keratinocytes','Astrocyte','Neurons','Neuroepithelial_cell')
 186 |   }
 187 |   srt.normal = srt.normal[, !srt.normal$pop %in% exclude]
 188 |   ta = table(srt.normal$pop)
 189 |   spec = names(ta)[ta > 20]
 190 |   gen = setdiff(levels(srt.tme$pop), spec)
 191 |   srt.normal = merge2(srt.normal[,srt.normal$pop %in% spec], srt.tme[,srt.tme$pop %in% gen])
 192 |   srt.normal = SCTransform(srt.normal, return.only.var.genes = FALSE)
 193 |   
 194 |   srt = merge2(srt.malignant, srt.normal)
 195 |   srt = SCTransform(srt, return.only.var.genes = FALSE)
 196 |   srt$pop = as.character(srt$pop)
 197 |   srt$pop = factor(srt$pop, levels = intersect(names(colors.pop), srt$pop))
 198 |   srt$pop = relevel(srt$pop, ref = 'Malignant')
 199 |   Idents(srt) = 'pop'
 200 |   srt = SCTransform(srt, return.only.var.genes = FALSE)
 201 |   srt = RunPCA(srt)
 202 |   srt = RunUMAP(srt, dims = 1:10)
 203 |   srt = RunTSNE(srt, dims = 1:10)
 204 |   h = DimPlot(srt, reduction = 'umap', group.by = 'pop', cols = colors.pop)
 205 |   print(h)
 206 |   h = DimPlot(srt, reduction = 'tsne', group.by = 'pop', cols = colors.pop)
 207 |   print(h)
 208 |   
 209 |   markers = FindAllMarkers2(srt, group.by = 'pop')
 210 |   genes.srt = markers$genes_100$gene
 211 |   
 212 |   #### NMF ####
 213 |   
 214 |   plot.list = lapply(nmf, function(x){
 215 |     h = SpatialFeaturePlot(st, x)
 216 |   })
 217 |   print(CombinePlots(plot.list))
 218 |   
 219 |   #### Anchoring ####
 220 |   
 221 |   # Find pure cell types from integration labels
 222 |   
 223 |   options(future.globals.maxSize = 5000*1024^2)
 224 |   object.list = list('SC' = srt, 'ST' = st)
 225 |   genes.use = intersect(SpatiallyVariableFeatures(st), VariableFeatures(srt))
 226 |   #genes.use = intersect(SpatiallyVariableFeatures(st), genes.srt)
 227 |   object.list = PrepSCTIntegration(object.list = object.list, 
 228 |                                    anchor.features = genes.use, 
 229 |                                    verbose = FALSE)
 230 |   anchors = FindTransferAnchors(reference = object.list$SC, query = object.list$ST, 
 231 |                                 normalization.method = 'SCT',
 232 |                                 features = genes.use,
 233 |                                 verbose = FALSE)
 234 |   predictions = TransferData(anchorset = anchors, refdata = srt$pop)
 235 |   predictions = predictions[,!colnames(predictions) %in% c('predicted.id','prediction.score.max')]
 236 |   colnames(predictions) = gsub('prediction.score','pred',colnames(predictions))
 237 |   pred = colnames(predictions)
 238 |   st = AddMetaData(st, metadata = predictions, col.name = pred)
 239 |   plot.list = lapply(pred, function(x){
 240 |     h = SpatialFeaturePlot(st, features = x)
 241 |   })
 242 |   print(CombinePlots(plot.list))
 243 |   
 244 |   pred = colnames(predictions)[apply(predictions, 2, var) > 0]
 245 |   pred = pred[order(pred)]
 246 |   pred = c('pred.Malignant', setdiff(pred, 'pred.Malignant'))
 247 |   
 248 |   # Binarize
 249 |   
 250 |   predictions_bin = predictions
 251 |   predictions_bin[] = as.numeric(predictions_bin > 0.9)
 252 |   pred_bin = paste0(pred, '_bin')
 253 |   st = AddMetaData(st, predictions_bin, col.name = pred_bin)
 254 |   plot.list = lapply(pred_bin, function(x){
 255 |     h = SpatialDimPlot(st, x, cols = colors.bin)
 256 |   })
 257 |   print(CombinePlots(plot.list))
 258 |   
 259 |   #### NNLS from single-cell ####
 260 |   
 261 |   # Data
 262 |   
 263 |   genes.use = intersect(SpatiallyVariableFeatures(st), VariableFeatures(srt))
 264 |   #genes.use = intersect(SpatiallyVariableFeatures(st), genes.srt)
 265 |   prof = AverageExpression(srt, assay = 'SCT', slot = 'data')$SCT[genes.use,]
 266 |   data = as.matrix(GetAssayData(st, assay = 'SCT', slot = 'data'))[genes.use,]
 267 |   
 268 |   # Regression
 269 |   
 270 |   coef = t(apply(data, 2, function(y){
 271 |     coef(nnls(as.matrix(prof), y))
 272 |   }))
 273 |   colnames(coef) = colnames(prof)
 274 |   nnls = colnames(coef)[colSums(coef > 0) >= 20]
 275 |   prof = prof[,nnls]
 276 |   coef = t(apply(data, 2, function(y){
 277 |     coef(nnls(as.matrix(prof), y))
 278 |   }))
 279 |   colnames(coef) = nnls
 280 |   st = AddMetaData(st, coef, col.name = nnls)
 281 |   plot.list = lapply(nnls, function(x){
 282 |     h = SpatialFeaturePlot(st, x)
 283 |   })
 284 |   print(CombinePlots(plot.list))
 285 |   plot.list = lapply(nnls, function(x){
 286 |     h = SpatialFeaturePlot(st, x)
 287 |   })
 288 |   print(CombinePlots(plot.list))
 289 |   
 290 |   # Scaling from null from pure cell types
 291 |   
 292 |   coef_scaled = sapply(nnls, function(x){
 293 |     vec = coef[,x]
 294 |     y = paste0('pred.',x)
 295 |     spots.use = colnames(st)[st@meta.data[,y] == 0]
 296 |     #spots.use = colnames(st)[st@meta.data[,y] == 0 & rowSums(predictions_bin) == 1]
 297 |     if (length(spots.use) < 5){
 298 |       spots.use = colnames(st)[order(st@meta.data[,y])[1:5]]
 299 |     }
 300 |     data_rand = Reduce(cbind, lapply(1:10^nrand, function(i){
 301 |       t(apply(data[,spots.use], 1, function(expr){
 302 |         sample(expr, length(expr), replace = FALSE)
 303 |       }))
 304 |     }))
 305 |     coef_rand = t(apply(data_rand, 2, function(y){
 306 |       coef(nnls(as.matrix(prof), y))
 307 |     }))
 308 |     colnames(coef_rand) = nnls
 309 |     if (sd(coef_rand[,x]) == 0){
 310 |       return((coef[,x] - mean(coef_rand[,x]))/min(coef[coef[,x] > 0,x]))
 311 |     } else {
 312 |       return((coef[,x] - mean(coef_rand[,x]))/sd(coef_rand[,x]))
 313 |     }
 314 |   })
 315 |   nnls_scaled = paste0(nnls, '_scaled')
 316 |   st = AddMetaData(st, coef_scaled, col.name = nnls_scaled)
 317 |   plot.list = lapply(nnls_scaled, function(x){
 318 |     h = SpatialFeaturePlot(st, x)
 319 |   })
 320 |   print(CombinePlots(plot.list))
 321 |   
 322 |   # Binarizing
 323 |   
 324 |   coef_bin = coef_scaled
 325 |   coef_bin[] = (coef_bin > 2)
 326 |   # for (x in rownames(coef_bin)){
 327 |   #   if (sum(coef_bin[x,]) == 0){
 328 |   #     y = coef_scaled[x,]
 329 |   #     y = sort(y, decreasing = TRUE)
 330 |   #     if (y[1] > 1 & y[1] - y[2] > 1){
 331 |   #       coef_bin[x,names(y)[1]] = 1
 332 |   #     }
 333 |   #   }
 334 |   # }
 335 |   
 336 |   nnls_bin = paste0(nnls, '_bin')
 337 |   st = AddMetaData(st, coef_bin, col.name = nnls_bin)
 338 |   plot.list = lapply(nnls_bin, function(x){
 339 |     h = SpatialDimPlot(st, x, cols = colors.bin)
 340 |   })
 341 |   print(CombinePlots(plot.list))
 342 |   
 343 |   nnls = setdiff(nnls, 'Epithelial_cells')
 344 |   nnls_scaled = paste0(nnls, '_scaled')
 345 |   nnls_bin = paste0(nnls, '_bin')
 346 |   
 347 |   # M1 vs M2
 348 |   
 349 |   st.mac = st[, st$Macrophage_bin == 1]
 350 |   st.mac = SCTransform(st.mac, return.only.var.genes = FALSE, assay = 'Spatial')
 351 |   st.mac = GeneToEnrichment(st.mac, db = db_mac, method = 'score', nbin = nbin)
 352 |   st.mac$M1M2 = st.mac$M1 - st.mac$M2
 353 |   st.mac$M1_bin = as.numeric(st.mac$M1 > st.mac$M2)
 354 |   st.mac$M2_bin = as.numeric(st.mac$M2 > st.mac$M1)
 355 |   st.mac$M1M2_bin = c('M1','M2')[1 + (st.mac$M1 > st.mac$M2)]
 356 |   
 357 |   h = SpatialFeaturePlot(st.mac, c('M1','M2','M1M2'))
 358 |   print(h)
 359 |   
 360 |   srt = GeneToEnrichment(srt, db = db_mac, method = 'score', nbin = nbin)
 361 |   srt$M1M2 = srt$M1 - srt$M2
 362 |   h = VlnPlot(srt, 'M1M2', group.by = 'pop', cols = colors.pop)
 363 |   print(h)
 364 |   
 365 |   st@meta.data[,c('M1','M2','M1M2','M1M2_bin')] = NA
 366 |   st@meta.data[,c('M1_bin','M2_bin')] = 0
 367 |   st@meta.data[colnames(st.mac),c('M1','M2','M1M2','M1M2_bin','M1_bin','M2_bin')] = st.mac@meta.data[,c('M1','M2','M1M2','M1M2_bin','M1_bin','M2_bin')]
 368 |   
 369 |   nnls = c(nnls, 'M1', 'M2')
 370 |   nnls_scaled = paste0(nnls, '_scaled')
 371 |   nnls_bin = paste0(nnls, '_bin')
 372 |   
 373 |   nnls1 = setdiff(nnls, c('M1', 'M2'))
 374 |   nnls1_scaled = paste0(nnls1, '_scaled')
 375 |   nnls1_bin = paste0(nnls1, '_bin')
 376 |   
 377 |   nnls2 = setdiff(nnls, 'Macrophage')
 378 |   nnls2_scaled = paste0(nnls2, '_scaled')
 379 |   nnls2_bin = paste0(nnls2, '_bin')
 380 |   
 381 |   # Average neighborhood coefficient
 382 |   
 383 |   nei = FindSTNeighbors(st, d_min = 0, d_max = 1.5)
 384 |   coef_nei = sapply(nnls_bin, function(x){
 385 |     sapply(nei, function(spot){
 386 |       y = st@meta.data[spot,x]
 387 |       y[y < 0] = 0
 388 |       mean(y, na.rm = TRUE)
 389 |     })
 390 |   })
 391 |   colnames(coef_nei) = nnls
 392 |   nnls_nei = paste0(nnls, '_nei')
 393 |   nnls1_nei = paste0(nnls1, '_nei')
 394 |   nnls2_nei = paste0(nnls2, '_nei')
 395 |   st = AddMetaData(st, coef_nei, col.name = nnls_nei)
 396 |   h = SpatialFeaturePlot(st, nnls_nei)
 397 |   print(h)
 398 |   
 399 |   # M1 M2
 400 |   
 401 |   M1M2_nei = sapply(nei, function(spot){
 402 |     y = st@meta.data[spot,'M1M2']
 403 |     mean(y, na.rm = TRUE)
 404 |   })
 405 |   M1M2_nei[is.nan(M1M2_nei)] = NA
 406 |   st = AddMetaData(st, M1M2_nei, col.name = 'M1M2_nei')
 407 |   h = SpatialFeaturePlot(st, c('M1','M2','M1M2','M1_nei','M2_nei','M1M2_nei'))
 408 |   print(h)
 409 |   
 410 |   #### Distances ####
 411 |   
 412 |   # Add distance to cell types
 413 |   
 414 |   coord = st@images$slice1@coordinates[,c('imagerow','imagecol')]
 415 |   distances = as.matrix(dist(coord))
 416 |   distances = distances/min(distances[distances > 0])
 417 |   #distances = round(distances, digits = 1)
 418 |   coef_dist = sapply(nnls_bin, function(x){
 419 |     w = colnames(st)[as.logical(st@meta.data[,x])]
 420 |     w = w[!is.na(w)]
 421 |     if (length(w) == 1){
 422 |       mi = as.numeric(distances[,w])
 423 |     } else {
 424 |       mi = apply(distances[,w], 1, function(x){min(as.numeric(x), na.rm = TRUE)})
 425 |     }
 426 |     return(mi)
 427 |   })
 428 |   if (prox == 'inverse'){
 429 |     coef_dist = 1/(1+coef_dist)
 430 |   }
 431 |   if (prox == 'opposite'){
 432 |     coef_dist = -coef_dist
 433 |   }
 434 |   colnames(coef_dist) = nnls
 435 |   nnls_dist = paste0(nnls, '_dist')
 436 |   nnls1_dist = paste0(nnls1, '_dist')
 437 |   nnls2_dist = paste0(nnls2, '_dist')
 438 |   st = AddMetaData(st, coef_dist, col.name = nnls_dist)
 439 |   plot.list = lapply(nnls_dist, function(x){
 440 |     h = SpatialFeaturePlot(st, x)
 441 |   })
 442 |   print(CombinePlots(plot.list))
 443 |   
 444 |   # Malignant
 445 |   
 446 |   depth = apply(coef_dist[,!colnames(coef_dist)=='Malignant'], 1, min)
 447 |   if (prox == 'inverse'){
 448 |     depth = 1/(1+depth)
 449 |   }
 450 |   if (prox == 'opposite'){
 451 |     depth = -depth
 452 |   }  
 453 |   st = AddMetaData(st, depth, col.name = 'Depth')
 454 |   h = SpatialFeaturePlot(st, 'Depth')
 455 |   print(h)
 456 |   
 457 |   # M1 M2
 458 |   
 459 |   st$M1M2_dist = st$M1_dist - st$M2_dist
 460 |   h = SpatialFeaturePlot(st, c('M1_dist','M2_dist','M1M2_dist'))
 461 |   print(h)
 462 |   
 463 |   #### Splitting ####
 464 |   
 465 |   # Categories
 466 |   
 467 |   st$cat = apply(st@meta.data[,nnls_bin], 1, function(x){
 468 |     if (x['Malignant_bin']){
 469 |       if (sum(x) == 1){
 470 |         return('Malignant')
 471 |       } else {
 472 |         return('Both')
 473 |       }
 474 |     } else {
 475 |       if (sum(x) == 0){
 476 |         return(NA)
 477 |       } else {
 478 |         return('Normal')
 479 |       }
 480 |     }
 481 |   })
 482 |   st$cat = factor(st$cat, levels = cats)
 483 |   h = SpatialDimPlot(st, 'cat', pt.size.factor = 1, stroke = 0)
 484 |   print(h + scale_fill_manual(values = colors.cat))
 485 |   print(table(st$cat))
 486 |   
 487 |   # Compare all metrics
 488 |   
 489 |   h = VlnPlot(st, nmf, group.by = 'cat', pt.size = 0)
 490 |   print(h)
 491 |   
 492 |   h = VlnPlot(st, pred, group.by = 'cat', pt.size = 0)
 493 |   print(h)
 494 |   
 495 |   h = VlnPlot(st, nnls1_scaled, group.by = 'cat', pt.size = 0)
 496 |   print(h)
 497 |   
 498 |   plot.list = lapply(nmf, function(x){
 499 |     h = FeatureScatter(st, 'axis', x, group.by = 'cat', cols = colors.cat, span = 0.3, plot.cor = FALSE)
 500 |   })
 501 |   print(CombinePlots(plot.list))
 502 |   
 503 |   plot.list = lapply(pred, function(x){
 504 |     h = FeatureScatter(st, 'axis', x, group.by = 'cat', cols = colors.cat, span = 0.3, plot.cor = FALSE)
 505 |   })
 506 |   print(CombinePlots(plot.list))
 507 |   
 508 |   plot.list = lapply(nnls1_scaled, function(x){
 509 |     h = FeatureScatter(st, 'axis', x, group.by = 'cat', cols = colors.cat, span = 0.3, plot.cor = FALSE)
 510 |   })
 511 |   print(CombinePlots(plot.list))
 512 |   
 513 |   tryCatch(expr = {
 514 |     co = sapply(nmf, function(x){
 515 |       sapply(pred, function(y){
 516 |         cor(st@meta.data[,x], st@meta.data[,y])
 517 |       })
 518 |     })
 519 |     h = Heatmap(co, name = 'Correlation',
 520 |                 row_dend_reorder = 1:nrow(co), column_dend_reorder = 1:ncol(co),
 521 |                 show_row_names = TRUE, show_column_names = TRUE,
 522 |                 breaks = seq(-0.5, 0.5, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
 523 |     print(h)
 524 |   }, error = function(e){c()})
 525 |   
 526 |   tryCatch(expr = {
 527 |     co = sapply(pred, function(x){
 528 |       sapply(nnls1_scaled, function(y){
 529 |         cor(st@meta.data[,x], st@meta.data[,y])
 530 |       })
 531 |     })
 532 |     h = Heatmap(co, name = 'Correlation',
 533 |                 row_dend_reorder = 1:nrow(co), column_dend_reorder = 1:ncol(co),
 534 |                 show_row_names = TRUE, show_column_names = TRUE,
 535 |                 breaks = seq(-1, 1, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
 536 |     print(h)
 537 |   }, error = function(e){c()})
 538 |   
 539 |   tryCatch(expr = {
 540 |     co = sapply(nmf, function(x){
 541 |       sapply(nnls1_scaled, function(y){
 542 |         cor(st@meta.data[,x], st@meta.data[,y])
 543 |       })
 544 |     })
 545 |     h = Heatmap(co, name = 'Correlation',
 546 |                 row_dend_reorder = 1:nrow(co), column_dend_reorder = 1:ncol(co),
 547 |                 show_row_names = TRUE, show_column_names = TRUE,
 548 |                 breaks = seq(-0.5, 0.5, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
 549 |     print(h)
 550 |   }, error = function(e){c()})
 551 |   
 552 |   # Entropy
 553 |   
 554 |   ent = apply(st@meta.data[,nnls_bin], 1, function(x){
 555 |     x = x/sum(x)
 556 |     x = x[!x==0]
 557 |     return(sum(x*log(x)))
 558 |   })
 559 |   st = AddMetaData(st, ent, col.name = 'ent')
 560 |   h = SpatialFeaturePlot(st, 'ent', min.cutoff = -3, max.cutoff = 0)
 561 |   print(h)
 562 |   
 563 |   # Split
 564 |   
 565 |   st.list = SplitObject(st[,!is.na(st$cat)], 'cat')
 566 |   st.list = st.list[cats]
 567 |   st.malignant = st.list$Malignant
 568 |   st.malignant = SCTransform(st.malignant, return.only.var.genes = FALSE, assay = 'Spatial')  
 569 |   # st.malignant = FindSpatiallyVariableFeatures(st.malignant, selection.method = 'markvariogram')
 570 |   st.normal = st.list$Normal
 571 |   st.normal = SCTransform(st.normal, return.only.var.genes = FALSE, assay = 'Spatial')  
 572 |   # st.normal = FindSpatiallyVariableFeatures(st.normal, selection.method = 'markvariogram')
 573 |   
 574 |   h = SpatialFeaturePlot(st.malignant, nnls_nei)
 575 |   print(h)
 576 |   
 577 |   h = SpatialFeaturePlot(st.malignant, nnls_dist)
 578 |   print(h)
 579 |   
 580 |   
 581 |   #### Modules ####
 582 |   
 583 |   # Add modules
 584 |   
 585 |   st.malignant = GeneToEnrichment(st.malignant, db = modules, method = method, nrand = nrand, nbin = nbin)
 586 |   st.normal = GeneToEnrichment(st.normal, db = modules, method = method, nrand = nrand, nbin = nbin)
 587 |   
 588 |   plot.list = lapply(ma, function(m){
 589 |     h = SpatialFeaturePlot(st.malignant, m)
 590 |   })
 591 |   print(CombinePlots(plot.list))
 592 |   plot.list = lapply(ma, function(x){
 593 |     h = FeatureScatter(st.malignant, 'axis', x, group.by = 'orig.ident', cols = colors, span = 0.3, plot.cor = FALSE) + NoLegend()
 594 |   })
 595 |   print(CombinePlots(plot.list))
 596 |   
 597 |   h = Heatmap(cor(st.malignant@meta.data[,ma]), 
 598 |               show_row_names = TRUE, is.symmetric = TRUE,
 599 |               breaks = seq(-1,1, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
 600 |   print(h)
 601 |   h = Heatmap(cor(st.malignant@meta.data[,ma]), 
 602 |               show_row_names = TRUE, cluster_rows = FALSE, cluster_columns = FALSE,
 603 |               breaks = seq(-1,1, length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
 604 |   print(h)
 605 |   
 606 |   ma_bin = paste0(ma, '_bin')
 607 |   scores_bin = st.malignant@meta.data[,ma] > 0
 608 |   scores_bin[] = as.numeric(scores_bin)
 609 |   st.malignant = AddMetaData(st.malignant, scores_bin, col.name = ma_bin)
 610 |   plot.list = lapply(ma_bin, function(m){
 611 |     h = SpatialDimPlot(st.malignant, m, cols = colors.bin)
 612 |   })
 613 |   print(CombinePlots(plot.list))
 614 |   plot.list = lapply(ma_bin, function(x){
 615 |     h = FeatureScatter(st.malignant, 'axis', x, group.by = 'orig.ident', cols = colors, span = 0.3, plot.cor = FALSE) + NoLegend()
 616 |   })
 617 |   print(CombinePlots(plot.list))
 618 |   
 619 |   st@meta.data[,ma] = NA
 620 |   st@meta.data[colnames(st.malignant),ma] = st.malignant@meta.data[,ma]
 621 |   st@meta.data[,ma_bin] = 0
 622 |   st@meta.data[colnames(st.malignant),ma_bin] = st.malignant@meta.data[,ma_bin]
 623 |   
 624 |   # Module neighborhood
 625 |   
 626 |   nei = FindSTNeighbors(st, d_min = 0, d_max = 1.5)
 627 |   coef_nei = sapply(ma, function(x){
 628 |     sapply(nei, function(spot){
 629 |       y = st@meta.data[spot,x]
 630 |       mean(y, na.rm = TRUE)
 631 |     })
 632 |   })
 633 |   colnames(coef_nei) = ma
 634 |   ma_nei = paste0(ma, '_nei')
 635 |   st = AddMetaData(st, coef_nei, col.name = ma_nei)
 636 |   
 637 |   su = st@meta.data[,c(ma_nei, nnls1_nei)]
 638 |   su = su[apply(su, 1, function(x){!any(is.na(x))}),]
 639 |   co = cor(su)
 640 |   h = Heatmap(co, is.symmetric = TRUE,
 641 |               show_row_names = TRUE, show_column_names = TRUE,
 642 |               breaks = seq(-0.3,0.3,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
 643 |   print(h)
 644 |   co = cor(su)[ma_nei, nnls1_nei]
 645 |   h = Heatmap(co, 
 646 |               show_row_names = TRUE, show_column_names = TRUE,
 647 |               breaks = seq(-0.3,0.3,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
 648 |   print(h)
 649 |   
 650 |   su = st.malignant@meta.data[,c(ma, nnls1_nei)]
 651 |   su = su[apply(su, 1, function(x){!any(is.na(x))}),]
 652 |   co = cor(su)
 653 |   h = Heatmap(co, name = 'Correlation', is.symmetric = TRUE,
 654 |               show_row_names = TRUE, show_column_names = TRUE,
 655 |               breaks = seq(-0.3,0.3,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
 656 |   groups = cutree(as.hclust(row_dend(h)), k = 5)
 657 |   groups = groups[row_order(h)]
 658 |   groups = factor(groups, levels = unique(groups))
 659 |   h = Heatmap(co, name = 'Correlation', row_order = row_order(h), is.symmetric = TRUE,
 660 |               show_row_names = TRUE, show_column_names = TRUE,
 661 |               row_names_gp = gpar(col = c(colors.module, rep('black', length(nnls1_nei)))),
 662 |               breaks = seq(-0.3,0.3,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
 663 |   print(h)
 664 |   decorate_heatmap_body('Correlation', {
 665 |     l = c(0,cumsum(table(groups))[1:length(table(groups))])
 666 |     for (k in 1:length(l)){
 667 |       i = unit(l[k:(k+1)]/ncol(co), 'npc')
 668 |       j = unit(1-l[k:(k+1)]/nrow(co), 'npc')
 669 |       grid.lines(i, j[1], gp = gpar(lwd = 1, col = 'black'))
 670 |       grid.lines(i, j[2], gp = gpar(lwd = 1, col = 'black'))
 671 |       grid.lines(i[1], j, gp = gpar(lwd = 1, col = 'black'))
 672 |       grid.lines(i[2], j, gp = gpar(lwd = 1, col = 'black'))
 673 |     }
 674 |   })
 675 |   
 676 |   co = cor(su)[ma, nnls1_nei]
 677 |   h = Heatmap(co, 
 678 |               show_row_names = TRUE, show_column_names = TRUE,
 679 |               breaks = seq(-0.2,0.2,length = 7), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 7))
 680 |   print(h)
 681 |   
 682 |   # Module distance
 683 |   
 684 |   coord = st@images$slice1@coordinates[,c('imagerow','imagecol')]
 685 |   distances = as.matrix(dist(coord))
 686 |   distances = distances/min(distances[distances > 0])
 687 |   #distances = round(distances, digits = 1)
 688 |   coef_dist = sapply(ma_bin, function(x){
 689 |     w = colnames(st)[as.logical(st@meta.data[,x])]
 690 |     w = w[!is.na(w)]
 691 |     if (length(w) == 1){
 692 |       mi = as.numeric(distances[,w])
 693 |     } else {
 694 |       mi = apply(distances[,w], 1, function(x){min(as.numeric(x), na.rm = TRUE)})
 695 |     }
 696 |     return(mi)
 697 |   })
 698 |   coef_dist = -coef_dist
 699 |   colnames(coef_dist) = ma
 700 |   ma_dist = paste0(ma, '_dist')
 701 |   st = AddMetaData(st, coef_dist, col.name = ma_dist)
 702 |   plot.list = lapply(ma_dist, function(x){
 703 |     h = SpatialFeaturePlot(st, x)
 704 |   })
 705 |   print(CombinePlots(plot.list))
 706 |   
 707 |   # Plot by axis
 708 |   
 709 |   plot.list = lapply(ma, function(m){
 710 |     sub = st.malignant@meta.data
 711 |     model = lm(sub[,m] ~ sub[,'axis'])
 712 |     est = round(summary(model)$coefficients[2,1], digits = 1)
 713 |     pval = round(-log10(summary(model)$coefficients[2,4]), digits = 1)
 714 |     if (pval > pval_thresh){
 715 |       p = ggplot(sub, aes_string(x = 'axis', y = m, color = 'orig.ident')) +
 716 |         geom_jitter(show.legend = FALSE, height = 0.05, width = 0) +
 717 |         #geom_point(show.legend = FALSE) +
 718 |         geom_smooth(method='lm', show.legend = FALSE,
 719 |                     fullrange = TRUE) + 
 720 |         annotate('text', label = paste('pval', pval), x = 0.5, y = 0.5) + 
 721 |         scale_color_manual(values = colors.module[[m]], breaks = unique(st.malignant$orig.ident)) +
 722 |         ggtitle(m) +
 723 |         theme_classic() +
 724 |         theme(
 725 |           plot.title = element_text(hjust = 0.5),
 726 |           axis.text.x = element_text(size = 0),
 727 |           axis.text.y = element_text(angle = 90),
 728 |           axis.title.y = element_text(size = 0),
 729 |           axis.title.x = element_text(size = 0))
 730 |     } else {
 731 |       p = ggplot(sub, aes_string(x = 'axis', y = m, color = 'orig.ident')) +
 732 |         geom_jitter(show.legend = FALSE, height = 0.05, width = 0) +
 733 |         #geom_point(show.legend = FALSE) +
 734 |         annotate('text', label = 'NS', x = 0.5, y = 0.5) + 
 735 |         scale_color_manual(values = colors.module[[m]], breaks = unique(st.malignant$orig.ident)) +
 736 |         ggtitle(m) +
 737 |         theme_classic() +
 738 |         theme(
 739 |           plot.title = element_text(hjust = 0.5),
 740 |           axis.text.x = element_text(size = 0),
 741 |           axis.text.y = element_text(angle = 90),
 742 |           axis.title.y = element_text(size = 0),
 743 |           axis.title.x = element_text(size = 0))
 744 |     }
 745 |     return(p)
 746 |   })
 747 |   print(CombinePlots(plot.list, ncol = 4))
 748 |   
 749 |   #### Integrating ####
 750 |   
 751 |   tryCatch(expr = {
 752 |     
 753 |     # Integrating
 754 |     
 755 |     options(future.globals.maxSize = 5000*1024^2)
 756 |     object.list = list('SC' = srt, 'ST' = st)
 757 |     genes.use = intersect(VariableFeatures(srt), SpatiallyVariableFeatures(st))
 758 |     object.list = PrepSCTIntegration(object.list = object.list,
 759 |                                      anchor.features = genes.use,
 760 |                                      verbose = FALSE)
 761 |     anchors = FindIntegrationAnchors(object.list = object.list, reference = 1,
 762 |                                      normalization.method =  'SCT',
 763 |                                      anchor.features = genes.use,
 764 |                                      verbose = FALSE)
 765 |     integrated = IntegrateData(anchorset = anchors,
 766 |                                normalization.method = "SCT",
 767 |                                verbose = FALSE)
 768 |     integrated = RunPCA(integrated)
 769 |     integrated = RunUMAP(integrated, dims = 1:10)
 770 |     integrated = RunTSNE(integrated, dims = 1:10)
 771 |     
 772 |     # Plotting
 773 |     
 774 |     h = DimPlot(integrated, reduction = 'umap', group.by = c('pop'), label = TRUE, repel = TRUE, cols = colors.pop, pt.size = 2)
 775 |     print(h)
 776 |     h = DimPlot(integrated, reduction = 'umap', group.by = c('cat'), label = TRUE, repel = TRUE, cols = colors.cat, pt.size = 2)
 777 |     print(h)
 778 |     h = FeaturePlot(integrated, reduction = 'umap', features = 'axis', cols = brewer_pal(palette = 'Purples', direction = 1)(n = 9), pt.size = 2)
 779 |     print(h)
 780 |     plot.list = lapply(nnls, function(x){
 781 |       h = FeaturePlot(integrated, reduction = 'umap', x)
 782 |     })
 783 |     print(CombinePlots(plot.list))
 784 |     plot.list = lapply(nnls_bin, function(x){
 785 |       h = DimPlot(integrated, reduction = 'umap', group.by = x, cols = colors.bin)
 786 |     })
 787 |     print(CombinePlots(plot.list))
 788 |     plot.list = lapply(pred, function(x){
 789 |       h = FeaturePlot(integrated, reduction = 'umap', features = x)
 790 |     })
 791 |     print(CombinePlots(plot.list))
 792 |     plot.list = lapply(pred_bin, function(x){
 793 |       h = DimPlot(integrated, reduction = 'umap', group.by = x, cols = colors.bin)
 794 |     })
 795 |     print(CombinePlots(plot.list))
 796 |     plot.list = lapply(nmf, function(x){
 797 |       h = FeaturePlot(integrated, reduction = 'umap', x)
 798 |     })
 799 |     print(CombinePlots(plot.list))
 800 |     for (m in ma){
 801 |       h = FeaturePlot(integrated, reduction = 'umap', features = m, cols = c('lightgrey',colors.module[m]), split.by = 'technique', pt.size = 2)
 802 |       print(h)
 803 |     }
 804 |     
 805 |     h = DimPlot(integrated, reduction = 'tsne', group.by = c('pop'), label = TRUE, repel = TRUE, cols = colors.pop, pt.size = 2)
 806 |     print(h)
 807 |     h = DimPlot(integrated, reduction = 'tsne', group.by = c('cat'), label = TRUE, repel = TRUE, cols = colors.cat, pt.size = 2)
 808 |     print(h)
 809 |     h = FeaturePlot(integrated, reduction = 'tsne', features = 'axis', cols = brewer_pal(palette = 'Purples', direction = 1)(n = 9), pt.size = 2)
 810 |     print(h)
 811 |     plot.list = lapply(nnls, function(x){
 812 |       h = FeaturePlot(integrated, reduction = 'tsne', x)
 813 |     })
 814 |     print(CombinePlots(plot.list))
 815 |     plot.list = lapply(nnls_bin, function(x){
 816 |       h = DimPlot(integrated, reduction = 'tsne', group.by = x, cols = colors.bin)
 817 |     })
 818 |     print(CombinePlots(plot.list))
 819 |     plot.list = lapply(pred, function(x){
 820 |       h = FeaturePlot(integrated, reduction = 'tsne', features = x)
 821 |     })
 822 |     print(CombinePlots(plot.list))
 823 |     plot.list = lapply(pred_bin, function(x){
 824 |       h = DimPlot(integrated, reduction = 'tsne', group.by = x, cols = colors.bin)
 825 |     })
 826 |     print(CombinePlots(plot.list))
 827 |     plot.list = lapply(nmf, function(x){
 828 |       h = FeaturePlot(integrated, reduction = 'tsne', x)
 829 |     })
 830 |     print(CombinePlots(plot.list))
 831 |     for (m in ma){
 832 |       h = FeaturePlot(integrated, reduction = 'tsne', features = m, cols = c('lightgrey',colors.module[m]), split.by = 'technique', pt.size = 2)
 833 |       print(h)
 834 |     }
 835 |     
 836 |   }, error = function(e){warning(e)})
 837 |   
 838 |   dev.off()
 839 |   print(fa)
 840 |   meta = st@meta.data
 841 |   return(meta)
 842 |   
 843 | }, mc.cores = length(files))
 844 | 
 845 | names(meta.list) = names(files)
 846 | save(meta.list, file = paste0('meta.list.',prox,'.RData'))
 847 | 
 848 | }
 849 | 
 850 | for (prox in c('inverse')){
 851 | 
 852 | pdf(paste0('Visium.',prox,'.pdf'), height = 7, width = 7)
 853 | 
 854 | load('~/Documents/Analysis/Tumors/parameters.RData')
 855 | w = which(names(colors) == 'LIHCHs1')
 856 | l = colors[['LIHCHs1']]
 857 | colors = c(colors, 'LIHCHs1' = l, 'LIHCHs1A' = l,  'LIHCHs1B' = l)
 858 | colors = colors[-w]
 859 | nrand = 2
 860 | nbin = 10
 861 | cats = c('Malignant','Both','Normal')
 862 | colors.cat = brewer_pal(palette = 'RdYlGn')(8)[c(2,5,7)]
 863 | names(colors.cat) = cats
 864 | method = 'score'
 865 | pval_thresh = 1
 866 | 
 867 | modules = loadRData('../Tumors/modules.RData')
 868 | colors.module = loadRData('../Tumors/colors.module.RData')
 869 | types.module = loadRData('../Tumors/types.module.RData')
 870 | modules = modules[!names(modules) %in% c('AC','OPC','NPC')]
 871 | colors.module = colors.module[names(modules)]
 872 | types.module = types.module[names(modules)]
 873 | ma = names(modules)
 874 | ma_bin = paste0(ma, '_bin')
 875 | ma_nei = paste0(ma, '_nei')
 876 | ma_dist = paste0(ma, '_dist')
 877 | 
 878 | load(paste0('meta.list.',prox,'.RData'))
 879 | meta.list = meta.list[sapply(meta.list, length) > 0]
 880 | meta.list = meta.list[!names(meta.list) %in% c('LIHCHs1A','LIHCHs1B')]
 881 | 
 882 | # Make meta, adding NAs
 883 | keep = Reduce(union, sapply(meta.list, colnames))
 884 | meta.list = lapply(meta.list, function(meta){
 885 |   if (all(keep %in% colnames(meta))){
 886 |     return(meta)
 887 |   } else {
 888 |     add = matrix(NA, nrow = nrow(meta), ncol = length(setdiff(keep, colnames(meta))))
 889 |     colnames(add) = setdiff(keep, colnames(meta))
 890 |     rownames(add) = rownames(meta)
 891 |     meta = cbind(meta, add)
 892 |     return(meta)
 893 |   }
 894 | })
 895 | meta = Reduce(rbind, lapply(meta.list, function(meta){meta[, keep]}))
 896 | meta = meta[!is.na(meta$cat),]
 897 | meta$orig.ident = factor(meta$orig.ident, levels = intersect(names(colors), unique(meta$orig.ident)))
 898 | meta$cat = factor(meta$cat, levels = cats)
 899 | nnls_dist = setdiff(grep('_dist', colnames(meta), value = TRUE), ma_dist)
 900 | nnls_nei = grep('_nei', colnames(meta), value = TRUE)
 901 | nnls = gsub('_dist','',nnls_dist)
 902 | 
 903 | nnls1 = setdiff(nnls, c('M1', 'M2', 'M1M2'))
 904 | nnls1_dist = paste0(nnls1, '_dist')
 905 | nnls1_nei = paste0(nnls1, '_nei')
 906 | nnls1_bin = paste0(nnls1, '_bin')
 907 | 
 908 | nnls2 = setdiff(nnls, 'Macrophage')
 909 | nnls2_dist = paste0(nnls2, '_dist')
 910 | nnls2_nei = paste0(nnls2, '_nei')
 911 | nnls2_bin = paste0(nnls2, '_bin')
 912 | 
 913 | # Sample composition
 914 | 
 915 | for (o in levels(meta$orig.ident)){
 916 |   b = meta[meta$orig.ident == o,]
 917 |   sp = t(sapply(c('Normal','Both'), function(s){
 918 |     a = b[b$cat == s, nnls1_bin]
 919 |     a[a < 0] = 0
 920 |     a = colMeans(a, na.rm = TRUE)
 921 |     a = a[!is.nan(a)]
 922 |     a = a[!names(a) == 'Malignant_bin']
 923 |     a = a/sum(a)
 924 |   }))
 925 |   colnames(sp) = gsub('_bin', '', colnames(sp))
 926 |   sp = sp[,rev(colnames(sp))]
 927 |   connectedBarplot(t(rbind(sp['Normal',], sp['Both',])), space = 0.2,
 928 |                    main = o, names.arg = c('',''), axes = FALSE,
 929 |                    las = 2, beside = FALSE, col = colors.pop[colnames(sp)])
 930 | }
 931 | 
 932 | 
 933 | comp.list = lapply(levels(meta$orig.ident), function(o){
 934 |   b = meta[meta$orig.ident == o,]
 935 |   sp = t(sapply(c('Normal','Both'), function(s){
 936 |     a = b[b$cat == s, nnls1_bin]
 937 |     a[a < 0] = 0
 938 |     a = colMeans(a, na.rm = TRUE)
 939 |     a = a[!is.nan(a)]
 940 |     a = a[!names(a) == 'Malignant_bin']
 941 |     a = a/sum(a)
 942 |   }))
 943 |   colnames(sp) = gsub('_bin', '', colnames(sp))
 944 |   return(sp)
 945 | })
 946 | names(comp.list) = levels(meta$orig.ident)
 947 | comp = melt(comp.list)
 948 | names(comp) = c('cat','ct','value','orig.ident')
 949 | p.list = lapply(levels(comp$ct), function(ct){
 950 |   sub = comp[comp$ct == ct,]
 951 |   sub = sub[order(sub$cat), ]
 952 |   p = ggpaired(sub, x = 'cat', y = 'value',
 953 |                color = 'cat', line.color = 'grey', line.size = 0.4) +
 954 |     stat_compare_means(paired = TRUE) +
 955 |     scale_color_manual(values = colors.cat) +
 956 |     ggtitle(ct) +
 957 |     theme_classic() +
 958 |     theme(
 959 |       plot.title = element_text(hjust = 0.5),
 960 |       axis.text.x = element_text(size = 10),
 961 |       axis.title.x = element_text(size = 0),
 962 |       axis.title.y = element_text(size = 0),
 963 |       legend.position = 'none')
 964 | })
 965 | print(CombinePlots(p.list), ncol = 4)
 966 | 
 967 | comp.list = lapply(levels(meta$orig.ident), function(o){
 968 |   b = meta[meta$orig.ident == o,]
 969 |   sp = t(sapply(c('Normal','Both'), function(s){
 970 |     a = b[b$cat == s, nnls1_bin]
 971 |     a[a < 0] = 0
 972 |     a = colMeans(a, na.rm = TRUE)
 973 |     a = a[!is.nan(a)]
 974 |     a = a[!names(a) == 'Malignant_bin']
 975 |     a = a/sum(a)
 976 |   }))
 977 |   colnames(sp) = gsub('_bin', '', colnames(sp))
 978 |   log2(sp['Both',]/sp['Normal',])
 979 | })
 980 | keep = Reduce(union, sapply(comp.list, names))
 981 | comp.list = lapply(comp.list, function(comp){
 982 |   if (all(keep %in% names(comp))){
 983 |     return(comp)
 984 |   } else {
 985 |     add = matrix(NA, nrow = 1, ncol = length(setdiff(keep, names(comp))))
 986 |     names(add) = setdiff(keep, names(comp))
 987 |     comp = c(comp, add)
 988 |     return(comp)
 989 |   }
 990 | })
 991 | comp = Reduce(rbind, lapply(comp.list, function(comp){comp[keep]}))
 992 | comp[is.infinite(comp)] = 5
 993 | rownames(comp) = levels(meta$orig.ident)
 994 | barplot(comp, beside = TRUE, col = colors[rownames(comp)], las = 2)
 995 | 
 996 | # Modules by distance
 997 | 
 998 | val = Reduce(rbind,lapply(ma, function(m){
 999 |   Reduce(rbind,lapply(nnls1, function(ct){
1000 |     res = sapply(levels(meta$orig.ident), function(o){
1001 |       val = NA
1002 |       tryCatch(expr = {
1003 |         ct_dist = paste0(ct, '_dist')
1004 |         sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
1005 |         model = lm(sub[,m] ~ sub[,ct_dist])
1006 |         est = summary(model)$coefficients[2,1]
1007 |         pval = -log10(summary(model)$coefficients[2,4])
1008 |         val = pval*sign(est)
1009 |         return(val)
1010 |       }, error = function(e){return(NA)})
1011 |     })
1012 |     #res[abs(res)<pval_thresh] = 0
1013 |     #res = sign(res)
1014 |     #med = sum(res, na.rm = TRUE)
1015 |     #amp = sum(res == sign(med), na.rm = TRUE)
1016 |     med = median(res, na.rm = TRUE)
1017 |     amp = mean(sign(res) == sign(med), na.rm = TRUE)
1018 |     return(data.frame('module' = m, 'ct' = ct, 'med' = med, 'amp' = amp))
1019 |     return(data.frame('module' = m, 'ct' = ct, 'med' = med, 'amp' = amp))
1020 |   }))
1021 | }))
# Dot plot of the summary table: fill = median statistic, size = sign agreement
val$module = factor(val$module, levels = rev(ma))
val$ct = factor(val$ct, levels = nnls)
# The highlight flag is computed before the medians are clipped
val$highlight = (val$amp >= 0.5) & (abs(val$med) >= 0.75)
# Clip the medians to [-1, 4] for the fill scale
val$med = pmax(pmin(val$med, 4), -1)
rdbu = brewer_pal(palette = 'RdBu', direction = -1)(n = 3)
p = ggplot(val, aes(x=ct,y=module))
p = p + geom_point(shape=21,aes(size=amp,fill=med,colour=highlight))
p = p + scale_fill_gradient2(low = rdbu[1], mid = rdbu[2], high = rdbu[3])
p = p + scale_colour_manual(breaks=c('FALSE','TRUE'),values=c('white','black'))
p = p + theme_bw()
p = p + theme(
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank(),
  axis.text = element_text(size=14, colour = "black"),
  axis.text.y = element_text(size=12, colour = "black"),
  axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
  axis.title=element_blank(),
  panel.border = element_rect(size = 0.7, linetype = "solid", colour = "black"))
print(p)
1042 | 
# One page per entry of `nnls`: for every module, the per-sample signed
# -log10(p-value) of the slope of module ~ `<ct>_dist` over 'Malignant' spots
# (magnitude capped at 10), shown as a box plot with one point per sample.
for (ct in nnls){
  val = Reduce(rbind,lapply(ma, function(m){
    res = vapply(levels(meta$orig.ident), function(o){
      # Any error in the fit or in reading the slope row yields NA for that sample
      tryCatch(expr = {
        ct_dist = paste0(ct, '_dist')
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        coefs = summary(lm(sub[,m] ~ sub[,ct_dist]))$coefficients
        min(-log10(coefs[2,4]), 10)*sign(coefs[2,1])
      }, error = function(e){NA_real_})
    }, numeric(1))
    return(data.frame('orig.ident' = levels(meta$orig.ident), 'module' = m, 'res' = res))
  }))
  val$module = factor(val$module, levels = ma)
  val$orig.ident = factor(val$orig.ident)
  p = ggplot(val, aes(x=module,y=res)) +
    geom_boxplot(coef = 0, outlier.shape = NA) +
    #geom_violin() + 
    geom_jitter(aes(colour = orig.ident, size = 2), width = 0.25) +
    # Dashed lines at p = 0.05 on both sides of the signed axis
    geom_hline(yintercept = log10(0.05), linetype = 2) + 
    geom_hline(yintercept = -log10(0.05), linetype = 2) + 
    scale_colour_manual(values = colors, breaks = names(colors)) +
    ggtitle(ct) + 
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          axis.text = element_text(size=14, colour = "black"),
          axis.text.y = element_text(size=12, colour = "black"),
          axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
          axis.title=element_blank()) + NoLegend()
  print(p)
}
1079 | 
# One page per module: for every nnls1 entry, the per-sample signed
# -log10(p-value) of the slope of module ~ `<ct>_dist` over 'Malignant' spots
# (magnitude capped at 10), shown as a box plot with one point per sample.
for (m in ma){
  val = Reduce(rbind,lapply(nnls1, function(ct){
    dist.col = paste0(ct, '_dist')
    res = sapply(levels(meta$orig.ident), function(o){
      # Errors in the fit or in reading the slope row give NA for that sample
      tryCatch(expr = {
        spots = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        coefs = summary(lm(spots[,m] ~ spots[,dist.col]))$coefficients
        min(-log10(coefs[2,4]), 10)*sign(coefs[2,1])
      }, error = function(e){NA})
    })
    data.frame('orig.ident' = levels(meta$orig.ident), 'ct' = ct, 'res' = res)
  }))
  val$ct = factor(val$ct, levels = nnls)
  val$orig.ident = factor(val$orig.ident)
  p = ggplot(val, aes(x=ct,y=res))
  p = p + geom_boxplot(coef = 0, outlier.shape = NA)
  p = p + geom_jitter(aes(colour = orig.ident, size = 2), width = 0.25)
  # Dashed lines at p = 0.05 on both sides of the signed axis
  p = p + geom_hline(yintercept = log10(0.05), linetype = 2)
  p = p + geom_hline(yintercept = -log10(0.05), linetype = 2)
  p = p + scale_colour_manual(values = colors, breaks = names(colors))
  p = p + ggtitle(m)
  p = p + theme_bw()
  p = p + theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    axis.text = element_text(size=14, colour = "black"),
    axis.text.y = element_text(size=12, colour = "black"),
    axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
    axis.title=element_blank())
  p = p + NoLegend()
  print(p)
}
1116 | 
# One page per (nnls entry, module) pair with one panel per sample: module
# value against the entry's `_dist` column over the 'Malignant' spots. A linear
# fit line is drawn only when the rounded -log10(p-value) of the slope exceeds
# pval_thresh; otherwise the panel is annotated 'NS'.
for (ct in nnls){
  for (m in ma){
    
    ct_nei = paste0(ct, '_nei')
    ct_dist = paste0(ct, '_dist')
    
    plot.list = lapply(levels(meta$orig.ident), function(o){
      # p stays NULL when anything below fails for this sample
      p = NULL
      tryCatch(expr = {
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        coefs = summary(lm(sub[,m] ~ sub[,ct_dist]))$coefficients
        pval = round(-log10(coefs[2,4]), digits = 1)
        if (pval > pval_thresh){
          fit.layer = geom_smooth(method='lm', show.legend = FALSE,
                                  fullrange = TRUE)
          note = paste('pval', pval)
        } else {
          # Adding NULL to a ggplot leaves the plot unchanged
          fit.layer = NULL
          note = 'NS'
        }
        p = ggplot(sub, aes_string(x = ct_dist, y = m, color = 'orig.ident')) +
          geom_jitter(show.legend = FALSE, height = 0.05, width = 0) +
          fit.layer +
          annotate('text', label = note, x = 0.05, y = 0.05) + 
          scale_color_manual(values = colors) +
          ggtitle(o) +
          theme_classic() +
          theme(
            plot.title = element_text(hjust = 0.5),
            axis.text.x = element_text(size = 0),
            axis.text.y = element_text(angle = 90),
            axis.title.y = element_text(),
            axis.title.x = element_text())
      }, error = function(e){c()})
      return(p)
    })
    print(CombinePlots(plot.list, ncol = 3))
  }
}
1167 | 
# Modules by neighborhood

# Summary table with one row per (module, nnls1 entry) pair. Within each sample
# the module column is regressed on the entry's `_nei` column over the
# 'Malignant' spots; the per-sample statistic is -log10(p-value) of the slope,
# signed by the slope estimate.
#   med: median of the per-sample statistics
#   amp: fraction of samples whose statistic has the same sign as `med`
val = Reduce(rbind,lapply(ma, function(m){
  Reduce(rbind,lapply(nnls1, function(ct){
    ct_nei = paste0(ct, '_nei')
    res = vapply(levels(meta$orig.ident), function(o){
      # Any error in the fit or in reading the slope row yields NA for that sample
      tryCatch(expr = {
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        coefs = summary(lm(sub[,m] ~ sub[,ct_nei]))$coefficients
        -log10(coefs[2,4])*sign(coefs[2,1])
      }, error = function(e){NA_real_})
    }, numeric(1))
    # Disabled alternative summary (sign counting after thresholding):
    #res[abs(res)<pval_thresh] = 0
    #res = sign(res)
    #med = sum(res, na.rm = TRUE)
    #amp = sum(res == sign(med), na.rm = TRUE)
    med = median(res, na.rm = TRUE)
    amp = mean(sign(res) == sign(med), na.rm = TRUE)
    return(data.frame('module' = m, 'ct' = ct, 'med' = med, 'amp' = amp))
  }))
}))
# Dot plot of the summary table: fill = median statistic, size = sign agreement
val$module = factor(val$module, levels = rev(ma))
val$ct = factor(val$ct, levels = nnls)
# The highlight flag is computed before the medians are clipped
val$highlight = (val$amp >= 0.5) & (abs(val$med) >= 0.75)
# Clip the medians to [-1, 4] for the fill scale
val$med = pmax(pmin(val$med, 4), -1)
rdbu = brewer_pal(palette = 'RdBu', direction = -1)(n = 3)
p = ggplot(val, aes(x=ct,y=module))
p = p + geom_point(shape=21,aes(size=amp,fill=med,colour=highlight))
p = p + scale_fill_gradient2(low = rdbu[1], mid = rdbu[2], high = rdbu[3])
p = p + scale_colour_manual(breaks=c('FALSE','TRUE'),values=c('white','black'))
p = p + theme_bw()
p = p + theme(
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank(),
  axis.text = element_text(size=14, colour = "black"),
  axis.text.y = element_text(size=12, colour = "black"),
  axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
  axis.title=element_blank(),
  panel.border = element_rect(size = 0.7, linetype = "solid", colour = "black"))
print(p)
1214 | 
1215 | 
# One page per entry of `nnls`: for every module, the per-sample signed
# -log10(p-value) of the slope of module ~ `<ct>_nei` over 'Malignant' spots
# (magnitude capped at 10), shown as a box plot with one point per sample.
for (ct in nnls){
  val = Reduce(rbind,lapply(ma, function(m){
    res = sapply(levels(meta$orig.ident), function(o){
      # Errors in the fit or in reading the slope row give NA for that sample
      tryCatch(expr = {
        nei.col = paste0(ct, '_nei')
        spots = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        coefs = summary(lm(spots[,m] ~ spots[,nei.col]))$coefficients
        min(-log10(coefs[2,4]), 10)*sign(coefs[2,1])
      }, error = function(e){NA})
    })
    data.frame('orig.ident' = levels(meta$orig.ident), 'module' = m, 'res' = res)
  }))
  val$module = factor(val$module, levels = ma)
  val$orig.ident = factor(val$orig.ident)
  p = ggplot(val, aes(x=module,y=res))
  p = p + geom_boxplot(coef = 0, outlier.shape = NA)
  p = p + geom_jitter(aes(colour = orig.ident, size = 2), width = 0.25)
  # Dashed lines at p = 0.05 on both sides of the signed axis
  p = p + geom_hline(yintercept = log10(0.05), linetype = 2)
  p = p + geom_hline(yintercept = -log10(0.05), linetype = 2)
  p = p + scale_colour_manual(values = colors, breaks = names(colors))
  p = p + ggtitle(ct)
  p = p + theme_bw()
  p = p + theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    axis.text = element_text(size=14, colour = "black"),
    axis.text.y = element_text(size=12, colour = "black"),
    axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
    axis.title=element_blank())
  p = p + NoLegend()
  print(p)
}
1252 | 
# One page per module: for every nnls1 entry, the per-sample signed
# -log10(p-value) of the slope of module ~ `<ct>_nei` over 'Malignant' spots
# (magnitude capped at 10), shown as a box plot with one point per sample.
for (m in ma){
  val = Reduce(rbind,lapply(nnls1, function(ct){
    res = vapply(levels(meta$orig.ident), function(o){
      # Any error in the fit or in reading the slope row yields NA for that sample
      tryCatch(expr = {
        ct_nei = paste0(ct, '_nei')
        sub = meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        coefs = summary(lm(sub[,m] ~ sub[,ct_nei]))$coefficients
        min(-log10(coefs[2,4]), 10)*sign(coefs[2,1])
      }, error = function(e){NA_real_})
    }, numeric(1))
    return(data.frame('orig.ident' = levels(meta$orig.ident), 'ct' = ct, 'res' = res))
  }))
  val$ct = factor(val$ct, levels = nnls)
  val$orig.ident = factor(val$orig.ident)
  p = ggplot(val, aes(x=ct,y=res)) +
    geom_boxplot(coef = 0, outlier.shape = NA) +
    #geom_violin() + 
    geom_jitter(aes(colour = orig.ident, size = 2), width = 0.25) +
    # Dashed lines at p = 0.05 on both sides of the signed axis
    geom_hline(yintercept = log10(0.05), linetype = 2) + 
    geom_hline(yintercept = -log10(0.05), linetype = 2) + 
    scale_colour_manual(values = colors, breaks = names(colors)) +
    ggtitle(m) + 
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          axis.text = element_text(size=14, colour = "black"),
          axis.text.y = element_text(size=12, colour = "black"),
          axis.text.x = element_text(size=12, colour = "black", angle = 90, hjust = 1),
          axis.title=element_blank()) + NoLegend()
  print(p)
}
1289 | 
1290 | 
# One page per (cell type, module) pair: a scatter panel per sample of the
# module score against the `<ct>_nei` column on malignant rows. Panels whose
# -log10(p) exceeds `pval_thresh` get a regression line and the value printed;
# the others are marked 'NS'. A sample whose fit fails yields a NULL panel.
for (ct in nnls){
  for (m in ma){

    ct_nei <- paste0(ct, '_nei')
    ct_dist <- paste0(ct, '_dist')

    plot.list <- lapply(levels(meta$orig.ident), function(o){
      tryCatch(expr = {
        p <- NULL
        sub <- meta[meta$orig.ident == o & meta$cat == 'Malignant',]
        coefs <- summary(lm(sub[,m] ~ sub[,ct_nei]))$coefficients
        est <- round(coefs[2,1], digits = 1)
        pval <- round(-log10(coefs[2,4]), digits = 1)
        # Layers that differ between significant and non-significant panels
        overlay <- if (pval > pval_thresh){
          list(
            geom_smooth(method='lm', show.legend = FALSE,
                        fullrange = TRUE),
            annotate('text', label = paste('pval', pval), x = 0.05, y = 0.05))
        } else {
          list(annotate('text', label = 'NS', x = 0.05, y = 0.05))
        }
        p <- ggplot(sub, aes_string(x = ct_nei, y = m, color = 'orig.ident')) +
          geom_jitter(show.legend = FALSE, height = 0.05, width = 0) +
          #geom_point(show.legend = FALSE) +
          overlay +
          scale_color_manual(values = colors) +
          ggtitle(o) +
          theme_classic() +
          theme(
            plot.title = element_text(hjust = 0.5),
            axis.text.x = element_text(size = 0),
            axis.text.y = element_text(angle = 90),
            axis.title.y = element_text(),
            axis.title.x = element_text())
      }, error = function(e){c()})
      return(p)
    })
    print(CombinePlots(plot.list, ncol = 3))
  }
}
1341 | 
1342 | # M1 vs M2 distances
1343 | 
# One panel per sample: `M1_dist` against `M2_dist`, coloured by `cat`, with a
# linear fit per category.
plot.list <- lapply(levels(meta$orig.ident), function(o){
  panel.theme <- theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(angle = 90),
    axis.title.y = element_text(),
    axis.title.x = element_text())
  ggplot(meta[meta$orig.ident == o,],
         aes_string(x = 'M1_dist', y = 'M2_dist', fill = 'cat', col = 'cat')) +
    geom_point() +
    geom_smooth(method = 'lm') +
    scale_fill_manual(values = colors.cat, breaks = cats) +
    scale_color_manual(values = colors.cat, breaks = cats) +
    ggtitle(o) +
    theme_classic() +
    panel.theme
})
print(CombinePlots(plot.list, ncol = 3))
1360 | 
# Per-sample boxplots of the `M1M2_dist` column by category, annotated with a
# Wilcoxon test between 'Malignant' and 'Normal' rows.
sub <- melt(meta[,c('orig.ident','cat','M1M2_dist')],
            id.vars = c('orig.ident','cat'), measure.vars = 'M1M2_dist')
p.values <- sapply(split(sub, sub$orig.ident), function(x){
  wilcox.test(value~cat, x, subset = cat %in% c('Malignant','Normal'))$p.value
})
labels <- symnum(p.values, corr = FALSE, cutpoints = c(0,  .001,.01,.05, 1), symbols = c("***","**","*","n.s."))
# Annotation height: the highest upper hinge among a sample's categories, plus a margin
y.values <- sapply(split(sub, sub$orig.ident), function(x){
  hinges <- sapply(split(x, x$cat), function(xx){
    boxplot(xx$value, plot = FALSE)$stats[4, ]
  })
  max(hinges, na.rm = TRUE) + 0.5
})
p <- ggplot(sub, aes_string(x = 'orig.ident', y = 'value', fill = 'cat')) +
  geom_boxplot(coef = 0, outlier.size = 0, outlier.shape = NA, varwidth = FALSE, show.legend = FALSE) +
  geom_signif(annotations = labels,
              xmin = (1:nlevels(sub$orig.ident))-0.2,
              xmax = (1:nlevels(sub$orig.ident))+0.2,
              y_position = y.values, tip_length = 0) +
  scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
  ggtitle('') +
  ylim(-2,4) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(angle = 90),
    axis.text.x = element_text(angle = 90),
    axis.title.y = element_text(size = 0),
    axis.title.x = element_text(size = 0))
print(p)
1380 | 
1381 | 
1382 | # M1 vs M2 neighborhood
1383 | 
# One panel per sample: `M1_nei` against `M2_nei`, coloured by `cat`, with a
# linear fit per category.
plot.list <- lapply(levels(meta$orig.ident), function(o){
  panel.theme <- theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(angle = 90),
    axis.title.y = element_text(),
    axis.title.x = element_text())
  ggplot(meta[meta$orig.ident == o,],
         aes_string(x = 'M1_nei', y = 'M2_nei', fill = 'cat', col = 'cat')) +
    geom_point() +
    geom_smooth(method = 'lm') +
    scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
    scale_color_manual(values = colors.cat, breaks = names(colors.cat)) +
    ggtitle(o) +
    theme_classic() +
    panel.theme
})
print(CombinePlots(plot.list, ncol = 3))
1400 | 
# Per-sample boxplots of the `M1M2_nei` column by category, annotated with a
# Wilcoxon test between 'Malignant' and 'Normal' rows.
sub <- melt(meta[,c('orig.ident','cat','M1M2_nei')],
            id.vars = c('orig.ident','cat'), measure.vars = 'M1M2_nei')
p.values <- sapply(split(sub, sub$orig.ident), function(x){
  wilcox.test(value~cat, x, subset = cat %in% c('Malignant','Normal'))$p.value
})
labels <- symnum(p.values, corr = FALSE, cutpoints = c(0,  .001,.01,.05, 1), symbols = c("***","**","*","n.s."))
# Annotation height: the highest upper hinge among a sample's categories, plus a margin
y.values <- sapply(split(sub, sub$orig.ident), function(x){
  hinges <- sapply(split(x, x$cat), function(xx){
    boxplot(xx$value, plot = FALSE)$stats[4, ]
  })
  max(hinges, na.rm = TRUE) + 0.05
})
p <- ggplot(sub, aes_string(x = 'orig.ident', y = 'value', fill = 'cat')) +
  geom_boxplot(coef = 0, outlier.size = 0, outlier.shape = NA, varwidth = FALSE, show.legend = FALSE) +
  geom_signif(annotations = labels,
              xmin = (1:nlevels(sub$orig.ident))-0.2,
              xmax = (1:nlevels(sub$orig.ident))+0.2,
              y_position = y.values, tip_length = 0) +
  scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
  ggtitle('') +
  ylim(-0.2,0.4) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(angle = 90),
    axis.text.x = element_text(angle = 90),
    axis.title.y = element_text(size = 0),
    axis.title.x = element_text(size = 0))
print(p)
1420 | 
1421 | # M1 vs M2
1422 | 
# One panel per sample: `M1` against `M2`, coloured by `cat`, with a linear
# fit per category.
plot.list <- lapply(levels(meta$orig.ident), function(o){
  panel.theme <- theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(angle = 90),
    axis.title.y = element_text(),
    axis.title.x = element_text())
  ggplot(meta[meta$orig.ident == o,],
         aes_string(x = 'M1', y = 'M2', fill = 'cat', col = 'cat')) +
    geom_point() +
    geom_smooth(method = 'lm') +
    scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
    scale_color_manual(values = colors.cat, breaks = names(colors.cat)) +
    ggtitle(o) +
    theme_classic() +
    panel.theme
})
print(CombinePlots(plot.list, ncol = 3))
1439 | 
# Per-sample boxplots of the `M1M2` column for 'Normal' versus 'Both' rows
# (rows with a missing value are dropped), annotated with a Wilcoxon test.
sub <- meta[,c('orig.ident','cat','M1M2')]
keep <- sub$cat %in% c('Both','Normal') & !is.na(sub$M1M2)
sub <- melt(sub[keep,], id.vars = c('orig.ident','cat'), measure.vars = 'M1M2')
sub$cat <- factor(as.character(sub$cat), levels = c('Normal','Both'))
p.values <- sapply(split(sub, sub$orig.ident), function(x){
  wilcox.test(value~cat, x, subset = cat %in% c('Normal','Both'))$p.value
})
labels <- symnum(p.values, corr = FALSE, cutpoints = c(0,  .001,.01,.05, 1), symbols = c("***","**","*","n.s."))
# Annotation height: the highest upper hinge among a sample's categories, plus a margin
y.values <- sapply(split(sub, sub$orig.ident), function(x){
  hinges <- sapply(split(x, x$cat), function(xx){
    boxplot(xx$value, plot = FALSE)$stats[4, ]
  })
  max(hinges, na.rm = TRUE) + 0.05
})
p <- ggplot(sub, aes_string(x = 'orig.ident', y = 'value', fill = 'cat')) +
  geom_boxplot(coef = 0, outlier.size = 0, outlier.shape = NA, varwidth = FALSE, show.legend = FALSE) +
  geom_signif(annotations = labels,
              xmin = (1:nlevels(sub$orig.ident))-0.2,
              xmax = (1:nlevels(sub$orig.ident))+0.2,
              y_position = y.values, tip_length = 0) +
  scale_fill_manual(values = colors.cat, breaks = names(colors.cat)) +
  ggtitle('') +
  ylim(-0.2,0.4) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(angle = 90),
    axis.text.x = element_text(angle = 90),
    axis.title.y = element_text(size = 0),
    axis.title.x = element_text(size = 0))
print(p)

# Close the graphics device
dev.off()
1463 | 
1464 | }
1465 | 


--------------------------------------------------------------------------------
/experiments.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | source('~/Documents/R/seurat_functions.R')
  3 | setwd('~/Documents/Analysis/Experiments/')
  4 | pdf('Experiments.pdf', height = 20, width = 20)
  5 | 
  6 | nrand = 3
  7 | nbin = 25
  8 | range = 8:12
  9 | gmin = 5
 10 | pval_thresh = 0.1
 11 | 
 12 | dir.list = c('Adaptive/RagWT',
 13 |              'Adaptive/RagKO',
 14 |              'PancreasPeritoneum/Pancreas',
 15 |              'PancreasPeritoneum/Peritoneum',
 16 |              'LiverPeritoneum/Peritoneum',
 17 |              'LiverPeritoneum/Liver')
 18 | 
 19 | col_ec = c(brewer.pal(name = 'Purples', n = 3)[3], brewer.pal(name = 'Blues', n = 3)[3],brewer.pal(name = 'RdYlGn', n = 11)[c(1,4,7,10)])
 20 | names(col_ec) = c('Adaptive-RagWT','Adaptive-RagKO',
 21 |                   'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
 22 |                   'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver')
 23 | 
 24 | col_experiment = col_ec[c(1,3,5)]
 25 | names(col_experiment) = c('Adaptive','PancreasPeritoneum','LiverPeritoneum')
 26 | 
 27 | col_eci = c(rep(col_ec[1], 4), rep(col_ec[2], 4), rep(col_ec[3], 2), rep(col_ec[4], 2), rep(col_ec[5], 2), rep(col_ec[6], 2))
 28 | names(col_eci) = c("Adaptive-RagWT-F1", "Adaptive-RagWT-F2",
 29 |                    "Adaptive-RagWT-M1", "Adaptive-RagWT-M2",
 30 |                    "Adaptive-RagKO-F1", "Adaptive-RagKO-F2",
 31 |                    "Adaptive-RagKO-M1", "Adaptive-RagKO-M2",
 32 |                    "PancreasPeritoneum-Pancreas-1", "PancreasPeritoneum-Pancreas-2", 
 33 |                    "PancreasPeritoneum-Peritoneum-1", "PancreasPeritoneum-Peritoneum-2",
 34 |                    "LiverPeritoneum-Peritoneum-3", "LiverPeritoneum-Peritoneum-4",
 35 |                    "LiverPeritoneum-Liver-5", "LiverPeritoneum-Liver-6"
 36 | )
 37 | 
 38 | #### Part1 ####
 39 | 
 40 | srt.list = lapply(dir.list, function(dir){
 41 |   srt = loadRData(paste('~/Documents/Experiments', dir, 'srt.RData', sep = '/'))
 42 |   return(srt)
 43 | })
 44 | save(srt.list, file = 'srt.list.RData')
 45 | srt = Reduce(merge2, srt.list)
 46 | srt = srt[,!is.na(srt$condition)]
 47 | srt = srt[,!is.na(srt$experiment)]
 48 | 
 49 | srt$eci = factor(srt$eci, levels = c(
 50 |   "Adaptive-RagWT-F1", "Adaptive-RagWT-F2",
 51 |   "Adaptive-RagWT-M1", "Adaptive-RagWT-M2",
 52 |   "Adaptive-RagKO-F1", "Adaptive-RagKO-F2",
 53 |   "Adaptive-RagKO-M1", "Adaptive-RagKO-M2",
 54 |   "PancreasPeritoneum-Pancreas-1", "PancreasPeritoneum-Pancreas-2",
 55 |   "PancreasPeritoneum-Peritoneum-1", "PancreasPeritoneum-Peritoneum-2",
 56 |   "LiverPeritoneum-Peritoneum-3", "LiverPeritoneum-Peritoneum-4",
 57 |   "LiverPeritoneum-Liver-5", "LiverPeritoneum-Liver-6"))
 58 | srt$ec = factor(srt$ec, levels = c(
 59 |   'Adaptive-RagWT','Adaptive-RagKO',
 60 |   'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
 61 |   'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver'))
 62 | srt$experiment = factor(srt$experiment, levels = c(
 63 |   'Adaptive','PancreasPeritoneum','LiverPeritoneum'))
 64 | 
 65 | srt = RunPCA(srt)
 66 | srt = RunUMAP(srt, dims = 1:10)
 67 | 
 68 | srt = FindNeighbors(srt)
 69 | srt = FindClusters(srt)
 70 | srt$cluster = Idents(srt)
 71 | col_cluster = hue_pal()(nlevels(srt$cluster))
 72 | names(col_cluster) = levels(srt$cluster)
 73 | markers = FindAllMarkers2(srt, label = FALSE)
 74 | write.table(markers$genes_10, file = 'markers.csv', sep = ',', col.names = TRUE, row.names = FALSE, quote = FALSE)
 75 | 
 76 | save(srt, file = 'srt.RData')
 77 | 
 78 | #### Part 2 ####
 79 | 
 80 | load('srt.RData')
 81 | col_cluster = hue_pal()(nlevels(srt$cluster))
 82 | names(col_cluster) = levels(srt$cluster)
 83 | 
 84 | # Cell types
 85 | srt$singler = 'Malignant'
 86 | srt$singler[srt$cluster %in% c(7,8)] = 'Macrophage'
 87 | srt$singler[srt$cluster %in% c(12)] = 'DC'
 88 | srt$singler[srt$cluster %in% c(15)] = 'TCell'
 89 | srt$singler[srt$cluster %in% c(17)] = 'Endothelial'
 90 | srt$singler[srt$cluster %in% c(18)] = 'Fibroblast'
 91 | srt$singler[srt$cluster %in% c(19)] = 'BCell'
 92 | srt$singler[srt$cluster %in% c(13)] = 'Neutrophil'
 93 | srt$singler = factor(srt$singler)
 94 | col_singler = c(brewer.pal(8, 'Dark2'), brewer.pal(8, 'Accent'))[1:nlevels(srt$singler)]
 95 | names(col_singler) = levels(srt$singler)
 96 | # Finding cells
 97 | cluster.malignant = unique(as.character(srt$cluster)[srt$singler == 'Malignant'])
 98 | cluster.normal = setdiff(levels(srt$cluster), cluster.malignant)
 99 | cells.malignant = WhichCells(srt, expression = cluster %in% cluster.malignant)
100 | cells.normal = WhichCells(srt, expression = cluster %in% cluster.normal)
101 | cells.normal = setdiff(cells.normal, cells.malignant)
102 | # Filtering
103 | test = c('Ccl5','Ptprc','Apoe','Tyrobp','Lyz2')
104 | data = GetAssayData(srt, assay = 'RNA', slot = 'counts')
105 | for (g in test){
106 |   cells.malignant = cells.malignant[data[g, cells.malignant] < 1]
107 | }
108 | # Assigning
109 | srt$type = 'undetermined'
110 | srt@meta.data[cells.malignant, 'type'] = 'malignant'
111 | srt@meta.data[cells.normal, 'type'] = 'normal'
112 | srt$type = factor(srt$type)
113 | print(table(srt$type))
114 | print(table(srt$singler, srt$cluster, srt$type))
115 | col_type = c(brewer_pal(palette = 'Blues')(5)[4], brewer_pal(palette = 'Greens')(5)[4], 'grey')[1:nlevels(srt$type)]
116 | names(col_type) = levels(srt$type)
# Pop
# `pop` refines `type`: normal cells take their `singler` label, malignant
# cells are labelled 'Malignant', and the rest keep 'undetermined'.
srt$pop = as.character(srt$type)
srt$pop[srt$type == 'normal'] = as.character(srt$singler[srt$type == 'normal'])
srt$pop = gsub('malignant','Malignant',srt$pop)
srt$pop = factor(srt$pop)
srt$pop = relevel(srt$pop, ref = 'Malignant')
col_pop = col_singler[intersect(levels(srt$pop),names(col_singler))]
# `col_singler` already has a 'Malignant' entry, so overwrite it in place.
# Appending a second 'Malignant' element would leave name lookups returning
# the original colour and never the intended 'grey20'.
col_pop['Malignant'] = 'grey20'
markers = FindAllMarkers2(srt, group.by = 'pop', label = FALSE)
write.table(markers$genes_10, file = 'markers_singler.csv', sep = ',', col.names = TRUE, row.names = FALSE, quote = FALSE)
127 | 
# Plotting
# The same six groupings are drawn twice: first on a single embedding with
# large points, then split by `ec` with smaller points.
views <- list(
  list(group.by = 'type', cols = col_type, label = FALSE),
  list(group.by = 'cluster', cols = col_cluster, label = TRUE),
  list(group.by = 'singler', cols = col_singler, label = FALSE),
  list(group.by = 'pop', cols = col_pop, label = FALSE),
  list(group.by = 'ec', cols = col_ec, label = FALSE),
  list(group.by = 'experiment', cols = col_experiment, label = FALSE))
layouts <- list(
  list(pt.size = 2, split.by = NULL),
  list(pt.size = 1, split.by = 'ec'))

for (layout in layouts){
  # A fresh random permutation is drawn for each pass
  o <- sample(1:ncol(srt), size = ncol(srt), replace = FALSE)
  for (view in views){
    h <- DimPlot(srt, pt.size = layout$pt.size, group.by = view$group.by,
                 cols = view$cols, label = view$label, order = o,
                 split.by = layout$split.by)
    print(h)
  }
}
156 | 
# Composition of each individual: cell counts per population, rows ordered as
# in `col_eci`, 'undetermined' cells excluded, then converted to row fractions.
tme <- table(srt$eci, srt$pop)
tme <- tme[names(col_eci),]
exclude <- c('undetermined')
tme <- tme[, setdiff(colnames(tme), exclude)]
tme <- tme/rowSums(tme)

barplot(tme, beside = TRUE, col = col_eci[rownames(tme)])
barplot(t(tme), beside = FALSE, col = col_singler[colnames(tme)], las = 2)

# Splitting
# Save the annotated object, then its malignant and normal subsets
save(srt, file = 'srt.RData')
srt.malignant <- srt[, srt$type == 'malignant']
save(srt.malignant, file = 'srt.malignant.RData')
srt.normal <- srt[, srt$type == 'normal']
save(srt.normal, file = 'srt.normal.RData')
180 | 
181 | #### NMF ####
182 | # 
183 | # srt.malignant = loadRData('srt.malignant.RData')
184 | # srt.malignant = srt.malignant[, srt.malignant$condition == 'RagWT']
185 | # srt.malignant = SCTransform(srt.malignant, return.only.var.genes = FALSE)
186 | # srt.malignant = RunPCA(srt.malignant)
187 | # srt.malignant = RunUMAP(srt.malignant, dims = 1:10)
188 | # 
189 | # data = as.matrix(GetAssayData(srt.malignant, slot = 'scale.data'))
190 | # data = data[VariableFeatures(srt.malignant),]
191 | # data[data < 0] = 0
192 | # data = data[apply(data, 1, var) > 0, ]
193 | # print(dim(data))
194 | # 
195 | # res.list = mclapply(range, function(r){
196 | #   nmf(data, rank = r, nrun = 1, seed = 'ica', method = 'nsNMF')
197 | # }, mc.cores = ncores)
198 | # names(res.list) = range
199 | # modules.list = lapply(res.list, NMFToModules, gmin = gmin)
200 | # print(sapply(modules.list,length))
201 | # r = names(which(sapply(modules.list, length) == as.numeric(names(modules.list))))
202 | # r = r[length(r)]
203 | # print(r)
204 | # res = res.list[[r]]
205 | # save(res.list, file = 'res.list.RData')
206 | # save(res, file = 'res.RData')
207 | # 
208 | # modules = NMFToModules(res, gmin = gmin)
209 | # print(modules)
210 | # modules = c(modules, list('Stress' = sort(Reduce(union, modules[c('m_Egr1','m_Fos','m_Hspb1')]))))
211 | # modules = modules[c('m_Hmgb2','Stress','m_H2_K1',
212 | #                     'm_Mt1','m_Rbp4')]
213 | # names(modules) = c('Cycle','Stress','Interferon',
214 | #                    'Hypoxia','Glandular')
215 | # ma = names(modules)
216 | # save(modules, file = 'modules.Mm.RData')
217 | 
218 | #### Human mouse ####
219 | 
# Mouse gene universe: the genes present in the malignant object
srt.malignant <- loadRData('srt.malignant.RData')
genes.mm <- rownames(GetData(srt.malignant))

# The conversion table `conv` is loaded from disk below; the commented code
# is the biomaRt query kept for reference.
# mart = useMart("ensembl", dataset = 'mmusculus_gene_ensembl')
# attributes = c("external_gene_name", grep('hsapiens', listAttributes(mart)$name, value = TRUE))
# map = getBM(attributes = attributes,
#             filters = "external_gene_name", values = genes.mm, mart = mart, uniqueRows = TRUE)
# map = dplyr:::filter(map, external_gene_name %in% genes.mm & !duplicated(external_gene_name))
# conv = data.frame(map[, grep('gene_name', colnames(map), value = TRUE)[c(1,2)]], stringsAsFactors = FALSE)
# colnames(conv) = c('mm','hs')
# conv = conv[!(conv$hs == '' | duplicated(conv$hs)), ]
load('conv.RData')

# One enrichment matrix per annotation source, over the human symbols in `conv`
types <- c('GO','KEGG','REACTOME','HALLMARK')
enrichment.matrix.list <- lapply(types, function(type){
  BuildEnrichmentMatrix(genes = conv$hs, type = type)
})
names(enrichment.matrix.list) <- types
238 | 
# Human modules (the last four entries of the saved list are dropped),
# translated to mouse symbols through `conv`; genes without a match are removed.
hs_hs <- loadRData('../Tumors/modules.RData')
hs_hs <- hs_hs[1:(length(hs_hs) - 4)]
hs_mm <- lapply(hs_hs, function(mod){
  mouse <- conv[match(mod, conv$hs),'mm']
  mouse[!is.na(mouse)]
})

colors.hs <- loadRData('../Tumors/colors.module.RData')
colors.hs <- colors.hs[names(hs_hs)]

# Mouse modules, translated to human symbols the same way
mm_mm <- loadRData('modules.Mm.RData')
ma <- names(mm_mm)
mm_hs <- lapply(mm_mm, function(mod){
  human <- conv[match(mod, conv$mm),'hs']
  human[!is.na(human)]
})
save(mm_hs, file = 'modules.Mm_Hs.RData')

# Mouse modules reuse the colour of the human module with the same name
colors.mm <- colors.hs[names(mm_mm)]
save(colors.mm, file = 'colors.module.Mm.RData')
259 | 
# Hypergeometric enrichment of the overlap between every translated mouse
# module (columns) and every human module (rows), over the `nrow(conv)`
# convertible genes.
ovlp = sapply(mm_hs, function(m1){
  sapply(hs_hs, function(m2){
    # With lower.tail = FALSE, phyper(q, ...) returns P(X > q). The enrichment
    # p-value is P(X >= observed), hence the `- 1` on the observed overlap.
    phyper(length(intersect(m1,m2)) - 1, length(m1), nrow(conv) - length(m1), length(m2), lower.tail = FALSE)
  })
})
ovlp = -log10(ovlp)
# p-values that underflow to 0 give Inf; cap them at the largest finite score
ovlp[is.infinite(ovlp)] = max(ovlp[is.finite(ovlp)])
# Index of the best-matching human module for each mouse module
df = data.frame('top' = apply(ovlp, 2, which.max))
rownames(df) = colnames(ovlp)
269 | 
# Overlap scores for all human modules (rows) against mouse modules (columns)
h = Heatmap(name = 'Module overlap pvalue', ovlp,
            cluster_rows = FALSE, cluster_columns = FALSE,
            show_row_names = TRUE, show_column_names = TRUE,
            row_names_gp = gpar(col = colors.hs), column_names_gp = gpar(col = colors.mm),
            breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Oranges')(n = 9))
print(h)

# Same heatmap restricted to the human modules that share a name with a mouse
# module. The row-name colours are subset with the same names so that each
# colour stays attached to its own row.
h = Heatmap(name = 'Module overlap pvalue', ovlp[names(mm_hs),],
            cluster_rows = FALSE, cluster_columns = FALSE,
            show_row_names = TRUE, show_column_names = TRUE,
            row_names_gp = gpar(col = colors.hs[names(mm_hs)]), column_names_gp = gpar(col = colors.mm),
            breaks = seq(0, 8, length = 9), colors = brewer_pal(palette = 'Oranges')(n = 9))
print(h)
283 | 
# Venn diagrams
# One diagram per module name present in both species; the output file is
# named after the module.
for (n in names(mm_hs)){
  if (!(n %in% names(hs_hs))) next
  gene.sets <- list('mm' = mm_hs[[n]], 'hs' = hs_hs[[n]])
  venn.diagram(x = gene.sets, filename = n)
}
290 | 
291 | ##### Scoring cells #####
292 | 
load('srt.malignant.RData')
modules <- loadRData('modules.Mm.RData')
# Extra module: 'Interferon' without B2m, H2-D1 and H2-K1
interferon.reduced <- setdiff(modules$Interferon, c('B2m','H2-D1','H2-K1'))
modules <- c(modules, list('InterferonRed' = interferon.reduced))
ma <- names(modules)
# The reduced module shares the colour of the full one
colors.module <- loadRData('colors.module.Mm.RData' )
colors.module['InterferonRed'] <- colors.module['Interferon']
colors.module <- colors.module[ma]
300 | 
# Score cells by experiment
# Each experiment is normalised and scored for the modules on its own
srt.malignant.list <- SplitObject(srt.malignant, 'experiment')
srt.malignant.list <- mclapply(srt.malignant.list, function(srt){
  normalised <- SCTransform(srt, return.only.var.genes = FALSE)
  GeneToEnrichment(normalised, db = modules, method = 'rand', nrand = nrand)
}, mc.cores = ncores)
# Per-experiment embedding and plots. The loop variable is a copy, so the
# embedded objects are written to disk here and the list is left unchanged.
for (srt in srt.malignant.list){
  srt <- SCTransform(srt, return.only.var.genes = FALSE)
  srt <- RunUMAP(RunPCA(srt), dims = 1:10)
  o <- sample(colnames(srt), size = ncol(srt), replace = FALSE)
  print(DimPlot(srt, group.by = 'ec', cols = col_ec, order = o, pt.size = 2))
  h <- DimPlot(srt, group.by = 'experiment', cols = col_experiment, order = o, pt.size = 2)
  print(h)
  plot.list <- lapply(ma, function(n){
    FeaturePlot(srt, n, pt.size = 1, cols = c('grey',colors.module[n]))
  })
  print(CombinePlots(plot.list, ncol = 3))
  save(srt, file = paste0(unique(srt$experiment),'.malignant.RData'))
}
save(srt.malignant.list, file = 'srt.malignant.list.RData')
srt.malignant <- Reduce(merge2, srt.malignant.list)
324 | 
# Test modules by condition
# For every condition and module, compare the PCA standard deviations obtained
# on the module genes with those obtained on the gene sets from MakeRand.
srt.malignant.list <- SplitObject(srt.malignant, 'ec')
vals <- mclapply(srt.malignant.list, function(srt){
  # Standard deviations of the first two PCs on a gene set; c(NA, NA) on failure
  pc_stdev <- function(features){
    tryCatch(
      RunPCA(srt, features = features, verbose = FALSE, npcs = 2)@reductions$pca@stdev,
      error = function(e){c(NA,NA)})
  }
  modules_rand <- MakeRand(srt, db = modules, nrand = nrand, nbin = nbin)
  sapply(ma, function(m){
    ra <- sapply(modules_rand[[m]], pc_stdev)
    re <- pc_stdev(modules[[m]])
    # -log10 of the fraction of MakeRand sets at least as extreme as the module
    c('dispersion' = -log10(mean(ra[1,] >= re[1])),
      'coordination' = -log10(mean(ra[1,]/ra[2,] >= re[1]/re[2])))
  })
}, mc.cores = ncores)
# Modules (rows) by conditions (columns); infinite scores are replaced by `nrand`
dispersion <- sapply(vals, '[', 'dispersion', ma)
dispersion[is.infinite(dispersion)] <- nrand
top_ann <- HeatmapAnnotation(df = data.frame('ec' = names(srt.malignant.list)),
                             col = list('ec' = col_ec),
                             which = 'column')
h <- Heatmap(dispersion, name = 'Dispersion',
             top_annotation = top_ann,
             cluster_rows = FALSE, cluster_columns = FALSE,
             show_row_names = TRUE, show_column_names = TRUE,
             row_names_gp = gpar(col = colors.module[rownames(dispersion)]),
             column_names_gp = gpar(col = col_ec[colnames(dispersion)]),
             breaks = seq(0, nrand, length = 11),
             col = viridis_pal(begin = 0, end = 1, option = 'magma')(11))
print(h)
# Draw a black grid over the heatmap body, one line per cell boundary
decorate_heatmap_body('Dispersion', {
  #l = c(0,cumsum(rle(as.character(cancer))$lengths))
  for (x in (0:ncol(dispersion))/ncol(dispersion)){
    grid.lines(unit(x, 'npc'), c(0,1), gp = gpar(lwd = 1, col = 'black'))
  }
  for (y in (0:nrow(dispersion))/nrow(dispersion)){
    grid.lines(c(0,1), unit(y, 'npc'), gp = gpar(lwd = 1, col = 'black'))
  }
})
# Gene-gene correlation matrix of every module within every condition.
# Lists are built explicitly (lapply / `[[`) rather than through sapply, whose
# simplification would collapse the matrices into one matrix whenever all
# modules happened to contain the same number of genes.
cors = mclapply(srt.malignant.list, function(srt){
  data = GetData(srt, slot = 'data')
  v = lapply(ma, function(m){
    re = cor(t(as.matrix(data[modules[[m]],])))
    re[is.na(re)] = 0   # undefined correlations are treated as 0
    diag(re) = NA       # self-correlations are excluded
    return(re)
  })
  names(v) = ma
  return(v)
}, mc.cores = ncores)
# Reorganise from condition -> module to module -> condition
cors = lapply(ma, function(m){
  a = lapply(cors, '[[', m)
  names(a) = names(srt.malignant.list)
  return(a)
})
names(cors) = ma
# One row of correlation heatmaps per module, one heatmap per condition
for (m in ma){
  h.list <- NULL
  for (s in names(srt.malignant.list)){
    h.s <- Heatmap(cors[[m]][[s]], name = m,
                   cluster_rows = FALSE, cluster_columns = FALSE, show_row_names = TRUE,
                   breaks = seq(-1, 1, length = 11),
                   colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
    h.list <- h.list + h.s
  }
  print(h.list)
}
# Boxplots of the same correlations, one panel per module and one box per condition
par(mfrow = c(4,4))
for (m in ma){
  a <- as.vector(cors[[m]])
  conditions <- names(cors[[m]])
  boxplot(a, main = m, col = col_ec[conditions], pch = 20, cex = 0.1, las = 2,
          ylim = c(-0.5,1), names = rep('', length(conditions)))
  #abline(h = 0)
}
par(mfrow = c(1,1))
# Per-condition embedding and plots. The loop variable is a copy, so the list
# itself is not modified.
for (srt in srt.malignant.list){
  srt <- SCTransform(srt, return.only.var.genes = FALSE)
  srt <- RunUMAP(RunPCA(srt), dims = 1:10)
  o <- sample(colnames(srt), size = ncol(srt), replace = FALSE)
  print(DimPlot(srt, group.by = 'ec', cols = col_ec, order = o, pt.size = 2))
  h <- DimPlot(srt, group.by = 'experiment', cols = col_experiment, order = o, pt.size = 2)
  print(h)
  plot.list <- lapply(ma, function(n){
    FeaturePlot(srt, n, pt.size = 1, cols = c('grey',colors.module[n]))
  })
  print(CombinePlots(plot.list, ncol = 3))
}
srt.malignant <- Reduce(merge2, srt.malignant.list)

# Restore the display order of the grouping factors after the merge; the level
# vectors are the names of the matching colour palettes.
srt.malignant$eci <- factor(srt.malignant$eci, levels = names(col_eci))
srt.malignant$ec <- factor(srt.malignant$ec, levels = names(col_ec))
srt.malignant$experiment <- factor(srt.malignant$experiment, levels = names(col_experiment))

srt.malignant <- RunPCA(srt.malignant)
srt.malignant <- RunUMAP(srt.malignant, dims = 1:10)
441 | 
# For every module, add a `<module>_cat` column: the cell's condition (`ec`)
# when its score is above 0.5, and '0' otherwise.
for (m in ma){
  n = paste(m, 'cat', sep = '_')
  # Codes: 0 = below threshold, k = k-th level of `ec`. `levels` is given
  # explicitly so the labels stay aligned (and factor() does not fail) when
  # some condition has no cell above the threshold.
  srt.malignant@meta.data[,n] = factor((srt.malignant@meta.data[,m] > 0.5)*as.numeric(srt.malignant$ec),
                                       levels = 0:nlevels(srt.malignant$ec),
                                       labels = c(0, levels(srt.malignant$ec)))
}
col_cat_ec = c('lightgrey',col_ec)
names(col_cat_ec) = c('0',names(col_ec))
448 | 
# Module scores per individual
h = VlnPlot(srt.malignant, features = ma, group.by = 'eci', cols = col_eci, pt.size = 0, ncol = 1)
print(h)

# Merged embedding: conditions, experiments, module scores and thresholded
# module categories. Each plot is printed once; the repeated print(h) calls
# only duplicated the preceding page.
o = sample(colnames(srt.malignant), size = ncol(srt.malignant), replace = FALSE)
h = DimPlot(srt.malignant, group.by = 'ec', cols = col_ec, order = o, pt.size = 2)
print(h)
h = DimPlot(srt.malignant, group.by = 'experiment', cols = col_experiment, order = o, pt.size = 2)
print(h)
plot.list = lapply(ma, function(n){
  FeaturePlot(srt.malignant, n, pt.size = 1, cols = c('grey',colors.module[n]))
})
print(CombinePlots(plot.list, ncol = 3))
plot.list = lapply(paste(ma, 'cat', sep = '_'), function(n){
  h = DimPlot(srt.malignant, group.by = n, cols = col_cat_ec, order = o, pt.size = 0.1)
})
print(CombinePlots(plot.list, ncol = 2))

# Same plots, split by condition
o = sample(1:ncol(srt.malignant), size = ncol(srt.malignant), replace = FALSE)
h = DimPlot(srt.malignant, group.by = 'ec', cols = col_ec, order = o, pt.size = 1, split.by = 'ec')
print(h)
h = DimPlot(srt.malignant, group.by = 'experiment', cols = col_experiment, order = o, pt.size = 1, split.by = 'ec')
print(h)
plot.list = lapply(ma, function(n){
  FeaturePlot(srt.malignant, n, split.by = 'ec', pt.size = 1, cols = c('grey',colors.module[n]))
})
print(CombinePlots(plot.list, ncol = 1))
plot.list = lapply(paste(ma, 'cat', sep = '_'), function(n){
  h = DimPlot(srt.malignant, group.by = n, cols = col_cat_ec, order = (srt.malignant@meta.data[,n] == 0), pt.size = 0.1, split.by = 'ec')
})
print(CombinePlots(plot.list, ncol = 1))
481 | 
# Keep the cell metadata as a standalone table, and save it together with the
# scored object.
meta <- srt.malignant@meta.data
save(list = 'meta', file = 'meta.malignant.RData')
save(list = 'srt.malignant', file = 'srt.malignant.RData')
485 | 
486 | #### Plotting ####
487 | 
# Fraction of cells with a score above 0.5, per individual (rows) and module
# (columns).
mat = Reduce(cbind, lapply(ma, function(m){
  freq = (meta[,m] > 0.5)
  mat = data.frame('value' = as.matrix(by(freq, meta$eci, mean)))
  return(mat)
}))
colnames(mat) = ma
# Report as percentages. The scaling must happen inside print(): with
# `print(mat)*100` the raw fractions were printed and the scaled table was
# emitted a second time by top-level auto-printing.
print(mat*100)
495 | 
496 | # Barplot: for each module, fraction of positive cells per eci (points) and group mean per ec (bars)
497 | 
498 | plot.list = lapply(ma, function(m){
499 |   freq = (meta[,m] > 0.5)  # TRUE for cells with score above 0.5
500 |   mat = data.frame('value' = as.matrix(by(freq, meta$eci, mean)))  # local `mat`: fraction of positive cells per eci
501 |   mat$eci = rownames(mat)
502 |   mat$experiment = factor(sapply(strsplit(mat$eci, '-'), '[', 1))  # eci is split on '-' into experiment, condition, individual
503 |   mat$condition = factor(sapply(strsplit(mat$eci, '-'), '[', 2))
504 |   mat$individual = factor(sapply(strsplit(mat$eci, '-'), '[', 3))
505 |   mat$ec = factor(paste(mat$experiment, mat$condition, sep = '-'),   # fixed level order defines the bar order and the pairs compared below
506 |                   levels = c('Adaptive-RagWT','Adaptive-RagKO',
507 |                              'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
508 |                              'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver'))
509 |   p = ggplot(mat, aes(x = ec, y = value, fill = ec, color = ec)) +
510 |     stat_summary(fun.y = mean, fun.ymin = mean, fun.ymax = mean,  # bar height = mean over the eci values of each ec
511 |                  geom = 'bar', width = 0.5) +
512 |     geom_point(color = 'black') +
513 |     scale_color_manual(values = col_ec) +
514 |     scale_fill_manual(values = col_ec) +
515 |     geom_signif(comparisons = list(levels(mat$ec)[1:2]),   # levels 1 vs 2 are compared with a Wilcoxon test
516 |                 test = 'wilcox.test', map_signif_level = TRUE, margin_top = -0.1) +
517 |     geom_signif(comparisons = list(levels(mat$ec)[3:4],levels(mat$ec)[5:6]),   # levels 3 vs 4 and 5 vs 6 are compared with a t-test
518 |                 test = 't.test', map_signif_level = TRUE, margin_top = -0.1) +
519 |     ylim(0, max(mat$value)+0.1) +
520 |     ggtitle(m) +
521 |     theme_classic() + 
522 |     theme(
523 |       legend.position = 'none',
524 |       plot.title = element_text(hjust = 0.5),
525 |       axis.text.x = element_text(size = 0), 
526 |       axis.title.x = element_text(size = 0), 
527 |       axis.title.y = element_text(size = 0))
528 |   p
529 | })
530 | print(CombinePlots(plot.list, ncol = 3))
531 | 
532 | # Diagonal plots
533 | 
534 | # One row per module: mean, SD and SE of the per-eci fraction of positive cells (score > 0.5)
535 | # within each experiment-condition group. data.frame() turns the '-' of the group names into '.',
536 | # so the columns are named 'mean_<ec>', 'sd_<ec>' and 'se_<ec>' with e.g. <ec> = 'Adaptive.RagWT'.
537 | mat = data.frame(Reduce(rbind, lapply(ma, function(m){
538 |   freq = (meta[,m] > 0.5)
539 |   sam = data.frame('value' = as.matrix(by(freq, meta$eci, mean)))
540 |   sam$eci = rownames(sam)
541 |   sam$experiment = factor(sapply(strsplit(sam$eci, '-'), '[', 1))
542 |   sam$condition = factor(sapply(strsplit(sam$eci, '-'), '[', 2))
543 |   sam$individual = factor(sapply(strsplit(sam$eci, '-'), '[', 3))
544 |   sam$ec = factor(paste(sam$experiment, sam$condition, sep = '-'), 
545 |                   levels = c('Adaptive-RagWT','Adaptive-RagKO',
546 |                              'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
547 |                              'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver'))
548 |   stats_mean = as.matrix(by(sam$value, sam$ec, mean))
549 |   rownames(stats_mean) = paste('mean', rownames(stats_mean), sep = '_')
550 |   stats_sd = as.matrix(by(sam$value, sam$ec, sd))
551 |   rownames(stats_sd) = paste('sd', rownames(stats_sd), sep = '_')
552 |   # SE = SD / sqrt(n) with n the number of eci actually present in each group (table() follows the
553 |   # level order used by by()); the previous version divided every group by the average group size
554 |   stats_se = stats_sd / sqrt(as.numeric(table(sam$ec)))
555 |   rownames(stats_se) = sub('^sd_', 'se_', rownames(stats_se))
556 |   stats = t(rbind(stats_mean, stats_sd, stats_se))
557 |   rownames(stats) = m
558 |   return(stats)
559 | })))
560 | mat$module = rownames(mat)
561 | 
562 | # Scatter of the group means of condition `x` against condition `y` (one labelled point per module)
563 | # with +/- SE error bars and the identity line. `x` and `y` are group names as they appear in the
564 | # column names of `mat` (e.g. 'Adaptive.RagWT'). Reads the global `mat` and `colors.module`.
565 | DiagonalPlot = function(x, y){
566 |   mx = paste0('mean_', x)
567 |   my = paste0('mean_', y)
568 |   sx = paste0('se_', x)
569 |   sy = paste0('se_', y)
570 |   ggplot(data = mat, aes(x = .data[[mx]], y = .data[[my]], color = module)) + 
571 |     geom_point() +
572 |     scale_fill_manual(values = colors.module) +
573 |     scale_color_manual(values = colors.module) +
574 |     geom_errorbarh(aes(xmax = .data[[mx]] + .data[[sx]], xmin = .data[[mx]] - .data[[sx]]), height = 0.002) +
575 |     geom_errorbar(aes(ymax = .data[[my]] + .data[[sy]], ymin = .data[[my]] - .data[[sy]]), width = 0.002) + 
576 |     geom_text(aes(label = module), nudge_y = 0.01) +
577 |     geom_abline(slope = 1, intercept = 0, color = 'grey') + 
578 |     ggtitle('') +
579 |     xlab(mx) +  # explicit axis titles: keep the plain column names as labels
580 |     ylab(my) +
581 |     xlim(0,0.5) +
582 |     ylim(0,0.5) +
583 |     theme_classic() + 
584 |     theme(
585 |       plot.title = element_text(hjust = 0.5),
586 |       axis.text.x = element_text(angle = 0, hjust = 1), 
587 |       axis.text.y = element_text(angle = 0, hjust = 1))
588 | }
589 | plot.list = list(
590 |   DiagonalPlot('Adaptive.RagWT', 'Adaptive.RagKO'),
591 |   DiagonalPlot('PancreasPeritoneum.Pancreas', 'PancreasPeritoneum.Peritoneum'),
592 |   DiagonalPlot('LiverPeritoneum.Peritoneum', 'LiverPeritoneum.Liver'))
593 | print(CombinePlots(plot.list, ncol = 2))
606 | 
607 | # # TME
608 | # 
609 | # state = data.frame(Reduce(cbind, lapply(ma, function(m){
610 | #   freq = (meta[,m] > 0.5)
611 | #   sam = data.frame('value' = as.matrix(by(freq, meta$eci, mean)))
612 | #   sam$eci = rownames(sam)
613 | #   sam$experiment = factor(sapply(strsplit(sam$eci, '-'), '[', 1))
614 | #   sam$condition = factor(sapply(strsplit(sam$eci, '-'), '[', 2))
615 | #   sam$individual = factor(sapply(strsplit(sam$eci, '-'), '[', 3))
616 | #   sam$ec = factor(paste(sam$experiment, sam$condition, sep = '-'), 
617 | #                   levels = c('Adaptive-RagWT','Adaptive-RagKO',
618 | #                              'PancreasPeritoneum-Pancreas','PancreasPeritoneum-Peritoneum',
619 | #                              'LiverPeritoneum-Peritoneum','LiverPeritoneum-Liver'))
620 | #   stats_mean = as.matrix(by(sam$value, sam$eci, mean))
621 | #   colnames(stats_mean) = m
622 | #   return(stats_mean)
623 | # })))
624 | # 
625 | # # Multiple regression
626 | # exclude = c('Malignant')
627 | # tme = tme[, setdiff(colnames(tme), exclude)]
628 | # 
629 | # test = sapply(ma, function(m){
630 | #   fit = summary(lm(state[,m] ~ tme))$coefficients
631 | #   pval = -log10(fit[,4])
632 | #   pval[pval < pval_thresh] = 0
633 | #   names(pval) = gsub('tme','',names(pval))
634 | #   est = fit[,1]
635 | #   names(est) = gsub('tme','',names(est))
636 | #   val = pval*sign(est)
637 | #   return(val)
638 | # })
639 | # h = Heatmap(test, 
640 | #             show_row_names = TRUE, show_column_names = TRUE,
641 | #             cluster_rows = FALSE, cluster_columns = FALSE,
642 | #             breaks = seq(-3, 3, length = 11), colors = brewer_pal(palette = 'RdBu', direction = -1)(n = 11))
643 | # print(h)
644 | # 
645 | # # Plot hits
646 | # par(mfrow = c(4,4))
647 | # for (ct in colnames(tme)){
648 | #   for (m in ma){
649 | #     if (!test[ct,m] == 0){
650 | #       fit = summary(lm(state[,m] ~ tme[,ct]))$coefficients
651 | #       pval = -log10(fit[2,4])
652 | #       plot(state[,m] ~ tme[,ct], col = col_eci[rownames(state)], pch = 20, xlab = ct, ylab = m, main = paste(round(pval, digit = 2), round(test[ct,m], digit = 2)))
653 | #       abline(a = fit[1,1], b = fit[2,1], lty = 2)
654 | #     }
655 | #   }
656 | # }
657 | # par(mfrow = c(1,1))
658 | # 
659 | # # Plot hits, log
660 | # par(mfrow = c(4,4))
661 | # for (ct in colnames(tme)){
662 | #   for (m in ma){
663 | #     if (!test[ct,m] == 0){
664 | #       fit = summary(lm(state[,m] ~ tme[,ct]))$coefficients
665 | #       pval = -log10(fit[2,4])
666 | #       plot(10^-4+tme[,ct], 10^-4+state[,m], col = col_eci[rownames(state)], pch = 20, xlab = ct, ylab = m, main = paste(round(pval, digit = 2), round(test[ct,m], digit = 2)), log = 'xy')
667 | #     }
668 | #   }
669 | # }
670 | # par(mfrow = c(1,1))
671 | 
672 | # Individual genes: average expression of the Interferon module genes per condition x Interferon-status group
673 | 
674 | srt = loadRData('Adaptive.malignant.RData')
675 | srt$combined = paste(srt$condition, srt$Interferon > 0.5, sep = '_')  # labels of the form '<condition>_TRUE' / '<condition>_FALSE'
676 | Idents(srt) = 'combined'
677 | avg = AverageExpression(srt, slot = 'data')$SCT  # genes x groups; columns are indexed below as 'RagWT_TRUE', 'RagKO_FALSE', ...
678 | 
679 | MHCI = c('B2m','H2-D1','H2-K1')  # genes that get a text label in the scatter plots
680 | 
681 | par(mfrow = c(5,5))
682 | 
683 | plot(avg[modules$Interferon, 'RagWT_TRUE'], avg[modules$Interferon, 'RagKO_TRUE'],   # RagWT vs RagKO, cells with Interferon > 0.5
684 |      pch = 20, xlab = 'RagWT', ylab = 'RagKO', main = 'Interferon', xlim = c(0,18), ylim = c(0,18))
685 | text(avg[MHCI, 'RagWT_TRUE'], avg[MHCI, 'RagKO_TRUE'], MHCI, cex = 0.7, pos = 3, offset = 0.2)
686 | abline(a = 0, b = 1, col = 'grey')  # identity line
687 | 
688 | plot(avg[modules$Interferon, 'RagWT_FALSE'], avg[modules$Interferon, 'RagKO_FALSE'],   # RagWT vs RagKO, remaining cells
689 |      pch = 20, xlab = 'RagWT', ylab = 'RagKO', main = 'Other', xlim = c(0,18), ylim = c(0,18))
690 | text(avg[MHCI, 'RagWT_FALSE'], avg[MHCI, 'RagKO_FALSE'], MHCI, cex = 0.7, pos = 3, offset = 0.2)
691 | abline(a = 0, b = 1, col = 'grey')
692 | 
693 | plot(avg[modules$Interferon, 'RagWT_FALSE'], avg[modules$Interferon, 'RagWT_TRUE'],   # within RagWT: other cells vs Interferon > 0.5 cells
694 |      pch = 20, xlab = 'Other', ylab = 'Interferon', main = 'RagWT', xlim = c(0,18), ylim = c(0,18))
695 | text(avg[MHCI, 'RagWT_FALSE'], avg[MHCI, 'RagWT_TRUE'], MHCI, cex = 0.7, pos = 3, offset = 0.2)
696 | abline(a = 0, b = 1, col = 'grey')
697 | 
698 | plot(avg[modules$Interferon, 'RagKO_FALSE'], avg[modules$Interferon, 'RagKO_TRUE'],   # within RagKO: other cells vs Interferon > 0.5 cells
699 |      pch = 20, xlab = 'Other', ylab = 'Interferon', main = 'RagKO', xlim = c(0,18), ylim = c(0,18))
700 | text(avg[MHCI, 'RagKO_FALSE'], avg[MHCI, 'RagKO_TRUE'], MHCI, cex = 0.7, pos = 3, offset = 0.2)
701 | abline(a = 0, b = 1, col = 'grey')
702 | 
703 | par(mfrow = c(5,1))
704 | 
705 | o = order(avg[modules$Interferon,'RagWT_TRUE'], decreasing = TRUE)  # genes sorted by their RagWT_TRUE average, highest first
706 | barplot(t(avg[modules$Interferon[o],]), beside = TRUE, col = col_ec[c(1,1,2,2)], las = 2)  # NOTE(review): colours assume the columns of `avg` come in pairs per condition -- confirm the column order
707 | barplot(t(avg[modules$Interferon[o],]), beside = TRUE, col = colors.module['Interferon'], density = c(0,20,0,20), las = 2, add = TRUE)  # second pass overlays hatching on every other bar
708 | 
709 | par(mfrow = c(1,1))
710 | 
711 | dev.off()
712 | 
713 | pvals = sapply(ma, function(m){  # KS-test p-value for every module (outer loop) and experiment (inner loop)
714 |   sapply(levels(meta$experiment), function(e){
715 |     sub = meta[meta$experiment == e,]
716 |     sub$ec = factor(sub$ec)  # re-factor so that only the ec levels present in this experiment remain
717 |     i = levels(sub$ec)[1]
718 |     j = levels(sub$ec)[2]
719 |     ks.test(sub[sub$ec == i,m], sub[sub$ec == j,m])$p.value  # pooled cells of the first ec level vs the second
720 |   })
721 | })
722 | pvals = -log10(pvals)
723 | pvals[is.infinite(pvals)] = max(pvals[is.finite(pvals)])  # p-values of exactly 0 give Inf; cap them at the largest finite value
724 | barplot(pvals, beside = TRUE, las= 2, col = col_experiment)  # NOTE(review): drawn after the dev.off() above, so presumably not into the device closed there -- confirm where this should go
725 | 
726 | # Per module and experiment: largest KS-test p-value over all pairs of individuals (eci) taken from the two ec levels
727 | pvals = sapply(ma, function(m){
728 |   sapply(levels(meta$experiment), function(e){
729 |     sub = meta[meta$experiment == e,]
730 |     sub$ec = factor(sub$ec)  # keep only the ec levels present in this experiment
731 |     li = as.character(unique(sub$eci[sub$ec == levels(sub$ec)[1]]))  # individuals of the first condition
732 |     lj = as.character(unique(sub$eci[sub$ec == levels(sub$ec)[2]]))  # individuals of the second condition
733 |     max(sapply(li, function(a){
734 |       sapply(lj, function(b){
735 |           # one individual against one individual; comparing against the whole li/lj vectors recycled them over the rows
736 |           ks.test(sub[sub$eci == a,m], sub[sub$eci == b,m])$p.value
737 |       })
738 |     }))
739 |   })
740 | })
741 | pvals = -log10(pvals)
742 | pvals[is.infinite(pvals)] = max(pvals[is.finite(pvals)])  # cap -log10(0) = Inf at the largest finite value
743 | barplot(pvals, beside = TRUE, las= 2, col = col_experiment)
744 | 
745 | 
746 | 
747 | 


--------------------------------------------------------------------------------
/part1.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | source('~/Documents/R/seurat_functions.R')
  3 | 
  4 | #### Arguments ####
  5 | # Usage: part1.R <cancer> <species> [author] <sample>; `author` is '' when only three arguments are given
  6 | args = commandArgs(trailingOnly = TRUE)
  7 | if (!(length(args) %in% c(3, 4))){
  8 |   # Fail early: otherwise cancer/species/author/sample stay undefined and the script stops later on an unrelated line
  9 |   stop('Usage: part1.R <cancer> <species> [author] <sample>', call. = FALSE)
 10 | }
 11 | cancer = args[1]
 12 | species = args[2]
 13 | author = if (length(args) == 4) args[3] else ''
 14 | sample = args[length(args)]  # the sample is always the last argument
 15 | # Thresholds referenced by the FilterCells call that is commented out in the Filter section
 16 | numi.min = -Inf
 17 | ngene.min = -Inf
 18 | mt.max = Inf
 19 | # NMF ranks to try, and the `gmin` value passed to NMFToModules
 20 | range = 2:25
 21 | gmin = 5
 22 | 
 23 | # Combining
 24 | orig.ident = paste0(cancer, species, author, sample)
 25 | print(orig.ident)
 26 | 
 27 | #### Load ####
 28 | st = Load10X_Spatial(getwd())  # reads the Visium data from the current working directory
 29 | st$cancer = cancer  # the command-line fields are stored as per-spot metadata
 30 | st$author = author
 31 | st$species = species
 32 | st$technique = 'Visium'
 33 | st$sample = as.character(sample)
 34 | st$orig.ident = orig.ident
 35 | Idents(st) = 'orig.ident'
 36 | 
 37 | #### Filter ####
 38 | pdf('filter.pdf')  # NOTE(review): FilterCells(do.plot = TRUE) is commented out, so this PDF may stay empty unless a later step draws -- confirm
 39 | #st = FilterCells(st, do.plot = TRUE, assay = 'Spatial', 
 40 | #                 numi.min = numi.min, ngene.min = ngene.min, mt.max = mt.max)
 41 | st = FilterGenes(st)
 42 | st = SCTransform(st, assay = 'Spatial', return.only.var.genes = FALSE)
 43 | st = FindSpatiallyVariableFeatures(st, selection.method = 'markvariogram')
 44 | st = RunPCA(st, features = SpatiallyVariableFeatures(st))  # PCA on the spatially variable features
 45 | st = RunUMAP(st, dims = 1:10)
 46 | dev.off()
 47 | 
 48 | #### CNV ####
 49 | if (file.exists('Clones/labels_Clones.csv')){  # clone labels are optional; without the file every spot gets clone 0
 50 |   cnv = read.csv('Clones/labels_Clones.csv')
 51 |   rownames(cnv) = colnames(st)  # assumes one CSV row per spot, in the same order as colnames(st) -- TODO confirm (no barcode matching is done)
 52 |   st = AddMetaData(st, cnv[,2], col.name = 'clone')  # the second CSV column is used as the clone label
 53 | } else {
 54 |   st$clone = 0
 55 | }
 56 | st$clone = as.factor(st$clone)
 57 | col_clone = brewer_pal(palette = 'Reds')(1+nlevels(st$clone))[-1]  # one extra colour is requested and the first one dropped
 58 | names(col_clone) = levels(st$clone)
 59 | 
 60 | #### Cluster ####
 61 | st = FindNeighbors(st)
 62 | st = FindClusters(st)
 63 | st$cluster = Idents(st)  # keep the clustering result as its own metadata column
 64 | col_cluster = hue_pal()(nlevels(st$cluster))
 65 | names(col_cluster) = levels(st$cluster)
 66 | 
 67 | #### NMF ####
 68 | # Run NMF
 69 | data = as.matrix(GetAssayData(st, assay = 'SCT', slot = 'scale.data'))
 70 | data = data[SpatiallyVariableFeatures(st),]  # restrict to the spatially variable features
 71 | data[data < 0] = 0  # negative scaled values are set to zero before NMF
 72 | data = data[apply(data, 1, var) > 0, ]  # drop features that are constant after clipping
 73 | print(dim(data))
 74 | res.list = mclapply(range, function(r){  # one NMF fit per rank in `range`; `ncores` is not defined in this script (presumably by the sourced file)
 75 |   nmf(data, rank = r, nrun = 1, seed = 'ica', method = 'nsNMF')
 76 | }, mc.cores = ncores)
 77 | names(res.list) = range
 78 | save(res.list, file = 'res.list.RData')
 79 | # Select rank
 80 | modules.list = lapply(res.list, NMFToModules, gmin = gmin)
 81 | print(sapply(modules.list,length))
 82 | comp = as.numeric(names(modules.list)) - sapply(modules.list, length)  # rank minus the number of modules NMFToModules returned for it
 83 | mi = min(comp)
 84 | r = names(which(comp == mi))  # all ranks reaching the minimum difference
 85 | r = r[length(r)]  # ties: take the last one, i.e. the highest rank since `range` is increasing
 86 | print(r)
 87 | res = res.list[[r]]  # `r` is a character name, so this indexes by rank label rather than by position
 88 | save(res, file = 'res.RData')
 89 | # Process output: name, order and store the modules of the selected rank. Failures are logged and the script goes on
 90 | tryCatch(expr = {
 91 |   modules = NMFToModules(res, gmin = gmin)
 92 |   scores = basis(res)  # basis matrix; its columns are named after the modules
 93 |   colnames(scores) = names(modules)
 94 |   coefs = coefficients(res)  # coefficient matrix; its rows are named after the modules
 95 |   rownames(coefs) = names(modules)
 96 |   # Order modules
 97 |   h = Heatmap(coefs, clustering_distance_columns = 'euclidean')
 98 |   o = row_order(h)  # row order of the heatmap, applied to scores, coefs and modules alike
 99 |   scores = scores[, o]
100 |   coefs = coefs[o, ]
101 |   modules = modules[o]
102 |   print(modules)
103 |   st = AddMetaData(st, t(coefs), col.name = rownames(coefs))  # one metadata column per module
104 |   # Cluster NMF output
105 |   h = Heatmap(coefs, clustering_distance_columns = 'euclidean')
106 |   hcl = as.hclust(column_dend(h))
107 |   sig = cutree(hcl, k = length(modules))  # as many column clusters as there are modules
108 |   nmf = c(by(t(coefs), INDICES = sig, FUN = function(x){names(modules)[which.max(colMeans(x))]}))[sig]  # label each cluster with its highest-mean module, then map the label back to every column
109 |   st$nmf = factor(nmf, levels = names(modules))
110 |   col_nmf = c(brewer.pal(12, 'Set3'), brewer.pal(8, 'Set1'))[1:nlevels(st$nmf)]  # the two palettes provide 20 colours in total
111 |   names(col_nmf) = levels(st$nmf)
112 | }, error = function(e){message('NMF post-processing failed: ', conditionMessage(e)); c()})  # report the reason instead of swallowing it; objects assigned after the failing line stay undefined
113 | save(st, file = 'st.RData')
114 | 
115 | #### Saving ####
116 | 
117 | #### Plotting ####
118 | pdf('dimplots.pdf', height = 15, width = 15)
119 | for (reduction in c('pca','umap')){  # same four views (cluster, clone, nmf, module scores) in each embedding
120 |   h = DimPlot(st, pt.size = 2, reduction = reduction, group.by = 'cluster', cols = col_cluster, label = TRUE)
121 |   print(h)
122 |   h = DimPlot(st, pt.size = 2, reduction = reduction, group.by = 'clone', cols = col_clone, label = FALSE)
123 |   print(h)
124 |   h = DimPlot(st, pt.size = 2, reduction = reduction, group.by = 'nmf', cols = col_nmf)
125 |   print(h)
126 |   h = FeaturePlot(st, reduction = reduction, features = names(modules))
127 |   print(h)
128 | }
129 | h = SpatialDimPlot(st, pt.size = 2, group.by = 'cluster', cols = col_cluster)
130 | print(h)
131 | h = SpatialDimPlot(st, pt.size = 2, group.by = 'clone', cols = col_clone)  # clone view, mirroring the loop above; this call used to repeat the cluster plot
132 | print(h)
133 | h = SpatialDimPlot(st, pt.size = 2, group.by = 'nmf', cols = col_nmf)
134 | print(h)
135 | h = SpatialFeaturePlot(st, features = names(modules))
136 | print(h)
137 | dev.off()
138 | pdf('heatmaps.pdf', height = 20, width = 10)
139 | sink('heatmaps.txt')  # printed output goes to this file until the sink() call at the end of the section
140 | # Clusters
141 | markers = FindAllMarkers2(st, do.print = TRUE, do.plot = TRUE, enrichment.type = 'GO', group.by = 'cluster', cols = col_cluster, print.bar = FALSE)
142 | # NMF
143 | markers = FindAllMarkers2(st, do.print = TRUE, do.plot = TRUE, enrichment.type = 'GO', group.by = 'nmf', cols = col_nmf, print.bar = FALSE)  # overwrites the cluster markers held in `markers`
144 | # Modules
145 | top_ann = HeatmapAnnotation(df = data.frame('cluster' = st$cluster,
146 |                                             'nmf' = st$nmf), 
147 |                             col = list('cluster' = col_cluster,
148 |                                        'nmf' = col_nmf), which = 'column')
149 | h = Heatmap(coefs, name = 'coefs', top_ann = top_ann,
150 |             show_row_names = TRUE, 
151 |             cluster_rows = FALSE,
152 |             cluster_columns = FALSE, column_order = order(st$nmf, st$cluster),  # columns sorted by nmf label, then by cluster
153 |             breaks = c(0,max(coefs)/2), colors = c('white','red'))
154 | print(h)
155 | h = Heatmap(scores[unlist(modules),], name = 'scores', 
156 |             show_row_names = TRUE, row_names_gp = gpar(cex = 0.5),
157 |             cluster_rows = FALSE,
158 |             cluster_columns = FALSE,
159 |             split = factor(unlist(mapply(rep, names(modules), sapply(modules, length))), levels = names(modules)))  # each row is assigned to the module whose gene list it comes from
160 | print(h)
161 | print(modules)
162 | enrichment.matrix = BuildEnrichmentMatrix(rownames(st), type = 'GO')
163 | go_10 = sapply(modules, function(tbl){  # first ten term names per module after an ascending sort
164 |   e = Enrichment(geneset = tbl, enrichment.matrix = enrichment.matrix)
165 |   names(sort(e))[1:10]  # presumably `e` holds p-values, so these are the most significant terms -- confirm against Enrichment()
166 | })
167 | colnames(go_10) = names(modules)
168 | print(go_10)
169 | sink()
170 | dev.off()
171 | 
172 | 
173 | 
174 | 


--------------------------------------------------------------------------------
/part1.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/bash
 2 | #SBATCH --job-name=Gyn_part1 # Job name
 3 | #SBATCH --mail-type=END,FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL)
 4 | #SBATCH --mail-user=db3562@nyulangone.org
 5 | #SBATCH --nodes=1
 6 | #SBATCH --ntasks=1
 7 | #SBATCH --cpus-per-task=40
 8 | #SBATCH --mem=100gb # Job memory request
 9 | #SBATCH --time=10:00:00 # Time limit hrs:min:sec
10 | #SBATCH --output=serial_test_%j.log # Standard output and error log
11 | #SBATCH --partition cpu_short
12 | 
13 | source ~/.bashrc
14 | 
15 | if [ "$#" == 3 ]; then
16 | 	cd ~/Documents/STARCH
17 | 	conda activate STARCH
18 | 	python run_STARCH.py -i ~/Documents/Gyn/${1}${2}/${1}${2}${3}/Clones --output Clones --n_clusters 3 --outdir ~/Documents/Gyn/${1}${2}/${1}${2}${3}/Clones
19 | 	conda deactivate
20 | 	cd ~/Documents/Gyn/${1}${2}/${1}${2}${3}
21 | 	conda activate seurat_functions
22 | 	module load r/4.0.3
23 | 	Rscript ~/Documents/Gyn/Runs/part1.R ${1} ${2} ${3}
24 | 
25 | elif [ "$#" == 4 ]; then
26 | 	cd ~/Documents/STARCH
27 | 	conda activate STARCH
28 | 	python run_STARCH.py -i ~/Documents/Gyn/${1}${2}${3}/${1}${2}${3}${4}/Clones --output Clones --n_clusters 3 --outdir ~/Documents/Gyn/${1}${2}${3}/${1}${2}${3}${4}/Clones
29 | 	conda deactivate
30 | 	cd ~/Documents/Gyn/${1}${2}${3}/${1}${2}${3}${4}
31 | 	conda activate seurat_functions
32 | 	module load r/4.0.3
33 | 	Rscript ~/Documents/Gyn/Runs/part1.R ${1} ${2} ${3} ${4}
34 | 
35 | fi
36 | 
37 | 


--------------------------------------------------------------------------------
/part2.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | source('~/Documents/R/seurat_functions.R')
  3 | load('srt.RData')
  4 | 
  5 | ##### Arguments #####
  6 | args <- commandArgs(trailingOnly = TRUE)
  7 | cluster.malignant = eval(parse(text = args[1]))  # NOTE(review): the argument is evaluated as R code (presumably an expression such as c(1,3)) -- only safe with trusted input
  8 | clone.malignant = eval(parse(text = args[2]))  # same mechanism and same caveat for the second argument
  9 | k_obs_groups = 2  # number of subclones cut from the InferCNV clustering further down
 10 | #if (unique(srt$author) == ''){
 11 |   range = 2:25  # NMF ranks to try; the author-dependent alternative is commented out
 12 | #} else {
 13 | #  range = 5:35
 14 | #}
 15 | gmin = 5  # passed as `gmin` to NMFToModules
 16 | print(unique(srt$orig.ident))
 17 | 
 18 | ##### Determining type ####
 19 | # SingleR labels accepted for malignant cells; every other level of each annotation counts as normal
 20 | singler.malignant = c('Embryonic_stem_cells','iPS_cells','Tissue_stem_cells',
 21 |                       'Chondrocytes','Fibroblasts','Smooth_muscle_cells',
 22 |                       'Epithelial_cells','Hepatocytes','Keratinocytes',
 23 |                       'Astrocyte','Neurons','Neuroepithelial_cell')
 24 | cluster.normal = setdiff(levels(srt$cluster), cluster.malignant)
 25 | clone.normal = setdiff(levels(srt$clone), clone.malignant)
 26 | singler.normal = setdiff(levels(srt$singler), singler.malignant)
 27 | # Malignant candidates must satisfy all three criteria at once
 28 | cells.malignant = WhichCells(srt, expression = singler %in% singler.malignant
 29 |                              & cluster %in% cluster.malignant
 30 |                              & clone %in% clone.malignant)
 31 | # A normal reference exists only when some cell failed the malignant criteria
 32 | donormal = length(cells.malignant) != ncol(srt)
 33 | # Normal cells: cells of the non-malignant clusters that are not malignant candidates
 34 | cells.normal = if (donormal){
 35 |   setdiff(WhichCells(srt, expression = cluster %in% cluster.normal), cells.malignant)
 36 | } else {
 37 |   c()
 38 | }
 39 | # Filtering: remove malignant candidates that resemble one of the immune labels listed in `test`
 40 | test = intersect(c('B_cell','DC','Macrophage','Monocyte','Neutrophils','NK_cell','T_cells'), levels(srt$singler))
 41 | print(length(cells.malignant))
 42 | for (i in test){
 43 |   s = paste('scores',i ,sep = '.')  # metadata column 'scores.<label>'
 44 |   thresh = min(srt@meta.data[srt$singler == i,s])  # lowest score among the cells carrying that label
 45 |   #thresh = quantile(srt@meta.data[srt$singler == i,s], seq(0,1,by=0.01))['5%']
 46 |   cells.malignant = cells.malignant[srt@meta.data[cells.malignant, s] <= thresh]  # keep candidates scoring no higher than that minimum
 47 |   print(i)
 48 |   print(length(cells.malignant))
 49 | }
 50 | Idents(srt) = 'singler'
 51 | avg = AverageExpression(srt, assay = 'SCT', slot = 'data')$SCT  # average profile per singler label
 52 | data = GetData(srt, assay = 'SCT', slot = 'data')[rownames(avg),cells.malignant]  # subset once here; not re-subset while cells are removed in the loop below
 53 | for (i in test){
 54 |   corr = apply(data, 2, function(x){  # Pearson correlation of every candidate with the label's average profile
 55 |     cor(x, avg[,i], method = 'pearson')
 56 |   })
 57 |   cells.malignant = setdiff(cells.malignant, colnames(data)[corr > mean(corr) + 2*sd(corr)])  # drop candidates more than 2 SD above the mean correlation
 58 |   print(i)
 59 |   print(length(cells.malignant))
 60 | }
 61 | if (unique(srt$author) == 'Puram'){  # for this author, additionally require the dataset's own 'malignant' column to equal 1
 62 |   cells.malignant = cells.malignant[srt@meta.data[cells.malignant, 'malignant'] == 1]
 63 | }
 64 | # Assigning: every cell starts as 'undetermined' and is then overwritten from the two cell lists
 65 | srt$type = 'undetermined'
 66 | srt@meta.data[cells.malignant, 'type'] = 'malignant'
 67 | srt@meta.data[cells.normal, 'type'] = 'normal'  # assigned after 'malignant', so a cell present in both lists would end up 'normal'
 68 | srt$type = factor(srt$type, levels = c('malignant','normal','undetermined'))
 69 | print(table(srt$type))
 70 | print(table(srt$singler, srt$cluster, srt$type))
 71 | # Saving
 72 | save(srt, file = 'srt.RData')  # overwrites the file loaded at the top of the script, now including `type`
 73 | 
 74 | ##### Removing undetermined #####
 75 | srt = srt[, srt$type %in% c('malignant','normal')]
 76 | srt$type = factor(srt$type, levels = c('malignant','normal'))  # drop the now unused 'undetermined' level
 77 | col_type = c(brewer_pal(palette = 'Blues')(5)[4], brewer_pal(palette = 'Greens')(5)[4], 'grey')[1:nlevels(srt$type)]
 78 | names(col_type) = levels(srt$type)
 79 | srt = SCTransform(srt, return.only.var.genes = FALSE)  # re-normalise and re-embed on the reduced cell set
 80 | srt = RunPCA(srt, features = VariableFeatures(srt))
 81 | srt = RunUMAP(srt, dims = 1:10)
 82 | # Cluster
 83 | col_cluster = hue_pal()(nlevels(srt$cluster))
 84 | names(col_cluster) = levels(srt$cluster)
 85 | # Singler
 86 | col_singler = c(brewer.pal(8, 'Dark2'), brewer.pal(8, 'Accent'))[1:nlevels(srt$singler)]  # the two palettes provide 16 colours in total
 87 | names(col_singler) = levels(srt$singler)
 88 | # Pop: 'Malignant' for malignant cells, the singler label for normal cells
 89 | srt$pop = as.character(srt$type)
 90 | srt$pop[srt$type == 'normal'] = as.character(srt$singler[srt$type == 'normal'])
 91 | srt$pop = gsub('malignant','Malignant',srt$pop)
 92 | srt$pop = factor(srt$pop)
 93 | srt$pop = relevel(srt$pop, ref = 'Malignant')  # make 'Malignant' the first level
 94 | col_pop = col_singler[intersect(levels(srt$pop),names(col_singler))]
 95 | col_pop = c(col_pop, 'Malignant' = 'grey20')
 96 | 
 97 | #### InferCNV ####
 98 | # Finding subclones. Works inside 'Subclones/' and returns to the parent directory at the end
 99 | dir.create('Subclones', showWarnings = FALSE, recursive = TRUE, mode = "0777")
100 | setwd('Subclones')
101 | # Default kept when InferCNV fails: a single subclone '0' for every cell
102 | srt$subclone = 0
103 | srt$subclone = factor(srt$subclone)
104 | col_subclone = brewer_pal(palette = 'Purples')(1+nlevels(srt$subclone))[-1]
105 | names(col_subclone) = levels(srt$subclone)
106 | tryCatch(expr = {
107 |   # Inputs shared by both passes (the counts do not change between them)
108 |   raw_counts_matrix = as.matrix(GetAssayData(srt, assay = 'SCT', slot = 'counts'))
109 |   gene_order_file = switch(as.character(unique(srt$species)),
110 |                            'Hs' = '~/Documents/CNV/Hs/gene_order.tsv',
111 |                            'Mm' = '~/Documents/CNV/Mm/gene_order.tsv',
112 |                            stop('No gene order file for species: ', unique(srt$species)))
113 |   # Pass 1: cells annotated by type; 'normal' is the reference group when normal cells exist
114 |   annotations_file = data.frame(srt@meta.data$type, row.names = rownames(srt@meta.data), stringsAsFactors = FALSE)
115 |   infercnv_obj = CreateInfercnvObject(raw_counts_matrix = raw_counts_matrix,
116 |                                       annotations_file = annotations_file,
117 |                                       gene_order_file = gene_order_file,
118 |                                       delim = "\t",
119 |                                       ref_group_names = if (donormal) 'normal' else NULL,
120 |                                       chr_exclude = NULL)
121 |   infercnv_obj = infercnv::run(infercnv_obj,
122 |                                resume_mode = FALSE,
123 |                                out_dir = getwd(),
124 |                                cutoff = 0.1,
125 |                                cluster_by_groups = FALSE,
126 |                                analysis_mode = 'subclusters',
127 |                                tumor_subcluster_partition_method = 'qnorm',
128 |                                denoise = FALSE,
129 |                                HMM = FALSE,
130 |                                HMM_type = 'i6',
131 |                                num_threads = ncores,
132 |                                no_plot = TRUE,
133 |                                plot_steps = FALSE)
134 |   save(infercnv_obj, file = 'infer_cnv.RData')
135 |   # Cut the observation cells into k_obs_groups subclones; all other cells keep subclone 0
136 |   obs_hcl = hclust(dist(t(infercnv_obj@expr.data[, unlist(infercnv_obj@observation_grouped_cell_indices)]), 'euclidean'), 'ward.D2')
137 |   subclone = cutree(tree = as.hclust(obs_hcl), k = k_obs_groups)
138 |   srt$subclone = 0
139 |   srt$subclone[names(subclone)] = subclone[names(subclone)]
140 |   srt$subclone = factor(srt$subclone)
141 |   col_subclone = brewer_pal(palette = 'Purples')(1+nlevels(srt$subclone))[-1]
142 |   names(col_subclone) = levels(srt$subclone)
143 |   # Pass 2 (plotting heatmap with subclones): cells annotated by subclone, reference group '0'
144 |   annotations_file = data.frame(srt$subclone, row.names = rownames(srt@meta.data))
145 |   infercnv_obj = CreateInfercnvObject(raw_counts_matrix = raw_counts_matrix,
146 |                                       annotations_file = annotations_file,
147 |                                       gene_order_file = gene_order_file,
148 |                                       delim = "\t",
149 |                                       ref_group_names = if (donormal) '0' else NULL,
150 |                                       chr_exclude = NULL)
151 |   infercnv_obj = infercnv::run(infercnv_obj,
152 |                                resume_mode = FALSE,
153 |                                out_dir = getwd(),
154 |                                cutoff = 0.1,
155 |                                cluster_by_groups = TRUE,
156 |                                k_obs_groups = k_obs_groups,
157 |                                analysis_mode = 'subclusters',
158 |                                tumor_subcluster_partition_method = 'qnorm',
159 |                                denoise = TRUE,
160 |                                sd_amplifier=2,  # sets midpoint for logistic
161 |                                noise_logistic=TRUE, # turns gradient filtering on
162 |                                HMM = FALSE,
163 |                                HMM_type = 'i6',
164 |                                num_threads = ncores,
165 |                                no_plot = FALSE,
166 |                                plot_steps = FALSE)
167 |   infercnv_obj_medianfiltered = infercnv::apply_median_filtering(infercnv_obj)
168 |   infercnv::plot_cnv(infercnv_obj_medianfiltered,
169 |                      output_filename='infercnv.median_filtered',
170 |                      x.range=c(0.9,1.1),
171 |                      x.center=1,
172 |                      title = "infercnv",
173 |                      color_safe_pal = FALSE)
174 | }, error = function(e){
175 |   # Best effort: report why InferCNV stopped instead of discarding the condition silently
176 |   message('InferCNV failed: ', conditionMessage(e))
177 |   c()
178 | })
179 | # Save whichever object exists (pass 2 if it completed, otherwise pass 1); skipping the save when
180 | # nothing was created makes sure the working directory is still restored below
181 | if (exists('infercnv_obj')){
182 |   save(infercnv_obj, file = 'infer_cnv.RData')
183 | }
184 | setwd('../')
201 | 
202 | #### Plotting ####
203 | pdf('dimplots.all.pdf', height = 15, width = 15)
204 | sumsq = apply(infercnv_obj@expr.data, 2, function(prof){  # per cell: sum of squared values of its InferCNV profile
205 |   sum(prof^2)
206 | })
207 | srt$sumsq = sumsq
208 | top = apply(infercnv_obj@expr.data[,names(sort(sumsq, decreasing = TRUE))[1:10]], 1, mean)  # mean profile of the 10 cells with the largest sumsq
209 | topcor = apply(infercnv_obj@expr.data, 2, function(prof){  # per cell: correlation of its profile with that mean profile
210 |   cor(prof, top)
211 | })
212 | srt$topcor = topcor
213 | print(FeatureScatter(srt, pt.size = 2, 'sumsq', 'topcor', group.by = 'type', cols = col_type))  # explicit print() and full `cols` name: no reliance on auto-printing or partial argument matching
214 | print(FeatureScatter(srt, pt.size = 2, 'sumsq', 'topcor', group.by = 'subclone', cols = col_subclone))
215 | print(FeatureScatter(srt, pt.size = 2, 'sumsq', 'topcor', group.by = 'cluster', cols = col_cluster))
216 | for (reduction in c('pca','umap')){  # same set of views in each embedding
217 |   h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'type', cols = col_type)
218 |   print(h)
219 |   h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'cluster', cols = col_cluster, label = TRUE)
220 |   print(h)
221 |   h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'singler', cols = col_singler)
222 |   print(h)
223 |   h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'pop', cols = col_pop)
224 |   print(h)
225 |   h = DimPlot(srt, pt.size = 2, reduction = reduction, group.by = 'subclone', cols = col_subclone)
226 |   print(h)
227 |   h = FeaturePlot(srt, pt.size = 2, reduction = reduction, features = 'sumsq')
228 |   print(h)
229 |   h = FeaturePlot(srt, pt.size = 2, reduction = reduction, features = 'topcor')
230 |   print(h)
231 | }
232 | dev.off()
233 | 
234 | #### Splitting ####
235 | srt.all = srt  # keep the full object under its own name; srt is reassigned below
236 | save(srt.all, file = 'srt.all.RData')
237 | srt.malignant = srt[, srt$type == 'malignant']  # cells whose 'type' metadata is 'malignant'
238 | save(srt.malignant, file = 'srt.malignant.RData')  # same file is written again after the NMF annotation
239 | if (donormal){  # the normal subset is only exported when donormal is set
240 |   srt.normal = srt[, srt$type == 'normal']
241 |   save(srt.normal, file = 'srt.normal.RData')
242 | }
243 | 
244 | #### Malignant ####
245 | srt = srt.malignant  # from here on srt holds only the malignant subset
246 | srt = SCTransform(srt, return.only.var.genes = FALSE)  # SCT is re-run on the subset itself
247 | srt = RunPCA(srt, features = VariableFeatures(srt))
248 | srt = RunUMAP(srt, dims = 1:10)  # UMAP from the first 10 PCs
249 | ## NMF
250 | # Run NMF on the SCT scale.data matrix, restricted to the variable features
251 | data = as.matrix(GetAssayData(srt, assay = 'SCT', slot = 'scale.data'))
252 | # (disabled) the restriction below used to be conditional on: unique(srt$author) == ''
253 |   data = data[VariableFeatures(srt),]
254 | # (end of the disabled condition; the restriction is now always applied)
255 | data[data < 0] = 0  # NMF requires a non-negative matrix, so negative scaled values are set to 0
256 | data = data[apply(data, 1, var) > 0, ]  # drop rows (features) with zero variance
257 | print(dim(data))
258 | res.list = mclapply(range, function(r){  # one fit per element of `range` (a script variable, presumably the candidate ranks - confirm)
259 |   nmf(data, rank = r, nrun = 1, seed = 'ica', method = 'nsNMF')
260 | }, mc.cores = ncores)
261 | names(res.list) = range  # label each fit by its rank
262 | # Select rank: minimise (rank - number of modules); on ties keep the last such fit
263 | modules.list = lapply(res.list, function(fit) NMFToModules(fit, gmin = gmin))
264 | n.modules = sapply(modules.list, length)
265 | print(n.modules)
266 | comp = as.numeric(names(modules.list)) - n.modules
267 | mi = min(comp)
268 | r = tail(names(which(comp == mi)), 1)
269 | print(r)
270 | res = res.list[[r]]
271 | # Process output of the selected fit
272 | modules = NMFToModules(res, gmin = gmin)
273 | scores = basis(res)  # basis matrix; its columns are named after the modules below
274 | colnames(scores) = names(modules)
275 | coefs = coefficients(res)  # coefficient matrix; its rows are named after the modules below
276 | rownames(coefs) = names(modules)
277 | # Order modules by the row order of a heatmap of the coefficients
278 | h = Heatmap(coefs, clustering_distance_columns = 'euclidean')
279 | o = row_order(h)
280 | scores = scores[, o]
281 | coefs = coefs[o, ]
282 | modules = modules[o]
283 | print(modules)
284 | srt = AddMetaData(srt, t(coefs), col.name = rownames(coefs))  # one metadata column per module
285 | # Cluster NMF output: cut the column dendrogram of the coefficients into length(modules) groups
286 | h = Heatmap(coefs, clustering_distance_columns = 'euclidean')
287 | hcl = as.hclust(column_dend(h))
288 | sig = cutree(hcl, k = length(modules))  # group id for every column of coefs
289 | nmf = c(by(t(coefs), INDICES = sig, FUN = function(x){names(modules)[which.max(colMeans(x))]}))[sig]  # group -> module with the highest mean coefficient, expanded to one label per column
290 | srt$nmf = factor(nmf, levels = names(modules))
291 | col_nmf = c(brewer.pal(12, 'Set3'), brewer.pal(8, 'Set1'))[1:nlevels(srt$nmf)]  # 12 + 8 colours, so at most 20 modules get a colour
292 | names(col_nmf) = levels(srt$nmf)
293 | ## Saving
294 | srt.malignant = srt
295 | save(srt.malignant, file = 'srt.malignant.RData')  # now includes the module coefficients and the nmf label
296 | res.list.malignant = res.list  # every fit, one per rank
297 | save(res.list.malignant, file = 'res.list.malignant.RData')
298 | res.malignant = res  # the selected fit only
299 | save(res.malignant, file = 'res.malignant.RData')
300 | ## Plotting: one page per view, first on the PCA and then on the UMAP embedding
301 | pdf('dimplots.malignant.pdf', height = 15, width = 15)
302 | for (emb in c('pca','umap')){
303 |   # cells coloured by their NMF label
304 |   print(DimPlot(srt, pt.size = 2, reduction = emb, group.by = 'nmf', cols = col_nmf))
305 |   # cells coloured by the 'cluster' metadata column
306 |   print(DimPlot(srt, pt.size = 2, reduction = emb, group.by = 'cluster', cols = col_cluster))
307 |   # per-cell value of every module's metadata column
308 |   print(FeaturePlot(srt, reduction = emb, features = names(modules)))
309 | }
310 | dev.off()
311 | pdf('heatmaps.malignant.pdf', height = 20, width = 10)
312 | sink('heatmaps.malignant.txt')  # printed output goes to this file until sink() below
313 | # NMF heatmaps. NOTE(review): Heatmap is called with top_ann/breaks/colors; confirm the Heatmap definition in scope accepts them
314 | # Modules: coefficient heatmap with columns ordered by nmf label, then cluster, then singler
315 | top_ann = HeatmapAnnotation(df = data.frame('cluster' = srt$cluster, 'singler' = srt$singler,
316 |                                             'nmf' = srt$nmf, 'subclone' = srt$subclone), 
317 |                             col = list('cluster' = col_cluster, 'singler' = col_singler, 
318 |                                        'nmf' = col_nmf, 'subclone' = col_subclone), which = 'column')
319 | h = Heatmap(coefs, name = 'coefs', top_ann = top_ann,
320 |             show_row_names = TRUE, 
321 |             cluster_rows = FALSE,
322 |             cluster_columns = FALSE, column_order = order(srt$nmf, srt$cluster, srt$singler),
323 |             breaks = c(0,max(coefs)/2), colors = c('white','red'))
324 | print(h)
325 | h = Heatmap(scores[unlist(modules),], name = 'scores', 
326 |             show_row_names = TRUE, row_names_gp = gpar(cex = 0.5),
327 |             cluster_rows = FALSE,
328 |             cluster_columns = FALSE,  # rows keep module order and are split into one block per module
329 |             split = factor(unlist(mapply(rep, names(modules), sapply(modules, length))), levels = names(modules)))
330 | print(h)
331 | print(modules)
332 | enrichment.matrix = BuildEnrichmentMatrix(rownames(srt), type = 'GO')  # built over all features of srt
333 | go_10 = sapply(modules, function(tbl){  # per module: first 10 names after sorting the Enrichment() result ascending
334 |   e = Enrichment(geneset = tbl, enrichment.matrix = enrichment.matrix)  # presumably p-values per term - confirm
335 |   names(sort(e))[1:10]
336 | })
337 | colnames(go_10) = names(modules)
338 | print(go_10)
339 | sink()  # stop redirecting printed output
340 | dev.off()
341 | 
342 | #### Normal ####
343 | if (donormal){
344 |   # SCT -> PCA -> UMAP on the normal subset, updating srt.normal step by step
345 |   srt.normal = SCTransform(srt.normal, return.only.var.genes = FALSE)
346 |   srt.normal = RunPCA(srt.normal, features = VariableFeatures(srt.normal))
347 |   srt.normal = RunUMAP(srt.normal, dims = 1:10)
348 |   ## Saving (srt is left holding the processed normal object)
349 |   srt = srt.normal
350 |   save(srt.normal, file = 'srt.normal.RData')
351 | }
352 | 
353 | 


--------------------------------------------------------------------------------
/part2.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/bash
 2 | #SBATCH --job-name=Tumors_part2 # Job name
 3 | #SBATCH --mail-type=END,FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL)
 4 | #SBATCH --mail-user=db3562@nyulangone.org
 5 | #SBATCH --nodes=1
 6 | #SBATCH --ntasks=1
 7 | #SBATCH --cpus-per-task=40
 8 | #SBATCH --mem=200gb # Job memory request
 9 | #SBATCH --time=20:00:00 # Time limit hrs:min:sec
10 | #SBATCH --output=serial_test_%j.log # Standard output and error log
11 | #SBATCH --partition cpu_short
12 | 
13 | source ~/.bashrc
14 | # Args: 3 or 4 sample-name fragments that locate the sample directory, then 2 arguments for part2.R
15 | workdir=""
16 | if [ "$#" -eq 5 ]; then
17 | 	workdir=~/Documents/Tumors/${1}${2}/${1}${2}${3}
18 | 	shift 3
19 | elif [ "$#" -eq 6 ]; then
20 | 	workdir=~/Documents/Tumors/${1}${2}${3}/${1}${2}${3}${4}
21 | 	shift 4
22 | fi
23 | if [ -n "${workdir}" ]; then
24 | 	# Stop if the sample directory is missing, rather than running part2.R and the copies elsewhere
25 | 	cd "${workdir}" || exit 1
26 | 	conda activate seurat_functions
27 | 	module load r/4.0.3
28 | 	Rscript ~/Documents/Tumors/Runs/part2.R ${1} ${2}
29 | 	cp Subclones/infercnv.png ./infercnv_subclones.png
30 | 	cp Subclones/infercnv.median_filtered.png ./infercnv_subclones_filtered.png
31 | 	conda deactivate
32 | fi
33 | 
34 | 
35 | 
36 | # SCENIC steps in order: scenic.malignant1.R, pyscenic grn, scenic.malignant2.R
37 | conda activate seurat_functions
38 | module load r/4.0.3
39 | Rscript ~/Documents/Tumors/Runs/scenic.malignant1.R
40 | conda deactivate
41 | # Stop if Scenic.Malignant is missing; otherwise pyscenic and the cd ../ below run from the wrong directory
42 | cd Scenic.Malignant || exit 1
43 | module load python/cpu/3.6.5
44 | cp 1.1_exprMatrix_filtered_t.txt 1.1_exprMatrix_filtered_t.tsv
45 | pyscenic grn -o grnboost_output.csv 1.1_exprMatrix_filtered_t.tsv 1.1_inputTFs.txt
46 | module unload python/cpu/3.6.5
47 | cd ../
48 | 
49 | conda activate seurat_functions
50 | module load r/4.0.3
51 | Rscript ~/Documents/Tumors/Runs/scenic.malignant2.R
52 | conda deactivate
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------