├── README.md
└── original-analysis
    ├── src
        ├── 05_pseudotime.R
        ├── DE.R
        ├── 02_wu-data.R
        ├── parameters.R
        ├── 01_normalisation.R
        ├── 04_clustering.R
        ├── 03_heterogeneity.R
        └── 00_filtering.R
    ├── figure_src
        ├── Figure_6a.R
        ├── Figure_6cd.R
        ├── Figure_8b.R
        ├── Figure_7bc_8acd.R
        ├── Figure_8f.R
        ├── Figure_8g.R
        ├── Figure_2ef.R
        ├── Figure_5.R
        ├── Figure_2cd.R
        ├── Figure_1e.R
        ├── Figure_3cd.R
        └── SupplementaryFigure_7.R
    └── README.md


/README.md:
--------------------------------------------------------------------------------
 1 | # Single cell RNA-seq and ATAC seq analysis of cardiac progenitor cell transition states and lineage settlement
 2 | 
 3 | ## Research article
 4 | 
 5 | This repository contains code and supplementary files for the analysis conducted in
 6 | 
 7 | Guangshuai Jia, Jens Preussner, Xi Chen, Stefan Guenther, Xuejun Yuan, Michail Yekelchyk, Carsten Kuenne, Mario Looso, Yonggang Zhou, Sarah Teichmann and Thomas Braun. Single cell RNA-seq and ATAC seq analysis of cardiac progenitor cell transition states and lineage settlement. **Nature Communications** 9, 4877 (*2018*), doi: [10.1038/s41467-018-07307-6](https://doi.org/10.1038/s41467-018-07307-6).
 8 | 
 9 | Please cite the research article when using data from this repository.
10 | 
11 | ## Original analysis
12 | 
13 | The original analysis relied on the **R programming languange** and used `R` in version `3.5.0`. Additionally, we heavily relied on the bioconductor package [scater](http://bioconductor.org/packages/release/bioc/html/scater.html) in version `scater 1.4.0` for results from single-cell RNA-seq and `scater 1.8.0` for results from single-cell ATAC-seq. The details on the [session](https://stat.ethz.ch/R-manual/R-devel/library/utils/html/sessionInfo.html) can be found in [original-analysis/RNA-seq_sessionInfo.txt](original-analysis/RNA-seq_sessionInfo.txt) and [original-analysis/ATAC-seq_sessionInfo.txt](original-analysis/ATAC-seq_sessionInfo.txt), respectively.
14 | 
15 | Please see the accompanying [README.md](original-analysis/README.md) for details on how to replicate preprocessing or re-create figures from the manuscript.
16 | 
17 | ## Updated analysis
18 | 
19 | In addition to the original analysis, we will provide an up-to-date re-analysis of the data from the manuscript. *Watch* this repository to be notified on further updates.
20 | 


--------------------------------------------------------------------------------
/original-analysis/src/05_pseudotime.R:
--------------------------------------------------------------------------------
 1 | library(scater)
 2 | library(destiny)
 3 | library(singlecellutils)
 4 | library(RColorBrewer)
 5 | library(multipanelfigure)
 6 | 
 7 | #
 8 | # Load data
 9 | #
10 | source("src/parameters.R")
11 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
12 | 
13 | #
14 | # Nkx2-5 diffusion map
15 | #
16 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1")
17 | nkx_de_genes <- which(fData(c1_subset)$nkx_de & fData(c1_subset)$nkx_marker)
18 | 
19 | data <- get_exprs(c1_subset[nkx_de_genes, nkx_cells], "norm_exprs_sf")
20 | set.seed(1001)
21 | nkx_dm <- DiffusionMap(t(data), distance = 'euclidean')
22 | 
23 | tip_cell <- find_tips(nkx_dm)[which(pData(c1_subset[, nkx_cells])$cluster[find_tips(nkx_dm)] == 1)]
24 | 
25 | nkx_dpt <- DPT(nkx_dm, tips = tip_cell)
26 | nkx_pseudotime <- nkx_dpt[tip_cell]
27 | 
28 | #
29 | # Save dpt and diffusion components to object
30 | #
31 | pData(c1_subset)$dpt <- NA
32 | pData(c1_subset)$dm1 <- NA
33 | pData(c1_subset)$dm2 <- NA
34 | 
35 | pData(c1_subset)$dpt[nkx_cells] <- nkx_pseudotime
36 | pData(c1_subset)$dm1[nkx_cells] <- nkx_dm@eigenvectors[, 1]
37 | pData(c1_subset)$dm2[nkx_cells] <- nkx_dm@eigenvectors[, 2]
38 | 
39 | save(nkx_dm, file = file.path(parameters$general$path_supdata, "Nkx2-5-DM.Rdata"))
40 | 
41 | #
42 | # Isl1 diffusion map
43 | #
44 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
45 | isl1_de_genes <- which(fData(c1_subset)$isl1_de & fData(c1_subset)$isl1_marker)
46 | 
47 | data <- get_exprs(c1_subset[isl1_de_genes, isl1_cells], "norm_exprs_sf")
48 | set.seed(1002)
49 | isl1_dm <- DiffusionMap(t(data), distance = 'e')
50 | 
51 | tip_cell <- find_tips(isl1_dm)[which(pData(c1_subset[, isl1_cells])$cluster[find_tips(isl1_dm)] == 5)]
52 | 
53 | isl_dpt <- DPT(isl1_dm, tips = tip_cell)
54 | isl_pseudotime <- isl_dpt[tip_cell,]
55 | 
56 | #
57 | # Save dpt and diffusion components to object
58 | #
59 | pData(c1_subset)$dpt[isl1_cells] <- isl_pseudotime
60 | pData(c1_subset)$dm1[isl1_cells] <- isl1_dm@eigenvectors[, 1]
61 | pData(c1_subset)$dm2[isl1_cells] <- isl1_dm@eigenvectors[, 3]
62 | pData(c1_subset)$dm3[isl1_cells] <- isl1_dm@eigenvectors[, 2]
63 | 
64 | save(isl1_dm, file = file.path(parameters$general$path_supdata, "Isl1-DM.Rdata"))
65 | save(c1_subset, file = file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
66 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_6a.R:
--------------------------------------------------------------------------------
 1 | library(scater)
 2 | library(ggplot2)
 3 | library(multipanelfigure)
 4 | 
 5 | #
 6 | # Load data
 7 | #
 8 | source("src/parameters.R")
 9 | load(file.path(parameters$general$path_supdata, "wu.Rdata"))
10 | 
11 | #
12 | # Smooth muscle marker genes
13 | #
14 | timepoint <- which(pData(wu)$timepoint == "e9.5")
15 | sm_marker_genes <- c("ENSMUSG00000015579", "ENSMUSG00000032085", "ENSMUSG00000001349", "ENSMUSG00000035783", "ENSMUSG00000029761", "ENSMUSG00000022836", "ENSMUSG00000048878", "ENSMUSG00000045667")
16 | 
17 | subset <- wu[sm_marker_genes, timepoint]
18 | rownames(subset) <- fData(subset)$symbol
19 | 
20 | #plotExpression(subset, x = "type", features = 1:nrow(subset), colour_by = "batch", exprs_values = "norm_exprs_sf", log2_values = F)
21 | 
22 | #
23 | # SM cell inference
24 | #
25 | marker_expression <- get_exprs(subset, "norm_exprs_sf")
26 | 
27 | is_smc_cl <- list(
28 |   as.numeric(marker_expression["Nkx2-5",]) < 1,
29 |   as.numeric(marker_expression["Tagln",]) > 2,
30 |   as.numeric(marker_expression["Cnn1",]) > 2,
31 |   as.numeric(marker_expression["Acta2",]) > 2,
32 |   as.numeric(marker_expression["Cald1",]) > 2,
33 |   as.numeric(marker_expression["Mylk",]) > 2,
34 |   #as.numeric(marker_expression["Hexim1",]) > 2,
35 |   as.numeric(marker_expression["Smtnl2",]) > 2
36 | )
37 | 
38 | is_smc_c <- do.call("rbind", is_smc_cl)
39 | is_smc <- (colSums(is_smc_c[2:nrow(is_smc_c), ]) >= 5) & is_smc_c[1, ]
40 | 
41 | t <- table(isko = pData(subset)$batch == "KO", is_smc)[c(2,1), c(2,1)]
42 | 
43 | tr <- prop.test(t, correct=T, conf.level = 0.99)
44 | 
45 | ggMMplot <- function(var1, var2){
46 |   require(ggplot2)
47 |   levVar1 <- length(levels(var1))
48 |   levVar2 <- length(levels(var2))
49 | 
50 |   jointTable <- prop.table(table(var1, var2))
51 |   plotData <- as.data.frame(jointTable)
52 |   plotData$marginVar1 <- prop.table(table(var1))
53 |   plotData$var2Height <- plotData$Freq / plotData$marginVar1
54 |   plotData$var1Center <- c(0, cumsum(plotData$marginVar1)[1:levVar1 -1]) +
55 |     plotData$marginVar1 / 2
56 | 
57 |   ggplot(plotData, aes(var1Center, var2Height)) +
58 |     geom_bar(stat = "identity", aes(fill = var2, width = marginVar1), col = "White") +
59 |     geom_label(aes(label = as.character(var1), x = var1Center, y = 1.05), label.r = unit(0, "lines"))
60 | }
61 | 
62 | plot_data <- data.frame(Celltype = factor(ifelse(is_smc, "SMC", "non-SMC"), levels = c("SMC", "non-SMC")), Genotype = factor(ifelse(pData(subset)$batch == "KO", "KO", "WT"), levels = c("WT", "KO")))
63 | 
64 | plot <- ggMMplot(plot_data$Genotype, plot_data$Celltype) +
65 |   guides(fill = F) +
66 |   scale_fill_manual(values = c("#A8DBA8", "#3B8686")) +
67 |   ylab("SMC Proportion") +
68 |   xlab("Cells") +
69 |   theme(panel.background = element_rect(fill = "transparent"), axis.line.y = element_line(colour = "black"), axis.line.x = element_line(colour = "black"))#, axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank())
70 | 
71 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 5)
72 | m <- fill_panel(m, plot, row = c(1,2))
73 | ggsave(
74 |   plot = m,
75 |   filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_SMCprop.pdf"),
76 |   width = 205,
77 |   height = 230,
78 |   units = "mm",
79 |   dpi = 600)
80 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_6cd.R:
--------------------------------------------------------------------------------
 1 | library(destiny)
 2 | library(scater)
 3 | library(singlecellutils)
 4 | library(multipanelfigure)
 5 | 
 6 | #
 7 | # Graphical settings
 8 | #
 9 | global.settings <- list(layout.heights=list(main.key.padding = -2, bottom.padding = 0, axis.xlab.padding = -0.5), layout.widths = list(ylab.axis.padding = -0.5))
10 | 
11 | #
12 | # Load neccessary data
13 | #
14 | source("src/parameters.R")
15 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
16 | load(file.path(parameters$general$path_supdata, "wg_subset.Rdata"))
17 | load(file.path(parameters$general$path_supdata, "Isl1-DM.Rdata"))
18 | load(file.path(parameters$general$path_supdata, "Nkx2-5-DM.Rdata"))
19 | 
20 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1")
21 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
22 | nkx_dm_coords <- cbind(-pData(c1_subset[, nkx_cells])$dm1, -pData(c1_subset[, nkx_cells])$dm2)
23 | isl1_dm_coords <- cbind(-pData(c1_subset[, isl1_cells])$dm1, -pData(c1_subset[, isl1_cells])$dm2, -pData(c1_subset[, isl1_cells])$dm3)
24 | 
25 | #
26 | # Predict Nkx-OE onto Nkx2-5 trajectory
27 | #
28 | nkxoe_cells <- which(pData(c1_subset)$Background == "nkx2-5oe" & pData(c1_subset)$Platform == "C1")
29 | m <- match(colnames(nkx_dm@data_env$data), rownames(c1_subset))
30 | 
31 | nkxoe_data <- get_exprs(c1_subset[m, nkxoe_cells], "norm_exprs_sf")
32 | 
33 | nkxoe_dcs <- dm_predict(nkx_dm, t(nkxoe_data))
34 | nkxoe_coords <- matrix(nkxoe_dcs@x, ncol = nkxoe_dcs@Dim[2])
35 | nkx_dm_coords_oe <- rbind(nkx_dm_coords, cbind(-nkxoe_coords[, 1], -nkxoe_coords[, 2]))
36 | 
37 | cluster <- c(pData(c1_subset[, nkx_cells])$cluster, rep("OE", nrow(nkxoe_coords)))
38 | rm_obs <- is.na(cluster)
39 | cluster <- paste0("cluster", cluster[!rm_obs])
40 | 
41 | p_nkx_nkxoe <- colorAMap(nkx_dm_coords_oe[!rm_obs,], colour_by = factor(cluster), palette = parameters$colors$nkxko_cluster_palette, main = list(label = "Nkx2-5 overexpression prediction", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), ylab = list(label = "Dimension 2", cex = 0.6), par.settings = global.settings)
42 | 
43 | #
44 | # Predict Nkx2-5OE onto Isl1 trajectory
45 | #
46 | m <- match(colnames(isl1_dm@data_env$data), rownames(c1_subset))
47 | nkxoe_cells <- which(pData(c1_subset)$Background == "nkx2-5oe" & pData(c1_subset)$Platform == "C1")
48 | nkxoe_data <- get_exprs(c1_subset[m, nkxoe_cells], "norm_exprs_sf")
49 | 
50 | nkxoe_dcs <- dm_predict(isl1_dm, t(nkxoe_data))
51 | nkxoe_coords <- matrix(nkxoe_dcs@x, ncol = nkxoe_dcs@Dim[2])
52 | isl1_dm_coords_oe <- rbind(isl1_dm_coords[, c(1,2)], cbind(-nkxoe_coords[, 1], -nkxoe_coords[, 3]))
53 | 
54 | cluster <- c(pData(c1_subset[, isl1_cells])$cluster, rep("OE", nrow(nkxoe_coords)))
55 | rm_obs <- is.na(cluster)
56 | cluster <- paste0("cluster", cluster[!rm_obs])
57 | 
58 | p_isl1_oe <- colorAMap(isl1_dm_coords_oe[!rm_obs,], colour_by = factor(cluster), palette = parameters$colors$islko_cluster_palette, ylab = list(label = "Dimension 3", cex = 0.6), main = list(label = "Isl1 Nkx2-5 overexpression prediction", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), par.settings = global.settings)
59 | 
60 | #
61 | # Save
62 | #
63 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 3)
64 | m <- fill_panel(m, p_nkx_nkxoe)
65 | m <- fill_panel(m, p_isl1_oe)
66 | 
67 | ggsave(
68 |   plot = m,
69 |   filename = file.path(parameters$general$path_rfigures, "Figure6_cd.pdf"),
70 |   width = 205,
71 |   height = 230,
72 |   units = "mm",
73 |   dpi = 600)
74 | 


--------------------------------------------------------------------------------
/original-analysis/src/DE.R:
--------------------------------------------------------------------------------
 1 | library(MAST)
 2 | library(ROCR)
 3 | 
 4 | differentialExpression <- function(expression, contrasts, fData) {
 5 |   diff_data <- lapply(names(contrasts), function(l) {
 6 |     cells <- contrasts[[l]]
 7 | 
 8 |     data <- cbind(expression[, cells[[1]]], expression[, cells[[2]]])
 9 |     cond_A <- unlist(strsplit(l, "_"))[1]
10 |     cond_B <- unlist(strsplit(l, "_"))[2]
11 | 
12 |     cond <- c(rep(cond_A, length(cells[[1]])), rep(cond_B, length(cells[[2]])))
13 | 
14 |     cdat <- data.frame(wellKey = colnames(data), condition = factor(cond), stringsAsFactors = F)
15 |     fdat <- data.frame(primerid = rownames(data), stringsAsFactors = F)
16 |     sca <- MAST::FromMatrix(class = "SingleCellAssay",
17 |                             exprsArray=data,
18 |                             cData = cdat,
19 |                             fData = fdat)
20 | 
21 |     zlm <- MAST::zlm(~ condition, sca, method = "bayesglm", ebayes = TRUE, ebayesControl = list(method = "MLE", model = "H1"))
22 |     s <- MAST::summary(zlm, doLRT = paste0('condition', cond_B))$datatable
23 |     res <- merge(s[contrast==paste0('condition', cond_B) & component=='H',.(primerid, `Pr(>Chisq)`)], #hurdle P values
24 |                  s[contrast==paste0('condition', cond_B) & component=='logFC', .(primerid, coef, ci.hi, ci.lo)], by='primerid') #logFC coefficients
25 |     colnames(res) <- c("geneID", "pval", "lfc", "lfc.hi", "lfc.lo")
26 |     # Calculation of FDR
27 |     res$fdr <- p.adjust(res$pval, method = "fdr")
28 | 
29 |     # Calculation of basemeanA expressions
30 |     basemeanA <- rowMeans(data[, cond == cond_A])
31 |     m <- match(res$geneID, names(basemeanA))
32 |     res$basemeanA <- basemeanA[m]
33 | 
34 |     # Calculation of basemeanB expressions
35 |     basemeanB <- rowMeans(data[, cond == cond_B])
36 |     m <- match(res$geneID, names(basemeanB))
37 |     res$basemeanB <- basemeanB[m]
38 | 
39 |     colnames(res[, c("basemeanA", "basemeanB")]) <- c(cond_A, cond_B)
40 | 
41 |     # Adding number of cells with detectable expression
42 |     # n_exprsA <- rowSums(counts(subset[, cond == cond_A]) > 10, na.rm = T)
43 |     # m <- match(res$geneID, names(n_exprsA))
44 |     # res$n_exprsA <- n_exprsA[m]
45 |     # n_exprsB <- rowSums(counts(subset[, cond == cond_B]) > 10, na.rm = T)
46 |     # m <- match(res$geneID, names(n_exprsB))
47 |     # res$n_exprsB <- n_exprsB[m]
48 | 
49 |     m <- match(res$geneID, rownames(fData))
50 |     res <- cbind(res, fData[m, c("symbol", "biotype", "description")])
51 | 
52 |     res
53 |   })
54 | }
55 | 
56 | get_auroc <- function(gene, labels) {
57 |   gene <- gene[!is.na(labels)]
58 |   labels <- na.omit(labels)
59 |   score <- rank(gene)
60 |   # Get average score for each cluster
61 |   ms <- aggregate(score ~ labels, FUN = mean)
62 |   # Get cluster with highest average score
63 |   posgroup <- ms[ms$score == max(ms$score), ]$labels
64 |   # Return NAs if there is a tie for cluster with highest average score (by definition this is
65 |   # not cluster specific)
66 |   if (length(posgroup) > 1) {
67 |     return(c(NA, NA, NA))
68 |   }
69 |   # Create 1/0 vector of truths for predictions, cluster with highest average score vs
70 |   # everything else
71 |   truth <- as.numeric(labels == posgroup)
72 |   # Make predictions & get auc using RCOR package.
73 |   pred <- prediction(score, truth)
74 |   val <- unlist(ROCR::performance(pred, "auc")@y.values)
75 |   pval <- suppressWarnings(wilcox.test(score[truth == 1], score[truth == 0])$p.value)
76 |   return(c(val, posgroup, pval))
77 | }
78 | 
79 | get_marker_genes <- function(dataset, labels) {
80 |   res <- apply(dataset, 1, get_auroc, labels = labels)
81 |   res <- data.frame(matrix(unlist(res), ncol = 3, byrow = T))
82 |   colnames(res) <- c("auroc", "clusts", "pvalue")
83 |   res$fdr <- p.adjust(res$pvalue)
84 |   res$geneID <- rownames(dataset)
85 |   return(res)
86 | }
87 | 


--------------------------------------------------------------------------------
/original-analysis/src/02_wu-data.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(biomaRt)
  3 | library(scater)
  4 | library(scran)
  5 | 
  6 | source("src/parameters.R")
  7 | #
  8 | # Load Wu et. al., Nkx2-5 WT data
  9 | #
 10 | e85_attributes <- read.table("ext_data/e8.5_wt_cell_attributes2.csv", sep=";", header=T)
 11 | e95_attributes <- read.table("ext_data/e9.5_wt_cell_attributes.csv", sep=",", header=T)
 12 | e105_attributes <- read.table("ext_data/e10.5_wt_cell_attributes.csv", sep=",", header=T)
 13 | 
 14 | wt_metadata <- bind_rows(e85_attributes, e95_attributes, e105_attributes)
 15 | wt_metadata$timepoint <- c(rep("e8.5", nrow(e85_attributes)), rep("e9.5", nrow(e95_attributes)), rep("e10.5", nrow(e105_attributes)))
 16 | 
 17 | e85_counts <- read.table("ext_data/e8.5_wt_counts.csv", sep=",", header=T, row.names = 1)
 18 | e95_counts <- read.table("ext_data/e9.5_wt_counts.csv", sep=",", header=T, row.names = 1)
 19 | e105_counts <- read.table("ext_data/e10.5_wt_counts.csv", sep=",", header=T, row.names = 1)
 20 | 
 21 | wt_counts <- counts <- cbind(e85_counts, e95_counts, e105_counts)
 22 | 
 23 | #
 24 | # Load Wu et. al., Nkx2-5 KO data
 25 | #
 26 | ko_counts <- read.csv("ext_data/e9.5_Nkx2.5mut_counts.csv", row.names = 1)
 27 | ko_metadata <- read.csv("ext_data/e9.5_Nkx2.5mut_cell_attributes.csv")
 28 | 
 29 | ko_metadata$timepoint <- "e9.5"
 30 | ko_metadata$batch <- "KO"
 31 | 
 32 | #
 33 | # Merge counts and metadata
 34 | #
 35 | assertthat::are_equal(rownames(wt_counts), rownames(ko_counts))
 36 | wu_counts <- cbind(wt_counts, ko_counts)
 37 | 
 38 | wu_metadata <- bind_rows(wt_metadata, ko_metadata)
 39 | rownames(wu_metadata) <- make.names(wu_metadata$index)
 40 | 
 41 | assertthat::are_equal(rownames(wu_metadata), colnames(wu_counts))
 42 | 
 43 | # Filter cells based on quality in metadata
 44 | wu_counts <- wu_counts[, wu_metadata$quality]
 45 | wu_metadata <- wu_metadata[wu_metadata$quality, ]
 46 | 
 47 | # Filter out genes that are not in our data
 48 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 49 | m <- match(rownames(c1_subset), rownames(wu_counts))
 50 | 
 51 | wu_counts <- wu_counts[na.omit(m), ]
 52 | 
 53 | #
 54 | # Fetch featureData from bioMart
 55 | #
 56 | mart <- useMart("ensembl", dataset = parameters$general$biomart_dataset)
 57 | bm <- getBM(c("ensembl_gene_id","external_gene_name","gene_biotype","description"), filters = "ensembl_gene_id", values = rownames(wu_counts), mart = mart)
 58 | 
 59 | #
 60 | # Assemble SCESet for Wu data
 61 | #
 62 | m <- match(rownames(wu_counts), bm$ensembl_gene_id)
 63 | data <- data.frame(bm[na.omit(m),])
 64 | rownames(data) <- data$ensembl_gene_id
 65 | colnames(data) <- c("gene_id", "symbol","biotype","description")
 66 | 
 67 | # Add neccessary metadata
 68 | wu_metadata$Cell <- make.names(wu_metadata$index)
 69 | 
 70 | featureData <- new("AnnotatedDataFrame", data = data[,c(2:ncol(data))])
 71 | phenoData <- new("AnnotatedDataFrame", data = wu_metadata)
 72 | 
 73 | wu <- newSCESet(countData = wu_counts, featureData = featureData, phenoData = phenoData)
 74 | 
 75 | #
 76 | # Assign cell cycle
 77 | #
 78 | pairs <- readRDS(system.file("exdata", parameters$normalisation$cycle_markers, package="scran"))
 79 | cellcycle <- cyclone(wu, pairs, gene.names = rownames(wu), assay="counts", verbose=F)
 80 | 
 81 | pData(wu)$cellcycle <- factor(cellcycle$phases)
 82 | 
 83 | #
 84 | # sumfactor normalisation
 85 | #
 86 | clusters <- quickCluster(wu, method = "hclust")
 87 | 
 88 | wu <- computeSumFactors(wu, clusters = clusters, sizes = seq(40, 115, 5))
 89 | wu <- normalize(wu, return_norm_as_exprs = F)
 90 | 
 91 | set_exprs(wu, "norm_exprs_sf") <- get_exprs(wu, "norm_exprs")
 92 | 
 93 | #
 94 | # TMM normalisation
 95 | #
 96 | wu <- normalizeExprs(wu, method="TMM", return_norm_as_exprs = FALSE)
 97 | set_exprs(wu, "norm_exprs_lineage") <- get_exprs(wu, "norm_exprs")
 98 | 
 99 | # Save
100 | save(wu, file = file.path(parameters$general$path_supdata, "wu.Rdata"))
101 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_8b.R:
--------------------------------------------------------------------------------
 1 | library(magrittr)
 2 | 
 3 | #
 4 | # Load QC filtered data
 5 | #
 6 | load("data/scData_filtered_2.Rda")
 7 | 
 8 | #
 9 | # Load TF motif variability data
10 | #
11 | load("data/dev_cluster_specific_2.Rda")
12 | load("data/dev_clustered_cluster_specific_2.Rda")
13 | 
14 | load("data/variability_cluster_specific_2.Rda")
15 | load("data/variability_clustered_cluster_specific_2.Rda")
16 | 
17 | #
18 | # Get variability data
19 | #
20 | var_cutoff <- 1.5
21 | n_var_cutoff <- which(variability$variability > var_cutoff)
22 | var_data <- chromVAR::deviationScores(dev)[rownames(variability)[n_var_cutoff], ]
23 | 
24 | rownames(var_data) <- sapply(rownames(var_data), function(s) {
25 |   s <- tolower(gsub("_MA[0-9]+", "", s))
26 |   l <- unlist(strsplit(s, ""))
27 |   paste0(toupper(l[1]), paste0(l[2:length(l)], collapse = ""))
28 | })
29 | 
30 | #
31 | # Use TF families to split heatmap rows
32 | #
33 | tf_distance <- as.dist(read.table("data/teichmann_similarity.txt", sep = "\t", header = T, row.names = 1))
34 | 
35 | hc <- hclust(d = tf_distance, method = "average")
36 | tf_clusters <- factor(cutree(hc, h = 0.6))
37 | 
38 | m <- match(rownames(variability)[n_var_cutoff], names(tf_clusters))
39 | split <- droplevels(tf_clusters[m])
40 | 
41 | split <- factor(split, levels = c("72", "50", "51", "52", "63", "46", "157", "160", "161", "180", "56", "75", "76", "21", "82", "94", "78", "107", "186", "57"), ordered = T)
42 | 
43 | #
44 | # Generate Heatmaps
45 | #
46 | heatmaps <- list("reference" = list(branch = "cardiac", cluster = 2),
47 |                  "intermediate" = list(branch = "cl3", cluster = 3),
48 |                  "endo" = list(branch = "endo", cluster = 5),
49 |                  "cardiac" = list(branch = "cardiac", cluster = 1),
50 |                  "cluster4" = list(branch = "cl4", cluster = 4))
51 | 
52 | col_fun_atac <- circlize::colorRamp2(seq(from = -1, to = 5, length.out = 9), colors = RColorBrewer::brewer.pal(9, "YlGnBu"))
53 | 
54 | htlist_input <- lapply(heatmaps, function(x) {
55 |   i <- which(SummarizedExperiment::colData(scData_filtered)$.cluster_5 == x$cluster)
56 | 
57 |   var_data_subset <- var_data[, i]
58 |   if(!is.na(x$branch)) {
59 |     var_data_subset <- var_data_subset[, order(SummarizedExperiment::colData(scData_filtered)[i, paste0("dpt_", x$branch)])]
60 |   }
61 |   var_data_smooth <- t(zoo::rollapply(t(var_data_subset), width = 9, FUN = mean, fill = "extend", by.column = T))
62 | 
63 |   ComplexHeatmap::Heatmap(var_data_smooth,
64 |                           col = col_fun_atac,
65 |                           cluster_columns = F,
66 |                           split = split,
67 |                           cluster_rows = T,
68 |                           show_column_names = F,
69 |                           show_row_names = ifelse(x$cluster == 2, TRUE, FALSE),
70 |                           row_names_side = "left",
71 |                           show_row_dend = F,
72 |                           name = paste("Cluster", x$cluster),
73 |                           column_title_side = "top",
74 |                           column_title = paste("Cluster", x$cluster),
75 |                           show_heatmap_legend = ifelse(x$cluster == 2, TRUE, FALSE),
76 |                           row_names_gp = grid::gpar(fontsize = 6),
77 |                           heatmap_legend_param = list(legend_direction = "horizontal", at = c(-1, 5), labels = c("low", "high"), title = "Motif accessibility of TF"))
78 | })
79 | 
80 | htlist <- Reduce(ComplexHeatmap::add_heatmap, htlist_input)
81 | plot_chromvar_dpt <- grid::grid.grabExpr(ComplexHeatmap::draw(htlist, column_title_gp = grid::gpar(fontface = "bold"), heatmap_legend_side = "bottom"))
82 | 
83 | figure_chromvar_dpt <- multipanelfigure::multi_panel_figure(rows = 4, columns = 1, height = 230, width = 205)
84 | figure_chromvar_dpt <- multipanelfigure::fill_panel(figure_chromvar_dpt, plot_chromvar_dpt, row = 2:4)
85 | 
86 | ggplot2::ggsave(figure_chromvar_dpt,
87 |                 file = "figures_2/chromVar_dpt_heatmap.pdf",
88 |                 height = 230,
89 |                 width = 205,
90 |                 unit = "mm")
91 | 
92 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_7bc_8acd.R:
--------------------------------------------------------------------------------
 1 | load(file = "data/scData_filtered_2.Rda")
 2 | 
 3 | #
 4 | # Figure 7 b/c
 5 | #
 6 | 
 7 | plot_lsi_clustering <- scater::plotReducedDim(scData_filtered, use_dimred = "tsne", colour_by = ".cluster_5", shape_by = "batch", add_ticks = F) +
 8 |   ggplot2::ggtitle("LSI + HDBSCAN clustering") +
 9 |   ggplot2::guides(shape = FALSE) +
10 |   ggplot2::xlab("") + ggplot2::ylab("") +
11 |   ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), legend.title = ggplot2::element_blank()) +
12 |   ggplot2::scale_color_manual(values = c("1" = "#33a02c", "2" = "#6a3d9a", "3" = "#1f78b4", "4" = "#e31a1c", "5" = "#ff7f00")) +
13 |   ggplot2::scale_shape_manual(values = c(16, 16))
14 | 
15 | plot_lsi_batch <- scater::plotReducedDim(scData_filtered, use_dimred = "tsne", colour_by = "batch", add_ticks = F) +
16 |   ggplot2::ggtitle("LSI: Cell timepoint") +
17 |   ggplot2::guides(shape = FALSE) +
18 |   ggplot2::xlab("") + ggplot2::ylab("") +
19 |   ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), legend.title = ggplot2::element_blank())
20 | 
21 | figure_7bc <- multipanelfigure::multi_panel_figure(rows = 4, columns = 2, width = 205, height = 230)
22 | 
23 | figure_7bc <- multipanelfigure::fill_panel(figure_7bc, plot_lsi_clustering, column = 1:2, row = 1:2)
24 | figure_7bc <- multipanelfigure::fill_panel(figure_7bc, plot_lsi_batch, column = 1:2, row = 3:4)
25 | 
26 | #
27 | # Figure 8a
28 | #
29 | plot_chromvar_clustering <- scater::plotReducedDim(scData_filtered, use_dimred = "chromvar_cluster_specific", colour_by = ".cluster_5", shape_by = "batch", add_ticks = F) +
30 |   ggplot2::ggtitle("Clustering on chromVar sample correlation") +
31 |   ggplot2::guides(shape = FALSE, color = FALSE) +
32 |   ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), legend.title = ggplot2::element_blank()) +
33 |   ggplot2::scale_shape_manual(values = c(16, 16)) +
34 |   ggplot2::scale_color_manual(values = c("3" = "#1f78b4", "2" = "#6a3d9a", "1" = "#33a02c", "4" = "#e31a1c", "5" = "#ff7f00"))
35 | 
36 | plot_chromvar_batch <- scater::plotReducedDim(scData_filtered, use_dimred = "chromvar_cluster_specific", colour_by = "batch", shape_by = "batch", add_ticks = F) +
37 |   ggplot2::ggtitle("Clustering on chromVar sample correlation") +
38 |   ggplot2::guides(color = FALSE, shape = FALSE) +
39 |   ggplot2::scale_shape_manual(values = c(16, 16)) +
40 |   ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), legend.title = ggplot2::element_blank())
41 | 
42 | figure_8a <- multipanelfigure::multi_panel_figure(rows = 4, columns = 2, width = 205, height = 230)
43 | 
44 | figure_8a <- multipanelfigure::fill_panel(figure_8a, plot_chromvar_clustering, column = 1:2, row = 1:2)
45 | figure_8a <- multipanelfigure::fill_panel(figure_8a, plot_chromvar_batch, column = 1:2, row = 3:4)
46 | 
47 | #
48 | # Figure 8c/d
49 | #
50 | figure_dpt_tsne <- multipanelfigure::multi_panel_figure(columns = 3, rows = 3, width = 205, height = 230)
51 | 
52 | dpts <- c("dpt_cardiac", "dpt_endo")
53 | for(d in dpts) {
54 |   f <-scater::plotReducedDim(scData_filtered, use_dimred = "tsne", colour_by = d, shape_by = "batch", add_ticks = F) +
55 |     ggplot2::ggtitle(paste("LSI:", d)) +
56 |     viridis::scale_color_viridis() +
57 |     ggplot2::guides(shape = FALSE) +
58 |     ggplot2::scale_shape_manual(values = c(16, 16)) +
59 |     ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6),
60 |                    legend.title = ggplot2::element_blank(),
61 |                    legend.key.size =  grid::unit(2, "mm"),
62 |                    legend.margin = ggplot2::margin(b = 2),
63 |                    legend.position = c(0.7,1),
64 |                    legend.justification = c(0,0),
65 |                    legend.background = ggplot2::element_rect(fill = "white"),
66 |                    legend.direction = "horizontal",
67 |                    legend.text = ggplot2::element_text(size = 4)) +
68 |     ggplot2::xlab("") + ggplot2::ylab("")
69 |   figure_dpt_tsne <- multipanelfigure::fill_panel(figure_dpt_tsne, f)
70 | }
71 | 
72 | ggplot2::ggsave(figure_dpt_tsne,
73 |                 file = "figures_2/LSI_dpt.pdf",
74 |                 height = 230,
75 |                 width = 205,
76 |                 unit = "mm")
77 | 
78 | #
79 | # Figure 8f/g
80 | #
81 | 


--------------------------------------------------------------------------------
/original-analysis/src/parameters.R:
--------------------------------------------------------------------------------
 1 | library(scales)
 2 | #
 3 | # General options
 4 | #
 5 | parameters <- list(
 6 |   "general" = list("project" = 'Cardiac progenitor single cells',
 7 |                    "path_rdata" = 'data',
 8 |                    "path_rextdata" = "ext_data",
 9 |                    "path_rfigures" = 'supplementary_figures',
10 |                    "path_supdata" = 'supplementary_data',
11 |                    "lower_detection_limit" = 10,
12 |                    "biomart_dataset" = "mmusculus_gene_ensembl"),
13 |   "filtering" = list("qc_columns" = c("total_features", "pct_dropout", "totalalignments", "exonic", "intronic", "intergenic", "secondaryalignments","exprs_feature_controls_housekeeping", "pct_counts_feature_controls_mito", "percent_genes"),
14 |                      "log_qc_columns" = c("total_features", "totalalignments", "exonic", "intronic", "intergenic","secondaryalignments", "exprs_feature_controls_housekeeping"),
15 |                      "cell_outlier_columns" = list(pct_counts_feature_controls_mito = c(1.5, "higher", F), total_features = c(2, "both", F), pct_dropout = c(2, "higher", F), exprs_feature_controls_housekeeping = c(2, "lower", F), percent_genes = c(1.5, "both", F)),
16 |                      "cell_outlier_maxfailed" = 1,
17 |                      "EpOverA_A" = 2000,
18 |                      "kOverA_k" = 10,
19 |                      "kOverA_A" = 10),
20 |   "normalisation" = list("cycle_markers" = "mouse_cycle_markers.rds"),
21 |   "heterogeneity" = list("het_zscore" = qnorm(0.99)),
22 |   "diffusionmaps" = list("de_fdr" = 0.01,
23 |                          "de_abs_logfc" = 2,
24 |                          "marker_fdr" = 0.01,
25 |                          "marker_auroc" = 0.8),
26 |   "correlation" = list(),
27 |   "colors" = list("nkx_cluster_palette" = c("#8B0000", "#CDAD00", "#00008B"),
28 |                   "nkx_timepoint_palette" = c("#FF003C", "#248F8D", "#987F69"),
29 |                   "nkx_run_palette" = c("#ECD078", "#C02942", "#53777A", "#542437"),
30 |                   "nkxko_cluster_palette" = c(alpha(c("#00008B", "#8B0000", "#CDAD00"), 1), "red"),
31 |                   "isl_cluster_palette" = c("#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "grey"),
32 |                   "isl_timepoint_palette" = c("#FF003C", "#248F8D", "#987F69"),
33 |                   "islko_cluster_palette" = c(alpha(c("#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E"), 0.8), "#FF0001", "grey"),
34 |                   "run_palette" = c("#ECD078", "#C02942", "#53777A", "#542437", "#0B486B", "#D95B43"))
35 | )
36 | 
37 | # #
38 | # # Options for chapter 02_filtering.Rmd
39 | # #
40 | # # [cellfilter]
41 | # qc_columns <- c("total_features", "pct_dropout", "totalalignments", "exonic", "intronic", "intergenic", "secondaryalignments","exprs_feature_controls_housekeeping", "pct_counts_feature_controls_mito", "percent_genes")
42 | # log_qc_columns <- c("total_features", "totalalignments", "exonic", "intronic", "intergenic","secondaryalignments", "exprs_feature_controls_housekeeping")
43 | # cell_outlier_columns <- list(pct_counts_feature_controls_mito = c(1.5, "higher", F), total_features = c(2, "both", F), pct_dropout = c(2, "higher", F), exprs_feature_controls_housekeeping = c(2, "lower", F), percent_genes = c(1.5, "both", F))
44 | # cell_outlier_maxfailed <- 1
45 | #
46 | # # [genefilter]
47 | # EpOverA_A <- 2000
48 | # kOverA_k <- 10
49 | # kOverA_A <- lower_detection_limit
50 | #
51 | # # [downsampling]
52 | #
53 | # #
54 | # # Options for chapter 03_normalisation.Rmd
55 | # #
56 | # # [cell cylce]
57 | # cycle_markers <- "mouse_cycle_markers.rds"
58 | #
59 | # # [PCA]
60 | # init_pca_variables = c("Platform", "pct_dropout", "log10_total_counts", "pct_counts_feature_controls_mito", "pct_counts_top_200_endogenous_features", "totalalignments", "total_features", "cellcycle", "Type", "Chip")
61 | #
62 | # #
63 | # # Options for chapter 04_heterogeneity.Rmd
64 | # #
65 | # het_zscore <- qnorm(0.99)
66 | #
67 | # #
68 | # # Options for chapter 05_clustering.Rmd
69 | # #
70 | # timpoint_col_palette <- "Set1"
71 | # cycle_col_palette <- "Set1"
72 | # cluster_col_palette <- "Set2"
73 | # cluster_na_col <- "#969696"
74 | #
75 | # # [differential expression]
76 | # logfc_threshold <- 3
77 | # fdr_threshold <- 0.01
78 | #
79 | # # [violin plots]
80 | # show_max_de_genes <- 20
81 | #
82 | # #
83 | # # Options for 07_diffusion.Rmd
84 | # #
85 | # proj_col <- "#bd0026"
86 | 
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_8f.R:
--------------------------------------------------------------------------------
 1 | library(magrittr)
 2 | 
 3 | #
 4 | # Load scATAC-seq data
 5 | #
 6 | load("data/scData_filtered_2.Rda")
 7 | 
 8 | #
 9 | # Load scRNA-seq data
10 | #
11 | RNAseq <- read.table("data/Isl1_cardiac_branch_expression.txt", header = T, row.names = 1, sep = "\t")
12 | 
13 | #RNAseq <- RNAseq[c("Hoxa9", "Hoxc10", "Hoxa10", "Hoxd8", "Foxc1", "Foxo4", "Tcf4", "Foxp1", "Gata4", "Gata6", "Gata5"), ]
14 | RNAseq <- RNAseq[c("Hoxa9", "Hoxc10", "Hoxd8", "Tead1", "Tead2", "Tead4", "Hand1", "Tbx5", "Gata4", "Gata6"), ]
15 | split <- c(rep("first", 3), rep("second", 3), rep("third", 4))
16 | 
17 | #RNAseq_scaled <- t(scale(t(RNAseq), scale = F))
18 | RNAseq_smoothed <- t(zoo::rollapply(t(RNAseq), width = 15, FUN = mean, fill = "extend", by.column = T))
19 | 
20 | #
21 | # Get TF deviation data
22 | #
23 | load("data/dev_cluster_specific_2.Rda")
24 | 
25 | # Extract deviation scores
26 | tfs_limited <- c("Hoxa9_MA0594.1", "HOXC10_MA0905.1", "HOXA10_MA0899.1", "Hoxd8_MA0910.1",
27 |                  "FOXC1_MA0032.2", "FOXO4_MA0848.1", "TCF4_MA0830.1", "FOXP1_MA0481.2",
28 |                  "Gata4_MA0482.1", "GATA6_MA1104.1", "GATA5_MA0766.1")
29 | tfs_limited <- c("Hoxa9_MA0594.1", "HOXC10_MA0905.1", "Hoxd8_MA0910.1",
30 |                 "TEAD1_MA0090.2", "TEAD2_MA1121.1", "TEAD4_MA0809.1",
31 |                 "Hand1_Tcf3_MA0092.1", "TBX5_MA0807.1","Gata4_MA0482.1", "GATA6_MA1104.1")
32 | 
33 | var_data_limited <- chromVAR::deviationScores(dev)[tfs_limited, ]
34 | 
35 | #
36 | # Get cluster specific cells with their pseudotime
37 | #
38 | cells <- which(SummarizedExperiment::colData(scData_filtered)$.cluster_5 %in% c(1, 2))
39 | 
40 | var_data_limited <- var_data_limited[, cells]
41 | 
42 | # Order var_data by dpt
43 | var_data_limited <- var_data_limited[, order(SummarizedExperiment::colData(scData_filtered)$dpt_cardiac[cells])]
44 | 
45 | #
46 | # Smooth variability data
47 | #
48 | var_data_limited_smoothed <- t(zoo::rollapply(t(var_data_limited), width = 13, FUN = mean, fill = "extend", by.column = T))
49 | 
50 | #
51 | # Define color mapping
52 | #
53 | col_fun_rna <- circlize::colorRamp2(seq(from = -4, to = 4, length.out = 11), colors = rev(RColorBrewer::brewer.pal(11, "RdBu")))
54 | col_fun_atac <- circlize::colorRamp2(seq(from = -1, to = 5, length.out = 9), colors = RColorBrewer::brewer.pal(9, "YlGnBu"))
55 | 
56 | #
57 | # Draw Heatmap
58 | #
59 | h_left <- ComplexHeatmap::Heatmap(RNAseq_smoothed,
60 |                                   col = col_fun_rna,
61 |                                   split = split,
62 |                                   cluster_columns = F,
63 |                                   cluster_rows = T,
64 |                                   show_row_names = T,
65 |                                   row_names_side = "left",
66 |                                   show_row_dend = F,
67 |                                   name = "Expression",
68 |                                   column_title_side = "top",
69 |                                   column_title = "RNA expression of TF",
70 |                                   width = grid::unit(90, "mm"),
71 |                                   heatmap_legend_param = list(legend_direction = "horizontal", at = c(-4, 4), labels = c("low", "high")))
72 | 
73 | h_right <- ComplexHeatmap::Heatmap(var_data_limited_smoothed,
74 |                                    col = col_fun_atac,
75 |                                    split = split,
76 |                                    cluster_columns = F,
77 |                                    cluster_rows = F,
78 |                                    show_column_names = F,
79 |                                    show_row_names = F,
80 |                                    show_row_dend = F,
81 |                                    name = "Motif accessibility",
82 |                                    column_title_side = "top",
83 |                                    column_title = "Motif accessibility of TF",
84 |                                    width = grid::unit(90, "mm"),
85 |                                    heatmap_legend_param = list(legend_direction = "horizontal", at = c(-1, 5), labels = c("low", "high")))
86 | 
87 | plot_cardiac_RNA_ATAC <- grid::grid.grabExpr(ComplexHeatmap::draw(ComplexHeatmap::add_heatmap(h_left, h_right), column_title_gp = grid::gpar(fontface = "bold"), heatmap_legend_side = "bottom"))
88 | 
89 | figure_cardiac_RNA_ATAC <- multipanelfigure::multi_panel_figure(width = 230, height = 205, columns = 1, rows = 1)
90 | figure_cardiac_RNA_ATAC <- multipanelfigure::fill_panel(figure_cardiac_RNA_ATAC, plot_cardiac_RNA_ATAC)
91 | 
92 | ggplot2::ggsave(figure_cardiac_RNA_ATAC,
93 |        file = "figures_2/Cardiac_RNA_ATAC.pdf",
94 |        height = 205,
95 |        width = 230,
96 |        unit = "mm")
97 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_8g.R:
--------------------------------------------------------------------------------
  1 | library(magrittr)
  2 | 
  3 | #
  4 | # Load scATAC-seq data
  5 | #
  6 | load("data/scData_filtered_2.Rda")
  7 | 
  8 | #
  9 | # Load scRNA-seq data
 10 | #
 11 | RNAseq <- read.table("data/Isl1_endo_branch_expression.txt", header = T, row.names = 1, sep = "\t")
 12 | 
 13 | RNAseq <- RNAseq[c("Sox13", "Sox6", "Sox9", "Tead1", "Gata6", "Zeb1", "Fosl2", "Junb", "Gata2", "Tal1"), ]
 14 | RNAseq <- RNAseq[c("Gata4", "Gata5", "Sox6", "Sox9", "Sox13", "Tead1", "Tead2", "Tead4", "Gata6", "Gata2"), ]
 15 | split <- c(rep("first", 6), rep("second", 4))
 16 | 
 17 | #RNAseq_scaled <- t(scale(t(RNAseq), scale = F))
 18 | RNAseq_smoothed <- t(zoo::rollapply(t(RNAseq), width = 15, FUN = mean, fill = "extend", by.column = T))
 19 | 
 20 | #
 21 | # Get TF deviation data
 22 | #
 23 | load("data/dev_cluster_specific_2.Rda")
 24 | 
 25 | # Extract deviation scores
 26 | tfs_limited <- c("SOX13_MA1120.1", "Sox6_MA0515.1", "SOX9_MA0077.1", "TEAD1_MA0090.2",
 27 |                  "GATA6_MA1104.1",
 28 |                  "ZEB1_MA0103.3", "FOSL2_JUNB_MA1138.1", "FOSL2_JUNB_MA1138.1", "GATA2_MA0036.3", "GATA1_TAL1_MA0140.2")
 29 | tfs_limited <- c("Gata4_MA0482.1", "GATA5_MA0766.1", "Sox6_MA0515.1", "SOX9_MA0077.1", "SOX13_MA1120.1", "TEAD1_MA0090.2", "TEAD2_MA1121.1", "TEAD4_MA0809.1", "GATA6_MA1104.1", "GATA2_MA0036.3")
 30 | 
 31 | var_data_limited <- chromVAR::deviationScores(dev)[tfs_limited, ]
 32 | 
 33 | #
 34 | # Get cluster specific cells ordered by their pseudotime
 35 | #
 36 | heatmaps <- list("reference" = list(branch = "cardiac", cluster = 2),
 37 |                  "intermediate" = list(branch = "cl3", cluster = 3),
 38 |                  "endo" = list(branch = "endo", cluster = 5))
 39 | 
 40 | var_data_list <- lapply(heatmaps, function(x) {
 41 |   i <- which(SummarizedExperiment::colData(scData_filtered)$.cluster_5 == x$cluster)
 42 |   data <- var_data_limited[, i]
 43 |   if(!is.na(x$branch)) {
 44 |     data <- data[, order(SummarizedExperiment::colData(scData_filtered)[i, paste0("dpt_", x$branch)])]
 45 |   }
 46 |   data
 47 | })
 48 | var_data_limited <- do.call("cbind", var_data_list)
 49 | 
 50 | #
 51 | # Smooth variability data
 52 | #
 53 | var_data_limited_smoothed <- t(zoo::rollapply(t(var_data_limited), width = 13, FUN = mean, fill = "extend", by.column = T))
 54 | 
 55 | #
 56 | # Define color mapping
 57 | #
 58 | col_fun_rna <- circlize::colorRamp2(seq(from = -4, to = 4, length.out = 11), colors = rev(RColorBrewer::brewer.pal(11, "RdBu")))
 59 | col_fun_atac <- circlize::colorRamp2(seq(from = -1, to = 5, length.out = 9), colors = RColorBrewer::brewer.pal(9, "YlGnBu"))
 60 | 
 61 | #
 62 | # Draw Heatmap
 63 | #
 64 | h_left <- ComplexHeatmap::Heatmap(RNAseq_smoothed,
 65 |                                   col = col_fun_rna,
 66 |                                   split = split,
 67 |                                   cluster_columns = F,
 68 |                                   cluster_rows = F,
 69 |                                   show_row_names = T,
 70 |                                   row_names_side = "left",
 71 |                                   show_row_dend = F,
 72 |                                   name = "Expression",
 73 |                                   column_title_side = "top",
 74 |                                   column_title = "RNA expression of TF",
 75 |                                   width = grid::unit(90, "mm"),
 76 |                                   heatmap_legend_param = list(legend_direction = "horizontal", at = c(-4, 4), labels = c("low", "high")))
 77 | 
 78 | h_right <- ComplexHeatmap::Heatmap(var_data_limited_smoothed,
 79 |                                    col = col_fun_atac,
 80 |                                    split = split,
 81 |                                    cluster_columns = F,
 82 |                                    cluster_rows = F,
 83 |                                    show_column_names = F,
 84 |                                    show_row_names = F,
 85 |                                    show_row_dend = F,
 86 |                                    name = "Motif accessibility",
 87 |                                    column_title_side = "top",
 88 |                                    column_title = "Motif accessibility of TF",
 89 |                                    width = grid::unit(90, "mm"),
 90 |                                    heatmap_legend_param = list(legend_direction = "horizontal", at = c(-1, 5), labels = c("low", "high")))
 91 | 
 92 | plot_endo_RNA_ATAC <- grid::grid.grabExpr(ComplexHeatmap::draw(ComplexHeatmap::add_heatmap(h_left, h_right), column_title_gp = grid::gpar(fontface = "bold"), heatmap_legend_side = "bottom"))
 93 | 
 94 | figure_endo_RNA_ATAC <- multipanelfigure::multi_panel_figure(width = 230, height = 205, columns = 1, rows = 1)
 95 | figure_endo_RNA_ATAC <- multipanelfigure::fill_panel(figure_endo_RNA_ATAC, plot_endo_RNA_ATAC)
 96 | 
 97 | ggplot2::ggsave(figure_endo_RNA_ATAC,
 98 |                 file = "figures_2/Endo_RNA_ATAC.pdf",
 99 |                 height = 205,
100 |                 width = 230,
101 |                 unit = "mm")
102 | 


--------------------------------------------------------------------------------
/original-analysis/src/01_normalisation.R:
--------------------------------------------------------------------------------
  1 | library(scran)
  2 | library(scater)
  3 | library(SCnorm)
  4 | library(multipanelfigure)
  5 | library(gridExtra)
  6 | library(limma)
  7 | 
  8 | #
  9 | # Load data
 10 | #
 11 | source("src/parameters.R")
 12 | load(file.path(parameters$general$path_supdata, "scd.RData"))
 13 | 
 14 | #
 15 | # Assign cell cycle stage
 16 | #
 17 | pairs <- readRDS(system.file("exdata", parameters$normalisation$cycle_markers, package="scran"))
 18 | cellcycle <- cyclone(scd, pairs, gene.names = rownames(scd), assay="exprs", verbose=F)
 19 | 
 20 | pData(scd)$cellcycle <- factor(cellcycle$phases)
 21 | plot(cellcycle$score$G1, cellcycle$score$G2M, pch=16)
 22 | 
 23 | cc_tab <- data.frame(count = table(cellcycle$phases))
 24 | colnames(cc_tab) <- c("Cell cycle stage", "Cells")
 25 | 
 26 | tt1 <- ttheme_minimal(core=list(fg_params=list(hjust=1, x=0.9)),
 27 |                       rowhead=list(fg_params=list(hjust=1, x=0.9)),
 28 |                       colhead = list(fg_params=list(hjust=1, x=0.9)))
 29 | 
 30 | cell_cycle_table <- tableGrob(cc_tab, theme=tt1, rows = NULL)
 31 | 
 32 | #
 33 | # Perfom lineage-specific TMM normalization with batch correction
 34 | #
 35 | # Prepare lineages
 36 | lineages <- list(isl1 = pData(scd)$Background == "isl1" | pData(scd)$Background == "isl1ko" | pData(scd)$Background == "nkx2-5oe",
 37 |                  nkx = pData(scd)$Background == "nkx2-5")
 38 | 
 39 | sf.normalized <- lapply(lineages, function(lineage_cells) {
 40 |   lineage <- scd[, lineage_cells]
 41 | 
 42 |   lineage <- computeSumFactors(lineage, clusters = pData(lineage)$Timepoint)
 43 |   lineage <- normalise(lineage)
 44 |   get_exprs(lineage, "norm_exprs")
 45 | })
 46 | 
 47 | tmm.normalized <- lapply(lineages, function(lineage_cells) {
 48 |   lineage <- scd[, lineage_cells]
 49 | 
 50 |   # Create design matrix
 51 |   #design <- matrix(c(rep(1, ncol(lineage)), as.numeric(pData(lineage)$Timepoint)), nrow = ncol(lineage))
 52 | 
 53 |   # Remove batch effect
 54 |   #corrected <- removeBatchEffect(get_exprs(lineage, "exprs"), design = design, block = pData(lineage)$Platform)
 55 | 
 56 |   # Set corrected expression
 57 |   #set_exprs(lineage, "exprs") <- corrected
 58 | 
 59 |   # Normalize lineages
 60 |   lineage <- normalizeExprs(lineage, method="TMM", return_norm_as_exprs = FALSE)
 61 | 
 62 |   # Create design matrix
 63 |   #design <- matrix(c(rep(1, ncol(lineage)), as.numeric(pData(lineage)$Timepoint)), nrow = ncol(lineage))
 64 | 
 65 |   # Remove batch effect
 66 |   #tmm <- removeBatchEffect(get_exprs(lineage, "norm_exprs"), design = design, block = pData(lineage)$Platform)
 67 | 
 68 |   get_exprs(lineage, "norm_exprs")
 69 | })
 70 | 
 71 | # Establish subsets
 72 | isl1_subset <- scd[, lineages$isl1]
 73 | set_exprs(isl1_subset, "norm_exprs_lineage") <- tmm.normalized$isl1
 74 | set_exprs(isl1_subset, "norm_exprs_sf") <- sf.normalized$isl1
 75 | 
 76 | nkx_subset <- scd[, lineages$nkx]
 77 | set_exprs(nkx_subset, "norm_exprs_lineage") <- tmm.normalized$nkx
 78 | set_exprs(nkx_subset, "norm_exprs_sf") <- sf.normalized$nkx
 79 | 
 80 | #
 81 | # Save created objects for further analysis
 82 | #
 83 | save(isl1_subset, file = file.path(parameters$general$path_supdata, "Isl1_subset.Rdata"))
 84 | save(nkx_subset, file = file.path(parameters$general$path_supdata, "Nkx2-5_subset.Rdata"))
 85 | 
 86 | #
 87 | # Prepare data platform subsets
 88 | #
 89 | subsets <- list(c1 = pData(scd)$Platform == "C1",
 90 |                 wg = pData(scd)$Platform == "WG")
 91 | 
 92 | #
 93 | # Perfom lineage- and platform-specific TMM normalisation
 94 | #
 95 | 
 96 | sf.normalized <- lapply(subsets, function(platform) {
 97 |   isl_lineage <- platform & (pData(scd)$Background == "isl1" | pData(scd)$Background == "isl1ko" | pData(scd)$Background == "nkx2-5oe")
 98 |   nkx_lineage <- platform & (pData(scd)$Background == "nkx2-5")
 99 | 
100 |   # Normalize lineages
101 |   isl_subset <- scd[, isl_lineage]
102 |   isl_subset <- computeSumFactors(isl_subset)#, clusters = pData(isl_subset)$Timepoint)
103 |   isl_subset <- normalise(isl_subset)
104 | 
105 |   nkx_subset <- scd[, nkx_lineage]
106 |   nkx_subset <- computeSumFactors(nkx_subset)#, clusters = pData(nkx_subset)$Timepoint)
107 |   nkx_subset <- normalise(nkx_subset)
108 | 
109 |   # Bind results and return
110 |   sf <- cbind(get_exprs(isl_subset, "norm_exprs"), get_exprs(nkx_subset, "norm_exprs"))
111 |   m <- match(colnames(scd), colnames(sf))
112 |   sf[, na.omit(m)]
113 | })
114 | 
115 | c1_subset <- scd[, subsets$c1]
116 | set_exprs(c1_subset, "norm_exprs_sf") <- sf.normalized$c1
117 | 
118 | wg_subset <- scd[, subsets$wg]
119 | set_exprs(wg_subset, "norm_exprs_sf") <- sf.normalized$wg
120 | 
121 | tmm.normalized <- lapply(subsets, function(platform) {
122 |   # Establish lineages
123 |   isl_lineage <- platform & (pData(scd)$Background == "isl1" | pData(scd)$Background == "isl1ko" | pData(scd)$Background == "nkx2-5oe")
124 |   nkx_lineage <- platform & (pData(scd)$Background == "nkx2-5")
125 | 
126 |   # Normalize lineages
127 |   isl_subset <- scd[, isl_lineage]
128 |   isl_subset <- normalizeExprs(isl_subset, method="TMM", return_norm_as_exprs = FALSE)
129 | 
130 |   nkx_subset <- scd[, nkx_lineage]
131 |   nkx_subset <- normalizeExprs(nkx_subset, method="TMM", return_norm_as_exprs = FALSE)
132 | 
133 |   # Bind results and return
134 |   tmm <- cbind(get_exprs(isl_subset, "norm_exprs"), get_exprs(nkx_subset, "norm_exprs"))
135 |   m <- match(colnames(scd), colnames(tmm))
136 |   tmm[, na.omit(m)]
137 | })
138 | 
139 | # Establish subsets
140 | #c1_subset <- scd[, subsets$c1]
141 | set_exprs(c1_subset, "norm_exprs_lineage") <- tmm.normalized$c1
142 | 
143 | #wg_subset <- scd[, subsets$wg]
144 | set_exprs(wg_subset, "norm_exprs_lineage") <- tmm.normalized$wg
145 | 
146 | #
147 | # Save created objects for further analysis
148 | #
149 | save(c1_subset, file = file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
150 | save(wg_subset, file = file.path(parameters$general$path_supdata, "wg_subset.Rdata"))
151 | 
152 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_2ef.R:
--------------------------------------------------------------------------------
  1 | library(scater)
  2 | library(tidyr)
  3 | library(ggplot2)
  4 | 
  5 | #
  6 | # Load data
  7 | #
  8 | source("src/parameters.R")
  9 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 10 | 
 11 | #
 12 | # Setup
 13 | #
 14 | # Genes for Isl1 dpt lineplots
 15 | isl1_genes <- factor(c("Isl1", "Mef2c", "Nkx2-5", "Tgfb1li1", "Ankrd1", "Myocd", "Smyd1", "Gata2", "Hand1", "Hhex", "Hoxa7", "Hoxa9", "Hoxb6", "Hoxc8", "Msx1", "Pitx1", "Nkx1-2", "Snai1", "Tbx3", "Tbx4", "Ets2", "Sall4"),
 16 |                      levels = c("Isl1", "Mef2c", "Nkx2-5", "Tgfb1li1", "Ankrd1", "Myocd", "Smyd1", "Gata2", "Hand1", "Hhex", "Hoxa7", "Hoxa9", "Hoxb6", "Hoxc8", "Msx1", "Pitx1", "Nkx1-2", "Snai1", "Tbx3", "Tbx4", "Ets2", "Sall4"),
 17 |                      ordered = T)
 18 | 
 19 | # Genes for Nkx2-5 dpt lineplots
 20 | nkx_genes <- factor(c("Nkx2-5", "Ankrd1", "Cdkn2d", "Hopx", "Mef2c", "Myocd", "Smyd1", "Tgfb1li1", "Tbx20", "Dnmt3b", "Gata2", "Gata3", "Hand1", "Msx1"),
 21 |                     levels = c("Nkx2-5", "Ankrd1", "Cdkn2d", "Hopx", "Mef2c", "Myocd", "Smyd1", "Tgfb1li1", "Tbx20", "Dnmt3b", "Gata2", "Gata3", "Hand1", "Msx1"),
 22 |                     ordered = T)
 23 | 
 24 | theme <- theme(panel.grid.major = element_blank(),
 25 |                panel.background = element_blank(),
 26 |                strip.background = element_blank(),
 27 |                strip.text = element_text(face = "bold", size = 5, margin = margin(0.5, 1, 0, 1, unit = "mm")),
 28 |                panel.border = element_rect(fill = NA, color = "black"),
 29 |                panel.spacing = grid::unit(0.5,"mm"),
 30 |                axis.text = element_text(size = 4),
 31 |                axis.title = element_text(size = 5),
 32 |                axis.title.x = element_text(margin = margin(0,0,0,0)),
 33 |                axis.title.y = element_text(margin = margin(0,0,0,0))
 34 |                )
 35 | 
 36 | #
 37 | # Generate Isl1 dpt line plots
 38 | #
 39 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
 40 | 
 41 | m <- match(isl1_genes, fData(c1_subset)$symbol)
 42 | isl1_expression <- as.data.frame(get_exprs(c1_subset[na.omit(m), isl1_cells], "norm_exprs_sf"))
 43 | isl1_expression$symbol <- isl1_genes[!is.na(m)]
 44 | 
 45 | # Tidyr gather into tidy data
 46 | isl1_expression_df <- gather(isl1_expression, key = "cell", value = "expression", -symbol)
 47 | 
 48 | # Add cell metadata
 49 | m <- match(isl1_expression_df$cell, colnames(c1_subset[, isl1_cells]))
 50 | isl1_expression_df$dpt <- pData(c1_subset[, isl1_cells])$dpt[m]
 51 | isl1_expression_df$cluster <- factor(pData(c1_subset[, isl1_cells])$cluster[m])
 52 | 
 53 | # Exclude endothelial branch
 54 | isl1_expression_df <- subset(isl1_expression_df, isl1_expression_df$cluster != 1)
 55 | 
 56 | isl1_dpt_lineplot <- ggplot(isl1_expression_df, aes(x = dpt, y = expression)) +
 57 |   geom_point(data = subset(isl1_expression_df, !is.na(isl1_expression_df$cluster)), aes(color = cluster), size = 0.3) +
 58 |   geom_point(data = subset(isl1_expression_df, is.na(isl1_expression_df$cluster)), color = "grey", size = 0.3) +
 59 |   stat_smooth(span = 0.9, method = "loess", n = 30, color = "grey", se = FALSE) +
 60 |   scale_color_manual(values = parameters$colors$isl_cluster_palette[2:5]) +
 61 |   facet_wrap(~ symbol, ncol = 6) +
 62 |   #ylab("Normalized expression") +
 63 |   #xlab("Pseudotime") +
 64 |   ylab("") +
 65 |   xlab("") +
 66 |   guides(color = FALSE) +
 67 |   theme
 68 | 
 69 | #
 70 | # Generate Nkx2-5 dpt line plots
 71 | #
 72 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1")
 73 | 
 74 | m <- match(nkx_genes, fData(c1_subset)$symbol)
 75 | nkx_expression <- as.data.frame(get_exprs(c1_subset[na.omit(m), nkx_cells], "norm_exprs_sf"))
 76 | nkx_expression$symbol <- nkx_genes[!is.na(m)]
 77 | 
 78 | # Tidyr gather into tidy data
 79 | nkx_expression_df <- gather(nkx_expression, key = "cell", value = "expression", -symbol)
 80 | 
 81 | # Add cell metadata
 82 | m <- match(nkx_expression_df$cell, colnames(c1_subset[, nkx_cells]))
 83 | nkx_expression_df$dpt <- pData(c1_subset[, nkx_cells])$dpt[m]
 84 | nkx_expression_df$cluster <- factor(pData(c1_subset[, nkx_cells])$cluster[m])
 85 | 
 86 | nkx_dpt_lineplot <- ggplot(nkx_expression_df, aes(x = dpt, y = expression)) +
 87 |   geom_point(data = subset(nkx_expression_df, !is.na(nkx_expression_df$cluster)), aes(color = cluster), size = 0.3) +
 88 |   geom_point(data = subset(nkx_expression_df, is.na(nkx_expression_df$cluster)), color = "grey", size = 0.3) +
 89 |   stat_smooth(span = 0.9, method = "loess", n = 30, color = "grey", se = FALSE) +
 90 |   scale_color_manual(values = parameters$colors$nkx_cluster_palette) +
 91 |   facet_wrap(~ symbol, nrow = 3) +
 92 |   #ylab("Normalized expression") +
 93 |   #xlab("Pseudotime") +
 94 |   ylab("") +
 95 |   xlab("") +
 96 |   guides(color = FALSE) +
 97 |   theme
 98 | 
 99 | #
100 | # Save plots (uncomment to activate)
101 | #
102 | figure <- multipanelfigure::multi_panel_figure(width = 205, height = 230, columns = 2, rows = 2)
103 | figure <- multipanelfigure::fill_panel(figure, nkx_dpt_lineplot, column = 1, row = 2, label = "e")
104 | figure <- multipanelfigure::fill_panel(figure, isl1_dpt_lineplot, column = 2, row = 2, label = "f")
105 | 
106 | ggsave(plot = figure,
107 |        filename = file.path(parameters$general$path_rfigures, "Figure_2_ef.pdf"),
108 |        width = 205,
109 |        height = 230,
110 |        units = "mm",
111 |        dpi = 600)
112 | # ggsave(plot = isl1_dpt_lineplot,
113 | #        filename = "Figure_2_f.pdf",
114 | #        width = 205,
115 | #        height = 230,
116 | #        units = "mm",
117 | #        dpi = 600)
118 | 
119 | #
120 | # Save raw tables for Source Data
121 | #
122 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_Figure_2e.txt"),
123 |             x = isl1_expression_df,
124 |             col.names = T,
125 |             row.names = F,
126 |             sep = "\t",
127 |             quote = F)
128 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_Figure_2f.txt"),
129 |             x = nkx_expression_df,
130 |             col.names = T,
131 |             row.names = F,
132 |             sep = "\t",
133 |             quote = F)
134 | 


--------------------------------------------------------------------------------
/original-analysis/README.md:
--------------------------------------------------------------------------------
 1 | # Single cell RNA-seq and ATAC seq analysis of cardiac progenitor cell transition states and lineage settlement
 2 | 
 3 | ### R session information
 4 | 
 5 | For requirements, please see the file `RNA-seq_sessionInfo.txt` and `ATAC-seq_sessionInfo.txt` for specific R packages and their versions.
 6 | 
 7 | ### Data download
 8 | 
 9 | Data for the original analysis is approximately 500 MB in size and can be downloaded from a bucket of an S3 object store using [cloudyr/aws.s3](https://github.com/cloudyr/aws.s3):
10 | 
11 | From within R, install [cloudyr/aws.s3](https://github.com/cloudyr/aws.s3) as follows and set up the connection to our data server:
12 | 
13 | ```
14 | remotes::install_github("cloudyr/aws.s3")
15 | Sys.setenv("AWS_S3_ENDPOINT" = "mpi-bn.mpg.de",
16 |            "AWS_DEFAULT_REGION" = "s3")
17 | 
18 | if(!dir.exists("original_analysis/data")) {
19 |     dir.create("original_analysis/data", recursive = TRUE)
20 | }
21 | 
22 | if(!dir.exists("original_analysis/supplementary_data")) {
23 |     dir.create("original_analysis/supplementary_data", recursive = TRUE)
24 | }
25 | ```
26 | 
27 | Then, use the following code snipped to download all files:
28 | 
29 | ```
30 | data_files <- c("Isl1_cardiac_branch_expression.txt", "Isl1_endo_branch_expression.txt", "dev_cluster_specific_2.Rda", "dev_clustered_cluster_specific_2.Rda", "scData_filtered_2.Rda", "variability_cluster_specific_2.Rda", "variability_clustered_cluster_specific_2.Rda")
31 | 
32 | for(file in data_files) {
33 |     aws.s3::save_object(paste0("original-data/", file),
34 |                         "data-cpc-2018",
35 |                         paste0("original_analysis/data/", file))
36 | }
37 | 
38 | supplementary_data_files <- c("Isl1-DM.Rdata", "Isl1-diffExprs.Rdata", "Isl1-markers.Rdata", "Nkx2-5-DM.Rdata", "Nkx2-5-diffExprs.Rdata", "Nkx2-5-markers.Rdata", "c1_subset.Rdata", "wu.Rdata")
39 | 
40 | for(file in supplementary_data_files) {
41 |     aws.s3::save_object(paste0("original-data/", file),
42 |                         "data-cpc-2018",
43 |                         paste0("original_analysis/supplementary_data/", file))
44 | }
45 | ```
46 | 
47 | or use `aws.s3::save_object()` with the following parameters to download individual files:
48 | 
49 | Object | Target | Call
50 | ------ | ------ | ----
51 | `Isl1_cardiac_branch_expression.txt` | `data` | `aws.s3::save_object("original-data/Isl1_cardiac_branch_expression.txt", "data-cpc-2018", "original_analysis/data/Isl1_cardiac_branch_expression.txt")`
52 | `Isl1_endo_branch_expression.txt` | `data` | `aws.s3::save_object("original-data/Isl1_endo_branch_expression.txt", "data-cpc-2018", "original_analysis/data/Isl1_endo_branch_expression.txt")`
53 | `dev_cluster_specific_2.Rda` | `data` | `aws.s3::save_object("original-data/dev_cluster_specific_2.Rda", "data-cpc-2018", "original_analysis/data/dev_cluster_specific_2.Rda")`
54 | `dev_clustered_cluster_specific_2.Rda` | `data` | `aws.s3::save_object("original-data/dev_clustered_cluster_specific_2.Rda", "data-cpc-2018", "original_analysis/data/dev_clustered_cluster_specific_2.Rda")`
55 | `scData_filtered_2.Rda` | `data` | `aws.s3::save_object("original-data/scData_filtered_2.Rda", "data-cpc-2018", "original_analysis/data/scData_filtered_2.Rda")`
56 | `variability_cluster_specific_2.Rda` | `data` | `aws.s3::save_object("original-data/variability_cluster_specific_2.Rda", "data-cpc-2018", "original_analysis/data/variability_cluster_specific_2.Rda")`
57 | `variability_clustered_cluster_specific_2.Rda` | `data` | `aws.s3::save_object("original-data/variability_clustered_cluster_specific_2.Rda", "data-cpc-2018", "original_analysis/data/variability_clustered_cluster_specific_2.Rda")`
58 | `Isl1-DM.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1-DM.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Isl1-DM.Rdata")`
59 | `Isl1-diffExprs.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1-diffExprs.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/variability_clustered_cluster_specific_2.Rda")`
60 | `Isl1-markers.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1-markers.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Isl1-markers.Rdata")`
61 | `Isl1_cardiac_branch_expression.txt` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1_cardiac_branch_expression.txt", "data-cpc-2018", "original_analysis/supplementary_data/Isl1_cardiac_branch_expression.txt")`
62 | `Isl1_endo_branch_expression.txt` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1_endo_branch_expression.txt", "data-cpc-2018", "original_analysis/supplementary_data/Isl1_endo_branch_expression.txt")`
63 | `Nkx2-5-DM.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Nkx2-5-DM.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Nkx2-5-DM.Rdata")`
64 | `Nkx2-5-diffExprs.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Nkx2-5-diffExprs.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Nkx2-5-diffExprs.Rdata")`
65 | `Nkx2-5-markers.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Nkx2-5-markers.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Nkx2-5-markers.Rdata")`
66 | `c1_subset.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/c1_subset.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/c1_subset.Rdata")`
67 | `wu.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/wu.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/wu.Rdata")`
68 | 
69 | Finally, set the working directory to the path `original-analysis` and continue below:
70 | 
71 | ```
72 | setwd("original-analysis")
73 | ```
74 | 
75 | ### RNA-seq analysis
76 | 
77 | #### Preprocessing
78 | 
79 | R code to preprocess single-cell RNA-seq data from scratch can be found in the `src` directory. Steps (e.g. *quality control*, *filtering*, *normalization*, etc.) should be run in the order indicated by the leading counter in the filename (e.g. `00_filtering.R` before `01_normalisation.R`)
80 | 
81 | #### Figures
82 | 
83 | We additionally provide scripts to reproduce most of the main figures. The R code can be found in the `figure_src` directory and is split into individual files. Figures can be recreated without the need of *preprocessing* the data.
84 | 
85 | ### ATAC-seq analysis
86 | 
87 | #### Figures
88 | 
89 | We provide scripts to reproduce figures from the ATAC-seq data analysis of the manuscript. The R code can be found in the `figure_src` directory.
90 | 
91 | ### Citation
92 | 
93 | Please refer to the following research article when using data from this repository:
94 | 
95 | Guangshuai Jia, Jens Preussner, Xi Chen, Stefan Guenther, Xuejun Yuan, Michail Yekelchyk, Carsten Kuenne, Mario Looso, Yonggang Zhou, Sarah Teichmann and Thomas Braun. Single cell RNA-seq and ATAC seq analysis of cardiac progenitor cell transition states and lineage settlement. **Nature Communications** 9, 4877 (*2018*), doi: [10.1038/s41467-018-07307-6](https://doi.org/10.1038/s41467-018-07307-6).
96 | 


--------------------------------------------------------------------------------
/original-analysis/src/04_clustering.R:
--------------------------------------------------------------------------------
  1 | library(scater)
  2 | library(singlecellutils)
  3 | library(Rtsne)
  4 | library(multipanelfigure)
  5 | library(dplyr)
  6 | 
  7 | #
  8 | # Load data
  9 | #
 10 | source("src/parameters.R")
 11 | source("src/DE.R")
 12 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 13 | 
 14 | #
 15 | # Load heterogeneous genes
 16 | #
 17 | # Add variable genes from C1 analysis
 18 | het_early <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-nkx.early.txt"))
 19 | het_mid <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-nkx.mid.txt"))
 20 | het_late <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-nkx.late.txt"))
 21 | het_pool <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-nkx.txt"))
 22 | 
 23 | nkx_het <- unique(c(het_early, het_mid, het_late, het_pool))
 24 | 
 25 | het_early <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.early.txt"))
 26 | het_mid <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.mid.txt"))
 27 | het_late <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.late.txt"))
 28 | het_pool <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.txt"))
 29 | 
 30 | isl1_het <- unique(c(het_early, het_mid, het_late, het_pool))
 31 | 
 32 | #
 33 | # Perform SOM clustering
 34 | #
 35 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1")
 36 | nkx_expression <- get_exprs(c1_subset[, nkx_cells], "norm_exprs_sf")
 37 | nkx_expression_scaled <- t(scale(t(nkx_expression[nkx_het, ])))
 38 | 
 39 | nkx_som <- calcSOM(nkx_expression_scaled, train = 1:nrow(nkx_expression_scaled), num_epochs = 2000, seed = 1004)
 40 | nkx_som$codes <- nkx_som$codes[[1]]
 41 | 
 42 | save(nkx_som, file = file.path(parameters$general$path_supdata, "Nkx2-5_SOM.Rdata"))
 43 | 
 44 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
 45 | isl1_cells <- which((pData(c1_subset)$Background == "isl1" | pData(c1_subset)$Background == "isl1ko") & pData(c1_subset)$Platform == "C1")
 46 | isl1_expression <- get_exprs(c1_subset[, isl1_cells], "norm_exprs_sf")
 47 | isl1_expression_scaled <- t(scale(t(isl1_expression[isl1_het, ])))
 48 | 
 49 | isl1_som <- calcSOM(isl1_expression_scaled, train = 1:nrow(isl1_expression_scaled), num_epochs = 2000, seed = 1004)
 50 | isl1_som$codes <- isl1_som$codes[[1]]
 51 | 
 52 | save(isl1_som, file = file.path(parameters$general$path_supdata, "Isl1_SOM.Rdata"))
 53 | 
 54 | hdbscan <- function(data, min_samples = 7L, min_cluster_size = 9L, outlier = 0, seed = NULL) {
 55 |   if (!is.null(seed)) set.seed(seed)
 56 |   h <- reticulate::import("hdbscan")
 57 |   cl <- h$HDBSCAN(min_samples = min_samples, min_cluster_size = min_cluster_size)
 58 |   labels <- cl$fit_predict(data) + 1
 59 |   labels[labels == 0] <- outlier
 60 |   return(factor(labels))
 61 | }
 62 | 
 63 | #
 64 | # Nkx2-5 dimension reduction and clustering
 65 | #
 66 | seed <- 2596
 67 | set.seed(seed)
 68 | nkx_tsne <- Rtsne(t(nkx_som$codes), perplexity = 15, theta = 0.05, max_iter = 2000)
 69 | 
 70 | nkx_cl <- hdbscan(nkx_tsne$Y, min.samples = 7, min.cluster.size = 9)
 71 | nkx_cl$.cluster[nkx_cl$.cluster == 0] <- NA
 72 | nkx_cl$.cluster <- factor(nkx_cl$.cluster)
 73 | 
 74 | colorAMap(nkx_tsne$Y, shape_by = factor(pData(c1_subset[, nkx_cells])$Platform), colour_by = nkx_cl$.cluster, pch=16)
 75 | colorAMap(nkx_tsne$Y, shape_by = factor(pData(c1_subset[, nkx_cells])$Platform), colour_by = pData(c1_subset[, nkx_cells])$Timepoint, pch=16)
 76 | colorAMap(nkx_tsne$Y, colour_by = factor(pData(c1_subset[, nkx_cells])$Platform), shape_by = nkx_cl$.cluster, pch=16)
 77 | colorAMap(nkx_tsne$Y, shape_by = factor(pData(c1_subset[, nkx_cells])$Platform), colour_by = nkx_cl$.cluster, pch=16)
 78 | 
 79 | pData(c1_subset)$cluster <- NA
 80 | pData(c1_subset)$tsne1 <- NA
 81 | pData(c1_subset)$tsne2 <- NA
 82 | pData(c1_subset)$cluster[nkx_cells] <- nkx_cl$.cluster
 83 | pData(c1_subset)$tsne1[nkx_cells] <- nkx_cl$X1
 84 | pData(c1_subset)$tsne2[nkx_cells] <- nkx_cl$X2
 85 | 
 86 | 
 87 | #
 88 | # Isl1 dimension reduction and clustering
 89 | #
 90 | seed <- 3465
 91 | set.seed(seed)
 92 | isl1_tsne <- Rtsne(t(isl1_som$codes), perplexity = 15, theta = 0.05, max_iter = 2000)
 93 | 
 94 | isl1_cl <- hdbscan(isl1_tsne$Y, min_samples = 7L, min_cluster_size = 9L)
 95 | isl1_cl[isl1_cl == 0] <- NA
 96 | isl1_cl <- factor(isl1_cl)
 97 | 
 98 | # i <- which(pData(c1_subset)$Background == "isl1ko" & pData(c1_subset)$Platform == "C1")
 99 | # cluster_ko <- factor(pData(c1_subset[, isl1_cells])$cluster, levels = c("1", "2", "3", "4", "5", "KO"))
100 | # cluster_ko[i] <- "KO"
101 | # colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = cluster_ko, pch=16, palette = parameters$colors$islko_cluster_palette)
102 | colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = isl1_cl, pch=16)
103 | colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = pData(c1_subset[, isl1_cells])$Timepoint, pch=16)
104 | colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = isl1_cl, pch=16)
105 | colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = droplevels(pData(c1_subset[, isl1_cells])$Background), pch=16)
106 | 
107 | 
108 | pData(c1_subset)$cluster[isl1_cells] <- isl1_cl$.cluster
109 | pData(c1_subset)$tsne1[isl1_cells] <- isl1_cl$X1
110 | pData(c1_subset)$tsne2[isl1_cells] <- isl1_cl$X2
111 | 
112 | #
113 | # Nkx2-5 differential expression
114 | #
115 | nkx_contrasts <- list(
116 |   cluster1_rest = list(which(nkx_cl$.cluster == 1), which(nkx_cl$.cluster != 1)),
117 |   cluster2_rest = list(which(nkx_cl$.cluster == 2), which(nkx_cl$.cluster != 2)),
118 |   cluster3_rest = list(which(nkx_cl$.cluster == 3), which(nkx_cl$.cluster != 3))
119 | )
120 | 
121 | nkx_diff_data <- differentialExpression(nkx_expression, contrasts = nkx_contrasts, fData(c1_subset))
122 | names(nkx_diff_data) <- names(nkx_contrasts)
123 | 
124 | # Nkx2-5 marker
125 | nkx_markers <- get_marker_genes(nkx_expression, nkx_cl$.cluster)
126 | 
127 | save(nkx_diff_data, file = file.path(parameters$general$path_supdata, "Nkx2-5-diffExprs.Rdata"))
128 | save(nkx_markers, file = file.path(parameters$general$path_supdata, "Nkx2-5-markers.Rdata"))
129 | 
130 | # Add information to object
131 | nkx_de_genes_l <- lapply(names(nkx_diff_data), function(c) {
132 |   data <- nkx_diff_data[[c]]
133 |   # Add marker gene data
134 |   data %>%
135 |     left_join(nkx_markers, by = "geneID") %>%
136 |     dplyr::rename(marker_pval = pvalue, marker_fdr = fdr.y) -> data_marker
137 |   write.table(data_marker, file = file.path(parameters$general$path_supdata, paste0("differentialExpression/Nkx2-5_",c,".txt")), sep="\t", quote = F, row.names = F, col.names = T)
138 |   i <- which(data$fdr < parameters$diffusionmaps$de_fdr & data$biotype == "protein_coding" & (data$lfc.hi < -parameters$diffusionmaps$de_abs_logfc | data$lfc.lo > parameters$diffusionmaps$de_abs_logfc))
139 |   data$geneID[i]
140 | })
141 | nkx_de_genes <- unique(unlist(nkx_de_genes_l))
142 | 
143 | m <- match(nkx_de_genes, rownames(fData(c1_subset)))
144 | fData(c1_subset)$nkx_de <- FALSE
145 | fData(c1_subset)$nkx_de[m] <- TRUE
146 | 
147 | i <- which(nkx_markers$auroc > parameters$diffusionmaps$marker_auroc & nkx_markers$fdr < parameters$diffusionmaps$marker_fdr)
148 | nkx_marker_genes <- nkx_markers$geneID[i]
149 | 
150 | m <- match(nkx_marker_genes, rownames(fData(c1_subset)))
151 | fData(c1_subset)$nkx_marker <- FALSE
152 | fData(c1_subset)$nkx_marker[m] <- TRUE
153 | 
154 | #
155 | # Isl1 differential expression
156 | #
157 | isl1_contrasts <- list(
158 |   cluster1_rest = list(which(isl1_cl$.cluster == 1), which(isl1_cl$.cluster != 1)),
159 |   cluster2_rest = list(which(isl1_cl$.cluster == 2), which(isl1_cl$.cluster != 2)),
160 |   cluster3_rest = list(which(isl1_cl$.cluster == 3), which(isl1_cl$.cluster != 3)),
161 |   cluster4_rest = list(which(isl1_cl$.cluster == 4), which(isl1_cl$.cluster != 4)),
162 |   cluster5_rest = list(which(isl1_cl$.cluster == 5), which(isl1_cl$.cluster != 5))
163 | )
164 | 
165 | isl1_diff_data <- differentialExpression(isl1_expression, contrasts = isl1_contrasts, fData(c1_subset))
166 | names(isl1_diff_data) <- names(isl1_contrasts)
167 | 
168 | # Isl1 marker
169 | isl1_markers <- get_marker_genes(isl1_expression, isl1_cl$.cluster)
170 | 
171 | save(isl1_diff_data, file = file.path(parameters$general$path_supdata, "Isl1-diffExprs.Rdata"))
172 | save(isl1_markers, file=file.path(parameters$general$path_supdata, "Isl1-markers.Rdata"))
173 | 
174 | # Add information to object
175 | isl1_de_genes_l <- lapply(names(isl1_diff_data), function(c) {
176 |   data <- isl1_diff_data[[c]]
177 |   # Add marker gene data
178 |   data %>%
179 |     left_join(nkx_markers, by = "geneID") %>%
180 |     dplyr::rename(marker_pval = pvalue, marker_fdr = fdr.y) -> data_marker
181 |   write.table(data_marker, file = file.path(parameters$general$path_supdata, paste0("differentialExpression/Isl1_",c,".txt")), sep="\t", quote = F, row.names = F, col.names = T)
182 |   i <- which(data$fdr < parameters$diffusionmaps$de_fdr & data$biotype == "protein_coding" & (data$lfc.hi < -parameters$diffusionmaps$de_abs_logfc | data$lfc.lo > parameters$diffusionmaps$de_abs_logfc))
183 |   data$geneID[i]
184 | })
185 | isl1_de_genes <- unique(unlist(isl1_de_genes_l))
186 | 
187 | m <- match(isl1_de_genes, rownames(fData(c1_subset)))
188 | fData(c1_subset)$isl1_de <- FALSE
189 | fData(c1_subset)$isl1_de[m] <- TRUE
190 | 
191 | i <- which(isl1_markers$auroc > parameters$diffusionmaps$marker_auroc & isl1_markers$fdr < parameters$diffusionmaps$marker_fdr)
192 | isl1_marker_genes <- isl1_markers$geneID[i]
193 | 
194 | m <- match(isl1_marker_genes, rownames(fData(c1_subset)))
195 | fData(c1_subset)$isl1_marker <- FALSE
196 | fData(c1_subset)$isl1_marker[m] <- TRUE
197 | 
198 | #
199 | # Save back subset
200 | #
201 | save(c1_subset, file = file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
202 | 


--------------------------------------------------------------------------------
/original-analysis/src/03_heterogeneity.R:
--------------------------------------------------------------------------------
  1 | library(biomaRt)
  2 | library(singlecellutils)
  3 | library(dplyr)
  4 | library(ggplot2)
  5 | library(cowplot)
  6 | library(UpSetR)
  7 | 
  8 | #
  9 | # Load the C1 data
 10 | #
 11 | source("src/parameters.R")
 12 | load(file = file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 13 | #save(wg_subset, file = file.path(parameters$general$path_supdata, "wg_subset.Rdata"))
 14 | 
 15 | het_subset <- c1_subset[fData(c1_subset)$biotype == "protein_coding" & !fData(c1_subset)$is_feature_control_mito, ]
 16 | 
 17 | #
 18 | # Assemble list of genes to exclude from being an HVG
 19 | #
 20 | mart <- useMart("ensembl", dataset = parameters$general$biomart_dataset)
 21 | go.cellcycle <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0007049', mart = mart)
 22 | go.translation <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0006412', mart = mart)
 23 | go.ribosome <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0005840', mart = mart)
 24 | 
 25 | exclude <- (rownames(het_subset) %in% unique(c(go.cellcycle$ensembl_gene_id, go.translation$ensembl_gene_id, go.ribosome$ensembl_gene_id)))
 26 | het_subset <- het_subset[!exclude, ]
 27 | 
 28 | #
 29 | # Assemble lineages
 30 | #
 31 | isl1.lineage <- which(pData(c1_subset)$Background == "isl1")
 32 | nkx.lineage <- which(pData(c1_subset)$Background == "nkx2-5")
 33 | 
 34 | lineages <- list(isl = isl1.lineage, nkx = nkx.lineage)
 35 | 
 36 | #
 37 | # Lineage and Time-point specific heterogeneity
 38 | #
 39 | early <- which(pData(het_subset)$Timepoint == "e7.5")
 40 | mid <- which(pData(het_subset)$Timepoint == "e8.5")
 41 | late <- which(pData(het_subset)$Timepoint == "e9.5")
 42 | 
 43 | time.lineages <- list(isl1 = isl1.lineage, isl1.early = intersect(isl1.lineage, early), isl1.mid = intersect(isl1.lineage, mid), isl1.late = intersect(isl1.lineage, late),
 44 |                       nkx = nkx.lineage, nkx.early = intersect(nkx.lineage, early), nkx.mid = intersect(nkx.lineage, mid), nkx.late = intersect(nkx.lineage, late))
 45 | 
 46 | means <- lapply(time.lineages, function(x) {
 47 |   subset <- het_subset[, x]
 48 |   data <- 2^get_exprs(subset, "norm_exprs_lineage")-1
 49 | 
 50 |   log(rowMeans(data))/log(10)
 51 | })
 52 | 
 53 | dropouts <- lapply(time.lineages, function(x) {
 54 |   subset <- het_subset[, x]
 55 |   data <- 2^get_exprs(subset, "norm_exprs_lineage")-1
 56 | 
 57 |   apply(data, 1, dropout.fun)
 58 | })
 59 | 
 60 | cvs <- lapply(time.lineages, function(x) {
 61 |   subset <- het_subset[, x]
 62 |   data <- 2^get_exprs(subset, "norm_exprs_lineage")-1
 63 | 
 64 |   log10(apply(data, 1, cv2.fun))
 65 | })
 66 | 
 67 | dependencies <- data.frame(dataset = rep(c("Isl1", "Nkx2-5"), each=nrow(het_subset)*4), timepoint = rep(c("all","e7.5","e8.5","e9.5","all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), mean = unlist(means), dropout = unlist(dropouts), cv = unlist(cvs))
 68 | rownames(dependencies) <- c(
 69 |   paste0("isl1",rep(c("all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), rep(rownames(het_subset), times=4)),
 70 |   paste0("nkx",rep(c("all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), rep(rownames(het_subset), times=4)))
 71 | 
 72 | het <- lapply(time.lineages, function(l) {
 73 |   subset <- het_subset[, l]
 74 |   data <- 2^get_exprs(subset, "norm_exprs_lineage")-1
 75 | 
 76 |   dro <- apply(data, 1, dropout.fun)
 77 |   cvs <- log10(apply(data, 1, cv2.fun))
 78 | 
 79 |   dm.do <- heterogeneity(data, statistic = "mean", order_by = dro, normalization = "windows", window = 200)
 80 |   dm.cv <- heterogeneity(data, statistic = "mean", order_by = cvs, normalization = "windows", window = 200)
 81 | 
 82 |   unique(c(names(dm.do[which(dm.do > parameters$heterogeneity$het_zscore)]), names(dm.cv[which(dm.cv > parameters$heterogeneity$het_zscore)])))
 83 | })
 84 | 
 85 | lapply(names(het), function(l) {
 86 |   write.table(het[[l]], quote = F, row.names = F, col.names = F, file = paste0(parameters$general$path_supdata,"/heterogeneity-",l,".txt"))
 87 | })
 88 | 
 89 | is.het <- c(paste0("isl1all",het[["isl1"]]), paste0("isl1e7.5",het[["isl1.early"]]), paste0("isl1e8.5",het[["isl1.mid"]]), paste0("isl1e9.5",het[["isl1.late"]]),
 90 |             paste0("nkxall",het[["nkx"]]), paste0("nkxe7.5",het[["nkx.early"]]), paste0("nkxe8.5",het[["nkx.mid"]]), paste0("nkxe9.5",het[["nkx.late"]]))
 91 | dependencies$het <- ifelse(rownames(dependencies) %in% is.het, T, F)
 92 | 
 93 | #
 94 | # Supplementary Figure S2
 95 | #
 96 | theme2 <- theme(plot.background = element_blank(), panel.grid.major = element_line(size=.2, colour = "grey"),panel.grid.minor = element_line(size=.1, colour = "grey"),
 97 |                 panel.border = element_blank(),panel.background = element_blank(),axis.line.x = element_line(size=.3),axis.line.y = element_line(size=.3), legend.text = element_text(size=6),
 98 |                 #axis.title.x = element_blank(), axis.title.y = element_blank(),
 99 |                 plot.title = element_text(face="bold", color="black", size=6), legend.key.size =  unit(2, "mm"), legend.margin=unit(-25, "mm"), axis.text = element_text(size=6))
100 | 
101 | s2_isl_do <- dependencies %>%
102 |   filter(dataset == "Isl1", timepoint == "all") %>%
103 |   ggplot(aes_string(x="mean", y="dropout", color = "het")) +
104 |   geom_point(size=0.2, aes(alpha=0.3)) +
105 |   scale_colour_manual(values = c("black", "red")) +
106 |   #ggtitle(paste0("Isl1 heterogeneity")) +
107 |   #xlab("Mean gene expression") +
108 |   #ylab("Dropout rate") +
109 |   theme2 + guides(alpha=FALSE, color=FALSE, size=FALSE)
110 | s2_isl_cv <- dependencies %>%
111 |   filter(dataset == "Isl1", timepoint == "all") %>%
112 |   ggplot(aes_string(x="mean", y="cv", color = "het")) +
113 |   geom_point(size=0.2, aes(alpha=0.3)) +
114 |   scale_colour_manual(values = c("black", "red")) +
115 |   #ggtitle(paste0("Isl1 heterogeneity")) +
116 |   #xlab("Mean gene expression") +
117 |   #ylab("Coefficient of variation") +
118 |   theme2 + guides(alpha=FALSE, color=FALSE, size=FALSE)
119 | 
120 | s2_isl_het <- plot_grid(s2_isl_cv, s2_isl_do)
121 | 
122 | s2_nkx_do <- dependencies %>%
123 |   filter(dataset == "Isl1", timepoint == "all") %>%
124 |   ggplot(aes_string(x="mean", y="dropout", color="het")) +
125 |   geom_point(size=0.2, aes(alpha=0.3)) +
126 |   scale_colour_manual(values = c("black", "red")) +
127 |   #ggtitle(paste0("Nkx2-5 heterogeneity")) +
128 |   theme2 + guides(alpha=FALSE, color=FALSE, size=FALSE)
129 | s2_nkx_cv <- dependencies %>%
130 |   filter(dataset == "Nkx2-5", timepoint == "all") %>%
131 |   ggplot(aes_string(x="mean", y="cv", color="het")) +
132 |   geom_point(size=0.2, aes(alpha=0.3)) +
133 |   scale_colour_manual(values = c("black", "red")) +
134 |   #ggtitle(paste0("Nkx2-5 heterogeneity")) +
135 |   theme2 + guides(alpha=FALSE, color=FALSE, size=FALSE)
136 | 
137 | s2_nkx_het <- plot_grid(s2_nkx_cv, s2_nkx_do)
138 | p <- grid.grabExpr(upset(fromList(het[c("isl1", "isl1.early", "isl1.late", "isl1.mid")]), order.by = "freq"), wrap = T)
139 | q <- grid.grabExpr(upset(fromList(het[c("nkx", "nkx.early", "nkx.late", "nkx.mid")]), order.by = "freq"), wrap = T)
140 | 
141 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 4)
142 | m <- fill_panel(m, s2_isl_het, row = 1, col = 1)
143 | m <- fill_panel(m, s2_nkx_het, row = 1, col = 2)
144 | m <- fill_panel(m, p, row = c(2,3,4), col = 1)
145 | m <- fill_panel(m, q, row = c(2,3,4), col = 2)
146 | 
147 | #
148 | # Load the WG data
149 | #
150 | source("src/parameters.R")
151 | load(file = file.path(parameters$general$path_supdata, "wg_subset.Rdata"))
152 | #save(wg_subset, file = file.path(parameters$general$path_supdata, "wg_subset.Rdata"))
153 | 
154 | het_subset <- wg_subset[fData(wg_subset)$biotype == "protein_coding" & !fData(wg_subset)$is_feature_control_mito, ]
155 | 
156 | #
157 | # Assemble list of genes to exclude from being an HVG
158 | #
159 | mart <- useMart("ensembl", dataset = parameters$general$biomart_dataset)
160 | go.cellcycle <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0007049', mart = mart)
161 | go.translation <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0006412', mart = mart)
162 | go.ribosome <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0005840', mart = mart)
163 | 
164 | exclude <- (rownames(het_subset) %in% unique(c(go.cellcycle$ensembl_gene_id, go.translation$ensembl_gene_id, go.ribosome$ensembl_gene_id)))
165 | het_subset <- het_subset[!exclude, ]
166 | 
167 | #
168 | # Assemble lineages
169 | #
170 | isl1.lineage <- which(pData(wg_subset)$Background == "isl1")
171 | nkx.lineage <- which(pData(wg_subset)$Background == "nkx2-5")
172 | 
173 | #
174 | # Lineage and Time-point specific heterogeneity
175 | #
176 | early <- which(pData(het_subset)$Timepoint == "e7.5")
177 | mid <- which(pData(het_subset)$Timepoint == "e8.5")
178 | late <- which(pData(het_subset)$Timepoint == "e9.5")
179 | 
180 | time.lineages <- list(isl1 = isl1.lineage, isl1.early = intersect(isl1.lineage, early), isl1.late = intersect(isl1.lineage, late),
181 |                       nkx = nkx.lineage, nkx.early = intersect(nkx.lineage, early), nkx.mid = intersect(nkx.lineage, mid), nkx.late = intersect(nkx.lineage, late))
182 | 
183 | means <- lapply(time.lineages, function(x) {
184 |   subset <- het_subset[, x]
185 |   data <- 2^get_exprs(subset, "norm_exprs_lineage")-1
186 | 
187 |   log(rowMeans(data))/log(10)
188 | })
189 | 
190 | dropouts <- lapply(time.lineages, function(x) {
191 |   subset <- het_subset[, x]
192 |   data <- 2^get_exprs(subset, "norm_exprs_lineage")-1
193 | 
194 |   apply(data, 1, dropout.fun)
195 | })
196 | 
197 | cvs <- lapply(time.lineages, function(x) {
198 |   subset <- het_subset[, x]
199 |   data <- 2^get_exprs(subset, "norm_exprs_lineage")-1
200 | 
201 |   log10(apply(data, 1, cv2.fun))
202 | })
203 | 
204 | dependencies <- data.frame(dataset = c(rep("Isl1", times=nrow(het_subset)*3), rep("Nkx2-5", times=nrow(het_subset)*4)),
205 |                            timepoint = rep(c("all","e7.5","e9.5","all","e7.5","e8.5","e9.5"), each=nrow(het_subset)),
206 |                            mean = unlist(means),
207 |                            dropout = unlist(dropouts),
208 |                            cv = unlist(cvs)
209 |                            )
210 | rownames(dependencies) <- c(
211 |   paste0("isl1",rep(c("all","e7.5","e9.5"), each=nrow(het_subset)), rep(rownames(het_subset), times=3)),
212 |   paste0("nkx",rep(c("all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), rep(rownames(het_subset), times=4)))
213 | 
214 | het <- lapply(time.lineages, function(l) {
215 |   subset <- het_subset[, l]
216 |   data <- 2^get_exprs(subset, "norm_exprs_lineage")-1
217 | 
218 |   dro <- apply(data, 1, dropout.fun)
219 |   cvs <- log10(apply(data, 1, cv2.fun))
220 | 
221 |   dm.do <- heterogeneity(data, statistic = "mean", order_by = dro, normalization = "windows", window = 200)
222 |   dm.cv <- heterogeneity(data, statistic = "mean", order_by = cvs, normalization = "windows", window = 200)
223 | 
224 |   unique(c(names(dm.do[which(dm.do > parameters$heterogeneity$het_zscore)]), names(dm.cv[which(dm.cv > parameters$heterogeneity$het_zscore)])))
225 | })
226 | 
227 | lapply(names(het), function(l) {
228 |   write.table(het[[l]], quote = F, row.names = F, col.names = F, file = paste0(parameters$general$path_supdata,"/heterogeneity-wg-",l,".txt"))
229 | })
230 | 
231 | is.het <- c(paste0("isl1all",het[["isl1"]]), paste0("isl1e7.5",het[["isl1.early"]]), paste0("isl1e8.5",het[["isl1.mid"]]), paste0("isl1e9.5",het[["isl1.late"]]),
232 |             paste0("nkxall",het[["nkx"]]), paste0("nkxe7.5",het[["nkx.early"]]), paste0("nkxe8.5",het[["nkx.mid"]]), paste0("nkxe9.5",het[["nkx.late"]]))
233 | dependencies$het <- ifelse(rownames(dependencies) %in% is.het, T, F)
234 | 


--------------------------------------------------------------------------------
/original-analysis/src/00_filtering.R:
--------------------------------------------------------------------------------
  1 | library(scater)
  2 | library(Rtsne)
  3 | library(multipanelfigure)
  4 | library(viridis)
  5 | library(dplyr)
  6 | library(tidyr)
  7 | library(gridExtra)
  8 | library(singlecellutils)
  9 | library(genefilter)
 10 | library(RColorBrewer)
 11 | 
 12 | #
 13 | # Load data
 14 | #
 15 | source("src/parameters.R")
 16 | load(file.path(parameters$general$path_rdata, "sceData.RData"))
 17 | 
 18 | #
 19 | # Create map of QC data
 20 | #
 21 | qcdata <- pData(sceData)[,parameters$filtering$qc_columns]
 22 | qcdata$platform <- factor(pData(sceData)$Platform)
 23 | qcdata$background <- factor(pData(sceData)$Background)
 24 | 
 25 | if (!is.null(parameters$filtering$log_qc_columns)) {
 26 |   qcdata[, parameters$filtering$log_qc_columns] <- log2(qcdata[, parameters$filtering$log_qc_columns] + 1)
 27 | }
 28 | 
 29 | is_c1 <- pData(sceData)$Platform == "C1"
 30 | 
 31 | set.seed(5411)
 32 | t_c1 <- Rtsne(scale(qcdata[is_c1, parameters$filtering$qc_columns]), perplexity = 40, max_iter = 2000)
 33 | set.seed(5411)
 34 | t_wg <- Rtsne(scale(qcdata[!is_c1, parameters$filtering$qc_columns]), perplexity = 40, max_iter = 2000)
 35 | 
 36 | # Add coordinates to qcdata
 37 | qcdata$tsne.x <- NA
 38 | qcdata$tsne.y <- NA
 39 | qcdata$tsne.x[is_c1] <- t_c1$Y[,1]
 40 | qcdata$tsne.x[!is_c1] <- t_wg$Y[,1]
 41 | qcdata$tsne.y[is_c1] <- t_c1$Y[,2]
 42 | qcdata$tsne.y[!is_c1] <- t_wg$Y[,2]
 43 | 
 44 | theme1 <- theme(plot.background = element_blank(),panel.grid.major = element_line(size=.2, colour = "grey"),panel.grid.minor = element_line(size=.1, colour = "grey"),
 45 |                 panel.border = element_blank(),panel.background = element_blank(),axis.line.x = element_line(size=.3),axis.line.y = element_line(size=.3), legend.text = element_text(size=8),
 46 |                 axis.title.x = element_blank(),axis.title.y = element_blank(), plot.title = element_text(face="bold", color="black", size=8),legend.key.size =  unit(2, "mm"), legend.margin=unit(-25, "mm"),
 47 |                 strip.background = element_blank(), strip.text = element_text(face="bold", color="black", size=6, margin = margin(0,0,-1,0)), axis.text = element_text(color = "black", size = 8))
 48 | 
 49 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 5)
 50 | for(c in parameters$filtering$qc_columns) {
 51 |   f <- ggplot(qcdata, aes_string(x="tsne.x", y="tsne.y", color=eval(c))) +
 52 |     geom_point(size=0.5) +
 53 |     scale_color_viridis(name="") +
 54 |     facet_wrap(~platform, ncol=2) +
 55 |     xlab("") + ylab("") +
 56 |     ggtitle(c) +
 57 |     theme1
 58 |   m <- fill_panel(m, f)
 59 | }
 60 | 
 61 | #m
 62 | 
 63 | ggsave(
 64 |   plot = m,
 65 |   filename = file.path(parameters$general$path_rfigures, "Supplementary-Cell-QC-map.png"),
 66 |   width = 205,
 67 |   height = 230,
 68 |   units = "mm",
 69 |   dpi = 600)
 70 | 
 71 | #
 72 | # Filter cells based on defined criteria
 73 | #
 74 | # We construct a binary QC matrix, with TRUE meaning that the cell failed for this QC measure.
 75 | binary.qc.l <- lapply(names(parameters$filtering$cell_outlier_columns), function(c) {
 76 |   res <- rep(NA, nrow(qcdata))
 77 |   c1o <- isOutlier(qcdata[is_c1, c], nmads = as.numeric(parameters$filtering$cell_outlier_columns[[c]][1]), type=parameters$filtering$cell_outlier_columns[[c]][2], log=as.logical(parameters$filtering$cell_outlier_columns[[c]][3]))
 78 |   wgo <- isOutlier(qcdata[!is_c1, c], nmads = as.numeric(parameters$filtering$cell_outlier_columns[[c]][1]), type=parameters$filtering$cell_outlier_columns[[c]][2], log=as.logical(parameters$filtering$cell_outlier_columns[[c]][3]))
 79 |   res[!is_c1] <- wgo
 80 |   res[is_c1] <- c1o
 81 |   return(res)
 82 | })
 83 | 
 84 | binary.qc <- do.call("cbind", binary.qc.l)
 85 | colnames(binary.qc) <- names(parameters$filtering$cell_outlier_columns)
 86 | rownames(binary.qc) <- rownames(qcdata)
 87 | 
 88 | # Determine failed cells as those with more than 1 failed criterium
 89 | failed.cells <- (rowSums(binary.qc) > parameters$filtering$cell_outlier_maxfailed)
 90 | qcdata$qc.exclude <- failed.cells
 91 | 
 92 | write.table(x=qcdata, file=file.path(parameters$general$path_supdata, "QC-metrics.txt"), sep="\t", quote = F, row.names = T, col.names = T)
 93 | pData(sceData)$qc.exclude <- failed.cells
 94 | 
 95 | #
 96 | # Supplementary Figure 1: Cell QC
 97 | #
 98 | theme2 <- theme(plot.background = element_blank(),panel.grid.major = element_line(size=.2, colour = "grey"),panel.grid.minor = element_line(size=.1, colour = "grey"),
 99 |                 panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_line(size=.3), legend.text = element_text(size=6),
100 |                 axis.title.x = element_blank(), axis.title.y = element_blank(), plot.title = element_text(face="bold", color="black", size=6), legend.key.size =  unit(2, "mm"), legend.margin=unit(-25, "mm"),
101 |                 strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8) )
102 | 
103 | xintercept_colors <- c("#8c510a", "#bf812d","#01665e", "#35978f")
104 | xintercept_platforms <- c("C1", "WG")
105 | get_min <- function(c) {
106 |   c(min(qcdata[!binary.qc[,c] & is_c1,c]), min(qcdata[!binary.qc[,c] & !is_c1,c]))
107 | }
108 | get_max <- function(c) {
109 |   c(max(qcdata[!binary.qc[,c] & is_c1,c]), max(qcdata[!binary.qc[,c] & !is_c1,c]))
110 | }
111 | 
112 | # Mitochondrial content
113 | c <- 'pct_counts_feature_controls_mito'
114 | xintercept_mins <- get_min(c)
115 | xintercept_maxs <- get_max(c)
116 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs)
117 | s1_mito <- ggplot(qcdata, aes_string(x=eval(c))) +
118 |   geom_histogram(binwidth = 1) +
119 |   geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") +
120 |   geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") +
121 |   facet_wrap(~platform, ncol=2) +
122 |   xlab("") + ylab("") + guides(color=FALSE) +
123 |   xlim(0,15) +
124 |   #ggtitle(c) +
125 |   theme2
126 | 
127 | # Total detected features
128 | c <- 'total_features'
129 | xintercept_mins <- get_min(c)
130 | xintercept_maxs <- get_max(c)
131 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs)
132 | s1_features <- ggplot(qcdata, aes_string(x=eval(c))) +
133 |   geom_histogram(binwidth = 0.05) +
134 |   geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") +
135 |   geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") +
136 |   facet_wrap(~platform, ncol=2) +
137 |   xlab("") + ylab("") + guides(color=FALSE) +
138 |   xlim(6,8) +
139 |   #ggtitle(c) +
140 |   theme1
141 | 
142 | # Dropout
143 | c <- 'pct_dropout'
144 | xintercept_mins <- get_min(c)
145 | xintercept_maxs <- get_max(c)
146 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs)
147 | s1_dropout <- ggplot(qcdata, aes_string(x=eval(c))) +
148 |   geom_histogram(binwidth = 0.01) +
149 |   geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") +
150 |   geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") +
151 |   facet_wrap(~platform, ncol=2) +
152 |   xlab("") + ylab("") + guides(color=FALSE) +
153 |   #ggtitle(c) +
154 |   theme2
155 | 
156 | # Percent of genes
157 | c <- 'percent_genes'
158 | xintercept_mins <- get_min(c)
159 | xintercept_maxs <- get_max(c)
160 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs)
161 | s1_genes <- ggplot(qcdata, aes_string(x=eval(c))) +
162 |   geom_histogram(binwidth = 1) +
163 |   geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") +
164 |   geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") +
165 |   facet_wrap(~platform, ncol=2) +
166 |   xlab("") + ylab("") + guides(color=FALSE) +
167 |   #ggtitle(c) +
168 |   theme2
169 | 
170 | # Expression of Rplp0
171 | c <- 'exprs_feature_controls_housekeeping'
172 | xintercept_mins <- get_min(c)
173 | xintercept_maxs <- get_max(c)
174 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs)
175 | s1_rplp <- ggplot(qcdata, aes_string(x=eval(c))) +
176 |   geom_histogram(binwidth = 0.1) +
177 |   geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") +
178 |   geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") +
179 |   facet_wrap(~platform, ncol=2) +
180 |   xlab("") + ylab("") + guides(color=FALSE) +
181 |   xlim(2.5, 4) +
182 |   #ggtitle(c) +
183 |   theme2
184 | 
185 | # QC map with features
186 | s1_qc_features <- ggplot(qcdata, aes_string(x="tsne.x", y="tsne.y", color=eval(c))) +
187 |   geom_point(size=0.5) +
188 |   scale_color_viridis(name="") +
189 |   facet_wrap(~platform, ncol=2) +
190 |   xlab("") + ylab("") +
191 |   #ggtitle("Total features (log2 scale)") +
192 |   theme1 +
193 |   theme(legend.position="bottom")
194 | 
195 | s1_qc_fail <- ggplot(qcdata, aes_string(x="tsne.x", y="tsne.y", color="qc.exclude")) +
196 |   geom_point(size=1) +
197 |   scale_colour_manual(values = c("black", "red"), name="") +
198 |   facet_wrap(~platform, ncol=2) +
199 |   xlab("") + ylab("") +
200 |   #ggtitle("QC failed cells") +
201 |   guides(color=FALSE) +
202 |   theme2
203 | 
204 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_1ae.txt"),
205 |             x = qcdata[, c("platform", "total_features", "pct_dropout", "pct_counts_feature_controls_mito", "percent_genes", "exprs_feature_controls_housekeeping", "qc.exclude")],
206 |             quote = F,
207 |             row.names = T,
208 |             col.names = T,
209 |             sep = "\t")
210 | 
211 | qcdata %>%
212 |   select(Lineage = background, platform, qc.exclude) %>%
213 |   group_by(Lineage, platform, qc.exclude) %>%
214 |   summarise(cells = n()) %>%
215 |   unite("key", platform, qc.exclude, sep=".") %>%
216 |   spread(key, cells) %>%
217 |   mutate(WG.TRUE = ifelse(is.na(WG.TRUE), 0, WG.TRUE), WG.FALSE = ifelse(is.na(WG.FALSE), 0, WG.FALSE)) %>%
218 |   mutate(C1.sum = C1.TRUE + C1.FALSE, WG.sum = WG.TRUE + WG.FALSE, raw.sum = C1.sum + WG.sum, clean.sum = C1.FALSE + WG.FALSE) %>%
219 |   mutate(Raw = paste0(raw.sum, " (", C1.sum, "/", WG.sum, ")")) %>%
220 |   mutate(Clean = paste0(clean.sum, " (", C1.FALSE, "/", WG.FALSE, ")")) %>%
221 |   select(Lineage, Raw, Clean) -> filter.table.raw
222 | 
223 | tt1 <- ttheme_minimal(core=list(fg_params=list(hjust=1, x=0.9)),
224 |                       rowhead=list(fg_params=list(hjust=1, x=0.9)),
225 |                       colhead = list(fg_params=list(hjust=1, x=0.9)))
226 | 
227 | s1_table <- tableGrob(filter.table.raw, theme=tt1, rows = NULL)
228 | 
229 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 5)
230 | m <- fill_panel(m, s1_features, row = 1, col = 1)
231 | m <- fill_panel(m, s1_dropout, row = 2, col = 1)
232 | m <- fill_panel(m, s1_mito, row = 3, col = 1)
233 | m <- fill_panel(m, s1_genes, row = 4, col = 1)
234 | m <- fill_panel(m, s1_rplp, row = 5, col = 1)
235 | m <- fill_panel(m, s1_qc_features, row = c(1,2), column = 2)
236 | m <- fill_panel(m, s1_qc_fail, row = c(3,4), column = 2)
237 | m <- fill_panel(m, s1_table, row = 5, column = 2)
238 | 
239 | ggsave(
240 |   plot = m,
241 |   filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_1.pdf"),
242 |   width = 205,
243 |   height = 230,
244 |   units = "mm",
245 |   dpi = 600)
246 | 
247 | #
248 | # Gene filtering
249 | #
250 | isl1.lineage <- pData(sceData)$Background == "isl1"
251 | nkx.lineage <- pData(sceData)$Background == "nkx2-5"
252 | 
253 | # Build gene filters
254 | aggregate.expr <- EpOverA(A = parameters$filtering$EpOverA_A)
255 | k.cell.expr <- kOverA(k=parameters$filtering$kOverA_k, A=parameters$filtering$kOverA_A)
256 | 
257 | # Make sure both platform counts satisfies the criteria
258 | ffun <- filterfun(aggregate.expr, k.cell.expr)
259 | keep_wg_crit <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & !is_c1)]), ffun)
260 | keep_c1_crit <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & is_c1)]), ffun)
261 | 
262 | keep_isl1_crit <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & is_c1 & isl1.lineage)]), ffun)
263 | keep_nkx_crit <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & is_c1 & nkx.lineage)]), ffun)
264 | 
265 | # Make sure that the corresponding count is not 0
266 | one <- kOverA(k=1,A=0)
267 | 
268 | # Make sure that the wafergene count is also not 0
269 | keep_wg <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & !is_c1)]), one)
270 | 
271 | # Make sure that the C1 count is also not 0
272 | keep_c1 <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & is_c1)]), one)
273 | 
274 | keep <- (keep_isl1_crit | keep_nkx_crit) & keep_c1 & keep_wg
275 | sum(keep)
276 | scd <- sceData[keep,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude)]
277 | scd <- calculateQCMetrics(scd)
278 | 
279 | #
280 | # Save object for further downstream analysis
281 | #
282 | save(scd, file = file.path(parameters$general$path_supdata, "scd.Rdata"))
283 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_5.R:
--------------------------------------------------------------------------------
  1 | library(scran)
  2 | library(scater)
  3 | library(singlecellutils)
  4 | library(RColorBrewer)
  5 | library(viridis)
  6 | library(dplyr)
  7 | library(tidyr)
  8 | library(rioja)
  9 | library(multipanelfigure)
 10 | library(ComplexHeatmap)
 11 | library(circlize)
 12 | library(zoo)
 13 | 
 14 | #
 15 | # Load data
 16 | #
 17 | source("src/parameters.R")
 18 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 19 | 
 20 | theme2 <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 21 |                 panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_line(size=.3), legend.text = element_text(size=6),
 22 |                 axis.title.x = element_blank(), axis.title.y = element_text(color="black", size=10), plot.title = element_text(face="bold", color="black", size=6), legend.key.size =  unit(2, "mm"), legend.margin=unit(-25, "mm"),
 23 |                 strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8) )
 24 | 
 25 | ggMMplot <- function(var1, var2){
 26 |   require(ggplot2)
 27 |   levVar1 <- length(levels(var1))
 28 |   levVar2 <- length(levels(var2))
 29 | 
 30 |   jointTable <- prop.table(table(var1, var2))
 31 |   plotData <- as.data.frame(jointTable)
 32 |   plotData$marginVar1 <- prop.table(table(var1))
 33 |   plotData$var2Height <- plotData$Freq / plotData$marginVar1
 34 |   plotData$var1Center <- c(0, cumsum(plotData$marginVar1)[1:levVar1 -1]) +
 35 |     plotData$marginVar1 / 2
 36 | 
 37 |   ggplot(plotData, aes(var1Center, var2Height)) +
 38 |     geom_bar(stat = "identity", aes(fill = var2, width = marginVar1), col = "White") +
 39 |     geom_label(aes(label = as.character(var1), x = var1Center, y = 1.05), label.r = unit(0, "lines"))
 40 | }
 41 | 
 42 | #
 43 | # Isl1 KO Heatmap Top 30 genes
 44 | #
 45 | n_top_genes <- 30
 46 | isl1_wt_cells <- which(pData(c1_subset)$Background == "isl1" & (pData(c1_subset)$cluster == "2" | pData(c1_subset)$cluster == "1" | pData(c1_subset)$cluster == "5"))
 47 | isl1_ko_cells <- which(pData(c1_subset)$Background == "isl1ko")
 48 | #isl1_ki67 <- get_exprs(c1_subset, "norm_exprs_sf")["ENSMUSG00000031004", c(isl1_wt_cells, isl1_ko_cells)]
 49 | 
 50 | #
 51 | # Cycling score
 52 | #
 53 | load(file.path(parameters$general$path_rextdata, "c2.cpg.whitfield.v6.0.mmusculus.Rdata"))
 54 | names(c2_cpg_whitfield) <- gsub("WHITFIELD_CELL_CYCLE_", "", names(c2_cpg_whitfield))
 55 | 
 56 | # Identify genes from the sets that correlate with the score
 57 | cycle_score_genes <- lapply(c2_cpg_whitfield, function(gs) {
 58 |   m <- match(gs, rownames(c1_subset))
 59 |   e <- get_exprs(c1_subset[na.omit(m), c(isl1_wt_cells, isl1_ko_cells)], "exprs")
 60 |   score <- colMeans(e)
 61 |   cor <- cor(t(e), score, method = "spearman")
 62 |   u <- which(cor[,1] > 0.4)
 63 |   return(names(u))
 64 | })
 65 | 
 66 | # Calculate scores from genes
 67 | cycle_score_l <- lapply(cycle_score_genes, function(g) {
 68 |   m <- match(g, rownames(c1_subset))
 69 |   e <- get_exprs(c1_subset[na.omit(m), c(isl1_wt_cells, isl1_ko_cells)], "exprs")
 70 |   colMeans(e)
 71 | })
 72 | cycle_score <- as.data.frame(t(scale(t(do.call("rbind", cycle_score_l)))))
 73 | cycle_score$set <- rownames(cycle_score)
 74 | 
 75 | cycle_score_df <- spread(gather(as.data.frame(cycle_score), key = "cell", value = "score", -set), key = "set", value = "score")
 76 | m <- match(cycle_score_df$cell, colnames(c1_subset))
 77 | cycle_score_df$Background <- pData(c1_subset)$Background[m]
 78 | 
 79 | cycle_score_df %>%
 80 |   group_by(cell) %>%
 81 |   mutate(max_score = max(G1_S, G2_M)) %>%
 82 |   ungroup() %>%
 83 |   mutate(rank = dense_rank(desc(max_score)),
 84 |          is_cycling = max_score > -0.1) -> cycle_score_df
 85 | 
 86 | isl1_ko_cycleplot <- ggplot(cycle_score_df, aes(x = G1_S, y = G2_M)) +
 87 |   geom_point(aes(color = is_cycling, shape = Background)) +
 88 |   scale_color_manual(values = c("red", "black"), labels =  c("non cycling", "cycling")) +
 89 |   scale_shape_manual(values = 16:17, labels = c("Isl1 WT", "Isl1 KO")) +
 90 |   xlab("G1/S score") +
 91 |   ylab("G2/M score") +
 92 |   guides(color = guide_legend(title = "", label = T), shape = guide_legend(title = "")) +
 93 |   theme_classic() +
 94 |   theme(axis.title = element_text(size = 7), axis.text = element_text(size = 5))
 95 | 
 96 | isl1_ko_cyclerankplot <- ggplot(cycle_score_df, aes(x = rank, y = max_score)) +
 97 |   geom_point(aes(color = is_cycling, shape = Background)) +
 98 |   scale_color_manual(values = c("red", "black")) +
 99 |   xlab("Rank") +
100 |   ylab("Maximal score (G1/S, G2/M)") +
101 |   guides(color = FALSE, shape = FALSE) +
102 |   theme_classic() +
103 |   theme(axis.title = element_text(size = 7), axis.text = element_text(size = 5))
104 | 
105 | g1_s_g2_m_genes <- unlist(cycle_score_genes[c("G1_S", "G2_M")])
106 | g1_s_g2_m_lengths <- unlist(lapply(cycle_score_genes[c("G1_S", "G2_M")], length))
107 | names(g1_s_g2_m_genes) <- rep(names(g1_s_g2_m_lengths), times = g1_s_g2_m_lengths)
108 | 
109 | g1_s_g2_m_cor <- cor(t(get_exprs(c1_subset[g1_s_g2_m_genes, cycle_score_df$cell[cycle_score_df$is_cycling]], "exprs")))
110 | colnames(g1_s_g2_m_cor) <- fData(c1_subset[colnames(g1_s_g2_m_cor),])$symbol
111 | rownames(g1_s_g2_m_cor) <- fData(c1_subset[rownames(g1_s_g2_m_cor),])$symbol
112 | 
113 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_9c.txt"),
114 |             x = g1_s_g2_m_cor,
115 |             row.names = T,
116 |             col.names = T,
117 |             quote = F,
118 |             sep = "\t")
119 | 
120 | col_fun <- colorRamp2(seq(-0.9, 0.9, length.out = 10), rev(brewer.pal(10, "RdBu")))
121 | 
122 | isl1_ko_cycle_genes <- Heatmap(g1_s_g2_m_cor,
123 |                                col = col_fun,
124 |                                name = "Pearson\ncorrelation",
125 |                                split = names(g1_s_g2_m_genes),
126 |                                cluster_columns = F,
127 |                                cluster_rows = F,
128 |                                row_names_gp = gpar(fontsize = 6),
129 |                                column_names_gp = gpar(fontsize = 6))
130 | 
131 | t <- table(isko = droplevels(cycle_score_df$Background) == "isl1ko", is_cycling = cycle_score_df$is_cycling)[c(2,1), c(2,1)]
132 | tr <- prop.test(t, correct=F, conf.level = 0.95)
133 | 
134 | isl1_ko_mmplot <- ggMMplot(droplevels(cycle_score_df$Background), cycle_score_df$is_cycling) +
135 |   guides(fill = F) +
136 |   scale_fill_manual(values = c("#A8DBA8", "#3B8686")) +
137 |   ylab("Proportion of cycling cells") +
138 |   xlab("Cells") +
139 |   theme(panel.background = element_rect(fill = "transparent"), axis.line.y = element_line(colour = "black"), axis.line.x = element_line(colour = "black"))#, axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank())
140 | 
141 | m <- multi_panel_figure(width = c(85,110), height = c(80, 150), label_type = "lower_alpha")
142 | m <- fill_panel(m, isl1_ko_cyclerankplot)
143 | m <- fill_panel(m, isl1_ko_cycleplot)
144 | m <- fill_panel(m, isl1_ko_cycle_genes, column = 1:2)
145 | 
146 | ggsave(
147 |   plot = m,
148 |   filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_9.pdf"),
149 |   width = figure_width(m),
150 |   height = figure_height(m),
151 |   units = "mm",
152 |   dpi = 600,
153 |   useDingbats = FALSE)
154 | 
155 | #
156 | # Find top 30 genes per KO-cluster comparison
157 | #
158 | comparisons <- c("progenitor", "endothelial", "cardiomyocyte")
159 | 
160 | isl1_ko_data_l <- lapply(comparisons, function(x) {
161 |   d <- read.table(file.path(parameters$general$path_supdata, paste0("differentialExpression/Isl1.", x, "_Isl1KO.txt")), sep="\t", header = T, stringsAsFactors = F)
162 |   d$contrast <- x
163 |   i <- which(d$fdr < parameters$diffusionmaps$de_fdr & (d$lfc.lo > 1.5 | d$lfc.hi < -1.5) & d$biotype == "protein_coding" & !startsWith(d$symbol, "mt") & !startsWith(d$symbol, "Rpl") & !startsWith(d$symbol, "venus"))
164 |   m <- order(abs(d$lfc.hi[i]), decreasing = T)
165 |   na.omit(d$geneID[i][m[1:min(n_top_genes, length(m))]])
166 | })
167 | 
168 | isl1_de_selected <- na.omit(unique(unlist(isl1_ko_data_l)))
169 | isl1_subset <- c1_subset[isl1_de_selected, c(isl1_wt_cells, isl1_ko_cells)]
170 | 
171 | #
172 | # Isl1 KO heatmap
173 | #
174 | isl1_ko_cell_order <- order(pData(isl1_subset)$cluster)
175 | 
176 | isl1_ko_expression <- t(scale(t(get_exprs(isl1_subset, "norm_exprs_sf")[, isl1_ko_cell_order])))
177 | rownames(isl1_ko_expression) <- fData(isl1_subset)$symbol
178 | 
179 | isl1_ko_dist <- as.dist(1 - cor(isl1_ko_expression, method="pearson"))
180 | 
181 | isl1_ko_const_clust <- chclust(isl1_ko_dist)
182 | isl1_ko_const_clust_dendro <- as.dendrogram(isl1_ko_const_clust)
183 | 
184 | # Color mapping
185 | e_min <- min(isl1_ko_expression)
186 | e_max <- max(isl1_ko_expression)
187 | 
188 | bounds <- max(abs(e_min), abs(e_max))
189 | 
190 | breaks <- seq(-bounds, bounds, length.out = 99)
191 | #breaks <- seq(e_min, e_max, length.out = 99)
192 | colors <- colorRampPalette(rev(brewer.pal(10, "RdBu")))(99)
193 | #colors <- inferno(99)
194 | 
195 | col_fun <- colorRamp2(breaks, colors)
196 | 
197 | #
198 | # Heatmap Annotation
199 | #
200 | isl1_ko_annotation_data <- data.frame(Genotype = paste(pData(isl1_subset)$Background))
201 | 
202 | isl1_ko_annotation <- HeatmapAnnotation(isl1_ko_annotation_data,
203 |                                         col = list(Genotype = c("isl1" = "#000000", "isl1ko" = "#FF0000")), #008B45, 00CD66
204 |                                         show_legend = F,
205 |                                         show_annotation_name = T,
206 |                                         annotation_name_gp = gpar(fontsize = 6),
207 |                                         annotation_name_offset = unit(0.8, "mm"),
208 |                                         gap = unit(0.1, "mm"),
209 |                                         annotation_height = unit.c(unit(1.5, "mm")))
210 | 
211 | isl1_ko_heatmap <- Heatmap(isl1_ko_expression,
212 |                           col = col_fun,
213 |                           top_annotation = isl1_ko_annotation,
214 |                           cluster_columns = isl1_ko_const_clust_dendro,
215 |                           cluster_rows = T,
216 |                           show_column_names = F,
217 |                           show_row_dend = F,
218 |                           show_column_dend = F,
219 |                           name = "islko",
220 |                           #column_title = "Nkx2-5+ cells",
221 |                           column_title_gp = gpar(fontface = "bold", fontsize = 8),
222 |                           #split = rep(paste("Marker genes\ncluster", c(1,2,3)), times = sapply(nkx_de_gene_order, length)),
223 |                           gap = unit(0.3, "mm"),
224 |                           row_title_rot = 90,
225 |                           row_title_gp = gpar(fontsize = 4),
226 |                           row_names_gp = gpar(fontsize = 6),
227 |                           heatmap_legend_param = list("plot" = F, "color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
228 | 
229 | fig_3c <- grid.grabExpr(draw(isl1_ko_heatmap, heatmap_legend_side = "bottom", padding = unit(c(0, 0, 0, 0), "mm")))
230 | 
231 | # Write out for Source Data file
232 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_Figure5_d.txt"),
233 |             x = isl1_ko_expression,
234 |             col.names = T,
235 |             row.names = T,
236 |             sep = "\t",
237 |             quote = T)
238 | 
239 | #
240 | # Predict Isl1-KO onto Isl1 trajectory
241 | #
242 | library(destiny)
243 | global.settings <- list(layout.heights=list(main.key.padding = -2, bottom.padding = 0, axis.xlab.padding = -0.5), layout.widths = list(ylab.axis.padding = -0.5, right.padding = 0))
244 | 
245 | load(file.path(parameters$general$path_supdata, "Isl1-DM.Rdata"))
246 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
247 | isl1_dm_coords <- cbind(-pData(c1_subset[, isl1_cells])$dm1, -pData(c1_subset[, isl1_cells])$dm2)
248 | 
249 | m <- match(colnames(isl1_dm@data_env$data), rownames(c1_subset))
250 | isl1ko_cells <- which(pData(c1_subset)$Background == "isl1ko")
251 | isl1ko_data <- get_exprs(c1_subset[na.omit(m), isl1ko_cells], "norm_exprs_sf")
252 | 
253 | isl1ko_dcs <- dm_predict(isl1_dm, t(isl1ko_data))
254 | isl1ko_coords <- matrix(isl1ko_dcs@x, ncol = isl1ko_dcs@Dim[2])
255 | isl1_dm_coords_ko <- rbind(isl1_dm_coords, cbind(-isl1ko_coords[, 1], -isl1ko_coords[, 3]))
256 | 
257 | cluster <- c(pData(c1_subset[, isl1_cells])$cluster, rep("KO", nrow(isl1ko_coords)))
258 | rm_obs <- is.na(cluster)
259 | cluster <- paste0("cluster", cluster[!rm_obs])
260 | 
261 | p_isl1_ko <- colorAMap(isl1_dm_coords_ko[!rm_obs,], colour_by = factor(cluster), palette = parameters$colors$islko_cluster_palette, ylab = list(label = "Dimension 3", cex = 0.6), main = list(label = "", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), par.settings = global.settings)
262 | 
263 | #
264 | # Assemble figure
265 | #
266 | m <- multi_panel_figure(width = c(69, 69, 64, 5), height = c(65, 165), column_spacing = c(6,0,0,0), panel_label_type = "lower-alpha")
267 | m <- fill_panel(m, "ext_data/Isl1_KO.png", label = "a", row = 1, column = 1)
268 | m <- fill_panel(m, p_isl1_ko, row = 1, column = 2, label = "b")
269 | m <- fill_panel(m, isl1_ko_mmplot, label = "c", row = 1, column = 3:4)
270 | m <- fill_panel(m, fig_3c, label = "d", column = 1:3, row = 2)
271 | 
272 | ggsave(
273 |   plot = m,
274 |   filename = file.path(parameters$general$path_rfigures, "Figure_3.pdf"),
275 |   width = figure_width(m),
276 |   height = figure_height(m),
277 |   units = "mm",
278 |   dpi = 600,
279 |   useDingbats = FALSE)
280 | 
281 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_2cd.R:
--------------------------------------------------------------------------------
  1 | library(scater)
  2 | library(dplyr)
  3 | library(tidyr)
  4 | library(singlecellutils)
  5 | library(multipanelfigure)
  6 | library(gridExtra)
  7 | 
  8 | #
  9 | # Load data
 10 | #
 11 | source("src/parameters.R")
 12 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 13 | 
 14 | theme2 <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 15 |                 panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_line(size=.3), legend.text = element_text(size=6),
 16 |                 axis.title.x = element_blank(), axis.title.y = element_text(color="black", size=10), plot.title = element_text(face="bold", color="black", size=6), legend.key.size =  unit(2, "mm"), legend.margin=unit(-25, "mm"),
 17 |                 strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8), axis.text.x = element_blank())
 18 | 
 19 | theme_violins <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 20 |                      panel.border = element_blank(), panel.background = element_blank(), axis.line.y = element_line(size=.3), axis.line.x = element_line(size=.3), legend.text = element_text(size=6),
 21 |                      axis.title.y = element_text(color="black", size=10), axis.title.x = element_blank(), plot.title = element_text(face="bold", color="black", size=6), legend.key.size =  unit(2, "mm"), legend.margin=unit(-25, "mm"),
 22 |                      strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8), axis.text.x = element_blank())
 23 | 
 24 | theme_hists <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 25 |                      panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_blank(), legend.text = element_text(size=6),
 26 |                      axis.title.x = element_text(color="black", size=10), axis.title.y = element_blank(), plot.title = element_text(face="bold", color="black", size=6), legend.key.size =  unit(2, "mm"), legend.margin=unit(-25, "mm"),
 27 |                      strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8), axis.ticks.y = element_blank(), axis.text.y = element_blank() )
 28 | 
 29 | theme_hists_2 <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 30 |                      panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_blank(), legend.text = element_text(size=6),
 31 |                      axis.title.x = element_text(color="black", size=10), axis.title.y = element_blank(), plot.title = element_text(face="bold", color="black", size=6), legend.key.size =  unit(2, "mm"), legend.margin=unit(-25, "mm"),
 32 |                      strip.background = element_blank(), strip.text = element_blank(), axis.text = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank() )
 33 | 
 34 | #
 35 | # Nkx2-5 IC analysis
 36 | #
 37 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1")
 38 | nkx_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$nkx_de & fData(c1_subset)$nkx_marker)]
 39 | nkx_cluster <- factor(pData(c1_subset[, nkx_cells])$cluster)
 40 | 
 41 | nkx_data <- get_exprs(c1_subset[nkx_de_genes, nkx_cells], "norm_exprs_sf")
 42 | 
 43 | nkx_ic_df <- as.data.frame(boot.ic(nkx_data, groups = nkx_cluster, R = 1000, n = 30, p.val = 0.05))
 44 | colnames(nkx_ic_df) <- paste("cluster", levels(nkx_cluster))
 45 | 
 46 | # Create plots
 47 | nkx_ic <- gather(nkx_ic_df, key = "cluster", value = "ic")
 48 | nkx_ic$cluster <- factor(nkx_ic$cluster)
 49 | 
 50 | nkx_ic_plot <- ggplot(nkx_ic, aes(x = cluster, y = ic, fill = cluster)) +
 51 |   geom_boxplot() +
 52 |   scale_fill_manual(values = parameters$colors$nkx_cluster_palette) +
 53 |   guides(fill = F) +
 54 |   ylab("Critical transition index") +
 55 |   xlab("") +
 56 |   theme2 +
 57 |   theme(plot.margin = unit(c(0, -5, 2.5, 0), "mm"))
 58 | 
 59 | #
 60 | # Nkx pairwise statistical tests
 61 | #
 62 | nkx_ptest_list <- apply(combn(levels(nkx_cluster), 2), 2, function(x) {
 63 |   x <- as.numeric(x)
 64 |   ks <- ks.test(nkx_ic_df[, x[1]], nkx_ic_df[, x[2]])
 65 |   wr <- wilcox.test(nkx_ic_df[, x[1]], nkx_ic_df[, x[2]])
 66 | 
 67 |   data.frame(combination = paste(x[1], "vs.", x[2]),
 68 |        ks_p_value = ks$p.value,
 69 |        ks_statistic = ks$statistic,
 70 |        wilcox_p_value = wr$p.value,
 71 |        wilcox_statistic = wr$statistic)
 72 | })
 73 | nkx_ptest <- data.frame(do.call("rbind", nkx_ptest_list))
 74 | 
 75 | #
 76 | # Calculate transcriptome variance
 77 | #
 78 | nkx_colors <- parameters$colors$nkx_cluster_palette
 79 | names(nkx_colors) <- levels(nkx_cluster)
 80 | 
 81 | nkx_data <- get_exprs(c1_subset[nkx_de_genes, nkx_cells], "norm_exprs_sf")
 82 | 
 83 | nkx_cluster_noise_list <- lapply(levels(nkx_cluster), function(c) {
 84 |   cells <- which(nkx_cluster == c)
 85 |   distances <- apply(combn(cells, 2), 2, function(d) {
 86 |     p <- cor(nkx_data[, d[1]], nkx_data[, d[2]], method = "spearman")
 87 |     sqrt((1-p)/2)
 88 |   })
 89 | })
 90 | 
 91 | nkx_cluster_noise <- data.frame(cluster = rep(levels(nkx_cluster), times = sapply(nkx_cluster_noise_list, length)), distance = unlist(nkx_cluster_noise_list))
 92 | p_nkx_cluster_noise <- ggplot(data = nkx_cluster_noise, aes(x = cluster, y = distance, fill = cluster)) +
 93 |   geom_violin(trim = F) +
 94 |   geom_boxplot(width = 0.1, fill = "white") +
 95 |   scale_fill_manual(values = nkx_colors) +
 96 |   scale_y_continuous(expand = c(0.05, 0)) +
 97 |   guides(fill = FALSE, colour = FALSE) +
 98 |   xlab("") + ylab("Cell to cell distance") + theme_violins +
 99 |   theme(plot.margin = unit(c(0, -5, 2.5, 0), "mm"))
100 | 
101 | #
102 | # Nkx2-5 CxC and GxG correlation analysis
103 | #
104 | nkx_colors <- parameters$colors$nkx_cluster_palette
105 | names(nkx_colors) <- levels(nkx_cluster)
106 | 
107 | nkx_correlation_plot_list <- lapply(levels(nkx_cluster), function(c) {
108 |   cells <- which(nkx_cluster == c)
109 |   ic <- computeIC(nkx_data[, cells], p.val = 1)
110 | 
111 |   p1 <- ggplot(data.frame(t=ic$between), aes(x=t)) +
112 |     geom_histogram(binwidth = 0.01, fill = nkx_colors[c]) +
113 |     scale_x_continuous(expand = c(0,0), limits = c(0,1)) +
114 |     scale_y_continuous(expand = c(0,0.3))
115 | 
116 |   p2 <- ggplot(data.frame(t=ic$within), aes(x=t)) +
117 |     geom_histogram(binwidth = 0.01, fill = nkx_colors[c]) +
118 |     scale_x_continuous(expand = c(0,0), limits = c(0,1)) +
119 |     scale_y_continuous(expand = c(0,0.3))
120 | 
121 |   if( c == levels(nkx_cluster)[length(levels(nkx_cluster))] ) {
122 |     p1 <- p1 + xlab("Pearson's correlation") + theme_hists
123 |     p2 <- p2 + xlab("Pearson's correlation") + theme_hists
124 |   } else {
125 |     p1 <- p1 + xlab("") + theme_hists_2
126 |     p2 <- p2 + xlab("") + theme_hists_2
127 |   }
128 |   p <- grid.arrange(p2, p1, ncol = 2)
129 | })
130 | 
131 | #
132 | # Isl1 IC analysis
133 | #
134 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
135 | isl1_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$isl1_de & fData(c1_subset)$isl1_marker)]
136 | isl1_cluster <- factor(pData(c1_subset[, isl1_cells])$cluster)
137 | 
138 | isl1_ko_cells <- which((pData(c1_subset)$Background == "isl1" | pData(c1_subset)$Background == "isl1ko") & pData(c1_subset)$Platform == "C1")
139 | isl1_ko_cluster <- pData(c1_subset[, isl1_ko_cells])$cluster
140 | isl1_ko_cluster[which((pData(c1_subset[, isl1_ko_cells])$Background == "isl1ko") & pData(c1_subset[, isl1_ko_cells])$Platform == "C1")] <- "KO"
141 | isl1_ko_cluster <- factor(isl1_ko_cluster)
142 | 
143 | isl1_data <- get_exprs(c1_subset[isl1_de_genes, isl1_cells], "norm_exprs_sf")
144 | isl1_ko_data <- get_exprs(c1_subset[isl1_de_genes, isl1_ko_cells], "norm_exprs_sf")
145 | 
146 | isl1_ic_df <- as.data.frame(boot.ic(isl1_data, groups = isl1_cluster, R = 1000, n = 20, p.val = 0.05))
147 | colnames(isl1_ic_df) <- paste("cluster", levels(isl1_cluster))
148 | 
149 | isl1_ko_ic_df <- as.data.frame(boot.ic(isl1_ko_data, groups = isl1_ko_cluster, R = 1000, n = 20, p.val = 0.05))
150 | colnames(isl1_ko_ic_df) <- paste("cluster", levels(isl1_ko_cluster))
151 | 
152 | # Create plots
153 | isl1_ic <- gather(isl1_ic_df, key = "cluster", value = "ic")
154 | isl1_ko_ic <- gather(isl1_ko_ic_df, key = "cluster", value = "ic")
155 | 
156 | isl1_ic_plot <- ggplot(isl1_ic, aes(x = cluster, y = ic, fill = cluster)) +
157 |   geom_boxplot() +
158 |   scale_fill_manual(values = parameters$colors$isl_cluster_palette) +
159 |   guides(fill = F) +
160 |   ylab("") +
161 |   xlab("") +
162 |   theme2
163 | 
164 | isl1_ko_ic_plot <- ggplot(isl1_ko_ic, aes(x = cluster, y = ic, fill = cluster)) +
165 |   geom_boxplot() +
166 |   scale_fill_manual(values = parameters$colors$isl_cluster_palette) +
167 |   guides(fill = F) +
168 |   ylab("") +
169 |   xlab("") +
170 |   theme2
171 | 
172 | #
173 | # Isl1 pairwise statistical tests
174 | #
175 | isl1_ptest_list <- apply(combn(levels(isl1_cluster), 2), 2, function(x) {
176 |   x <- as.numeric(x)
177 |   ks <- ks.test(isl1_ic_df[, x[1]], isl1_ic_df[, x[2]])
178 |   wr <- wilcox.test(isl1_ic_df[, x[1]], isl1_ic_df[, x[2]])
179 | 
180 |   data.frame(combination = paste(x[1], "vs.", x[2]),
181 |        ks.p_value = ks$p.value,
182 |        ks.statistic = ks$statistic,
183 |        wilcox.p_value = wr$p.value,
184 |        wilcox.statistic = wr$statistic)
185 | })
186 | isl1_ptest <- do.call("rbind", isl1_ptest_list)
187 | 
188 | #
189 | # Calculate transcriptome variance
190 | #
191 | isl1_colors <- parameters$colors$isl_cluster_palette
192 | names(isl1_colors) <- levels(isl1_cluster)
193 | 
194 | isl1_cluster_noise_list <- lapply(levels(isl1_cluster), function(c) {
195 |   cells <- which(isl1_cluster == c)
196 |   distances <- apply(combn(cells, 2), 2, function(d) {
197 |     p <- cor(isl1_data[, d[1]], isl1_data[, d[2]], method = "spearman")
198 |     sqrt((1-p)/2)
199 |   })
200 | })
201 | 
202 | isl1_cluster_noise <- data.frame(cluster = rep(levels(isl1_cluster), times = sapply(isl1_cluster_noise_list, length)), distance = unlist(isl1_cluster_noise_list))
203 | p_isl1_cluster_noise <- ggplot(data = isl1_cluster_noise, aes(x = cluster, y = distance, fill = cluster)) +
204 |   geom_violin(trim = F) +
205 |   geom_boxplot(width = 0.1, fill = "white") +
206 |   scale_fill_manual(values = isl1_colors) +
207 |   scale_y_continuous(expand = c(0.05, 0)) +
208 |   guides(fill = FALSE, colour = FALSE) +
209 |   xlab("") + ylab("") + theme_violins
210 | 
211 | #
212 | # Isl1 CxC and GxG correlation analysis
213 | #
214 | isl1_ko_cells <- which((pData(c1_subset)$Background == "isl1" | pData(c1_subset)$Background == "isl1ko") & pData(c1_subset)$Platform == "C1")
215 | isl1_ko_cluster <- pData(c1_subset[, isl1_ko_cells])$cluster
216 | isl1_ko_cluster[which((pData(c1_subset)$Background == "isl1ko") & pData(c1_subset)$Platform == "C1")] <- "KO"
217 | isl1_ko_cluster <- factor(isl1_ko_cluster)
218 | 
219 | isl1_colors <- parameters$colors$isl_cluster_palette
220 | names(isl1_colors) <- levels(isl1_ko_cluster)
221 | 
222 | isl1_ko_data <- get_exprs(c1_subset[isl1_de_genes, isl1_ko_cells], "norm_exprs_sf")
223 | 
224 | isl1_correlation_plot_list <- lapply(levels(isl1_ko_cluster), function(c) {
225 |   cells <- which(isl1_ko_cluster == c)
226 |   ic <- computeIC(isl1_ko_data[, cells], p.val = 1)
227 | 
228 |   p1 <- ggplot(data.frame(t=ic$between), aes(x=t)) +
229 |     geom_histogram(binwidth = 0.01, fill = isl1_colors[c]) +
230 |     scale_x_continuous(expand = c(0,0), limits = c(0,1)) +
231 |     scale_y_continuous(expand = c(0,0.3))
232 |   p2 <- ggplot(data.frame(t=ic$within), aes(x=t)) +
233 |     geom_histogram(binwidth = 0.01, fill = isl1_colors[c]) +
234 |     scale_x_continuous(expand = c(0,0), limits = c(0,1)) +
235 |     scale_y_continuous(expand = c(0,0.3))
236 | 
237 |   if( c == levels(isl1_ko_cluster)[length(levels(isl1_ko_cluster))] ) {
238 |     p1 <- p1 + xlab("Pearson's correlation") + theme_hists
239 |     p2 <- p2 + xlab("Pearson's correlation") + theme_hists
240 |   } else {
241 |     p1 <- p1 + xlab("") + theme_hists_2
242 |     p2 <- p2 + xlab("") + theme_hists_2
243 |   }
244 |   p <- grid.arrange(p2, p1, ncol = 2)
245 | })
246 | 
247 | # Fig2 c,d
248 | m <- multi_panel_figure(width = 205, height = 230, columns = 5, rows = 4, column_spacing = c(unit(5, "mm"), unit(5, "mm"), unit(5, "mm"), unit(0, "mm"), unit(0, "mm")))
249 | m <- fill_panel(m, nkx_ic_plot, label = "C", column = 3, row = 1)
250 | m <- fill_panel(m, isl1_ic_plot, label = "", column = 4:5, row = 1)
251 | m <- fill_panel(m, p_nkx_cluster_noise, label = "D", column = 3, row = 2)
252 | m <- fill_panel(m, p_isl1_cluster_noise, label = "", column = 4:5, row = 2)
253 | 
254 | ggsave(
255 |   plot = m,
256 |   filename = file.path(parameters$general$path_rfigures, "Figure_2_cd.pdf"),
257 |   width = 205,
258 |   height = 230,
259 |   units = "mm",
260 |   dpi = 600)
261 | 
262 | #
263 | # Save raw data for Source File
264 | #
265 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_Figure2_c_Nkx.txt"),
266 |             x = nkx_ic,
267 |             row.names = F,
268 |             col.names = T,
269 |             quote = F,
270 |             sep = "\t")
271 | 
272 | # Supplementary Figure 2c/d
273 | nkx_correlation_plot <- grid.arrange(grobs = nkx_correlation_plot_list, ncol = 1)
274 | isl1_correlation_plot <- grid.arrange(grobs = isl1_correlation_plot_list, ncol = 1)
275 | 
276 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 7)
277 | m <- fill_panel(m, nkx_correlation_plot, label = "C", column = 1, row = 5:7)
278 | m <- fill_panel(m, isl1_correlation_plot, label = "D", column = 2, row = 3:7)
279 | 
280 | ggsave(
281 |   plot = m,
282 |   filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_2_IC.pdf"),
283 |   width = 205,
284 |   height = 230,
285 |   units = "mm",
286 |   dpi = 600)
287 | 
288 | # Supplementary Table with p-values
289 | tt1 <- ttheme_minimal(core=list(fg_params=list(hjust=1, x=0.9)),
290 |                       rowhead=list(fg_params=list(hjust=1, x=0.9)),
291 |                       colhead = list(fg_params=list(hjust=1, x=0.9)))
292 | 
293 | s1_table_nkx <- tableGrob(nkx_ptest, theme=tt1, rows = NULL)
294 | s1_table_isl1 <- tableGrob(isl1_ptest, theme=tt1, rows = NULL)
295 | 
296 | m <- multi_panel_figure(width = 205, height = 230, columns = 1, rows = 2)
297 | m <- fill_panel(m, s1_table_nkx)
298 | m <- fill_panel(m, s1_table_isl1)
299 | 
300 | ggsave(
301 |   plot = m,
302 |   filename = file.path(parameters$general$path_rfigures, "Supplementary_Table_IC-pvalues.pdf"),
303 |   width = 205,
304 |   height = 230,
305 |   units = "mm",
306 |   dpi = 600)
307 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_1e.R:
--------------------------------------------------------------------------------
  1 | library(RColorBrewer)
  2 | library(ComplexHeatmap)
  3 | library(circlize)
  4 | library(rioja)
  5 | library(gridExtra)
  6 | library(cowplot)
  7 | library(multipanelfigure)
  8 | 
  9 | #
 10 | # Load data
 11 | #
 12 | source("src/parameters.R")
 13 | do_zscore <- T
 14 | n_top_genes <- 30
 15 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 16 | 
 17 | #
 18 | # Nkx2-5 differential expression heatmap
 19 | #
 20 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1")
 21 | nkx_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$nkx_de & fData(c1_subset)$nkx_marker)]
 22 | 
 23 | # Find top 30 genes per cluster
 24 | load(file.path(parameters$general$path_supdata, "Nkx2-5-diffExprs.Rdata"))
 25 | load(file.path(parameters$general$path_supdata, "Nkx2-5-markers.Rdata"))
 26 | 
 27 | nkx_de_gene_order <- lapply(names(nkx_diff_data), function(n) {
 28 |   data <- nkx_diff_data[[n]]
 29 |   j <- which(data$lfc.hi < -2)
 30 |   data <- data[j, ]
 31 |   m <- na.omit(match(nkx_de_genes, data$geneID))
 32 |   i <- order(abs(data$lfc.hi[m]), decreasing = T)
 33 |   data$geneID[m][i[1:min(n_top_genes, length(m))]]
 34 | })
 35 | 
 36 | nkx_de_selected <- unique(unlist(nkx_de_gene_order))
 37 | nkx_subset <- c1_subset[nkx_de_selected, nkx_cells]
 38 | 
 39 | #
 40 | # Constrained hierarchical clustering
 41 | #
 42 | nkx_subset <- nkx_subset[, !is.na(pData(nkx_subset)$cluster)]
 43 | 
 44 | # Get clustering to order cells
 45 | nkx_cl_order <- order(pData(nkx_subset)$cluster)
 46 | nkx_expr_data <- get_exprs(nkx_subset, "norm_exprs_sf")[, nkx_cl_order]
 47 | nkx_dist <- as.dist(1 - cor(nkx_expr_data, method="pearson"))
 48 | 
 49 | nkx_const_clust <- chclust(nkx_dist)
 50 | nkx_const_clust_dendro <- as.dendrogram(nkx_const_clust)
 51 | 
 52 | rownames(nkx_expr_data) <- fData(nkx_subset)$symbol
 53 | 
 54 | # Zscore
 55 | if (do_zscore) {
 56 |   nkx_expr_data <- t(scale(t(nkx_expr_data), scale = F))
 57 | }
 58 | 
 59 | #
 60 | # Isl1 differential expression heatmap
 61 | #
 62 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
 63 | isl1_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$isl1_de & fData(c1_subset)$isl1_marker)]
 64 | 
 65 | # Find top 30 genes per cluster
 66 | load(file.path(parameters$general$path_supdata, "Isl1-diffExprs.Rdata"))
 67 | load(file.path(parameters$general$path_supdata, "Isl1-markers.Rdata"))
 68 | 
 69 | isl1_de_gene_order <- lapply(names(isl1_diff_data), function(n) {
 70 |   data <- isl1_diff_data[[n]]
 71 |   j <- which(data$lfc.hi < -2)
 72 |   data <- data[j, ]
 73 |   m <- na.omit(match(isl1_de_genes, data$geneID))
 74 |   i <- order(abs(data$lfc.hi[m]), decreasing = T)
 75 |   na.omit(data$geneID[m][i[1:min(n_top_genes, length(m))]])
 76 | })
 77 | 
 78 | isl1_de_selected <- na.omit(unique(unlist(isl1_de_gene_order)))
 79 | isl1_subset <- c1_subset[isl1_de_selected, isl1_cells]
 80 | 
 81 | #
 82 | # Constrained hierarchical clustering
 83 | #
 84 | isl1_subset <- isl1_subset[, !is.na(pData(isl1_subset)$cluster)]
 85 | 
 86 | # Get clustering to order cells
 87 | isl1_cl_order <- order(pData(isl1_subset)$cluster)
 88 | isl1_expr_data <- get_exprs(isl1_subset, "norm_exprs_sf")[, isl1_cl_order]
 89 | isl1_dist <- as.dist(1 - cor(isl1_expr_data, method="pearson"))
 90 | 
 91 | isl1_const_clust <- chclust(isl1_dist)
 92 | isl1_const_clust_dendro <- as.dendrogram(isl1_const_clust)
 93 | 
 94 | rownames(isl1_expr_data) <- fData(isl1_subset)$symbol
 95 | 
 96 | # Zscore
 97 | if (do_zscore) {
 98 |   isl1_expr_data <- t(scale(t(isl1_expr_data), scale = F))
 99 | }
100 | 
101 | #
102 | # Color mapping function to bring both heatmaps to same scale
103 | #
104 | e_min <- min(nkx_expr_data, isl1_expr_data)
105 | e_max <- max(nkx_expr_data, isl1_expr_data)
106 | 
107 | bounds <- max(abs(e_min), abs(e_max))
108 | 
109 | breaks <- seq(-bounds, bounds, length.out = 99)
110 | colors <- colorRampPalette(rev(brewer.pal(10, "RdBu")))(99)
111 | 
112 | col_fun <- colorRamp2(breaks, colors)
113 | 
114 | #
115 | # Nkx2-5 Heatmap annotation and Heatmap
116 | #
117 | nkx_cell_annotation_data <- data.frame(Timepoint = pData(nkx_subset)$Timepoint[nkx_cl_order], Cluster = pData(nkx_subset)$cluster[nkx_cl_order])
118 | nkx_cell_annotation <- HeatmapAnnotation(nkx_cell_annotation_data, col = list(Timepoint = c("e7.5" = "#FF003C", "e8.5" = "#248F8D", "e9.5" = "#987F69"),
119 |                                                                               Cluster = c("3" = "#00008B", "1" = "#8B0000", "2" = "#CDAD00")),
120 |                                          show_legend = F,
121 |                                          show_annotation_name = T,
122 |                                          annotation_name_gp = gpar(fontsize = 4),
123 |                                          annotation_name_offset = unit(0.8, "mm"),
124 |                                          gap = unit(0.1, "mm"),
125 |                                          annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm")))
126 | 
127 | nkx_de_heatmap <- Heatmap(nkx_expr_data,
128 |                           col = col_fun,
129 |                           top_annotation = nkx_cell_annotation,
130 |                           cluster_columns = nkx_const_clust_dendro,
131 |                           cluster_rows = T,
132 |                           show_column_names = F,
133 |                           show_row_dend = F,
134 |                           show_column_dend = F,
135 |                           name = "nkx",
136 |                           #column_title = "Nkx2-5+ cells",
137 |                           column_title_gp = gpar(fontface = "bold", fontsize = 8),
138 |                           split = rep(paste("Marker genes\ncluster", c(1,2,3)), times = sapply(nkx_de_gene_order, length)),
139 |                           gap = unit(0.3, "mm"),
140 |                           row_title_rot = 90,
141 |                           row_title_gp = gpar(fontsize = 4),
142 |                           row_names_gp = gpar(fontsize = 3),
143 |                           heatmap_legend_param = list("plot" = F, "color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
144 | 
145 | #
146 | # Isl1 Heatmap annotation and Heatmap
147 | #
148 | isl1_cell_annotation_data <- data.frame(Timepoint = pData(isl1_subset)$Timepoint[isl1_cl_order], Cluster = pData(isl1_subset)$cluster[isl1_cl_order])
149 | isl1_cell_annotation <- HeatmapAnnotation(isl1_cell_annotation_data, col = list(Timepoint = c("e7.5" = "#FF003C", "e8.5" = "#248F8D", "e9.5" = "#987F69"),
150 |                                                                                 Cluster = c("1" = "#1B9E77", "2" = "#D95F02", "3" = "#7570B3", "4" = "#E7298A", "5" = "#66A61E")),
151 |                                           show_legend = F,
152 |                                           show_annotation_name = T,
153 |                                           annotation_name_gp = gpar(fontsize = 4),
154 |                                           annotation_name_offset = unit(0.8, "mm"),
155 |                                           gap = unit(0.1, "mm"),
156 |                                           annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm")))
157 | 
158 | isl1_de_heatmap <- Heatmap(isl1_expr_data,
159 |                            col = col_fun,
160 |                            top_annotation = isl1_cell_annotation,
161 |                            cluster_columns = isl1_const_clust_dendro,
162 |                            cluster_rows = T,
163 |                            show_column_names = F,
164 |                            show_row_dend = F,
165 |                            show_column_dend = F,
166 |                            name = "isl1",
167 |                            #column_title = "Isl1+ cells",
168 |                            column_title_gp = gpar(fontface = "bold", fontsize = 8),
169 |                            split = rep(paste("Marker genes\ncluster", c(1,2,3,4,5)), times = sapply(isl1_de_gene_order, length)),
170 |                            gap = unit(0.3, "mm"),
171 |                            row_title_rot = 90,
172 |                            row_title_gp = gpar(fontsize = 4),
173 |                            row_names_gp = gpar(fontsize = 3),
174 |                            heatmap_legend_param = list("plot" = T ,"color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
175 | 
176 | #
177 | # Build heatmap for Figure 1e
178 | #
179 | fig_1e <- grid.arrange(grid.grabExpr(draw(nkx_de_heatmap, heatmap_legend_side = "bottom", padding = unit(c(0, 0, 0, 0), "mm"))), grid.grabExpr(draw(isl1_de_heatmap, heatmap_legend_side = "bottom", padding = unit(c(0, 0, -5, 0), "mm"))), nrow = 2)
180 | 
181 | m <- multi_panel_figure(width = 205, height = 230, columns = 4, rows = 4)
182 | m <- fill_panel(m, fig_1e, label = "E", column = c(1,2), row = c(2,3,4))
183 | 
184 | ggsave(
185 |   plot = m,
186 |   filename = file.path(parameters$general$path_rfigures, "Figure_1_e2.pdf"),
187 |   width = 205,
188 |   height = 230,
189 |   units = "mm",
190 |   dpi = 600)
191 | 
192 | #
193 | # Heatmap for all Nkx2-5 DE genes
194 | #
195 | nkx_de_gene_order <- lapply(names(nkx_diff_data), function(n) {
196 |   data <- nkx_diff_data[[n]]
197 |   j <- which(data$lfc.hi < -1)
198 |   data <- data[j, ]
199 |   m <- na.omit(match(nkx_de_genes, data$geneID))
200 |   i <- order(abs(data$lfc.hi[m]), decreasing = T)
201 |   data$geneID[m]
202 | })
203 | 
204 | nkx_de_selected <- unique(unlist(nkx_de_gene_order))
205 | nkx_subset <- c1_subset[nkx_de_selected, nkx_cells]
206 | 
207 | #
208 | # Constrained hierarchical clustering
209 | #
210 | nkx_subset <- nkx_subset[, !is.na(pData(nkx_subset)$cluster)]
211 | 
212 | # Get clustering to order cells
213 | nkx_cl_order <- order(pData(nkx_subset)$cluster)
214 | nkx_expr_data <- get_exprs(nkx_subset, "norm_exprs_sf")[, nkx_cl_order]
215 | nkx_dist <- as.dist(1 - cor(nkx_expr_data, method="pearson"))
216 | 
217 | nkx_const_clust <- chclust(nkx_dist)
218 | nkx_const_clust_dendro <- as.dendrogram(nkx_const_clust)
219 | 
220 | rownames(nkx_expr_data) <- fData(nkx_subset)$symbol
221 | 
222 | # Zscore
223 | if (do_zscore) {
224 |   nkx_expr_data <- t(scale(t(nkx_expr_data), scale = F))
225 | }
226 | 
227 | # Heatmap
228 | nkx_all_de_heatmap <- Heatmap(nkx_expr_data,
229 |                           col = colorRampPalette(rev(brewer.pal(10, "RdBu")))(99),
230 |                           top_annotation = nkx_cell_annotation,
231 |                           cluster_columns = nkx_const_clust_dendro,
232 |                           cluster_rows = T,
233 |                           show_column_names = F,
234 |                           show_row_dend = F,
235 |                           name = "nkx",
236 |                           column_title = "Nkx2-5+ cells",
237 |                           column_title_gp = gpar(fontface = "bold", fontsize = 8),
238 |                           split = rep(paste("Marker genes\ncluster", c(1,2,3)), times = sapply(nkx_de_gene_order, length)),
239 |                           gap = unit(0.5, "mm"),
240 |                           row_title_rot = 90,
241 |                           row_title_gp = gpar(fontsize = 4),
242 |                           row_names_gp = gpar(fontsize = 1),
243 |                           heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
244 | 
245 | #
246 | # Heatmap for all Isl1 DE genes
247 | #
248 | isl1_de_gene_order <- lapply(names(isl1_diff_data), function(n) {
249 |   data <- isl1_diff_data[[n]]
250 |   j <- which(data$lfc.hi < -2)
251 |   data <- data[j, ]
252 |   m <- na.omit(match(isl1_de_genes, data$geneID))
253 |   i <- order(abs(data$lfc.hi[m]), decreasing = T)
254 |   na.omit(data$geneID[m])
255 | })
256 | # Remove Etv2 from cluster 5
257 | isl1_de_gene_order[[5]] <- isl1_de_gene_order[[5]][! isl1_de_gene_order[[5]] %in% c("ENSMUSG00000006311")]
258 | # Rescue two missing genes
259 | isl1_de_gene_order[[2]] <- c(isl1_de_gene_order[[2]], "ENSMUSG00000029432")
260 | isl1_de_gene_order[[5]] <- c(isl1_de_gene_order[[5]], "ENSMUSG00000050953")
261 | isl1_de_selected <- na.omit(unique(unlist(isl1_de_gene_order)))
262 | isl1_subset <- c1_subset[isl1_de_selected, isl1_cells]
263 | 
264 | #
265 | # Constrained hierarchical clustering
266 | #
267 | isl1_subset <- isl1_subset[, !is.na(pData(isl1_subset)$cluster)]
268 | 
269 | # Get clustering to order cells
270 | isl1_cl_order <- order(pData(isl1_subset)$cluster)
271 | isl1_expr_data <- get_exprs(isl1_subset, "norm_exprs_sf")[, isl1_cl_order]
272 | isl1_dist <- as.dist(1 - cor(isl1_expr_data, method="pearson"))
273 | 
274 | isl1_const_clust <- chclust(isl1_dist)
275 | isl1_const_clust_dendro <- as.dendrogram(isl1_const_clust)
276 | 
277 | rownames(isl1_expr_data) <- fData(isl1_subset)$symbol
278 | 
279 | # Zscore
280 | if (do_zscore) {
281 |   isl1_expr_data <- t(scale(t(isl1_expr_data), scale = F))
282 | }
283 | 
284 | # Heatmap
285 | isl1_all_de_heatmap <- Heatmap(isl1_expr_data,
286 |                            col = rev(brewer.pal(10, "RdBu")),
287 |                            top_annotation = isl1_cell_annotation,
288 |                            cluster_columns = isl1_const_clust_dendro,
289 |                            cluster_rows = T,
290 |                            show_column_names = F,
291 |                            show_row_dend = F,
292 |                            name = "isl1",
293 |                            column_title = "Isl1+ cells",
294 |                            column_title_gp = gpar(fontface = "bold", fontsize = 8),
295 |                            split = rep(paste("Marker genes\ncluster", c(1,2,3,4,5)), times = sapply(isl1_de_gene_order, length)),
296 |                            gap = unit(0.5, "mm"),
297 |                            row_title_rot = 90,
298 |                            row_title_gp = gpar(fontsize = 4),
299 |                            row_names_gp = gpar(fontsize = 1),
300 |                            heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
301 | 
302 | #
303 | # Supplementary Figure X
304 | #
305 | sup_figX_a <- grid.grabExpr(draw(nkx_all_de_heatmap, heatmap_legend_side = "bottom"))
306 | sup_figX_b <- grid.grabExpr(draw(isl1_all_de_heatmap, heatmap_legend_side = "bottom"))
307 | 
308 | m <- multi_panel_figure(width = 205, height = 230, columns = 1, rows = 2)
309 | m <- fill_panel(m, sup_figX_a, row = 1)
310 | m <- fill_panel(m, sup_figX_b, row = 2)
311 | 
312 | ggsave(
313 |   plot = m,
314 |   filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_DE_genes.pdf"),
315 |   width = 205,
316 |   height = 230,
317 |   units = "mm",
318 |   dpi = 600)
319 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/Figure_3cd.R:
--------------------------------------------------------------------------------
  1 | library(scater)
  2 | library(tidyr)
  3 | library(ggplot2)
  4 | library(singlecellutils)
  5 | 
  6 | #
  7 | # Load data
  8 | #
  9 | source("src/parameters.R")
 10 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 11 | load(file.path(parameters$general$path_supdata, "Isl1-DM.Rdata"))
 12 | 
 13 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1")
 14 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
 15 | 
 16 | isl1_dm_coords <- cbind(-pData(c1_subset[, isl1_cells])$dm1, -pData(c1_subset[, isl1_cells])$dm2, -pData(c1_subset[, isl1_cells])$dm3)
 17 | 
 18 | #
 19 | # Graphical settings
 20 | #
 21 | global.settings <- list(layout.heights=list(main.key.padding = -2, bottom.padding = 0, axis.xlab.padding = -0.5), layout.widths = list(ylab.axis.padding = -0.5))
 22 | 
 23 | #
 24 | # Color scalings
 25 | #
 26 | scale_channel <- function(x, min, max, a, b) {
 27 |   x[x > max] <-  max
 28 |   return(ceiling(((b - a) * (x - min) / (max - min)) + a))
 29 | }
 30 | 
 31 | min_isl1 <- min(get_exprs(c1_subset["ENSMUSG00000042258", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1,])
 32 | max_isl1 <- max(get_exprs(c1_subset["ENSMUSG00000042258", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1,])
 33 | min_nkx <- min(get_exprs(c1_subset["ENSMUSG00000015579", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1,])
 34 | max_nkx <- max(get_exprs(c1_subset["ENSMUSG00000015579", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1,])
 35 | 
 36 | isl1_color <- scale_channel(get_exprs(c1_subset["ENSMUSG00000042258", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1, ], min_isl1, 7, 0, 255)
 37 | nkx_color <- scale_channel(get_exprs(c1_subset["ENSMUSG00000015579", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1, ], min_nkx, 7, 0, 255)
 38 | 
 39 | #
 40 | # Mix the colors of Isl1 and Nkx2-5
 41 | #
 42 | mix_color <- sapply(1:length(isl1_color), function(i) {
 43 |   red <- nkx_color[i]
 44 |   green <- isl1_color[i]
 45 |   blue <- 0
 46 |   rgb(red, green, blue, maxColorValue = 255)
 47 | })
 48 | 
 49 | #
 50 | # Plot clusterings from Figure 1d with coexpressing cells highlighted
 51 | #
 52 | isl1_isl1 <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", isl1_cells], "norm_exprs_sf") > 0.2, "Isl1+", "Isl1-")
 53 | isl1_nkx <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", isl1_cells], "norm_exprs_sf") > 0.2, "Nkx2-5+", "Nkx2-5-")
 54 | nkx_isl1 <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", nkx_cells], "norm_exprs_sf") > 0.2, "Isl1+", "Isl1-")
 55 | nkx_nkx <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", nkx_cells], "norm_exprs_sf") > 0.2, "Nkx2-5+", "Nkx2-5-")
 56 | 
 57 | co_expression_isl <- which(isl1_isl1 == "Isl1+" & isl1_nkx == "Nkx2-5+")
 58 | co_expression_nkx <- which(nkx_isl1 == "Isl1+" & nkx_nkx == "Nkx2-5+")
 59 | 
 60 | mix_color_grey <- rep("darkgrey", length(mix_color))
 61 | mix_color_grey[co_expression_isl] <- mix_color[co_expression_isl]
 62 | mix_color_grey[length(isl1_cells)+co_expression_nkx] <- mix_color[length(isl1_cells)+co_expression_nkx]
 63 | 
 64 | plot_mix_clustering_isl1 <- lattice::xyplot(pData(c1_subset)$tsne2[isl1_cells] ~ pData(c1_subset)$tsne1[isl1_cells],
 65 |                                              col = mix_color_grey[1:length(isl1_cells)], cex = .5, pch = 16, ylab = list(label = "t-SNE 2", cex = 0.6), main = list(label = "Isl1 cell clustering", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "t-SNE 1", cex = 0.6), par.settings = global.settings)
 66 | plot_mix_clustering_nkx <- lattice::xyplot(pData(c1_subset)$tsne2[nkx_cells] ~ pData(c1_subset)$tsne1[nkx_cells],
 67 |                                             col = mix_color_grey[(length(isl1_cells)+1):length(nkx_cells)], cex = .5, pch = 16, ylab = list(label = "t-SNE 2", cex = 0.6), main = list(label = "Nkx2-5 cell clustering", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "t-SNE 1", cex = 0.6), par.settings = global.settings)
 68 | 
 69 | #
 70 | # Table Co-expressing
 71 | #
 72 | nkx_cells_early <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e7.5")
 73 | isl1_cells_early <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e7.5")
 74 | nkx_cells_middle <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e8.5")
 75 | isl1_cells_middle <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e8.5")
 76 | nkx_cells_late <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e9.5")
 77 | isl1_cells_late <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e9.5")
 78 | 
 79 | isl1_isl1_early <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", isl1_cells_early], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-")
 80 | isl1_nkx_early <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", isl1_cells_early], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-")
 81 | nkx_isl1_early <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", nkx_cells_early], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-")
 82 | nkx_nkx_early <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", nkx_cells_early], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-")
 83 | 
 84 | isl1_isl1_middle <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", isl1_cells_middle], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-")
 85 | isl1_nkx_middle <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", isl1_cells_middle], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-")
 86 | nkx_isl1_middle <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", nkx_cells_middle], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-")
 87 | nkx_nkx_middle <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", nkx_cells_middle], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-")
 88 | 
 89 | isl1_isl1_late <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", isl1_cells_late], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-")
 90 | isl1_nkx_late <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", isl1_cells_late], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-")
 91 | nkx_isl1_late <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", nkx_cells_late], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-")
 92 | nkx_nkx_late <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", nkx_cells_late], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-")
 93 | 
 94 | 
 95 | double_negative_isl1 <- c(isl1_cells_early[which(isl1_isl1_early == "Isl1-" & isl1_nkx_early == "Nkx2-5-")],
 96 |                           isl1_cells_middle[which(isl1_isl1_middle == "Isl1-" & isl1_nkx_middle == "Nkx2-5-")],
 97 |                           isl1_cells_late[which(isl1_isl1_late == "Isl1-" & isl1_nkx_late == "Nkx2-5-")])
 98 | double_negative_nkx <- c(nkx_cells_early[which(nkx_isl1_early == "Isl1-" & nkx_nkx_early == "Nkx2-5-")],
 99 |                          nkx_cells_middle[which(nkx_isl1_middle == "Isl1-" & nkx_nkx_middle == "Nkx2-5-")],
100 |                          nkx_cells_late[which(nkx_isl1_late == "Isl1-" & nkx_nkx_late == "Nkx2-5-")])
101 | non_double_negatives_isl1 <- c(isl1_cells_early[-which(isl1_isl1_early == "Isl1-" & isl1_nkx_early == "Nkx2-5-")],
102 |                            isl1_cells_middle[-which(isl1_isl1_middle == "Isl1-" & isl1_nkx_middle == "Nkx2-5-")],
103 |                            isl1_cells_late[-which(isl1_isl1_late == "Isl1-" & isl1_nkx_late == "Nkx2-5-")])
104 | non_double_negatives_nkx <- c(nkx_cells_early[-which(nkx_isl1_early == "Isl1-" & nkx_nkx_early == "Nkx2-5-")],
105 |                           nkx_cells_middle[-which(nkx_isl1_middle == "Isl1-" & nkx_nkx_middle == "Nkx2-5-")],
106 |                           nkx_cells_late[-which(nkx_isl1_late == "Isl1-" & nkx_nkx_late == "Nkx2-5-")])
107 | 
108 | 
109 | tt1 <- gridExtra::ttheme_minimal(base_size = 8,
110 |                                  padding = unit(c(2, 2), "mm"),
111 |                                  core = list(fg_params = list(hjust = 1, x = 0.9)),
112 |                                  rowhead = list(fg_params = list(fontface = 2L, hjust = 1, x = 0.9)),
113 |                                  colhead = list(fg_params = list(fontface = 2L, hjust = 1, x = 0.9)))
114 | 
115 | table_isl1_early <- gridExtra::tableGrob(table(isl1_isl1_early, isl1_nkx_early), theme=tt1)
116 | table_nkx_early <- gridExtra::tableGrob(table(nkx_isl1_early, nkx_nkx_early), theme=tt1, rows = NULL)
117 | table_isl1_middle <- gridExtra::tableGrob(table(isl1_isl1_middle, isl1_nkx_middle), theme=tt1)
118 | table_nkx_middle <- gridExtra::tableGrob(table(nkx_isl1_middle, nkx_nkx_middle), theme=tt1, rows = NULL)
119 | table_isl1_late <- gridExtra::tableGrob(table(isl1_isl1_late, isl1_nkx_late), theme=tt1)
120 | table_nkx_late <- gridExtra::tableGrob(table(nkx_isl1_late, nkx_nkx_late), theme=tt1, rows = NULL)
121 | 
122 | #
123 | # Load SOM input genes and predict Nkx2-5 data onto Isl1-SOM
124 | #
125 | het_early <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.early.txt"))
126 | het_mid <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.mid.txt"))
127 | het_late <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.late.txt"))
128 | het_pool <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.txt"))
129 | 
130 | isl1_het <- unique(c(het_early, het_mid, het_late, het_pool))
131 | 
132 | expression <- get_exprs(c1_subset[, c(isl1_cells, nkx_cells)], "norm_exprs_sf")
133 | expression_scaled <- t(scale(t(expression[isl1_het, ])))
134 | 
135 | som <- singlecellutils::calcSOM(expression_scaled, train = 1:nrow(expression_scaled), num_epochs = 2000, seed = 1004)
136 | som$codes <- som$codes[[1]]
137 | 
138 | # Clustering
139 | 
140 | hdbscan <- function(data, min_samples = 7L, min_cluster_size = 9L, outlier = 0, seed = NULL) {
141 |   if (!is.null(seed)) set.seed(seed)
142 |   h <- reticulate::import("hdbscan")
143 |   cl <- h$HDBSCAN(min_samples = min_samples, min_cluster_size = min_cluster_size)
144 |   labels <- cl$fit_predict(data) + 1
145 |   labels[labels == 0] <- outlier
146 |   return(factor(labels))
147 | }
148 | 
149 | seed <- 3465
150 | set.seed(seed)
151 | tsne <- Rtsne::Rtsne(t(som$codes), perplexity = 21, theta = 0.05, max_iter = 2000)
152 | 
153 | p_color_background <- droplevels(pData(c1_subset[, c(isl1_cells, nkx_cells)])$Background)
154 | p_color_background[match(c(double_negative_isl1, double_negative_nkx), c(isl1_cells, nkx_cells))] <- NA
155 | p_cluster_background <- singlecellutils::colorAMap(tsne$Y, colour_by = p_color_background, palette = c("#31a354", "#3182bd"), cex = rep(0.5, nrow(tsne$Y)), na.cex = 0.5, xlab = list(label = "t-SNE 1", cex = 0.6), ylab = list(label = "t-SNE 2", cex = 0.6), main = list(label = ""), scales = list(cex = 0.4, tck = c(0.5, 0)), par.settings = global.settings)
156 | 
157 | p_color_time <- droplevels(pData(c1_subset[, c(isl1_cells, nkx_cells)])$Timepoint)
158 | p_color_time[match(c(double_negative_isl1, double_negative_nkx), c(isl1_cells, nkx_cells))] <- NA
159 | p_cluster_time <- singlecellutils::colorAMap(tsne$Y, colour_by = p_color_time, palette = parameters$colors$nkx_timepoint_palette, cex = rep(0.5, nrow(tsne$Y)), na.cex = 0.5, xlab = list(label = "t-SNE 1", cex = 0.6), ylab = list(label = ""), main = list(label = ""), scales = list(cex = 0.4, tck = c(0.5, 0)), par.settings = global.settings)
160 | 
161 | isl1_clustering <- factor(c(pData(c1_subset[, isl1_cells])$cluster, rep(NA, length(nkx_cells))), levels = as.character(1:5))
162 | p_cluster_isl1 <- singlecellutils::colorAMap(tsne$Y, colour_by = isl1_clustering, palette = parameters$colors$isl_cluster_palette, cex = rep(0.5, nrow(tsne$Y)), na.cex = 0.5, xlab = list(label = "t-SNE 1", cex = 0.6), ylab = list(label = ""), main = list(label = ""), scales = list(cex = 0.4, tck = c(0.5, 0)), par.settings = global.settings)
163 | 
164 | #
165 | # Predict Nkx2-5 data onto Isl1 trajectory
166 | #
167 | 
168 | # Temp
169 | #nkx_cells <- non_double_negatives_nkx
170 | #isl1_cells <- non_double_negatives_isl1
171 | # End Temp
172 | 
173 | m <- match(colnames(isl1_dm@data_env$data), rownames(c1_subset))
174 | nkx_data <- get_exprs(c1_subset[m, nkx_cells], "norm_exprs_sf")
175 | 
176 | nkx_dcs <- dm_predict(isl1_dm, t(nkx_data))
177 | nkx_coords <- matrix(nkx_dcs@x, ncol = nkx_dcs@Dim[2])
178 | isl1_dm_coords_nkx <- rbind(isl1_dm_coords, cbind(-nkx_coords[, 1], -nkx_coords[, 3], -nkx_coords[, 2]))
179 | 
180 | #
181 | # Prepare plots
182 | #
183 | # Create col
184 | isl1_col <- rep("darkgrey", length(c(isl1_cells, nkx_cells)))
185 | isl1_col[1:length(isl1_cells)] <- mix_color[1:length(isl1_cells)]
186 | nkx_col <- rep("darkgrey", length(c(isl1_cells, nkx_cells)))
187 | nkx_col[(length(isl1_cells)+1):length(nkx_col)] <- mix_color[(length(isl1_cells)+1):length(nkx_col)]
188 | 
189 | # Create pch
190 | isl1_pch <- rep(4, length(c(isl1_cells, nkx_cells)))
191 | isl1_pch[1:length(isl1_cells)] <- 16
192 | nkx_pch <- rep(4, length(c(isl1_cells, nkx_cells)))
193 | nkx_pch[(length(isl1_cells)+1):length(nkx_pch)] <- 16
194 | 
195 | # Create cex
196 | isl1_cex <- rep(0.5, length(c(isl1_cells, nkx_cells)))
197 | isl1_cex[1:length(isl1_cells)] <- .5
198 | nkx_cex <- rep(0.5, length(c(isl1_cells, nkx_cells)))
199 | nkx_cex[(length(isl1_cells)+1):length(nkx_cex)] <- .5
200 | 
201 | # Create data
202 | plot_isl1_data_x <- rev(isl1_dm_coords_nkx[,1])
203 | plot_isl1_data_y <- rev(isl1_dm_coords_nkx[,2])
204 | plot_nkx_data_x <- isl1_dm_coords_nkx[,1]
205 | plot_nkx_data_y <- isl1_dm_coords_nkx[,2]
206 | 
207 | #
208 | # Create figure
209 | #
210 | table <- gridExtra::grid.arrange(table_isl1_early, table_nkx_early,
211 |                                  table_isl1_middle, table_nkx_middle,
212 |                                  table_isl1_late, table_nkx_late, nrow = 3)
213 | plot_lineage_overlap_isl1 <- lattice::xyplot(plot_isl1_data_y ~ plot_isl1_data_x, cex = rev(isl1_cex), col = rev(isl1_col), pch = rev(isl1_pch), ylab = list(label = "Dimension 3", cex = 0.6), main = list(label = "Isl1 cells in Isl1 lineage", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), par.settings = global.settings)
214 | plot_lineage_overlap_nkx <- lattice::xyplot(isl1_dm_coords_nkx[,2] ~ isl1_dm_coords_nkx[,1], cex = nkx_cex, col = nkx_col, pch = nkx_pch, ylab = list(label = "Dimension 3", cex = 0.6), main = list(label = "Nkx2-5 cells in Isl1 lineage", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), par.settings = global.settings)
215 | plot_clustering <- gridExtra::grid.arrange(p_cluster_background, p_cluster_time, p_cluster_isl1, nrow = 1)
216 | 
217 | figure <- multipanelfigure::multi_panel_figure(width = 205, height = 230, columns = 3, rows = 3)
218 | figure <- multipanelfigure::fill_panel(figure, table, scaling = "fit", row = 1, column = 1)
219 | 
220 | figure <- multipanelfigure::fill_panel(figure, plot_mix_clustering_isl1, row = 1, column = 3)
221 | figure <- multipanelfigure::fill_panel(figure, plot_mix_clustering_nkx, row = 1, column = 2)
222 | 
223 | figure <- multipanelfigure::fill_panel(figure, plot_clustering, row = 2, column = 1:3)
224 | 
225 | figure <- multipanelfigure::fill_panel(figure, plot_lineage_overlap_isl1, row = 3, column = 1)
226 | figure <- multipanelfigure::fill_panel(figure, plot_lineage_overlap_nkx, row = 3, column = 2)
227 | 
228 | # Save figure
229 | ggplot2::ggsave(figure,
230 |                 file = "supplementary_figures/99_review_2c.pdf",
231 |                 width = 205,
232 |                 height = 230,
233 |                 units = "mm",
234 |                 useDingbats = FALSE)
235 | 


--------------------------------------------------------------------------------
/original-analysis/figure_src/SupplementaryFigure_7.R:
--------------------------------------------------------------------------------
  1 | library(scater)
  2 | library(dplyr)
  3 | library(tidyr)
  4 | library(singlecellutils)
  5 | library(RColorBrewer)
  6 | library(viridis)
  7 | library(multipanelfigure)
  8 | library(ComplexHeatmap)
  9 | library(circlize)
 10 | library(zoo)
 11 | library(MPAgenomics)
 12 | library(gridExtra)
 13 | 
 14 | #
 15 | # Load data
 16 | #
 17 | source("src/parameters.R")
 18 | min_cor_cluster <- 0.5
 19 | min_cor <- 0.7
 20 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata"))
 21 | 
 22 | #
 23 | # Nkx2-5 Rank based correlation
 24 | #
 25 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1")
 26 | nkx_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$nkx_de & fData(c1_subset)$nkx_marker)]
 27 | nkx_cluster <- factor(pData(c1_subset[, nkx_cells])$cluster)
 28 | 
 29 | nkx_data <- get_exprs(c1_subset[nkx_de_genes, nkx_cells], "norm_exprs_sf")
 30 | 
 31 | nkx_cor <- cor(t(nkx_data), method = "pearson")
 32 | nkx_dpt <- pData(c1_subset[, nkx_cells])$dpt
 33 | 
 34 | nkx_cor_list <- lapply(levels(nkx_cluster), function(c) {
 35 |   ind <- which(nkx_cluster == c)
 36 |   cc <- cor(t(nkx_data[, ind]), cbind(nkx_dpt[ind]), method = "spearman")
 37 | })
 38 | nkx_cor_list$dpt <- cor(t(nkx_data), cbind(nkx_dpt), method = "spearman")
 39 | 
 40 | nkx_correlation <- as.data.frame(do.call("cbind", nkx_cor_list))
 41 | colnames(nkx_correlation) <- c(paste0("cluster", levels(nkx_cluster)), "nkx_dpt")
 42 | 
 43 | write.table(nkx_correlation, file = file.path(parameters$general$path_supdata,"Nkx2-5-correlation.txt"), quote = F, row.names = T, col.names = T, sep = "\t")
 44 | 
 45 | #
 46 | # Extract highly correlated genes
 47 | #
 48 | # From cluster correlation
 49 | nkx_i <- unique(unlist(apply(nkx_correlation[, -ncol(nkx_correlation)], 2, function(x) which(abs(x) > min_cor_cluster))))
 50 | 
 51 | # From dpt correlation
 52 | nkx_i <- unique(c(nkx_i, which(abs(nkx_correlation$nkx_dpt) > min_cor)))
 53 | 
 54 | nkx_cor_data <- nkx_cor[nkx_i, nkx_i]
 55 | colnames(nkx_cor_data) <- fData(c1_subset[nkx_de_genes, ][nkx_i, ])$symbol
 56 | rownames(nkx_cor_data) <- fData(c1_subset[nkx_de_genes, ][nkx_i, ])$symbol
 57 | 
 58 | #
 59 | # Isl1 Rank based correlation
 60 | #
 61 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1")
 62 | isl1_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$isl1_de & fData(c1_subset)$isl1_marker)]
 63 | isl1_cluster <- factor(pData(c1_subset[, isl1_cells])$cluster)
 64 | 
 65 | isl1_data <- get_exprs(c1_subset[isl1_de_genes, isl1_cells], "norm_exprs_sf")
 66 | 
 67 | isl1_cor <- cor(t(isl1_data), method = "pearson")
 68 | isl1_dpt <- pData(c1_subset[, isl1_cells])$dpt
 69 | 
 70 | isl1_cor_list <- lapply(levels(isl1_cluster), function(c) {
 71 |   ind <- which(isl1_cluster == c)
 72 |   cc <- cor(t(isl1_data[, ind]), cbind(isl1_dpt[ind]), method = "spearman")
 73 | })
 74 | isl1_cor_list$dpt <- cor(t(isl1_data), cbind(isl1_dpt), method = "spearman")
 75 | 
 76 | isl1_correlation <- as.data.frame(do.call("cbind", isl1_cor_list))
 77 | colnames(isl1_correlation) <- c(paste0("cluster", levels(isl1_cluster)), "isl1_dpt")
 78 | 
 79 | write.table(isl1_correlation, file = file.path(parameters$general$path_supdata,"Isl1-correlation.txt"), quote = F, row.names = T, col.names = T, sep = "\t")
 80 | 
 81 | #
 82 | # Extract highly correlated genes
 83 | #
 84 | # From cluster correlation
 85 | isl1_i <- unique(unlist(apply(isl1_correlation[, -ncol(isl1_correlation)], 2, function(x) which(abs(x) > min_cor_cluster))))
 86 | 
 87 | # From dpt correlation
 88 | isl1_i <- unique(c(isl1_i, which(abs(isl1_correlation$isl1_dpt) > min_cor)))
 89 | 
 90 | isl1_cor_data <- isl1_cor[isl1_i, isl1_i]
 91 | colnames(isl1_cor_data) <- fData(c1_subset[isl1_de_genes, ][isl1_i, ])$symbol
 92 | rownames(isl1_cor_data) <- fData(c1_subset[isl1_de_genes, ][isl1_i, ])$symbol
 93 | 
 94 | #
 95 | # Nkx2-5 Heatmaps
 96 | #
 97 | 
 98 | nkx_cor_heatmap <- Heatmap(nkx_cor_data,
 99 |               col = colorRamp2(seq(from = -1, to = 1, by = 0.25), rev(brewer.pal(9, "RdBu"))),
100 |               row_names_gp = gpar(fontsize = 2),
101 |               column_names_gp = gpar(fontsize = 2),
102 |               show_column_dend = F,
103 |               column_title = "Nkx2-5+ gene correlation",
104 |               column_title_gp = gpar(fontface = "bold", fontsize = 8),
105 |               name = "nkx_cor",
106 |               heatmap_legend_param = list("at" = c(-1, 0, 1), "color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Pearson correlation", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
107 | 
108 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_7a.txt"),
109 |             x = nkx_cor_data,
110 |             row.names = T,
111 |             col.names = T,
112 |             sep = "\t",
113 |             quote = F)
114 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_7b.txt"),
115 |             x = isl1_cor_data,
116 |             row.names = T,
117 |             col.names = T,
118 |             sep = "\t",
119 |             quote = F)
120 | 
121 | #
122 | # Smoothed expression heatmap
123 | #
124 | nkx_data <- nkx_data[nkx_i,]
125 | rownames(nkx_data) <- fData(c1_subset[nkx_de_genes, ][nkx_i, ])$symbol
126 | 
127 | nkx_pseudotime_order <- order(nkx_dpt)
128 | nkx_data_smoothed <- t(apply(nkx_data[, nkx_pseudotime_order], 1, function(r) rollapply(r, 11, mean, fill="extend")))
129 | 
130 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_7c.txt"),
131 |             x = nkx_data_smoothed,
132 |             row.names = T,
133 |             col.names = T,
134 |             sep = "\t",
135 |             quote = F)
136 | 
137 | # Annotation of columns
138 | nkx_cell_annotation_data <- data.frame(Timepoint = pData(c1_subset[, nkx_cells])$Timepoint[nkx_pseudotime_order], Cluster = pData(c1_subset[, nkx_cells])$cluster[nkx_pseudotime_order], Pseudotime = nkx_dpt[nkx_pseudotime_order])
139 | nkx_cell_annotation <- HeatmapAnnotation(nkx_cell_annotation_data, col = list(Timepoint = c("e7.5" = "#36648B", "e8.5" = "#4F94CD", "e9.5" = "#63B8FF"),
140 |                                                                               Cluster = c("1" = "#00008B", "2" = "#8B0000", "3" = "#CDAD00"),
141 |                                                                               Pseudotime = colorRamp2(c(min(nkx_dpt), mean(nkx_dpt), max(nkx_dpt)), colors = c("#36648B", "#4F94CD", "#63B8FF"))),
142 |                                          show_legend = F,
143 |                                          show_annotation_name = T,
144 |                                          annotation_name_gp = gpar(fontsize = 4),
145 |                                          annotation_name_offset = unit(0.8, "mm"),
146 |                                          gap = unit(0.1, "mm"),
147 |                                          annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"), unit(1.5, "mm")))
148 | 
149 | nkx_smooth_heatmap <- Heatmap(nkx_data_smoothed,
150 |                               col = inferno(99),
151 |                               cluster_columns = F,
152 |                               #clustering_distance_rows = 'manhattan',
153 |                               clustering_method_rows = 'ward.D',
154 |                               #split = cl$clustering,
155 |                               row_names_gp = gpar(fontsize = 1.5),
156 |                               column_names_gp = gpar(fontsize = 1.5),
157 |                               show_column_names = F,
158 |                               name = "nkx_smooth",
159 |                               top_annotation = nkx_cell_annotation,
160 |                               heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
161 | 
162 | #
163 | # Gene classification into "upregulated", "downregulated" and "other"
164 | #
165 | nkx_segments <- factor(apply(nkx_data_smoothed, 1, function(g) {
166 |   smntn <- segmentation(as.numeric(g), method = "PELT", plot = F, verbose = F)
167 |   s <- smntn$segment
168 |   s$level <- ifelse(s$means < 1, "off", ifelse(s$means < 5, "low", ifelse(s$means < 8, "moderate", ifelse(s$means < 10, "high", "very high"))))
169 |   s$level <- factor(s$level, levels = c("off", "low", "moderate", "high", "very high"), ordered = T)
170 |   n <- nrow(s)
171 | 
172 |   direction <- paste(sapply(2:nrow(s), function(i) {
173 |     diff <- as.numeric(s$level[i]) - as.numeric(s$level[i-1])
174 |     if(diff >= 1) return("up")
175 |     if(diff <= -1) return("down")
176 |   }), collapse = "-")
177 | 
178 |   class = "other"
179 |   if(nrow(s) > 1) {
180 |     direction <- paste(sapply(2:nrow(s), function(i) {
181 |       diff <- as.numeric(s$level[i]) - as.numeric(s$level[i-1])
182 |       if(diff >= 1) return("up")
183 |       if(diff <= -1) return("down")
184 |     }), collapse = "-")
185 |     if (startsWith(direction, "up") & endsWith(direction, "up")) class <- "upregulated"
186 |     if (startsWith(direction, "down") & endsWith(direction, "down")) class <- "downregulated"
187 |     #if (startsWith(direction, "up") & endsWith(direction, "down")) class <- "intermediate"
188 |     #if (startsWith(direction, "up") & endsWith(direction, "up") & s$level[1] > "off") class <- "upregulated - primed"
189 |     #if (startsWith(direction, "up") & endsWith(direction, "up") & s$level[1] == "off") class <- "upregulated - de novo"
190 |     #if (startsWith(direction, "down") & endsWith(direction, "down")) class <- "downregulated"
191 |   }
192 |   return(class)
193 | }), levels = c("downregulated", "intermediate", "upregulated", "upregulated - primed", "upregulated - de novo", "other"), ordered = T)
194 | 
195 | nkx_smooth_heatmap_class <- Heatmap(nkx_data_smoothed,
196 |                                  col = inferno(99),
197 |                                  cluster_columns = F,
198 |                                  split = nkx_segments,
199 |                                  row_names_gp = gpar(fontsize = 3),
200 |                                  column_names_gp = gpar(fontsize = 1.5),
201 |                                  row_title_gp = gpar(fontsize = 4),
202 |                                  gap = unit(0.5, "mm"),
203 |                                  show_column_names = F,
204 |                                  show_row_dend = F,
205 |                                  name = "nkx_smooth",
206 |                                  top_annotation = nkx_cell_annotation,
207 |                                  heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
208 | 
209 | #
210 | # Isl1 heatmaps
211 | #
212 | 
213 | isl1_cor_heatmap <- Heatmap(isl1_cor_data,
214 |                            col = colorRamp2(seq(from = -1, to = 1, by = 0.25), rev(brewer.pal(9, "RdBu"))),
215 |                            row_names_gp = gpar(fontsize = 2),
216 |                            column_names_gp = gpar(fontsize = 2),
217 |                            show_column_dend = F,
218 |                            column_title = "Isl1+ gene correlation",
219 |                            column_title_gp = gpar(fontface = "bold", fontsize = 8),
220 |                            name = "nkx_cor",
221 |                            heatmap_legend_param = list("at" = c(-1, 0, 1), "color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Pearson correlation", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
222 | 
223 | #
224 | # Smoothed expression heatmap
225 | #
226 | isl1_data <- isl1_data[isl1_i,]
227 | rownames(isl1_data) <- fData(c1_subset[isl1_de_genes, ][isl1_i, ])$symbol
228 | 
229 | isl1_pseudotime_order <- order(isl1_dpt)
230 | isl1_data_smoothed <- t(apply(isl1_data[, isl1_pseudotime_order], 1, function(r) rollapply(r, 11, mean, fill="extend")))
231 | 
232 | isl1_branch1 <- which(isl1_cluster[isl1_pseudotime_order] %in% c(2,3,4,5))
233 | isl1_branch2 <- which(isl1_cluster[isl1_pseudotime_order] %in% c(1,5))
234 | 
235 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_7d.txt"),
236 |             x = isl1_data_smoothed,
237 |             row.names = T,
238 |             col.names = T,
239 |             sep = "\t",
240 |             quote = F)
241 | 
242 | #
243 | # Color mapping function to bring both heatmaps to same scale
244 | #
245 | e_min <- min(isl1_data_smoothed)
246 | e_max <- max(isl1_data_smoothed)
247 | 
248 | breaks <- seq(e_min, e_max, length.out = 99)
249 | colors <- inferno(99)
250 | 
251 | col_fun <- colorRamp2(breaks, colors)
252 | 
253 | 
254 | # Annotation of columns
255 | isl1_cell_annotation_data <- data.frame(Timepoint = pData(c1_subset[, isl1_cells])$Timepoint[isl1_pseudotime_order], Cluster = pData(c1_subset[, isl1_cells])$cluster[isl1_pseudotime_order], Pseudotime = isl1_dpt[isl1_pseudotime_order])
256 | isl1_cell_annotation_data_branch1 <- isl1_cell_annotation_data[isl1_branch1, ]
257 | isl1_cell_annotation_data_branch2 <- isl1_cell_annotation_data[isl1_branch2, ]
258 | 
259 | isl1_cell_annotation <- HeatmapAnnotation(isl1_cell_annotation_data, col = list(Timepoint = c("e7.5" = "#008B45", "e8.5" = "#00CD66", "e9.5" = "#00FF7F"),
260 |                                                                               Cluster = c("1" = "#1B9E77", "2" = "#D95F02", "3" = "#7570B3", "4" = "#E7298A", "5" = "#66A61E"),
261 |                                                                               Pseudotime = colorRamp2(c(min(isl1_dpt), mean(isl1_dpt), max(isl1_dpt)), colors = c("#008B45", "#00CD66", "#00FF7F"))),
262 |                                          show_legend = F,
263 |                                          show_annotation_name = T,
264 |                                          annotation_name_gp = gpar(fontsize = 4),
265 |                                          annotation_name_offset = unit(0.8, "mm"),
266 |                                          gap = unit(0.1, "mm"),
267 |                                          annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"), unit(1.5, "mm")))
268 | 
269 | isl1_cell_annotation_branch1 <- HeatmapAnnotation(isl1_cell_annotation_data_branch1, col = list(Timepoint = c("e7.5" = "#008B45", "e8.5" = "#00CD66", "e9.5" = "#00FF7F"),
270 |                                                                                 Cluster = c("1" = "#1B9E77", "2" = "#D95F02", "3" = "#7570B3", "4" = "#E7298A", "5" = "#66A61E"),
271 |                                                                                 Pseudotime = colorRamp2(c(min(isl1_dpt), mean(isl1_dpt), max(isl1_dpt)), colors = c("#008B45", "#00CD66", "#00FF7F"))),
272 |                                           show_legend = F,
273 |                                           show_annotation_name = T,
274 |                                           annotation_name_gp = gpar(fontsize = 4),
275 |                                           annotation_name_offset = unit(0.8, "mm"),
276 |                                           gap = unit(0.1, "mm"),
277 |                                           annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"), unit(1.5, "mm")))
278 | 
279 | isl1_cell_annotation_branch2 <- HeatmapAnnotation(isl1_cell_annotation_data_branch2, col = list(Timepoint = c("e7.5" = "#008B45", "e8.5" = "#00CD66", "e9.5" = "#00FF7F"),
280 |                                                                                 Cluster = c("1" = "#1B9E77", "2" = "#D95F02", "3" = "#7570B3", "4" = "#E7298A", "5" = "#66A61E"),
281 |                                                                                 Pseudotime = colorRamp2(c(min(isl1_dpt), mean(isl1_dpt), max(isl1_dpt)), colors = c("#008B45", "#00CD66", "#00FF7F"))),
282 |                                           show_legend = F,
283 |                                           show_annotation_name = F,
284 |                                           annotation_name_gp = gpar(fontsize = 4),
285 |                                           annotation_name_offset = unit(0.8, "mm"),
286 |                                           gap = unit(0.1, "mm"),
287 |                                           annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"), unit(1.5, "mm")))
288 | 
289 | isl1_smooth_heatmap <- Heatmap(isl1_data_smoothed,
290 |                               col = col_fun,
291 |                               cluster_columns = F,
292 |                               row_names_gp = gpar(fontsize = 4),
293 |                               column_names_gp = gpar(fontsize = 4),
294 |                               show_column_names = F,
295 |                               name = "isl1_smooth",
296 |                               top_annotation = isl1_cell_annotation,
297 |                               heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
298 | 
299 | isl1_smooth_heatmap_branch1 <- Heatmap(isl1_data_smoothed[, isl1_branch1],
300 |                                col = col_fun,
301 |                                cluster_columns = F,
302 |                                row_names_gp = gpar(fontsize = 1.5),
303 |                                column_names_gp = gpar(fontsize = 1.5),
304 |                                show_column_names = F,
305 |                                name = "isl1_smooth_b1",
306 |                                top_annotation = isl1_cell_annotation_branch1,
307 |                                heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
308 | 
309 | isl1_smooth_heatmap_branch2 <- Heatmap(isl1_data_smoothed[, isl1_branch2],
310 |                                col = col_fun,
311 |                                cluster_columns = F,
312 |                                row_names_gp = gpar(fontsize = 1.5),
313 |                                column_names_gp = gpar(fontsize = 1.5),
314 |                                show_column_names = F,
315 |                                show_row_names = F,
316 |                                name = "isl1_smooth_b2",
317 |                                top_annotation = isl1_cell_annotation_branch2,
318 |                                heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
319 | 
320 | # CLassification
321 | isl1_segments <- factor(apply(isl1_data_smoothed[, isl1_branch1], 1, function(g) {
322 |   smntn <- segmentation(as.numeric(g), method = "PELT", plot = F, verbose = F)
323 |   s <- smntn$segment
324 |   s$level <- ifelse(s$means < 1, "off", ifelse(s$means < 5, "low", ifelse(s$means < 8, "moderate", ifelse(s$means < 10, "high", "very high"))))
325 |   s$level <- factor(s$level, levels = c("off", "low", "moderate", "high", "very high"), ordered = T)
326 |   n <- nrow(s)
327 | 
328 |   class = "other"
329 |   if(nrow(s) > 1) {
330 |     direction <- paste(sapply(2:nrow(s), function(i) {
331 |       diff <- as.numeric(s$level[i]) - as.numeric(s$level[i-1])
332 |       if(diff >= 1) return("up")
333 |       if(diff <= -1) return("down")
334 |     }), collapse = "-")
335 |     if (startsWith(direction, "up") & endsWith(direction, "up")) class <- "upregulated"
336 |     if (startsWith(direction, "down") & endsWith(direction, "down")) class <- "downregulated"
337 |     #if (startsWith(direction, "up") & endsWith(direction, "down")) class <- "intermediate"
338 |     #if (startsWith(direction, "up") & endsWith(direction, "up") & s$level[1] > "off") class <- "upregulated - primed"
339 |     #if (startsWith(direction, "up") & endsWith(direction, "up") & s$level[1] == "off") class <- "upregulated - de novo"
340 |     #if (startsWith(direction, "down") & endsWith(direction, "down")) class <- "downregulated"
341 |   }
342 |   return(class)
343 | }), levels = c("downregulated", "intermediate", "upregulated", "upregulated - primed", "upregulated - de novo", "other"), ordered = T)
344 | 
345 | isl1_smooth_heatmap_class_b1 <- Heatmap(isl1_data_smoothed[, isl1_branch1],
346 |                                     col = col_fun,
347 |                                     cluster_columns = F,
348 |                                     split = isl1_segments,
349 |                                     row_names_gp = gpar(fontsize = 3),
350 |                                     column_names_gp = gpar(fontsize = 1.5),
351 |                                     row_title_gp = gpar(fontsize = 4),
352 |                                     gap = unit(0.5, "mm"),
353 |                                     show_column_names = F,
354 |                                     show_row_dend = F,
355 |                                     name = "isl1_smooth_b1",
356 |                                     top_annotation = isl1_cell_annotation_branch1,
357 |                                     heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
358 | 
359 | isl1_smooth_heatmap_class_b2 <- Heatmap(isl1_data_smoothed[, isl1_branch2],
360 |                                         col = col_fun,
361 |                                         cluster_columns = F,
362 |                                         split = isl1_segments,
363 |                                         row_names_gp = gpar(fontsize = 3),
364 |                                         column_names_gp = gpar(fontsize = 1.5),
365 |                                         row_title_gp = gpar(fontsize = 4),
366 |                                         gap = unit(0.5, "mm"),
367 |                                         show_column_names = F,
368 |                                         show_row_names = F,
369 |                                         show_row_dend = F,
370 |                                         name = "isl1_smooth_b2",
371 |                                         top_annotation = isl1_cell_annotation_branch2,
372 |                                         heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm")))
373 | 
374 | 
375 | #
376 | # Create Figures
377 | #
378 | # Fig2 e,f
379 | fig_2e <- grid.grabExpr(draw(nkx_smooth_heatmap_class, heatmap_legend_side = "bottom"))
380 | fig_2f <- grid.grabExpr(draw(isl1_smooth_heatmap_class_b2 + isl1_smooth_heatmap_class_b1, main_heatmap = "isl1_smooth_b1", heatmap_legend_side = "bottom", gap = unit(1, "mm")))
381 | 
382 | m <- multi_panel_figure(width = 205, height = 230, columns = 7, rows = 7)
383 | 
384 | m <- fill_panel(m, fig_2e, label = "E", column = c(1:3), row = c(4,5,6,7))
385 | m <- fill_panel(m, fig_2f, label = "F", column = c(4:7), row = c(4,5,6,7))
386 | 
387 | ggsave(
388 |   plot = m,
389 |   filename = file.path(parameters$general$path_rfigures, "Figure_2_ef.pdf"),
390 |   width = 205,
391 |   height = 230,
392 |   units = "mm",
393 |   dpi = 600)
394 | 
395 | # Figure 2
396 | m <- multi_panel_figure(width = 205, height = 230, columns = 7, rows = 14)
397 | 
398 | m <- fill_panel(m, nkx_ic_plot, label = "C", column = 5:7, row = 1:3)
399 | m <- fill_panel(m, isl1_ic_plot, label = "D", column = 5:7, row = 4:6)
400 | m <- fill_panel(m, fig_2e, label = "E", column = 1:3, row = 7:14)
401 | m <- fill_panel(m, fig_2f, label = "F", column = 4:7, row = 7:14)
402 | 
403 | ggsave(
404 |   plot = m,
405 |   filename = file.path(parameters$general$path_rfigures, "Figure_2.pdf"),
406 |   width = 205,
407 |   height = 230,
408 |   units = "mm",
409 |   dpi = 600)
410 | 
411 | # Supplementary figure for classified heatmaps
412 | sup_figX_a <- grid.grabExpr(draw(nkx_smooth_heatmap, heatmap_legend_side = "bottom"))
413 | sup_figX_b <- grid.grabExpr(draw(isl1_smooth_heatmap_branch2 + isl1_smooth_heatmap_branch1, main_heatmap = "isl1_smooth_b1", heatmap_legend_side = "bottom", gap = unit(1, "mm"), padding = unit(c(1, 1, 1, 1), "mm")))
414 | 
415 | m <- multi_panel_figure(width = 205, height = 230, columns = 7, rows = 3)
416 | 
417 | m <- fill_panel(m, sup_figX_a, label = "A", column = c(1:3), row = 2)
418 | m <- fill_panel(m, sup_figX_b, label = "B", column = c(4:7), row = 2)
419 | 
420 | ggsave(
421 |   plot = m,
422 |   filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_smoothHeatmap.pdf"),
423 |   width = 205,
424 |   height = 230,
425 |   units = "mm",
426 |   dpi = 600)
427 | 
428 | # Supplementary figure for correlation heatmaps
429 | sup_figX_a <- grid.grabExpr(draw(nkx_cor_heatmap, heatmap_legend_side = "bottom"))
430 | sup_figX_b <- grid.grabExpr(draw(isl1_cor_heatmap, heatmap_legend_side = "bottom"))
431 | 
432 | m <- multi_panel_figure(width = 205, height = 230, columns = 3, rows = 2)
433 | m <- fill_panel(m, sup_figX_a, row = 1, column = c(1,2))
434 | m <- fill_panel(m, sup_figX_b, row = 2, column = c(1,2))
435 | 
436 | ggsave(
437 |   plot = m,
438 |   filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_Corheatmaps.pdf"),
439 |   width = 205,
440 |   height = 230,
441 |   units = "mm",
442 |   dpi = 600)
443 | 


--------------------------------------------------------------------------------