├── README.md └── original-analysis ├── src ├── 05_pseudotime.R ├── DE.R ├── 02_wu-data.R ├── parameters.R ├── 01_normalisation.R ├── 04_clustering.R ├── 03_heterogeneity.R └── 00_filtering.R ├── figure_src ├── Figure_6a.R ├── Figure_6cd.R ├── Figure_8b.R ├── Figure_7bc_8acd.R ├── Figure_8f.R ├── Figure_8g.R ├── Figure_2ef.R ├── Figure_5.R ├── Figure_2cd.R ├── Figure_1e.R ├── Figure_3cd.R └── SupplementaryFigure_7.R └── README.md /README.md: -------------------------------------------------------------------------------- 1 | # Single cell RNA-seq and ATAC seq analysis of cardiac progenitor cell transition states and lineage settlement 2 | 3 | ## Research article 4 | 5 | This repository contains code and supplementary files for the analysis conducted in 6 | 7 | Guangshuai Jia, Jens Preussner, Xi Chen, Stefan Guenther, Xuejun Yuan, Michail Yekelchyk, Carsten Kuenne, Mario Looso, Yonggang Zhou, Sarah Teichmann and Thomas Braun. Single cell RNA-seq and ATAC seq analysis of cardiac progenitor cell transition states and lineage settlement. **Nature Communications** 9, 4877 (*2018*), doi: [10.1038/s41467-018-07307-6](https://doi.org/10.1038/s41467-018-07307-6). 8 | 9 | Please cite the research article when using data from this repository. 10 | 11 | ## Original analysis 12 | 13 | The original analysis relied on the **R programming languange** and used `R` in version `3.5.0`. Additionally, we heavily relied on the bioconductor package [scater](http://bioconductor.org/packages/release/bioc/html/scater.html) in version `scater 1.4.0` for results from single-cell RNA-seq and `scater 1.8.0` for results from single-cell ATAC-seq. The details on the [session](https://stat.ethz.ch/R-manual/R-devel/library/utils/html/sessionInfo.html) can be found in [original-analysis/RNA-seq_sessionInfo.txt](original-analysis/RNA-seq_sessionInfo.txt) and [original-analysis/ATAC-seq_sessionInfo.txt](original-analysis/ATAC-seq_sessionInfo.txt), respectively. 14 | 15 | Please see the accompanying [README.md](original-analysis/README.md) for details on how to replicate preprocessing or re-create figures from the manuscript. 16 | 17 | ## Updated analysis 18 | 19 | In addition to the original analysis, we will provide an up-to-date re-analysis of the data from the manuscript. *Watch* this repository to be notified on further updates. 20 | -------------------------------------------------------------------------------- /original-analysis/src/05_pseudotime.R: -------------------------------------------------------------------------------- 1 | library(scater) 2 | library(destiny) 3 | library(singlecellutils) 4 | library(RColorBrewer) 5 | library(multipanelfigure) 6 | 7 | # 8 | # Load data 9 | # 10 | source("src/parameters.R") 11 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 12 | 13 | # 14 | # Nkx2-5 diffusion map 15 | # 16 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1") 17 | nkx_de_genes <- which(fData(c1_subset)$nkx_de & fData(c1_subset)$nkx_marker) 18 | 19 | data <- get_exprs(c1_subset[nkx_de_genes, nkx_cells], "norm_exprs_sf") 20 | set.seed(1001) 21 | nkx_dm <- DiffusionMap(t(data), distance = 'euclidean') 22 | 23 | tip_cell <- find_tips(nkx_dm)[which(pData(c1_subset[, nkx_cells])$cluster[find_tips(nkx_dm)] == 1)] 24 | 25 | nkx_dpt <- DPT(nkx_dm, tips = tip_cell) 26 | nkx_pseudotime <- nkx_dpt[tip_cell] 27 | 28 | # 29 | # Save dpt and diffusion components to object 30 | # 31 | pData(c1_subset)$dpt <- NA 32 | pData(c1_subset)$dm1 <- NA 33 | pData(c1_subset)$dm2 <- NA 34 | 35 | pData(c1_subset)$dpt[nkx_cells] <- nkx_pseudotime 36 | pData(c1_subset)$dm1[nkx_cells] <- nkx_dm@eigenvectors[, 1] 37 | pData(c1_subset)$dm2[nkx_cells] <- nkx_dm@eigenvectors[, 2] 38 | 39 | save(nkx_dm, file = file.path(parameters$general$path_supdata, "Nkx2-5-DM.Rdata")) 40 | 41 | # 42 | # Isl1 diffusion map 43 | # 44 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 45 | isl1_de_genes <- which(fData(c1_subset)$isl1_de & fData(c1_subset)$isl1_marker) 46 | 47 | data <- get_exprs(c1_subset[isl1_de_genes, isl1_cells], "norm_exprs_sf") 48 | set.seed(1002) 49 | isl1_dm <- DiffusionMap(t(data), distance = 'e') 50 | 51 | tip_cell <- find_tips(isl1_dm)[which(pData(c1_subset[, isl1_cells])$cluster[find_tips(isl1_dm)] == 5)] 52 | 53 | isl_dpt <- DPT(isl1_dm, tips = tip_cell) 54 | isl_pseudotime <- isl_dpt[tip_cell,] 55 | 56 | # 57 | # Save dpt and diffusion components to object 58 | # 59 | pData(c1_subset)$dpt[isl1_cells] <- isl_pseudotime 60 | pData(c1_subset)$dm1[isl1_cells] <- isl1_dm@eigenvectors[, 1] 61 | pData(c1_subset)$dm2[isl1_cells] <- isl1_dm@eigenvectors[, 3] 62 | pData(c1_subset)$dm3[isl1_cells] <- isl1_dm@eigenvectors[, 2] 63 | 64 | save(isl1_dm, file = file.path(parameters$general$path_supdata, "Isl1-DM.Rdata")) 65 | save(c1_subset, file = file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 66 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_6a.R: -------------------------------------------------------------------------------- 1 | library(scater) 2 | library(ggplot2) 3 | library(multipanelfigure) 4 | 5 | # 6 | # Load data 7 | # 8 | source("src/parameters.R") 9 | load(file.path(parameters$general$path_supdata, "wu.Rdata")) 10 | 11 | # 12 | # Smooth muscle marker genes 13 | # 14 | timepoint <- which(pData(wu)$timepoint == "e9.5") 15 | sm_marker_genes <- c("ENSMUSG00000015579", "ENSMUSG00000032085", "ENSMUSG00000001349", "ENSMUSG00000035783", "ENSMUSG00000029761", "ENSMUSG00000022836", "ENSMUSG00000048878", "ENSMUSG00000045667") 16 | 17 | subset <- wu[sm_marker_genes, timepoint] 18 | rownames(subset) <- fData(subset)$symbol 19 | 20 | #plotExpression(subset, x = "type", features = 1:nrow(subset), colour_by = "batch", exprs_values = "norm_exprs_sf", log2_values = F) 21 | 22 | # 23 | # SM cell inference 24 | # 25 | marker_expression <- get_exprs(subset, "norm_exprs_sf") 26 | 27 | is_smc_cl <- list( 28 | as.numeric(marker_expression["Nkx2-5",]) < 1, 29 | as.numeric(marker_expression["Tagln",]) > 2, 30 | as.numeric(marker_expression["Cnn1",]) > 2, 31 | as.numeric(marker_expression["Acta2",]) > 2, 32 | as.numeric(marker_expression["Cald1",]) > 2, 33 | as.numeric(marker_expression["Mylk",]) > 2, 34 | #as.numeric(marker_expression["Hexim1",]) > 2, 35 | as.numeric(marker_expression["Smtnl2",]) > 2 36 | ) 37 | 38 | is_smc_c <- do.call("rbind", is_smc_cl) 39 | is_smc <- (colSums(is_smc_c[2:nrow(is_smc_c), ]) >= 5) & is_smc_c[1, ] 40 | 41 | t <- table(isko = pData(subset)$batch == "KO", is_smc)[c(2,1), c(2,1)] 42 | 43 | tr <- prop.test(t, correct=T, conf.level = 0.99) 44 | 45 | ggMMplot <- function(var1, var2){ 46 | require(ggplot2) 47 | levVar1 <- length(levels(var1)) 48 | levVar2 <- length(levels(var2)) 49 | 50 | jointTable <- prop.table(table(var1, var2)) 51 | plotData <- as.data.frame(jointTable) 52 | plotData$marginVar1 <- prop.table(table(var1)) 53 | plotData$var2Height <- plotData$Freq / plotData$marginVar1 54 | plotData$var1Center <- c(0, cumsum(plotData$marginVar1)[1:levVar1 -1]) + 55 | plotData$marginVar1 / 2 56 | 57 | ggplot(plotData, aes(var1Center, var2Height)) + 58 | geom_bar(stat = "identity", aes(fill = var2, width = marginVar1), col = "White") + 59 | geom_label(aes(label = as.character(var1), x = var1Center, y = 1.05), label.r = unit(0, "lines")) 60 | } 61 | 62 | plot_data <- data.frame(Celltype = factor(ifelse(is_smc, "SMC", "non-SMC"), levels = c("SMC", "non-SMC")), Genotype = factor(ifelse(pData(subset)$batch == "KO", "KO", "WT"), levels = c("WT", "KO"))) 63 | 64 | plot <- ggMMplot(plot_data$Genotype, plot_data$Celltype) + 65 | guides(fill = F) + 66 | scale_fill_manual(values = c("#A8DBA8", "#3B8686")) + 67 | ylab("SMC Proportion") + 68 | xlab("Cells") + 69 | theme(panel.background = element_rect(fill = "transparent"), axis.line.y = element_line(colour = "black"), axis.line.x = element_line(colour = "black"))#, axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank()) 70 | 71 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 5) 72 | m <- fill_panel(m, plot, row = c(1,2)) 73 | ggsave( 74 | plot = m, 75 | filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_SMCprop.pdf"), 76 | width = 205, 77 | height = 230, 78 | units = "mm", 79 | dpi = 600) 80 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_6cd.R: -------------------------------------------------------------------------------- 1 | library(destiny) 2 | library(scater) 3 | library(singlecellutils) 4 | library(multipanelfigure) 5 | 6 | # 7 | # Graphical settings 8 | # 9 | global.settings <- list(layout.heights=list(main.key.padding = -2, bottom.padding = 0, axis.xlab.padding = -0.5), layout.widths = list(ylab.axis.padding = -0.5)) 10 | 11 | # 12 | # Load neccessary data 13 | # 14 | source("src/parameters.R") 15 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 16 | load(file.path(parameters$general$path_supdata, "wg_subset.Rdata")) 17 | load(file.path(parameters$general$path_supdata, "Isl1-DM.Rdata")) 18 | load(file.path(parameters$general$path_supdata, "Nkx2-5-DM.Rdata")) 19 | 20 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1") 21 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 22 | nkx_dm_coords <- cbind(-pData(c1_subset[, nkx_cells])$dm1, -pData(c1_subset[, nkx_cells])$dm2) 23 | isl1_dm_coords <- cbind(-pData(c1_subset[, isl1_cells])$dm1, -pData(c1_subset[, isl1_cells])$dm2, -pData(c1_subset[, isl1_cells])$dm3) 24 | 25 | # 26 | # Predict Nkx-OE onto Nkx2-5 trajectory 27 | # 28 | nkxoe_cells <- which(pData(c1_subset)$Background == "nkx2-5oe" & pData(c1_subset)$Platform == "C1") 29 | m <- match(colnames(nkx_dm@data_env$data), rownames(c1_subset)) 30 | 31 | nkxoe_data <- get_exprs(c1_subset[m, nkxoe_cells], "norm_exprs_sf") 32 | 33 | nkxoe_dcs <- dm_predict(nkx_dm, t(nkxoe_data)) 34 | nkxoe_coords <- matrix(nkxoe_dcs@x, ncol = nkxoe_dcs@Dim[2]) 35 | nkx_dm_coords_oe <- rbind(nkx_dm_coords, cbind(-nkxoe_coords[, 1], -nkxoe_coords[, 2])) 36 | 37 | cluster <- c(pData(c1_subset[, nkx_cells])$cluster, rep("OE", nrow(nkxoe_coords))) 38 | rm_obs <- is.na(cluster) 39 | cluster <- paste0("cluster", cluster[!rm_obs]) 40 | 41 | p_nkx_nkxoe <- colorAMap(nkx_dm_coords_oe[!rm_obs,], colour_by = factor(cluster), palette = parameters$colors$nkxko_cluster_palette, main = list(label = "Nkx2-5 overexpression prediction", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), ylab = list(label = "Dimension 2", cex = 0.6), par.settings = global.settings) 42 | 43 | # 44 | # Predict Nkx2-5OE onto Isl1 trajectory 45 | # 46 | m <- match(colnames(isl1_dm@data_env$data), rownames(c1_subset)) 47 | nkxoe_cells <- which(pData(c1_subset)$Background == "nkx2-5oe" & pData(c1_subset)$Platform == "C1") 48 | nkxoe_data <- get_exprs(c1_subset[m, nkxoe_cells], "norm_exprs_sf") 49 | 50 | nkxoe_dcs <- dm_predict(isl1_dm, t(nkxoe_data)) 51 | nkxoe_coords <- matrix(nkxoe_dcs@x, ncol = nkxoe_dcs@Dim[2]) 52 | isl1_dm_coords_oe <- rbind(isl1_dm_coords[, c(1,2)], cbind(-nkxoe_coords[, 1], -nkxoe_coords[, 3])) 53 | 54 | cluster <- c(pData(c1_subset[, isl1_cells])$cluster, rep("OE", nrow(nkxoe_coords))) 55 | rm_obs <- is.na(cluster) 56 | cluster <- paste0("cluster", cluster[!rm_obs]) 57 | 58 | p_isl1_oe <- colorAMap(isl1_dm_coords_oe[!rm_obs,], colour_by = factor(cluster), palette = parameters$colors$islko_cluster_palette, ylab = list(label = "Dimension 3", cex = 0.6), main = list(label = "Isl1 Nkx2-5 overexpression prediction", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), par.settings = global.settings) 59 | 60 | # 61 | # Save 62 | # 63 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 3) 64 | m <- fill_panel(m, p_nkx_nkxoe) 65 | m <- fill_panel(m, p_isl1_oe) 66 | 67 | ggsave( 68 | plot = m, 69 | filename = file.path(parameters$general$path_rfigures, "Figure6_cd.pdf"), 70 | width = 205, 71 | height = 230, 72 | units = "mm", 73 | dpi = 600) 74 | -------------------------------------------------------------------------------- /original-analysis/src/DE.R: -------------------------------------------------------------------------------- 1 | library(MAST) 2 | library(ROCR) 3 | 4 | differentialExpression <- function(expression, contrasts, fData) { 5 | diff_data <- lapply(names(contrasts), function(l) { 6 | cells <- contrasts[[l]] 7 | 8 | data <- cbind(expression[, cells[[1]]], expression[, cells[[2]]]) 9 | cond_A <- unlist(strsplit(l, "_"))[1] 10 | cond_B <- unlist(strsplit(l, "_"))[2] 11 | 12 | cond <- c(rep(cond_A, length(cells[[1]])), rep(cond_B, length(cells[[2]]))) 13 | 14 | cdat <- data.frame(wellKey = colnames(data), condition = factor(cond), stringsAsFactors = F) 15 | fdat <- data.frame(primerid = rownames(data), stringsAsFactors = F) 16 | sca <- MAST::FromMatrix(class = "SingleCellAssay", 17 | exprsArray=data, 18 | cData = cdat, 19 | fData = fdat) 20 | 21 | zlm <- MAST::zlm(~ condition, sca, method = "bayesglm", ebayes = TRUE, ebayesControl = list(method = "MLE", model = "H1")) 22 | s <- MAST::summary(zlm, doLRT = paste0('condition', cond_B))$datatable 23 | res <- merge(s[contrast==paste0('condition', cond_B) & component=='H',.(primerid, `Pr(>Chisq)`)], #hurdle P values 24 | s[contrast==paste0('condition', cond_B) & component=='logFC', .(primerid, coef, ci.hi, ci.lo)], by='primerid') #logFC coefficients 25 | colnames(res) <- c("geneID", "pval", "lfc", "lfc.hi", "lfc.lo") 26 | # Calculation of FDR 27 | res$fdr <- p.adjust(res$pval, method = "fdr") 28 | 29 | # Calculation of basemeanA expressions 30 | basemeanA <- rowMeans(data[, cond == cond_A]) 31 | m <- match(res$geneID, names(basemeanA)) 32 | res$basemeanA <- basemeanA[m] 33 | 34 | # Calculation of basemeanB expressions 35 | basemeanB <- rowMeans(data[, cond == cond_B]) 36 | m <- match(res$geneID, names(basemeanB)) 37 | res$basemeanB <- basemeanB[m] 38 | 39 | colnames(res[, c("basemeanA", "basemeanB")]) <- c(cond_A, cond_B) 40 | 41 | # Adding number of cells with detectable expression 42 | # n_exprsA <- rowSums(counts(subset[, cond == cond_A]) > 10, na.rm = T) 43 | # m <- match(res$geneID, names(n_exprsA)) 44 | # res$n_exprsA <- n_exprsA[m] 45 | # n_exprsB <- rowSums(counts(subset[, cond == cond_B]) > 10, na.rm = T) 46 | # m <- match(res$geneID, names(n_exprsB)) 47 | # res$n_exprsB <- n_exprsB[m] 48 | 49 | m <- match(res$geneID, rownames(fData)) 50 | res <- cbind(res, fData[m, c("symbol", "biotype", "description")]) 51 | 52 | res 53 | }) 54 | } 55 | 56 | get_auroc <- function(gene, labels) { 57 | gene <- gene[!is.na(labels)] 58 | labels <- na.omit(labels) 59 | score <- rank(gene) 60 | # Get average score for each cluster 61 | ms <- aggregate(score ~ labels, FUN = mean) 62 | # Get cluster with highest average score 63 | posgroup <- ms[ms$score == max(ms$score), ]$labels 64 | # Return NAs if there is a tie for cluster with highest average score (by definition this is 65 | # not cluster specific) 66 | if (length(posgroup) > 1) { 67 | return(c(NA, NA, NA)) 68 | } 69 | # Create 1/0 vector of truths for predictions, cluster with highest average score vs 70 | # everything else 71 | truth <- as.numeric(labels == posgroup) 72 | # Make predictions & get auc using RCOR package. 73 | pred <- prediction(score, truth) 74 | val <- unlist(ROCR::performance(pred, "auc")@y.values) 75 | pval <- suppressWarnings(wilcox.test(score[truth == 1], score[truth == 0])$p.value) 76 | return(c(val, posgroup, pval)) 77 | } 78 | 79 | get_marker_genes <- function(dataset, labels) { 80 | res <- apply(dataset, 1, get_auroc, labels = labels) 81 | res <- data.frame(matrix(unlist(res), ncol = 3, byrow = T)) 82 | colnames(res) <- c("auroc", "clusts", "pvalue") 83 | res$fdr <- p.adjust(res$pvalue) 84 | res$geneID <- rownames(dataset) 85 | return(res) 86 | } 87 | -------------------------------------------------------------------------------- /original-analysis/src/02_wu-data.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(biomaRt) 3 | library(scater) 4 | library(scran) 5 | 6 | source("src/parameters.R") 7 | # 8 | # Load Wu et. al., Nkx2-5 WT data 9 | # 10 | e85_attributes <- read.table("ext_data/e8.5_wt_cell_attributes2.csv", sep=";", header=T) 11 | e95_attributes <- read.table("ext_data/e9.5_wt_cell_attributes.csv", sep=",", header=T) 12 | e105_attributes <- read.table("ext_data/e10.5_wt_cell_attributes.csv", sep=",", header=T) 13 | 14 | wt_metadata <- bind_rows(e85_attributes, e95_attributes, e105_attributes) 15 | wt_metadata$timepoint <- c(rep("e8.5", nrow(e85_attributes)), rep("e9.5", nrow(e95_attributes)), rep("e10.5", nrow(e105_attributes))) 16 | 17 | e85_counts <- read.table("ext_data/e8.5_wt_counts.csv", sep=",", header=T, row.names = 1) 18 | e95_counts <- read.table("ext_data/e9.5_wt_counts.csv", sep=",", header=T, row.names = 1) 19 | e105_counts <- read.table("ext_data/e10.5_wt_counts.csv", sep=",", header=T, row.names = 1) 20 | 21 | wt_counts <- counts <- cbind(e85_counts, e95_counts, e105_counts) 22 | 23 | # 24 | # Load Wu et. al., Nkx2-5 KO data 25 | # 26 | ko_counts <- read.csv("ext_data/e9.5_Nkx2.5mut_counts.csv", row.names = 1) 27 | ko_metadata <- read.csv("ext_data/e9.5_Nkx2.5mut_cell_attributes.csv") 28 | 29 | ko_metadata$timepoint <- "e9.5" 30 | ko_metadata$batch <- "KO" 31 | 32 | # 33 | # Merge counts and metadata 34 | # 35 | assertthat::are_equal(rownames(wt_counts), rownames(ko_counts)) 36 | wu_counts <- cbind(wt_counts, ko_counts) 37 | 38 | wu_metadata <- bind_rows(wt_metadata, ko_metadata) 39 | rownames(wu_metadata) <- make.names(wu_metadata$index) 40 | 41 | assertthat::are_equal(rownames(wu_metadata), colnames(wu_counts)) 42 | 43 | # Filter cells based on quality in metadata 44 | wu_counts <- wu_counts[, wu_metadata$quality] 45 | wu_metadata <- wu_metadata[wu_metadata$quality, ] 46 | 47 | # Filter out genes that are not in our data 48 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 49 | m <- match(rownames(c1_subset), rownames(wu_counts)) 50 | 51 | wu_counts <- wu_counts[na.omit(m), ] 52 | 53 | # 54 | # Fetch featureData from bioMart 55 | # 56 | mart <- useMart("ensembl", dataset = parameters$general$biomart_dataset) 57 | bm <- getBM(c("ensembl_gene_id","external_gene_name","gene_biotype","description"), filters = "ensembl_gene_id", values = rownames(wu_counts), mart = mart) 58 | 59 | # 60 | # Assemble SCESet for Wu data 61 | # 62 | m <- match(rownames(wu_counts), bm$ensembl_gene_id) 63 | data <- data.frame(bm[na.omit(m),]) 64 | rownames(data) <- data$ensembl_gene_id 65 | colnames(data) <- c("gene_id", "symbol","biotype","description") 66 | 67 | # Add neccessary metadata 68 | wu_metadata$Cell <- make.names(wu_metadata$index) 69 | 70 | featureData <- new("AnnotatedDataFrame", data = data[,c(2:ncol(data))]) 71 | phenoData <- new("AnnotatedDataFrame", data = wu_metadata) 72 | 73 | wu <- newSCESet(countData = wu_counts, featureData = featureData, phenoData = phenoData) 74 | 75 | # 76 | # Assign cell cycle 77 | # 78 | pairs <- readRDS(system.file("exdata", parameters$normalisation$cycle_markers, package="scran")) 79 | cellcycle <- cyclone(wu, pairs, gene.names = rownames(wu), assay="counts", verbose=F) 80 | 81 | pData(wu)$cellcycle <- factor(cellcycle$phases) 82 | 83 | # 84 | # sumfactor normalisation 85 | # 86 | clusters <- quickCluster(wu, method = "hclust") 87 | 88 | wu <- computeSumFactors(wu, clusters = clusters, sizes = seq(40, 115, 5)) 89 | wu <- normalize(wu, return_norm_as_exprs = F) 90 | 91 | set_exprs(wu, "norm_exprs_sf") <- get_exprs(wu, "norm_exprs") 92 | 93 | # 94 | # TMM normalisation 95 | # 96 | wu <- normalizeExprs(wu, method="TMM", return_norm_as_exprs = FALSE) 97 | set_exprs(wu, "norm_exprs_lineage") <- get_exprs(wu, "norm_exprs") 98 | 99 | # Save 100 | save(wu, file = file.path(parameters$general$path_supdata, "wu.Rdata")) 101 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_8b.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | 3 | # 4 | # Load QC filtered data 5 | # 6 | load("data/scData_filtered_2.Rda") 7 | 8 | # 9 | # Load TF motif variability data 10 | # 11 | load("data/dev_cluster_specific_2.Rda") 12 | load("data/dev_clustered_cluster_specific_2.Rda") 13 | 14 | load("data/variability_cluster_specific_2.Rda") 15 | load("data/variability_clustered_cluster_specific_2.Rda") 16 | 17 | # 18 | # Get variability data 19 | # 20 | var_cutoff <- 1.5 21 | n_var_cutoff <- which(variability$variability > var_cutoff) 22 | var_data <- chromVAR::deviationScores(dev)[rownames(variability)[n_var_cutoff], ] 23 | 24 | rownames(var_data) <- sapply(rownames(var_data), function(s) { 25 | s <- tolower(gsub("_MA[0-9]+", "", s)) 26 | l <- unlist(strsplit(s, "")) 27 | paste0(toupper(l[1]), paste0(l[2:length(l)], collapse = "")) 28 | }) 29 | 30 | # 31 | # Use TF families to split heatmap rows 32 | # 33 | tf_distance <- as.dist(read.table("data/teichmann_similarity.txt", sep = "\t", header = T, row.names = 1)) 34 | 35 | hc <- hclust(d = tf_distance, method = "average") 36 | tf_clusters <- factor(cutree(hc, h = 0.6)) 37 | 38 | m <- match(rownames(variability)[n_var_cutoff], names(tf_clusters)) 39 | split <- droplevels(tf_clusters[m]) 40 | 41 | split <- factor(split, levels = c("72", "50", "51", "52", "63", "46", "157", "160", "161", "180", "56", "75", "76", "21", "82", "94", "78", "107", "186", "57"), ordered = T) 42 | 43 | # 44 | # Generate Heatmaps 45 | # 46 | heatmaps <- list("reference" = list(branch = "cardiac", cluster = 2), 47 | "intermediate" = list(branch = "cl3", cluster = 3), 48 | "endo" = list(branch = "endo", cluster = 5), 49 | "cardiac" = list(branch = "cardiac", cluster = 1), 50 | "cluster4" = list(branch = "cl4", cluster = 4)) 51 | 52 | col_fun_atac <- circlize::colorRamp2(seq(from = -1, to = 5, length.out = 9), colors = RColorBrewer::brewer.pal(9, "YlGnBu")) 53 | 54 | htlist_input <- lapply(heatmaps, function(x) { 55 | i <- which(SummarizedExperiment::colData(scData_filtered)$.cluster_5 == x$cluster) 56 | 57 | var_data_subset <- var_data[, i] 58 | if(!is.na(x$branch)) { 59 | var_data_subset <- var_data_subset[, order(SummarizedExperiment::colData(scData_filtered)[i, paste0("dpt_", x$branch)])] 60 | } 61 | var_data_smooth <- t(zoo::rollapply(t(var_data_subset), width = 9, FUN = mean, fill = "extend", by.column = T)) 62 | 63 | ComplexHeatmap::Heatmap(var_data_smooth, 64 | col = col_fun_atac, 65 | cluster_columns = F, 66 | split = split, 67 | cluster_rows = T, 68 | show_column_names = F, 69 | show_row_names = ifelse(x$cluster == 2, TRUE, FALSE), 70 | row_names_side = "left", 71 | show_row_dend = F, 72 | name = paste("Cluster", x$cluster), 73 | column_title_side = "top", 74 | column_title = paste("Cluster", x$cluster), 75 | show_heatmap_legend = ifelse(x$cluster == 2, TRUE, FALSE), 76 | row_names_gp = grid::gpar(fontsize = 6), 77 | heatmap_legend_param = list(legend_direction = "horizontal", at = c(-1, 5), labels = c("low", "high"), title = "Motif accessibility of TF")) 78 | }) 79 | 80 | htlist <- Reduce(ComplexHeatmap::add_heatmap, htlist_input) 81 | plot_chromvar_dpt <- grid::grid.grabExpr(ComplexHeatmap::draw(htlist, column_title_gp = grid::gpar(fontface = "bold"), heatmap_legend_side = "bottom")) 82 | 83 | figure_chromvar_dpt <- multipanelfigure::multi_panel_figure(rows = 4, columns = 1, height = 230, width = 205) 84 | figure_chromvar_dpt <- multipanelfigure::fill_panel(figure_chromvar_dpt, plot_chromvar_dpt, row = 2:4) 85 | 86 | ggplot2::ggsave(figure_chromvar_dpt, 87 | file = "figures_2/chromVar_dpt_heatmap.pdf", 88 | height = 230, 89 | width = 205, 90 | unit = "mm") 91 | 92 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_7bc_8acd.R: -------------------------------------------------------------------------------- 1 | load(file = "data/scData_filtered_2.Rda") 2 | 3 | # 4 | # Figure 7 b/c 5 | # 6 | 7 | plot_lsi_clustering <- scater::plotReducedDim(scData_filtered, use_dimred = "tsne", colour_by = ".cluster_5", shape_by = "batch", add_ticks = F) + 8 | ggplot2::ggtitle("LSI + HDBSCAN clustering") + 9 | ggplot2::guides(shape = FALSE) + 10 | ggplot2::xlab("") + ggplot2::ylab("") + 11 | ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), legend.title = ggplot2::element_blank()) + 12 | ggplot2::scale_color_manual(values = c("1" = "#33a02c", "2" = "#6a3d9a", "3" = "#1f78b4", "4" = "#e31a1c", "5" = "#ff7f00")) + 13 | ggplot2::scale_shape_manual(values = c(16, 16)) 14 | 15 | plot_lsi_batch <- scater::plotReducedDim(scData_filtered, use_dimred = "tsne", colour_by = "batch", add_ticks = F) + 16 | ggplot2::ggtitle("LSI: Cell timepoint") + 17 | ggplot2::guides(shape = FALSE) + 18 | ggplot2::xlab("") + ggplot2::ylab("") + 19 | ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), legend.title = ggplot2::element_blank()) 20 | 21 | figure_7bc <- multipanelfigure::multi_panel_figure(rows = 4, columns = 2, width = 205, height = 230) 22 | 23 | figure_7bc <- multipanelfigure::fill_panel(figure_7bc, plot_lsi_clustering, column = 1:2, row = 1:2) 24 | figure_7bc <- multipanelfigure::fill_panel(figure_7bc, plot_lsi_batch, column = 1:2, row = 3:4) 25 | 26 | # 27 | # Figure 8a 28 | # 29 | plot_chromvar_clustering <- scater::plotReducedDim(scData_filtered, use_dimred = "chromvar_cluster_specific", colour_by = ".cluster_5", shape_by = "batch", add_ticks = F) + 30 | ggplot2::ggtitle("Clustering on chromVar sample correlation") + 31 | ggplot2::guides(shape = FALSE, color = FALSE) + 32 | ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), legend.title = ggplot2::element_blank()) + 33 | ggplot2::scale_shape_manual(values = c(16, 16)) + 34 | ggplot2::scale_color_manual(values = c("3" = "#1f78b4", "2" = "#6a3d9a", "1" = "#33a02c", "4" = "#e31a1c", "5" = "#ff7f00")) 35 | 36 | plot_chromvar_batch <- scater::plotReducedDim(scData_filtered, use_dimred = "chromvar_cluster_specific", colour_by = "batch", shape_by = "batch", add_ticks = F) + 37 | ggplot2::ggtitle("Clustering on chromVar sample correlation") + 38 | ggplot2::guides(color = FALSE, shape = FALSE) + 39 | ggplot2::scale_shape_manual(values = c(16, 16)) + 40 | ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), legend.title = ggplot2::element_blank()) 41 | 42 | figure_8a <- multipanelfigure::multi_panel_figure(rows = 4, columns = 2, width = 205, height = 230) 43 | 44 | figure_8a <- multipanelfigure::fill_panel(figure_8a, plot_chromvar_clustering, column = 1:2, row = 1:2) 45 | figure_8a <- multipanelfigure::fill_panel(figure_8a, plot_chromvar_batch, column = 1:2, row = 3:4) 46 | 47 | # 48 | # Figure 8c/d 49 | # 50 | figure_dpt_tsne <- multipanelfigure::multi_panel_figure(columns = 3, rows = 3, width = 205, height = 230) 51 | 52 | dpts <- c("dpt_cardiac", "dpt_endo") 53 | for(d in dpts) { 54 | f <-scater::plotReducedDim(scData_filtered, use_dimred = "tsne", colour_by = d, shape_by = "batch", add_ticks = F) + 55 | ggplot2::ggtitle(paste("LSI:", d)) + 56 | viridis::scale_color_viridis() + 57 | ggplot2::guides(shape = FALSE) + 58 | ggplot2::scale_shape_manual(values = c(16, 16)) + 59 | ggplot2::theme(plot.title = ggplot2::element_text(face="bold", color="black", size=6), 60 | legend.title = ggplot2::element_blank(), 61 | legend.key.size = grid::unit(2, "mm"), 62 | legend.margin = ggplot2::margin(b = 2), 63 | legend.position = c(0.7,1), 64 | legend.justification = c(0,0), 65 | legend.background = ggplot2::element_rect(fill = "white"), 66 | legend.direction = "horizontal", 67 | legend.text = ggplot2::element_text(size = 4)) + 68 | ggplot2::xlab("") + ggplot2::ylab("") 69 | figure_dpt_tsne <- multipanelfigure::fill_panel(figure_dpt_tsne, f) 70 | } 71 | 72 | ggplot2::ggsave(figure_dpt_tsne, 73 | file = "figures_2/LSI_dpt.pdf", 74 | height = 230, 75 | width = 205, 76 | unit = "mm") 77 | 78 | # 79 | # Figure 8f/g 80 | # 81 | -------------------------------------------------------------------------------- /original-analysis/src/parameters.R: -------------------------------------------------------------------------------- 1 | library(scales) 2 | # 3 | # General options 4 | # 5 | parameters <- list( 6 | "general" = list("project" = 'Cardiac progenitor single cells', 7 | "path_rdata" = 'data', 8 | "path_rextdata" = "ext_data", 9 | "path_rfigures" = 'supplementary_figures', 10 | "path_supdata" = 'supplementary_data', 11 | "lower_detection_limit" = 10, 12 | "biomart_dataset" = "mmusculus_gene_ensembl"), 13 | "filtering" = list("qc_columns" = c("total_features", "pct_dropout", "totalalignments", "exonic", "intronic", "intergenic", "secondaryalignments","exprs_feature_controls_housekeeping", "pct_counts_feature_controls_mito", "percent_genes"), 14 | "log_qc_columns" = c("total_features", "totalalignments", "exonic", "intronic", "intergenic","secondaryalignments", "exprs_feature_controls_housekeeping"), 15 | "cell_outlier_columns" = list(pct_counts_feature_controls_mito = c(1.5, "higher", F), total_features = c(2, "both", F), pct_dropout = c(2, "higher", F), exprs_feature_controls_housekeeping = c(2, "lower", F), percent_genes = c(1.5, "both", F)), 16 | "cell_outlier_maxfailed" = 1, 17 | "EpOverA_A" = 2000, 18 | "kOverA_k" = 10, 19 | "kOverA_A" = 10), 20 | "normalisation" = list("cycle_markers" = "mouse_cycle_markers.rds"), 21 | "heterogeneity" = list("het_zscore" = qnorm(0.99)), 22 | "diffusionmaps" = list("de_fdr" = 0.01, 23 | "de_abs_logfc" = 2, 24 | "marker_fdr" = 0.01, 25 | "marker_auroc" = 0.8), 26 | "correlation" = list(), 27 | "colors" = list("nkx_cluster_palette" = c("#8B0000", "#CDAD00", "#00008B"), 28 | "nkx_timepoint_palette" = c("#FF003C", "#248F8D", "#987F69"), 29 | "nkx_run_palette" = c("#ECD078", "#C02942", "#53777A", "#542437"), 30 | "nkxko_cluster_palette" = c(alpha(c("#00008B", "#8B0000", "#CDAD00"), 1), "red"), 31 | "isl_cluster_palette" = c("#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "grey"), 32 | "isl_timepoint_palette" = c("#FF003C", "#248F8D", "#987F69"), 33 | "islko_cluster_palette" = c(alpha(c("#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E"), 0.8), "#FF0001", "grey"), 34 | "run_palette" = c("#ECD078", "#C02942", "#53777A", "#542437", "#0B486B", "#D95B43")) 35 | ) 36 | 37 | # # 38 | # # Options for chapter 02_filtering.Rmd 39 | # # 40 | # # [cellfilter] 41 | # qc_columns <- c("total_features", "pct_dropout", "totalalignments", "exonic", "intronic", "intergenic", "secondaryalignments","exprs_feature_controls_housekeeping", "pct_counts_feature_controls_mito", "percent_genes") 42 | # log_qc_columns <- c("total_features", "totalalignments", "exonic", "intronic", "intergenic","secondaryalignments", "exprs_feature_controls_housekeeping") 43 | # cell_outlier_columns <- list(pct_counts_feature_controls_mito = c(1.5, "higher", F), total_features = c(2, "both", F), pct_dropout = c(2, "higher", F), exprs_feature_controls_housekeeping = c(2, "lower", F), percent_genes = c(1.5, "both", F)) 44 | # cell_outlier_maxfailed <- 1 45 | # 46 | # # [genefilter] 47 | # EpOverA_A <- 2000 48 | # kOverA_k <- 10 49 | # kOverA_A <- lower_detection_limit 50 | # 51 | # # [downsampling] 52 | # 53 | # # 54 | # # Options for chapter 03_normalisation.Rmd 55 | # # 56 | # # [cell cylce] 57 | # cycle_markers <- "mouse_cycle_markers.rds" 58 | # 59 | # # [PCA] 60 | # init_pca_variables = c("Platform", "pct_dropout", "log10_total_counts", "pct_counts_feature_controls_mito", "pct_counts_top_200_endogenous_features", "totalalignments", "total_features", "cellcycle", "Type", "Chip") 61 | # 62 | # # 63 | # # Options for chapter 04_heterogeneity.Rmd 64 | # # 65 | # het_zscore <- qnorm(0.99) 66 | # 67 | # # 68 | # # Options for chapter 05_clustering.Rmd 69 | # # 70 | # timpoint_col_palette <- "Set1" 71 | # cycle_col_palette <- "Set1" 72 | # cluster_col_palette <- "Set2" 73 | # cluster_na_col <- "#969696" 74 | # 75 | # # [differential expression] 76 | # logfc_threshold <- 3 77 | # fdr_threshold <- 0.01 78 | # 79 | # # [violin plots] 80 | # show_max_de_genes <- 20 81 | # 82 | # # 83 | # # Options for 07_diffusion.Rmd 84 | # # 85 | # proj_col <- "#bd0026" 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_8f.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | 3 | # 4 | # Load scATAC-seq data 5 | # 6 | load("data/scData_filtered_2.Rda") 7 | 8 | # 9 | # Load scRNA-seq data 10 | # 11 | RNAseq <- read.table("data/Isl1_cardiac_branch_expression.txt", header = T, row.names = 1, sep = "\t") 12 | 13 | #RNAseq <- RNAseq[c("Hoxa9", "Hoxc10", "Hoxa10", "Hoxd8", "Foxc1", "Foxo4", "Tcf4", "Foxp1", "Gata4", "Gata6", "Gata5"), ] 14 | RNAseq <- RNAseq[c("Hoxa9", "Hoxc10", "Hoxd8", "Tead1", "Tead2", "Tead4", "Hand1", "Tbx5", "Gata4", "Gata6"), ] 15 | split <- c(rep("first", 3), rep("second", 3), rep("third", 4)) 16 | 17 | #RNAseq_scaled <- t(scale(t(RNAseq), scale = F)) 18 | RNAseq_smoothed <- t(zoo::rollapply(t(RNAseq), width = 15, FUN = mean, fill = "extend", by.column = T)) 19 | 20 | # 21 | # Get TF deviation data 22 | # 23 | load("data/dev_cluster_specific_2.Rda") 24 | 25 | # Extract deviation scores 26 | tfs_limited <- c("Hoxa9_MA0594.1", "HOXC10_MA0905.1", "HOXA10_MA0899.1", "Hoxd8_MA0910.1", 27 | "FOXC1_MA0032.2", "FOXO4_MA0848.1", "TCF4_MA0830.1", "FOXP1_MA0481.2", 28 | "Gata4_MA0482.1", "GATA6_MA1104.1", "GATA5_MA0766.1") 29 | tfs_limited <- c("Hoxa9_MA0594.1", "HOXC10_MA0905.1", "Hoxd8_MA0910.1", 30 | "TEAD1_MA0090.2", "TEAD2_MA1121.1", "TEAD4_MA0809.1", 31 | "Hand1_Tcf3_MA0092.1", "TBX5_MA0807.1","Gata4_MA0482.1", "GATA6_MA1104.1") 32 | 33 | var_data_limited <- chromVAR::deviationScores(dev)[tfs_limited, ] 34 | 35 | # 36 | # Get cluster specific cells with their pseudotime 37 | # 38 | cells <- which(SummarizedExperiment::colData(scData_filtered)$.cluster_5 %in% c(1, 2)) 39 | 40 | var_data_limited <- var_data_limited[, cells] 41 | 42 | # Order var_data by dpt 43 | var_data_limited <- var_data_limited[, order(SummarizedExperiment::colData(scData_filtered)$dpt_cardiac[cells])] 44 | 45 | # 46 | # Smooth variability data 47 | # 48 | var_data_limited_smoothed <- t(zoo::rollapply(t(var_data_limited), width = 13, FUN = mean, fill = "extend", by.column = T)) 49 | 50 | # 51 | # Define color mapping 52 | # 53 | col_fun_rna <- circlize::colorRamp2(seq(from = -4, to = 4, length.out = 11), colors = rev(RColorBrewer::brewer.pal(11, "RdBu"))) 54 | col_fun_atac <- circlize::colorRamp2(seq(from = -1, to = 5, length.out = 9), colors = RColorBrewer::brewer.pal(9, "YlGnBu")) 55 | 56 | # 57 | # Draw Heatmap 58 | # 59 | h_left <- ComplexHeatmap::Heatmap(RNAseq_smoothed, 60 | col = col_fun_rna, 61 | split = split, 62 | cluster_columns = F, 63 | cluster_rows = T, 64 | show_row_names = T, 65 | row_names_side = "left", 66 | show_row_dend = F, 67 | name = "Expression", 68 | column_title_side = "top", 69 | column_title = "RNA expression of TF", 70 | width = grid::unit(90, "mm"), 71 | heatmap_legend_param = list(legend_direction = "horizontal", at = c(-4, 4), labels = c("low", "high"))) 72 | 73 | h_right <- ComplexHeatmap::Heatmap(var_data_limited_smoothed, 74 | col = col_fun_atac, 75 | split = split, 76 | cluster_columns = F, 77 | cluster_rows = F, 78 | show_column_names = F, 79 | show_row_names = F, 80 | show_row_dend = F, 81 | name = "Motif accessibility", 82 | column_title_side = "top", 83 | column_title = "Motif accessibility of TF", 84 | width = grid::unit(90, "mm"), 85 | heatmap_legend_param = list(legend_direction = "horizontal", at = c(-1, 5), labels = c("low", "high"))) 86 | 87 | plot_cardiac_RNA_ATAC <- grid::grid.grabExpr(ComplexHeatmap::draw(ComplexHeatmap::add_heatmap(h_left, h_right), column_title_gp = grid::gpar(fontface = "bold"), heatmap_legend_side = "bottom")) 88 | 89 | figure_cardiac_RNA_ATAC <- multipanelfigure::multi_panel_figure(width = 230, height = 205, columns = 1, rows = 1) 90 | figure_cardiac_RNA_ATAC <- multipanelfigure::fill_panel(figure_cardiac_RNA_ATAC, plot_cardiac_RNA_ATAC) 91 | 92 | ggplot2::ggsave(figure_cardiac_RNA_ATAC, 93 | file = "figures_2/Cardiac_RNA_ATAC.pdf", 94 | height = 205, 95 | width = 230, 96 | unit = "mm") 97 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_8g.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | 3 | # 4 | # Load scATAC-seq data 5 | # 6 | load("data/scData_filtered_2.Rda") 7 | 8 | # 9 | # Load scRNA-seq data 10 | # 11 | RNAseq <- read.table("data/Isl1_endo_branch_expression.txt", header = T, row.names = 1, sep = "\t") 12 | 13 | RNAseq <- RNAseq[c("Sox13", "Sox6", "Sox9", "Tead1", "Gata6", "Zeb1", "Fosl2", "Junb", "Gata2", "Tal1"), ] 14 | RNAseq <- RNAseq[c("Gata4", "Gata5", "Sox6", "Sox9", "Sox13", "Tead1", "Tead2", "Tead4", "Gata6", "Gata2"), ] 15 | split <- c(rep("first", 6), rep("second", 4)) 16 | 17 | #RNAseq_scaled <- t(scale(t(RNAseq), scale = F)) 18 | RNAseq_smoothed <- t(zoo::rollapply(t(RNAseq), width = 15, FUN = mean, fill = "extend", by.column = T)) 19 | 20 | # 21 | # Get TF deviation data 22 | # 23 | load("data/dev_cluster_specific_2.Rda") 24 | 25 | # Extract deviation scores 26 | tfs_limited <- c("SOX13_MA1120.1", "Sox6_MA0515.1", "SOX9_MA0077.1", "TEAD1_MA0090.2", 27 | "GATA6_MA1104.1", 28 | "ZEB1_MA0103.3", "FOSL2_JUNB_MA1138.1", "FOSL2_JUNB_MA1138.1", "GATA2_MA0036.3", "GATA1_TAL1_MA0140.2") 29 | tfs_limited <- c("Gata4_MA0482.1", "GATA5_MA0766.1", "Sox6_MA0515.1", "SOX9_MA0077.1", "SOX13_MA1120.1", "TEAD1_MA0090.2", "TEAD2_MA1121.1", "TEAD4_MA0809.1", "GATA6_MA1104.1", "GATA2_MA0036.3") 30 | 31 | var_data_limited <- chromVAR::deviationScores(dev)[tfs_limited, ] 32 | 33 | # 34 | # Get cluster specific cells ordered by their pseudotime 35 | # 36 | heatmaps <- list("reference" = list(branch = "cardiac", cluster = 2), 37 | "intermediate" = list(branch = "cl3", cluster = 3), 38 | "endo" = list(branch = "endo", cluster = 5)) 39 | 40 | var_data_list <- lapply(heatmaps, function(x) { 41 | i <- which(SummarizedExperiment::colData(scData_filtered)$.cluster_5 == x$cluster) 42 | data <- var_data_limited[, i] 43 | if(!is.na(x$branch)) { 44 | data <- data[, order(SummarizedExperiment::colData(scData_filtered)[i, paste0("dpt_", x$branch)])] 45 | } 46 | data 47 | }) 48 | var_data_limited <- do.call("cbind", var_data_list) 49 | 50 | # 51 | # Smooth variability data 52 | # 53 | var_data_limited_smoothed <- t(zoo::rollapply(t(var_data_limited), width = 13, FUN = mean, fill = "extend", by.column = T)) 54 | 55 | # 56 | # Define color mapping 57 | # 58 | col_fun_rna <- circlize::colorRamp2(seq(from = -4, to = 4, length.out = 11), colors = rev(RColorBrewer::brewer.pal(11, "RdBu"))) 59 | col_fun_atac <- circlize::colorRamp2(seq(from = -1, to = 5, length.out = 9), colors = RColorBrewer::brewer.pal(9, "YlGnBu")) 60 | 61 | # 62 | # Draw Heatmap 63 | # 64 | h_left <- ComplexHeatmap::Heatmap(RNAseq_smoothed, 65 | col = col_fun_rna, 66 | split = split, 67 | cluster_columns = F, 68 | cluster_rows = F, 69 | show_row_names = T, 70 | row_names_side = "left", 71 | show_row_dend = F, 72 | name = "Expression", 73 | column_title_side = "top", 74 | column_title = "RNA expression of TF", 75 | width = grid::unit(90, "mm"), 76 | heatmap_legend_param = list(legend_direction = "horizontal", at = c(-4, 4), labels = c("low", "high"))) 77 | 78 | h_right <- ComplexHeatmap::Heatmap(var_data_limited_smoothed, 79 | col = col_fun_atac, 80 | split = split, 81 | cluster_columns = F, 82 | cluster_rows = F, 83 | show_column_names = F, 84 | show_row_names = F, 85 | show_row_dend = F, 86 | name = "Motif accessibility", 87 | column_title_side = "top", 88 | column_title = "Motif accessibility of TF", 89 | width = grid::unit(90, "mm"), 90 | heatmap_legend_param = list(legend_direction = "horizontal", at = c(-1, 5), labels = c("low", "high"))) 91 | 92 | plot_endo_RNA_ATAC <- grid::grid.grabExpr(ComplexHeatmap::draw(ComplexHeatmap::add_heatmap(h_left, h_right), column_title_gp = grid::gpar(fontface = "bold"), heatmap_legend_side = "bottom")) 93 | 94 | figure_endo_RNA_ATAC <- multipanelfigure::multi_panel_figure(width = 230, height = 205, columns = 1, rows = 1) 95 | figure_endo_RNA_ATAC <- multipanelfigure::fill_panel(figure_endo_RNA_ATAC, plot_endo_RNA_ATAC) 96 | 97 | ggplot2::ggsave(figure_endo_RNA_ATAC, 98 | file = "figures_2/Endo_RNA_ATAC.pdf", 99 | height = 205, 100 | width = 230, 101 | unit = "mm") 102 | -------------------------------------------------------------------------------- /original-analysis/src/01_normalisation.R: -------------------------------------------------------------------------------- 1 | library(scran) 2 | library(scater) 3 | library(SCnorm) 4 | library(multipanelfigure) 5 | library(gridExtra) 6 | library(limma) 7 | 8 | # 9 | # Load data 10 | # 11 | source("src/parameters.R") 12 | load(file.path(parameters$general$path_supdata, "scd.RData")) 13 | 14 | # 15 | # Assign cell cycle stage 16 | # 17 | pairs <- readRDS(system.file("exdata", parameters$normalisation$cycle_markers, package="scran")) 18 | cellcycle <- cyclone(scd, pairs, gene.names = rownames(scd), assay="exprs", verbose=F) 19 | 20 | pData(scd)$cellcycle <- factor(cellcycle$phases) 21 | plot(cellcycle$score$G1, cellcycle$score$G2M, pch=16) 22 | 23 | cc_tab <- data.frame(count = table(cellcycle$phases)) 24 | colnames(cc_tab) <- c("Cell cycle stage", "Cells") 25 | 26 | tt1 <- ttheme_minimal(core=list(fg_params=list(hjust=1, x=0.9)), 27 | rowhead=list(fg_params=list(hjust=1, x=0.9)), 28 | colhead = list(fg_params=list(hjust=1, x=0.9))) 29 | 30 | cell_cycle_table <- tableGrob(cc_tab, theme=tt1, rows = NULL) 31 | 32 | # 33 | # Perfom lineage-specific TMM normalization with batch correction 34 | # 35 | # Prepare lineages 36 | lineages <- list(isl1 = pData(scd)$Background == "isl1" | pData(scd)$Background == "isl1ko" | pData(scd)$Background == "nkx2-5oe", 37 | nkx = pData(scd)$Background == "nkx2-5") 38 | 39 | sf.normalized <- lapply(lineages, function(lineage_cells) { 40 | lineage <- scd[, lineage_cells] 41 | 42 | lineage <- computeSumFactors(lineage, clusters = pData(lineage)$Timepoint) 43 | lineage <- normalise(lineage) 44 | get_exprs(lineage, "norm_exprs") 45 | }) 46 | 47 | tmm.normalized <- lapply(lineages, function(lineage_cells) { 48 | lineage <- scd[, lineage_cells] 49 | 50 | # Create design matrix 51 | #design <- matrix(c(rep(1, ncol(lineage)), as.numeric(pData(lineage)$Timepoint)), nrow = ncol(lineage)) 52 | 53 | # Remove batch effect 54 | #corrected <- removeBatchEffect(get_exprs(lineage, "exprs"), design = design, block = pData(lineage)$Platform) 55 | 56 | # Set corrected expression 57 | #set_exprs(lineage, "exprs") <- corrected 58 | 59 | # Normalize lineages 60 | lineage <- normalizeExprs(lineage, method="TMM", return_norm_as_exprs = FALSE) 61 | 62 | # Create design matrix 63 | #design <- matrix(c(rep(1, ncol(lineage)), as.numeric(pData(lineage)$Timepoint)), nrow = ncol(lineage)) 64 | 65 | # Remove batch effect 66 | #tmm <- removeBatchEffect(get_exprs(lineage, "norm_exprs"), design = design, block = pData(lineage)$Platform) 67 | 68 | get_exprs(lineage, "norm_exprs") 69 | }) 70 | 71 | # Establish subsets 72 | isl1_subset <- scd[, lineages$isl1] 73 | set_exprs(isl1_subset, "norm_exprs_lineage") <- tmm.normalized$isl1 74 | set_exprs(isl1_subset, "norm_exprs_sf") <- sf.normalized$isl1 75 | 76 | nkx_subset <- scd[, lineages$nkx] 77 | set_exprs(nkx_subset, "norm_exprs_lineage") <- tmm.normalized$nkx 78 | set_exprs(nkx_subset, "norm_exprs_sf") <- sf.normalized$nkx 79 | 80 | # 81 | # Save created objects for further analysis 82 | # 83 | save(isl1_subset, file = file.path(parameters$general$path_supdata, "Isl1_subset.Rdata")) 84 | save(nkx_subset, file = file.path(parameters$general$path_supdata, "Nkx2-5_subset.Rdata")) 85 | 86 | # 87 | # Prepare data platform subsets 88 | # 89 | subsets <- list(c1 = pData(scd)$Platform == "C1", 90 | wg = pData(scd)$Platform == "WG") 91 | 92 | # 93 | # Perfom lineage- and platform-specific TMM normalisation 94 | # 95 | 96 | sf.normalized <- lapply(subsets, function(platform) { 97 | isl_lineage <- platform & (pData(scd)$Background == "isl1" | pData(scd)$Background == "isl1ko" | pData(scd)$Background == "nkx2-5oe") 98 | nkx_lineage <- platform & (pData(scd)$Background == "nkx2-5") 99 | 100 | # Normalize lineages 101 | isl_subset <- scd[, isl_lineage] 102 | isl_subset <- computeSumFactors(isl_subset)#, clusters = pData(isl_subset)$Timepoint) 103 | isl_subset <- normalise(isl_subset) 104 | 105 | nkx_subset <- scd[, nkx_lineage] 106 | nkx_subset <- computeSumFactors(nkx_subset)#, clusters = pData(nkx_subset)$Timepoint) 107 | nkx_subset <- normalise(nkx_subset) 108 | 109 | # Bind results and return 110 | sf <- cbind(get_exprs(isl_subset, "norm_exprs"), get_exprs(nkx_subset, "norm_exprs")) 111 | m <- match(colnames(scd), colnames(sf)) 112 | sf[, na.omit(m)] 113 | }) 114 | 115 | c1_subset <- scd[, subsets$c1] 116 | set_exprs(c1_subset, "norm_exprs_sf") <- sf.normalized$c1 117 | 118 | wg_subset <- scd[, subsets$wg] 119 | set_exprs(wg_subset, "norm_exprs_sf") <- sf.normalized$wg 120 | 121 | tmm.normalized <- lapply(subsets, function(platform) { 122 | # Establish lineages 123 | isl_lineage <- platform & (pData(scd)$Background == "isl1" | pData(scd)$Background == "isl1ko" | pData(scd)$Background == "nkx2-5oe") 124 | nkx_lineage <- platform & (pData(scd)$Background == "nkx2-5") 125 | 126 | # Normalize lineages 127 | isl_subset <- scd[, isl_lineage] 128 | isl_subset <- normalizeExprs(isl_subset, method="TMM", return_norm_as_exprs = FALSE) 129 | 130 | nkx_subset <- scd[, nkx_lineage] 131 | nkx_subset <- normalizeExprs(nkx_subset, method="TMM", return_norm_as_exprs = FALSE) 132 | 133 | # Bind results and return 134 | tmm <- cbind(get_exprs(isl_subset, "norm_exprs"), get_exprs(nkx_subset, "norm_exprs")) 135 | m <- match(colnames(scd), colnames(tmm)) 136 | tmm[, na.omit(m)] 137 | }) 138 | 139 | # Establish subsets 140 | #c1_subset <- scd[, subsets$c1] 141 | set_exprs(c1_subset, "norm_exprs_lineage") <- tmm.normalized$c1 142 | 143 | #wg_subset <- scd[, subsets$wg] 144 | set_exprs(wg_subset, "norm_exprs_lineage") <- tmm.normalized$wg 145 | 146 | # 147 | # Save created objects for further analysis 148 | # 149 | save(c1_subset, file = file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 150 | save(wg_subset, file = file.path(parameters$general$path_supdata, "wg_subset.Rdata")) 151 | 152 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_2ef.R: -------------------------------------------------------------------------------- 1 | library(scater) 2 | library(tidyr) 3 | library(ggplot2) 4 | 5 | # 6 | # Load data 7 | # 8 | source("src/parameters.R") 9 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 10 | 11 | # 12 | # Setup 13 | # 14 | # Genes for Isl1 dpt lineplots 15 | isl1_genes <- factor(c("Isl1", "Mef2c", "Nkx2-5", "Tgfb1li1", "Ankrd1", "Myocd", "Smyd1", "Gata2", "Hand1", "Hhex", "Hoxa7", "Hoxa9", "Hoxb6", "Hoxc8", "Msx1", "Pitx1", "Nkx1-2", "Snai1", "Tbx3", "Tbx4", "Ets2", "Sall4"), 16 | levels = c("Isl1", "Mef2c", "Nkx2-5", "Tgfb1li1", "Ankrd1", "Myocd", "Smyd1", "Gata2", "Hand1", "Hhex", "Hoxa7", "Hoxa9", "Hoxb6", "Hoxc8", "Msx1", "Pitx1", "Nkx1-2", "Snai1", "Tbx3", "Tbx4", "Ets2", "Sall4"), 17 | ordered = T) 18 | 19 | # Genes for Nkx2-5 dpt lineplots 20 | nkx_genes <- factor(c("Nkx2-5", "Ankrd1", "Cdkn2d", "Hopx", "Mef2c", "Myocd", "Smyd1", "Tgfb1li1", "Tbx20", "Dnmt3b", "Gata2", "Gata3", "Hand1", "Msx1"), 21 | levels = c("Nkx2-5", "Ankrd1", "Cdkn2d", "Hopx", "Mef2c", "Myocd", "Smyd1", "Tgfb1li1", "Tbx20", "Dnmt3b", "Gata2", "Gata3", "Hand1", "Msx1"), 22 | ordered = T) 23 | 24 | theme <- theme(panel.grid.major = element_blank(), 25 | panel.background = element_blank(), 26 | strip.background = element_blank(), 27 | strip.text = element_text(face = "bold", size = 5, margin = margin(0.5, 1, 0, 1, unit = "mm")), 28 | panel.border = element_rect(fill = NA, color = "black"), 29 | panel.spacing = grid::unit(0.5,"mm"), 30 | axis.text = element_text(size = 4), 31 | axis.title = element_text(size = 5), 32 | axis.title.x = element_text(margin = margin(0,0,0,0)), 33 | axis.title.y = element_text(margin = margin(0,0,0,0)) 34 | ) 35 | 36 | # 37 | # Generate Isl1 dpt line plots 38 | # 39 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 40 | 41 | m <- match(isl1_genes, fData(c1_subset)$symbol) 42 | isl1_expression <- as.data.frame(get_exprs(c1_subset[na.omit(m), isl1_cells], "norm_exprs_sf")) 43 | isl1_expression$symbol <- isl1_genes[!is.na(m)] 44 | 45 | # Tidyr gather into tidy data 46 | isl1_expression_df <- gather(isl1_expression, key = "cell", value = "expression", -symbol) 47 | 48 | # Add cell metadata 49 | m <- match(isl1_expression_df$cell, colnames(c1_subset[, isl1_cells])) 50 | isl1_expression_df$dpt <- pData(c1_subset[, isl1_cells])$dpt[m] 51 | isl1_expression_df$cluster <- factor(pData(c1_subset[, isl1_cells])$cluster[m]) 52 | 53 | # Exclude endothelial branch 54 | isl1_expression_df <- subset(isl1_expression_df, isl1_expression_df$cluster != 1) 55 | 56 | isl1_dpt_lineplot <- ggplot(isl1_expression_df, aes(x = dpt, y = expression)) + 57 | geom_point(data = subset(isl1_expression_df, !is.na(isl1_expression_df$cluster)), aes(color = cluster), size = 0.3) + 58 | geom_point(data = subset(isl1_expression_df, is.na(isl1_expression_df$cluster)), color = "grey", size = 0.3) + 59 | stat_smooth(span = 0.9, method = "loess", n = 30, color = "grey", se = FALSE) + 60 | scale_color_manual(values = parameters$colors$isl_cluster_palette[2:5]) + 61 | facet_wrap(~ symbol, ncol = 6) + 62 | #ylab("Normalized expression") + 63 | #xlab("Pseudotime") + 64 | ylab("") + 65 | xlab("") + 66 | guides(color = FALSE) + 67 | theme 68 | 69 | # 70 | # Generate Nkx2-5 dpt line plots 71 | # 72 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1") 73 | 74 | m <- match(nkx_genes, fData(c1_subset)$symbol) 75 | nkx_expression <- as.data.frame(get_exprs(c1_subset[na.omit(m), nkx_cells], "norm_exprs_sf")) 76 | nkx_expression$symbol <- nkx_genes[!is.na(m)] 77 | 78 | # Tidyr gather into tidy data 79 | nkx_expression_df <- gather(nkx_expression, key = "cell", value = "expression", -symbol) 80 | 81 | # Add cell metadata 82 | m <- match(nkx_expression_df$cell, colnames(c1_subset[, nkx_cells])) 83 | nkx_expression_df$dpt <- pData(c1_subset[, nkx_cells])$dpt[m] 84 | nkx_expression_df$cluster <- factor(pData(c1_subset[, nkx_cells])$cluster[m]) 85 | 86 | nkx_dpt_lineplot <- ggplot(nkx_expression_df, aes(x = dpt, y = expression)) + 87 | geom_point(data = subset(nkx_expression_df, !is.na(nkx_expression_df$cluster)), aes(color = cluster), size = 0.3) + 88 | geom_point(data = subset(nkx_expression_df, is.na(nkx_expression_df$cluster)), color = "grey", size = 0.3) + 89 | stat_smooth(span = 0.9, method = "loess", n = 30, color = "grey", se = FALSE) + 90 | scale_color_manual(values = parameters$colors$nkx_cluster_palette) + 91 | facet_wrap(~ symbol, nrow = 3) + 92 | #ylab("Normalized expression") + 93 | #xlab("Pseudotime") + 94 | ylab("") + 95 | xlab("") + 96 | guides(color = FALSE) + 97 | theme 98 | 99 | # 100 | # Save plots (uncomment to activate) 101 | # 102 | figure <- multipanelfigure::multi_panel_figure(width = 205, height = 230, columns = 2, rows = 2) 103 | figure <- multipanelfigure::fill_panel(figure, nkx_dpt_lineplot, column = 1, row = 2, label = "e") 104 | figure <- multipanelfigure::fill_panel(figure, isl1_dpt_lineplot, column = 2, row = 2, label = "f") 105 | 106 | ggsave(plot = figure, 107 | filename = file.path(parameters$general$path_rfigures, "Figure_2_ef.pdf"), 108 | width = 205, 109 | height = 230, 110 | units = "mm", 111 | dpi = 600) 112 | # ggsave(plot = isl1_dpt_lineplot, 113 | # filename = "Figure_2_f.pdf", 114 | # width = 205, 115 | # height = 230, 116 | # units = "mm", 117 | # dpi = 600) 118 | 119 | # 120 | # Save raw tables for Source Data 121 | # 122 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_Figure_2e.txt"), 123 | x = isl1_expression_df, 124 | col.names = T, 125 | row.names = F, 126 | sep = "\t", 127 | quote = F) 128 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_Figure_2f.txt"), 129 | x = nkx_expression_df, 130 | col.names = T, 131 | row.names = F, 132 | sep = "\t", 133 | quote = F) 134 | -------------------------------------------------------------------------------- /original-analysis/README.md: -------------------------------------------------------------------------------- 1 | # Single cell RNA-seq and ATAC seq analysis of cardiac progenitor cell transition states and lineage settlement 2 | 3 | ### R session information 4 | 5 | For requirements, please see the file `RNA-seq_sessionInfo.txt` and `ATAC-seq_sessionInfo.txt` for specific R packages and their versions. 6 | 7 | ### Data download 8 | 9 | Data for the original analysis is approximately 500 MB in size and can be downloaded from a bucket of an S3 object store using [cloudyr/aws.s3](https://github.com/cloudyr/aws.s3): 10 | 11 | From within R, install [cloudyr/aws.s3](https://github.com/cloudyr/aws.s3) as follows and set up the connection to our data server: 12 | 13 | ``` 14 | remotes::install_github("cloudyr/aws.s3") 15 | Sys.setenv("AWS_S3_ENDPOINT" = "mpi-bn.mpg.de", 16 | "AWS_DEFAULT_REGION" = "s3") 17 | 18 | if(!dir.exists("original_analysis/data")) { 19 | dir.create("original_analysis/data", recursive = TRUE) 20 | } 21 | 22 | if(!dir.exists("original_analysis/supplementary_data")) { 23 | dir.create("original_analysis/supplementary_data", recursive = TRUE) 24 | } 25 | ``` 26 | 27 | Then, use the following code snipped to download all files: 28 | 29 | ``` 30 | data_files <- c("Isl1_cardiac_branch_expression.txt", "Isl1_endo_branch_expression.txt", "dev_cluster_specific_2.Rda", "dev_clustered_cluster_specific_2.Rda", "scData_filtered_2.Rda", "variability_cluster_specific_2.Rda", "variability_clustered_cluster_specific_2.Rda") 31 | 32 | for(file in data_files) { 33 | aws.s3::save_object(paste0("original-data/", file), 34 | "data-cpc-2018", 35 | paste0("original_analysis/data/", file)) 36 | } 37 | 38 | supplementary_data_files <- c("Isl1-DM.Rdata", "Isl1-diffExprs.Rdata", "Isl1-markers.Rdata", "Nkx2-5-DM.Rdata", "Nkx2-5-diffExprs.Rdata", "Nkx2-5-markers.Rdata", "c1_subset.Rdata", "wu.Rdata") 39 | 40 | for(file in supplementary_data_files) { 41 | aws.s3::save_object(paste0("original-data/", file), 42 | "data-cpc-2018", 43 | paste0("original_analysis/supplementary_data/", file)) 44 | } 45 | ``` 46 | 47 | or use `aws.s3::save_object()` with the following parameters to download individual files: 48 | 49 | Object | Target | Call 50 | ------ | ------ | ---- 51 | `Isl1_cardiac_branch_expression.txt` | `data` | `aws.s3::save_object("original-data/Isl1_cardiac_branch_expression.txt", "data-cpc-2018", "original_analysis/data/Isl1_cardiac_branch_expression.txt")` 52 | `Isl1_endo_branch_expression.txt` | `data` | `aws.s3::save_object("original-data/Isl1_endo_branch_expression.txt", "data-cpc-2018", "original_analysis/data/Isl1_endo_branch_expression.txt")` 53 | `dev_cluster_specific_2.Rda` | `data` | `aws.s3::save_object("original-data/dev_cluster_specific_2.Rda", "data-cpc-2018", "original_analysis/data/dev_cluster_specific_2.Rda")` 54 | `dev_clustered_cluster_specific_2.Rda` | `data` | `aws.s3::save_object("original-data/dev_clustered_cluster_specific_2.Rda", "data-cpc-2018", "original_analysis/data/dev_clustered_cluster_specific_2.Rda")` 55 | `scData_filtered_2.Rda` | `data` | `aws.s3::save_object("original-data/scData_filtered_2.Rda", "data-cpc-2018", "original_analysis/data/scData_filtered_2.Rda")` 56 | `variability_cluster_specific_2.Rda` | `data` | `aws.s3::save_object("original-data/variability_cluster_specific_2.Rda", "data-cpc-2018", "original_analysis/data/variability_cluster_specific_2.Rda")` 57 | `variability_clustered_cluster_specific_2.Rda` | `data` | `aws.s3::save_object("original-data/variability_clustered_cluster_specific_2.Rda", "data-cpc-2018", "original_analysis/data/variability_clustered_cluster_specific_2.Rda")` 58 | `Isl1-DM.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1-DM.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Isl1-DM.Rdata")` 59 | `Isl1-diffExprs.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1-diffExprs.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/variability_clustered_cluster_specific_2.Rda")` 60 | `Isl1-markers.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1-markers.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Isl1-markers.Rdata")` 61 | `Isl1_cardiac_branch_expression.txt` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1_cardiac_branch_expression.txt", "data-cpc-2018", "original_analysis/supplementary_data/Isl1_cardiac_branch_expression.txt")` 62 | `Isl1_endo_branch_expression.txt` | `supplementary_data` | `aws.s3::save_object("original-data/Isl1_endo_branch_expression.txt", "data-cpc-2018", "original_analysis/supplementary_data/Isl1_endo_branch_expression.txt")` 63 | `Nkx2-5-DM.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Nkx2-5-DM.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Nkx2-5-DM.Rdata")` 64 | `Nkx2-5-diffExprs.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Nkx2-5-diffExprs.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Nkx2-5-diffExprs.Rdata")` 65 | `Nkx2-5-markers.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/Nkx2-5-markers.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/Nkx2-5-markers.Rdata")` 66 | `c1_subset.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/c1_subset.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/c1_subset.Rdata")` 67 | `wu.Rdata` | `supplementary_data` | `aws.s3::save_object("original-data/wu.Rdata", "data-cpc-2018", "original_analysis/supplementary_data/wu.Rdata")` 68 | 69 | Finally, set the working directory to the path `original-analysis` and continue below: 70 | 71 | ``` 72 | setwd("original-analysis") 73 | ``` 74 | 75 | ### RNA-seq analysis 76 | 77 | #### Preprocessing 78 | 79 | R code to preprocess single-cell RNA-seq data from scratch can be found in the `src` directory. Steps (e.g. *quality control*, *filtering*, *normalization*, etc.) should be run in the order indicated by the leading counter in the filename (e.g. `00_filtering.R` before `01_normalisation.R`) 80 | 81 | #### Figures 82 | 83 | We additionally provide scripts to reproduce most of the main figures. The R code can be found in the `figure_src` directory and is split into individual files. Figures can be recreated without the need of *preprocessing* the data. 84 | 85 | ### ATAC-seq analysis 86 | 87 | #### Figures 88 | 89 | We provide scripts to reproduce figures from the ATAC-seq data analysis of the manuscript. The R code can be found in the `figure_src` directory. 90 | 91 | ### Citation 92 | 93 | Please refer to the following research article when using data from this repository: 94 | 95 | Guangshuai Jia, Jens Preussner, Xi Chen, Stefan Guenther, Xuejun Yuan, Michail Yekelchyk, Carsten Kuenne, Mario Looso, Yonggang Zhou, Sarah Teichmann and Thomas Braun. Single cell RNA-seq and ATAC seq analysis of cardiac progenitor cell transition states and lineage settlement. **Nature Communications** 9, 4877 (*2018*), doi: [10.1038/s41467-018-07307-6](https://doi.org/10.1038/s41467-018-07307-6). 96 | -------------------------------------------------------------------------------- /original-analysis/src/04_clustering.R: -------------------------------------------------------------------------------- 1 | library(scater) 2 | library(singlecellutils) 3 | library(Rtsne) 4 | library(multipanelfigure) 5 | library(dplyr) 6 | 7 | # 8 | # Load data 9 | # 10 | source("src/parameters.R") 11 | source("src/DE.R") 12 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 13 | 14 | # 15 | # Load heterogeneous genes 16 | # 17 | # Add variable genes from C1 analysis 18 | het_early <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-nkx.early.txt")) 19 | het_mid <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-nkx.mid.txt")) 20 | het_late <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-nkx.late.txt")) 21 | het_pool <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-nkx.txt")) 22 | 23 | nkx_het <- unique(c(het_early, het_mid, het_late, het_pool)) 24 | 25 | het_early <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.early.txt")) 26 | het_mid <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.mid.txt")) 27 | het_late <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.late.txt")) 28 | het_pool <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.txt")) 29 | 30 | isl1_het <- unique(c(het_early, het_mid, het_late, het_pool)) 31 | 32 | # 33 | # Perform SOM clustering 34 | # 35 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1") 36 | nkx_expression <- get_exprs(c1_subset[, nkx_cells], "norm_exprs_sf") 37 | nkx_expression_scaled <- t(scale(t(nkx_expression[nkx_het, ]))) 38 | 39 | nkx_som <- calcSOM(nkx_expression_scaled, train = 1:nrow(nkx_expression_scaled), num_epochs = 2000, seed = 1004) 40 | nkx_som$codes <- nkx_som$codes[[1]] 41 | 42 | save(nkx_som, file = file.path(parameters$general$path_supdata, "Nkx2-5_SOM.Rdata")) 43 | 44 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 45 | isl1_cells <- which((pData(c1_subset)$Background == "isl1" | pData(c1_subset)$Background == "isl1ko") & pData(c1_subset)$Platform == "C1") 46 | isl1_expression <- get_exprs(c1_subset[, isl1_cells], "norm_exprs_sf") 47 | isl1_expression_scaled <- t(scale(t(isl1_expression[isl1_het, ]))) 48 | 49 | isl1_som <- calcSOM(isl1_expression_scaled, train = 1:nrow(isl1_expression_scaled), num_epochs = 2000, seed = 1004) 50 | isl1_som$codes <- isl1_som$codes[[1]] 51 | 52 | save(isl1_som, file = file.path(parameters$general$path_supdata, "Isl1_SOM.Rdata")) 53 | 54 | hdbscan <- function(data, min_samples = 7L, min_cluster_size = 9L, outlier = 0, seed = NULL) { 55 | if (!is.null(seed)) set.seed(seed) 56 | h <- reticulate::import("hdbscan") 57 | cl <- h$HDBSCAN(min_samples = min_samples, min_cluster_size = min_cluster_size) 58 | labels <- cl$fit_predict(data) + 1 59 | labels[labels == 0] <- outlier 60 | return(factor(labels)) 61 | } 62 | 63 | # 64 | # Nkx2-5 dimension reduction and clustering 65 | # 66 | seed <- 2596 67 | set.seed(seed) 68 | nkx_tsne <- Rtsne(t(nkx_som$codes), perplexity = 15, theta = 0.05, max_iter = 2000) 69 | 70 | nkx_cl <- hdbscan(nkx_tsne$Y, min.samples = 7, min.cluster.size = 9) 71 | nkx_cl$.cluster[nkx_cl$.cluster == 0] <- NA 72 | nkx_cl$.cluster <- factor(nkx_cl$.cluster) 73 | 74 | colorAMap(nkx_tsne$Y, shape_by = factor(pData(c1_subset[, nkx_cells])$Platform), colour_by = nkx_cl$.cluster, pch=16) 75 | colorAMap(nkx_tsne$Y, shape_by = factor(pData(c1_subset[, nkx_cells])$Platform), colour_by = pData(c1_subset[, nkx_cells])$Timepoint, pch=16) 76 | colorAMap(nkx_tsne$Y, colour_by = factor(pData(c1_subset[, nkx_cells])$Platform), shape_by = nkx_cl$.cluster, pch=16) 77 | colorAMap(nkx_tsne$Y, shape_by = factor(pData(c1_subset[, nkx_cells])$Platform), colour_by = nkx_cl$.cluster, pch=16) 78 | 79 | pData(c1_subset)$cluster <- NA 80 | pData(c1_subset)$tsne1 <- NA 81 | pData(c1_subset)$tsne2 <- NA 82 | pData(c1_subset)$cluster[nkx_cells] <- nkx_cl$.cluster 83 | pData(c1_subset)$tsne1[nkx_cells] <- nkx_cl$X1 84 | pData(c1_subset)$tsne2[nkx_cells] <- nkx_cl$X2 85 | 86 | 87 | # 88 | # Isl1 dimension reduction and clustering 89 | # 90 | seed <- 3465 91 | set.seed(seed) 92 | isl1_tsne <- Rtsne(t(isl1_som$codes), perplexity = 15, theta = 0.05, max_iter = 2000) 93 | 94 | isl1_cl <- hdbscan(isl1_tsne$Y, min_samples = 7L, min_cluster_size = 9L) 95 | isl1_cl[isl1_cl == 0] <- NA 96 | isl1_cl <- factor(isl1_cl) 97 | 98 | # i <- which(pData(c1_subset)$Background == "isl1ko" & pData(c1_subset)$Platform == "C1") 99 | # cluster_ko <- factor(pData(c1_subset[, isl1_cells])$cluster, levels = c("1", "2", "3", "4", "5", "KO")) 100 | # cluster_ko[i] <- "KO" 101 | # colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = cluster_ko, pch=16, palette = parameters$colors$islko_cluster_palette) 102 | colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = isl1_cl, pch=16) 103 | colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = pData(c1_subset[, isl1_cells])$Timepoint, pch=16) 104 | colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = isl1_cl, pch=16) 105 | colorAMap(isl1_tsne$Y, shape_by = factor(pData(c1_subset[, isl1_cells])$Platform), colour_by = droplevels(pData(c1_subset[, isl1_cells])$Background), pch=16) 106 | 107 | 108 | pData(c1_subset)$cluster[isl1_cells] <- isl1_cl$.cluster 109 | pData(c1_subset)$tsne1[isl1_cells] <- isl1_cl$X1 110 | pData(c1_subset)$tsne2[isl1_cells] <- isl1_cl$X2 111 | 112 | # 113 | # Nkx2-5 differential expression 114 | # 115 | nkx_contrasts <- list( 116 | cluster1_rest = list(which(nkx_cl$.cluster == 1), which(nkx_cl$.cluster != 1)), 117 | cluster2_rest = list(which(nkx_cl$.cluster == 2), which(nkx_cl$.cluster != 2)), 118 | cluster3_rest = list(which(nkx_cl$.cluster == 3), which(nkx_cl$.cluster != 3)) 119 | ) 120 | 121 | nkx_diff_data <- differentialExpression(nkx_expression, contrasts = nkx_contrasts, fData(c1_subset)) 122 | names(nkx_diff_data) <- names(nkx_contrasts) 123 | 124 | # Nkx2-5 marker 125 | nkx_markers <- get_marker_genes(nkx_expression, nkx_cl$.cluster) 126 | 127 | save(nkx_diff_data, file = file.path(parameters$general$path_supdata, "Nkx2-5-diffExprs.Rdata")) 128 | save(nkx_markers, file = file.path(parameters$general$path_supdata, "Nkx2-5-markers.Rdata")) 129 | 130 | # Add information to object 131 | nkx_de_genes_l <- lapply(names(nkx_diff_data), function(c) { 132 | data <- nkx_diff_data[[c]] 133 | # Add marker gene data 134 | data %>% 135 | left_join(nkx_markers, by = "geneID") %>% 136 | dplyr::rename(marker_pval = pvalue, marker_fdr = fdr.y) -> data_marker 137 | write.table(data_marker, file = file.path(parameters$general$path_supdata, paste0("differentialExpression/Nkx2-5_",c,".txt")), sep="\t", quote = F, row.names = F, col.names = T) 138 | i <- which(data$fdr < parameters$diffusionmaps$de_fdr & data$biotype == "protein_coding" & (data$lfc.hi < -parameters$diffusionmaps$de_abs_logfc | data$lfc.lo > parameters$diffusionmaps$de_abs_logfc)) 139 | data$geneID[i] 140 | }) 141 | nkx_de_genes <- unique(unlist(nkx_de_genes_l)) 142 | 143 | m <- match(nkx_de_genes, rownames(fData(c1_subset))) 144 | fData(c1_subset)$nkx_de <- FALSE 145 | fData(c1_subset)$nkx_de[m] <- TRUE 146 | 147 | i <- which(nkx_markers$auroc > parameters$diffusionmaps$marker_auroc & nkx_markers$fdr < parameters$diffusionmaps$marker_fdr) 148 | nkx_marker_genes <- nkx_markers$geneID[i] 149 | 150 | m <- match(nkx_marker_genes, rownames(fData(c1_subset))) 151 | fData(c1_subset)$nkx_marker <- FALSE 152 | fData(c1_subset)$nkx_marker[m] <- TRUE 153 | 154 | # 155 | # Isl1 differential expression 156 | # 157 | isl1_contrasts <- list( 158 | cluster1_rest = list(which(isl1_cl$.cluster == 1), which(isl1_cl$.cluster != 1)), 159 | cluster2_rest = list(which(isl1_cl$.cluster == 2), which(isl1_cl$.cluster != 2)), 160 | cluster3_rest = list(which(isl1_cl$.cluster == 3), which(isl1_cl$.cluster != 3)), 161 | cluster4_rest = list(which(isl1_cl$.cluster == 4), which(isl1_cl$.cluster != 4)), 162 | cluster5_rest = list(which(isl1_cl$.cluster == 5), which(isl1_cl$.cluster != 5)) 163 | ) 164 | 165 | isl1_diff_data <- differentialExpression(isl1_expression, contrasts = isl1_contrasts, fData(c1_subset)) 166 | names(isl1_diff_data) <- names(isl1_contrasts) 167 | 168 | # Isl1 marker 169 | isl1_markers <- get_marker_genes(isl1_expression, isl1_cl$.cluster) 170 | 171 | save(isl1_diff_data, file = file.path(parameters$general$path_supdata, "Isl1-diffExprs.Rdata")) 172 | save(isl1_markers, file=file.path(parameters$general$path_supdata, "Isl1-markers.Rdata")) 173 | 174 | # Add information to object 175 | isl1_de_genes_l <- lapply(names(isl1_diff_data), function(c) { 176 | data <- isl1_diff_data[[c]] 177 | # Add marker gene data 178 | data %>% 179 | left_join(nkx_markers, by = "geneID") %>% 180 | dplyr::rename(marker_pval = pvalue, marker_fdr = fdr.y) -> data_marker 181 | write.table(data_marker, file = file.path(parameters$general$path_supdata, paste0("differentialExpression/Isl1_",c,".txt")), sep="\t", quote = F, row.names = F, col.names = T) 182 | i <- which(data$fdr < parameters$diffusionmaps$de_fdr & data$biotype == "protein_coding" & (data$lfc.hi < -parameters$diffusionmaps$de_abs_logfc | data$lfc.lo > parameters$diffusionmaps$de_abs_logfc)) 183 | data$geneID[i] 184 | }) 185 | isl1_de_genes <- unique(unlist(isl1_de_genes_l)) 186 | 187 | m <- match(isl1_de_genes, rownames(fData(c1_subset))) 188 | fData(c1_subset)$isl1_de <- FALSE 189 | fData(c1_subset)$isl1_de[m] <- TRUE 190 | 191 | i <- which(isl1_markers$auroc > parameters$diffusionmaps$marker_auroc & isl1_markers$fdr < parameters$diffusionmaps$marker_fdr) 192 | isl1_marker_genes <- isl1_markers$geneID[i] 193 | 194 | m <- match(isl1_marker_genes, rownames(fData(c1_subset))) 195 | fData(c1_subset)$isl1_marker <- FALSE 196 | fData(c1_subset)$isl1_marker[m] <- TRUE 197 | 198 | # 199 | # Save back subset 200 | # 201 | save(c1_subset, file = file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 202 | -------------------------------------------------------------------------------- /original-analysis/src/03_heterogeneity.R: -------------------------------------------------------------------------------- 1 | library(biomaRt) 2 | library(singlecellutils) 3 | library(dplyr) 4 | library(ggplot2) 5 | library(cowplot) 6 | library(UpSetR) 7 | 8 | # 9 | # Load the C1 data 10 | # 11 | source("src/parameters.R") 12 | load(file = file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 13 | #save(wg_subset, file = file.path(parameters$general$path_supdata, "wg_subset.Rdata")) 14 | 15 | het_subset <- c1_subset[fData(c1_subset)$biotype == "protein_coding" & !fData(c1_subset)$is_feature_control_mito, ] 16 | 17 | # 18 | # Assemble list of genes to exclude from being an HVG 19 | # 20 | mart <- useMart("ensembl", dataset = parameters$general$biomart_dataset) 21 | go.cellcycle <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0007049', mart = mart) 22 | go.translation <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0006412', mart = mart) 23 | go.ribosome <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0005840', mart = mart) 24 | 25 | exclude <- (rownames(het_subset) %in% unique(c(go.cellcycle$ensembl_gene_id, go.translation$ensembl_gene_id, go.ribosome$ensembl_gene_id))) 26 | het_subset <- het_subset[!exclude, ] 27 | 28 | # 29 | # Assemble lineages 30 | # 31 | isl1.lineage <- which(pData(c1_subset)$Background == "isl1") 32 | nkx.lineage <- which(pData(c1_subset)$Background == "nkx2-5") 33 | 34 | lineages <- list(isl = isl1.lineage, nkx = nkx.lineage) 35 | 36 | # 37 | # Lineage and Time-point specific heterogeneity 38 | # 39 | early <- which(pData(het_subset)$Timepoint == "e7.5") 40 | mid <- which(pData(het_subset)$Timepoint == "e8.5") 41 | late <- which(pData(het_subset)$Timepoint == "e9.5") 42 | 43 | time.lineages <- list(isl1 = isl1.lineage, isl1.early = intersect(isl1.lineage, early), isl1.mid = intersect(isl1.lineage, mid), isl1.late = intersect(isl1.lineage, late), 44 | nkx = nkx.lineage, nkx.early = intersect(nkx.lineage, early), nkx.mid = intersect(nkx.lineage, mid), nkx.late = intersect(nkx.lineage, late)) 45 | 46 | means <- lapply(time.lineages, function(x) { 47 | subset <- het_subset[, x] 48 | data <- 2^get_exprs(subset, "norm_exprs_lineage")-1 49 | 50 | log(rowMeans(data))/log(10) 51 | }) 52 | 53 | dropouts <- lapply(time.lineages, function(x) { 54 | subset <- het_subset[, x] 55 | data <- 2^get_exprs(subset, "norm_exprs_lineage")-1 56 | 57 | apply(data, 1, dropout.fun) 58 | }) 59 | 60 | cvs <- lapply(time.lineages, function(x) { 61 | subset <- het_subset[, x] 62 | data <- 2^get_exprs(subset, "norm_exprs_lineage")-1 63 | 64 | log10(apply(data, 1, cv2.fun)) 65 | }) 66 | 67 | dependencies <- data.frame(dataset = rep(c("Isl1", "Nkx2-5"), each=nrow(het_subset)*4), timepoint = rep(c("all","e7.5","e8.5","e9.5","all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), mean = unlist(means), dropout = unlist(dropouts), cv = unlist(cvs)) 68 | rownames(dependencies) <- c( 69 | paste0("isl1",rep(c("all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), rep(rownames(het_subset), times=4)), 70 | paste0("nkx",rep(c("all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), rep(rownames(het_subset), times=4))) 71 | 72 | het <- lapply(time.lineages, function(l) { 73 | subset <- het_subset[, l] 74 | data <- 2^get_exprs(subset, "norm_exprs_lineage")-1 75 | 76 | dro <- apply(data, 1, dropout.fun) 77 | cvs <- log10(apply(data, 1, cv2.fun)) 78 | 79 | dm.do <- heterogeneity(data, statistic = "mean", order_by = dro, normalization = "windows", window = 200) 80 | dm.cv <- heterogeneity(data, statistic = "mean", order_by = cvs, normalization = "windows", window = 200) 81 | 82 | unique(c(names(dm.do[which(dm.do > parameters$heterogeneity$het_zscore)]), names(dm.cv[which(dm.cv > parameters$heterogeneity$het_zscore)]))) 83 | }) 84 | 85 | lapply(names(het), function(l) { 86 | write.table(het[[l]], quote = F, row.names = F, col.names = F, file = paste0(parameters$general$path_supdata,"/heterogeneity-",l,".txt")) 87 | }) 88 | 89 | is.het <- c(paste0("isl1all",het[["isl1"]]), paste0("isl1e7.5",het[["isl1.early"]]), paste0("isl1e8.5",het[["isl1.mid"]]), paste0("isl1e9.5",het[["isl1.late"]]), 90 | paste0("nkxall",het[["nkx"]]), paste0("nkxe7.5",het[["nkx.early"]]), paste0("nkxe8.5",het[["nkx.mid"]]), paste0("nkxe9.5",het[["nkx.late"]])) 91 | dependencies$het <- ifelse(rownames(dependencies) %in% is.het, T, F) 92 | 93 | # 94 | # Supplementary Figure S2 95 | # 96 | theme2 <- theme(plot.background = element_blank(), panel.grid.major = element_line(size=.2, colour = "grey"),panel.grid.minor = element_line(size=.1, colour = "grey"), 97 | panel.border = element_blank(),panel.background = element_blank(),axis.line.x = element_line(size=.3),axis.line.y = element_line(size=.3), legend.text = element_text(size=6), 98 | #axis.title.x = element_blank(), axis.title.y = element_blank(), 99 | plot.title = element_text(face="bold", color="black", size=6), legend.key.size = unit(2, "mm"), legend.margin=unit(-25, "mm"), axis.text = element_text(size=6)) 100 | 101 | s2_isl_do <- dependencies %>% 102 | filter(dataset == "Isl1", timepoint == "all") %>% 103 | ggplot(aes_string(x="mean", y="dropout", color = "het")) + 104 | geom_point(size=0.2, aes(alpha=0.3)) + 105 | scale_colour_manual(values = c("black", "red")) + 106 | #ggtitle(paste0("Isl1 heterogeneity")) + 107 | #xlab("Mean gene expression") + 108 | #ylab("Dropout rate") + 109 | theme2 + guides(alpha=FALSE, color=FALSE, size=FALSE) 110 | s2_isl_cv <- dependencies %>% 111 | filter(dataset == "Isl1", timepoint == "all") %>% 112 | ggplot(aes_string(x="mean", y="cv", color = "het")) + 113 | geom_point(size=0.2, aes(alpha=0.3)) + 114 | scale_colour_manual(values = c("black", "red")) + 115 | #ggtitle(paste0("Isl1 heterogeneity")) + 116 | #xlab("Mean gene expression") + 117 | #ylab("Coefficient of variation") + 118 | theme2 + guides(alpha=FALSE, color=FALSE, size=FALSE) 119 | 120 | s2_isl_het <- plot_grid(s2_isl_cv, s2_isl_do) 121 | 122 | s2_nkx_do <- dependencies %>% 123 | filter(dataset == "Isl1", timepoint == "all") %>% 124 | ggplot(aes_string(x="mean", y="dropout", color="het")) + 125 | geom_point(size=0.2, aes(alpha=0.3)) + 126 | scale_colour_manual(values = c("black", "red")) + 127 | #ggtitle(paste0("Nkx2-5 heterogeneity")) + 128 | theme2 + guides(alpha=FALSE, color=FALSE, size=FALSE) 129 | s2_nkx_cv <- dependencies %>% 130 | filter(dataset == "Nkx2-5", timepoint == "all") %>% 131 | ggplot(aes_string(x="mean", y="cv", color="het")) + 132 | geom_point(size=0.2, aes(alpha=0.3)) + 133 | scale_colour_manual(values = c("black", "red")) + 134 | #ggtitle(paste0("Nkx2-5 heterogeneity")) + 135 | theme2 + guides(alpha=FALSE, color=FALSE, size=FALSE) 136 | 137 | s2_nkx_het <- plot_grid(s2_nkx_cv, s2_nkx_do) 138 | p <- grid.grabExpr(upset(fromList(het[c("isl1", "isl1.early", "isl1.late", "isl1.mid")]), order.by = "freq"), wrap = T) 139 | q <- grid.grabExpr(upset(fromList(het[c("nkx", "nkx.early", "nkx.late", "nkx.mid")]), order.by = "freq"), wrap = T) 140 | 141 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 4) 142 | m <- fill_panel(m, s2_isl_het, row = 1, col = 1) 143 | m <- fill_panel(m, s2_nkx_het, row = 1, col = 2) 144 | m <- fill_panel(m, p, row = c(2,3,4), col = 1) 145 | m <- fill_panel(m, q, row = c(2,3,4), col = 2) 146 | 147 | # 148 | # Load the WG data 149 | # 150 | source("src/parameters.R") 151 | load(file = file.path(parameters$general$path_supdata, "wg_subset.Rdata")) 152 | #save(wg_subset, file = file.path(parameters$general$path_supdata, "wg_subset.Rdata")) 153 | 154 | het_subset <- wg_subset[fData(wg_subset)$biotype == "protein_coding" & !fData(wg_subset)$is_feature_control_mito, ] 155 | 156 | # 157 | # Assemble list of genes to exclude from being an HVG 158 | # 159 | mart <- useMart("ensembl", dataset = parameters$general$biomart_dataset) 160 | go.cellcycle <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0007049', mart = mart) 161 | go.translation <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0006412', mart = mart) 162 | go.ribosome <- getBM(attributes=c('ensembl_gene_id'), filters = 'go', values = 'GO:0005840', mart = mart) 163 | 164 | exclude <- (rownames(het_subset) %in% unique(c(go.cellcycle$ensembl_gene_id, go.translation$ensembl_gene_id, go.ribosome$ensembl_gene_id))) 165 | het_subset <- het_subset[!exclude, ] 166 | 167 | # 168 | # Assemble lineages 169 | # 170 | isl1.lineage <- which(pData(wg_subset)$Background == "isl1") 171 | nkx.lineage <- which(pData(wg_subset)$Background == "nkx2-5") 172 | 173 | # 174 | # Lineage and Time-point specific heterogeneity 175 | # 176 | early <- which(pData(het_subset)$Timepoint == "e7.5") 177 | mid <- which(pData(het_subset)$Timepoint == "e8.5") 178 | late <- which(pData(het_subset)$Timepoint == "e9.5") 179 | 180 | time.lineages <- list(isl1 = isl1.lineage, isl1.early = intersect(isl1.lineage, early), isl1.late = intersect(isl1.lineage, late), 181 | nkx = nkx.lineage, nkx.early = intersect(nkx.lineage, early), nkx.mid = intersect(nkx.lineage, mid), nkx.late = intersect(nkx.lineage, late)) 182 | 183 | means <- lapply(time.lineages, function(x) { 184 | subset <- het_subset[, x] 185 | data <- 2^get_exprs(subset, "norm_exprs_lineage")-1 186 | 187 | log(rowMeans(data))/log(10) 188 | }) 189 | 190 | dropouts <- lapply(time.lineages, function(x) { 191 | subset <- het_subset[, x] 192 | data <- 2^get_exprs(subset, "norm_exprs_lineage")-1 193 | 194 | apply(data, 1, dropout.fun) 195 | }) 196 | 197 | cvs <- lapply(time.lineages, function(x) { 198 | subset <- het_subset[, x] 199 | data <- 2^get_exprs(subset, "norm_exprs_lineage")-1 200 | 201 | log10(apply(data, 1, cv2.fun)) 202 | }) 203 | 204 | dependencies <- data.frame(dataset = c(rep("Isl1", times=nrow(het_subset)*3), rep("Nkx2-5", times=nrow(het_subset)*4)), 205 | timepoint = rep(c("all","e7.5","e9.5","all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), 206 | mean = unlist(means), 207 | dropout = unlist(dropouts), 208 | cv = unlist(cvs) 209 | ) 210 | rownames(dependencies) <- c( 211 | paste0("isl1",rep(c("all","e7.5","e9.5"), each=nrow(het_subset)), rep(rownames(het_subset), times=3)), 212 | paste0("nkx",rep(c("all","e7.5","e8.5","e9.5"), each=nrow(het_subset)), rep(rownames(het_subset), times=4))) 213 | 214 | het <- lapply(time.lineages, function(l) { 215 | subset <- het_subset[, l] 216 | data <- 2^get_exprs(subset, "norm_exprs_lineage")-1 217 | 218 | dro <- apply(data, 1, dropout.fun) 219 | cvs <- log10(apply(data, 1, cv2.fun)) 220 | 221 | dm.do <- heterogeneity(data, statistic = "mean", order_by = dro, normalization = "windows", window = 200) 222 | dm.cv <- heterogeneity(data, statistic = "mean", order_by = cvs, normalization = "windows", window = 200) 223 | 224 | unique(c(names(dm.do[which(dm.do > parameters$heterogeneity$het_zscore)]), names(dm.cv[which(dm.cv > parameters$heterogeneity$het_zscore)]))) 225 | }) 226 | 227 | lapply(names(het), function(l) { 228 | write.table(het[[l]], quote = F, row.names = F, col.names = F, file = paste0(parameters$general$path_supdata,"/heterogeneity-wg-",l,".txt")) 229 | }) 230 | 231 | is.het <- c(paste0("isl1all",het[["isl1"]]), paste0("isl1e7.5",het[["isl1.early"]]), paste0("isl1e8.5",het[["isl1.mid"]]), paste0("isl1e9.5",het[["isl1.late"]]), 232 | paste0("nkxall",het[["nkx"]]), paste0("nkxe7.5",het[["nkx.early"]]), paste0("nkxe8.5",het[["nkx.mid"]]), paste0("nkxe9.5",het[["nkx.late"]])) 233 | dependencies$het <- ifelse(rownames(dependencies) %in% is.het, T, F) 234 | -------------------------------------------------------------------------------- /original-analysis/src/00_filtering.R: -------------------------------------------------------------------------------- 1 | library(scater) 2 | library(Rtsne) 3 | library(multipanelfigure) 4 | library(viridis) 5 | library(dplyr) 6 | library(tidyr) 7 | library(gridExtra) 8 | library(singlecellutils) 9 | library(genefilter) 10 | library(RColorBrewer) 11 | 12 | # 13 | # Load data 14 | # 15 | source("src/parameters.R") 16 | load(file.path(parameters$general$path_rdata, "sceData.RData")) 17 | 18 | # 19 | # Create map of QC data 20 | # 21 | qcdata <- pData(sceData)[,parameters$filtering$qc_columns] 22 | qcdata$platform <- factor(pData(sceData)$Platform) 23 | qcdata$background <- factor(pData(sceData)$Background) 24 | 25 | if (!is.null(parameters$filtering$log_qc_columns)) { 26 | qcdata[, parameters$filtering$log_qc_columns] <- log2(qcdata[, parameters$filtering$log_qc_columns] + 1) 27 | } 28 | 29 | is_c1 <- pData(sceData)$Platform == "C1" 30 | 31 | set.seed(5411) 32 | t_c1 <- Rtsne(scale(qcdata[is_c1, parameters$filtering$qc_columns]), perplexity = 40, max_iter = 2000) 33 | set.seed(5411) 34 | t_wg <- Rtsne(scale(qcdata[!is_c1, parameters$filtering$qc_columns]), perplexity = 40, max_iter = 2000) 35 | 36 | # Add coordinates to qcdata 37 | qcdata$tsne.x <- NA 38 | qcdata$tsne.y <- NA 39 | qcdata$tsne.x[is_c1] <- t_c1$Y[,1] 40 | qcdata$tsne.x[!is_c1] <- t_wg$Y[,1] 41 | qcdata$tsne.y[is_c1] <- t_c1$Y[,2] 42 | qcdata$tsne.y[!is_c1] <- t_wg$Y[,2] 43 | 44 | theme1 <- theme(plot.background = element_blank(),panel.grid.major = element_line(size=.2, colour = "grey"),panel.grid.minor = element_line(size=.1, colour = "grey"), 45 | panel.border = element_blank(),panel.background = element_blank(),axis.line.x = element_line(size=.3),axis.line.y = element_line(size=.3), legend.text = element_text(size=8), 46 | axis.title.x = element_blank(),axis.title.y = element_blank(), plot.title = element_text(face="bold", color="black", size=8),legend.key.size = unit(2, "mm"), legend.margin=unit(-25, "mm"), 47 | strip.background = element_blank(), strip.text = element_text(face="bold", color="black", size=6, margin = margin(0,0,-1,0)), axis.text = element_text(color = "black", size = 8)) 48 | 49 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 5) 50 | for(c in parameters$filtering$qc_columns) { 51 | f <- ggplot(qcdata, aes_string(x="tsne.x", y="tsne.y", color=eval(c))) + 52 | geom_point(size=0.5) + 53 | scale_color_viridis(name="") + 54 | facet_wrap(~platform, ncol=2) + 55 | xlab("") + ylab("") + 56 | ggtitle(c) + 57 | theme1 58 | m <- fill_panel(m, f) 59 | } 60 | 61 | #m 62 | 63 | ggsave( 64 | plot = m, 65 | filename = file.path(parameters$general$path_rfigures, "Supplementary-Cell-QC-map.png"), 66 | width = 205, 67 | height = 230, 68 | units = "mm", 69 | dpi = 600) 70 | 71 | # 72 | # Filter cells based on defined criteria 73 | # 74 | # We construct a binary QC matrix, with TRUE meaning that the cell failed for this QC measure. 75 | binary.qc.l <- lapply(names(parameters$filtering$cell_outlier_columns), function(c) { 76 | res <- rep(NA, nrow(qcdata)) 77 | c1o <- isOutlier(qcdata[is_c1, c], nmads = as.numeric(parameters$filtering$cell_outlier_columns[[c]][1]), type=parameters$filtering$cell_outlier_columns[[c]][2], log=as.logical(parameters$filtering$cell_outlier_columns[[c]][3])) 78 | wgo <- isOutlier(qcdata[!is_c1, c], nmads = as.numeric(parameters$filtering$cell_outlier_columns[[c]][1]), type=parameters$filtering$cell_outlier_columns[[c]][2], log=as.logical(parameters$filtering$cell_outlier_columns[[c]][3])) 79 | res[!is_c1] <- wgo 80 | res[is_c1] <- c1o 81 | return(res) 82 | }) 83 | 84 | binary.qc <- do.call("cbind", binary.qc.l) 85 | colnames(binary.qc) <- names(parameters$filtering$cell_outlier_columns) 86 | rownames(binary.qc) <- rownames(qcdata) 87 | 88 | # Determine failed cells as those with more than 1 failed criterium 89 | failed.cells <- (rowSums(binary.qc) > parameters$filtering$cell_outlier_maxfailed) 90 | qcdata$qc.exclude <- failed.cells 91 | 92 | write.table(x=qcdata, file=file.path(parameters$general$path_supdata, "QC-metrics.txt"), sep="\t", quote = F, row.names = T, col.names = T) 93 | pData(sceData)$qc.exclude <- failed.cells 94 | 95 | # 96 | # Supplementary Figure 1: Cell QC 97 | # 98 | theme2 <- theme(plot.background = element_blank(),panel.grid.major = element_line(size=.2, colour = "grey"),panel.grid.minor = element_line(size=.1, colour = "grey"), 99 | panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_line(size=.3), legend.text = element_text(size=6), 100 | axis.title.x = element_blank(), axis.title.y = element_blank(), plot.title = element_text(face="bold", color="black", size=6), legend.key.size = unit(2, "mm"), legend.margin=unit(-25, "mm"), 101 | strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8) ) 102 | 103 | xintercept_colors <- c("#8c510a", "#bf812d","#01665e", "#35978f") 104 | xintercept_platforms <- c("C1", "WG") 105 | get_min <- function(c) { 106 | c(min(qcdata[!binary.qc[,c] & is_c1,c]), min(qcdata[!binary.qc[,c] & !is_c1,c])) 107 | } 108 | get_max <- function(c) { 109 | c(max(qcdata[!binary.qc[,c] & is_c1,c]), max(qcdata[!binary.qc[,c] & !is_c1,c])) 110 | } 111 | 112 | # Mitochondrial content 113 | c <- 'pct_counts_feature_controls_mito' 114 | xintercept_mins <- get_min(c) 115 | xintercept_maxs <- get_max(c) 116 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs) 117 | s1_mito <- ggplot(qcdata, aes_string(x=eval(c))) + 118 | geom_histogram(binwidth = 1) + 119 | geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") + 120 | geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") + 121 | facet_wrap(~platform, ncol=2) + 122 | xlab("") + ylab("") + guides(color=FALSE) + 123 | xlim(0,15) + 124 | #ggtitle(c) + 125 | theme2 126 | 127 | # Total detected features 128 | c <- 'total_features' 129 | xintercept_mins <- get_min(c) 130 | xintercept_maxs <- get_max(c) 131 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs) 132 | s1_features <- ggplot(qcdata, aes_string(x=eval(c))) + 133 | geom_histogram(binwidth = 0.05) + 134 | geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") + 135 | geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") + 136 | facet_wrap(~platform, ncol=2) + 137 | xlab("") + ylab("") + guides(color=FALSE) + 138 | xlim(6,8) + 139 | #ggtitle(c) + 140 | theme1 141 | 142 | # Dropout 143 | c <- 'pct_dropout' 144 | xintercept_mins <- get_min(c) 145 | xintercept_maxs <- get_max(c) 146 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs) 147 | s1_dropout <- ggplot(qcdata, aes_string(x=eval(c))) + 148 | geom_histogram(binwidth = 0.01) + 149 | geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") + 150 | geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") + 151 | facet_wrap(~platform, ncol=2) + 152 | xlab("") + ylab("") + guides(color=FALSE) + 153 | #ggtitle(c) + 154 | theme2 155 | 156 | # Percent of genes 157 | c <- 'percent_genes' 158 | xintercept_mins <- get_min(c) 159 | xintercept_maxs <- get_max(c) 160 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs) 161 | s1_genes <- ggplot(qcdata, aes_string(x=eval(c))) + 162 | geom_histogram(binwidth = 1) + 163 | geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") + 164 | geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") + 165 | facet_wrap(~platform, ncol=2) + 166 | xlab("") + ylab("") + guides(color=FALSE) + 167 | #ggtitle(c) + 168 | theme2 169 | 170 | # Expression of Rplp0 171 | c <- 'exprs_feature_controls_housekeeping' 172 | xintercept_mins <- get_min(c) 173 | xintercept_maxs <- get_max(c) 174 | intercept_data <- data.frame(platform = xintercept_platforms, min = xintercept_mins, max = xintercept_maxs) 175 | s1_rplp <- ggplot(qcdata, aes_string(x=eval(c))) + 176 | geom_histogram(binwidth = 0.1) + 177 | geom_vline(data = intercept_data, aes(xintercept=min), color="#8c510a") + 178 | geom_vline(data = intercept_data, aes(xintercept=max), color="#01665e") + 179 | facet_wrap(~platform, ncol=2) + 180 | xlab("") + ylab("") + guides(color=FALSE) + 181 | xlim(2.5, 4) + 182 | #ggtitle(c) + 183 | theme2 184 | 185 | # QC map with features 186 | s1_qc_features <- ggplot(qcdata, aes_string(x="tsne.x", y="tsne.y", color=eval(c))) + 187 | geom_point(size=0.5) + 188 | scale_color_viridis(name="") + 189 | facet_wrap(~platform, ncol=2) + 190 | xlab("") + ylab("") + 191 | #ggtitle("Total features (log2 scale)") + 192 | theme1 + 193 | theme(legend.position="bottom") 194 | 195 | s1_qc_fail <- ggplot(qcdata, aes_string(x="tsne.x", y="tsne.y", color="qc.exclude")) + 196 | geom_point(size=1) + 197 | scale_colour_manual(values = c("black", "red"), name="") + 198 | facet_wrap(~platform, ncol=2) + 199 | xlab("") + ylab("") + 200 | #ggtitle("QC failed cells") + 201 | guides(color=FALSE) + 202 | theme2 203 | 204 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_1ae.txt"), 205 | x = qcdata[, c("platform", "total_features", "pct_dropout", "pct_counts_feature_controls_mito", "percent_genes", "exprs_feature_controls_housekeeping", "qc.exclude")], 206 | quote = F, 207 | row.names = T, 208 | col.names = T, 209 | sep = "\t") 210 | 211 | qcdata %>% 212 | select(Lineage = background, platform, qc.exclude) %>% 213 | group_by(Lineage, platform, qc.exclude) %>% 214 | summarise(cells = n()) %>% 215 | unite("key", platform, qc.exclude, sep=".") %>% 216 | spread(key, cells) %>% 217 | mutate(WG.TRUE = ifelse(is.na(WG.TRUE), 0, WG.TRUE), WG.FALSE = ifelse(is.na(WG.FALSE), 0, WG.FALSE)) %>% 218 | mutate(C1.sum = C1.TRUE + C1.FALSE, WG.sum = WG.TRUE + WG.FALSE, raw.sum = C1.sum + WG.sum, clean.sum = C1.FALSE + WG.FALSE) %>% 219 | mutate(Raw = paste0(raw.sum, " (", C1.sum, "/", WG.sum, ")")) %>% 220 | mutate(Clean = paste0(clean.sum, " (", C1.FALSE, "/", WG.FALSE, ")")) %>% 221 | select(Lineage, Raw, Clean) -> filter.table.raw 222 | 223 | tt1 <- ttheme_minimal(core=list(fg_params=list(hjust=1, x=0.9)), 224 | rowhead=list(fg_params=list(hjust=1, x=0.9)), 225 | colhead = list(fg_params=list(hjust=1, x=0.9))) 226 | 227 | s1_table <- tableGrob(filter.table.raw, theme=tt1, rows = NULL) 228 | 229 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 5) 230 | m <- fill_panel(m, s1_features, row = 1, col = 1) 231 | m <- fill_panel(m, s1_dropout, row = 2, col = 1) 232 | m <- fill_panel(m, s1_mito, row = 3, col = 1) 233 | m <- fill_panel(m, s1_genes, row = 4, col = 1) 234 | m <- fill_panel(m, s1_rplp, row = 5, col = 1) 235 | m <- fill_panel(m, s1_qc_features, row = c(1,2), column = 2) 236 | m <- fill_panel(m, s1_qc_fail, row = c(3,4), column = 2) 237 | m <- fill_panel(m, s1_table, row = 5, column = 2) 238 | 239 | ggsave( 240 | plot = m, 241 | filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_1.pdf"), 242 | width = 205, 243 | height = 230, 244 | units = "mm", 245 | dpi = 600) 246 | 247 | # 248 | # Gene filtering 249 | # 250 | isl1.lineage <- pData(sceData)$Background == "isl1" 251 | nkx.lineage <- pData(sceData)$Background == "nkx2-5" 252 | 253 | # Build gene filters 254 | aggregate.expr <- EpOverA(A = parameters$filtering$EpOverA_A) 255 | k.cell.expr <- kOverA(k=parameters$filtering$kOverA_k, A=parameters$filtering$kOverA_A) 256 | 257 | # Make sure both platform counts satisfies the criteria 258 | ffun <- filterfun(aggregate.expr, k.cell.expr) 259 | keep_wg_crit <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & !is_c1)]), ffun) 260 | keep_c1_crit <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & is_c1)]), ffun) 261 | 262 | keep_isl1_crit <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & is_c1 & isl1.lineage)]), ffun) 263 | keep_nkx_crit <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & is_c1 & nkx.lineage)]), ffun) 264 | 265 | # Make sure that the corresponding count is not 0 266 | one <- kOverA(k=1,A=0) 267 | 268 | # Make sure that the wafergene count is also not 0 269 | keep_wg <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & !is_c1)]), one) 270 | 271 | # Make sure that the C1 count is also not 0 272 | keep_c1 <- genefilter(counts(sceData[,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude & is_c1)]), one) 273 | 274 | keep <- (keep_isl1_crit | keep_nkx_crit) & keep_c1 & keep_wg 275 | sum(keep) 276 | scd <- sceData[keep,(pData(sceData)$is.sc & !pData(sceData)$qc.exclude)] 277 | scd <- calculateQCMetrics(scd) 278 | 279 | # 280 | # Save object for further downstream analysis 281 | # 282 | save(scd, file = file.path(parameters$general$path_supdata, "scd.Rdata")) 283 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_5.R: -------------------------------------------------------------------------------- 1 | library(scran) 2 | library(scater) 3 | library(singlecellutils) 4 | library(RColorBrewer) 5 | library(viridis) 6 | library(dplyr) 7 | library(tidyr) 8 | library(rioja) 9 | library(multipanelfigure) 10 | library(ComplexHeatmap) 11 | library(circlize) 12 | library(zoo) 13 | 14 | # 15 | # Load data 16 | # 17 | source("src/parameters.R") 18 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 19 | 20 | theme2 <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 21 | panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_line(size=.3), legend.text = element_text(size=6), 22 | axis.title.x = element_blank(), axis.title.y = element_text(color="black", size=10), plot.title = element_text(face="bold", color="black", size=6), legend.key.size = unit(2, "mm"), legend.margin=unit(-25, "mm"), 23 | strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8) ) 24 | 25 | ggMMplot <- function(var1, var2){ 26 | require(ggplot2) 27 | levVar1 <- length(levels(var1)) 28 | levVar2 <- length(levels(var2)) 29 | 30 | jointTable <- prop.table(table(var1, var2)) 31 | plotData <- as.data.frame(jointTable) 32 | plotData$marginVar1 <- prop.table(table(var1)) 33 | plotData$var2Height <- plotData$Freq / plotData$marginVar1 34 | plotData$var1Center <- c(0, cumsum(plotData$marginVar1)[1:levVar1 -1]) + 35 | plotData$marginVar1 / 2 36 | 37 | ggplot(plotData, aes(var1Center, var2Height)) + 38 | geom_bar(stat = "identity", aes(fill = var2, width = marginVar1), col = "White") + 39 | geom_label(aes(label = as.character(var1), x = var1Center, y = 1.05), label.r = unit(0, "lines")) 40 | } 41 | 42 | # 43 | # Isl1 KO Heatmap Top 30 genes 44 | # 45 | n_top_genes <- 30 46 | isl1_wt_cells <- which(pData(c1_subset)$Background == "isl1" & (pData(c1_subset)$cluster == "2" | pData(c1_subset)$cluster == "1" | pData(c1_subset)$cluster == "5")) 47 | isl1_ko_cells <- which(pData(c1_subset)$Background == "isl1ko") 48 | #isl1_ki67 <- get_exprs(c1_subset, "norm_exprs_sf")["ENSMUSG00000031004", c(isl1_wt_cells, isl1_ko_cells)] 49 | 50 | # 51 | # Cycling score 52 | # 53 | load(file.path(parameters$general$path_rextdata, "c2.cpg.whitfield.v6.0.mmusculus.Rdata")) 54 | names(c2_cpg_whitfield) <- gsub("WHITFIELD_CELL_CYCLE_", "", names(c2_cpg_whitfield)) 55 | 56 | # Identify genes from the sets that correlate with the score 57 | cycle_score_genes <- lapply(c2_cpg_whitfield, function(gs) { 58 | m <- match(gs, rownames(c1_subset)) 59 | e <- get_exprs(c1_subset[na.omit(m), c(isl1_wt_cells, isl1_ko_cells)], "exprs") 60 | score <- colMeans(e) 61 | cor <- cor(t(e), score, method = "spearman") 62 | u <- which(cor[,1] > 0.4) 63 | return(names(u)) 64 | }) 65 | 66 | # Calculate scores from genes 67 | cycle_score_l <- lapply(cycle_score_genes, function(g) { 68 | m <- match(g, rownames(c1_subset)) 69 | e <- get_exprs(c1_subset[na.omit(m), c(isl1_wt_cells, isl1_ko_cells)], "exprs") 70 | colMeans(e) 71 | }) 72 | cycle_score <- as.data.frame(t(scale(t(do.call("rbind", cycle_score_l))))) 73 | cycle_score$set <- rownames(cycle_score) 74 | 75 | cycle_score_df <- spread(gather(as.data.frame(cycle_score), key = "cell", value = "score", -set), key = "set", value = "score") 76 | m <- match(cycle_score_df$cell, colnames(c1_subset)) 77 | cycle_score_df$Background <- pData(c1_subset)$Background[m] 78 | 79 | cycle_score_df %>% 80 | group_by(cell) %>% 81 | mutate(max_score = max(G1_S, G2_M)) %>% 82 | ungroup() %>% 83 | mutate(rank = dense_rank(desc(max_score)), 84 | is_cycling = max_score > -0.1) -> cycle_score_df 85 | 86 | isl1_ko_cycleplot <- ggplot(cycle_score_df, aes(x = G1_S, y = G2_M)) + 87 | geom_point(aes(color = is_cycling, shape = Background)) + 88 | scale_color_manual(values = c("red", "black"), labels = c("non cycling", "cycling")) + 89 | scale_shape_manual(values = 16:17, labels = c("Isl1 WT", "Isl1 KO")) + 90 | xlab("G1/S score") + 91 | ylab("G2/M score") + 92 | guides(color = guide_legend(title = "", label = T), shape = guide_legend(title = "")) + 93 | theme_classic() + 94 | theme(axis.title = element_text(size = 7), axis.text = element_text(size = 5)) 95 | 96 | isl1_ko_cyclerankplot <- ggplot(cycle_score_df, aes(x = rank, y = max_score)) + 97 | geom_point(aes(color = is_cycling, shape = Background)) + 98 | scale_color_manual(values = c("red", "black")) + 99 | xlab("Rank") + 100 | ylab("Maximal score (G1/S, G2/M)") + 101 | guides(color = FALSE, shape = FALSE) + 102 | theme_classic() + 103 | theme(axis.title = element_text(size = 7), axis.text = element_text(size = 5)) 104 | 105 | g1_s_g2_m_genes <- unlist(cycle_score_genes[c("G1_S", "G2_M")]) 106 | g1_s_g2_m_lengths <- unlist(lapply(cycle_score_genes[c("G1_S", "G2_M")], length)) 107 | names(g1_s_g2_m_genes) <- rep(names(g1_s_g2_m_lengths), times = g1_s_g2_m_lengths) 108 | 109 | g1_s_g2_m_cor <- cor(t(get_exprs(c1_subset[g1_s_g2_m_genes, cycle_score_df$cell[cycle_score_df$is_cycling]], "exprs"))) 110 | colnames(g1_s_g2_m_cor) <- fData(c1_subset[colnames(g1_s_g2_m_cor),])$symbol 111 | rownames(g1_s_g2_m_cor) <- fData(c1_subset[rownames(g1_s_g2_m_cor),])$symbol 112 | 113 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_9c.txt"), 114 | x = g1_s_g2_m_cor, 115 | row.names = T, 116 | col.names = T, 117 | quote = F, 118 | sep = "\t") 119 | 120 | col_fun <- colorRamp2(seq(-0.9, 0.9, length.out = 10), rev(brewer.pal(10, "RdBu"))) 121 | 122 | isl1_ko_cycle_genes <- Heatmap(g1_s_g2_m_cor, 123 | col = col_fun, 124 | name = "Pearson\ncorrelation", 125 | split = names(g1_s_g2_m_genes), 126 | cluster_columns = F, 127 | cluster_rows = F, 128 | row_names_gp = gpar(fontsize = 6), 129 | column_names_gp = gpar(fontsize = 6)) 130 | 131 | t <- table(isko = droplevels(cycle_score_df$Background) == "isl1ko", is_cycling = cycle_score_df$is_cycling)[c(2,1), c(2,1)] 132 | tr <- prop.test(t, correct=F, conf.level = 0.95) 133 | 134 | isl1_ko_mmplot <- ggMMplot(droplevels(cycle_score_df$Background), cycle_score_df$is_cycling) + 135 | guides(fill = F) + 136 | scale_fill_manual(values = c("#A8DBA8", "#3B8686")) + 137 | ylab("Proportion of cycling cells") + 138 | xlab("Cells") + 139 | theme(panel.background = element_rect(fill = "transparent"), axis.line.y = element_line(colour = "black"), axis.line.x = element_line(colour = "black"))#, axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank()) 140 | 141 | m <- multi_panel_figure(width = c(85,110), height = c(80, 150), label_type = "lower_alpha") 142 | m <- fill_panel(m, isl1_ko_cyclerankplot) 143 | m <- fill_panel(m, isl1_ko_cycleplot) 144 | m <- fill_panel(m, isl1_ko_cycle_genes, column = 1:2) 145 | 146 | ggsave( 147 | plot = m, 148 | filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_9.pdf"), 149 | width = figure_width(m), 150 | height = figure_height(m), 151 | units = "mm", 152 | dpi = 600, 153 | useDingbats = FALSE) 154 | 155 | # 156 | # Find top 30 genes per KO-cluster comparison 157 | # 158 | comparisons <- c("progenitor", "endothelial", "cardiomyocyte") 159 | 160 | isl1_ko_data_l <- lapply(comparisons, function(x) { 161 | d <- read.table(file.path(parameters$general$path_supdata, paste0("differentialExpression/Isl1.", x, "_Isl1KO.txt")), sep="\t", header = T, stringsAsFactors = F) 162 | d$contrast <- x 163 | i <- which(d$fdr < parameters$diffusionmaps$de_fdr & (d$lfc.lo > 1.5 | d$lfc.hi < -1.5) & d$biotype == "protein_coding" & !startsWith(d$symbol, "mt") & !startsWith(d$symbol, "Rpl") & !startsWith(d$symbol, "venus")) 164 | m <- order(abs(d$lfc.hi[i]), decreasing = T) 165 | na.omit(d$geneID[i][m[1:min(n_top_genes, length(m))]]) 166 | }) 167 | 168 | isl1_de_selected <- na.omit(unique(unlist(isl1_ko_data_l))) 169 | isl1_subset <- c1_subset[isl1_de_selected, c(isl1_wt_cells, isl1_ko_cells)] 170 | 171 | # 172 | # Isl1 KO heatmap 173 | # 174 | isl1_ko_cell_order <- order(pData(isl1_subset)$cluster) 175 | 176 | isl1_ko_expression <- t(scale(t(get_exprs(isl1_subset, "norm_exprs_sf")[, isl1_ko_cell_order]))) 177 | rownames(isl1_ko_expression) <- fData(isl1_subset)$symbol 178 | 179 | isl1_ko_dist <- as.dist(1 - cor(isl1_ko_expression, method="pearson")) 180 | 181 | isl1_ko_const_clust <- chclust(isl1_ko_dist) 182 | isl1_ko_const_clust_dendro <- as.dendrogram(isl1_ko_const_clust) 183 | 184 | # Color mapping 185 | e_min <- min(isl1_ko_expression) 186 | e_max <- max(isl1_ko_expression) 187 | 188 | bounds <- max(abs(e_min), abs(e_max)) 189 | 190 | breaks <- seq(-bounds, bounds, length.out = 99) 191 | #breaks <- seq(e_min, e_max, length.out = 99) 192 | colors <- colorRampPalette(rev(brewer.pal(10, "RdBu")))(99) 193 | #colors <- inferno(99) 194 | 195 | col_fun <- colorRamp2(breaks, colors) 196 | 197 | # 198 | # Heatmap Annotation 199 | # 200 | isl1_ko_annotation_data <- data.frame(Genotype = paste(pData(isl1_subset)$Background)) 201 | 202 | isl1_ko_annotation <- HeatmapAnnotation(isl1_ko_annotation_data, 203 | col = list(Genotype = c("isl1" = "#000000", "isl1ko" = "#FF0000")), #008B45, 00CD66 204 | show_legend = F, 205 | show_annotation_name = T, 206 | annotation_name_gp = gpar(fontsize = 6), 207 | annotation_name_offset = unit(0.8, "mm"), 208 | gap = unit(0.1, "mm"), 209 | annotation_height = unit.c(unit(1.5, "mm"))) 210 | 211 | isl1_ko_heatmap <- Heatmap(isl1_ko_expression, 212 | col = col_fun, 213 | top_annotation = isl1_ko_annotation, 214 | cluster_columns = isl1_ko_const_clust_dendro, 215 | cluster_rows = T, 216 | show_column_names = F, 217 | show_row_dend = F, 218 | show_column_dend = F, 219 | name = "islko", 220 | #column_title = "Nkx2-5+ cells", 221 | column_title_gp = gpar(fontface = "bold", fontsize = 8), 222 | #split = rep(paste("Marker genes\ncluster", c(1,2,3)), times = sapply(nkx_de_gene_order, length)), 223 | gap = unit(0.3, "mm"), 224 | row_title_rot = 90, 225 | row_title_gp = gpar(fontsize = 4), 226 | row_names_gp = gpar(fontsize = 6), 227 | heatmap_legend_param = list("plot" = F, "color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 228 | 229 | fig_3c <- grid.grabExpr(draw(isl1_ko_heatmap, heatmap_legend_side = "bottom", padding = unit(c(0, 0, 0, 0), "mm"))) 230 | 231 | # Write out for Source Data file 232 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_Figure5_d.txt"), 233 | x = isl1_ko_expression, 234 | col.names = T, 235 | row.names = T, 236 | sep = "\t", 237 | quote = T) 238 | 239 | # 240 | # Predict Isl1-KO onto Isl1 trajectory 241 | # 242 | library(destiny) 243 | global.settings <- list(layout.heights=list(main.key.padding = -2, bottom.padding = 0, axis.xlab.padding = -0.5), layout.widths = list(ylab.axis.padding = -0.5, right.padding = 0)) 244 | 245 | load(file.path(parameters$general$path_supdata, "Isl1-DM.Rdata")) 246 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 247 | isl1_dm_coords <- cbind(-pData(c1_subset[, isl1_cells])$dm1, -pData(c1_subset[, isl1_cells])$dm2) 248 | 249 | m <- match(colnames(isl1_dm@data_env$data), rownames(c1_subset)) 250 | isl1ko_cells <- which(pData(c1_subset)$Background == "isl1ko") 251 | isl1ko_data <- get_exprs(c1_subset[na.omit(m), isl1ko_cells], "norm_exprs_sf") 252 | 253 | isl1ko_dcs <- dm_predict(isl1_dm, t(isl1ko_data)) 254 | isl1ko_coords <- matrix(isl1ko_dcs@x, ncol = isl1ko_dcs@Dim[2]) 255 | isl1_dm_coords_ko <- rbind(isl1_dm_coords, cbind(-isl1ko_coords[, 1], -isl1ko_coords[, 3])) 256 | 257 | cluster <- c(pData(c1_subset[, isl1_cells])$cluster, rep("KO", nrow(isl1ko_coords))) 258 | rm_obs <- is.na(cluster) 259 | cluster <- paste0("cluster", cluster[!rm_obs]) 260 | 261 | p_isl1_ko <- colorAMap(isl1_dm_coords_ko[!rm_obs,], colour_by = factor(cluster), palette = parameters$colors$islko_cluster_palette, ylab = list(label = "Dimension 3", cex = 0.6), main = list(label = "", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), par.settings = global.settings) 262 | 263 | # 264 | # Assemble figure 265 | # 266 | m <- multi_panel_figure(width = c(69, 69, 64, 5), height = c(65, 165), column_spacing = c(6,0,0,0), panel_label_type = "lower-alpha") 267 | m <- fill_panel(m, "ext_data/Isl1_KO.png", label = "a", row = 1, column = 1) 268 | m <- fill_panel(m, p_isl1_ko, row = 1, column = 2, label = "b") 269 | m <- fill_panel(m, isl1_ko_mmplot, label = "c", row = 1, column = 3:4) 270 | m <- fill_panel(m, fig_3c, label = "d", column = 1:3, row = 2) 271 | 272 | ggsave( 273 | plot = m, 274 | filename = file.path(parameters$general$path_rfigures, "Figure_3.pdf"), 275 | width = figure_width(m), 276 | height = figure_height(m), 277 | units = "mm", 278 | dpi = 600, 279 | useDingbats = FALSE) 280 | 281 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_2cd.R: -------------------------------------------------------------------------------- 1 | library(scater) 2 | library(dplyr) 3 | library(tidyr) 4 | library(singlecellutils) 5 | library(multipanelfigure) 6 | library(gridExtra) 7 | 8 | # 9 | # Load data 10 | # 11 | source("src/parameters.R") 12 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 13 | 14 | theme2 <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 15 | panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_line(size=.3), legend.text = element_text(size=6), 16 | axis.title.x = element_blank(), axis.title.y = element_text(color="black", size=10), plot.title = element_text(face="bold", color="black", size=6), legend.key.size = unit(2, "mm"), legend.margin=unit(-25, "mm"), 17 | strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8), axis.text.x = element_blank()) 18 | 19 | theme_violins <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 20 | panel.border = element_blank(), panel.background = element_blank(), axis.line.y = element_line(size=.3), axis.line.x = element_line(size=.3), legend.text = element_text(size=6), 21 | axis.title.y = element_text(color="black", size=10), axis.title.x = element_blank(), plot.title = element_text(face="bold", color="black", size=6), legend.key.size = unit(2, "mm"), legend.margin=unit(-25, "mm"), 22 | strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8), axis.text.x = element_blank()) 23 | 24 | theme_hists <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 25 | panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_blank(), legend.text = element_text(size=6), 26 | axis.title.x = element_text(color="black", size=10), axis.title.y = element_blank(), plot.title = element_text(face="bold", color="black", size=6), legend.key.size = unit(2, "mm"), legend.margin=unit(-25, "mm"), 27 | strip.background = element_blank(), strip.text = element_blank(), axis.text = element_text(color="black", size=8), axis.ticks.y = element_blank(), axis.text.y = element_blank() ) 28 | 29 | theme_hists_2 <- theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 30 | panel.border = element_blank(), panel.background = element_blank(), axis.line.x = element_line(size=.3), axis.line.y = element_blank(), legend.text = element_text(size=6), 31 | axis.title.x = element_text(color="black", size=10), axis.title.y = element_blank(), plot.title = element_text(face="bold", color="black", size=6), legend.key.size = unit(2, "mm"), legend.margin=unit(-25, "mm"), 32 | strip.background = element_blank(), strip.text = element_blank(), axis.text = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank() ) 33 | 34 | # 35 | # Nkx2-5 IC analysis 36 | # 37 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1") 38 | nkx_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$nkx_de & fData(c1_subset)$nkx_marker)] 39 | nkx_cluster <- factor(pData(c1_subset[, nkx_cells])$cluster) 40 | 41 | nkx_data <- get_exprs(c1_subset[nkx_de_genes, nkx_cells], "norm_exprs_sf") 42 | 43 | nkx_ic_df <- as.data.frame(boot.ic(nkx_data, groups = nkx_cluster, R = 1000, n = 30, p.val = 0.05)) 44 | colnames(nkx_ic_df) <- paste("cluster", levels(nkx_cluster)) 45 | 46 | # Create plots 47 | nkx_ic <- gather(nkx_ic_df, key = "cluster", value = "ic") 48 | nkx_ic$cluster <- factor(nkx_ic$cluster) 49 | 50 | nkx_ic_plot <- ggplot(nkx_ic, aes(x = cluster, y = ic, fill = cluster)) + 51 | geom_boxplot() + 52 | scale_fill_manual(values = parameters$colors$nkx_cluster_palette) + 53 | guides(fill = F) + 54 | ylab("Critical transition index") + 55 | xlab("") + 56 | theme2 + 57 | theme(plot.margin = unit(c(0, -5, 2.5, 0), "mm")) 58 | 59 | # 60 | # Nkx pairwise statistical tests 61 | # 62 | nkx_ptest_list <- apply(combn(levels(nkx_cluster), 2), 2, function(x) { 63 | x <- as.numeric(x) 64 | ks <- ks.test(nkx_ic_df[, x[1]], nkx_ic_df[, x[2]]) 65 | wr <- wilcox.test(nkx_ic_df[, x[1]], nkx_ic_df[, x[2]]) 66 | 67 | data.frame(combination = paste(x[1], "vs.", x[2]), 68 | ks_p_value = ks$p.value, 69 | ks_statistic = ks$statistic, 70 | wilcox_p_value = wr$p.value, 71 | wilcox_statistic = wr$statistic) 72 | }) 73 | nkx_ptest <- data.frame(do.call("rbind", nkx_ptest_list)) 74 | 75 | # 76 | # Calculate transcriptome variance 77 | # 78 | nkx_colors <- parameters$colors$nkx_cluster_palette 79 | names(nkx_colors) <- levels(nkx_cluster) 80 | 81 | nkx_data <- get_exprs(c1_subset[nkx_de_genes, nkx_cells], "norm_exprs_sf") 82 | 83 | nkx_cluster_noise_list <- lapply(levels(nkx_cluster), function(c) { 84 | cells <- which(nkx_cluster == c) 85 | distances <- apply(combn(cells, 2), 2, function(d) { 86 | p <- cor(nkx_data[, d[1]], nkx_data[, d[2]], method = "spearman") 87 | sqrt((1-p)/2) 88 | }) 89 | }) 90 | 91 | nkx_cluster_noise <- data.frame(cluster = rep(levels(nkx_cluster), times = sapply(nkx_cluster_noise_list, length)), distance = unlist(nkx_cluster_noise_list)) 92 | p_nkx_cluster_noise <- ggplot(data = nkx_cluster_noise, aes(x = cluster, y = distance, fill = cluster)) + 93 | geom_violin(trim = F) + 94 | geom_boxplot(width = 0.1, fill = "white") + 95 | scale_fill_manual(values = nkx_colors) + 96 | scale_y_continuous(expand = c(0.05, 0)) + 97 | guides(fill = FALSE, colour = FALSE) + 98 | xlab("") + ylab("Cell to cell distance") + theme_violins + 99 | theme(plot.margin = unit(c(0, -5, 2.5, 0), "mm")) 100 | 101 | # 102 | # Nkx2-5 CxC and GxG correlation analysis 103 | # 104 | nkx_colors <- parameters$colors$nkx_cluster_palette 105 | names(nkx_colors) <- levels(nkx_cluster) 106 | 107 | nkx_correlation_plot_list <- lapply(levels(nkx_cluster), function(c) { 108 | cells <- which(nkx_cluster == c) 109 | ic <- computeIC(nkx_data[, cells], p.val = 1) 110 | 111 | p1 <- ggplot(data.frame(t=ic$between), aes(x=t)) + 112 | geom_histogram(binwidth = 0.01, fill = nkx_colors[c]) + 113 | scale_x_continuous(expand = c(0,0), limits = c(0,1)) + 114 | scale_y_continuous(expand = c(0,0.3)) 115 | 116 | p2 <- ggplot(data.frame(t=ic$within), aes(x=t)) + 117 | geom_histogram(binwidth = 0.01, fill = nkx_colors[c]) + 118 | scale_x_continuous(expand = c(0,0), limits = c(0,1)) + 119 | scale_y_continuous(expand = c(0,0.3)) 120 | 121 | if( c == levels(nkx_cluster)[length(levels(nkx_cluster))] ) { 122 | p1 <- p1 + xlab("Pearson's correlation") + theme_hists 123 | p2 <- p2 + xlab("Pearson's correlation") + theme_hists 124 | } else { 125 | p1 <- p1 + xlab("") + theme_hists_2 126 | p2 <- p2 + xlab("") + theme_hists_2 127 | } 128 | p <- grid.arrange(p2, p1, ncol = 2) 129 | }) 130 | 131 | # 132 | # Isl1 IC analysis 133 | # 134 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 135 | isl1_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$isl1_de & fData(c1_subset)$isl1_marker)] 136 | isl1_cluster <- factor(pData(c1_subset[, isl1_cells])$cluster) 137 | 138 | isl1_ko_cells <- which((pData(c1_subset)$Background == "isl1" | pData(c1_subset)$Background == "isl1ko") & pData(c1_subset)$Platform == "C1") 139 | isl1_ko_cluster <- pData(c1_subset[, isl1_ko_cells])$cluster 140 | isl1_ko_cluster[which((pData(c1_subset[, isl1_ko_cells])$Background == "isl1ko") & pData(c1_subset[, isl1_ko_cells])$Platform == "C1")] <- "KO" 141 | isl1_ko_cluster <- factor(isl1_ko_cluster) 142 | 143 | isl1_data <- get_exprs(c1_subset[isl1_de_genes, isl1_cells], "norm_exprs_sf") 144 | isl1_ko_data <- get_exprs(c1_subset[isl1_de_genes, isl1_ko_cells], "norm_exprs_sf") 145 | 146 | isl1_ic_df <- as.data.frame(boot.ic(isl1_data, groups = isl1_cluster, R = 1000, n = 20, p.val = 0.05)) 147 | colnames(isl1_ic_df) <- paste("cluster", levels(isl1_cluster)) 148 | 149 | isl1_ko_ic_df <- as.data.frame(boot.ic(isl1_ko_data, groups = isl1_ko_cluster, R = 1000, n = 20, p.val = 0.05)) 150 | colnames(isl1_ko_ic_df) <- paste("cluster", levels(isl1_ko_cluster)) 151 | 152 | # Create plots 153 | isl1_ic <- gather(isl1_ic_df, key = "cluster", value = "ic") 154 | isl1_ko_ic <- gather(isl1_ko_ic_df, key = "cluster", value = "ic") 155 | 156 | isl1_ic_plot <- ggplot(isl1_ic, aes(x = cluster, y = ic, fill = cluster)) + 157 | geom_boxplot() + 158 | scale_fill_manual(values = parameters$colors$isl_cluster_palette) + 159 | guides(fill = F) + 160 | ylab("") + 161 | xlab("") + 162 | theme2 163 | 164 | isl1_ko_ic_plot <- ggplot(isl1_ko_ic, aes(x = cluster, y = ic, fill = cluster)) + 165 | geom_boxplot() + 166 | scale_fill_manual(values = parameters$colors$isl_cluster_palette) + 167 | guides(fill = F) + 168 | ylab("") + 169 | xlab("") + 170 | theme2 171 | 172 | # 173 | # Isl1 pairwise statistical tests 174 | # 175 | isl1_ptest_list <- apply(combn(levels(isl1_cluster), 2), 2, function(x) { 176 | x <- as.numeric(x) 177 | ks <- ks.test(isl1_ic_df[, x[1]], isl1_ic_df[, x[2]]) 178 | wr <- wilcox.test(isl1_ic_df[, x[1]], isl1_ic_df[, x[2]]) 179 | 180 | data.frame(combination = paste(x[1], "vs.", x[2]), 181 | ks.p_value = ks$p.value, 182 | ks.statistic = ks$statistic, 183 | wilcox.p_value = wr$p.value, 184 | wilcox.statistic = wr$statistic) 185 | }) 186 | isl1_ptest <- do.call("rbind", isl1_ptest_list) 187 | 188 | # 189 | # Calculate transcriptome variance 190 | # 191 | isl1_colors <- parameters$colors$isl_cluster_palette 192 | names(isl1_colors) <- levels(isl1_cluster) 193 | 194 | isl1_cluster_noise_list <- lapply(levels(isl1_cluster), function(c) { 195 | cells <- which(isl1_cluster == c) 196 | distances <- apply(combn(cells, 2), 2, function(d) { 197 | p <- cor(isl1_data[, d[1]], isl1_data[, d[2]], method = "spearman") 198 | sqrt((1-p)/2) 199 | }) 200 | }) 201 | 202 | isl1_cluster_noise <- data.frame(cluster = rep(levels(isl1_cluster), times = sapply(isl1_cluster_noise_list, length)), distance = unlist(isl1_cluster_noise_list)) 203 | p_isl1_cluster_noise <- ggplot(data = isl1_cluster_noise, aes(x = cluster, y = distance, fill = cluster)) + 204 | geom_violin(trim = F) + 205 | geom_boxplot(width = 0.1, fill = "white") + 206 | scale_fill_manual(values = isl1_colors) + 207 | scale_y_continuous(expand = c(0.05, 0)) + 208 | guides(fill = FALSE, colour = FALSE) + 209 | xlab("") + ylab("") + theme_violins 210 | 211 | # 212 | # Isl1 CxC and GxG correlation analysis 213 | # 214 | isl1_ko_cells <- which((pData(c1_subset)$Background == "isl1" | pData(c1_subset)$Background == "isl1ko") & pData(c1_subset)$Platform == "C1") 215 | isl1_ko_cluster <- pData(c1_subset[, isl1_ko_cells])$cluster 216 | isl1_ko_cluster[which((pData(c1_subset)$Background == "isl1ko") & pData(c1_subset)$Platform == "C1")] <- "KO" 217 | isl1_ko_cluster <- factor(isl1_ko_cluster) 218 | 219 | isl1_colors <- parameters$colors$isl_cluster_palette 220 | names(isl1_colors) <- levels(isl1_ko_cluster) 221 | 222 | isl1_ko_data <- get_exprs(c1_subset[isl1_de_genes, isl1_ko_cells], "norm_exprs_sf") 223 | 224 | isl1_correlation_plot_list <- lapply(levels(isl1_ko_cluster), function(c) { 225 | cells <- which(isl1_ko_cluster == c) 226 | ic <- computeIC(isl1_ko_data[, cells], p.val = 1) 227 | 228 | p1 <- ggplot(data.frame(t=ic$between), aes(x=t)) + 229 | geom_histogram(binwidth = 0.01, fill = isl1_colors[c]) + 230 | scale_x_continuous(expand = c(0,0), limits = c(0,1)) + 231 | scale_y_continuous(expand = c(0,0.3)) 232 | p2 <- ggplot(data.frame(t=ic$within), aes(x=t)) + 233 | geom_histogram(binwidth = 0.01, fill = isl1_colors[c]) + 234 | scale_x_continuous(expand = c(0,0), limits = c(0,1)) + 235 | scale_y_continuous(expand = c(0,0.3)) 236 | 237 | if( c == levels(isl1_ko_cluster)[length(levels(isl1_ko_cluster))] ) { 238 | p1 <- p1 + xlab("Pearson's correlation") + theme_hists 239 | p2 <- p2 + xlab("Pearson's correlation") + theme_hists 240 | } else { 241 | p1 <- p1 + xlab("") + theme_hists_2 242 | p2 <- p2 + xlab("") + theme_hists_2 243 | } 244 | p <- grid.arrange(p2, p1, ncol = 2) 245 | }) 246 | 247 | # Fig2 c,d 248 | m <- multi_panel_figure(width = 205, height = 230, columns = 5, rows = 4, column_spacing = c(unit(5, "mm"), unit(5, "mm"), unit(5, "mm"), unit(0, "mm"), unit(0, "mm"))) 249 | m <- fill_panel(m, nkx_ic_plot, label = "C", column = 3, row = 1) 250 | m <- fill_panel(m, isl1_ic_plot, label = "", column = 4:5, row = 1) 251 | m <- fill_panel(m, p_nkx_cluster_noise, label = "D", column = 3, row = 2) 252 | m <- fill_panel(m, p_isl1_cluster_noise, label = "", column = 4:5, row = 2) 253 | 254 | ggsave( 255 | plot = m, 256 | filename = file.path(parameters$general$path_rfigures, "Figure_2_cd.pdf"), 257 | width = 205, 258 | height = 230, 259 | units = "mm", 260 | dpi = 600) 261 | 262 | # 263 | # Save raw data for Source File 264 | # 265 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_Figure2_c_Nkx.txt"), 266 | x = nkx_ic, 267 | row.names = F, 268 | col.names = T, 269 | quote = F, 270 | sep = "\t") 271 | 272 | # Supplementary Figure 2c/d 273 | nkx_correlation_plot <- grid.arrange(grobs = nkx_correlation_plot_list, ncol = 1) 274 | isl1_correlation_plot <- grid.arrange(grobs = isl1_correlation_plot_list, ncol = 1) 275 | 276 | m <- multi_panel_figure(width = 205, height = 230, columns = 2, rows = 7) 277 | m <- fill_panel(m, nkx_correlation_plot, label = "C", column = 1, row = 5:7) 278 | m <- fill_panel(m, isl1_correlation_plot, label = "D", column = 2, row = 3:7) 279 | 280 | ggsave( 281 | plot = m, 282 | filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_2_IC.pdf"), 283 | width = 205, 284 | height = 230, 285 | units = "mm", 286 | dpi = 600) 287 | 288 | # Supplementary Table with p-values 289 | tt1 <- ttheme_minimal(core=list(fg_params=list(hjust=1, x=0.9)), 290 | rowhead=list(fg_params=list(hjust=1, x=0.9)), 291 | colhead = list(fg_params=list(hjust=1, x=0.9))) 292 | 293 | s1_table_nkx <- tableGrob(nkx_ptest, theme=tt1, rows = NULL) 294 | s1_table_isl1 <- tableGrob(isl1_ptest, theme=tt1, rows = NULL) 295 | 296 | m <- multi_panel_figure(width = 205, height = 230, columns = 1, rows = 2) 297 | m <- fill_panel(m, s1_table_nkx) 298 | m <- fill_panel(m, s1_table_isl1) 299 | 300 | ggsave( 301 | plot = m, 302 | filename = file.path(parameters$general$path_rfigures, "Supplementary_Table_IC-pvalues.pdf"), 303 | width = 205, 304 | height = 230, 305 | units = "mm", 306 | dpi = 600) 307 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_1e.R: -------------------------------------------------------------------------------- 1 | library(RColorBrewer) 2 | library(ComplexHeatmap) 3 | library(circlize) 4 | library(rioja) 5 | library(gridExtra) 6 | library(cowplot) 7 | library(multipanelfigure) 8 | 9 | # 10 | # Load data 11 | # 12 | source("src/parameters.R") 13 | do_zscore <- T 14 | n_top_genes <- 30 15 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 16 | 17 | # 18 | # Nkx2-5 differential expression heatmap 19 | # 20 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1") 21 | nkx_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$nkx_de & fData(c1_subset)$nkx_marker)] 22 | 23 | # Find top 30 genes per cluster 24 | load(file.path(parameters$general$path_supdata, "Nkx2-5-diffExprs.Rdata")) 25 | load(file.path(parameters$general$path_supdata, "Nkx2-5-markers.Rdata")) 26 | 27 | nkx_de_gene_order <- lapply(names(nkx_diff_data), function(n) { 28 | data <- nkx_diff_data[[n]] 29 | j <- which(data$lfc.hi < -2) 30 | data <- data[j, ] 31 | m <- na.omit(match(nkx_de_genes, data$geneID)) 32 | i <- order(abs(data$lfc.hi[m]), decreasing = T) 33 | data$geneID[m][i[1:min(n_top_genes, length(m))]] 34 | }) 35 | 36 | nkx_de_selected <- unique(unlist(nkx_de_gene_order)) 37 | nkx_subset <- c1_subset[nkx_de_selected, nkx_cells] 38 | 39 | # 40 | # Constrained hierarchical clustering 41 | # 42 | nkx_subset <- nkx_subset[, !is.na(pData(nkx_subset)$cluster)] 43 | 44 | # Get clustering to order cells 45 | nkx_cl_order <- order(pData(nkx_subset)$cluster) 46 | nkx_expr_data <- get_exprs(nkx_subset, "norm_exprs_sf")[, nkx_cl_order] 47 | nkx_dist <- as.dist(1 - cor(nkx_expr_data, method="pearson")) 48 | 49 | nkx_const_clust <- chclust(nkx_dist) 50 | nkx_const_clust_dendro <- as.dendrogram(nkx_const_clust) 51 | 52 | rownames(nkx_expr_data) <- fData(nkx_subset)$symbol 53 | 54 | # Zscore 55 | if (do_zscore) { 56 | nkx_expr_data <- t(scale(t(nkx_expr_data), scale = F)) 57 | } 58 | 59 | # 60 | # Isl1 differential expression heatmap 61 | # 62 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 63 | isl1_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$isl1_de & fData(c1_subset)$isl1_marker)] 64 | 65 | # Find top 30 genes per cluster 66 | load(file.path(parameters$general$path_supdata, "Isl1-diffExprs.Rdata")) 67 | load(file.path(parameters$general$path_supdata, "Isl1-markers.Rdata")) 68 | 69 | isl1_de_gene_order <- lapply(names(isl1_diff_data), function(n) { 70 | data <- isl1_diff_data[[n]] 71 | j <- which(data$lfc.hi < -2) 72 | data <- data[j, ] 73 | m <- na.omit(match(isl1_de_genes, data$geneID)) 74 | i <- order(abs(data$lfc.hi[m]), decreasing = T) 75 | na.omit(data$geneID[m][i[1:min(n_top_genes, length(m))]]) 76 | }) 77 | 78 | isl1_de_selected <- na.omit(unique(unlist(isl1_de_gene_order))) 79 | isl1_subset <- c1_subset[isl1_de_selected, isl1_cells] 80 | 81 | # 82 | # Constrained hierarchical clustering 83 | # 84 | isl1_subset <- isl1_subset[, !is.na(pData(isl1_subset)$cluster)] 85 | 86 | # Get clustering to order cells 87 | isl1_cl_order <- order(pData(isl1_subset)$cluster) 88 | isl1_expr_data <- get_exprs(isl1_subset, "norm_exprs_sf")[, isl1_cl_order] 89 | isl1_dist <- as.dist(1 - cor(isl1_expr_data, method="pearson")) 90 | 91 | isl1_const_clust <- chclust(isl1_dist) 92 | isl1_const_clust_dendro <- as.dendrogram(isl1_const_clust) 93 | 94 | rownames(isl1_expr_data) <- fData(isl1_subset)$symbol 95 | 96 | # Zscore 97 | if (do_zscore) { 98 | isl1_expr_data <- t(scale(t(isl1_expr_data), scale = F)) 99 | } 100 | 101 | # 102 | # Color mapping function to bring both heatmaps to same scale 103 | # 104 | e_min <- min(nkx_expr_data, isl1_expr_data) 105 | e_max <- max(nkx_expr_data, isl1_expr_data) 106 | 107 | bounds <- max(abs(e_min), abs(e_max)) 108 | 109 | breaks <- seq(-bounds, bounds, length.out = 99) 110 | colors <- colorRampPalette(rev(brewer.pal(10, "RdBu")))(99) 111 | 112 | col_fun <- colorRamp2(breaks, colors) 113 | 114 | # 115 | # Nkx2-5 Heatmap annotation and Heatmap 116 | # 117 | nkx_cell_annotation_data <- data.frame(Timepoint = pData(nkx_subset)$Timepoint[nkx_cl_order], Cluster = pData(nkx_subset)$cluster[nkx_cl_order]) 118 | nkx_cell_annotation <- HeatmapAnnotation(nkx_cell_annotation_data, col = list(Timepoint = c("e7.5" = "#FF003C", "e8.5" = "#248F8D", "e9.5" = "#987F69"), 119 | Cluster = c("3" = "#00008B", "1" = "#8B0000", "2" = "#CDAD00")), 120 | show_legend = F, 121 | show_annotation_name = T, 122 | annotation_name_gp = gpar(fontsize = 4), 123 | annotation_name_offset = unit(0.8, "mm"), 124 | gap = unit(0.1, "mm"), 125 | annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"))) 126 | 127 | nkx_de_heatmap <- Heatmap(nkx_expr_data, 128 | col = col_fun, 129 | top_annotation = nkx_cell_annotation, 130 | cluster_columns = nkx_const_clust_dendro, 131 | cluster_rows = T, 132 | show_column_names = F, 133 | show_row_dend = F, 134 | show_column_dend = F, 135 | name = "nkx", 136 | #column_title = "Nkx2-5+ cells", 137 | column_title_gp = gpar(fontface = "bold", fontsize = 8), 138 | split = rep(paste("Marker genes\ncluster", c(1,2,3)), times = sapply(nkx_de_gene_order, length)), 139 | gap = unit(0.3, "mm"), 140 | row_title_rot = 90, 141 | row_title_gp = gpar(fontsize = 4), 142 | row_names_gp = gpar(fontsize = 3), 143 | heatmap_legend_param = list("plot" = F, "color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 144 | 145 | # 146 | # Isl1 Heatmap annotation and Heatmap 147 | # 148 | isl1_cell_annotation_data <- data.frame(Timepoint = pData(isl1_subset)$Timepoint[isl1_cl_order], Cluster = pData(isl1_subset)$cluster[isl1_cl_order]) 149 | isl1_cell_annotation <- HeatmapAnnotation(isl1_cell_annotation_data, col = list(Timepoint = c("e7.5" = "#FF003C", "e8.5" = "#248F8D", "e9.5" = "#987F69"), 150 | Cluster = c("1" = "#1B9E77", "2" = "#D95F02", "3" = "#7570B3", "4" = "#E7298A", "5" = "#66A61E")), 151 | show_legend = F, 152 | show_annotation_name = T, 153 | annotation_name_gp = gpar(fontsize = 4), 154 | annotation_name_offset = unit(0.8, "mm"), 155 | gap = unit(0.1, "mm"), 156 | annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"))) 157 | 158 | isl1_de_heatmap <- Heatmap(isl1_expr_data, 159 | col = col_fun, 160 | top_annotation = isl1_cell_annotation, 161 | cluster_columns = isl1_const_clust_dendro, 162 | cluster_rows = T, 163 | show_column_names = F, 164 | show_row_dend = F, 165 | show_column_dend = F, 166 | name = "isl1", 167 | #column_title = "Isl1+ cells", 168 | column_title_gp = gpar(fontface = "bold", fontsize = 8), 169 | split = rep(paste("Marker genes\ncluster", c(1,2,3,4,5)), times = sapply(isl1_de_gene_order, length)), 170 | gap = unit(0.3, "mm"), 171 | row_title_rot = 90, 172 | row_title_gp = gpar(fontsize = 4), 173 | row_names_gp = gpar(fontsize = 3), 174 | heatmap_legend_param = list("plot" = T ,"color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 175 | 176 | # 177 | # Build heatmap for Figure 1e 178 | # 179 | fig_1e <- grid.arrange(grid.grabExpr(draw(nkx_de_heatmap, heatmap_legend_side = "bottom", padding = unit(c(0, 0, 0, 0), "mm"))), grid.grabExpr(draw(isl1_de_heatmap, heatmap_legend_side = "bottom", padding = unit(c(0, 0, -5, 0), "mm"))), nrow = 2) 180 | 181 | m <- multi_panel_figure(width = 205, height = 230, columns = 4, rows = 4) 182 | m <- fill_panel(m, fig_1e, label = "E", column = c(1,2), row = c(2,3,4)) 183 | 184 | ggsave( 185 | plot = m, 186 | filename = file.path(parameters$general$path_rfigures, "Figure_1_e2.pdf"), 187 | width = 205, 188 | height = 230, 189 | units = "mm", 190 | dpi = 600) 191 | 192 | # 193 | # Heatmap for all Nkx2-5 DE genes 194 | # 195 | nkx_de_gene_order <- lapply(names(nkx_diff_data), function(n) { 196 | data <- nkx_diff_data[[n]] 197 | j <- which(data$lfc.hi < -1) 198 | data <- data[j, ] 199 | m <- na.omit(match(nkx_de_genes, data$geneID)) 200 | i <- order(abs(data$lfc.hi[m]), decreasing = T) 201 | data$geneID[m] 202 | }) 203 | 204 | nkx_de_selected <- unique(unlist(nkx_de_gene_order)) 205 | nkx_subset <- c1_subset[nkx_de_selected, nkx_cells] 206 | 207 | # 208 | # Constrained hierarchical clustering 209 | # 210 | nkx_subset <- nkx_subset[, !is.na(pData(nkx_subset)$cluster)] 211 | 212 | # Get clustering to order cells 213 | nkx_cl_order <- order(pData(nkx_subset)$cluster) 214 | nkx_expr_data <- get_exprs(nkx_subset, "norm_exprs_sf")[, nkx_cl_order] 215 | nkx_dist <- as.dist(1 - cor(nkx_expr_data, method="pearson")) 216 | 217 | nkx_const_clust <- chclust(nkx_dist) 218 | nkx_const_clust_dendro <- as.dendrogram(nkx_const_clust) 219 | 220 | rownames(nkx_expr_data) <- fData(nkx_subset)$symbol 221 | 222 | # Zscore 223 | if (do_zscore) { 224 | nkx_expr_data <- t(scale(t(nkx_expr_data), scale = F)) 225 | } 226 | 227 | # Heatmap 228 | nkx_all_de_heatmap <- Heatmap(nkx_expr_data, 229 | col = colorRampPalette(rev(brewer.pal(10, "RdBu")))(99), 230 | top_annotation = nkx_cell_annotation, 231 | cluster_columns = nkx_const_clust_dendro, 232 | cluster_rows = T, 233 | show_column_names = F, 234 | show_row_dend = F, 235 | name = "nkx", 236 | column_title = "Nkx2-5+ cells", 237 | column_title_gp = gpar(fontface = "bold", fontsize = 8), 238 | split = rep(paste("Marker genes\ncluster", c(1,2,3)), times = sapply(nkx_de_gene_order, length)), 239 | gap = unit(0.5, "mm"), 240 | row_title_rot = 90, 241 | row_title_gp = gpar(fontsize = 4), 242 | row_names_gp = gpar(fontsize = 1), 243 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 244 | 245 | # 246 | # Heatmap for all Isl1 DE genes 247 | # 248 | isl1_de_gene_order <- lapply(names(isl1_diff_data), function(n) { 249 | data <- isl1_diff_data[[n]] 250 | j <- which(data$lfc.hi < -2) 251 | data <- data[j, ] 252 | m <- na.omit(match(isl1_de_genes, data$geneID)) 253 | i <- order(abs(data$lfc.hi[m]), decreasing = T) 254 | na.omit(data$geneID[m]) 255 | }) 256 | # Remove Etv2 from cluster 5 257 | isl1_de_gene_order[[5]] <- isl1_de_gene_order[[5]][! isl1_de_gene_order[[5]] %in% c("ENSMUSG00000006311")] 258 | # Rescue two missing genes 259 | isl1_de_gene_order[[2]] <- c(isl1_de_gene_order[[2]], "ENSMUSG00000029432") 260 | isl1_de_gene_order[[5]] <- c(isl1_de_gene_order[[5]], "ENSMUSG00000050953") 261 | isl1_de_selected <- na.omit(unique(unlist(isl1_de_gene_order))) 262 | isl1_subset <- c1_subset[isl1_de_selected, isl1_cells] 263 | 264 | # 265 | # Constrained hierarchical clustering 266 | # 267 | isl1_subset <- isl1_subset[, !is.na(pData(isl1_subset)$cluster)] 268 | 269 | # Get clustering to order cells 270 | isl1_cl_order <- order(pData(isl1_subset)$cluster) 271 | isl1_expr_data <- get_exprs(isl1_subset, "norm_exprs_sf")[, isl1_cl_order] 272 | isl1_dist <- as.dist(1 - cor(isl1_expr_data, method="pearson")) 273 | 274 | isl1_const_clust <- chclust(isl1_dist) 275 | isl1_const_clust_dendro <- as.dendrogram(isl1_const_clust) 276 | 277 | rownames(isl1_expr_data) <- fData(isl1_subset)$symbol 278 | 279 | # Zscore 280 | if (do_zscore) { 281 | isl1_expr_data <- t(scale(t(isl1_expr_data), scale = F)) 282 | } 283 | 284 | # Heatmap 285 | isl1_all_de_heatmap <- Heatmap(isl1_expr_data, 286 | col = rev(brewer.pal(10, "RdBu")), 287 | top_annotation = isl1_cell_annotation, 288 | cluster_columns = isl1_const_clust_dendro, 289 | cluster_rows = T, 290 | show_column_names = F, 291 | show_row_dend = F, 292 | name = "isl1", 293 | column_title = "Isl1+ cells", 294 | column_title_gp = gpar(fontface = "bold", fontsize = 8), 295 | split = rep(paste("Marker genes\ncluster", c(1,2,3,4,5)), times = sapply(isl1_de_gene_order, length)), 296 | gap = unit(0.5, "mm"), 297 | row_title_rot = 90, 298 | row_title_gp = gpar(fontsize = 4), 299 | row_names_gp = gpar(fontsize = 1), 300 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Scaled expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 301 | 302 | # 303 | # Supplementary Figure X 304 | # 305 | sup_figX_a <- grid.grabExpr(draw(nkx_all_de_heatmap, heatmap_legend_side = "bottom")) 306 | sup_figX_b <- grid.grabExpr(draw(isl1_all_de_heatmap, heatmap_legend_side = "bottom")) 307 | 308 | m <- multi_panel_figure(width = 205, height = 230, columns = 1, rows = 2) 309 | m <- fill_panel(m, sup_figX_a, row = 1) 310 | m <- fill_panel(m, sup_figX_b, row = 2) 311 | 312 | ggsave( 313 | plot = m, 314 | filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_DE_genes.pdf"), 315 | width = 205, 316 | height = 230, 317 | units = "mm", 318 | dpi = 600) 319 | -------------------------------------------------------------------------------- /original-analysis/figure_src/Figure_3cd.R: -------------------------------------------------------------------------------- 1 | library(scater) 2 | library(tidyr) 3 | library(ggplot2) 4 | library(singlecellutils) 5 | 6 | # 7 | # Load data 8 | # 9 | source("src/parameters.R") 10 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 11 | load(file.path(parameters$general$path_supdata, "Isl1-DM.Rdata")) 12 | 13 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1") 14 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 15 | 16 | isl1_dm_coords <- cbind(-pData(c1_subset[, isl1_cells])$dm1, -pData(c1_subset[, isl1_cells])$dm2, -pData(c1_subset[, isl1_cells])$dm3) 17 | 18 | # 19 | # Graphical settings 20 | # 21 | global.settings <- list(layout.heights=list(main.key.padding = -2, bottom.padding = 0, axis.xlab.padding = -0.5), layout.widths = list(ylab.axis.padding = -0.5)) 22 | 23 | # 24 | # Color scalings 25 | # 26 | scale_channel <- function(x, min, max, a, b) { 27 | x[x > max] <- max 28 | return(ceiling(((b - a) * (x - min) / (max - min)) + a)) 29 | } 30 | 31 | min_isl1 <- min(get_exprs(c1_subset["ENSMUSG00000042258", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1,]) 32 | max_isl1 <- max(get_exprs(c1_subset["ENSMUSG00000042258", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1,]) 33 | min_nkx <- min(get_exprs(c1_subset["ENSMUSG00000015579", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1,]) 34 | max_nkx <- max(get_exprs(c1_subset["ENSMUSG00000015579", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1,]) 35 | 36 | isl1_color <- scale_channel(get_exprs(c1_subset["ENSMUSG00000042258", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1, ], min_isl1, 7, 0, 255) 37 | nkx_color <- scale_channel(get_exprs(c1_subset["ENSMUSG00000015579", c(isl1_cells, nkx_cells)], "norm_exprs_sf")[1, ], min_nkx, 7, 0, 255) 38 | 39 | # 40 | # Mix the colors of Isl1 and Nkx2-5 41 | # 42 | mix_color <- sapply(1:length(isl1_color), function(i) { 43 | red <- nkx_color[i] 44 | green <- isl1_color[i] 45 | blue <- 0 46 | rgb(red, green, blue, maxColorValue = 255) 47 | }) 48 | 49 | # 50 | # Plot clusterings from Figure 1d with coexpressing cells highlighted 51 | # 52 | isl1_isl1 <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", isl1_cells], "norm_exprs_sf") > 0.2, "Isl1+", "Isl1-") 53 | isl1_nkx <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", isl1_cells], "norm_exprs_sf") > 0.2, "Nkx2-5+", "Nkx2-5-") 54 | nkx_isl1 <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", nkx_cells], "norm_exprs_sf") > 0.2, "Isl1+", "Isl1-") 55 | nkx_nkx <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", nkx_cells], "norm_exprs_sf") > 0.2, "Nkx2-5+", "Nkx2-5-") 56 | 57 | co_expression_isl <- which(isl1_isl1 == "Isl1+" & isl1_nkx == "Nkx2-5+") 58 | co_expression_nkx <- which(nkx_isl1 == "Isl1+" & nkx_nkx == "Nkx2-5+") 59 | 60 | mix_color_grey <- rep("darkgrey", length(mix_color)) 61 | mix_color_grey[co_expression_isl] <- mix_color[co_expression_isl] 62 | mix_color_grey[length(isl1_cells)+co_expression_nkx] <- mix_color[length(isl1_cells)+co_expression_nkx] 63 | 64 | plot_mix_clustering_isl1 <- lattice::xyplot(pData(c1_subset)$tsne2[isl1_cells] ~ pData(c1_subset)$tsne1[isl1_cells], 65 | col = mix_color_grey[1:length(isl1_cells)], cex = .5, pch = 16, ylab = list(label = "t-SNE 2", cex = 0.6), main = list(label = "Isl1 cell clustering", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "t-SNE 1", cex = 0.6), par.settings = global.settings) 66 | plot_mix_clustering_nkx <- lattice::xyplot(pData(c1_subset)$tsne2[nkx_cells] ~ pData(c1_subset)$tsne1[nkx_cells], 67 | col = mix_color_grey[(length(isl1_cells)+1):length(nkx_cells)], cex = .5, pch = 16, ylab = list(label = "t-SNE 2", cex = 0.6), main = list(label = "Nkx2-5 cell clustering", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "t-SNE 1", cex = 0.6), par.settings = global.settings) 68 | 69 | # 70 | # Table Co-expressing 71 | # 72 | nkx_cells_early <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e7.5") 73 | isl1_cells_early <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e7.5") 74 | nkx_cells_middle <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e8.5") 75 | isl1_cells_middle <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e8.5") 76 | nkx_cells_late <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e9.5") 77 | isl1_cells_late <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1" & pData(c1_subset)$Timepoint == "e9.5") 78 | 79 | isl1_isl1_early <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", isl1_cells_early], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-") 80 | isl1_nkx_early <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", isl1_cells_early], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-") 81 | nkx_isl1_early <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", nkx_cells_early], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-") 82 | nkx_nkx_early <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", nkx_cells_early], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-") 83 | 84 | isl1_isl1_middle <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", isl1_cells_middle], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-") 85 | isl1_nkx_middle <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", isl1_cells_middle], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-") 86 | nkx_isl1_middle <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", nkx_cells_middle], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-") 87 | nkx_nkx_middle <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", nkx_cells_middle], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-") 88 | 89 | isl1_isl1_late <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", isl1_cells_late], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-") 90 | isl1_nkx_late <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", isl1_cells_late], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-") 91 | nkx_isl1_late <- ifelse(get_exprs(c1_subset["ENSMUSG00000042258", nkx_cells_late], "norm_exprs_sf") > 0.1, "Isl1+", "Isl1-") 92 | nkx_nkx_late <- ifelse(get_exprs(c1_subset["ENSMUSG00000015579", nkx_cells_late], "norm_exprs_sf") > 0.1, "Nkx2-5+", "Nkx2-5-") 93 | 94 | 95 | double_negative_isl1 <- c(isl1_cells_early[which(isl1_isl1_early == "Isl1-" & isl1_nkx_early == "Nkx2-5-")], 96 | isl1_cells_middle[which(isl1_isl1_middle == "Isl1-" & isl1_nkx_middle == "Nkx2-5-")], 97 | isl1_cells_late[which(isl1_isl1_late == "Isl1-" & isl1_nkx_late == "Nkx2-5-")]) 98 | double_negative_nkx <- c(nkx_cells_early[which(nkx_isl1_early == "Isl1-" & nkx_nkx_early == "Nkx2-5-")], 99 | nkx_cells_middle[which(nkx_isl1_middle == "Isl1-" & nkx_nkx_middle == "Nkx2-5-")], 100 | nkx_cells_late[which(nkx_isl1_late == "Isl1-" & nkx_nkx_late == "Nkx2-5-")]) 101 | non_double_negatives_isl1 <- c(isl1_cells_early[-which(isl1_isl1_early == "Isl1-" & isl1_nkx_early == "Nkx2-5-")], 102 | isl1_cells_middle[-which(isl1_isl1_middle == "Isl1-" & isl1_nkx_middle == "Nkx2-5-")], 103 | isl1_cells_late[-which(isl1_isl1_late == "Isl1-" & isl1_nkx_late == "Nkx2-5-")]) 104 | non_double_negatives_nkx <- c(nkx_cells_early[-which(nkx_isl1_early == "Isl1-" & nkx_nkx_early == "Nkx2-5-")], 105 | nkx_cells_middle[-which(nkx_isl1_middle == "Isl1-" & nkx_nkx_middle == "Nkx2-5-")], 106 | nkx_cells_late[-which(nkx_isl1_late == "Isl1-" & nkx_nkx_late == "Nkx2-5-")]) 107 | 108 | 109 | tt1 <- gridExtra::ttheme_minimal(base_size = 8, 110 | padding = unit(c(2, 2), "mm"), 111 | core = list(fg_params = list(hjust = 1, x = 0.9)), 112 | rowhead = list(fg_params = list(fontface = 2L, hjust = 1, x = 0.9)), 113 | colhead = list(fg_params = list(fontface = 2L, hjust = 1, x = 0.9))) 114 | 115 | table_isl1_early <- gridExtra::tableGrob(table(isl1_isl1_early, isl1_nkx_early), theme=tt1) 116 | table_nkx_early <- gridExtra::tableGrob(table(nkx_isl1_early, nkx_nkx_early), theme=tt1, rows = NULL) 117 | table_isl1_middle <- gridExtra::tableGrob(table(isl1_isl1_middle, isl1_nkx_middle), theme=tt1) 118 | table_nkx_middle <- gridExtra::tableGrob(table(nkx_isl1_middle, nkx_nkx_middle), theme=tt1, rows = NULL) 119 | table_isl1_late <- gridExtra::tableGrob(table(isl1_isl1_late, isl1_nkx_late), theme=tt1) 120 | table_nkx_late <- gridExtra::tableGrob(table(nkx_isl1_late, nkx_nkx_late), theme=tt1, rows = NULL) 121 | 122 | # 123 | # Load SOM input genes and predict Nkx2-5 data onto Isl1-SOM 124 | # 125 | het_early <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.early.txt")) 126 | het_mid <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.mid.txt")) 127 | het_late <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.late.txt")) 128 | het_pool <- readLines(file.path(parameters$general$path_supdata, "heterogeneity-isl1.txt")) 129 | 130 | isl1_het <- unique(c(het_early, het_mid, het_late, het_pool)) 131 | 132 | expression <- get_exprs(c1_subset[, c(isl1_cells, nkx_cells)], "norm_exprs_sf") 133 | expression_scaled <- t(scale(t(expression[isl1_het, ]))) 134 | 135 | som <- singlecellutils::calcSOM(expression_scaled, train = 1:nrow(expression_scaled), num_epochs = 2000, seed = 1004) 136 | som$codes <- som$codes[[1]] 137 | 138 | # Clustering 139 | 140 | hdbscan <- function(data, min_samples = 7L, min_cluster_size = 9L, outlier = 0, seed = NULL) { 141 | if (!is.null(seed)) set.seed(seed) 142 | h <- reticulate::import("hdbscan") 143 | cl <- h$HDBSCAN(min_samples = min_samples, min_cluster_size = min_cluster_size) 144 | labels <- cl$fit_predict(data) + 1 145 | labels[labels == 0] <- outlier 146 | return(factor(labels)) 147 | } 148 | 149 | seed <- 3465 150 | set.seed(seed) 151 | tsne <- Rtsne::Rtsne(t(som$codes), perplexity = 21, theta = 0.05, max_iter = 2000) 152 | 153 | p_color_background <- droplevels(pData(c1_subset[, c(isl1_cells, nkx_cells)])$Background) 154 | p_color_background[match(c(double_negative_isl1, double_negative_nkx), c(isl1_cells, nkx_cells))] <- NA 155 | p_cluster_background <- singlecellutils::colorAMap(tsne$Y, colour_by = p_color_background, palette = c("#31a354", "#3182bd"), cex = rep(0.5, nrow(tsne$Y)), na.cex = 0.5, xlab = list(label = "t-SNE 1", cex = 0.6), ylab = list(label = "t-SNE 2", cex = 0.6), main = list(label = ""), scales = list(cex = 0.4, tck = c(0.5, 0)), par.settings = global.settings) 156 | 157 | p_color_time <- droplevels(pData(c1_subset[, c(isl1_cells, nkx_cells)])$Timepoint) 158 | p_color_time[match(c(double_negative_isl1, double_negative_nkx), c(isl1_cells, nkx_cells))] <- NA 159 | p_cluster_time <- singlecellutils::colorAMap(tsne$Y, colour_by = p_color_time, palette = parameters$colors$nkx_timepoint_palette, cex = rep(0.5, nrow(tsne$Y)), na.cex = 0.5, xlab = list(label = "t-SNE 1", cex = 0.6), ylab = list(label = ""), main = list(label = ""), scales = list(cex = 0.4, tck = c(0.5, 0)), par.settings = global.settings) 160 | 161 | isl1_clustering <- factor(c(pData(c1_subset[, isl1_cells])$cluster, rep(NA, length(nkx_cells))), levels = as.character(1:5)) 162 | p_cluster_isl1 <- singlecellutils::colorAMap(tsne$Y, colour_by = isl1_clustering, palette = parameters$colors$isl_cluster_palette, cex = rep(0.5, nrow(tsne$Y)), na.cex = 0.5, xlab = list(label = "t-SNE 1", cex = 0.6), ylab = list(label = ""), main = list(label = ""), scales = list(cex = 0.4, tck = c(0.5, 0)), par.settings = global.settings) 163 | 164 | # 165 | # Predict Nkx2-5 data onto Isl1 trajectory 166 | # 167 | 168 | # Temp 169 | #nkx_cells <- non_double_negatives_nkx 170 | #isl1_cells <- non_double_negatives_isl1 171 | # End Temp 172 | 173 | m <- match(colnames(isl1_dm@data_env$data), rownames(c1_subset)) 174 | nkx_data <- get_exprs(c1_subset[m, nkx_cells], "norm_exprs_sf") 175 | 176 | nkx_dcs <- dm_predict(isl1_dm, t(nkx_data)) 177 | nkx_coords <- matrix(nkx_dcs@x, ncol = nkx_dcs@Dim[2]) 178 | isl1_dm_coords_nkx <- rbind(isl1_dm_coords, cbind(-nkx_coords[, 1], -nkx_coords[, 3], -nkx_coords[, 2])) 179 | 180 | # 181 | # Prepare plots 182 | # 183 | # Create col 184 | isl1_col <- rep("darkgrey", length(c(isl1_cells, nkx_cells))) 185 | isl1_col[1:length(isl1_cells)] <- mix_color[1:length(isl1_cells)] 186 | nkx_col <- rep("darkgrey", length(c(isl1_cells, nkx_cells))) 187 | nkx_col[(length(isl1_cells)+1):length(nkx_col)] <- mix_color[(length(isl1_cells)+1):length(nkx_col)] 188 | 189 | # Create pch 190 | isl1_pch <- rep(4, length(c(isl1_cells, nkx_cells))) 191 | isl1_pch[1:length(isl1_cells)] <- 16 192 | nkx_pch <- rep(4, length(c(isl1_cells, nkx_cells))) 193 | nkx_pch[(length(isl1_cells)+1):length(nkx_pch)] <- 16 194 | 195 | # Create cex 196 | isl1_cex <- rep(0.5, length(c(isl1_cells, nkx_cells))) 197 | isl1_cex[1:length(isl1_cells)] <- .5 198 | nkx_cex <- rep(0.5, length(c(isl1_cells, nkx_cells))) 199 | nkx_cex[(length(isl1_cells)+1):length(nkx_cex)] <- .5 200 | 201 | # Create data 202 | plot_isl1_data_x <- rev(isl1_dm_coords_nkx[,1]) 203 | plot_isl1_data_y <- rev(isl1_dm_coords_nkx[,2]) 204 | plot_nkx_data_x <- isl1_dm_coords_nkx[,1] 205 | plot_nkx_data_y <- isl1_dm_coords_nkx[,2] 206 | 207 | # 208 | # Create figure 209 | # 210 | table <- gridExtra::grid.arrange(table_isl1_early, table_nkx_early, 211 | table_isl1_middle, table_nkx_middle, 212 | table_isl1_late, table_nkx_late, nrow = 3) 213 | plot_lineage_overlap_isl1 <- lattice::xyplot(plot_isl1_data_y ~ plot_isl1_data_x, cex = rev(isl1_cex), col = rev(isl1_col), pch = rev(isl1_pch), ylab = list(label = "Dimension 3", cex = 0.6), main = list(label = "Isl1 cells in Isl1 lineage", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), par.settings = global.settings) 214 | plot_lineage_overlap_nkx <- lattice::xyplot(isl1_dm_coords_nkx[,2] ~ isl1_dm_coords_nkx[,1], cex = nkx_cex, col = nkx_col, pch = nkx_pch, ylab = list(label = "Dimension 3", cex = 0.6), main = list(label = "Nkx2-5 cells in Isl1 lineage", cex = 0.75), scales = list(cex = 0.4, tck = c(0.5, 0)), xlab = list(label = "Dimension 1", cex = 0.6), par.settings = global.settings) 215 | plot_clustering <- gridExtra::grid.arrange(p_cluster_background, p_cluster_time, p_cluster_isl1, nrow = 1) 216 | 217 | figure <- multipanelfigure::multi_panel_figure(width = 205, height = 230, columns = 3, rows = 3) 218 | figure <- multipanelfigure::fill_panel(figure, table, scaling = "fit", row = 1, column = 1) 219 | 220 | figure <- multipanelfigure::fill_panel(figure, plot_mix_clustering_isl1, row = 1, column = 3) 221 | figure <- multipanelfigure::fill_panel(figure, plot_mix_clustering_nkx, row = 1, column = 2) 222 | 223 | figure <- multipanelfigure::fill_panel(figure, plot_clustering, row = 2, column = 1:3) 224 | 225 | figure <- multipanelfigure::fill_panel(figure, plot_lineage_overlap_isl1, row = 3, column = 1) 226 | figure <- multipanelfigure::fill_panel(figure, plot_lineage_overlap_nkx, row = 3, column = 2) 227 | 228 | # Save figure 229 | ggplot2::ggsave(figure, 230 | file = "supplementary_figures/99_review_2c.pdf", 231 | width = 205, 232 | height = 230, 233 | units = "mm", 234 | useDingbats = FALSE) 235 | -------------------------------------------------------------------------------- /original-analysis/figure_src/SupplementaryFigure_7.R: -------------------------------------------------------------------------------- 1 | library(scater) 2 | library(dplyr) 3 | library(tidyr) 4 | library(singlecellutils) 5 | library(RColorBrewer) 6 | library(viridis) 7 | library(multipanelfigure) 8 | library(ComplexHeatmap) 9 | library(circlize) 10 | library(zoo) 11 | library(MPAgenomics) 12 | library(gridExtra) 13 | 14 | # 15 | # Load data 16 | # 17 | source("src/parameters.R") 18 | min_cor_cluster <- 0.5 19 | min_cor <- 0.7 20 | load(file.path(parameters$general$path_supdata, "c1_subset.Rdata")) 21 | 22 | # 23 | # Nkx2-5 Rank based correlation 24 | # 25 | nkx_cells <- which(pData(c1_subset)$Background == "nkx2-5" & pData(c1_subset)$Platform == "C1") 26 | nkx_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$nkx_de & fData(c1_subset)$nkx_marker)] 27 | nkx_cluster <- factor(pData(c1_subset[, nkx_cells])$cluster) 28 | 29 | nkx_data <- get_exprs(c1_subset[nkx_de_genes, nkx_cells], "norm_exprs_sf") 30 | 31 | nkx_cor <- cor(t(nkx_data), method = "pearson") 32 | nkx_dpt <- pData(c1_subset[, nkx_cells])$dpt 33 | 34 | nkx_cor_list <- lapply(levels(nkx_cluster), function(c) { 35 | ind <- which(nkx_cluster == c) 36 | cc <- cor(t(nkx_data[, ind]), cbind(nkx_dpt[ind]), method = "spearman") 37 | }) 38 | nkx_cor_list$dpt <- cor(t(nkx_data), cbind(nkx_dpt), method = "spearman") 39 | 40 | nkx_correlation <- as.data.frame(do.call("cbind", nkx_cor_list)) 41 | colnames(nkx_correlation) <- c(paste0("cluster", levels(nkx_cluster)), "nkx_dpt") 42 | 43 | write.table(nkx_correlation, file = file.path(parameters$general$path_supdata,"Nkx2-5-correlation.txt"), quote = F, row.names = T, col.names = T, sep = "\t") 44 | 45 | # 46 | # Extract highly correlated genes 47 | # 48 | # From cluster correlation 49 | nkx_i <- unique(unlist(apply(nkx_correlation[, -ncol(nkx_correlation)], 2, function(x) which(abs(x) > min_cor_cluster)))) 50 | 51 | # From dpt correlation 52 | nkx_i <- unique(c(nkx_i, which(abs(nkx_correlation$nkx_dpt) > min_cor))) 53 | 54 | nkx_cor_data <- nkx_cor[nkx_i, nkx_i] 55 | colnames(nkx_cor_data) <- fData(c1_subset[nkx_de_genes, ][nkx_i, ])$symbol 56 | rownames(nkx_cor_data) <- fData(c1_subset[nkx_de_genes, ][nkx_i, ])$symbol 57 | 58 | # 59 | # Isl1 Rank based correlation 60 | # 61 | isl1_cells <- which(pData(c1_subset)$Background == "isl1" & pData(c1_subset)$Platform == "C1") 62 | isl1_de_genes <- rownames(fData(c1_subset))[which(fData(c1_subset)$isl1_de & fData(c1_subset)$isl1_marker)] 63 | isl1_cluster <- factor(pData(c1_subset[, isl1_cells])$cluster) 64 | 65 | isl1_data <- get_exprs(c1_subset[isl1_de_genes, isl1_cells], "norm_exprs_sf") 66 | 67 | isl1_cor <- cor(t(isl1_data), method = "pearson") 68 | isl1_dpt <- pData(c1_subset[, isl1_cells])$dpt 69 | 70 | isl1_cor_list <- lapply(levels(isl1_cluster), function(c) { 71 | ind <- which(isl1_cluster == c) 72 | cc <- cor(t(isl1_data[, ind]), cbind(isl1_dpt[ind]), method = "spearman") 73 | }) 74 | isl1_cor_list$dpt <- cor(t(isl1_data), cbind(isl1_dpt), method = "spearman") 75 | 76 | isl1_correlation <- as.data.frame(do.call("cbind", isl1_cor_list)) 77 | colnames(isl1_correlation) <- c(paste0("cluster", levels(isl1_cluster)), "isl1_dpt") 78 | 79 | write.table(isl1_correlation, file = file.path(parameters$general$path_supdata,"Isl1-correlation.txt"), quote = F, row.names = T, col.names = T, sep = "\t") 80 | 81 | # 82 | # Extract highly correlated genes 83 | # 84 | # From cluster correlation 85 | isl1_i <- unique(unlist(apply(isl1_correlation[, -ncol(isl1_correlation)], 2, function(x) which(abs(x) > min_cor_cluster)))) 86 | 87 | # From dpt correlation 88 | isl1_i <- unique(c(isl1_i, which(abs(isl1_correlation$isl1_dpt) > min_cor))) 89 | 90 | isl1_cor_data <- isl1_cor[isl1_i, isl1_i] 91 | colnames(isl1_cor_data) <- fData(c1_subset[isl1_de_genes, ][isl1_i, ])$symbol 92 | rownames(isl1_cor_data) <- fData(c1_subset[isl1_de_genes, ][isl1_i, ])$symbol 93 | 94 | # 95 | # Nkx2-5 Heatmaps 96 | # 97 | 98 | nkx_cor_heatmap <- Heatmap(nkx_cor_data, 99 | col = colorRamp2(seq(from = -1, to = 1, by = 0.25), rev(brewer.pal(9, "RdBu"))), 100 | row_names_gp = gpar(fontsize = 2), 101 | column_names_gp = gpar(fontsize = 2), 102 | show_column_dend = F, 103 | column_title = "Nkx2-5+ gene correlation", 104 | column_title_gp = gpar(fontface = "bold", fontsize = 8), 105 | name = "nkx_cor", 106 | heatmap_legend_param = list("at" = c(-1, 0, 1), "color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Pearson correlation", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 107 | 108 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_7a.txt"), 109 | x = nkx_cor_data, 110 | row.names = T, 111 | col.names = T, 112 | sep = "\t", 113 | quote = F) 114 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_7b.txt"), 115 | x = isl1_cor_data, 116 | row.names = T, 117 | col.names = T, 118 | sep = "\t", 119 | quote = F) 120 | 121 | # 122 | # Smoothed expression heatmap 123 | # 124 | nkx_data <- nkx_data[nkx_i,] 125 | rownames(nkx_data) <- fData(c1_subset[nkx_de_genes, ][nkx_i, ])$symbol 126 | 127 | nkx_pseudotime_order <- order(nkx_dpt) 128 | nkx_data_smoothed <- t(apply(nkx_data[, nkx_pseudotime_order], 1, function(r) rollapply(r, 11, mean, fill="extend"))) 129 | 130 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_7c.txt"), 131 | x = nkx_data_smoothed, 132 | row.names = T, 133 | col.names = T, 134 | sep = "\t", 135 | quote = F) 136 | 137 | # Annotation of columns 138 | nkx_cell_annotation_data <- data.frame(Timepoint = pData(c1_subset[, nkx_cells])$Timepoint[nkx_pseudotime_order], Cluster = pData(c1_subset[, nkx_cells])$cluster[nkx_pseudotime_order], Pseudotime = nkx_dpt[nkx_pseudotime_order]) 139 | nkx_cell_annotation <- HeatmapAnnotation(nkx_cell_annotation_data, col = list(Timepoint = c("e7.5" = "#36648B", "e8.5" = "#4F94CD", "e9.5" = "#63B8FF"), 140 | Cluster = c("1" = "#00008B", "2" = "#8B0000", "3" = "#CDAD00"), 141 | Pseudotime = colorRamp2(c(min(nkx_dpt), mean(nkx_dpt), max(nkx_dpt)), colors = c("#36648B", "#4F94CD", "#63B8FF"))), 142 | show_legend = F, 143 | show_annotation_name = T, 144 | annotation_name_gp = gpar(fontsize = 4), 145 | annotation_name_offset = unit(0.8, "mm"), 146 | gap = unit(0.1, "mm"), 147 | annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"), unit(1.5, "mm"))) 148 | 149 | nkx_smooth_heatmap <- Heatmap(nkx_data_smoothed, 150 | col = inferno(99), 151 | cluster_columns = F, 152 | #clustering_distance_rows = 'manhattan', 153 | clustering_method_rows = 'ward.D', 154 | #split = cl$clustering, 155 | row_names_gp = gpar(fontsize = 1.5), 156 | column_names_gp = gpar(fontsize = 1.5), 157 | show_column_names = F, 158 | name = "nkx_smooth", 159 | top_annotation = nkx_cell_annotation, 160 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 161 | 162 | # 163 | # Gene classification into "upregulated", "downregulated" and "other" 164 | # 165 | nkx_segments <- factor(apply(nkx_data_smoothed, 1, function(g) { 166 | smntn <- segmentation(as.numeric(g), method = "PELT", plot = F, verbose = F) 167 | s <- smntn$segment 168 | s$level <- ifelse(s$means < 1, "off", ifelse(s$means < 5, "low", ifelse(s$means < 8, "moderate", ifelse(s$means < 10, "high", "very high")))) 169 | s$level <- factor(s$level, levels = c("off", "low", "moderate", "high", "very high"), ordered = T) 170 | n <- nrow(s) 171 | 172 | direction <- paste(sapply(2:nrow(s), function(i) { 173 | diff <- as.numeric(s$level[i]) - as.numeric(s$level[i-1]) 174 | if(diff >= 1) return("up") 175 | if(diff <= -1) return("down") 176 | }), collapse = "-") 177 | 178 | class = "other" 179 | if(nrow(s) > 1) { 180 | direction <- paste(sapply(2:nrow(s), function(i) { 181 | diff <- as.numeric(s$level[i]) - as.numeric(s$level[i-1]) 182 | if(diff >= 1) return("up") 183 | if(diff <= -1) return("down") 184 | }), collapse = "-") 185 | if (startsWith(direction, "up") & endsWith(direction, "up")) class <- "upregulated" 186 | if (startsWith(direction, "down") & endsWith(direction, "down")) class <- "downregulated" 187 | #if (startsWith(direction, "up") & endsWith(direction, "down")) class <- "intermediate" 188 | #if (startsWith(direction, "up") & endsWith(direction, "up") & s$level[1] > "off") class <- "upregulated - primed" 189 | #if (startsWith(direction, "up") & endsWith(direction, "up") & s$level[1] == "off") class <- "upregulated - de novo" 190 | #if (startsWith(direction, "down") & endsWith(direction, "down")) class <- "downregulated" 191 | } 192 | return(class) 193 | }), levels = c("downregulated", "intermediate", "upregulated", "upregulated - primed", "upregulated - de novo", "other"), ordered = T) 194 | 195 | nkx_smooth_heatmap_class <- Heatmap(nkx_data_smoothed, 196 | col = inferno(99), 197 | cluster_columns = F, 198 | split = nkx_segments, 199 | row_names_gp = gpar(fontsize = 3), 200 | column_names_gp = gpar(fontsize = 1.5), 201 | row_title_gp = gpar(fontsize = 4), 202 | gap = unit(0.5, "mm"), 203 | show_column_names = F, 204 | show_row_dend = F, 205 | name = "nkx_smooth", 206 | top_annotation = nkx_cell_annotation, 207 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 208 | 209 | # 210 | # Isl1 heatmaps 211 | # 212 | 213 | isl1_cor_heatmap <- Heatmap(isl1_cor_data, 214 | col = colorRamp2(seq(from = -1, to = 1, by = 0.25), rev(brewer.pal(9, "RdBu"))), 215 | row_names_gp = gpar(fontsize = 2), 216 | column_names_gp = gpar(fontsize = 2), 217 | show_column_dend = F, 218 | column_title = "Isl1+ gene correlation", 219 | column_title_gp = gpar(fontface = "bold", fontsize = 8), 220 | name = "nkx_cor", 221 | heatmap_legend_param = list("at" = c(-1, 0, 1), "color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Pearson correlation", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 222 | 223 | # 224 | # Smoothed expression heatmap 225 | # 226 | isl1_data <- isl1_data[isl1_i,] 227 | rownames(isl1_data) <- fData(c1_subset[isl1_de_genes, ][isl1_i, ])$symbol 228 | 229 | isl1_pseudotime_order <- order(isl1_dpt) 230 | isl1_data_smoothed <- t(apply(isl1_data[, isl1_pseudotime_order], 1, function(r) rollapply(r, 11, mean, fill="extend"))) 231 | 232 | isl1_branch1 <- which(isl1_cluster[isl1_pseudotime_order] %in% c(2,3,4,5)) 233 | isl1_branch2 <- which(isl1_cluster[isl1_pseudotime_order] %in% c(1,5)) 234 | 235 | write.table(file = file.path(parameters$general$path_supdata, "Source_Data_SupplementaryFigure_7d.txt"), 236 | x = isl1_data_smoothed, 237 | row.names = T, 238 | col.names = T, 239 | sep = "\t", 240 | quote = F) 241 | 242 | # 243 | # Color mapping function to bring both heatmaps to same scale 244 | # 245 | e_min <- min(isl1_data_smoothed) 246 | e_max <- max(isl1_data_smoothed) 247 | 248 | breaks <- seq(e_min, e_max, length.out = 99) 249 | colors <- inferno(99) 250 | 251 | col_fun <- colorRamp2(breaks, colors) 252 | 253 | 254 | # Annotation of columns 255 | isl1_cell_annotation_data <- data.frame(Timepoint = pData(c1_subset[, isl1_cells])$Timepoint[isl1_pseudotime_order], Cluster = pData(c1_subset[, isl1_cells])$cluster[isl1_pseudotime_order], Pseudotime = isl1_dpt[isl1_pseudotime_order]) 256 | isl1_cell_annotation_data_branch1 <- isl1_cell_annotation_data[isl1_branch1, ] 257 | isl1_cell_annotation_data_branch2 <- isl1_cell_annotation_data[isl1_branch2, ] 258 | 259 | isl1_cell_annotation <- HeatmapAnnotation(isl1_cell_annotation_data, col = list(Timepoint = c("e7.5" = "#008B45", "e8.5" = "#00CD66", "e9.5" = "#00FF7F"), 260 | Cluster = c("1" = "#1B9E77", "2" = "#D95F02", "3" = "#7570B3", "4" = "#E7298A", "5" = "#66A61E"), 261 | Pseudotime = colorRamp2(c(min(isl1_dpt), mean(isl1_dpt), max(isl1_dpt)), colors = c("#008B45", "#00CD66", "#00FF7F"))), 262 | show_legend = F, 263 | show_annotation_name = T, 264 | annotation_name_gp = gpar(fontsize = 4), 265 | annotation_name_offset = unit(0.8, "mm"), 266 | gap = unit(0.1, "mm"), 267 | annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"), unit(1.5, "mm"))) 268 | 269 | isl1_cell_annotation_branch1 <- HeatmapAnnotation(isl1_cell_annotation_data_branch1, col = list(Timepoint = c("e7.5" = "#008B45", "e8.5" = "#00CD66", "e9.5" = "#00FF7F"), 270 | Cluster = c("1" = "#1B9E77", "2" = "#D95F02", "3" = "#7570B3", "4" = "#E7298A", "5" = "#66A61E"), 271 | Pseudotime = colorRamp2(c(min(isl1_dpt), mean(isl1_dpt), max(isl1_dpt)), colors = c("#008B45", "#00CD66", "#00FF7F"))), 272 | show_legend = F, 273 | show_annotation_name = T, 274 | annotation_name_gp = gpar(fontsize = 4), 275 | annotation_name_offset = unit(0.8, "mm"), 276 | gap = unit(0.1, "mm"), 277 | annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"), unit(1.5, "mm"))) 278 | 279 | isl1_cell_annotation_branch2 <- HeatmapAnnotation(isl1_cell_annotation_data_branch2, col = list(Timepoint = c("e7.5" = "#008B45", "e8.5" = "#00CD66", "e9.5" = "#00FF7F"), 280 | Cluster = c("1" = "#1B9E77", "2" = "#D95F02", "3" = "#7570B3", "4" = "#E7298A", "5" = "#66A61E"), 281 | Pseudotime = colorRamp2(c(min(isl1_dpt), mean(isl1_dpt), max(isl1_dpt)), colors = c("#008B45", "#00CD66", "#00FF7F"))), 282 | show_legend = F, 283 | show_annotation_name = F, 284 | annotation_name_gp = gpar(fontsize = 4), 285 | annotation_name_offset = unit(0.8, "mm"), 286 | gap = unit(0.1, "mm"), 287 | annotation_height = unit.c(unit(1.5, "mm"), unit(1.5, "mm"), unit(1.5, "mm"))) 288 | 289 | isl1_smooth_heatmap <- Heatmap(isl1_data_smoothed, 290 | col = col_fun, 291 | cluster_columns = F, 292 | row_names_gp = gpar(fontsize = 4), 293 | column_names_gp = gpar(fontsize = 4), 294 | show_column_names = F, 295 | name = "isl1_smooth", 296 | top_annotation = isl1_cell_annotation, 297 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 298 | 299 | isl1_smooth_heatmap_branch1 <- Heatmap(isl1_data_smoothed[, isl1_branch1], 300 | col = col_fun, 301 | cluster_columns = F, 302 | row_names_gp = gpar(fontsize = 1.5), 303 | column_names_gp = gpar(fontsize = 1.5), 304 | show_column_names = F, 305 | name = "isl1_smooth_b1", 306 | top_annotation = isl1_cell_annotation_branch1, 307 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 308 | 309 | isl1_smooth_heatmap_branch2 <- Heatmap(isl1_data_smoothed[, isl1_branch2], 310 | col = col_fun, 311 | cluster_columns = F, 312 | row_names_gp = gpar(fontsize = 1.5), 313 | column_names_gp = gpar(fontsize = 1.5), 314 | show_column_names = F, 315 | show_row_names = F, 316 | name = "isl1_smooth_b2", 317 | top_annotation = isl1_cell_annotation_branch2, 318 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 319 | 320 | # CLassification 321 | isl1_segments <- factor(apply(isl1_data_smoothed[, isl1_branch1], 1, function(g) { 322 | smntn <- segmentation(as.numeric(g), method = "PELT", plot = F, verbose = F) 323 | s <- smntn$segment 324 | s$level <- ifelse(s$means < 1, "off", ifelse(s$means < 5, "low", ifelse(s$means < 8, "moderate", ifelse(s$means < 10, "high", "very high")))) 325 | s$level <- factor(s$level, levels = c("off", "low", "moderate", "high", "very high"), ordered = T) 326 | n <- nrow(s) 327 | 328 | class = "other" 329 | if(nrow(s) > 1) { 330 | direction <- paste(sapply(2:nrow(s), function(i) { 331 | diff <- as.numeric(s$level[i]) - as.numeric(s$level[i-1]) 332 | if(diff >= 1) return("up") 333 | if(diff <= -1) return("down") 334 | }), collapse = "-") 335 | if (startsWith(direction, "up") & endsWith(direction, "up")) class <- "upregulated" 336 | if (startsWith(direction, "down") & endsWith(direction, "down")) class <- "downregulated" 337 | #if (startsWith(direction, "up") & endsWith(direction, "down")) class <- "intermediate" 338 | #if (startsWith(direction, "up") & endsWith(direction, "up") & s$level[1] > "off") class <- "upregulated - primed" 339 | #if (startsWith(direction, "up") & endsWith(direction, "up") & s$level[1] == "off") class <- "upregulated - de novo" 340 | #if (startsWith(direction, "down") & endsWith(direction, "down")) class <- "downregulated" 341 | } 342 | return(class) 343 | }), levels = c("downregulated", "intermediate", "upregulated", "upregulated - primed", "upregulated - de novo", "other"), ordered = T) 344 | 345 | isl1_smooth_heatmap_class_b1 <- Heatmap(isl1_data_smoothed[, isl1_branch1], 346 | col = col_fun, 347 | cluster_columns = F, 348 | split = isl1_segments, 349 | row_names_gp = gpar(fontsize = 3), 350 | column_names_gp = gpar(fontsize = 1.5), 351 | row_title_gp = gpar(fontsize = 4), 352 | gap = unit(0.5, "mm"), 353 | show_column_names = F, 354 | show_row_dend = F, 355 | name = "isl1_smooth_b1", 356 | top_annotation = isl1_cell_annotation_branch1, 357 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 358 | 359 | isl1_smooth_heatmap_class_b2 <- Heatmap(isl1_data_smoothed[, isl1_branch2], 360 | col = col_fun, 361 | cluster_columns = F, 362 | split = isl1_segments, 363 | row_names_gp = gpar(fontsize = 3), 364 | column_names_gp = gpar(fontsize = 1.5), 365 | row_title_gp = gpar(fontsize = 4), 366 | gap = unit(0.5, "mm"), 367 | show_column_names = F, 368 | show_row_names = F, 369 | show_row_dend = F, 370 | name = "isl1_smooth_b2", 371 | top_annotation = isl1_cell_annotation_branch2, 372 | heatmap_legend_param = list("color_bar" = "continuous", "legend_direction" = "horizontal", "title" = "Expression", "title_position" = "leftcenter", "title_gp" = gpar(fontsize = 4), "labels_gp" = gpar(fontsize = 4), "grid_height" = unit(2, "mm"))) 373 | 374 | 375 | # 376 | # Create Figures 377 | # 378 | # Fig2 e,f 379 | fig_2e <- grid.grabExpr(draw(nkx_smooth_heatmap_class, heatmap_legend_side = "bottom")) 380 | fig_2f <- grid.grabExpr(draw(isl1_smooth_heatmap_class_b2 + isl1_smooth_heatmap_class_b1, main_heatmap = "isl1_smooth_b1", heatmap_legend_side = "bottom", gap = unit(1, "mm"))) 381 | 382 | m <- multi_panel_figure(width = 205, height = 230, columns = 7, rows = 7) 383 | 384 | m <- fill_panel(m, fig_2e, label = "E", column = c(1:3), row = c(4,5,6,7)) 385 | m <- fill_panel(m, fig_2f, label = "F", column = c(4:7), row = c(4,5,6,7)) 386 | 387 | ggsave( 388 | plot = m, 389 | filename = file.path(parameters$general$path_rfigures, "Figure_2_ef.pdf"), 390 | width = 205, 391 | height = 230, 392 | units = "mm", 393 | dpi = 600) 394 | 395 | # Figure 2 396 | m <- multi_panel_figure(width = 205, height = 230, columns = 7, rows = 14) 397 | 398 | m <- fill_panel(m, nkx_ic_plot, label = "C", column = 5:7, row = 1:3) 399 | m <- fill_panel(m, isl1_ic_plot, label = "D", column = 5:7, row = 4:6) 400 | m <- fill_panel(m, fig_2e, label = "E", column = 1:3, row = 7:14) 401 | m <- fill_panel(m, fig_2f, label = "F", column = 4:7, row = 7:14) 402 | 403 | ggsave( 404 | plot = m, 405 | filename = file.path(parameters$general$path_rfigures, "Figure_2.pdf"), 406 | width = 205, 407 | height = 230, 408 | units = "mm", 409 | dpi = 600) 410 | 411 | # Supplementary figure for classified heatmaps 412 | sup_figX_a <- grid.grabExpr(draw(nkx_smooth_heatmap, heatmap_legend_side = "bottom")) 413 | sup_figX_b <- grid.grabExpr(draw(isl1_smooth_heatmap_branch2 + isl1_smooth_heatmap_branch1, main_heatmap = "isl1_smooth_b1", heatmap_legend_side = "bottom", gap = unit(1, "mm"), padding = unit(c(1, 1, 1, 1), "mm"))) 414 | 415 | m <- multi_panel_figure(width = 205, height = 230, columns = 7, rows = 3) 416 | 417 | m <- fill_panel(m, sup_figX_a, label = "A", column = c(1:3), row = 2) 418 | m <- fill_panel(m, sup_figX_b, label = "B", column = c(4:7), row = 2) 419 | 420 | ggsave( 421 | plot = m, 422 | filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_smoothHeatmap.pdf"), 423 | width = 205, 424 | height = 230, 425 | units = "mm", 426 | dpi = 600) 427 | 428 | # Supplementary figure for correlation heatmaps 429 | sup_figX_a <- grid.grabExpr(draw(nkx_cor_heatmap, heatmap_legend_side = "bottom")) 430 | sup_figX_b <- grid.grabExpr(draw(isl1_cor_heatmap, heatmap_legend_side = "bottom")) 431 | 432 | m <- multi_panel_figure(width = 205, height = 230, columns = 3, rows = 2) 433 | m <- fill_panel(m, sup_figX_a, row = 1, column = c(1,2)) 434 | m <- fill_panel(m, sup_figX_b, row = 2, column = c(1,2)) 435 | 436 | ggsave( 437 | plot = m, 438 | filename = file.path(parameters$general$path_rfigures, "Supplementary_Figure_Corheatmaps.pdf"), 439 | width = 205, 440 | height = 230, 441 | units = "mm", 442 | dpi = 600) 443 | --------------------------------------------------------------------------------