├── Data ├── fData.csv ├── feeders in Stage II.csv ├── feeders in Stage III.csv └── pData.csv ├── Figure2B └── tSNE_Fiure2B.csv ├── Figure3A ├── Chemical_reprogramming_trajectory.R ├── Chemical_reprogramming_trajectory.csv └── Chemical_reprogramming_trajectory.tiff ├── LICENSE ├── README.md ├── pre-processing.R ├── tSNE_StageI.R ├── tSNE_StageII.R ├── tSNE_StageII_plot.R └── tSNE_StageI_plot.R /Data/feeders in Stage II.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChengLiLab/CSC2018/345c4c201e9ed401e59054d463c649f3feebcdbf/Data/feeders in Stage II.csv -------------------------------------------------------------------------------- /Data/feeders in Stage III.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChengLiLab/CSC2018/345c4c201e9ed401e59054d463c649f3feebcdbf/Data/feeders in Stage III.csv -------------------------------------------------------------------------------- /Figure3A/Chemical_reprogramming_trajectory.R: -------------------------------------------------------------------------------- 1 | options(stringsAsFactors = FALSE) 2 | 3 | ### Load libraries 4 | library(RColorBrewer) 5 | library(ggplot2) 6 | 7 | ### Read in data 8 | data <- read.csv("Chemical_reprogramming_trajectory.csv", row.names = 1) 9 | 10 | ### Set colors to identify cell types 11 | # Time points, in the order shared by the factor levels and the colour palette 12 | batch_levels <- c("MEF","SI5D","SI12D","XEN","SII8D","SII12D", 13 | "SIII3D","SIII6D","SIII8D","SIII10D","SIII15D","SIII21D") 14 | data$Batch <- factor(data$Batch, levels = batch_levels, ordered = TRUE) 15 | myColors <- c("#CC60D8", "#9333B4", "#3701A5", "#120FC0", "#024D63", "#007E00", 16 | "#73BB00", "#C3DF23", "#FFF000", "#FED203", "#FF6C01", "#FE0009") 17 | names(myColors) <- batch_levels 18 | # `name` is the legend title, so it takes a single string rather than the level vector; 19 | # the legend is hidden below, so the rendered figure does not change 20 | colScale <- scale_colour_manual(name = "Batch", 21 | 
values = myColors) 22 | 23 | ### Randomize the order of cells before plotting 24 | set.seed(1011) # fixed seed so the shuffled row order is reproducible 25 | data_plot <- data[sample(rownames(data)),] 26 | 27 | ### Plot 28 | p <- ggplot() + 29 | geom_point(data = data_plot, aes(x=data_dim_1, y=data_dim_2, color = Batch), cex = .5) + 30 | colScale + 31 | theme_classic() + 32 | labs(x = "Component 1") + 33 | labs(y = "Component 2") + 34 | theme(legend.position="none") 35 | 36 | ### Output figures 37 | tiff(filename = "Chemical_reprogramming_trajectory.tiff", width = 7, height = 7, units = 'in', 38 | res = 300, compression = 'none') 39 | print(p) 40 | dev.off() 41 | -------------------------------------------------------------------------------- /Figure3A/Chemical_reprogramming_trajectory.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChengLiLab/CSC2018/345c4c201e9ed401e59054d463c649f3feebcdbf/Figure3A/Chemical_reprogramming_trajectory.tiff -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 ChengLiLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSC2018 2 | Code for our paper "Single-Cell RNA-Seq Reveals Dynamic Early Embryonic-like Programs during Chemical Reprogramming", published in Cell Stem Cell in 2018 3 | 4 | # Information for this work 5 | The paper can be downloaded from https://www.ncbi.nlm.nih.gov/pubmed/29937202 6 | 7 | # Prerequisites 8 | First download the processed data GSE114952_AGG.tar.gz from GEO:GSE114952 9 | 10 | # Future directions 11 | Consider making a complete notebook for this work. 12 | -------------------------------------------------------------------------------- /pre-processing.R: -------------------------------------------------------------------------------- 1 | ################################################ 2 | # Paper can be downloaded from: # 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 # 4 | # Cheng Li Lab, Peking University # 5 | # Qian Zhang(q.zhang.012@gmail.com) # 6 | ################################################ 7 | options(stringsAsFactors = F) 8 | 9 | ### load libraries ----- 10 | library(monocle) 11 | 12 | ### load data ---- 13 | # Due to GitHub's limitations, you should first download the 14 | # processed data GSE114952_AGG.tar.gz from GEO:GSE114952. 
15 | load("Data/matrix.RData") # presumably provides the object `matrix` passed to newCellDataSet() below -- confirm 16 | pd <- read.csv("Data/pData.csv", row.names = 1) # table used as phenoData 17 | fd <- read.csv("Data/fData.csv", row.names = 1) # table used as featureData 18 | 19 | ### pre-processing ---- 20 | AGG <- newCellDataSet(matrix, 21 | phenoData = new("AnnotatedDataFrame", pd), 22 | featureData = new("AnnotatedDataFrame", fd), 23 | lowerDetectionLimit=0.5, 24 | expressionFamily=negbinomial.size()) 25 | 26 | # estimate_size_and_dispersion, eval=TRUE 27 | AGG <- estimateSizeFactors(AGG) 28 | AGG <- estimateDispersions(AGG, cores = 3) 29 | AGG <- detectGenes(AGG, min_expr = 0.1) 30 | 31 | # export raw data 32 | data_raw <- exprs(AGG) 33 | data_raw <- as.matrix(data_raw) 34 | dim(data_raw) # printed for inspection only 35 | 36 | # normalize data: divide every column by its column sum, then rescale by the median column sum 37 | data_norm <- sweep(data_raw,2,colSums(data_raw),'/')*median(colSums(data_raw)) 38 | 39 | # Convert to data frame 40 | data_raw <- as.data.frame(data_raw) 41 | data_norm <- as.data.frame(data_norm) 42 | 43 | ### save results ---- 44 | save(AGG, file = "Data/AGG_estimate_size_and_dispersion.Robj") # loaded again by the tSNE_Stage*.R scripts 45 | save(data_raw, data_norm, file = "Data/data_raw_norm.Robj") # loaded again by the tSNE_Stage*_plot.R scripts 46 | sessionInfo() 47 | -------------------------------------------------------------------------------- /tSNE_StageI.R: -------------------------------------------------------------------------------- 1 | ################################################ 2 | # Paper can be downloaded from: # 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 # 4 | # Cheng Li Lab, Peking University # 5 | # Qian Zhang(q.zhang.012@gmail.com) # 6 | ################################################ 7 | options(stringsAsFactors = F) 8 | 9 | ### load libraries ---- 10 | library(viridis) 11 | library(monocle) 12 | 13 | ### Load data ---- 14 | load("Data/AGG_estimate_size_and_dispersion.Robj") 15 | 16 | ### Subset SI12D and XEN cells ---- 17 | AGG2 <- AGG[, pData(AGG)$Batch %in% c("SI12D","XEN")] 18 | AGG2 <- detectGenes(AGG2, min_expr = 0.1) 19 | 20 | ### t-SNE start ---- 21 | mean <- 0.5 # lower bound on mean_expression for ordering genes; also part of the output file name 22 | disp_table <- dispersionTable(AGG2) 23 | ordering_genes 
<- subset(disp_table, 24 | mean_expression >= mean & 25 | dispersion_empirical >= 1 * dispersion_fit)$gene_id 26 | 27 | AGG2 <- setOrderingFilter(AGG2, ordering_genes) 28 | plot_ordering_genes(AGG2) 29 | 30 | dim <- 7 31 | # Embed the cells in two dimensions with t-SNE, then group them into 3 clusters 32 | AGG2 <- reduceDimension(AGG2, max_components = 2, norm_method = 'log', 33 | reduction_method = 'tSNE', num_dim = dim, verbose = TRUE) # Change 2 num_dim 2..15 34 | AGG2 <- clusterCells(AGG2, num_clusters = 3, verbose = TRUE) 35 | 36 | ### save t-SNE dimensions ---- 37 | cds <- AGG2 38 | x=1 39 | y=2 40 | color_by="Cluster" 41 | markers=NULL 42 | show_cell_names=FALSE 43 | cell_size=1.5 44 | cell_name_size=2 45 | if (is.null(cds@reducedDimA) || length(pData(cds)$Cluster) == 0){ 46 | stop("Error: Clustering is not performed yet. Please call clusterCells() before calling this function.") 47 | } 48 | 49 | gene_short_name <- NULL 50 | sample_name <- NULL 51 | data_dim_1 <- NULL 52 | data_dim_2 <- NULL 53 | 54 | lib_info <- pData(cds) 55 | 56 | tSNE_dim_coords <- reducedDimA(cds) 57 | data_df <- data.frame(t(tSNE_dim_coords[c(x,y),])) # rows x and y of the coordinate matrix, transposed into two columns 58 | colnames(data_df) <- c("data_dim_1", "data_dim_2") 59 | data_df$sample_name <- colnames(cds) 60 | data_df <- merge(data_df, lib_info, by.x="sample_name", by.y="row.names") 61 | 62 | # save dimensions (the output directory is created first so write.csv() can open the file) 63 | dir.create("StageI", showWarnings = FALSE) 64 | write.csv(data_df, file = paste0("StageI/AGG_SI12D_XEN_mean",mean,"_dim",dim,".csv"), quote = FALSE) -------------------------------------------------------------------------------- /tSNE_StageII.R: -------------------------------------------------------------------------------- 1 | ################################################ 2 | # Paper can be downloaded from: # 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 # 4 | # Cheng Li Lab, Peking University # 5 | # Qian Zhang(q.zhang.012@gmail.com) # 6 | ################################################ 7 | options(stringsAsFactors = FALSE) 8 | 9 | ### load libraries ---- 10 | library(viridis) 11 | library(monocle) 12 | 13 | ### Load data ---- 14 | 
load("Data/AGG_estimate_size_and_dispersion.Robj") 15 | 16 | # Subset Stage II cells 17 | AGG3 <- AGG[, pData(AGG)$Batch %in% c("XEN","SII8D","SII12D")] 18 | AGG3 <- detectGenes(AGG3, min_expr = 0.1) 19 | 20 | # Delete Feeders (the barcode list is read as tab-separated UTF-16 text) 21 | feeder <- read.csv("Data/feeders in Stage II.csv", stringsAsFactors=FALSE,fileEncoding="utf-16", sep = "\t") 22 | l <- as.character(pData(AGG3)$Barcode) %in% feeder$Barcode 23 | AGG3 <- AGG3[,!l ] 24 | 25 | ### t-SNE start ---- 26 | mean <- 0.4 27 | disp_table <- dispersionTable(AGG3) 28 | ordering_genes <- subset(disp_table, 29 | mean_expression >= mean & 30 | dispersion_empirical >= 1 * dispersion_fit)$gene_id 31 | n <- length(ordering_genes) 32 | AGG3 <- setOrderingFilter(AGG3, ordering_genes) 33 | plot_ordering_genes(AGG3) 34 | 35 | dim <- 10 36 | # Embed the cells in two dimensions with t-SNE ('tSNE' is the spelling tSNE_StageI.R passes to reduceDimension()), then group them into 5 clusters 37 | AGG3 <- reduceDimension(AGG3, max_components = 2, norm_method = 'log', 38 | reduction_method = 'tSNE', num_dim = dim, verbose = TRUE) # Change 2 num_dim 2..15 39 | AGG3 <- clusterCells(AGG3, num_clusters = 5, verbose = TRUE) 40 | 41 | ### save t-SNE dimensions ---- 42 | cds <- AGG3 43 | x=1 44 | y=2 45 | color_by="Cluster" 46 | markers=NULL 47 | show_cell_names=FALSE 48 | cell_size=1.5 49 | cell_name_size=2 50 | if (is.null(cds@reducedDimA) || length(pData(cds)$Cluster) == 0){ 51 | stop("Error: Clustering is not performed yet. 
Please call clusterCells() before calling this function.") 52 | } 53 | 54 | gene_short_name <- NULL 55 | sample_name <- NULL 56 | data_dim_1 <- NULL 57 | data_dim_2 <- NULL 58 | 59 | lib_info <- pData(cds) 60 | 61 | tSNE_dim_coords <- reducedDimA(cds) # `t-SNE_dim_coords` is not a valid R name (it parses as a subtraction) 62 | data_df <- data.frame(t(tSNE_dim_coords[c(x,y),])) 63 | colnames(data_df) <- c("data_dim_1", "data_dim_2") 64 | data_df$sample_name <- colnames(cds) 65 | data_df <- merge(data_df, lib_info, by.x="sample_name", by.y="row.names") 66 | 67 | # save dimensions (the output directory is created first so write.csv() can open the file) 68 | dir.create("StageII", showWarnings = FALSE) 69 | write.csv(data_df, file = paste0("StageII/AGG_SII_mean",mean,"_dim",dim,".csv"), quote = FALSE) 70 | -------------------------------------------------------------------------------- /tSNE_StageII_plot.R: -------------------------------------------------------------------------------- 1 | ################################################ 2 | # Paper can be downloaded from: # 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 # 4 | # Cheng Li Lab, Peking University # 5 | # Qian Zhang(q.zhang.012@gmail.com) # 6 | ################################################ 7 | options(stringsAsFactors = FALSE) 8 | 9 | ### load libraries ---- 10 | library(ggplot2) 11 | library(RColorBrewer) 12 | 13 | ### load data ---- 14 | load("Data/data_raw_norm.Robj") 15 | gene_id <- read.csv("Data/fData.csv", stringsAsFactors = FALSE,row.names = 1) 16 | SII_tsne <- read.csv("StageII/AGG_SII_mean0.4_dim10.csv", stringsAsFactors = FALSE, row.names = 1) 17 | set.seed(1011) 18 | 19 | ### set colors for time points ---- 20 | SII_tsne$Batch <- factor(SII_tsne$Batch, 21 | levels = c("XEN","SII8D","SII12D"),ordered = TRUE) 22 | myColors <- c("#120FC0", "#024D63", "#007E00") 23 | names(myColors) <- c("XEN","SII8D","SII12D") 24 | colScale <- scale_colour_manual(name = "Batch",values = myColors) # `name` is the legend title: one string, not the level vector 25 | 26 | ### plot t-SNE with time point information ---- 27 | p <- ggplot() + 28 | geom_point(data = SII_tsne, aes(x=data_dim_1, y=data_dim_2, color = Batch)) + 29 | colScale + 30 | theme_classic() 
+ 31 | # ggtitle("SII") + 32 | labs(x = " ") + 33 | labs(y = " ") + 34 | # theme(plot.title = element_text(size = 30, face = "bold", color = "steelblue")) + 35 | theme(legend.position="none", axis.text=element_blank(), axis.ticks = element_blank(), axis.line = element_blank()) 36 | 37 | # save figure 38 | tiff(filename = "StageII/tSNE_SII_Batch.tiff", width = 7, height = 7, units = 'in', res = 300, compression = "none") 39 | print(p) 40 | dev.off() 41 | 42 | ### plot t-SNE with genes expression information ---- 43 | data <- SII_tsne[,c(2,3,4,6,10)] # columns picked by position; the code below relies on data_dim_1, data_dim_2 and Barcode being among them 44 | 45 | # subset data 46 | barcode <- as.character(data$Barcode) 47 | data_sub <- data_raw[,barcode] 48 | summary(colnames(data_sub) %in% data$Barcode) 49 | 50 | ### plot t-SNE with gene expression information ---- 51 | genes <- c("Sox17", "Sall4", "Oct4", "Zscan4c") 52 | n <- length(barcode) 53 | for (gene in genes) { 54 | id <- gene_id[gene_id$gene_short_name %in% gene,] 55 | id <- as.vector(as.matrix(id)) 56 | id <- id[1] # first field of the matching fData row, used as the row key into data_sub 57 | l <- apply(data_sub[id,] - .1,2,sum) + .1 # per-cell value for this gene (the -.1/+.1 cancel for a single row) 58 | f <- l == 0 59 | l <- log2(l) 60 | l[f] <- NA # zero counts become NA so those cells are drawn in grey below 61 | mi <- min(l,na.rm=TRUE) 62 | ma <- max(l,na.rm=TRUE) 63 | data.tsne <- as.data.frame(data) # start from the coordinate table; `data.tsne` was previously read before being defined 64 | data.tsne$UMI <- l 65 | l <- is.na(data.tsne$UMI) 66 | 67 | data_grey <- data.tsne[l,] 68 | data_col <- data.tsne[!l,] 69 | data_col <- data_col[order(data_col$UMI),] # ascending, so the highest values are drawn last 70 | data_col <- sample(data_col) # NOTE(review): sample() on a data frame permutes its columns, not its rows, so the draw order above is kept -- confirm intent 71 | 72 | # set colors 73 | myPalette <- colorRampPalette(c(rev(brewer.pal(n = 9,name = "Greys")[1:2]), brewer.pal(n = 9,name = "OrRd"), brewer.pal(n = 9,name = "OrRd")[9])) 74 | sc <- scale_colour_gradientn(colours = myPalette(256), limits=c(mi, ma)) 75 | p <- ggplot() + 76 | geom_point(data = data_grey, aes(x=data_dim_1, y=data_dim_2), color = "#F0F0F0", cex = 2.5) + 77 | geom_point(data = data_col, aes(x=data_dim_1, y=data_dim_2, color = UMI), cex = 2.5) + 78 | sc + 79 | theme_void() + 80 | theme(legend.position=("none")) 81 | 82 | # save figures for each gene 83 | 
tiff(filename = paste0("StageII/SII_tSNE_",gene,"_mean",0.4,"_dim",10,".tiff"), width = 7, height = 7, units = 'in', res = 300, compression = "none") 84 | print(p) 85 | dev.off() 86 | } 87 | -------------------------------------------------------------------------------- /tSNE_StageI_plot.R: -------------------------------------------------------------------------------- 1 | ################################################ 2 | # Paper can be downloaded from: # 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 # 4 | # Cheng Li Lab, Peking University # 5 | # Qian Zhang(q.zhang.012@gmail.com) # 6 | ################################################ 7 | options(stringsAsFactors = FALSE) 8 | 9 | ### load libraries ---- 10 | library(ggplot2) 11 | library(RColorBrewer) 12 | 13 | ### load data ---- 14 | load("Data/data_raw_norm.Robj") 15 | gene_id <- read.csv("Data/fData.csv", stringsAsFactors = FALSE,row.names = 1) 16 | SI12D_XEN_tsne <- read.csv("StageI/AGG_SI12D_XEN_mean0.5_dim7.csv", stringsAsFactors = FALSE, row.names = 1) 17 | set.seed(1011) 18 | 19 | ### set colors for time points ---- 20 | SI12D_XEN_tsne$Batch <- factor(SI12D_XEN_tsne$Batch, 21 | levels = c("XEN","SI12D"),ordered = TRUE) 22 | myColors <- c("#F5A623","#0365BF") 23 | names(myColors) <- c("XEN","SI12D") 24 | colScale <- scale_colour_manual(name = "Batch",values = myColors) # `name` is the legend title: one string, not the level vector 25 | 26 | ### plot t-SNE with time point information ---- 27 | p <- ggplot() + 28 | geom_point(data = SI12D_XEN_tsne, aes(x=data_dim_1, y=data_dim_2, color = Batch)) + 29 | colScale + 30 | theme_classic() + 31 | # ggtitle("SI12D_XEN") + 32 | labs(x = " ") + 33 | labs(y = " ") + 34 | # theme(plot.title = element_text(size = 30, face = "bold", color = "steelblue")) + 35 | theme(legend.position="none", axis.text=element_blank(), axis.ticks = element_blank(), axis.line = element_blank()) 36 | 37 | # save figure 38 | tiff(filename = "StageI/tSNE_SI12D_XEN_Batch.tiff", width = 7, height = 7, units = 'in', res = 300, compression = "none") 39 | 
print(p) 40 | dev.off() 41 | 42 | ### plot t-SNE with genes expression information ---- 43 | data <- SI12D_XEN_tsne[,c(2,3,4,6,10)] # columns picked by position; the code below relies on data_dim_1, data_dim_2 and Barcode being among them 44 | 45 | # subset data 46 | barcode <- as.character(data$Barcode) 47 | data_sub <- data_raw[,barcode] 48 | summary(colnames(data_sub) %in% data$Barcode) 49 | 50 | ### plot t-SNE with gene expression information ---- 51 | genes <- c("Prrx1", "Sox17", "Sall4") 52 | n <- length(barcode) 53 | for (gene in genes) { 54 | id <- gene_id[gene_id$gene_short_name %in% gene,] 55 | id <- as.vector(as.matrix(id)) 56 | id <- id[1] # first field of the matching fData row, used as the row key into data_sub 57 | l <- apply(data_sub[id,] - .1,2,sum) + .1 # per-cell value for this gene (the -.1/+.1 cancel for a single row) 58 | f <- l == 0 59 | l <- log2(l) 60 | l[f] <- NA # zero counts become NA so those cells are drawn in grey below 61 | mi <- min(l,na.rm=TRUE) 62 | ma <- max(l,na.rm=TRUE) 63 | data.tsne <- as.data.frame(data) # start from the coordinate table; `data.tsne` was previously read before being defined 64 | data.tsne$UMI <- l 65 | l <- is.na(data.tsne$UMI) 66 | 67 | data_grey <- data.tsne[l,] 68 | data_col <- data.tsne[!l,] 69 | data_col <- data_col[order(data_col$UMI),] # ascending, so the highest values are drawn last 70 | data_col <- sample(data_col) # NOTE(review): sample() on a data frame permutes its columns, not its rows, so the draw order above is kept -- confirm intent 71 | 72 | # set colors 73 | myPalette <- colorRampPalette(c(rev(brewer.pal(n = 9,name = "Greys")[1:2]), brewer.pal(n = 9,name = "OrRd"), brewer.pal(n = 9,name = "OrRd")[9])) 74 | sc <- scale_colour_gradientn(colours = myPalette(256), limits=c(mi, ma)) 75 | p <- ggplot() + 76 | geom_point(data = data_grey, aes(x=data_dim_1, y=data_dim_2), color = "#F0F0F0", cex = 2.5) + 77 | geom_point(data = data_col, aes(x=data_dim_1, y=data_dim_2, color = UMI), cex = 2.5) + 78 | sc + 79 | theme_void() + 80 | theme(legend.position=("none")) 81 | 82 | # save figures for each gene 83 | tiff(filename = paste0("StageI/SI12D_XEN_tSNE_",gene,"_mean",0.5,"_dim",7,".tiff"), width = 7, height = 7, units = 'in', res = 300, compression = "none") 84 | print(p) 85 | dev.off() 86 | } --------------------------------------------------------------------------------