├── Data
    ├── fData.csv
    ├── feeders in Stage II.csv
    ├── feeders in Stage III.csv
    └── pData.csv
├── Figure2B
    └── tSNE_Fiure2B.csv
├── Figure3A
    ├── Chemical_reprogramming_trajectory.R
    ├── Chemical_reprogramming_trajectory.csv
    └── Chemical_reprogramming_trajectory.tiff
├── LICENSE
├── README.md
├── pre-processing.R
├── tSNE_StageI.R
├── tSNE_StageII.R
├── tSNE_StageII_plot.R
└── tSNE_StageI_plot.R


/Data/feeders in Stage II.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChengLiLab/CSC2018/345c4c201e9ed401e59054d463c649f3feebcdbf/Data/feeders in Stage II.csv


--------------------------------------------------------------------------------
/Data/feeders in Stage III.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChengLiLab/CSC2018/345c4c201e9ed401e59054d463c649f3feebcdbf/Data/feeders in Stage III.csv


--------------------------------------------------------------------------------
/Figure3A/Chemical_reprogramming_trajectory.R:
--------------------------------------------------------------------------------
 1 | options(stringsAsFactors = FALSE)
 2 | 
 3 | ### Load libraries
 4 | library(RColorBrewer)
 5 | library(ggplot2)
 6 | 
 7 | ### Read in data
 8 | # The relative path assumes the working directory is Figure3A. The code
 9 | # below uses the columns Batch, data_dim_1 and data_dim_2 of this table.
10 | data <- read.csv("Chemical_reprogramming_trajectory.csv", row.names = 1)
11 | 
12 | ### Set colors to identify cell types
13 | # Batch labels in the order used for the factor levels; defined once so
14 | # the levels and the color names cannot drift apart.
15 | stage_levels <- c("MEF", "SI5D", "SI12D", "XEN", "SII8D", "SII12D",
16 |                   "SIII3D", "SIII6D", "SIII8D", "SIII10D", "SIII15D",
17 |                   "SIII21D")
18 | data$Batch <- factor(data$Batch, levels = stage_levels, ordered = TRUE)
19 | myColors <- c("#CC60D8", "#9333B4", "#3701A5", "#120FC0", "#024D63", "#007E00",
20 |               "#73BB00", "#C3DF23", "#FFF000", "#FED203", "#FF6C01", "#FE0009")
21 | names(myColors) <- stage_levels
22 | # `name` is the legend title, so it takes a single string rather than the
23 | # vector of levels (the legend itself is hidden in the plot below).
24 | colScale <- scale_colour_manual(name = "Batch", values = myColors)
25 | 
26 | ### Randomize the order of cells before plotting
27 | # Rows are shuffled with a fixed seed, presumably so that no batch is
28 | # systematically drawn on top of the others.
29 | set.seed(1011)
30 | data_plot <- data[sample(rownames(data)), ]
31 | 
32 | ### Plot
33 | p <- ggplot() +
34 |   geom_point(data = data_plot,
35 |              aes(x = data_dim_1, y = data_dim_2, color = Batch),
36 |              size = 0.5) +
37 |   colScale +
38 |   theme_classic() +
39 |   labs(x = "Component 1", y = "Component 2") +
40 |   theme(legend.position = "none")
41 | 
42 | ### Output figures
43 | tiff(filename = "Chemical_reprogramming_trajectory.tiff", width = 7, height = 7,
44 |      units = "in", res = 300, compression = "none")
45 | print(p)
46 | dev.off()
47 | 


--------------------------------------------------------------------------------
/Figure3A/Chemical_reprogramming_trajectory.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChengLiLab/CSC2018/345c4c201e9ed401e59054d463c649f3feebcdbf/Figure3A/Chemical_reprogramming_trajectory.tiff


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 ChengLiLab
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CSC2018
 2 | Code for our paper "Single-Cell RNA-Seq Reveals Dynamic Early Embryonic-like Programs during Chemical Reprogramming", published in Cell Stem Cell in 2018.
 3 | 
 4 | # Information for this work
 5 | The paper can be downloaded from https://www.ncbi.nlm.nih.gov/pubmed/29937202
 6 | 
 7 | # pre-requirement
 8 | Before running the scripts, first download the processed data GSE114952_AGG.tar.gz from GEO:GSE114952.
 9 | 
10 | # future direction
11 | A complete notebook for this work may be added in the future.
12 | 


--------------------------------------------------------------------------------
/pre-processing.R:
--------------------------------------------------------------------------------
 1 | ################################################
 2 | # Paper can be downloaded from:                #
 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 #
 4 | # Cheng Li Lab, Peking University              #
 5 | # Qian Zhang(q.zhang.012@gmail.com)            #
 6 | ################################################
 7 | options(stringsAsFactors = FALSE)
 8 | 
 9 | ### load libraries -----
10 | library(monocle)
11 | 
12 | ### load data ----
13 | # Due to GitHub's file size limit, you should first download the
14 | # processed data GSE114952_AGG.tar.gz from GEO:GSE114952.
15 | # The rest of this script needs an object named `matrix` from the .RData
16 | # file; check for it, because otherwise the name would resolve to
17 | # base::matrix and newCellDataSet() would fail with an obscure error.
18 | loaded_objects <- load("Data/matrix.RData")
19 | stopifnot("matrix" %in% loaded_objects)
20 | pd <- read.csv("Data/pData.csv", row.names = 1)
21 | fd <- read.csv("Data/fData.csv", row.names = 1)
22 | 
23 | ### pre-processing ----
24 | # Build the monocle CellDataSet from the expression matrix, with pd as
25 | # phenoData and fd as featureData.
26 | AGG <- newCellDataSet(matrix,
27 |                       phenoData = new("AnnotatedDataFrame", pd),
28 |                       featureData = new("AnnotatedDataFrame", fd),
29 |                       lowerDetectionLimit = 0.5,
30 |                       expressionFamily = negbinomial.size())
31 | 
32 | # estimate size factors and dispersions
33 | AGG <- estimateSizeFactors(AGG)
34 | AGG <- estimateDispersions(AGG, cores = 3)
35 | AGG <- detectGenes(AGG, min_expr = 0.1)
36 | 
37 | # export raw data
38 | data_raw <- as.matrix(exprs(AGG))
39 | dim(data_raw)
40 | 
41 | # normalize data: divide every column by its total, then multiply by the
42 | # median column total
43 | cell_totals <- colSums(data_raw)
44 | data_norm <- sweep(data_raw, 2, cell_totals, "/") * median(cell_totals)
45 | 
46 | # Convert to data frame
47 | data_raw <- as.data.frame(data_raw)
48 | data_norm <- as.data.frame(data_norm)
49 | 
50 | ### save results ----
51 | save(AGG, file = "Data/AGG_estimate_size_and_dispersion.Robj")
52 | save(data_raw, data_norm, file = "Data/data_raw_norm.Robj")
53 | sessionInfo()
54 | 


--------------------------------------------------------------------------------
/tSNE_StageI.R:
--------------------------------------------------------------------------------
 1 | ################################################
 2 | # Paper can be downloaded from:                #
 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 #
 4 | # Cheng Li Lab, Peking University              #
 5 | # Qian Zhang(q.zhang.012@gmail.com)            #
 6 | ################################################
 7 | options(stringsAsFactors = FALSE)
 8 | 
 9 | ### load libraries ----
10 | library(viridis)
11 | library(monocle)
12 | 
13 | ### Load data ----
14 | # This file is written by pre-processing.R and contains the object `AGG`.
15 | load("Data/AGG_estimate_size_and_dispersion.Robj")
16 | 
17 | ### Subset SI12D and XEN cells ----
18 | AGG2 <- AGG[, pData(AGG)$Batch %in% c("SI12D", "XEN")]
19 | AGG2 <- detectGenes(AGG2, min_expr = 0.1)
20 | 
21 | ### t-SNE start ----
22 | # Named mean_cutoff / num_dim rather than mean / dim so that the base
23 | # functions of those names are not masked.
24 | mean_cutoff <- 0.5
25 | disp_table <- dispersionTable(AGG2)
26 | # Keep genes at or above the mean-expression cutoff whose empirical
27 | # dispersion is at least the fitted dispersion.
28 | ordering_genes <- subset(disp_table,
29 |                          mean_expression >= mean_cutoff &
30 |                            dispersion_empirical >= 1 * dispersion_fit)$gene_id
31 | 
32 | AGG2 <- setOrderingFilter(AGG2, ordering_genes)
33 | plot_ordering_genes(AGG2)
34 | 
35 | num_dim <- 7 # change num_dim within 2..15 to explore other settings
36 | AGG2 <- reduceDimension(AGG2, max_components = 2, norm_method = "log",
37 |                         reduction_method = "tSNE", num_dim = num_dim,
38 |                         verbose = TRUE)
39 | AGG2 <- clusterCells(AGG2, num_clusters = 3, verbose = TRUE)
40 | 
41 | ### save t-SNE dimensions ----
42 | # `||` because this is a single scalar condition.
43 | if (is.null(reducedDimA(AGG2)) || length(pData(AGG2)$Cluster) == 0) {
44 |   stop("Error: Clustering is not performed yet. Please call clusterCells() before calling this function.")
45 | }
46 | 
47 | # One row per cell: the first two reduced dimensions joined to pData.
48 | tSNE_dim_coords <- reducedDimA(AGG2)
49 | data_df <- data.frame(t(tSNE_dim_coords[c(1, 2), ]))
50 | colnames(data_df) <- c("data_dim_1", "data_dim_2")
51 | data_df$sample_name <- colnames(AGG2)
52 | data_df <- merge(data_df, pData(AGG2), by.x = "sample_name", by.y = "row.names")
53 | 
54 | # save dimensions
55 | # Create the output directory if it is missing, so write.csv() cannot fail
56 | # on it.
57 | dir.create("StageI", showWarnings = FALSE)
58 | write.csv(data_df,
59 |           file = paste0("StageI/AGG_SI12D_XEN_mean", mean_cutoff, "_dim", num_dim, ".csv"),
60 |           quote = FALSE)


--------------------------------------------------------------------------------
/tSNE_StageII.R:
--------------------------------------------------------------------------------
 1 | ################################################
 2 | # Paper can be downloaded from:                #
 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 #
 4 | # Cheng Li Lab, Peking University              #
 5 | # Qian Zhang(q.zhang.012@gmail.com)            #
 6 | ################################################
 7 | options(stringsAsFactors = FALSE)
 8 | 
 9 | ### load libraries ----
10 | library(viridis)
11 | library(monocle)
12 | 
13 | ### Load data ----
14 | # This file is written by pre-processing.R and contains the object `AGG`.
15 | load("Data/AGG_estimate_size_and_dispersion.Robj")
16 | 
17 | # Subset Stage II cells
18 | AGG3 <- AGG[, pData(AGG)$Batch %in% c("XEN", "SII8D", "SII12D")]
19 | AGG3 <- detectGenes(AGG3, min_expr = 0.1)
20 | 
21 | # Delete Feeders
22 | # Drop every cell whose Barcode is listed in the feeder table.
23 | feeder <- read.csv("Data/feeders in Stage II.csv", stringsAsFactors = FALSE,
24 |                    fileEncoding = "utf-16", sep = "\t")
25 | is_feeder <- as.character(pData(AGG3)$Barcode) %in% feeder$Barcode
26 | AGG3 <- AGG3[, !is_feeder]
27 | 
28 | ### t-SNE start ----
29 | # Named mean_cutoff / num_dim rather than mean / dim so that the base
30 | # functions of those names are not masked.
31 | mean_cutoff <- 0.4
32 | disp_table <- dispersionTable(AGG3)
33 | # Keep genes at or above the mean-expression cutoff whose empirical
34 | # dispersion is at least the fitted dispersion.
35 | ordering_genes <- subset(disp_table,
36 |                          mean_expression >= mean_cutoff &
37 |                            dispersion_empirical >= 1 * dispersion_fit)$gene_id
38 | AGG3 <- setOrderingFilter(AGG3, ordering_genes)
39 | plot_ordering_genes(AGG3)
40 | 
41 | num_dim <- 10 # change num_dim within 2..15 to explore other settings
42 | # The method is spelled "tSNE" (as in tSNE_StageI.R); "t-SNE" is not an
43 | # accepted reduction_method value.
44 | AGG3 <- reduceDimension(AGG3, max_components = 2, norm_method = "log",
45 |                         reduction_method = "tSNE", num_dim = num_dim,
46 |                         verbose = TRUE)
47 | AGG3 <- clusterCells(AGG3, num_clusters = 5, verbose = TRUE)
48 | 
49 | ### save t-SNE dimensions ----
50 | # `||` because this is a single scalar condition.
51 | if (is.null(reducedDimA(AGG3)) || length(pData(AGG3)$Cluster) == 0) {
52 |   stop("Error: Clustering is not performed yet. Please call clusterCells() before calling this function.")
53 | }
54 | 
55 | # One row per cell: the first two reduced dimensions joined to pData.
56 | # (A hyphen is not allowed in an R name, hence tSNE_dim_coords.)
57 | tSNE_dim_coords <- reducedDimA(AGG3)
58 | data_df <- data.frame(t(tSNE_dim_coords[c(1, 2), ]))
59 | colnames(data_df) <- c("data_dim_1", "data_dim_2")
60 | data_df$sample_name <- colnames(AGG3)
61 | data_df <- merge(data_df, pData(AGG3), by.x = "sample_name", by.y = "row.names")
62 | 
63 | # save dimensions
64 | # Create the output directory if it is missing, so write.csv() cannot fail
65 | # on it.
66 | dir.create("StageII", showWarnings = FALSE)
67 | write.csv(data_df,
68 |           file = paste0("StageII/AGG_SII_mean", mean_cutoff, "_dim", num_dim, ".csv"),
69 |           quote = FALSE)
70 | 


--------------------------------------------------------------------------------
/tSNE_StageII_plot.R:
--------------------------------------------------------------------------------
 1 | ################################################
 2 | # Paper can be downloaded from:                #
 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 #
 4 | # Cheng Li Lab, Peking University              #
 5 | # Qian Zhang(q.zhang.012@gmail.com)            #
 6 | ################################################
 7 | options(stringsAsFactors = FALSE)
 8 | 
 9 | ### load libraries ----
10 | library(ggplot2)
11 | library(RColorBrewer)
12 | 
13 | ### load data ----
14 | # data_raw_norm.Robj (data_raw, data_norm) is written by pre-processing.R;
15 | # the t-SNE table is written by tSNE_StageII.R.
16 | load("Data/data_raw_norm.Robj")
17 | gene_id <- read.csv("Data/fData.csv", stringsAsFactors = FALSE, row.names = 1)
18 | SII_tsne <- read.csv("StageII/AGG_SII_mean0.4_dim10.csv",
19 |                      stringsAsFactors = FALSE, row.names = 1)
20 | set.seed(1011)
21 | 
22 | ### set colors for time points ----
23 | batch_levels <- c("XEN", "SII8D", "SII12D")
24 | SII_tsne$Batch <- factor(SII_tsne$Batch, levels = batch_levels, ordered = TRUE)
25 | myColors <- c("#120FC0", "#024D63", "#007E00")
26 | names(myColors) <- batch_levels
27 | # `name` is the legend title, so it takes a single string rather than the
28 | # vector of levels (the legend itself is hidden in the plot below).
29 | colScale <- scale_colour_manual(name = "Batch", values = myColors)
30 | 
31 | ### plot t-SNE with time point information ----
32 | p <- ggplot() +
33 |   geom_point(data = SII_tsne, aes(x = data_dim_1, y = data_dim_2, color = Batch)) +
34 |   colScale +
35 |   theme_classic() +
36 |   labs(x = " ", y = " ") +
37 |   theme(legend.position = "none", axis.text = element_blank(),
38 |         axis.ticks = element_blank(), axis.line = element_blank())
39 | 
40 | # save figure
41 | tiff(filename = "StageII/tSNE_SII_Batch.tiff", width = 7, height = 7,
42 |      units = "in", res = 300, compression = "none")
43 | print(p)
44 | dev.off()
45 | 
46 | ### plot t-SNE with genes expression information ----
47 | # Columns are picked by position; the code below needs data_dim_1,
48 | # data_dim_2 and Barcode to be among them.
49 | data <- SII_tsne[, c(2, 3, 4, 6, 10)]
50 | 
51 | # subset data
52 | barcode <- as.character(data$Barcode)
53 | data_sub <- data_raw[, barcode]
54 | summary(colnames(data_sub) %in% data$Barcode)
55 | 
56 | ### plot t-SNE with gene expression information ----
57 | genes <- c("Sox17", "Sall4", "Oct4", "Zscan4c")
58 | 
59 | # set colors (the same palette for every gene, so build it once)
60 | myPalette <- colorRampPalette(c(rev(brewer.pal(n = 9, name = "Greys")[1:2]),
61 |                                 brewer.pal(n = 9, name = "OrRd"),
62 |                                 brewer.pal(n = 9, name = "OrRd")[9]))
63 | 
64 | for (gene in genes) {
65 |   # Take the first annotation column of the fData row matching the gene
66 |   # symbol and use it as the row name in data_raw.
67 |   # NOTE(review): presumably that column holds the gene ID -- confirm
68 |   # against fData.csv.
69 |   id <- gene_id[gene_id$gene_short_name %in% gene, ]
70 |   id <- as.vector(as.matrix(id))
71 |   id <- id[1]
72 |   l <- apply(data_sub[id, ] - .1, 2, sum) + .1
73 |   # Cells with zero expression become NA so they can be drawn in grey.
74 |   f <- l == 0
75 |   l <- log2(l)
76 |   l[f] <- NA
77 |   mi <- min(l, na.rm = TRUE)
78 |   ma <- max(l, na.rm = TRUE)
79 |   # Start from the t-SNE table built above (`data`); the original line
80 |   # referred to `data.tsne` before it had ever been defined.
81 |   data.tsne <- as.data.frame(data)
82 |   data.tsne$UMI <- l
83 |   l <- is.na(data.tsne$UMI)
84 | 
85 |   data_grey <- data.tsne[l, ]
86 |   data_col <- data.tsne[!l, ]
87 |   # Ascending UMI order, so the highest-expressing cells are drawn last.
88 |   # The former sample(data_col) call was dropped: on a data frame it
89 |   # permutes columns, not rows, and so had no effect on the plot.
90 |   data_col <- data_col[order(data_col$UMI), ]
91 | 
92 |   sc <- scale_colour_gradientn(colours = myPalette(256), limits = c(mi, ma))
93 |   p <- ggplot() +
94 |     geom_point(data = data_grey, aes(x = data_dim_1, y = data_dim_2),
95 |                color = "#F0F0F0", size = 2.5) +
96 |     geom_point(data = data_col, aes(x = data_dim_1, y = data_dim_2, color = UMI),
97 |                size = 2.5) +
98 |     sc +
99 |     theme_void() +
100 |     theme(legend.position = "none")
101 | 
102 |   # save figures for each gene
103 |   tiff(filename = paste0("StageII/SII_tSNE_", gene, "_mean", 0.4, "_dim", 10, ".tiff"),
104 |        width = 7, height = 7, units = "in", res = 300, compression = "none")
105 |   print(p)
106 |   dev.off()
107 | }
108 | 


--------------------------------------------------------------------------------
/tSNE_StageI_plot.R:
--------------------------------------------------------------------------------
 1 | ################################################
 2 | # Paper can be downloaded from:                #
 3 | # https://www.ncbi.nlm.nih.gov/pubmed/29937202 #
 4 | # Cheng Li Lab, Peking University              #
 5 | # Qian Zhang(q.zhang.012@gmail.com)            #
 6 | ################################################
 7 | options(stringsAsFactors = FALSE)
 8 | 
 9 | ### load libraries ----
10 | library(ggplot2)
11 | library(RColorBrewer)
12 | 
13 | ### load data ----
14 | # data_raw_norm.Robj (data_raw, data_norm) is written by pre-processing.R;
15 | # the t-SNE table is written by tSNE_StageI.R.
16 | load("Data/data_raw_norm.Robj")
17 | gene_id <- read.csv("Data/fData.csv", stringsAsFactors = FALSE, row.names = 1)
18 | SI12D_XEN_tsne <- read.csv("StageI/AGG_SI12D_XEN_mean0.5_dim7.csv",
19 |                            stringsAsFactors = FALSE, row.names = 1)
20 | set.seed(1011)
21 | 
22 | ### set colors for time points ----
23 | batch_levels <- c("XEN", "SI12D")
24 | SI12D_XEN_tsne$Batch <- factor(SI12D_XEN_tsne$Batch,
25 |                                levels = batch_levels, ordered = TRUE)
26 | myColors <- c("#F5A623", "#0365BF")
27 | names(myColors) <- batch_levels
28 | # `name` is the legend title, so it takes a single string rather than the
29 | # vector of levels (the legend itself is hidden in the plot below).
30 | colScale <- scale_colour_manual(name = "Batch", values = myColors)
31 | 
32 | ### plot t-SNE with time point information ----
33 | p <- ggplot() +
34 |   geom_point(data = SI12D_XEN_tsne,
35 |              aes(x = data_dim_1, y = data_dim_2, color = Batch)) +
36 |   colScale +
37 |   theme_classic() +
38 |   labs(x = " ", y = " ") +
39 |   theme(legend.position = "none", axis.text = element_blank(),
40 |         axis.ticks = element_blank(), axis.line = element_blank())
41 | 
42 | # save figure
43 | tiff(filename = "StageI/tSNE_SI12D_XEN_Batch.tiff", width = 7, height = 7,
44 |      units = "in", res = 300, compression = "none")
45 | print(p)
46 | dev.off()
47 | 
48 | ### plot t-SNE with genes expression information ----
49 | # Columns are picked by position; the code below needs data_dim_1,
50 | # data_dim_2 and Barcode to be among them.
51 | data <- SI12D_XEN_tsne[, c(2, 3, 4, 6, 10)]
52 | 
53 | # subset data
54 | barcode <- as.character(data$Barcode)
55 | data_sub <- data_raw[, barcode]
56 | summary(colnames(data_sub) %in% data$Barcode)
57 | 
58 | ### plot t-SNE with gene expression information ----
59 | genes <- c("Prrx1", "Sox17", "Sall4")
60 | 
61 | # set colors (the same palette for every gene, so build it once)
62 | myPalette <- colorRampPalette(c(rev(brewer.pal(n = 9, name = "Greys")[1:2]),
63 |                                 brewer.pal(n = 9, name = "OrRd"),
64 |                                 brewer.pal(n = 9, name = "OrRd")[9]))
65 | 
66 | for (gene in genes) {
67 |   # Take the first annotation column of the fData row matching the gene
68 |   # symbol and use it as the row name in data_raw.
69 |   # NOTE(review): presumably that column holds the gene ID -- confirm
70 |   # against fData.csv.
71 |   id <- gene_id[gene_id$gene_short_name %in% gene, ]
72 |   id <- as.vector(as.matrix(id))
73 |   id <- id[1]
74 |   l <- apply(data_sub[id, ] - .1, 2, sum) + .1
75 |   # Cells with zero expression become NA so they can be drawn in grey.
76 |   f <- l == 0
77 |   l <- log2(l)
78 |   l[f] <- NA
79 |   mi <- min(l, na.rm = TRUE)
80 |   ma <- max(l, na.rm = TRUE)
81 |   # Start from the t-SNE table built above (`data`); the original line
82 |   # referred to `data.tsne` before it had ever been defined.
83 |   data.tsne <- as.data.frame(data)
84 |   data.tsne$UMI <- l
85 |   l <- is.na(data.tsne$UMI)
86 | 
87 |   data_grey <- data.tsne[l, ]
88 |   data_col <- data.tsne[!l, ]
89 |   # Ascending UMI order, so the highest-expressing cells are drawn last.
90 |   # The former sample(data_col) call was dropped: on a data frame it
91 |   # permutes columns, not rows, and so had no effect on the plot.
92 |   data_col <- data_col[order(data_col$UMI), ]
93 | 
94 |   sc <- scale_colour_gradientn(colours = myPalette(256), limits = c(mi, ma))
95 |   p <- ggplot() +
96 |     geom_point(data = data_grey, aes(x = data_dim_1, y = data_dim_2),
97 |                color = "#F0F0F0", size = 2.5) +
98 |     geom_point(data = data_col, aes(x = data_dim_1, y = data_dim_2, color = UMI),
99 |                size = 2.5) +
100 |     sc +
101 |     theme_void() +
102 |     theme(legend.position = "none")
103 | 
104 |   # save figures for each gene
105 |   tiff(filename = paste0("StageI/SI12D_XEN_tSNE_", gene, "_mean", 0.5, "_dim", 7, ".tiff"),
106 |        width = 7, height = 7, units = "in", res = 300, compression = "none")
107 |   print(p)
108 |   dev.off()
109 | }


--------------------------------------------------------------------------------