├── CNAME ├── DESCRIPTION ├── GeneSwitches.Rproj ├── NAMESPACE ├── R ├── binarization.R ├── compare_trajectories.R ├── convert_TI.R ├── data.R ├── find_switch_pathway.R ├── fit_logistic_regression.R ├── get_example_inputData.R ├── plot_gene_exp.R └── plot_timeline.R ├── README.Rmd ├── README.md ├── _config.yml ├── data-raw └── DATASET.R ├── data ├── gs_genelists.rda └── msigdb_h_c2_c5.rda └── man ├── binarize_exp.Rd ├── common_genes.Rd ├── common_genes_plot.Rd ├── convert_monocle2.Rd ├── convert_slingshot.Rd ├── distinct_genes.Rd ├── downsample_zeros.Rd ├── figures ├── README-binarization cutoff-1.png ├── README-binarization threshold-1.png ├── README-binarization_threshold-1.png ├── README-convert slingshot-1.png ├── README-distinct genes-1.png ├── README-pathways ridge plots-1.png ├── README-pathways-1.png ├── README-pathways_ridge_plots-1.png ├── README-plot common genes-1.png ├── README-plot distinct genes-1.png ├── README-plot exp1-1.png ├── README-plot exp1-2.png ├── README-plot exp2-1.png ├── README-plot pathway genes-1.png ├── README-plot pathways-1.png ├── README-plot scale timeline-1.png ├── README-plot_exp1-1.png ├── README-plot_exp1-2.png ├── README-plot_monocle_trajectory-1.png ├── README-plotexp-1.png ├── README-scale timeline-1.png ├── README-timeline example-1.png ├── README-timeline example-2.png └── README-unnamed-chunk-4-1.png ├── filter_switchgenes.Rd ├── find_switch_logistic_fastglm.Rd ├── find_switch_pathway.Rd ├── get_example_inputData.Rd ├── gs_genelists.Rd ├── merge_pathways.Rd ├── msigdb_h_c2_c5.Rd ├── phyper_pathway.Rd ├── plot_gene_exp.Rd ├── plot_monocle_State.Rd ├── plot_pathway_density.Rd ├── plot_timeline_ggplot.Rd ├── reduce_pathways.Rd └── subset_pseudotime.Rd /CNAME: -------------------------------------------------------------------------------- 1 | geneswitches.ddnetbio.com -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: GeneSwitches 2 | Type: Package 3 | Title: Discover the order of gene regulatory events during cell state transitions 4 | Version: 0.1.0 5 | Author: Elaine Yiqun CAO 6 | Maintainer: Elaine Yiqun CAO 7 | Description: This package discovers the order of gene regulatory events during 8 | cell state transitions at single-cell resolution. It works on any single-cell 9 | trajectory or pseudo-time ordering of cells to discover the genes that act as 10 | on/off switches between cell states and importantly the ordering at which these 11 | switches take place. 12 | License: What license is it under? 13 | Encoding: UTF-8 14 | LazyData: true 15 | Depends: R (>= 3.2) 16 | Imports: 17 | SingleCellExperiment (>= 1.6.0), 18 | ggplot2 (>= 3.1.0), 19 | RColorBrewer, 20 | plyr, 21 | mixtools 22 | RoxygenNote: 6.1.1.9000 23 | Suggests: 24 | testthat 25 | -------------------------------------------------------------------------------- /GeneSwitches.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageCheckArgs: --as-cran 22 | PackageRoxygenize: rd,collate,namespace,vignette 23 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(binarize_exp) 4 | export(common_genes) 5 | export(common_genes_plot) 6 | export(convert_monocle2) 7 | export(convert_slingshot) 8 | export(distinct_genes) 9 | export(filter_switchgenes) 10 | export(find_switch_logistic_fastglm) 11 | export(find_switch_pathway) 12 | export(get_example_inputData) 13 | export(plot_gene_exp) 14 | export(plot_monocle_State) 15 | export(plot_pathway_density) 16 | export(plot_timeline_ggplot) 17 | export(reduce_pathways) 18 | export(subset_pseudotime) 19 | import(Biobase) 20 | import(fastglm) 21 | import(ggplot2) 22 | import(parallel) 23 | import(plyr) 24 | importFrom(RColorBrewer,brewer.pal) 25 | importFrom(ggrepel,geom_text_repel) 26 | importFrom(ggridges,geom_density_ridges) 27 | importFrom(gridExtra,grid.arrange) 28 | importFrom(mixtools,normalmixEM) 29 | -------------------------------------------------------------------------------- /R/binarization.R: -------------------------------------------------------------------------------- 1 | 2 | .onAttach <- function(libname, pkgname) { 3 | packageStartupMessage("Welcome to GeneSwitches!") 4 | } 5 | 6 | #' @title Binarize gene expression 7 | #' 8 | #' @description This function generates on/off binarized data for gene expression 9 | #' 10 | #' @param sce SingleCellExperiment 11 | #' @param fix_cutoff Logical. if use fixed global cutoff for binarization, default FALSE 12 | #' @param binarize_cutoff fixed global cutoff for binarization, default 0.2 13 | #' @param ncores number of cores 14 | #' @return 15 | #' 16 | #' @import parallel 17 | #' @importFrom mixtools normalmixEM 18 | #' @export 19 | #' 20 | binarize_exp <- function(sce, fix_cutoff = FALSE, binarize_cutoff = 0.2, ncores = 3) { 21 | # calculate zero percentage 22 | zerop_g <- c() 23 | expdata <- assays(sce)$expdata 24 | for (i in 1:nrow(expdata)) { 25 | zp <- length(which(expdata[i, ] == 0))/ncol(expdata) 26 | zerop_g <- c(zerop_g, zp) 27 | } 28 | 29 | if (fix_cutoff == TRUE) { 30 | expdata <- assays(sce)$expdata 31 | is.na(expdata) <- assays(sce)$expdata == 0 32 | exp_reduced_binary <- as.matrix((expdata > binarize_cutoff) + 0) 33 | exp_reduced_binary[is.na(exp_reduced_binary)] = 0 34 | assays(sce)$binary <- exp_reduced_binary 35 | oupBinary <- data.frame(geneID = rownames(sce), 36 | zerop_gene = zerop_g, 37 | passBinary = TRUE) 38 | rowData(sce) <- oupBinary 39 | } else { 40 | expdata <- assays(sce)$expdata 41 | # Add gaussian noise to gene expression matrix 42 | # Here we use a sd of 0.1 43 | LogCountsadd = expdata + matrix(rnorm(nrow(expdata)*ncol(expdata), 44 | mean = 0, sd = 0.1), 45 | nrow(expdata), ncol(expdata)) 46 | # Start fitting mixture models for each gene 47 | oupBinary = do.call( 48 | rbind, mclapply(rownames(LogCountsadd), function(iGene){ 49 | set.seed(42) # Set seed for consistency 50 | tmpMix = normalmixEM(LogCountsadd[iGene, ], k = 2) 51 | if (tmpMix$mu[1] < tmpMix$mu[2]) { 52 | tmpOup = data.frame(geneID = iGene, 53 | mu1 = tmpMix$mu[1], 54 | mu2 = tmpMix$mu[2], 55 | sigma1 = tmpMix$sigma[1], 56 | sigma2 = tmpMix$sigma[2], 57 | lambda1 = tmpMix$lambda[1], 58 | lambda2 = tmpMix$lambda[2], 59 | loglik = tmpMix$loglik) 60 | } else { 61 | tmpOup = data.frame(geneID = iGene, 62 | mu1 = tmpMix$mu[2], 63 | mu2 = tmpMix$mu[1], 64 | sigma1 = tmpMix$sigma[2], 65 | sigma2 = tmpMix$sigma[1], 66 | lambda1 = tmpMix$lambda[2], 67 | lambda2 = tmpMix$lambda[1], 68 | loglik = tmpMix$loglik) 69 | } 70 | return(tmpOup) 71 | }, mc.cores = ncores)) 72 | 73 | # Check if non-bimodal genes 74 | oupBinary$passBinary = TRUE 75 | oupBinary[oupBinary$lambda1 < 0.1, ]$passBinary = FALSE 76 | oupBinary[oupBinary$lambda2 < 0.1, ]$passBinary = FALSE 77 | oupBinary[(oupBinary$mu2 - oupBinary$mu1) < (oupBinary$sigma1 + oupBinary$sigma2), ]$passBinary = FALSE 78 | # table(oupBinary$passBinary) 79 | 80 | # Solve for intersection for remaining genes 81 | oupBinary$root = -1 82 | for(iGene in oupBinary[oupBinary$passBinary == TRUE, ]$geneID){ 83 | tmpMix = oupBinary[oupBinary$geneID == iGene, ] 84 | tmpInt = uniroot(function(x, l1, l2, mu1, mu2, sd1, sd2) { 85 | dnorm(x, m = mu1, sd = sd1) * l1 - 86 | dnorm(x, m = mu2, sd = sd2) * l2}, 87 | interval = c(tmpMix$mu1,tmpMix$mu2), 88 | l1 = tmpMix$lambda1, mu1 = tmpMix$mu1, sd1 = tmpMix$sigma1, 89 | l2 = tmpMix$lambda2, mu2 = tmpMix$mu2, sd2 = tmpMix$sigma2) 90 | oupBinary[oupBinary$geneID == iGene, ]$root = tmpInt$root 91 | } 92 | # Binarize expression 93 | binLogCounts = expdata[oupBinary$geneID,] 94 | binLogCounts = t(scale(t(binLogCounts), scale = FALSE, 95 | center = oupBinary$root)) 96 | binLogCounts[binLogCounts >= 0] = 1 97 | binLogCounts[binLogCounts < 0] = 0 98 | assays(sce)$binary <- binLogCounts 99 | 100 | oupBinary$zerop_gene <- zerop_g 101 | rowData(sce) <- oupBinary 102 | } 103 | return(sce) 104 | } 105 | -------------------------------------------------------------------------------- /R/compare_trajectories.R: -------------------------------------------------------------------------------- 1 | #' @title Identify distinct switching genes for each path 2 | #' 3 | #' @description This function identifies distinct switching genes for each path 4 | #' 5 | #' @param toplotgl_Rsub1 switching genes of path1 6 | #' @param toplotgl_Rsub2 switching genes of path2 7 | #' @param path1name name of path1 given by user 8 | #' @param path2name name of path2 given by user 9 | #' @param r2cutoff pseudo R^2 cutoff 10 | #' @return 11 | #' 12 | #' @export 13 | #' 14 | distinct_genes <- function(toplotgl_Rsub1, toplotgl_Rsub2, path1name = "Path1Genes", path2name = "Path2Genes", 15 | r2cutoff = 0.05, scale_timeline = FALSE, path1time = NULL, path2time = NULL, bin = 100){ 16 | toplotgl_Rsub1$genenames <- rownames(toplotgl_Rsub1) 17 | toplotgl_Rsub2$genenames <- rownames(toplotgl_Rsub2) 18 | 19 | gl1 <- toplotgl_Rsub1$genenames 20 | gl2 <- toplotgl_Rsub2$genenames 21 | glin1 <- setdiff(gl1, gl2) 22 | glin2 <- setdiff(gl2, gl1) 23 | gs_p1 <- toplotgl_Rsub1[glin1,] 24 | gs_p2 <- toplotgl_Rsub2[glin2,] 25 | gs_p1$Paths <- path1name 26 | gs_p2$Paths <- path2name 27 | if (scale_timeline == TRUE) { 28 | steptime1 <- (max(path1time) - min(path1time))/bin 29 | gs_p1$switch_at_time <- round((gs_p1$switch_at_time - min(path1time))/steptime1) 30 | steptime2 <- (max(path2time) - min(path2time))/bin 31 | gs_p2$switch_at_time <- round((gs_p2$switch_at_time - min(path2time))/steptime2) 32 | } 33 | # combine distinct gene into one dataframe 34 | toplotgl <- rbind(gs_p1[,c("geneID","zerop_gene","switch_at_time","pvalues","FDR","pseudoR2s", 35 | "estimates","prd_quality","direction","switch_at_timeidx", 36 | "genenames","Paths")], 37 | gs_p2[,c("geneID","zerop_gene","switch_at_time","pvalues","FDR","pseudoR2s", 38 | "estimates","prd_quality","direction","switch_at_timeidx", 39 | "genenames","Paths")]) 40 | toplotgl <- toplotgl[toplotgl$pseudoR2s > r2cutoff,] 41 | return(toplotgl) 42 | } 43 | 44 | #' @title Identify common switching genes between paths 45 | #' 46 | #' @description This function identifies common switching genes between two paths 47 | #' 48 | #' @param toplotgl_Rsub1 switching genes of path1 49 | #' @param toplotgl_Rsub2 switching genes of path2 50 | #' @param path1name name of path1 given by user 51 | #' @param path2name name of path2 given by user 52 | #' @param r2cutoff pseudo R^2 cutoff 53 | #' @return 54 | #' 55 | #' @export 56 | #' 57 | common_genes <- function(toplotgl_Rsub1, toplotgl_Rsub2, path1name = "Path1Genes", path2name = "Path2Genes", 58 | r2cutoff = 0.05){ 59 | toplotgl_Rsub1 <- toplotgl_Rsub1[toplotgl_Rsub1$pseudoR2s > r2cutoff,] 60 | toplotgl_Rsub2 <- toplotgl_Rsub2[toplotgl_Rsub2$pseudoR2s > r2cutoff,] 61 | 62 | toplotgl_Rsub1$genenames <- rownames(toplotgl_Rsub1) 63 | toplotgl_Rsub2$genenames <- rownames(toplotgl_Rsub2) 64 | gl1 <- toplotgl_Rsub1$genenames 65 | gl2 <- toplotgl_Rsub2$genenames 66 | comgl <- intersect(gl1, gl2) 67 | 68 | if (all(toplotgl_Rsub1[comgl,]$direction != toplotgl_Rsub2[comgl,]$direction)){ 69 | print("Directions are not consistent.") 70 | return(NULL) 71 | } 72 | 73 | toplotgl_Rsub1$genetype <- path1name 74 | toplotgl_Rsub2$genetype <- path2name 75 | ggData <- as.data.frame(rbind(toplotgl_Rsub1[comgl, c("geneID","zerop_gene","switch_at_time","pvalues","FDR","pseudoR2s", 76 | "estimates","prd_quality","direction","switch_at_timeidx", 77 | "genenames","genetype")], 78 | toplotgl_Rsub2[comgl, c("geneID","zerop_gene","switch_at_time","pvalues","FDR","pseudoR2s", 79 | "estimates","prd_quality","direction","switch_at_timeidx", 80 | "genenames","genetype")])) 81 | ggData$genetype <- factor(ggData$genetype, levels = c(path1name, path2name)) 82 | return(ggData) 83 | } 84 | 85 | #' @title Plot common switching genes between paths 86 | #' 87 | #' @description This function plots common switching genes between two paths 88 | #' 89 | #' @param ggData data frame for common genes 90 | #' @param timedata timedata to show on plot 91 | #' @return 92 | #' 93 | #' @export 94 | #' 95 | common_genes_plot <- function(ggData, timedata){ 96 | ggData$genetypenum <- as.numeric(ggData$genetype) 97 | 98 | comtml_plot <- ggplot(ggData, aes(switch_at_time, genetypenum, group = genenames, col=direction)) + 99 | geom_line() + geom_point() + ylim(0.5, 3.5)# + xlim(20,28) 100 | 101 | comtml_plot<-comtml_plot+theme_classic() 102 | 103 | # Plot horizontal black line for timeline 104 | comtml_plot<-comtml_plot+geom_hline(yintercept=c(1,2),#as.integer(ggData$type), 105 | color = "black", size=0.6) 106 | 107 | pseudotime_step <- (max(timedata) - min(timedata))/10 108 | pseudotime_range <- seq(min(timedata), max(timedata), by=pseudotime_step) 109 | pseudotime_df <- data.frame(pseudotime_range, pseudotime_format=round(pseudotime_range,1)) 110 | 111 | pathposy <- c(1+0.07, 2-0.07) 112 | pathposx <- max(pseudotime_range)-0.9 113 | pathnames <- levels(ggData$genetype) 114 | pathnames_df <- data.frame(pathposx, pathposy, pathnames) 115 | comtml_plot <- comtml_plot + geom_text(data = pathnames_df, inherit.aes = FALSE, size=3.5, 116 | aes(x=pathposx, y=pathposy, label=pathnames)) 117 | 118 | ##text position 119 | positions <- seq(0.05, 1.15, by=0.1) 120 | tml <- as.data.frame(ggData[ggData$genetype == levels(ggData$genetype)[2],]) 121 | tml <- tml[with(tml, order(switch_at_time)), ] 122 | tml_uni <- tml[!duplicated(tml$switch_at_time),] 123 | 124 | line_pos <- data.frame( 125 | "switch_at_time"=tml_uni$switch_at_time, 126 | "position"=rep(positions, length.out=length(tml_uni$switch_at_time)) 127 | ) 128 | 129 | tml <- merge(x=tml, y=line_pos, by="switch_at_time", all = TRUE) 130 | 131 | text_offset <- 0.05 132 | tml$switch_at_time_count <- ave(tml$switch_at_time==tml$switch_at_time, tml$switch_at_time, FUN=cumsum) 133 | tml$text_position <- (tml$switch_at_time_count * text_offset) + tml$position 134 | 135 | # Plot vertical segment lines for milestones 136 | comtml_plot<-comtml_plot+geom_segment(data=tml[tml$switch_at_time_count == 1,], aes(y=2+position,yend=2,xend=switch_at_time), 137 | color='black', size=0.4)#size=tml$pseudoR2s) 138 | 139 | # Plot scatter points at zero and date 140 | # tml_plot<-tml_plot+geom_point(aes(y=0), size=2) 141 | 142 | # Don't show axes, appropriately position legend 143 | comtml_plot<-comtml_plot+theme(axis.line.y=element_blank(), 144 | axis.text.y=element_blank(), 145 | axis.title.x=element_blank(), 146 | axis.title.y=element_blank(), 147 | axis.ticks.y=element_blank(), 148 | axis.text.x =element_blank(), 149 | axis.ticks.x =element_blank(), 150 | axis.line.x =element_blank(), 151 | legend.position = "bottom", legend.key.size = unit(12, "pt"), 152 | legend.text = element_text(size = 12), 153 | text = element_text(size = 12, family = "Helvetica")) + 154 | labs(fill = "Regulation", color = "Regulation") + 155 | scale_color_manual(values=c("forestgreen", "chocolate2")) 156 | 157 | comtml_plot<-comtml_plot+geom_text(data=pseudotime_df, inherit.aes = FALSE, 158 | aes(x=pseudotime_range, y=0.9, label=pseudotime_format), size=3.8, color='black') 159 | 160 | # Show text for each milestone 161 | comtml_plot<-comtml_plot+geom_text(data=tml, inherit.aes = FALSE, 162 | aes(x=switch_at_time,y=2+text_position,label=genenames),size=3) 163 | 164 | return(comtml_plot) 165 | } 166 | 167 | -------------------------------------------------------------------------------- /R/convert_TI.R: -------------------------------------------------------------------------------- 1 | #' @title plot monocle2 trajectory colored by State 2 | #' 3 | #' @description This function plots monocle2 trajectory with "State" colors 4 | #' 5 | #' @param monocle2_obj monocle2 output object 6 | #' @import Biobase 7 | #' @import ggplot2 8 | #' @importFrom gridExtra grid.arrange 9 | #' @return 10 | #' 11 | #' @export 12 | #' 13 | plot_monocle_State <- function(monocle2_obj){ 14 | mcells <- pData(monocle2_obj) 15 | mcells$dim1 <- monocle2_obj@reducedDimS[1,] 16 | mcells$dim2 <- monocle2_obj@reducedDimS[2,] 17 | 18 | p1 <- ggplot(mcells, aes(dim1, dim2, color = State)) + 19 | ylab("Component 2") + xlab("Component 1") + 20 | geom_point(size = 1, alpha = 1.0) + 21 | scale_shape_manual(guide = FALSE, values = 16) + 22 | theme(text = element_text(size = 12, family = "Helvetica"), 23 | panel.background = element_rect(fill = "white", colour = NA), 24 | axis.line = element_line(colour = "black"), 25 | legend.key = element_rect(fill = NA), 26 | legend.key.width = unit(12, "pt"), 27 | legend.text = element_text(size = 10,colour = "black"), 28 | legend.title = element_text(size = 11, colour = "black"), 29 | legend.position = "top") 30 | 31 | p2 <- ggplot(mcells, aes(dim1, dim2, color = Pseudotime)) + 32 | ylab("Component 2") + xlab("Component 1") + 33 | geom_point(size = 1.7, alpha = 1.0) + 34 | scale_shape_manual(guide = FALSE, values = 16) + 35 | theme(text = element_text(size = 12, family = "Helvetica"), 36 | panel.background = element_rect(fill = "white", colour = NA), 37 | axis.line = element_line(colour = "black"), 38 | legend.key = element_rect(fill = NA), 39 | legend.key.width = unit(20, "pt"), 40 | legend.text = element_text(size = 10,colour = "black"), 41 | legend.title = element_text(size = 11, colour = "black"), 42 | legend.position = "top") 43 | return(grid.arrange(p1, p2, nrow = 1)) 44 | } 45 | 46 | #' @title Convert monocle2 output into GeneSwitches object 47 | #' 48 | #' @description This function converts monocle2 output into GeneSwitches object 49 | #' 50 | #' @param monocle2_obj monocle2 output object 51 | #' @param states a vector of states (path) that are interested in 52 | #' @param logexpdata log-normal gene expression 53 | #' @import Biobase 54 | #' @return 55 | #' 56 | #' @export 57 | #' 58 | convert_monocle2 <- function(monocle2_obj, states, expdata){ 59 | allcells <- pData(monocle2_obj) 60 | # extract cells and log-normal expression in certain path 61 | cells <- allcells[allcells$State %in% states,] 62 | expd <- expdata[,rownames(cells)] 63 | expd <- expd[apply(expd > 0,1,sum) >= 3,] 64 | # create GeneSwitches object 65 | sce <- SingleCellExperiment(assays = List(expdata = expd)) 66 | # pass pseudotime info 67 | colData(sce)$Pseudotime <- cells$Pseudotime 68 | # pass reduced dims info 69 | rd <- t(monocle2_obj@reducedDimS)[rownames(cells),] 70 | colnames(rd) <- c("Component 1", "Component 2") 71 | reducedDims(sce) <- SimpleList(monocleRD=rd) 72 | 73 | return(sce) 74 | } 75 | 76 | 77 | #' @title Convert slingshot output into GeneSwitches object 78 | #' 79 | #' @description This function converts slingshot output into GeneSwitches object 80 | #' 81 | #' @param sce_slingshot slingshot SingleCellExperiment output object 82 | #' @param pseudotime_idx name of desired pseudotime path to apply GeneSwitches 83 | #' @param assayname expression assay to use 84 | #' @return 85 | #' 86 | #' @export 87 | #' 88 | convert_slingshot <- function(sce_slingshot, pseudotime_idx, assayname = "expdata"){ 89 | allcells <- as.data.frame(colData(sce_slingshot)) 90 | # extract cells and log-normal expression in certain path 91 | cells <- allcells[!is.na(allcells[,pseudotime_idx]),] 92 | expd <- assays(sce_slingshot)[[assayname]][,rownames(cells)] 93 | expd <- expd[apply(expd > 0,1,sum) >= 3,] 94 | # create GeneSwitches object 95 | sce <- SingleCellExperiment(assays = List(expdata = expd)) 96 | # pass pseudotime info 97 | colData(sce)$Pseudotime <- cells[,pseudotime_idx] 98 | # pass reduced dims info 99 | for (i in 1:length(reducedDims(sce_slingshot))) { 100 | reducedDims(sce)[[i]] <- reducedDims(sce_slingshot)[[i]][rownames(cells),] 101 | } 102 | names(reducedDims(sce)) <- names(reducedDims(sce_slingshot)) 103 | 104 | return(sce) 105 | } 106 | 107 | 108 | #' @title Subset GeneSwitches object based on the range of pseudotime 109 | #' 110 | #' @description This function subsets GeneSwitches object based on the range of pseudotime 111 | #' 112 | #' @param sce GeneSwitches object which is a SingleCellExperiment object 113 | #' @param min_time lower bound of pseudotime 114 | #' @param max_time upper bound of pseudotime 115 | #' @param assayname expression assay to use 116 | #' @param minexp minimun expression to filer genes 117 | #' @param mincells minimun cells with expression 118 | #' @return 119 | #' 120 | #' @export 121 | #' 122 | subset_pseudotime <- function(sce, min_time, max_time, assayname = "expdata", minexp = 0, mincells = 10){ 123 | sce_subset <- sce[,sce$Pseudotime >= min_time & sce$Pseudotime <= max_time] 124 | # all(rownames(sce_p1_subset) == rownames(rowData(sce_p1_subset))) 125 | sce_subset <- sce_subset[which(apply(assays(sce_subset)[[assayname]] > minexp, 1 ,sum) >= mincells), ] 126 | return(sce_subset) 127 | } 128 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' An example of gene sets 2 | #' 3 | #' Pathways of GSEA MSigDB hallmark, c2 and c5 gene sets 4 | #' 5 | #' @format A list with 6153 pathways, and genes in each pathway 6 | #' 7 | #' @source \url{http://software.broadinstitute.org/gsea/msigdb/collections.jsp} 8 | "msigdb_h_c2_c5" 9 | 10 | 11 | #' An example of gene lists 12 | #' 13 | #' Provided gene lists with surface proteins and transctription factors 14 | #' 15 | #' @format A data frame with 3071 rows and 2 columns. rows are genes, and columns are "genenames" and "genetypes". 16 | #' 17 | #' @source \url{http://wlab.ethz.ch/cspa/, http://humantfs.ccbr.utoronto.ca/} 18 | "gs_genelists" 19 | -------------------------------------------------------------------------------- /R/find_switch_pathway.R: -------------------------------------------------------------------------------- 1 | #' @title Find significantly changed pathways and switching timepoint 2 | #' 3 | #' @description Apply hypergeometric test to determine significantly changed 4 | #' pathways and determine the switching timepoint for each pathway 5 | #' 6 | #' @param N expressed genes in pathways 7 | #' @param pathways a list of pathways with genes 8 | #' @param toplotgl_ptw swiching genes to plot 9 | #' @param sig_FDR FDR cut off for significant pathways 10 | #' @param direction switching direction, up or down 11 | #' @return 12 | #' 13 | phyper_pathway <- function(N, pathways, toplotgl_ptw, sig_FDR = 0.05, direction = c("up", "down")){ 14 | pv <- c() 15 | swt_pw <- c() 16 | for (i in 1:length(pathways)) { 17 | gp <- pathways[[i]] 18 | genestoplot <- intersect(rownames(toplotgl_ptw), gp) 19 | m <- length(gp) ## Number of "marked" elements, i.e. genes associated to this biological process 20 | n <- N - m ## Number of "non-marked" elements, i.e. genes not associated to this biological process 21 | x <- length(genestoplot) ## Number of "marked" elements in the selection, i.e. genes of the group of interest that are associated to this biological process 22 | k <- nrow(toplotgl_ptw) ## Size of the selection, i.e. submitted genes with at least one annotation in GO biological processes 23 | pv <- c(pv, phyper(q=x-1, m=m, n=n, k=k, lower.tail = FALSE)) 24 | 25 | toplotgl <- toplotgl_ptw[genestoplot,] 26 | swt_pw <- c(swt_pw, median(toplotgl$switch_at_time)) 27 | } 28 | switch_pw <- data.frame(switch_at_time = swt_pw, pvalue = pv) 29 | rownames(switch_pw) <- names(pathways) 30 | switch_pw$FDR <- p.adjust(switch_pw$pvalue, method = "BH") 31 | 32 | switch_pw_sig <- switch_pw[switch_pw$FDR < sig_FDR,] 33 | if (nrow(switch_pw_sig) > 0) { 34 | switch_pw_sig$direction <- direction 35 | } 36 | return(switch_pw_sig) 37 | } 38 | 39 | #' @title Merge redundant pathways 40 | #' 41 | #' @description Merge significant pathways that are with same genes over certain ratio 42 | #' 43 | #' @param switch_pw a data frame with significantly changed pathways 44 | #' @param pathways a list of pathways with genes 45 | #' @param ratio cutoff ratio for merging redundant pathways 46 | #' @return 47 | #' 48 | merge_pathways <- function(switch_pw, pathways, ratio){ 49 | switch_pw <- switch_pw[order(switch_pw$FDR),] 50 | switch_pw$keep <- 1 51 | for (i in 1:(nrow(switch_pw)-1)) { 52 | if (switch_pw[i,]$keep == 1) { 53 | for (j in (i+1):nrow(switch_pw)) { 54 | pw1 <- pathways[[rownames(switch_pw)[i]]] 55 | pw2 <- pathways[[rownames(switch_pw)[j]]] 56 | overlapping <- intersect(pw1, pw2) 57 | if (length(overlapping)/length(pw2) > ratio) { 58 | switch_pw[j,]$keep <- 0 59 | } 60 | } 61 | } 62 | } 63 | switch_pw <- switch_pw[switch_pw$keep == 1,] 64 | return(subset(switch_pw, select = -c(keep))) 65 | } 66 | 67 | #' @title Reduce redundant pathways 68 | #' 69 | #' @description Reduce significant pathways that are with same genes over certain ratio, 70 | #' do up- and down- regulated pathways separately 71 | #' 72 | #' @param switch_pw a data frame with significantly changed pathways 73 | #' @param pathways a list of pathways with genes 74 | #' @param redundant_pw_rate cutoff ratio for merging redundant pathways 75 | #' @export 76 | #' @return 77 | #' 78 | reduce_pathways <- function(switch_pw, pathways, redundant_pw_rate = 0.8){ 79 | switch_up <- switch_pw[switch_pw$direction == "up",] 80 | if (nrow(switch_up) > 1) { 81 | switch_up <- merge_pathways(switch_up, pathways, ratio = redundant_pw_rate) 82 | } 83 | switch_down <- switch_pw[switch_pw$direction == "down",] 84 | if (nrow(switch_down) > 1) { 85 | switch_down <- merge_pathways(switch_down, pathways, ratio = redundant_pw_rate) 86 | } 87 | # combine 88 | switch_pw_reduce <- rbind(switch_up, switch_down) 89 | switch_pw_reduce <- switch_pw_reduce[order(switch_pw_reduce$FDR),] 90 | return(switch_pw_reduce) 91 | } 92 | 93 | 94 | #' @title Find significantly changed pathways and switching timepoint 95 | #' 96 | #' @description This function finds significantly changed pathways and determine 97 | #' the switching timepoint for each pathway 98 | #' 99 | #' @param sce SingleCellExperiment 100 | #' @param pathways a list of pathways with genes 101 | #' @param toplotgl_ptw swiching genes to plot 102 | #' @param sig_FDR FDR cut off for significant pathways 103 | #' @return 104 | #' 105 | #' @export 106 | #' 107 | find_switch_pathway <- function(scerowdata, pathways = msigdb_h_c2_c5, toplotgl_sig, 108 | sig_FDR = 0.05) { 109 | # make all the genes from pathways into a vector 110 | gps <- c() 111 | for (i in 1:length(pathways)) { 112 | gps <- c(gps, pathways[[i]]) 113 | } 114 | genesINpathways <- unique(gps) 115 | N <- length(intersect(rownames(scerowdata), genesINpathways)) 116 | 117 | # up-regulated pathways 118 | toplotgl_ptw <- toplotgl_sig[rownames(toplotgl_sig) %in% genesINpathways & toplotgl_sig$direction == "up",] 119 | switch_pw_sigup <- phyper_pathway(N, pathways, toplotgl_ptw, sig_FDR = 0.05, direction = "up") 120 | # down-regulated pathways 121 | toplotgl_ptw <- toplotgl_sig[rownames(toplotgl_sig) %in% genesINpathways & toplotgl_sig$direction == "down",] 122 | switch_pw_sigdown <- phyper_pathway(N, pathways, toplotgl_ptw, sig_FDR = 0.05, direction = "down") 123 | # combine 124 | switch_pw_sig <- rbind(switch_pw_sigup, switch_pw_sigdown) 125 | if (nrow(switch_pw_sig) > 0) { 126 | switch_pw_sig$feature_type <- "pathways" 127 | switch_pw_sig$feature_name <- rownames(switch_pw_sig) 128 | } 129 | switch_pw_ord <- switch_pw_sig[order(switch_pw_sig$FDR),] 130 | 131 | return(switch_pw_ord) 132 | } 133 | 134 | #' @title Pathways ridge plot 135 | #' 136 | #' @description This function generates pathways ridge plots 137 | #' 138 | #' @param switch_pw_re significant pathways to plot 139 | #' @param toplotgl_sig switching genes 140 | #' @param direction switching direction of the pathway, up or down 141 | #' @param orderbytime order the pathways by switching time (mean time of switching genes) if TRUE, 142 | #' order the pathways by FDR if FALSE 143 | #' @return 144 | #' 145 | #' @importFrom ggridges geom_density_ridges 146 | #' @export 147 | #' 148 | plot_pathway_density <- function(switch_pw_re, toplotgl_sig, pw_direction = c("up", "down"), orderbytime = TRUE){ 149 | switch_pw_re <- switch_pw_re[switch_pw_re$direction %in% pw_direction,] 150 | if (orderbytime == TRUE) { 151 | switch_pw_re <- switch_pw_re[order(switch_pw_re$switch_at_time, decreasing = TRUE),] 152 | } else { 153 | switch_pw_re <- switch_pw_re[order(switch_pw_re$FDR, decreasing = TRUE),] 154 | } 155 | gl <- c() 156 | for (i in 1:nrow(switch_pw_re)) { 157 | pn <- rownames(switch_pw_re)[i] 158 | pgl <- data.frame(msigdb_h_c2_c5[pn], pn, stringsAsFactors = FALSE) 159 | colnames(pgl) <- c("Genes", "Pathways") 160 | tgn <- length(unique(pgl$Genes)) 161 | genestoplot <- intersect(rownames(toplotgl_sig), pgl$Genes) 162 | pgl <- pgl[pgl$Genes %in% genestoplot,] 163 | toplotgl <- toplotgl_sig[pgl$Genes, ] 164 | pgl <- as.data.frame(cbind(pgl, toplotgl)) 165 | pgl <- pgl[pgl$direction == switch_pw_re[i,]$direction,] 166 | pgl$Pathways <- paste0(pgl$Pathways, "(",nrow(pgl),"/",tgn,")") 167 | gl <- rbind(gl, pgl) 168 | } 169 | 170 | gl$Pathways <- factor(gl$Pathways, levels = unique(gl$Pathways)) 171 | p <- ggplot(gl, aes(x = switch_at_time, y = Pathways, fill = direction, col = direction)) 172 | p <- p + theme_classic() 173 | p <- p + xlab("Pseudo-timeline") + geom_density_ridges(alpha = 0.5) + 174 | labs(fill = "Regulation", color = "Regulation") 175 | p <- p + theme(text = element_text(size = 12, family = "Helvetica"), 176 | panel.background = element_rect(fill = "white", colour = NA), 177 | axis.line = element_line(colour = "black"), 178 | legend.key.size = unit(10, "pt"), 179 | legend.text = element_text(size = 10,colour = "black"), 180 | legend.title = element_text(size = 11, colour = "black")) + 181 | scale_color_manual(values=c("forestgreen", "chocolate2")) + 182 | scale_fill_manual(values=c("forestgreen", "chocolate2")) 183 | return(p) 184 | } 185 | -------------------------------------------------------------------------------- /R/fit_logistic_regression.R: -------------------------------------------------------------------------------- 1 | #' @title Random downsampling of zero expression 2 | #' 3 | #' @description This function does randomly downsampling of cells with zero expression for one gene 4 | #' 5 | #' @param glmdata binary data of one gene 6 | #' @param ratio_ds downsampling zeros to this proportion 7 | #' @return 8 | #' 9 | downsample_zeros <- function(glmdata, ratio_ds = 0.7) { 10 | p = as.numeric(ratio_ds) 11 | set.seed(42) # Set seed for consistency 12 | downsample <- sample(which(glmdata$expvalue == 0), length(which(glmdata$expvalue == 0)) - round(sum(glmdata$expvalue != 0) * p/(1 - p))) 13 | if (length(downsample) > 0) { 14 | subdata <- glmdata[-downsample, ] 15 | } else {subdata <- glmdata} 16 | return(subdata) 17 | } 18 | 19 | #' @title Fit fast logistic regression and find switching timepoint 20 | #' 21 | #' @description This function fits fast logistic regression and find switching timepoint for each gene 22 | #' 23 | #' @param sce SingleCellExperiment 24 | #' @param downsample Logical. if do random downsampling of zeros 25 | #' @param ds_cutoff only do downsampling if zero percentage is over this cutoff 26 | #' @param zero_ratio downsampling zeros to this proportion 27 | #' @param sig_FDR FDR cut off for significant genes 28 | #' @return 29 | #' 30 | #' @import fastglm 31 | #' @export 32 | #' 33 | find_switch_logistic_fastglm <- function(sce, downsample = FALSE, ds_cutoff = 0.7, zero_ratio = 0.7, 34 | sig_FDR = 0.05, show_warnings = TRUE) { 35 | binarydata <- assays(sce)$binary 36 | expdata <- assays(sce)$expdata 37 | binarydata <- binarydata[which(rowData(sce)$passBinary == TRUE), ] 38 | expdata <- expdata[which(rowData(sce)$passBinary == TRUE), ] 39 | genes <- rowData(sce)[which(rowData(sce)$passBinary == TRUE), ] 40 | timedata <- sce$Pseudotime 41 | pvalues <- binarydata[, 1] 42 | pseudoR2s <- binarydata[, 1] 43 | estimates <- binarydata[, 1] 44 | switch_at_time <- binarydata[, 1] 45 | prd_quality <- binarydata[, 1] 46 | CI <- binarydata[, 1] 47 | 48 | for (i in 1:nrow(binarydata)) { 49 | glmdata <- cbind(State = as.numeric(binarydata[i, ]), expvalue = as.numeric(expdata[i, ]), 50 | timedata = sce$Pseudotime) 51 | glmdata <- as.data.frame(glmdata) 52 | 53 | if (downsample == TRUE & round(genes$zerop_gene[i],3) > ds_cutoff) { 54 | glmdata <- downsample_zeros(glmdata, ratio_ds = zero_ratio) 55 | } 56 | 57 | if (show_warnings == TRUE) { 58 | glm_results <- fastglm(x = model.matrix(State ~ timedata, data = glmdata), 59 | y = glmdata$State, family = binomial(link = "logit")) 60 | } else { 61 | glm_results <-suppressWarnings(fastglm(x = model.matrix(State ~ timedata, data = glmdata), 62 | y = glmdata$State, family = binomial(link = "logit"))) 63 | } 64 | pvalues[i] <- coef(summary(glm_results))[, 4][2] 65 | ll.null <- glm_results$null.deviance/-2 66 | ll.proposed <- glm_results$deviance/-2 67 | # McFadden's Pseudo R^2 = [ LL(Null) - LL(Proposed) ] / LL(Null) 68 | pseudoR2s[i] <- (ll.null - ll.proposed)/ll.null 69 | estimates[i] <- coef(summary(glm_results))[, 1][2] 70 | # p=0.5 71 | switch_at_time[i] <- (log(0.5/(1 - 0.5)) - coef(glm_results)[1])/coef(glm_results)[2] 72 | if (switch_at_time[i] >= max(glmdata$timedata)) { 73 | switch_at_time[i] = max(glmdata$timedata) 74 | prd_quality[i] = 0 75 | } else { 76 | prd_quality[i] = 1 77 | } 78 | if (switch_at_time[i] <= min(glmdata$timedata)) { 79 | switch_at_time[i] = min(glmdata$timedata) 80 | prd_quality[i] = 0 81 | } 82 | se <- summary(glm_results)$coefficients[, 2] 83 | CI[i] <- sqrt((se[1]*1.96/coef(glm_results)[1])^2 + (se[2]*1.96/coef(glm_results)[2])^2)* 84 | abs(coef(glm_results)[1]/coef(glm_results)[2]) 85 | remove(glm_results) 86 | } 87 | 88 | result_switch <- cbind(switch_at_time, CI, pvalues, pseudoR2s, estimates, prd_quality) 89 | rownames(result_switch) <- rownames(binarydata) 90 | result_switch <- as.data.frame(result_switch) 91 | result_switch$direction <- "up" 92 | result_switch[result_switch$estimates < 0, ]$direction <- "down" 93 | result_switch$FDR <- p.adjust(result_switch$pvalues, method = "BH") 94 | steptime <- (max(timedata) - min(timedata))/100 95 | result_switch$switch_at_timeidx <- round((result_switch$switch_at_time - min(timedata))/steptime) 96 | 97 | # process_resultswitch -------------------------------------------------------------------------- 98 | # check significance FDR < sig_FDR 99 | if (max(result_switch$FDR) > sig_FDR) { 100 | result_switch[result_switch$FDR > sig_FDR, ]$prd_quality <- 0 101 | } 102 | 103 | geneinfo <- merge(rowData(sce), result_switch, by=0, all=TRUE)[,-1] #[,1:11] 104 | rownames(geneinfo) <- geneinfo$geneID 105 | # all(rownames(geneinfo) == rownames(sce)) 106 | geneinfo <- geneinfo[rownames(sce), ] 107 | rowData(sce) <- geneinfo 108 | return(sce) 109 | } 110 | 111 | -------------------------------------------------------------------------------- /R/get_example_inputData.R: -------------------------------------------------------------------------------- 1 | #' @title Download example input data 2 | #' 3 | #' @description This function checks if example input files are in the current directory and if not download them 4 | #' 5 | #' @return 6 | #' 7 | #' @export 8 | #' 9 | get_example_inputData <- function(){ 10 | logexpdata = "./logexpdata.RData" 11 | if(!file.exists(logexpdata)) { 12 | res <- tryCatch(download.file("http://files.ddnetbio.com/logexpdata.RData", 13 | destfile = logexpdata, 14 | method = "auto"), 15 | error = function(e) 1) 16 | if(res == 1) { print("Error: cannot download logexpdata.RData.") } 17 | } 18 | 19 | cardiac_monocle2 = "./cardiac_monocle2.RData" 20 | if(!file.exists(cardiac_monocle2)) { 21 | res <- tryCatch(download.file("http://files.ddnetbio.com/cardiac_monocle2.RData", 22 | destfile = cardiac_monocle2, 23 | method = "auto"), 24 | error = function(e) 1) 25 | if(res == 1) { print("Error: cannot download cardiac_monocle2.RData") } 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /R/plot_gene_exp.R: -------------------------------------------------------------------------------- 1 | #' @title Plot gene expression 2 | #' 3 | #' @description This function plot gene expression on two-dimensional space 4 | #' 5 | #' @param sce SingleCellExperiment 6 | #' @param gene one gene of interest 7 | #' @param reduction dimensional reduction method 8 | #' @param downsample if do random downsampling of zeros 9 | #' @param ds_cutoff only do downsampling if zero percentage is over this cutoff 10 | #' @param zero_ratio downsampling zeros to this proportion 11 | #' @param ptsize point size 12 | #' @param fitting if plot logistic regression fitting 13 | #' @return 14 | #' 15 | #' @import ggplot2 16 | #' @importFrom RColorBrewer brewer.pal 17 | #' @importFrom gridExtra grid.arrange 18 | #' @export 19 | #' 20 | plot_gene_exp <- function(sce, gene, reduction, fitting = FALSE, bin_width = 0.1, 21 | downsample = FALSE, ds_cutoff = 0.7, zero_ratio = 0.7, ptsize = 0.7){ 22 | glmdata = data.frame(expvalue = assays(sce)$expdata[gene, ], 23 | timedata = sce$Pseudotime, 24 | State = as.numeric(assays(sce)$binary[gene, ]), 25 | Dim1 = reducedDim(sce, reduction)[,1], 26 | Dim2 = reducedDim(sce, reduction)[,2]) 27 | 28 | if (downsample == TRUE & round(sum(glmdata$expvalue == 0)/nrow(glmdata),3) > ds_cutoff) { 29 | glmdata <- downsample_zeros(glmdata, ratio_ds = zero_ratio) 30 | gene <- paste0(gene, " (downsampled--",zero_ratio*100,"%)") 31 | } 32 | 33 | # Define plot theme 34 | plotTheme = theme(plot.title = element_text(size = 24, face = "bold"), 35 | text = element_text(size = 18), 36 | panel.background = element_rect(fill = "white", colour = NA), 37 | plot.background = element_rect(fill = "white", colour = NA), 38 | axis.line = element_line(colour = "black", size = 1), 39 | axis.text = element_text(size = 15, colour = "black"), 40 | axis.ticks = element_line(colour = "black", size = 1), 41 | legend.key = element_rect(fill = NA), 42 | legend.key.width = unit(14, "pt"), 43 | legend.position = "right") 44 | 45 | # Plot 1 46 | p1 <- ggplot(glmdata, aes(Dim1, Dim2, color = expvalue)) + 47 | geom_point(size = 0.7, alpha = 1.0) + 48 | scale_shape_manual(guide = FALSE, values = 16) + 49 | scale_color_gradientn("",colours = c("grey85", brewer.pal(9, "OrRd"))) + 50 | plotTheme + ggtitle(gene) 51 | 52 | # Plot3 53 | p3 <- ggplot(glmdata, aes(timedata, State)) + geom_point() + 54 | xlab("Pseudotime") + ylab("Probability") + 55 | geom_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) + 56 | geom_hline(yintercept = 0.5, linetype="dashed", color = "#E69F00", size=1) + 57 | scale_y_continuous(breaks = seq(0, 1, 0.5)) + 58 | scale_x_continuous(breaks = seq(0, max(glmdata$timedata), 10)) + 59 | plotTheme + ggtitle(" ") 60 | 61 | if (fitting == TRUE & "root" %in% names(rowData(sce))) { 62 | # Plot 2 63 | ggGaussian = data.frame(x = seq(min(glmdata$expvalue) - 0.1, 64 | max(glmdata$expvalue) + 0.1, length.out = 500)) 65 | ggGaussian$y1 = (nrow(glmdata)/10) * rowData(sce)[gene,]$lambda1 * 66 | dnorm(ggGaussian$x, mean = rowData(sce)[gene,]$mu1, 67 | sd = rowData(sce)[gene,]$sigma1) 68 | ggGaussian$y2 = (nrow(glmdata)/10) * rowData(sce)[gene,]$lambda2 * 69 | dnorm(ggGaussian$x, mean = rowData(sce)[gene,]$mu2, 70 | sd = rowData(sce)[gene,]$sigma2) 71 | p2 <- ggplot() + xlab("Log Expression") + 72 | geom_histogram(data = glmdata, aes(x=expvalue, fill = as.factor(State)), 73 | binwidth = bin_width, alpha=0.7, position = "identity") + 74 | geom_path(data = ggGaussian, aes(x,y1), color = "#999999", size=.7) + 75 | geom_path(data = ggGaussian, aes(x,y2), color = "chocolate2", size=.7) + 76 | geom_vline(xintercept = rowData(sce)[gene,]$root, size=1, linetype="dashed") + 77 | scale_fill_manual("State", values = c("grey", "#E69F00"), 78 | labels = c("Off", "On")) + 79 | plotTheme + theme(legend.position = "top") 80 | # Combine 81 | return(grid.arrange(p1, p2, p3, nrow = 1)) 82 | } else if (fitting == TRUE) { 83 | return(grid.arrange(p1, p3, nrow = 1)) 84 | } else { 85 | plot(p1) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /R/plot_timeline.R: -------------------------------------------------------------------------------- 1 | #' @title Extract switching gene list of interesting 2 | #' 3 | #' @description This function extract a list of significant switching genes 4 | #' 5 | #' @param sce switching genes 6 | #' @param allgenes if use all genes 7 | #' @param pathway_name a list of pathway name(s) to plot 8 | #' @param genelists a gene list to plot 9 | #' @param genetype specific gene type to plot c("EMT", "reprogramming", "stem", "surface", "TF") 10 | #' @param zero_pct zero-expression percentage cut off for significant genes 11 | #' @param r2cutoff pseudo R^2 cutoff 12 | #' @param direction switching direction, up or down 13 | #' @param topnum number of top genes ordered by pseudo R^2 value 14 | #' @return 15 | #' 16 | #' @import plyr 17 | #' @export 18 | #' 19 | filter_switchgenes <- function(sce, allgenes = FALSE, pathway_name = NULL, genelists = GeneSwitches:::gs_genelists, 20 | genetype = c("Surface proteins", "TFs"), zero_pct = 0.9, 21 | r2cutoff = 0.03, direction = c("up", "down"), topnum = 100000) { 22 | if (allgenes == TRUE) { 23 | toplotgl <- rowData(sce) 24 | toplotgl$feature_type <- "All genes" 25 | } else if(!is.null(pathway_name)) { 26 | gl <- c() 27 | for (pn in pathway_name) { 28 | pgl <- data.frame(msigdb_h_c2_c5[pn], pn, stringsAsFactors = FALSE) 29 | colnames(pgl) <- c("feature_name", "feature_type") 30 | gl <- rbind(gl, pgl) 31 | } 32 | multi <- gl$feature_name[duplicated(gl$feature_name)] 33 | if (length(multi) > 0) { 34 | gl <- ddply(gl,.(feature_name),paste)[,c(1,3)] 35 | rownames(gl) <- gl$feature_name 36 | colnames(gl)[2] <- "types" 37 | gl$feature_type <- gl$types 38 | gl[multi,]$feature_type <- "Multiple" 39 | genestoplot <- intersect(rownames(sce), gl$feature_name) 40 | toplotgl <- rowData(sce)[genestoplot, ] 41 | toplotgl$feature_type <- gl[genestoplot, ]$feature_type 42 | toplotgl$types <- gl[genestoplot, ]$types 43 | } else { 44 | rownames(gl) <- gl$feature_name 45 | genestoplot <- intersect(rownames(sce), gl$feature_name) 46 | toplotgl <- rowData(sce)[genestoplot, ] 47 | toplotgl$feature_type <- gl[genestoplot, ]$feature_type 48 | } 49 | } else { 50 | genelists_sub <- genelists[genelists$genetypes %in% genetype, ] 51 | genelists_sub <- genelists_sub[!duplicated(genelists_sub$genenames), ] 52 | rownames(genelists_sub) <- genelists_sub$genenames 53 | genestoplot <- intersect(rownames(sce), genelists_sub$genenames) 54 | toplotgl <- rowData(sce)[genestoplot, ] 55 | toplotgl$feature_type <- genelists_sub[genestoplot, ]$genetypes 56 | } 57 | 58 | toplotgl_sub <- toplotgl[toplotgl$zerop_gene < zero_pct & toplotgl$prd_quality == 1 & toplotgl$pseudoR2s > r2cutoff & 59 | toplotgl$direction %in% direction, ] 60 | if (nrow(toplotgl_sub) > topnum) { 61 | toplotgl_sub <- toplotgl_sub[order(toplotgl_sub$pseudoR2s, decreasing = TRUE),] 62 | toplotgl_sub <- toplotgl_sub[1:topnum,] 63 | } 64 | return(toplotgl_sub) 65 | } 66 | 67 | #' @title Plot switching genes 68 | #' 69 | #' @description This function plots switching genes on the pseudo-timeline 70 | #' 71 | #' @param tml switching genes 72 | #' @param timedata pseudotime for cells 73 | #' @param iffulltml if plot the full timeline 74 | #' @param txtsize text size for gene names 75 | #' @param color_by the cell attribute (e.g. the column of tml) to map to each cell's color 76 | #' @return 77 | #' 78 | #' @import ggplot2 79 | #' @importFrom ggrepel geom_text_repel 80 | #' @export 81 | #' 82 | plot_timeline_ggplot <- function(tml, timedata, iffulltml = TRUE, txtsize = 3.5, color_by = "feature_type") { 83 | tml <- as.data.frame(tml) 84 | tml <- tml[order(tml$switch_at_time), ] 85 | tml$direction_num <- -1 86 | if ("up" %in% tml$direction) { 87 | tml[tml$direction == "up", ]$direction_num <- 1 88 | } 89 | tml$color_by <- as.factor(tml[,color_by]) 90 | tml$feature_name <- rownames(tml) 91 | head(tml) 92 | 93 | if (iffulltml) { 94 | pseudotime_step <- (max(timedata) - min(timedata))/4 95 | pseudotime_range <- seq(min(timedata), max(timedata), by = pseudotime_step) 96 | pseudotime_df <- data.frame(pseudotime_range, pseudotime_format = round(pseudotime_range, 1)) 97 | } else { 98 | pseudotime_step <- (max(tml$switch_at_time) - min(tml$switch_at_time))/4 99 | pseudotime_range <- seq(min(tml$switch_at_time), max(tml$switch_at_time), by = pseudotime_step) 100 | pseudotime_df <- data.frame(pseudotime_range, pseudotime_format = round(pseudotime_range, 1)) 101 | } 102 | 103 | tml_plot <- ggplot(tml, aes(x = switch_at_time, y = pseudoR2s * direction_num, col = color_by, label = feature_name)) + 104 | geom_point(size = txtsize/3) + xlab("Pseudo-timeline") + ylab("Quality of fitting (R^2)") 105 | tml_plot <- tml_plot + theme_classic() 106 | # Plot horizontal black line for timeline 107 | tml_plot <- tml_plot + geom_hline(yintercept = 0, color = "black", size = 0.6) 108 | tml_plot <- tml_plot + geom_label(data = pseudotime_df, aes(x = pseudotime_range, y = 0, label = pseudotime_format), size = (txtsize-0.5), 109 | color = "black") 110 | 111 | tml_plot <- tml_plot + geom_text_repel(aes(x = switch_at_time, y = pseudoR2s * direction_num, label = feature_name), 112 | size = txtsize, fontface = "bold", show.legend = FALSE) 113 | 114 | tml_plot <- tml_plot + theme(legend.position = "bottom", legend.title = element_blank(), legend.key.size = unit(10, "pt"), 115 | text = element_text(size = 12, family = "Helvetica")) 116 | 117 | return(tml_plot) 118 | } 119 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | indent: true 4 | --- 5 | 6 | 7 | 8 | ```{r, include = FALSE} 9 | knitr::opts_chunk$set( 10 | collapse = TRUE, 11 | comment = "#>", 12 | fig.path = "man/figures/README-" 13 | # out.width = "100%" 14 | ) 15 | ``` 16 | # GeneSwitches 17 | 18 | 19 | 20 | 21 | The goal of GeneSwitches is to discover the order of gene-expression and 22 | functional events during cell state transitions at a single-cell resolution. 23 | It works on any single-cell trajectory or pseudo-time ordering of cells to 24 | discover the genes that act as on/off switches between cell states and 25 | importantly the ordering at which these switches take place. 26 | 27 | ## Installation 28 | 29 | ### Check and install required packages 30 | 31 | Users may use following codes to check and install all the required packages. 32 | 33 | ``` r 34 | list.of.packages <- c("SingleCellExperiment", "Biobase", "fastglm", "ggplot2", "monocle", 35 | "plyr", "RColorBrewer", "ggrepel", "ggridges", "gridExtra", "devtools", 36 | "mixtools") 37 | 38 | ## for package "fastglm", "ggplot2", "plyr", "RColorBrewer", "ggrepel", "ggridges", "gridExtra", "mixtools" 39 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] 40 | if(length(new.packages)) install.packages(new.packages) 41 | 42 | ## for package "SingleCellExperiment", "Biobase" 43 | if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") 44 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] 45 | if(length(new.packages)) BiocManager::install(new.packages) 46 | ``` 47 | 48 | ### Install GeneSwitches 49 | 50 | The source code of GeneSwitches can be installed from [GitHub](https://github.com/) with: 51 | 52 | ``` r 53 | devtools::install_github("SGDDNB/GeneSwitches") 54 | ``` 55 | 56 | 57 | ## Input datasets 58 | 59 | GeneSwitches requires two inputs, namely a gene expression matrix and corresponding pseudo-time ordering of each cell. We convert these input datasets in a `SingleCellExperiment` object (Lun and Risso 2017) and below you will find a full “start-to-finish” workflow to realise the potential of this analyis. Any additions, suggestions or comments are welcome in the github repository. 60 | 61 | ```{r load library, message=FALSE} 62 | ## load libraries 63 | library(GeneSwitches) 64 | library(SingleCellExperiment) 65 | ``` 66 | 67 | For this vignette, we will work with a published dataset of single-cell RNA-seq data from the differentiation of human embryonic stem cells (hESC) to cardiomyocytes (CM) (Friedman et al., 2018). For the purpose of demonstration, we have subset the original dataset into 3000 cells and run Monocle2 to generate trajectories. Both the Log-normalized gene expression and Monocle2 inferred trajectory results can be downloaded from here as [logexpdata.RData](http://files.ddnetbio.com/logexpdata.RData), and [cardiac_monocle2.RData](http://files.ddnetbio.com/cardiac_monocle2.RData). This dataset was chosen in part because it shows a bifurcating cell fate of cardiac hESC differentiation that gives rise to definitive cardiomyocytes (Path1) or non-contractile cardiac derivatives (Path2) that allow for all aspect of GeneSwitches to be applied. 68 | 69 | ```{r load data, message=FALSE} 70 | ## Download example files to current directory 71 | get_example_inputData() 72 | ## Load input data log-normalized gene expression 73 | load("./logexpdata.RData") 74 | ## Load Monocle2 object with pseudo-time and dimensionality reduction 75 | load("./cardiac_monocle2.RData") 76 | ``` 77 | 78 | ### Direct input (NOT run) 79 | Users can input the gene expression (`logexpdata`; recommend for log-normalized expression), pseudo-time (`cell_pseudotime`) and dimensionality reductions (`rd_PCA`; optional and only for gene expression plots) into SingleCellExperiment object as follows. 80 | ```r 81 | ### create SingleCellExperiment object with log-normalized single cell data 82 | #sce <- SingleCellExperiment(assays = List(expdata = logexpdata)) 83 | ### add pseudo-time information 84 | #colData(sce)$Pseudotime <- cell_pseudotime 85 | ### add dimensionality reductions, e.g. PCA, UMAP, tSNE 86 | #pca <- prcomp(t(assays(sce)$expdata), scale. = FALSE) 87 | #rd_PCA <- pca$x[,1:2] 88 | #reducedDims(sce) <- SimpleList(PCA = rd_PCA) 89 | ``` 90 | ### Convert from trajectory results 91 | Alternatively, GeneSwitches provides functions to convert Monocle2 or Slingshot results into SingleCellExperiment object directly. For Monocle2 trajectory, users need to indicate the states of the desired path, which can be checked by plotting the trajectory using Monocle2 function `plot_cell_trajectory` or the following function. 92 | 93 | ```{r plot_monocle_trajectory, fig.height = 4, fig.width = 8, fig.align = "center"} 94 | ## plot Monocle2 trajectory colored by State 95 | # monocle:::plot_cell_trajectory(cardiac_monocle2, color_by = "State") 96 | plot_monocle_State(cardiac_monocle2) 97 | ``` 98 | 99 | Based on the marker genes, the pseudo-time trajectory starts from State 3, which are hESC cells. Definitive CM cells are in State 1 and non-contractile cardiac derivatives are in State 5. Therefore, we focus on Path1 with cells in states 3, 2, 1 and Path2 with cells in states 3, 2, 5, and extract these two paths from Monocle2 object. 100 | 101 | ```{r Monocle2 input, message=FALSE} 102 | ## Input log-normalized gene expression, Monocle2 pseudo-time and dimensionality reduction 103 | ## Path1 containing cells in states 3,2,1 104 | sce_p1 <- convert_monocle2(monocle2_obj = cardiac_monocle2, 105 | states = c(3,2,1), expdata = logexpdata) 106 | ## Path2 containing cells in states 3,2,5 107 | sce_p2 <- convert_monocle2(monocle2_obj = cardiac_monocle2, 108 | states = c(3,2,5), expdata = logexpdata) 109 | ``` 110 | 111 | If we are only interested in the trajectory within a certain range of pseudotime, function `subset_pseudotime` can be used to subset the SingleCellExperiment object accordingly, followed by filtering out lowly expressed genes. 112 | ```{r} 113 | ### Subset cells to pseudotime range from 10 to 25 114 | #sce_p1_subset <- subset_pseudotime(sce_p1, min_time = 10, max_time = 25, minexp = 0, mincells = 10) 115 | ``` 116 | In Part I, we will apply GeneSwitches on a single trajectory, Path1, to demonstrate the general workflow and functions. Comparison of GeneSwitches results from two trajectories (Path1 & 2) will be shown in Part II. 117 | 118 | 119 | ## PART I. GeneSwitches on a single trajectory 120 | 121 | ### I-1. Binarize gene expression 122 | 123 | Since we focus on the genes that are either switched on or off, we first binarize the gene expression data into 1(on) or 0(off) state. To achieve this, for each gene, we fit a mixture model of two gaussian distributions to the input gene expression to calculates gene-specific thresholds for binarization. Prior to fitting, we add gaussian noise with zero mean and 0.1 standard deviation to the gene expression, which ensures numerical stability in the fitting of the gene expression. Genes that do not have a distinct bimodal “on-off” distribution are then removed. This step may take 2 minutes for 2000 cells using 3 cores. 124 | 125 | ```{r binarization1} 126 | ### binarize gene expression using gene-specific thresholds 127 | sce_p1 <- binarize_exp(sce_p1, ncores = 3) 128 | ``` 129 | 130 | Alternatively, we can use a global threshold for fast binarization. We plot a histogram of expression of all the genes in all cells and look for a break between the zero and expressed distributions to identify the global threshold. 131 | 132 | ```{r binarization_threshold, fig.height = 4, fig.width = 5, fig.align = "center"} 133 | ### check the threshold for binarization 134 | #h <- hist(assays(sce_p1)$expdata, breaks = 200, plot = FALSE) 135 | #{plot(h, freq = FALSE, xlim = c(0,2), ylim = c(0,1), main = "Histogram of gene expression", 136 | #xlab = "Gene expression", col = "darkgoldenrod2", border = "grey") 137 | #abline(v=0.2, col="blue")} 138 | 139 | ###In this example, we choose 0.2 (blue line, also set as default) as the threshold. 140 | # bn_cutoff <- 0.2 141 | # sce_p1 <- binarize_exp(sce_p1, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff) 142 | ``` 143 | 144 | ### I-2. Fit logistic regression & estimate switching time 145 | 146 | Logistic regression is applied to model the binary states (on or off) of gene 147 | expression. Then the switching pseudo-time point is determined by the time at 148 | which the fitted line crosses the probability threshold 0.5. 149 | We use random downsampling of zero expressions (downsample = TRUE) to rescue 150 | the prediction of switching time for genes with high zero inflation. 151 | 152 | ```{r, message=FALSE} 153 | ## fit logistic regression and find the switching pseudo-time point for each gene 154 | ## with downsampling. This step takes less than 1 mins 155 | sce_p1 <- find_switch_logistic_fastglm(sce_p1, downsample = TRUE, show_warning = FALSE) 156 | ``` 157 | 158 | 159 | ### I-3. Visualize ordering of switching genes 160 | 161 | First, we filter poorly fitted genes based on zero-expression percentage (>90%), FDR (>0.05) and McFadden's Pseudo R^2 (<0.03). We can then the number of top best fitting (high McFadden's Pseudo R^2) genes to plot. One can also extract specific gene type(s) to plot, with provided gene type lists containing surface proteins (downloaded from [here](http://wlab.ethz.ch/cspa/)) and transcription factors (TFs, downloaded from [here](http://humantfs.ccbr.utoronto.ca/)). Users are allowed to pass their own gene type lists as a data frame to parameter genelists, with rows as genes (non-duplicated) and two columns with name `genenames` and `genetypes`. 162 | 163 | ```{r, message=FALSE} 164 | ## filter top 15 best fitting switching genes among all the genes 165 | sg_allgenes <- filter_switchgenes(sce_p1, allgenes = TRUE, topnum = 15) 166 | ## filter top 15 best fitting switching genes among surface proteins and TFs only 167 | sg_gtypes <- filter_switchgenes(sce_p1, allgenes = FALSE, topnum = 20, 168 | genelists = gs_genelists, genetype = c("Surface proteins", "TFs")) 169 | ## combine switching genes and remove duplicated genes from sg_allgenes 170 | sg_vis <- rbind(sg_gtypes, sg_allgenes[setdiff(rownames(sg_allgenes), rownames(sg_gtypes)),]) 171 | 172 | ``` 173 | 174 | Finally, plot the selected genes along the pseudo-timeline. Genes that are switched on are plotted above the line, while those switching off are below the line. 175 | 176 | ```{r timeline example, message=FALSE, fig.height = 4.2, fig.width = 6, fig.align = "center"} 177 | plot_timeline_ggplot(sg_vis, timedata = sce_p1$Pseudotime, txtsize = 3) 178 | ``` 179 | 180 | It is possible to use the dimensionality reduction provided from the user to visualise the gene expression and logistic regression fitting plots if needed. 181 | 182 | ```{r plotexp, fig.height = 2.7, fig.width = 3.5, fig.align = "center"} 183 | plot_gene_exp(sce_p1, gene = "VIM", reduction = "monocleRD", downsample = F) 184 | ``` 185 | 186 | 187 | ### I-4. Order pathways along the pseudo-timeline 188 | 189 | GeneSwitches can be used to order pathways or genesets as well. We include the pathways provided by MSigDB hallmark (Liberzon,A. et al., 2015), C2 curated and C5 gene ontology geneset collections. 190 | A Hypergeometric test is first applied to extract the pathways that are significantly overrepresented amongst those that are changing along the trajectory. The Switching time of the pathway is then determined by the median switching time of genes in that pathway. 191 | 192 | ```{r pathways} 193 | ## filter genes for pathway analysis using r^2 cutoff 0.1 194 | sg_pw <- filter_switchgenes(sce_p1, allgenes = TRUE, r2cutoff = 0.1) 195 | ## apply hypergeometric test and determine the switching time 196 | switch_pw <- find_switch_pathway(rowData(sce_p1), sig_FDR = 0.05, 197 | pathways = msigdb_h_c2_c5, sg_pw) 198 | ## remove redundant pathways 199 | switch_pw_reduce <- reduce_pathways(switch_pw, pathways = msigdb_h_c2_c5, 200 | redundant_pw_rate = 0.8) 201 | ``` 202 | 203 | To better visualise the functional changes ridge plots of pathways genes show the density of switching genes along the pseudo-time. Top 10 significantly changed pathways are plotted here, ordered by the switching time. 204 | 205 | ```{r pathways_ridge_plots, fig.height = 3, fig.width = 8, fig.align = "center"} 206 | plot_pathway_density(switch_pw_reduce[1:10,], sg_pw, orderbytime = TRUE) 207 | ``` 208 | 209 | We can also select specific pathway(s) to plot the switching genes in it. Among top 10 significantly changed pathways, we plot genes related to myogenesis and cardiac muscle tissue development. 210 | 211 | ```{r plot pathway genes, fig.height = 4.5, fig.width = 7, fig.align = "center"} 212 | sg_vis <- filter_switchgenes(sce_p1, topnum = 50, pathway_name = c("HALLMARK_MYOGENESIS", 213 | "GO_CARDIAC_MUSCLE_TISSUE_DEVELOPMENT")) 214 | plot_timeline_ggplot(sg_vis, timedata=sce_p1$Pseudotime, txtsize=3) 215 | ``` 216 | 217 | "Multiple" lables the genes in more than one pathways. 218 | 219 | 220 | ## PART II. Comparing switching genes from two trajectories 221 | 222 | Before comparison, we need to apply same steps in I-1 and I-2 on the cells from Path2 to identify switching pseudo-time point for each gene. 223 | 224 | ```{r for_path2} 225 | sce_p2 <- binarize_exp(sce_p2) 226 | sce_p2 <- find_switch_logistic_fastglm(sce_p2, downsample = TRUE, show_warnings = FALSE) 227 | ``` 228 | 229 | And we filter out poorly fitted genes for both paths using the same cutoff. 230 | 231 | ```{r filter out poorly fitted genes} 232 | sg_p1 <- filter_switchgenes(sce_p1, allgenes = TRUE, r2cutoff = 0.03) 233 | sg_p2 <- filter_switchgenes(sce_p2, allgenes = TRUE, r2cutoff = 0.03) 234 | ``` 235 | 236 | We then plot common switching genes between two paths to compare their ordering. 237 | 238 | ```{r plot common genes, fig.height = 4.5, fig.width = 7, fig.align = "center"} 239 | sg_com <- common_genes(sg_p1, sg_p2, r2cutoff = 0.4, 240 | path1name = "Definitive CM", path2name = "non-contractile") 241 | common_genes_plot(sg_com, timedata = sce_p1$Pseudotime) 242 | ``` 243 | 244 | More importantly, we can plot the distinct switching genes of the two paths. 245 | 246 | ```{r plot distinct genes, fig.height = 3.5, fig.width = 7, fig.align = "center"} 247 | sg_disgs <- distinct_genes(sg_p1, sg_p2, r2cutoff = 0.52, 248 | path1name = "Definitive CM", path2name = "non-contractile", 249 | path1time = sce_p1$Pseudotime, path2time = sce_p2$Pseudotime) 250 | plot_timeline_ggplot(sg_disgs, timedata = sce_p1$Pseudotime, color_by = "Paths", 251 | iffulltml = FALSE, txtsize = 3) 252 | ``` 253 | 254 | We can also scale the timelines to be the same length (default number of bins is 100) so that differences are based on percentage of the trajectory covered rather than pseudo-time. 255 | 256 | ```{r plot scale timeline, fig.height = 3.5, fig.width = 7, fig.align = "center"} 257 | sg_disgs_scale <- distinct_genes(sg_p1, sg_p2, r2cutoff = 0.52, 258 | path1name = "Definitive CM", path2name = "non-contractile", 259 | path1time = sce_p1$Pseudotime, path2time = sce_p2$Pseudotime, 260 | scale_timeline = T, bin = 100) 261 | # timedata need to be 1 to (number of bins) 262 | plot_timeline_ggplot(sg_disgs_scale, timedata = 1:100, color_by = "Paths", 263 | iffulltml = FALSE, txtsize = 3) 264 | ``` 265 | 266 | These two plots for distinct switching genes only show a range of pseudo-timeline in which there are switching events happening. This range is actually at the end of trajectories, while common genes are mostly at the early period (common gene plot). 267 | 268 | 269 | Similarly, we can check the gene expression plots for the two paths. 270 | 271 | ```{r plot_exp1, fig.height = 4, fig.width = 12, fig.align = "center"} 272 | gn <- "DCN" 273 | p <- plot_gene_exp(sce_p1, gene = gn, reduction = "monocleRD", 274 | downsample = FALSE, fitting = TRUE) 275 | p <- plot_gene_exp(sce_p2, gene = gn, reduction = "monocleRD", 276 | downsample = FALSE, fitting = TRUE) 277 | ``` 278 | 279 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # GeneSwitches 5 | 6 | 7 | 8 | 9 | 10 | The goal of GeneSwitches is to discover the order of gene-expression and 11 | functional events during cell state transitions at a single-cell 12 | resolution. It works on any single-cell trajectory or pseudo-time 13 | ordering of cells to discover the genes that act as on/off switches 14 | between cell states and importantly the ordering at which these switches 15 | take place. 16 | 17 | ## Installation 18 | 19 | ### Check and install required packages 20 | 21 | Users may use following codes to check and install all the required 22 | packages. 23 | 24 | ``` r 25 | list.of.packages <- c("SingleCellExperiment", "Biobase", "fastglm", "ggplot2", "monocle", 26 | "plyr", "RColorBrewer", "ggrepel", "ggridges", "gridExtra", "devtools", 27 | "mixtools") 28 | 29 | ## for package "fastglm", "ggplot2", "plyr", "RColorBrewer", "ggrepel", "ggridges", "gridExtra", "mixtools" 30 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] 31 | if(length(new.packages)) install.packages(new.packages) 32 | 33 | ## for package "SingleCellExperiment", "Biobase" 34 | if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") 35 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] 36 | if(length(new.packages)) BiocManager::install(new.packages) 37 | ``` 38 | 39 | ### Install GeneSwitches 40 | 41 | The source code of GeneSwitches can be installed from 42 | [GitHub](https://github.com/) with: 43 | 44 | ``` r 45 | devtools::install_github("SGDDNB/GeneSwitches") 46 | ``` 47 | 48 | ## Input datasets 49 | 50 | GeneSwitches requires two inputs, namely a gene expression matrix and 51 | corresponding pseudo-time ordering of each cell. We convert these input 52 | datasets in a `SingleCellExperiment` object (Lun and Risso 2017) and 53 | below you will find a full “start-to-finish” workflow to realise the 54 | potential of this analyis. Any additions, suggestions or comments are 55 | welcome in the github repository. 56 | 57 | ``` r 58 | ## load libraries 59 | library(GeneSwitches) 60 | library(SingleCellExperiment) 61 | ``` 62 | 63 | For this vignette, we will work with a published dataset of single-cell 64 | RNA-seq data from the differentiation of human embryonic stem cells 65 | (hESC) to cardiomyocytes (CM) (Friedman et al., 2018). For the purpose 66 | of demonstration, we have subset the original dataset into 3000 cells 67 | and run Monocle2 to generate trajectories. Both the Log-normalized gene 68 | expression and Monocle2 inferred trajectory results can be downloaded 69 | from here as 70 | [logexpdata.RData](http://files.ddnetbio.com/logexpdata.RData), and 71 | [cardiac\_monocle2.RData](http://files.ddnetbio.com/cardiac_monocle2.RData). 72 | This dataset was chosen in part because it shows a bifurcating cell fate 73 | of cardiac hESC differentiation that gives rise to definitive 74 | cardiomyocytes (Path1) or non-contractile cardiac derivatives (Path2) 75 | that allow for all aspect of GeneSwitches to be applied. 76 | 77 | ``` r 78 | ## Download example files to current directory 79 | get_example_inputData() 80 | ## Load input data log-normalized gene expression 81 | load("./logexpdata.RData") 82 | ## Load Monocle2 object with pseudo-time and dimensionality reduction 83 | load("./cardiac_monocle2.RData") 84 | ``` 85 | 86 | ### Direct input (NOT run) 87 | 88 | Users can input the gene expression (`logexpdata`; recommend for 89 | log-normalized expression), pseudo-time (`cell_pseudotime`) and 90 | dimensionality reductions (`rd_PCA`; optional and only for gene 91 | expression plots) into SingleCellExperiment object as 92 | follows. 93 | 94 | ``` r 95 | ### create SingleCellExperiment object with log-normalized single cell data 96 | #sce <- SingleCellExperiment(assays = List(expdata = logexpdata)) 97 | ### add pseudo-time information 98 | #colData(sce)$Pseudotime <- cell_pseudotime 99 | ### add dimensionality reductions, e.g. PCA, UMAP, tSNE 100 | #pca <- prcomp(t(assays(sce)$expdata), scale. = FALSE) 101 | #rd_PCA <- pca$x[,1:2] 102 | #reducedDims(sce) <- SimpleList(PCA = rd_PCA) 103 | ``` 104 | 105 | ### Convert from trajectory results 106 | 107 | Alternatively, GeneSwitches provides functions to convert Monocle2 or 108 | Slingshot results into SingleCellExperiment object directly. For 109 | Monocle2 trajectory, users need to indicate the states of the desired 110 | path, which can be checked by plotting the trajectory using Monocle2 111 | function `plot_cell_trajectory` or the following function. 112 | 113 | ``` r 114 | ## plot Monocle2 trajectory colored by State 115 | # monocle:::plot_cell_trajectory(cardiac_monocle2, color_by = "State") 116 | plot_monocle_State(cardiac_monocle2) 117 | ``` 118 | 119 | 120 | 121 | Based on the marker genes, the pseudo-time trajectory starts from State 122 | 3, which are hESC cells. Definitive CM cells are in State 1 and 123 | non-contractile cardiac derivatives are in State 5. Therefore, we focus 124 | on Path1 with cells in states 3, 2, 1 and Path2 with cells in states 3, 125 | 2, 5, and extract these two paths from Monocle2 126 | object. 127 | 128 | ``` r 129 | ## Input log-normalized gene expression, Monocle2 pseudo-time and dimensionality reduction 130 | ## Path1 containing cells in states 3,2,1 131 | sce_p1 <- convert_monocle2(monocle2_obj = cardiac_monocle2, 132 | states = c(3,2,1), expdata = logexpdata) 133 | ## Path2 containing cells in states 3,2,5 134 | sce_p2 <- convert_monocle2(monocle2_obj = cardiac_monocle2, 135 | states = c(3,2,5), expdata = logexpdata) 136 | ``` 137 | 138 | If we are only interested in the trajectory within a certain range of 139 | pseudotime, function `subset_pseudotime` can be used to subset the 140 | SingleCellExperiment object accordingly, followed by filtering out lowly 141 | expressed genes. 142 | 143 | ``` r 144 | ### Subset cells to pseudotime range from 10 to 25 145 | #sce_p1_subset <- subset_pseudotime(sce_p1, min_time = 10, max_time = 25, minexp = 0, mincells = 10) 146 | ``` 147 | 148 | In Part I, we will apply GeneSwitches on a single trajectory, Path1, to 149 | demonstrate the general workflow and functions. Comparison of 150 | GeneSwitches results from two trajectories (Path1 & 2) will be shown in 151 | Part II. 152 | 153 | ## PART I. GeneSwitches on a single trajectory 154 | 155 | ### I-1. Binarize gene expression 156 | 157 | Since we focus on the genes that are either switched on or off, we first 158 | binarize the gene expression data into 1(on) or 0(off) state. To achieve 159 | this, for each gene, we fit a mixture model of two gaussian 160 | distributions to the input gene expression to calculates gene-specific 161 | thresholds for binarization. Prior to fitting, we add gaussian noise 162 | with zero mean and 0.1 standard deviation to the gene expression, which 163 | ensures numerical stability in the fitting of the gene expression. Genes 164 | that do not have a distinct bimodal “on-off” distribution are then 165 | removed. This step may take 2 minutes for 2000 cells using 3 cores. 166 | 167 | ``` r 168 | ### binarize gene expression using gene-specific thresholds 169 | sce_p1 <- binarize_exp(sce_p1, ncores = 3) 170 | ``` 171 | 172 | Alternatively, we can use a global threshold for fast binarization. We 173 | plot a histogram of expression of all the genes in all cells and look 174 | for a break between the zero and expressed distributions to identify the 175 | global threshold. 176 | 177 | ``` r 178 | ### check the threshold for binarization 179 | #h <- hist(assays(sce_p1)$expdata, breaks = 200, plot = FALSE) 180 | #{plot(h, freq = FALSE, xlim = c(0,2), ylim = c(0,1), main = "Histogram of gene expression", 181 | #xlab = "Gene expression", col = "darkgoldenrod2", border = "grey") 182 | #abline(v=0.2, col="blue")} 183 | 184 | ###In this example, we choose 0.2 (blue line, also set as default) as the threshold. 185 | # bn_cutoff <- 0.2 186 | # sce_p1 <- binarize_exp(sce_p1, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff) 187 | ``` 188 | 189 | ### I-2. Fit logistic regression & estimate switching time 190 | 191 | Logistic regression is applied to model the binary states (on or off) of 192 | gene expression. Then the switching pseudo-time point is determined by 193 | the time at which the fitted line crosses the probability threshold 0.5. 194 | We use random downsampling of zero expressions (downsample = TRUE) to 195 | rescue the prediction of switching time for genes with high zero 196 | inflation. 197 | 198 | ``` r 199 | ## fit logistic regression and find the switching pseudo-time point for each gene 200 | ## with downsampling. This step takes less than 1 mins 201 | sce_p1 <- find_switch_logistic_fastglm(sce_p1, downsample = TRUE, show_warning = FALSE) 202 | ``` 203 | 204 | ### I-3. Visualize ordering of switching genes 205 | 206 | First, we filter poorly fitted genes based on zero-expression percentage 207 | (\>90%), FDR (\>0.05) and McFadden’s Pseudo R^2 (\<0.03). We can then 208 | the number of top best fitting (high McFadden’s Pseudo R^2) genes to 209 | plot. One can also extract specific gene type(s) to plot, with provided 210 | gene type lists containing surface proteins (downloaded from 211 | [here](http://wlab.ethz.ch/cspa/)) and transcription factors (TFs, 212 | downloaded from [here](http://humantfs.ccbr.utoronto.ca/)). Users are 213 | allowed to pass their own gene type lists as a data frame to parameter 214 | genelists, with rows as genes (non-duplicated) and two columns with name 215 | `genenames` and `genetypes`. 216 | 217 | ``` r 218 | ## filter top 15 best fitting switching genes among all the genes 219 | sg_allgenes <- filter_switchgenes(sce_p1, allgenes = TRUE, topnum = 15) 220 | ## filter top 15 best fitting switching genes among surface proteins and TFs only 221 | sg_gtypes <- filter_switchgenes(sce_p1, allgenes = FALSE, topnum = 20, 222 | genelists = gs_genelists, genetype = c("Surface proteins", "TFs")) 223 | ## combine switching genes and remove duplicated genes from sg_allgenes 224 | sg_vis <- rbind(sg_gtypes, sg_allgenes[setdiff(rownames(sg_allgenes), rownames(sg_gtypes)),]) 225 | ``` 226 | 227 | Finally, plot the selected genes along the pseudo-timeline. Genes that 228 | are switched on are plotted above the line, while those switching off 229 | are below the 230 | line. 231 | 232 | ``` r 233 | plot_timeline_ggplot(sg_vis, timedata = sce_p1$Pseudotime, txtsize = 3) 234 | ``` 235 | 236 | 237 | 238 | It is possible to use the dimensionality reduction provided from the 239 | user to visualise the gene expression and logistic regression fitting 240 | plots if 241 | needed. 242 | 243 | ``` r 244 | plot_gene_exp(sce_p1, gene = "VIM", reduction = "monocleRD", downsample = F) 245 | ``` 246 | 247 | 248 | 249 | ### I-4. Order pathways along the pseudo-timeline 250 | 251 | GeneSwitches can be used to order pathways or genesets as well. We 252 | include the pathways provided by MSigDB hallmark (Liberzon,A. et al., 253 | 2015), C2 curated and C5 gene ontology geneset collections. A 254 | Hypergeometric test is first applied to extract the pathways that are 255 | significantly overrepresented amongst those that are changing along the 256 | trajectory. The Switching time of the pathway is then determined by the 257 | median switching time of genes in that pathway. 258 | 259 | ``` r 260 | ## filter genes for pathway analysis using r^2 cutoff 0.1 261 | sg_pw <- filter_switchgenes(sce_p1, allgenes = TRUE, r2cutoff = 0.1) 262 | ## apply hypergeometric test and determine the switching time 263 | switch_pw <- find_switch_pathway(rowData(sce_p1), sig_FDR = 0.05, 264 | pathways = msigdb_h_c2_c5, sg_pw) 265 | ## remove redundant pathways 266 | switch_pw_reduce <- reduce_pathways(switch_pw, pathways = msigdb_h_c2_c5, 267 | redundant_pw_rate = 0.8) 268 | ``` 269 | 270 | To better visualise the functional changes ridge plots of pathways genes 271 | show the density of switching genes along the pseudo-time. Top 10 272 | significantly changed pathways are plotted here, ordered by the 273 | switching time. 274 | 275 | ``` r 276 | plot_pathway_density(switch_pw_reduce[1:10,], sg_pw, orderbytime = TRUE) 277 | #> Picking joint bandwidth of 2.49 278 | ``` 279 | 280 | 281 | 282 | We can also select specific pathway(s) to plot the switching genes in 283 | it. Among top 10 significantly changed pathways, we plot genes related 284 | to myogenesis and cardiac muscle tissue 285 | development. 286 | 287 | ``` r 288 | sg_vis <- filter_switchgenes(sce_p1, topnum = 50, pathway_name = c("HALLMARK_MYOGENESIS", 289 | "GO_CARDIAC_MUSCLE_TISSUE_DEVELOPMENT")) 290 | plot_timeline_ggplot(sg_vis, timedata=sce_p1$Pseudotime, txtsize=3) 291 | ``` 292 | 293 | 294 | 295 | “Multiple” lables the genes in more than one pathways. 296 | 297 | ## PART II. Comparing switching genes from two trajectories 298 | 299 | Before comparison, we need to apply same steps in I-1 and I-2 on the 300 | cells from Path2 to identify switching pseudo-time point for each gene. 301 | 302 | ``` r 303 | sce_p2 <- binarize_exp(sce_p2) 304 | sce_p2 <- find_switch_logistic_fastglm(sce_p2, downsample = TRUE, show_warnings = FALSE) 305 | ``` 306 | 307 | And we filter out poorly fitted genes for both paths using the same 308 | cutoff. 309 | 310 | ``` r 311 | sg_p1 <- filter_switchgenes(sce_p1, allgenes = TRUE, r2cutoff = 0.03) 312 | sg_p2 <- filter_switchgenes(sce_p2, allgenes = TRUE, r2cutoff = 0.03) 313 | ``` 314 | 315 | We then plot common switching genes between two paths to compare their 316 | ordering. 317 | 318 | ``` r 319 | sg_com <- common_genes(sg_p1, sg_p2, r2cutoff = 0.4, 320 | path1name = "Definitive CM", path2name = "non-contractile") 321 | common_genes_plot(sg_com, timedata = sce_p1$Pseudotime) 322 | ``` 323 | 324 | 325 | 326 | More importantly, we can plot the distinct switching genes of the two 327 | paths. 328 | 329 | ``` r 330 | sg_disgs <- distinct_genes(sg_p1, sg_p2, r2cutoff = 0.52, 331 | path1name = "Definitive CM", path2name = "non-contractile", 332 | path1time = sce_p1$Pseudotime, path2time = sce_p2$Pseudotime) 333 | plot_timeline_ggplot(sg_disgs, timedata = sce_p1$Pseudotime, color_by = "Paths", 334 | iffulltml = FALSE, txtsize = 3) 335 | ``` 336 | 337 | 338 | 339 | We can also scale the timelines to be the same length (default number of 340 | bins is 100) so that differences are based on percentage of the 341 | trajectory covered rather than pseudo-time. 342 | 343 | ``` r 344 | sg_disgs_scale <- distinct_genes(sg_p1, sg_p2, r2cutoff = 0.52, 345 | path1name = "Definitive CM", path2name = "non-contractile", 346 | path1time = sce_p1$Pseudotime, path2time = sce_p2$Pseudotime, 347 | scale_timeline = T, bin = 100) 348 | # timedata need to be 1 to (number of bins) 349 | plot_timeline_ggplot(sg_disgs_scale, timedata = 1:100, color_by = "Paths", 350 | iffulltml = FALSE, txtsize = 3) 351 | ``` 352 | 353 | 354 | 355 | These two plots for distinct switching genes only show a range of 356 | pseudo-timeline in which there are switching events happening. This 357 | range is actually at the end of trajectories, while common genes are 358 | mostly at the early period (common gene plot). 359 | 360 | Similarly, we can check the gene expression plots for the two paths. 361 | 362 | ``` r 363 | gn <- "DCN" 364 | p <- plot_gene_exp(sce_p1, gene = gn, reduction = "monocleRD", 365 | downsample = FALSE, fitting = TRUE) 366 | #> Warning: glm.fit: algorithm did not converge 367 | ``` 368 | 369 | 370 | 371 | ``` r 372 | p <- plot_gene_exp(sce_p2, gene = gn, reduction = "monocleRD", 373 | downsample = FALSE, fitting = TRUE) 374 | ``` 375 | 376 | 377 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /data-raw/DATASET.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `DATASET` dataset goes here 2 | 3 | ###pathways ---------------------------------- 4 | if (!requireNamespace("BiocManager", quietly = TRUE)) 5 | install.packages("BiocManager") 6 | BiocManager::install("fgsea") 7 | # BiocManager::install("fgsea", version = "3.8") 8 | library(fgsea) 9 | pathways1 <- gmtPathways("~/Documents/geneswitch/pathways/msigdb/h.all.v6.2.symbols.gmt") 10 | pathways2 <- gmtPathways("~/Documents/geneswitch/pathways/msigdb/c2.cp.kegg.v6.2.symbols.gmt") 11 | pathways3 <- gmtPathways("~/Documents/geneswitch/pathways/msigdb/c5.all.v6.2.symbols.gmt") 12 | msigdb_h_c2_c5 <- c(pathways1, pathways2, pathways3);length(pathways) 13 | # save(pathways, file = "../shiny/pathways.RData") 14 | usethis::use_data(msigdb_h_c2_c5, internal = FALSE) 15 | 16 | ###genelists ---------------------------------- 17 | load("~/Documents/general_scripts/genelists.RData") 18 | table(genelists$genetype) 19 | genelists <- genelists[genelists$genetype %in% c("TF", "surface"),];dim(genelists) 20 | genelists[genelists$genetype == "surface", ]$genetype <- "Surface protein" 21 | table(genelists$genetype) 22 | # usethis::use_data(genelists, internal = TRUE, overwrite = TRUE) 23 | genelists <- genelists[genelists$genetype == "TF",];dim(genelists) 24 | genelists$genetype <- "TFs" 25 | head(genelists) 26 | 27 | gl <- read.table("~/Documents/general_scripts/CellSurfaceAtlas.txt", header = TRUE) 28 | head(gl);class(gl);dim(gl) 29 | # length(setdiff(genelists[genelists$genetype == "Surface protein",]$genenames, gl$entrez_symbol)) 30 | # length(setdiff(gl$entrez_symbol, genelists[genelists$genetype == "Surface protein",]$genenames)) 31 | gl2 <- data.frame(genenames = unique(gl$entrez_symbol), genetype = "Surface proteins") 32 | rownames(gl2) <- gl2$genenames;dim(gl2) 33 | head(gl2) 34 | com <- intersect(rownames(gl2), rownames(genelists)) 35 | genelists[com,]$genetype <- "TF&Surface proteins" 36 | gl2 <- gl2[!rownames(gl2) %in% genelists$genenames,];dim(gl2) 37 | genelists <- rbind(gl2, genelists) 38 | head(genelists);table(genelists$genetype) 39 | gs_genelists <- genelists 40 | colnames(gs_genelists)[2] <- "genetypes" 41 | head(gs_genelists);table(gs_genelists$genetype) 42 | usethis::use_data(gs_genelists, internal = FALSE, overwrite = TRUE) 43 | 44 | genelists3 <- genelists 45 | save(genelists3, file = "~/Documents/general_scripts/genelists3(new).RData") 46 | 47 | gl <- read.table("~/Documents/general_scripts/ECM_proteins.txt", sep = "\t", header = TRUE) 48 | head(gl);class(gl);dim(gl) 49 | # length(setdiff(genelists[genelists$genetype == "Surface protein",]$genenames, gl$entrez_symbol)) 50 | # length(setdiff(gl$entrez_symbol, genelists[genelists$genetype == "Surface protein",]$genenames)) 51 | gl2 <- data.frame(genenames = unique(gl$entrez_symbol), genetype = "ECM") 52 | rownames(gl2) <- gl2$genenames;dim(gl2) 53 | head(gl2) 54 | com <- intersect(rownames(gl2), rownames(genelists)) 55 | genelists[com,] 56 | genelists <- genelists[!genelists$genenames %in% rownames(gl2),];dim(genelists) 57 | genelists <- rbind(gl2, genelists) 58 | head(genelists);table(genelists$genetype) 59 | 60 | gl <- read.table("~/Documents/general_scripts/CytoKineRegistry.txt", sep = "\t", header = TRUE) 61 | head(gl);class(gl);dim(gl) 62 | # length(setdiff(genelists[genelists$genetype == "Surface protein",]$genenames, gl$entrez_symbol)) 63 | # length(setdiff(gl$entrez_symbol, genelists[genelists$genetype == "Surface protein",]$genenames)) 64 | gl2 <- data.frame(genenames = unique(gl$EntrezGeneSymbol), genetype = "Cytokine") 65 | rownames(gl2) <- gl2$genenames;dim(gl2) 66 | head(gl2) 67 | com <- intersect(rownames(gl2), rownames(genelists));length(com) 68 | genelists[com,] 69 | genelists <- genelists[!genelists$genenames %in% rownames(gl2),];dim(genelists) 70 | genelists <- rbind(gl2, genelists) 71 | head(genelists);table(genelists$genetype) 72 | genelists2 <- genelists 73 | save(genelists2, file = "~/Documents/general_scripts/genelists2(new).RData") 74 | 75 | ###time monocle ---------------------------------- 76 | library(monocle) 77 | 78 | seu3obj <- readRDS("~/Documents/geneswitch/cardiomyocytes/cardiacSeurat.rds") 79 | allexpdata <- as.matrix(GetAssayData(object = seu3obj, slot = "counts"));dim(allexpdata) 80 | cellinfo <- seu3obj@meta.data;dim(cellinfo) 81 | all(rownames(cellinfo) == colnames(allexpdata)) 82 | 83 | #####subset cells 84 | cellinfo_sub <- cellinfo[sample(nrow(cellinfo), 3000),];dim(cellinfo_sub) 85 | table(cellinfo_sub$library) 86 | head(cellinfo_sub) 87 | expdata <- allexpdata[,rownames(cellinfo_sub)];dim(expdata) 88 | all(rownames(cellinfo_sub) == colnames(expdata)) 89 | 90 | genenames <- as.data.frame(rownames(expdata));head(genenames) 91 | colnames(genenames)[1] <- "gene_short_name" 92 | rownames(genenames) <- genenames$gene_short_name;head(genenames) 93 | cells <- cellinfo_sub 94 | all(rownames(cells) == colnames(expdata)) 95 | all(rownames(genenames) == rownames(expdata)) 96 | ################### 97 | 98 | pd <- new("AnnotatedDataFrame",data=cells) 99 | fd <- new("AnnotatedDataFrame",data=genenames) 100 | # First create a CellDataSet from the relative expression levels 101 | ORMM <- newCellDataSet(as.matrix(expdata), phenoData = pd, featureData =fd, 102 | lowerDetectionLimit = 1, expressionFamily = negbinomial.size()) 103 | 104 | ORMM <- estimateSizeFactors(ORMM) 105 | ORMM <- estimateDispersions(ORMM) 106 | ##Filtering low-quality cells 107 | ORMM<- detectGenes(ORMM, min_expr = 0.1) 108 | print(head(fData(ORMM))) 109 | expressed_genes <- row.names(subset(fData(ORMM), num_cells_expressed >=10)) 110 | length(expressed_genes) 111 | print(head(pData(ORMM)));dim(pData(ORMM)) 112 | 113 | ##good to look at the distribution of mRNA totals across the cells 114 | pData(ORMM)$Total_mRNAs <- Matrix::colSums(exprs(ORMM)) 115 | 116 | ORMM <- ORMM[,pData(ORMM)$Total_mRNAs < 1e6] 117 | 118 | upper_bound <- 10^(mean(log10(pData(ORMM)$Total_mRNAs)) + 119 | 2*sd(log10(pData(ORMM)$Total_mRNAs))) 120 | lower_bound <- 10^(mean(log10(pData(ORMM)$Total_mRNAs)) - 121 | 2*sd(log10(pData(ORMM)$Total_mRNAs))) 122 | pdf(paste(pid,"_1total_mRNAs_distribution.pdf",sep = "")) 123 | qplot(Total_mRNAs, data = pData(ORMM), color = Characteristics.inferred.lineage., geom = "density") + 124 | geom_vline(xintercept = lower_bound) + 125 | geom_vline(xintercept = upper_bound) 126 | dev.off() 127 | ##removed the few cells with either very low mRNA recovery or far more mRNA that the typical cell 128 | ORMM <- ORMM[,pData(ORMM)$Total_mRNAs > lower_bound & 129 | pData(ORMM)$Total_mRNAs < upper_bound] 130 | ORMM <- detectGenes(ORMM, min_expr = 0.1) 131 | # Log-transform each value in the expression matrix. 132 | L <- log(exprs(ORMM[expressed_genes,])) 133 | #install.packages("reshape2") 134 | library(reshape2) 135 | # Standardize each gene, so that they are all on the same scale, 136 | # Then melt the data with plyr so we can plot it easily 137 | melted_dens_df <- melt(Matrix::t(scale(Matrix::t(L)))) 138 | 139 | # Plot the distribution of the standardized gene expression values. 140 | pdf(paste(pid,"_2standardized_gene_exp.pdf",sep = "")) 141 | qplot(value, geom = "density", data = melted_dens_df) + 142 | stat_function(fun = dnorm, size = 0.5, color = 'red') + 143 | xlab("Standardized log(FPKM)") + 144 | ylab("Density") 145 | dev.off() 146 | 147 | #####Ordering based on genes that differ between clusters 148 | disp_table <- dispersionTable(ORMM) 149 | unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1) 150 | ORMM <- setOrderingFilter(ORMM, unsup_clustering_genes$gene_id) 151 | pdf(paste(pid,"_3ordering_genes.pdf",sep = "")) 152 | plot_ordering_genes(ORMM) 153 | dev.off() 154 | pdf(paste(pid,"_4pc_variance.pdf",sep = "")) 155 | plot_pc_variance_explained(ORMM, return_all = F) # norm_method='log' 156 | dev.off() 157 | ORMM <- reduceDimension(ORMM, max_components = 2, norm_method = 'log', num_dim = 5, 158 | residualModelFormulaStr = "~num_genes_expressed", 159 | reduction_method = 'tSNE', verbose = T) 160 | ORMM <- clusterCells(ORMM, verbose = T) 161 | 162 | ##Constructing Single Cell Trajectories 163 | diff_test_res <- differentialGeneTest(ORMM[expressed_genes,], 164 | fullModelFormulaStr = "~library", 165 | cores = 5, verbose = TRUE) 166 | ordering_genes <- row.names (subset(diff_test_res, qval < 0.01)) 167 | ORMM <- setOrderingFilter(ORMM, ordering_genes) 168 | pdf(paste(pid,"_5ordering_genes_fortree.pdf",sep = "")) 169 | plot_ordering_genes(ORMM) 170 | dev.off() 171 | ORMM <- reduceDimension(ORMM, max_components=2, method = 'DDRTree') 172 | ORMM <- orderCells(ORMM)#, num_paths=2) 173 | # save(ORMM, file = "/home/yiqun/Documents/geneswitch/cardiomyocytes/monocle2/ORMM(monocle_DDRTree).RData") 174 | expressed_genes <- row.names(subset(fData(ORMM), num_cells_expressed >=10)) 175 | ORMM_filtered <- ORMM[expressed_genes,] 176 | my_genes <- row.names(subset(fData(ORMM_filtered), 177 | gene_short_name %in% c("POU5F1","SOX17","EOMES","ISL1","TNNI1","MYL7","THY1"))) 178 | cds_subset <- ORMM_filtered[my_genes,] 179 | plot_cell_trajectory(ORMM, color_by = "Pseudotime", markers = "SOX17") 180 | 181 | 182 | cardiac_monocle2 <- ORMM 183 | cardiac_monocle2@reducedDimS[,1:5] 184 | cardiac_monocle2@reducedDimS[1,] <- -(cardiac_monocle2@reducedDimS[1,]) 185 | cardiac_monocle2@reducedDimK[1,] <- -(cardiac_monocle2@reducedDimK[1,]) 186 | head(pData(cardiac_monocle2)) 187 | # colnames(pData(cardiac_monocle2))[7] <- "Clusters" 188 | tiff("monocle_pseudotime.tiff", units="in", width=5, height=5, res=300) 189 | plot_cell_trajectory(cardiac_monocle2, color_by = "Pseudotime", cell_size = 1) 190 | dev.off() 191 | plot_cell_trajectory(cardiac_monocle2, color_by = "library", cell_size = 1) 192 | 193 | pdf(paste("monocle_","trajectory_cardiac",".pdf",sep = "")) 194 | plot_cell_trajectory(cardiac_monocle2, color_by = "library") 195 | plot_cell_trajectory(cardiac_monocle2, color_by = "RNA_snn_res.0.1") 196 | plot_cell_trajectory(cardiac_monocle2, color_by = "Pseudotime") 197 | # plot_genes_in_pseudotime(cds_subset, color_by = "library") 198 | dev.off() 199 | save(cardiac_monocle2, file = "cardiac_monocle2.RData") 200 | 201 | mcells <- pData(cardiac_monocle2);head(mcells) 202 | head(mcells);dim(mcells) 203 | 204 | seu3obj <- readRDS("~/Documents/geneswitch/cardiomyocytes/cardiacSeurat.rds") 205 | logexpdata <- as.matrix(GetAssayData(object = seu3obj, slot = "data")[,rownames(mcells)]);dim(logexpdata) 206 | all(rownames(mcells) == colnames(logexpdata)) 207 | logexpdata[1:5,1:5] 208 | save(logexpdata, file = "logexpdata.RData") 209 | 210 | # load("~/Documents/geneswitch/cardiomyocytes/monocle2/cardiac_monocle2.RData") 211 | # load("~/Documents/geneswitch/cardiomyocytes/monocle2/logexpdata.RData") 212 | # usethis::use_data(logexpdata, cardiac_monocle2, genelists, internal = TRUE, overwrite = TRUE) 213 | usethis::use_data(genelists, internal = TRUE, overwrite = TRUE) 214 | 215 | 216 | # You can install the released version of GeneSwitches from [CRAN](https://CRAN.R-project.org) with: 217 | 218 | #``` r 219 | #install.packages("GeneSwitches") 220 | #``` 221 | #You can install the development version from [GitHub](https://github.com/) with: 222 | # user system elapsed 223 | # 205.137 5.573 210.613 224 | # user system elapsed 225 | # 141.616 2.425 143.954 226 | # user system elapsed 227 | # 1956.396 242.979 2197.818 228 | -------------------------------------------------------------------------------- /data/gs_genelists.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/data/gs_genelists.rda -------------------------------------------------------------------------------- /data/msigdb_h_c2_c5.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/data/msigdb_h_c2_c5.rda -------------------------------------------------------------------------------- /man/binarize_exp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/binarization.R 3 | \name{binarize_exp} 4 | \alias{binarize_exp} 5 | \title{Binarize gene expression} 6 | \usage{ 7 | binarize_exp(sce, fix_cutoff = FALSE, binarize_cutoff = 0.2, 8 | ncores = 3) 9 | } 10 | \arguments{ 11 | \item{sce}{SingleCellExperiment} 12 | 13 | \item{fix_cutoff}{Logical. if use fixed global cutoff for binarization, default FALSE} 14 | 15 | \item{binarize_cutoff}{fixed global cutoff for binarization, default 0.2} 16 | 17 | \item{ncores}{number of cores} 18 | } 19 | \value{ 20 | 21 | } 22 | \description{ 23 | This function generates on/off binarized data for gene expression 24 | } 25 | -------------------------------------------------------------------------------- /man/common_genes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_trajectories.R 3 | \name{common_genes} 4 | \alias{common_genes} 5 | \title{Identify common switching genes between paths} 6 | \usage{ 7 | common_genes(toplotgl_Rsub1, toplotgl_Rsub2, path1name = "Path1Genes", 8 | path2name = "Path2Genes", r2cutoff = 0.05) 9 | } 10 | \arguments{ 11 | \item{toplotgl_Rsub1}{switching genes of path1} 12 | 13 | \item{toplotgl_Rsub2}{switching genes of path2} 14 | 15 | \item{path1name}{name of path1 given by user} 16 | 17 | \item{path2name}{name of path2 given by user} 18 | 19 | \item{r2cutoff}{pseudo R^2 cutoff} 20 | } 21 | \value{ 22 | 23 | } 24 | \description{ 25 | This function identifies common switching genes between two paths 26 | } 27 | -------------------------------------------------------------------------------- /man/common_genes_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_trajectories.R 3 | \name{common_genes_plot} 4 | \alias{common_genes_plot} 5 | \title{Plot common switching genes between paths} 6 | \usage{ 7 | common_genes_plot(ggData, timedata) 8 | } 9 | \arguments{ 10 | \item{ggData}{data frame for common genes} 11 | 12 | \item{timedata}{timedata to show on plot} 13 | } 14 | \value{ 15 | 16 | } 17 | \description{ 18 | This function plots common switching genes between two paths 19 | } 20 | -------------------------------------------------------------------------------- /man/convert_monocle2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_TI.R 3 | \name{convert_monocle2} 4 | \alias{convert_monocle2} 5 | \title{Convert monocle2 output into GeneSwitches object} 6 | \usage{ 7 | convert_monocle2(monocle2_obj, states, expdata) 8 | } 9 | \arguments{ 10 | \item{monocle2_obj}{monocle2 output object} 11 | 12 | \item{states}{a vector of states (path) that are interested in} 13 | 14 | \item{logexpdata}{log-normal gene expression} 15 | } 16 | \value{ 17 | 18 | } 19 | \description{ 20 | This function converts monocle2 output into GeneSwitches object 21 | } 22 | -------------------------------------------------------------------------------- /man/convert_slingshot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_TI.R 3 | \name{convert_slingshot} 4 | \alias{convert_slingshot} 5 | \title{Convert slingshot output into GeneSwitches object} 6 | \usage{ 7 | convert_slingshot(sce_slingshot, pseudotime_idx, assayname = "expdata") 8 | } 9 | \arguments{ 10 | \item{sce_slingshot}{slingshot SingleCellExperiment output object} 11 | 12 | \item{pseudotime_idx}{name of desired pseudotime path to apply GeneSwitches} 13 | 14 | \item{assayname}{expression assay to use} 15 | } 16 | \value{ 17 | 18 | } 19 | \description{ 20 | This function converts slingshot output into GeneSwitches object 21 | } 22 | -------------------------------------------------------------------------------- /man/distinct_genes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_trajectories.R 3 | \name{distinct_genes} 4 | \alias{distinct_genes} 5 | \title{Identify distinct switching genes for each path} 6 | \usage{ 7 | distinct_genes(toplotgl_Rsub1, toplotgl_Rsub2, path1name = "Path1Genes", 8 | path2name = "Path2Genes", r2cutoff = 0.05, scale_timeline = FALSE, 9 | path1time = NULL, path2time = NULL, bin = 100) 10 | } 11 | \arguments{ 12 | \item{toplotgl_Rsub1}{switching genes of path1} 13 | 14 | \item{toplotgl_Rsub2}{switching genes of path2} 15 | 16 | \item{path1name}{name of path1 given by user} 17 | 18 | \item{path2name}{name of path2 given by user} 19 | 20 | \item{r2cutoff}{pseudo R^2 cutoff} 21 | } 22 | \value{ 23 | 24 | } 25 | \description{ 26 | This function identifies distinct switching genes for each path 27 | } 28 | -------------------------------------------------------------------------------- /man/downsample_zeros.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_logistic_regression.R 3 | \name{downsample_zeros} 4 | \alias{downsample_zeros} 5 | \title{Random downsampling of zero expression} 6 | \usage{ 7 | downsample_zeros(glmdata, ratio_ds = 0.7) 8 | } 9 | \arguments{ 10 | \item{glmdata}{binary data of one gene} 11 | 12 | \item{ratio_ds}{downsampling zeros to this proportion} 13 | } 14 | \value{ 15 | 16 | } 17 | \description{ 18 | This function does randomly downsampling of cells with zero expression for one gene 19 | } 20 | -------------------------------------------------------------------------------- /man/figures/README-binarization cutoff-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-binarization cutoff-1.png -------------------------------------------------------------------------------- /man/figures/README-binarization threshold-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-binarization threshold-1.png -------------------------------------------------------------------------------- /man/figures/README-binarization_threshold-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-binarization_threshold-1.png -------------------------------------------------------------------------------- /man/figures/README-convert slingshot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-convert slingshot-1.png -------------------------------------------------------------------------------- /man/figures/README-distinct genes-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-distinct genes-1.png -------------------------------------------------------------------------------- /man/figures/README-pathways ridge plots-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-pathways ridge plots-1.png -------------------------------------------------------------------------------- /man/figures/README-pathways-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-pathways-1.png -------------------------------------------------------------------------------- /man/figures/README-pathways_ridge_plots-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-pathways_ridge_plots-1.png -------------------------------------------------------------------------------- /man/figures/README-plot common genes-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot common genes-1.png -------------------------------------------------------------------------------- /man/figures/README-plot distinct genes-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot distinct genes-1.png -------------------------------------------------------------------------------- /man/figures/README-plot exp1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot exp1-1.png -------------------------------------------------------------------------------- /man/figures/README-plot exp1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot exp1-2.png -------------------------------------------------------------------------------- /man/figures/README-plot exp2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot exp2-1.png -------------------------------------------------------------------------------- /man/figures/README-plot pathway genes-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot pathway genes-1.png -------------------------------------------------------------------------------- /man/figures/README-plot pathways-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot pathways-1.png -------------------------------------------------------------------------------- /man/figures/README-plot scale timeline-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot scale timeline-1.png -------------------------------------------------------------------------------- /man/figures/README-plot_exp1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot_exp1-1.png -------------------------------------------------------------------------------- /man/figures/README-plot_exp1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot_exp1-2.png -------------------------------------------------------------------------------- /man/figures/README-plot_monocle_trajectory-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot_monocle_trajectory-1.png -------------------------------------------------------------------------------- /man/figures/README-plotexp-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plotexp-1.png -------------------------------------------------------------------------------- /man/figures/README-scale timeline-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-scale timeline-1.png -------------------------------------------------------------------------------- /man/figures/README-timeline example-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-timeline example-1.png -------------------------------------------------------------------------------- /man/figures/README-timeline example-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-timeline example-2.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /man/filter_switchgenes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_timeline.R 3 | \name{filter_switchgenes} 4 | \alias{filter_switchgenes} 5 | \title{Extract switching gene list of interesting} 6 | \usage{ 7 | filter_switchgenes(sce, allgenes = FALSE, pathway_name = NULL, 8 | genelists = GeneSwitches:::genelists, 9 | genetype = c("Surface proteins", "TFs"), zero_pct = 0.9, 10 | r2cutoff = 0.03, direction = c("up", "down"), topnum = 1e+05) 11 | } 12 | \arguments{ 13 | \item{sce}{switching genes} 14 | 15 | \item{allgenes}{if use all genes} 16 | 17 | \item{pathway_name}{a list of pathway name(s) to plot} 18 | 19 | \item{genelists}{a gene list to plot} 20 | 21 | \item{genetype}{specific gene type to plot c("EMT", "reprogramming", "stem", "surface", "TF")} 22 | 23 | \item{zero_pct}{zero-expression percentage cut off for significant genes} 24 | 25 | \item{r2cutoff}{pseudo R^2 cutoff} 26 | 27 | \item{direction}{switching direction, up or down} 28 | 29 | \item{topnum}{number of top genes ordered by pseudo R^2 value} 30 | } 31 | \value{ 32 | 33 | } 34 | \description{ 35 | This function extract a list of significant switching genes 36 | } 37 | -------------------------------------------------------------------------------- /man/find_switch_logistic_fastglm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_logistic_regression.R 3 | \name{find_switch_logistic_fastglm} 4 | \alias{find_switch_logistic_fastglm} 5 | \title{Fit fast logistic regression and find switching timepoint} 6 | \usage{ 7 | find_switch_logistic_fastglm(sce, downsample = FALSE, ds_cutoff = 0.7, 8 | zero_ratio = 0.7, sig_FDR = 0.05, show_warnings = TRUE) 9 | } 10 | \arguments{ 11 | \item{sce}{SingleCellExperiment} 12 | 13 | \item{downsample}{Logical. if do random downsampling of zeros} 14 | 15 | \item{ds_cutoff}{only do downsampling if zero percentage is over this cutoff} 16 | 17 | \item{zero_ratio}{downsampling zeros to this proportion} 18 | 19 | \item{sig_FDR}{FDR cut off for significant genes} 20 | } 21 | \value{ 22 | 23 | } 24 | \description{ 25 | This function fits fast logistic regression and find switching timepoint for each gene 26 | } 27 | -------------------------------------------------------------------------------- /man/find_switch_pathway.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_switch_pathway.R 3 | \name{find_switch_pathway} 4 | \alias{find_switch_pathway} 5 | \title{Find significantly changed pathways and switching timepoint} 6 | \usage{ 7 | find_switch_pathway(scerowdata, pathways = msigdb_h_c2_c5, toplotgl_sig, 8 | sig_FDR = 0.05) 9 | } 10 | \arguments{ 11 | \item{pathways}{a list of pathways with genes} 12 | 13 | \item{sig_FDR}{FDR cut off for significant pathways} 14 | 15 | \item{sce}{SingleCellExperiment} 16 | 17 | \item{toplotgl_ptw}{swiching genes to plot} 18 | } 19 | \value{ 20 | 21 | } 22 | \description{ 23 | This function finds significantly changed pathways and determine 24 | the switching timepoint for each pathway 25 | } 26 | -------------------------------------------------------------------------------- /man/get_example_inputData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_example_inputData.R 3 | \name{get_example_inputData} 4 | \alias{get_example_inputData} 5 | \title{Download example input data} 6 | \usage{ 7 | get_example_inputData() 8 | } 9 | \value{ 10 | 11 | } 12 | \description{ 13 | This function checks if example input files are in the current directory and if not download them 14 | } 15 | -------------------------------------------------------------------------------- /man/gs_genelists.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{gs_genelists} 5 | \alias{gs_genelists} 6 | \title{An example of gene lists} 7 | \format{A data frame with 3071 rows and 2 columns. rows are genes, and columns are "genenames" and "genetypes".} 8 | \source{ 9 | \url{http://wlab.ethz.ch/cspa/, http://humantfs.ccbr.utoronto.ca/} 10 | } 11 | \usage{ 12 | gs_genelists 13 | } 14 | \description{ 15 | Provided gene lists with surface proteins and transctription factors 16 | } 17 | \keyword{datasets} 18 | -------------------------------------------------------------------------------- /man/merge_pathways.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_switch_pathway.R 3 | \name{merge_pathways} 4 | \alias{merge_pathways} 5 | \title{Merge redundant pathways} 6 | \usage{ 7 | merge_pathways(switch_pw, pathways, ratio) 8 | } 9 | \arguments{ 10 | \item{switch_pw}{a data frame with significantly changed pathways} 11 | 12 | \item{pathways}{a list of pathways with genes} 13 | 14 | \item{ratio}{cutoff ratio for merging redundant pathways} 15 | } 16 | \value{ 17 | 18 | } 19 | \description{ 20 | Merge significant pathways that are with same genes over certain ratio 21 | } 22 | -------------------------------------------------------------------------------- /man/msigdb_h_c2_c5.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{msigdb_h_c2_c5} 5 | \alias{msigdb_h_c2_c5} 6 | \title{An example of gene sets} 7 | \format{A list with 6153 pathways, and genes in each pathway} 8 | \source{ 9 | \url{http://software.broadinstitute.org/gsea/msigdb/collections.jsp} 10 | } 11 | \usage{ 12 | msigdb_h_c2_c5 13 | } 14 | \description{ 15 | Pathways of GSEA MSigDB hallmark, c2 and c5 gene sets 16 | } 17 | \keyword{datasets} 18 | -------------------------------------------------------------------------------- /man/phyper_pathway.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_switch_pathway.R 3 | \name{phyper_pathway} 4 | \alias{phyper_pathway} 5 | \title{Find significantly changed pathways and switching timepoint} 6 | \usage{ 7 | phyper_pathway(N, pathways, toplotgl_ptw, sig_FDR = 0.05, 8 | direction = c("up", "down")) 9 | } 10 | \arguments{ 11 | \item{N}{expressed genes in pathways} 12 | 13 | \item{pathways}{a list of pathways with genes} 14 | 15 | \item{toplotgl_ptw}{swiching genes to plot} 16 | 17 | \item{sig_FDR}{FDR cut off for significant pathways} 18 | 19 | \item{direction}{switching direction, up or down} 20 | } 21 | \value{ 22 | 23 | } 24 | \description{ 25 | Apply hypergeometric test to determine significantly changed 26 | pathways and determine the switching timepoint for each pathway 27 | } 28 | -------------------------------------------------------------------------------- /man/plot_gene_exp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_gene_exp.R 3 | \name{plot_gene_exp} 4 | \alias{plot_gene_exp} 5 | \title{Plot gene expression} 6 | \usage{ 7 | plot_gene_exp(sce, geneofi, reduction, downsample = FALSE, 8 | ds_cutoff = 0.7, zero_ratio = 0.7, ptsize = 0.7, fitting = FALSE) 9 | } 10 | \arguments{ 11 | \item{sce}{SingleCellExperiment} 12 | 13 | \item{geneofi}{one gene of interest} 14 | 15 | \item{reduction}{dimensional reduction method} 16 | 17 | \item{downsample}{if do random downsampling of zeros} 18 | 19 | \item{ds_cutoff}{only do downsampling if zero percentage is over this cutoff} 20 | 21 | \item{zero_ratio}{downsampling zeros to this proportion} 22 | 23 | \item{ptsize}{point size} 24 | 25 | \item{fitting}{if plot logistic regression fitting} 26 | } 27 | \value{ 28 | 29 | } 30 | \description{ 31 | This function plot gene expression on two-dimensional space 32 | } 33 | -------------------------------------------------------------------------------- /man/plot_monocle_State.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_TI.R 3 | \name{plot_monocle_State} 4 | \alias{plot_monocle_State} 5 | \title{plot monocle2 trajectory colored by State} 6 | \usage{ 7 | plot_monocle_State(monocle2_obj) 8 | } 9 | \arguments{ 10 | \item{monocle2_obj}{monocle2 output object} 11 | } 12 | \value{ 13 | 14 | } 15 | \description{ 16 | This function plots monocle2 trajectory with "State" colors 17 | } 18 | -------------------------------------------------------------------------------- /man/plot_pathway_density.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_switch_pathway.R 3 | \name{plot_pathway_density} 4 | \alias{plot_pathway_density} 5 | \title{Pathways ridge plot} 6 | \usage{ 7 | plot_pathway_density(switch_pw_re, toplotgl_sig, pw_direction = c("up", 8 | "down"), orderbytime = TRUE) 9 | } 10 | \arguments{ 11 | \item{switch_pw_re}{significant pathways to plot} 12 | 13 | \item{toplotgl_sig}{switching genes} 14 | 15 | \item{orderbytime}{order the pathways by switching time (mean time of switching genes) if TRUE, 16 | order the pathways by FDR if FALSE} 17 | 18 | \item{direction}{switching direction of the pathway, up or down} 19 | } 20 | \value{ 21 | 22 | } 23 | \description{ 24 | This function generates pathways ridge plots 25 | } 26 | -------------------------------------------------------------------------------- /man/plot_timeline_ggplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_timeline.R 3 | \name{plot_timeline_ggplot} 4 | \alias{plot_timeline_ggplot} 5 | \title{Plot switching genes} 6 | \usage{ 7 | plot_timeline_ggplot(tml, timedata, iffulltml = TRUE, txtsize = 3.5, 8 | color_by = "feature_type") 9 | } 10 | \arguments{ 11 | \item{tml}{switching genes} 12 | 13 | \item{timedata}{pseudotime for cells} 14 | 15 | \item{iffulltml}{if plot the full timeline} 16 | 17 | \item{txtsize}{text size for gene names} 18 | 19 | \item{color_by}{the cell attribute (e.g. the column of tml) to map to each cell's color} 20 | } 21 | \value{ 22 | 23 | } 24 | \description{ 25 | This function plots switching genes on the pseudo-timeline 26 | } 27 | -------------------------------------------------------------------------------- /man/reduce_pathways.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_switch_pathway.R 3 | \name{reduce_pathways} 4 | \alias{reduce_pathways} 5 | \title{Reduce redundant pathways} 6 | \usage{ 7 | reduce_pathways(switch_pw, pathways, redundant_pw_rate = 0.8) 8 | } 9 | \arguments{ 10 | \item{switch_pw}{a data frame with significantly changed pathways} 11 | 12 | \item{pathways}{a list of pathways with genes} 13 | 14 | \item{redundant_pw_rate}{cutoff ratio for merging redundant pathways} 15 | } 16 | \value{ 17 | 18 | } 19 | \description{ 20 | Reduce significant pathways that are with same genes over certain ratio, 21 | do up- and down- regulated pathways separately 22 | } 23 | -------------------------------------------------------------------------------- /man/subset_pseudotime.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_TI.R 3 | \name{subset_pseudotime} 4 | \alias{subset_pseudotime} 5 | \title{Subset GeneSwitches object based on the range of pseudotime} 6 | \usage{ 7 | subset_pseudotime(sce, min_time, max_time, assayname = "expdata", 8 | minexp = 0, mincells = 10) 9 | } 10 | \arguments{ 11 | \item{sce}{GeneSwitches object which is a SingleCellExperiment object} 12 | 13 | \item{min_time}{lower bound of pseudotime} 14 | 15 | \item{max_time}{upper bound of pseudotime} 16 | 17 | \item{assayname}{expression assay to use} 18 | 19 | \item{minexp}{minimun expression to filer genes} 20 | 21 | \item{mincells}{minimun cells with expression} 22 | } 23 | \value{ 24 | 25 | } 26 | \description{ 27 | This function subsets GeneSwitches object based on the range of pseudotime 28 | } 29 | --------------------------------------------------------------------------------