├── CNAME
├── DESCRIPTION
├── GeneSwitches.Rproj
├── NAMESPACE
├── R
    ├── binarization.R
    ├── compare_trajectories.R
    ├── convert_TI.R
    ├── data.R
    ├── find_switch_pathway.R
    ├── fit_logistic_regression.R
    ├── get_example_inputData.R
    ├── plot_gene_exp.R
    └── plot_timeline.R
├── README.Rmd
├── README.md
├── _config.yml
├── data-raw
    └── DATASET.R
├── data
    ├── gs_genelists.rda
    └── msigdb_h_c2_c5.rda
└── man
    ├── binarize_exp.Rd
    ├── common_genes.Rd
    ├── common_genes_plot.Rd
    ├── convert_monocle2.Rd
    ├── convert_slingshot.Rd
    ├── distinct_genes.Rd
    ├── downsample_zeros.Rd
    ├── figures
        ├── README-binarization cutoff-1.png
        ├── README-binarization threshold-1.png
        ├── README-binarization_threshold-1.png
        ├── README-convert slingshot-1.png
        ├── README-distinct genes-1.png
        ├── README-pathways ridge plots-1.png
        ├── README-pathways-1.png
        ├── README-pathways_ridge_plots-1.png
        ├── README-plot common genes-1.png
        ├── README-plot distinct genes-1.png
        ├── README-plot exp1-1.png
        ├── README-plot exp1-2.png
        ├── README-plot exp2-1.png
        ├── README-plot pathway genes-1.png
        ├── README-plot pathways-1.png
        ├── README-plot scale timeline-1.png
        ├── README-plot_exp1-1.png
        ├── README-plot_exp1-2.png
        ├── README-plot_monocle_trajectory-1.png
        ├── README-plotexp-1.png
        ├── README-scale timeline-1.png
        ├── README-timeline example-1.png
        ├── README-timeline example-2.png
        └── README-unnamed-chunk-4-1.png
    ├── filter_switchgenes.Rd
    ├── find_switch_logistic_fastglm.Rd
    ├── find_switch_pathway.Rd
    ├── get_example_inputData.Rd
    ├── gs_genelists.Rd
    ├── merge_pathways.Rd
    ├── msigdb_h_c2_c5.Rd
    ├── phyper_pathway.Rd
    ├── plot_gene_exp.Rd
    ├── plot_monocle_State.Rd
    ├── plot_pathway_density.Rd
    ├── plot_timeline_ggplot.Rd
    ├── reduce_pathways.Rd
    └── subset_pseudotime.Rd


/CNAME:
--------------------------------------------------------------------------------
1 | geneswitches.ddnetbio.com


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: GeneSwitches
 2 | Type: Package
 3 | Title: Discover the order of gene regulatory events during cell state transitions
 4 | Version: 0.1.0
 5 | Author: Elaine Yiqun CAO
 6 | Maintainer: Elaine Yiqun CAO <yiqun.cao@u.nus.edu.sg>
 7 | Description: This package discovers the order of gene regulatory events during
 8 |     cell state transitions at single-cell resolution. It works on any single-cell
 9 |     trajectory or pseudo-time ordering of cells to discover the genes that act as 
10 |     on/off switches between cell states and importantly the ordering at which these
11 |     switches take place.
12 | License: What license is it under?
13 | Encoding: UTF-8
14 | LazyData: true
15 | Depends: R (>= 3.2)
16 | Imports:
17 |     SingleCellExperiment (>= 1.6.0), 
18 |     ggplot2 (>= 3.1.0),
19 |     RColorBrewer,
20 |     plyr,
21 |     mixtools
22 | RoxygenNote: 6.1.1.9000
23 | Suggests: 
24 |     testthat
25 | 


--------------------------------------------------------------------------------
/GeneSwitches.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageCheckArgs: --as-cran
22 | PackageRoxygenize: rd,collate,namespace,vignette
23 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(binarize_exp)
 4 | export(common_genes)
 5 | export(common_genes_plot)
 6 | export(convert_monocle2)
 7 | export(convert_slingshot)
 8 | export(distinct_genes)
 9 | export(filter_switchgenes)
10 | export(find_switch_logistic_fastglm)
11 | export(find_switch_pathway)
12 | export(get_example_inputData)
13 | export(plot_gene_exp)
14 | export(plot_monocle_State)
15 | export(plot_pathway_density)
16 | export(plot_timeline_ggplot)
17 | export(reduce_pathways)
18 | export(subset_pseudotime)
19 | import(Biobase)
20 | import(fastglm)
21 | import(ggplot2)
22 | import(parallel)
23 | import(plyr)
24 | importFrom(RColorBrewer,brewer.pal)
25 | importFrom(ggrepel,geom_text_repel)
26 | importFrom(ggridges,geom_density_ridges)
27 | importFrom(gridExtra,grid.arrange)
28 | importFrom(mixtools,normalmixEM)
29 | 


--------------------------------------------------------------------------------
/R/binarization.R:
--------------------------------------------------------------------------------
  1 | 
  2 | .onAttach <- function(libname, pkgname) {
  3 |   packageStartupMessage("Welcome to GeneSwitches!")
  4 | }
  5 | 
  6 | #' @title Binarize gene expression
  7 | #'
  8 | #' @description This function generates on/off binarized data for gene expression
  9 | #'
 10 | #' @param sce SingleCellExperiment
 11 | #' @param fix_cutoff Logical. if use fixed global cutoff for binarization, default FALSE
 12 | #' @param binarize_cutoff fixed global cutoff for binarization, default 0.2
 13 | #' @param ncores number of cores
 14 | #' @return
 15 | #'
 16 | #' @import parallel
 17 | #' @importFrom mixtools normalmixEM
 18 | #' @export
 19 | #'
 20 | binarize_exp <- function(sce, fix_cutoff = FALSE, binarize_cutoff = 0.2, ncores = 3) {
 21 |   # calculate zero percentage
 22 |   zerop_g <- c()
 23 |   expdata <- assays(sce)$expdata
 24 |   for (i in 1:nrow(expdata)) {
 25 |     zp <- length(which(expdata[i, ] == 0))/ncol(expdata)
 26 |     zerop_g <- c(zerop_g, zp)
 27 |   }
 28 | 
 29 |   if (fix_cutoff == TRUE) {
 30 |     expdata <- assays(sce)$expdata
 31 |     is.na(expdata) <- assays(sce)$expdata == 0
 32 |     exp_reduced_binary <- as.matrix((expdata > binarize_cutoff) + 0)
 33 |     exp_reduced_binary[is.na(exp_reduced_binary)] = 0
 34 |     assays(sce)$binary <- exp_reduced_binary
 35 |     oupBinary <- data.frame(geneID = rownames(sce),
 36 |                             zerop_gene = zerop_g,
 37 |                             passBinary = TRUE)
 38 |     rowData(sce) <- oupBinary
 39 |   } else {
 40 |     expdata <- assays(sce)$expdata
 41 |     # Add gaussian noise to gene expression matrix
 42 |     # Here we use a sd of 0.1
 43 |     LogCountsadd = expdata + matrix(rnorm(nrow(expdata)*ncol(expdata),
 44 |                                           mean = 0, sd = 0.1),
 45 |                                     nrow(expdata), ncol(expdata))
 46 |     # Start fitting mixture models for each gene
 47 |     oupBinary = do.call(
 48 |       rbind, mclapply(rownames(LogCountsadd), function(iGene){
 49 |         set.seed(42)   # Set seed for consistency
 50 |         tmpMix = normalmixEM(LogCountsadd[iGene, ], k = 2)
 51 |         if (tmpMix$mu[1] < tmpMix$mu[2]) {
 52 |           tmpOup = data.frame(geneID = iGene,
 53 |                               mu1 = tmpMix$mu[1],
 54 |                               mu2 = tmpMix$mu[2],
 55 |                               sigma1 = tmpMix$sigma[1],
 56 |                               sigma2 = tmpMix$sigma[2],
 57 |                               lambda1 = tmpMix$lambda[1],
 58 |                               lambda2 = tmpMix$lambda[2],
 59 |                               loglik = tmpMix$loglik)
 60 |         } else {
 61 |           tmpOup = data.frame(geneID = iGene,
 62 |                               mu1 = tmpMix$mu[2],
 63 |                               mu2 = tmpMix$mu[1],
 64 |                               sigma1 = tmpMix$sigma[2],
 65 |                               sigma2 = tmpMix$sigma[1],
 66 |                               lambda1 = tmpMix$lambda[2],
 67 |                               lambda2 = tmpMix$lambda[1],
 68 |                               loglik = tmpMix$loglik)
 69 |         }
 70 |         return(tmpOup)
 71 |       }, mc.cores = ncores))
 72 | 
 73 |     # Check if non-bimodal genes
 74 |     oupBinary$passBinary = TRUE
 75 |     oupBinary[oupBinary$lambda1 < 0.1, ]$passBinary = FALSE
 76 |     oupBinary[oupBinary$lambda2 < 0.1, ]$passBinary = FALSE
 77 |     oupBinary[(oupBinary$mu2 - oupBinary$mu1) < (oupBinary$sigma1 + oupBinary$sigma2), ]$passBinary = FALSE
 78 |     # table(oupBinary$passBinary)
 79 | 
 80 |     # Solve for intersection for remaining genes
 81 |     oupBinary$root = -1
 82 |     for(iGene in oupBinary[oupBinary$passBinary == TRUE, ]$geneID){
 83 |       tmpMix = oupBinary[oupBinary$geneID == iGene, ]
 84 |       tmpInt = uniroot(function(x, l1, l2, mu1, mu2, sd1, sd2) {
 85 |         dnorm(x, m = mu1, sd = sd1) * l1 -
 86 |           dnorm(x, m = mu2, sd = sd2) * l2},
 87 |         interval = c(tmpMix$mu1,tmpMix$mu2),
 88 |         l1 = tmpMix$lambda1, mu1 = tmpMix$mu1, sd1 = tmpMix$sigma1,
 89 |         l2 = tmpMix$lambda2, mu2 = tmpMix$mu2, sd2 = tmpMix$sigma2)
 90 |       oupBinary[oupBinary$geneID == iGene, ]$root = tmpInt$root
 91 |     }
 92 |     # Binarize expression
 93 |     binLogCounts = expdata[oupBinary$geneID,]
 94 |     binLogCounts = t(scale(t(binLogCounts), scale = FALSE,
 95 |                            center = oupBinary$root))
 96 |     binLogCounts[binLogCounts >= 0] = 1
 97 |     binLogCounts[binLogCounts < 0] = 0
 98 |     assays(sce)$binary <- binLogCounts
 99 | 
100 |     oupBinary$zerop_gene <- zerop_g
101 |     rowData(sce) <- oupBinary
102 |   }
103 |   return(sce)
104 | }
105 | 


--------------------------------------------------------------------------------
/R/compare_trajectories.R:
--------------------------------------------------------------------------------
  1 | #' @title Identify distinct switching genes for each path
  2 | #'
  3 | #' @description This function identifies distinct switching genes for each path
  4 | #'
  5 | #' @param toplotgl_Rsub1 switching genes of path1
  6 | #' @param toplotgl_Rsub2 switching genes of path2
  7 | #' @param path1name name of path1 given by user
  8 | #' @param path2name name of path2 given by user
  9 | #' @param r2cutoff pseudo R^2 cutoff
 10 | #' @return
 11 | #'
 12 | #' @export
 13 | #'
 14 | distinct_genes <- function(toplotgl_Rsub1, toplotgl_Rsub2, path1name = "Path1Genes", path2name = "Path2Genes",
 15 |                            r2cutoff = 0.05, scale_timeline = FALSE, path1time = NULL, path2time = NULL, bin = 100){
 16 |   toplotgl_Rsub1$genenames <- rownames(toplotgl_Rsub1)
 17 |   toplotgl_Rsub2$genenames <- rownames(toplotgl_Rsub2)
 18 | 
 19 |   gl1 <- toplotgl_Rsub1$genenames
 20 |   gl2 <- toplotgl_Rsub2$genenames
 21 |   glin1 <- setdiff(gl1, gl2)
 22 |   glin2 <- setdiff(gl2, gl1)
 23 |   gs_p1 <- toplotgl_Rsub1[glin1,]
 24 |   gs_p2 <- toplotgl_Rsub2[glin2,]
 25 |   gs_p1$Paths <- path1name
 26 |   gs_p2$Paths <- path2name
 27 |   if (scale_timeline == TRUE) {
 28 |     steptime1 <- (max(path1time) - min(path1time))/bin
 29 |     gs_p1$switch_at_time <- round((gs_p1$switch_at_time - min(path1time))/steptime1)
 30 |     steptime2 <- (max(path2time) - min(path2time))/bin
 31 |     gs_p2$switch_at_time <- round((gs_p2$switch_at_time - min(path2time))/steptime2)
 32 |   }
 33 |   # combine distinct gene into one dataframe
 34 |   toplotgl <- rbind(gs_p1[,c("geneID","zerop_gene","switch_at_time","pvalues","FDR","pseudoR2s",
 35 |                              "estimates","prd_quality","direction","switch_at_timeidx",
 36 |                              "genenames","Paths")],
 37 |                     gs_p2[,c("geneID","zerop_gene","switch_at_time","pvalues","FDR","pseudoR2s",
 38 |                              "estimates","prd_quality","direction","switch_at_timeidx",
 39 |                              "genenames","Paths")])
 40 |   toplotgl <- toplotgl[toplotgl$pseudoR2s > r2cutoff,]
 41 |   return(toplotgl)
 42 | }
 43 | 
 44 | #' @title Identify common switching genes between paths
 45 | #'
 46 | #' @description This function identifies common switching genes between two paths
 47 | #'
 48 | #' @param toplotgl_Rsub1 switching genes of path1
 49 | #' @param toplotgl_Rsub2 switching genes of path2
 50 | #' @param path1name name of path1 given by user
 51 | #' @param path2name name of path2 given by user
 52 | #' @param r2cutoff pseudo R^2 cutoff
 53 | #' @return
 54 | #'
 55 | #' @export
 56 | #'
 57 | common_genes <- function(toplotgl_Rsub1, toplotgl_Rsub2, path1name = "Path1Genes", path2name = "Path2Genes",
 58 |                          r2cutoff = 0.05){
 59 |   toplotgl_Rsub1 <- toplotgl_Rsub1[toplotgl_Rsub1$pseudoR2s > r2cutoff,]
 60 |   toplotgl_Rsub2 <- toplotgl_Rsub2[toplotgl_Rsub2$pseudoR2s > r2cutoff,]
 61 | 
 62 |   toplotgl_Rsub1$genenames <- rownames(toplotgl_Rsub1)
 63 |   toplotgl_Rsub2$genenames <- rownames(toplotgl_Rsub2)
 64 |   gl1 <- toplotgl_Rsub1$genenames
 65 |   gl2 <- toplotgl_Rsub2$genenames
 66 |   comgl <- intersect(gl1, gl2)
 67 | 
 68 |   if (all(toplotgl_Rsub1[comgl,]$direction != toplotgl_Rsub2[comgl,]$direction)){
 69 |     print("Directions are not consistent.")
 70 |     return(NULL)
 71 |   }
 72 | 
 73 |   toplotgl_Rsub1$genetype <- path1name
 74 |   toplotgl_Rsub2$genetype <- path2name
 75 |   ggData <- as.data.frame(rbind(toplotgl_Rsub1[comgl, c("geneID","zerop_gene","switch_at_time","pvalues","FDR","pseudoR2s",
 76 |                                                         "estimates","prd_quality","direction","switch_at_timeidx",
 77 |                                                         "genenames","genetype")],
 78 |                                 toplotgl_Rsub2[comgl, c("geneID","zerop_gene","switch_at_time","pvalues","FDR","pseudoR2s",
 79 |                                                         "estimates","prd_quality","direction","switch_at_timeidx",
 80 |                                                         "genenames","genetype")]))
 81 |   ggData$genetype <- factor(ggData$genetype, levels = c(path1name, path2name))
 82 |   return(ggData)
 83 | }
 84 | 
 85 | #' @title Plot common switching genes between paths
 86 | #'
 87 | #' @description This function plots common switching genes between two paths
 88 | #'
 89 | #' @param ggData data frame for common genes
 90 | #' @param timedata timedata to show on plot
 91 | #' @return
 92 | #'
 93 | #' @export
 94 | #'
 95 | common_genes_plot <- function(ggData, timedata){
 96 |   ggData$genetypenum <- as.numeric(ggData$genetype)
 97 | 
 98 |   comtml_plot <- ggplot(ggData, aes(switch_at_time, genetypenum, group = genenames, col=direction)) +
 99 |     geom_line() + geom_point() + ylim(0.5, 3.5)# + xlim(20,28)
100 | 
101 |   comtml_plot<-comtml_plot+theme_classic()
102 | 
103 |   # Plot horizontal black line for timeline
104 |   comtml_plot<-comtml_plot+geom_hline(yintercept=c(1,2),#as.integer(ggData$type),
105 |                                       color = "black", size=0.6)
106 | 
107 |   pseudotime_step <- (max(timedata) - min(timedata))/10
108 |   pseudotime_range <- seq(min(timedata), max(timedata), by=pseudotime_step)
109 |   pseudotime_df <- data.frame(pseudotime_range, pseudotime_format=round(pseudotime_range,1))
110 | 
111 |   pathposy <- c(1+0.07, 2-0.07)
112 |   pathposx <- max(pseudotime_range)-0.9
113 |   pathnames <- levels(ggData$genetype)
114 |   pathnames_df <- data.frame(pathposx, pathposy, pathnames)
115 |   comtml_plot <- comtml_plot + geom_text(data = pathnames_df, inherit.aes = FALSE, size=3.5,
116 |                                          aes(x=pathposx, y=pathposy, label=pathnames))
117 | 
118 |   ##text position
119 |   positions <- seq(0.05, 1.15, by=0.1)
120 |   tml <- as.data.frame(ggData[ggData$genetype == levels(ggData$genetype)[2],])
121 |   tml <- tml[with(tml, order(switch_at_time)), ]
122 |   tml_uni <- tml[!duplicated(tml$switch_at_time),]
123 | 
124 |   line_pos <- data.frame(
125 |     "switch_at_time"=tml_uni$switch_at_time,
126 |     "position"=rep(positions, length.out=length(tml_uni$switch_at_time))
127 |   )
128 | 
129 |   tml <- merge(x=tml, y=line_pos, by="switch_at_time", all = TRUE)
130 | 
131 |   text_offset <- 0.05
132 |   tml$switch_at_time_count <- ave(tml$switch_at_time==tml$switch_at_time, tml$switch_at_time, FUN=cumsum)
133 |   tml$text_position <- (tml$switch_at_time_count * text_offset) + tml$position
134 | 
135 |   # Plot vertical segment lines for milestones
136 |   comtml_plot<-comtml_plot+geom_segment(data=tml[tml$switch_at_time_count == 1,], aes(y=2+position,yend=2,xend=switch_at_time),
137 |                                         color='black', size=0.4)#size=tml$pseudoR2s)
138 | 
139 |   # Plot scatter points at zero and date
140 |   # tml_plot<-tml_plot+geom_point(aes(y=0), size=2)
141 | 
142 |   # Don't show axes, appropriately position legend
143 |   comtml_plot<-comtml_plot+theme(axis.line.y=element_blank(),
144 |                                  axis.text.y=element_blank(),
145 |                                  axis.title.x=element_blank(),
146 |                                  axis.title.y=element_blank(),
147 |                                  axis.ticks.y=element_blank(),
148 |                                  axis.text.x =element_blank(),
149 |                                  axis.ticks.x =element_blank(),
150 |                                  axis.line.x =element_blank(),
151 |                                  legend.position = "bottom", legend.key.size = unit(12, "pt"),
152 |                                  legend.text = element_text(size = 12),
153 |                                  text = element_text(size = 12, family = "Helvetica")) +
154 |     labs(fill = "Regulation", color = "Regulation") +
155 |     scale_color_manual(values=c("forestgreen", "chocolate2"))
156 | 
157 |   comtml_plot<-comtml_plot+geom_text(data=pseudotime_df, inherit.aes = FALSE,
158 |                                      aes(x=pseudotime_range, y=0.9, label=pseudotime_format), size=3.8, color='black')
159 | 
160 |   # Show text for each milestone
161 |   comtml_plot<-comtml_plot+geom_text(data=tml, inherit.aes = FALSE,
162 |                                      aes(x=switch_at_time,y=2+text_position,label=genenames),size=3)
163 | 
164 |   return(comtml_plot)
165 | }
166 | 
167 | 


--------------------------------------------------------------------------------
/R/convert_TI.R:
--------------------------------------------------------------------------------
  1 | #' @title plot monocle2 trajectory colored by State
  2 | #'
  3 | #' @description This function plots monocle2 trajectory with "State" colors
  4 | #'
  5 | #' @param monocle2_obj monocle2 output object
  6 | #' @import Biobase
  7 | #' @import ggplot2
  8 | #' @importFrom gridExtra grid.arrange
  9 | #' @return
 10 | #'
 11 | #' @export
 12 | #'
 13 | plot_monocle_State <- function(monocle2_obj){
 14 |   mcells <- pData(monocle2_obj)
 15 |   mcells$dim1 <- monocle2_obj@reducedDimS[1,]
 16 |   mcells$dim2 <- monocle2_obj@reducedDimS[2,]
 17 | 
 18 |   p1 <- ggplot(mcells, aes(dim1, dim2, color = State)) +
 19 |     ylab("Component 2") + xlab("Component 1") +
 20 |     geom_point(size = 1, alpha = 1.0) +
 21 |     scale_shape_manual(guide = FALSE, values = 16) +
 22 |     theme(text = element_text(size = 12, family = "Helvetica"),
 23 |           panel.background = element_rect(fill = "white", colour = NA),
 24 |           axis.line = element_line(colour = "black"),
 25 |           legend.key = element_rect(fill = NA),
 26 |           legend.key.width = unit(12, "pt"),
 27 |           legend.text = element_text(size = 10,colour = "black"),
 28 |           legend.title = element_text(size = 11, colour = "black"),
 29 |           legend.position = "top")
 30 | 
 31 |   p2 <- ggplot(mcells, aes(dim1, dim2, color = Pseudotime)) +
 32 |     ylab("Component 2") + xlab("Component 1") +
 33 |     geom_point(size = 1.7, alpha = 1.0) +
 34 |     scale_shape_manual(guide = FALSE, values = 16) +
 35 |     theme(text = element_text(size = 12, family = "Helvetica"),
 36 |           panel.background = element_rect(fill = "white", colour = NA),
 37 |           axis.line = element_line(colour = "black"),
 38 |           legend.key = element_rect(fill = NA),
 39 |           legend.key.width = unit(20, "pt"),
 40 |           legend.text = element_text(size = 10,colour = "black"),
 41 |           legend.title = element_text(size = 11, colour = "black"),
 42 |           legend.position = "top")
 43 |   return(grid.arrange(p1, p2, nrow = 1))
 44 | }
 45 | 
 46 | #' @title Convert monocle2 output into GeneSwitches object
 47 | #'
 48 | #' @description This function converts monocle2 output into GeneSwitches object
 49 | #'
 50 | #' @param monocle2_obj monocle2 output object
 51 | #' @param states a vector of states (path) that are interested in
 52 | #' @param logexpdata log-normal gene expression
 53 | #' @import Biobase
 54 | #' @return
 55 | #'
 56 | #' @export
 57 | #'
 58 | convert_monocle2 <- function(monocle2_obj, states, expdata){
 59 |   allcells <- pData(monocle2_obj)
 60 |   # extract cells and log-normal expression in certain path
 61 |   cells <- allcells[allcells$State %in% states,]
 62 |   expd <- expdata[,rownames(cells)]
 63 |   expd <- expd[apply(expd > 0,1,sum) >= 3,]
 64 |   # create GeneSwitches object
 65 |   sce <- SingleCellExperiment(assays = List(expdata = expd))
 66 |   # pass pseudotime info
 67 |   colData(sce)$Pseudotime <- cells$Pseudotime
 68 |   # pass reduced dims info
 69 |   rd <- t(monocle2_obj@reducedDimS)[rownames(cells),]
 70 |   colnames(rd) <- c("Component 1", "Component 2")
 71 |   reducedDims(sce) <- SimpleList(monocleRD=rd)
 72 | 
 73 |   return(sce)
 74 | }
 75 | 
 76 | 
 77 | #' @title Convert slingshot output into GeneSwitches object
 78 | #'
 79 | #' @description This function converts slingshot output into GeneSwitches object
 80 | #'
 81 | #' @param sce_slingshot slingshot SingleCellExperiment output object
 82 | #' @param pseudotime_idx name of desired pseudotime path to apply GeneSwitches
 83 | #' @param assayname expression assay to use
 84 | #' @return
 85 | #'
 86 | #' @export
 87 | #'
 88 | convert_slingshot <- function(sce_slingshot, pseudotime_idx, assayname = "expdata"){
 89 |   allcells <- as.data.frame(colData(sce_slingshot))
 90 |   # extract cells and log-normal expression in certain path
 91 |   cells <- allcells[!is.na(allcells[,pseudotime_idx]),]
 92 |   expd <- assays(sce_slingshot)[[assayname]][,rownames(cells)]
 93 |   expd <- expd[apply(expd > 0,1,sum) >= 3,]
 94 |   # create GeneSwitches object
 95 |   sce <- SingleCellExperiment(assays = List(expdata = expd))
 96 |   # pass pseudotime info
 97 |   colData(sce)$Pseudotime <- cells[,pseudotime_idx]
 98 |   # pass reduced dims info
 99 |   for (i in 1:length(reducedDims(sce_slingshot))) {
100 |     reducedDims(sce)[[i]] <- reducedDims(sce_slingshot)[[i]][rownames(cells),]
101 |   }
102 |   names(reducedDims(sce)) <- names(reducedDims(sce_slingshot))
103 | 
104 |   return(sce)
105 | }
106 | 
107 | 
108 | #' @title Subset GeneSwitches object based on the range of pseudotime
109 | #'
110 | #' @description This function subsets GeneSwitches object based on the range of pseudotime
111 | #'
112 | #' @param sce GeneSwitches object which is a SingleCellExperiment object
113 | #' @param min_time lower bound of pseudotime
114 | #' @param max_time upper bound of pseudotime
115 | #' @param assayname expression assay to use
116 | #' @param minexp minimun expression to filer genes
117 | #' @param mincells minimun cells with expression
118 | #' @return
119 | #'
120 | #' @export
121 | #'
122 | subset_pseudotime <- function(sce, min_time, max_time, assayname = "expdata", minexp = 0, mincells = 10){
123 |   sce_subset <- sce[,sce$Pseudotime >= min_time & sce$Pseudotime <= max_time]
124 |   # all(rownames(sce_p1_subset) == rownames(rowData(sce_p1_subset)))
125 |   sce_subset <- sce_subset[which(apply(assays(sce_subset)[[assayname]] > minexp, 1 ,sum) >= mincells), ]
126 |   return(sce_subset)
127 | }
128 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' An example of gene sets
 2 | #'
 3 | #' Pathways of GSEA MSigDB hallmark, c2 and c5 gene sets
 4 | #'
 5 | #' @format A list with 6153 pathways, and genes in each pathway
 6 | #'
 7 | #' @source \url{http://software.broadinstitute.org/gsea/msigdb/collections.jsp}
 8 | "msigdb_h_c2_c5"
 9 | 
10 | 
11 | #' An example of gene lists
12 | #'
13 | #' Provided gene lists with surface proteins and transctription factors
14 | #'
15 | #' @format A data frame with 3071 rows and 2 columns. rows are genes, and columns are "genenames" and "genetypes".
16 | #'
17 | #' @source \url{http://wlab.ethz.ch/cspa/, http://humantfs.ccbr.utoronto.ca/}
18 | "gs_genelists"
19 | 


--------------------------------------------------------------------------------
/R/find_switch_pathway.R:
--------------------------------------------------------------------------------
  1 | #' @title Find significantly changed pathways and switching timepoint
  2 | #'
  3 | #' @description Apply hypergeometric test to determine significantly changed
  4 | #'     pathways and determine the switching timepoint for each pathway
  5 | #'
  6 | #' @param N expressed genes in pathways
  7 | #' @param pathways a list of pathways with genes
  8 | #' @param toplotgl_ptw swiching genes to plot
  9 | #' @param sig_FDR FDR cut off for significant pathways
 10 | #' @param direction switching direction, up or down
 11 | #' @return
 12 | #'
 13 | phyper_pathway <- function(N, pathways, toplotgl_ptw, sig_FDR = 0.05, direction = c("up", "down")){
 14 |   pv <- c()
 15 |   swt_pw <- c()
 16 |   for (i in 1:length(pathways)) {
 17 |     gp <- pathways[[i]]
 18 |     genestoplot <- intersect(rownames(toplotgl_ptw), gp)
 19 |     m <- length(gp) ## Number of "marked" elements, i.e. genes associated to this biological process
 20 |     n <- N - m ## Number of "non-marked" elements, i.e. genes not associated to this biological process
 21 |     x <- length(genestoplot) ## Number of "marked" elements in the selection, i.e. genes of the group of interest that are associated to this biological process
 22 |     k <- nrow(toplotgl_ptw) ## Size of the selection, i.e. submitted genes with at least one annotation in GO biological processes
 23 |     pv <- c(pv, phyper(q=x-1, m=m, n=n, k=k, lower.tail = FALSE))
 24 | 
 25 |     toplotgl <- toplotgl_ptw[genestoplot,]
 26 |     swt_pw <- c(swt_pw, median(toplotgl$switch_at_time))
 27 |   }
 28 |   switch_pw <- data.frame(switch_at_time = swt_pw, pvalue = pv)
 29 |   rownames(switch_pw) <- names(pathways)
 30 |   switch_pw$FDR <- p.adjust(switch_pw$pvalue, method = "BH")
 31 | 
 32 |   switch_pw_sig <- switch_pw[switch_pw$FDR < sig_FDR,]
 33 |   if (nrow(switch_pw_sig) > 0) {
 34 |     switch_pw_sig$direction <- direction
 35 |   }
 36 |   return(switch_pw_sig)
 37 | }
 38 | 
 39 | #' @title Merge redundant pathways
 40 | #'
 41 | #' @description Merge significant pathways that are with same genes over certain ratio
 42 | #'
 43 | #' @param switch_pw a data frame with significantly changed pathways
 44 | #' @param pathways a list of pathways with genes
 45 | #' @param ratio cutoff ratio for merging redundant pathways
 46 | #' @return
 47 | #'
 48 | merge_pathways <- function(switch_pw, pathways, ratio){
 49 |   switch_pw <- switch_pw[order(switch_pw$FDR),]
 50 |   switch_pw$keep <- 1
 51 |   for (i in 1:(nrow(switch_pw)-1)) {
 52 |     if (switch_pw[i,]$keep == 1) {
 53 |       for (j in (i+1):nrow(switch_pw)) {
 54 |         pw1 <- pathways[[rownames(switch_pw)[i]]]
 55 |         pw2 <- pathways[[rownames(switch_pw)[j]]]
 56 |         overlapping <- intersect(pw1, pw2)
 57 |         if (length(overlapping)/length(pw2) > ratio) {
 58 |           switch_pw[j,]$keep <- 0
 59 |         }
 60 |       }
 61 |     }
 62 |   }
 63 |   switch_pw <- switch_pw[switch_pw$keep == 1,]
 64 |   return(subset(switch_pw, select = -c(keep)))
 65 | }
 66 | 
 67 | #' @title Reduce redundant pathways
 68 | #'
 69 | #' @description Reduce significant pathways that are with same genes over certain ratio,
 70 | #'     do up- and down- regulated pathways separately
 71 | #'
 72 | #' @param switch_pw a data frame with significantly changed pathways
 73 | #' @param pathways a list of pathways with genes
 74 | #' @param redundant_pw_rate cutoff ratio for merging redundant pathways
 75 | #' @export
 76 | #' @return
 77 | #'
 78 | reduce_pathways <- function(switch_pw, pathways, redundant_pw_rate = 0.8){
 79 |   switch_up <- switch_pw[switch_pw$direction == "up",]
 80 |   if (nrow(switch_up) > 1) {
 81 |     switch_up <- merge_pathways(switch_up, pathways, ratio = redundant_pw_rate)
 82 |   }
 83 |   switch_down <- switch_pw[switch_pw$direction == "down",]
 84 |   if (nrow(switch_down) > 1) {
 85 |     switch_down <- merge_pathways(switch_down, pathways, ratio = redundant_pw_rate)
 86 |   }
 87 |   # combine
 88 |   switch_pw_reduce <- rbind(switch_up, switch_down)
 89 |   switch_pw_reduce <- switch_pw_reduce[order(switch_pw_reduce$FDR),]
 90 |   return(switch_pw_reduce)
 91 | }
 92 | 
 93 | 
 94 | #' @title Find significantly changed pathways and switching timepoint
 95 | #'
 96 | #' @description This function finds significantly changed pathways and determine
 97 | #'     the switching timepoint for each pathway
 98 | #'
 99 | #' @param sce SingleCellExperiment
100 | #' @param pathways a list of pathways with genes
101 | #' @param toplotgl_ptw swiching genes to plot
102 | #' @param sig_FDR FDR cut off for significant pathways
103 | #' @return
104 | #'
105 | #' @export
106 | #'
107 | find_switch_pathway <- function(scerowdata, pathways = msigdb_h_c2_c5, toplotgl_sig,
108 |                                 sig_FDR = 0.05) {
109 |   # make all the genes from pathways into a vector
110 |   gps <- c()
111 |   for (i in 1:length(pathways)) {
112 |     gps <- c(gps, pathways[[i]])
113 |   }
114 |   genesINpathways <- unique(gps)
115 |   N <- length(intersect(rownames(scerowdata), genesINpathways))
116 | 
117 |   # up-regulated pathways
118 |   toplotgl_ptw <- toplotgl_sig[rownames(toplotgl_sig) %in% genesINpathways & toplotgl_sig$direction == "up",]
119 |   switch_pw_sigup <- phyper_pathway(N, pathways, toplotgl_ptw, sig_FDR = 0.05, direction = "up")
120 |   # down-regulated pathways
121 |   toplotgl_ptw <- toplotgl_sig[rownames(toplotgl_sig) %in% genesINpathways & toplotgl_sig$direction == "down",]
122 |   switch_pw_sigdown <- phyper_pathway(N, pathways, toplotgl_ptw, sig_FDR = 0.05, direction = "down")
123 |   # combine
124 |   switch_pw_sig <- rbind(switch_pw_sigup, switch_pw_sigdown)
125 |   if (nrow(switch_pw_sig) > 0) {
126 |     switch_pw_sig$feature_type <- "pathways"
127 |     switch_pw_sig$feature_name <- rownames(switch_pw_sig)
128 |   }
129 |   switch_pw_ord <- switch_pw_sig[order(switch_pw_sig$FDR),]
130 | 
131 |   return(switch_pw_ord)
132 | }
133 | 
134 | #' @title Pathways ridge plot
135 | #'
136 | #' @description This function generates pathways ridge plots
137 | #'
138 | #' @param switch_pw_re significant pathways to plot
139 | #' @param toplotgl_sig switching genes
140 | #' @param direction switching direction of the pathway, up or down
141 | #' @param orderbytime order the pathways by switching time (mean time of switching genes) if TRUE,
142 | #'                    order the pathways by FDR if FALSE
143 | #' @return
144 | #'
145 | #' @importFrom ggridges geom_density_ridges
146 | #' @export
147 | #'
148 | plot_pathway_density <- function(switch_pw_re, toplotgl_sig, pw_direction = c("up", "down"), orderbytime = TRUE){
149 |   switch_pw_re <- switch_pw_re[switch_pw_re$direction %in% pw_direction,]
150 |   if (orderbytime == TRUE) {
151 |     switch_pw_re <- switch_pw_re[order(switch_pw_re$switch_at_time, decreasing = TRUE),]
152 |   } else {
153 |     switch_pw_re <- switch_pw_re[order(switch_pw_re$FDR, decreasing = TRUE),]
154 |   }
155 |   gl <- c()
156 |   for (i in 1:nrow(switch_pw_re)) {
157 |     pn <- rownames(switch_pw_re)[i]
158 |     pgl <- data.frame(msigdb_h_c2_c5[pn], pn, stringsAsFactors = FALSE)
159 |     colnames(pgl) <- c("Genes", "Pathways")
160 |     tgn <- length(unique(pgl$Genes))
161 |     genestoplot <- intersect(rownames(toplotgl_sig), pgl$Genes)
162 |     pgl <- pgl[pgl$Genes %in% genestoplot,]
163 |     toplotgl <- toplotgl_sig[pgl$Genes, ]
164 |     pgl <- as.data.frame(cbind(pgl, toplotgl))
165 |     pgl <- pgl[pgl$direction == switch_pw_re[i,]$direction,]
166 |     pgl$Pathways <- paste0(pgl$Pathways, "(",nrow(pgl),"/",tgn,")")
167 |     gl <- rbind(gl, pgl)
168 |   }
169 | 
170 |   gl$Pathways <- factor(gl$Pathways, levels = unique(gl$Pathways))
171 |   p <- ggplot(gl, aes(x = switch_at_time, y = Pathways, fill = direction, col = direction))
172 |   p <- p + theme_classic()
173 |   p <- p + xlab("Pseudo-timeline") + geom_density_ridges(alpha = 0.5) +
174 |     labs(fill = "Regulation", color = "Regulation")
175 |   p <- p + theme(text = element_text(size = 12, family = "Helvetica"),
176 |                  panel.background = element_rect(fill = "white", colour = NA),
177 |                  axis.line = element_line(colour = "black"),
178 |                  legend.key.size = unit(10, "pt"),
179 |                  legend.text = element_text(size = 10,colour = "black"),
180 |                  legend.title = element_text(size = 11, colour = "black")) +
181 |     scale_color_manual(values=c("forestgreen", "chocolate2")) +
182 |     scale_fill_manual(values=c("forestgreen", "chocolate2"))
183 |   return(p)
184 | }
185 | 


--------------------------------------------------------------------------------
/R/fit_logistic_regression.R:
--------------------------------------------------------------------------------
  1 | #' @title Random downsampling of zero expression
  2 | #'
  3 | #' @description This function does randomly downsampling of cells with zero expression for one gene
  4 | #'
  5 | #' @param glmdata binary data of one gene
  6 | #' @param ratio_ds downsampling zeros to this proportion
  7 | #' @return
  8 | #'
  9 | downsample_zeros <- function(glmdata, ratio_ds = 0.7) {
 10 |   p = as.numeric(ratio_ds)
 11 |   set.seed(42)   # Set seed for consistency
 12 |   downsample <- sample(which(glmdata$expvalue == 0), length(which(glmdata$expvalue == 0)) - round(sum(glmdata$expvalue != 0) * p/(1 - p)))
 13 |   if (length(downsample) > 0) {
 14 |     subdata <- glmdata[-downsample, ]
 15 |   } else {subdata <- glmdata}
 16 |   return(subdata)
 17 | }
 18 | 
 19 | #' @title Fit fast logistic regression and find switching timepoint
 20 | #'
 21 | #' @description This function fits fast logistic regression and find switching timepoint for each gene
 22 | #'
 23 | #' @param sce SingleCellExperiment
 24 | #' @param downsample Logical. if do random downsampling of zeros
 25 | #' @param ds_cutoff only do downsampling if zero percentage is over this cutoff
 26 | #' @param zero_ratio downsampling zeros to this proportion
 27 | #' @param sig_FDR FDR cut off for significant genes
 28 | #' @return
 29 | #'
 30 | #' @import fastglm
 31 | #' @export
 32 | #'
 33 | find_switch_logistic_fastglm <- function(sce, downsample = FALSE, ds_cutoff = 0.7, zero_ratio = 0.7,
 34 |                                          sig_FDR = 0.05, show_warnings = TRUE) {
 35 |   binarydata <- assays(sce)$binary
 36 |   expdata <- assays(sce)$expdata
 37 |   binarydata <- binarydata[which(rowData(sce)$passBinary == TRUE), ]
 38 |   expdata <- expdata[which(rowData(sce)$passBinary == TRUE), ]
 39 |   genes <- rowData(sce)[which(rowData(sce)$passBinary == TRUE), ]
 40 |   timedata <- sce$Pseudotime
 41 |   pvalues <- binarydata[, 1]
 42 |   pseudoR2s <- binarydata[, 1]
 43 |   estimates <- binarydata[, 1]
 44 |   switch_at_time <- binarydata[, 1]
 45 |   prd_quality <- binarydata[, 1]
 46 |   CI <- binarydata[, 1]
 47 | 
 48 |   for (i in 1:nrow(binarydata)) {
 49 |     glmdata <- cbind(State = as.numeric(binarydata[i, ]), expvalue = as.numeric(expdata[i, ]),
 50 |                      timedata = sce$Pseudotime)
 51 |     glmdata <- as.data.frame(glmdata)
 52 | 
 53 |     if (downsample == TRUE & round(genes$zerop_gene[i],3) > ds_cutoff) {
 54 |       glmdata <- downsample_zeros(glmdata, ratio_ds = zero_ratio)
 55 |     }
 56 | 
 57 |     if (show_warnings == TRUE) {
 58 |       glm_results <- fastglm(x = model.matrix(State ~ timedata, data = glmdata),
 59 |                              y = glmdata$State, family = binomial(link = "logit"))
 60 |     } else {
 61 |       glm_results <-suppressWarnings(fastglm(x = model.matrix(State ~ timedata, data = glmdata),
 62 |                                              y = glmdata$State, family = binomial(link = "logit")))
 63 |     }
 64 |     pvalues[i] <- coef(summary(glm_results))[, 4][2]
 65 |     ll.null <- glm_results$null.deviance/-2
 66 |     ll.proposed <- glm_results$deviance/-2
 67 |     # McFadden's Pseudo R^2 = [ LL(Null) - LL(Proposed) ] / LL(Null)
 68 |     pseudoR2s[i] <- (ll.null - ll.proposed)/ll.null
 69 |     estimates[i] <- coef(summary(glm_results))[, 1][2]
 70 |     # p=0.5
 71 |     switch_at_time[i] <- (log(0.5/(1 - 0.5)) - coef(glm_results)[1])/coef(glm_results)[2]
 72 |     if (switch_at_time[i] >= max(glmdata$timedata)) {
 73 |       switch_at_time[i] = max(glmdata$timedata)
 74 |       prd_quality[i] = 0
 75 |     } else {
 76 |       prd_quality[i] = 1
 77 |     }
 78 |     if (switch_at_time[i] <= min(glmdata$timedata)) {
 79 |       switch_at_time[i] = min(glmdata$timedata)
 80 |       prd_quality[i] = 0
 81 |     }
 82 |     se <- summary(glm_results)$coefficients[, 2]
 83 |     CI[i] <- sqrt((se[1]*1.96/coef(glm_results)[1])^2 + (se[2]*1.96/coef(glm_results)[2])^2)*
 84 |       abs(coef(glm_results)[1]/coef(glm_results)[2])
 85 |     remove(glm_results)
 86 |   }
 87 | 
 88 |   result_switch <- cbind(switch_at_time, CI, pvalues, pseudoR2s, estimates, prd_quality)
 89 |   rownames(result_switch) <- rownames(binarydata)
 90 |   result_switch <- as.data.frame(result_switch)
 91 |   result_switch$direction <- "up"
 92 |   result_switch[result_switch$estimates < 0, ]$direction <- "down"
 93 |   result_switch$FDR <- p.adjust(result_switch$pvalues, method = "BH")
 94 |   steptime <- (max(timedata) - min(timedata))/100
 95 |   result_switch$switch_at_timeidx <- round((result_switch$switch_at_time - min(timedata))/steptime)
 96 | 
 97 |   # process_resultswitch --------------------------------------------------------------------------
 98 |   # check significance FDR < sig_FDR
 99 |   if (max(result_switch$FDR) > sig_FDR) {
100 |     result_switch[result_switch$FDR > sig_FDR, ]$prd_quality <- 0
101 |   }
102 | 
103 |   geneinfo <- merge(rowData(sce), result_switch, by=0, all=TRUE)[,-1] #[,1:11]
104 |   rownames(geneinfo) <- geneinfo$geneID
105 |   # all(rownames(geneinfo) == rownames(sce))
106 |   geneinfo <- geneinfo[rownames(sce), ]
107 |   rowData(sce) <- geneinfo
108 |   return(sce)
109 | }
110 | 
111 | 


--------------------------------------------------------------------------------
/R/get_example_inputData.R:
--------------------------------------------------------------------------------
 1 | #' @title Download example input data
 2 | #'
 3 | #' @description This function checks if example input files are in the current directory and if not download them
 4 | #'
 5 | #' @return
 6 | #'
 7 | #' @export
 8 | #'
 9 | get_example_inputData <- function(){
10 |   logexpdata =  "./logexpdata.RData"
11 |   if(!file.exists(logexpdata)) {
12 |     res <- tryCatch(download.file("http://files.ddnetbio.com/logexpdata.RData",
13 |                                   destfile = logexpdata,
14 |                                   method = "auto"),
15 |                     error = function(e) 1)
16 |     if(res == 1) { print("Error: cannot download logexpdata.RData.") }
17 |   }
18 | 
19 |   cardiac_monocle2 =  "./cardiac_monocle2.RData"
20 |   if(!file.exists(cardiac_monocle2)) {
21 |     res <- tryCatch(download.file("http://files.ddnetbio.com/cardiac_monocle2.RData",
22 |                                   destfile = cardiac_monocle2,
23 |                                   method = "auto"),
24 |                     error = function(e) 1)
25 |     if(res == 1) { print("Error: cannot download cardiac_monocle2.RData") }
26 |   }
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/R/plot_gene_exp.R:
--------------------------------------------------------------------------------
 1 | #' @title Plot gene expression
 2 | #'
 3 | #' @description This function plot gene expression on two-dimensional space
 4 | #'
 5 | #' @param sce SingleCellExperiment
 6 | #' @param gene one gene of interest
 7 | #' @param reduction dimensional reduction method
 8 | #' @param downsample if do random downsampling of zeros
 9 | #' @param ds_cutoff only do downsampling if zero percentage is over this cutoff
10 | #' @param zero_ratio downsampling zeros to this proportion
11 | #' @param ptsize point size
12 | #' @param fitting if plot logistic regression fitting
13 | #' @return
14 | #'
15 | #' @import ggplot2
16 | #' @importFrom RColorBrewer brewer.pal
17 | #' @importFrom gridExtra grid.arrange
18 | #' @export
19 | #'
20 | plot_gene_exp <- function(sce, gene, reduction, fitting = FALSE, bin_width = 0.1,
21 |                           downsample = FALSE, ds_cutoff = 0.7, zero_ratio = 0.7, ptsize = 0.7){
22 |   glmdata = data.frame(expvalue = assays(sce)$expdata[gene, ],
23 |                       timedata = sce$Pseudotime,
24 |                       State = as.numeric(assays(sce)$binary[gene, ]),
25 |                       Dim1 = reducedDim(sce, reduction)[,1],
26 |                       Dim2 = reducedDim(sce, reduction)[,2])
27 | 
28 |   if (downsample == TRUE & round(sum(glmdata$expvalue == 0)/nrow(glmdata),3) > ds_cutoff) {
29 |     glmdata <- downsample_zeros(glmdata, ratio_ds = zero_ratio)
30 |     gene <- paste0(gene, " (downsampled--",zero_ratio*100,"%)")
31 |   }
32 | 
33 |   # Define plot theme
34 |   plotTheme = theme(plot.title = element_text(size = 24, face = "bold"),
35 |                     text = element_text(size = 18),
36 |                     panel.background = element_rect(fill = "white", colour = NA),
37 |                     plot.background  = element_rect(fill = "white", colour = NA),
38 |                     axis.line = element_line(colour = "black", size = 1),
39 |                     axis.text = element_text(size = 15, colour = "black"),
40 |                     axis.ticks = element_line(colour = "black", size = 1),
41 |                     legend.key = element_rect(fill = NA),
42 |                     legend.key.width = unit(14, "pt"),
43 |                     legend.position = "right")
44 | 
45 |   # Plot 1
46 |   p1 <- ggplot(glmdata, aes(Dim1, Dim2, color = expvalue)) +
47 |     geom_point(size = 0.7, alpha = 1.0) +
48 |     scale_shape_manual(guide = FALSE, values = 16) +
49 |     scale_color_gradientn("",colours = c("grey85", brewer.pal(9, "OrRd"))) +
50 |     plotTheme + ggtitle(gene)
51 | 
52 |   # Plot3
53 |   p3 <- ggplot(glmdata, aes(timedata, State)) + geom_point() +
54 |     xlab("Pseudotime") + ylab("Probability") +
55 |     geom_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
56 |     geom_hline(yintercept = 0.5, linetype="dashed", color = "#E69F00", size=1) +
57 |     scale_y_continuous(breaks = seq(0, 1, 0.5)) +
58 |     scale_x_continuous(breaks = seq(0, max(glmdata$timedata), 10)) +
59 |     plotTheme + ggtitle(" ")
60 | 
61 |   if (fitting == TRUE & "root" %in% names(rowData(sce))) {
62 |     # Plot 2
63 |     ggGaussian = data.frame(x = seq(min(glmdata$expvalue) - 0.1,
64 |                                     max(glmdata$expvalue) + 0.1, length.out = 500))
65 |     ggGaussian$y1 = (nrow(glmdata)/10) * rowData(sce)[gene,]$lambda1 *
66 |       dnorm(ggGaussian$x, mean = rowData(sce)[gene,]$mu1,
67 |             sd = rowData(sce)[gene,]$sigma1)
68 |     ggGaussian$y2 = (nrow(glmdata)/10) * rowData(sce)[gene,]$lambda2 *
69 |       dnorm(ggGaussian$x, mean = rowData(sce)[gene,]$mu2,
70 |             sd = rowData(sce)[gene,]$sigma2)
71 |     p2 <- ggplot() + xlab("Log Expression") +
72 |       geom_histogram(data = glmdata, aes(x=expvalue, fill = as.factor(State)),
73 |                      binwidth = bin_width, alpha=0.7, position = "identity") +
74 |       geom_path(data = ggGaussian, aes(x,y1), color = "#999999", size=.7) +
75 |       geom_path(data = ggGaussian, aes(x,y2), color = "chocolate2", size=.7) +
76 |       geom_vline(xintercept = rowData(sce)[gene,]$root, size=1, linetype="dashed") +
77 |       scale_fill_manual("State", values = c("grey", "#E69F00"),
78 |                         labels = c("Off", "On")) +
79 |       plotTheme + theme(legend.position = "top")
80 |     # Combine
81 |     return(grid.arrange(p1, p2, p3, nrow = 1))
82 |   } else if (fitting == TRUE) {
83 |     return(grid.arrange(p1, p3, nrow = 1))
84 |   } else {
85 |     plot(p1)
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/R/plot_timeline.R:
--------------------------------------------------------------------------------
  1 | #' @title Extract switching gene list of interesting
  2 | #'
  3 | #' @description This function extract a list of significant switching genes
  4 | #'
  5 | #' @param sce switching genes
  6 | #' @param allgenes if use all genes
  7 | #' @param pathway_name a list of pathway name(s) to plot
  8 | #' @param genelists a gene list to plot
  9 | #' @param genetype specific gene type to plot c("EMT", "reprogramming", "stem", "surface", "TF")
 10 | #' @param zero_pct zero-expression percentage cut off for significant genes
 11 | #' @param r2cutoff pseudo R^2 cutoff
 12 | #' @param direction switching direction, up or down
 13 | #' @param topnum number of top genes ordered by pseudo R^2 value
 14 | #' @return
 15 | #'
 16 | #' @import plyr
 17 | #' @export
 18 | #'
 19 | filter_switchgenes <- function(sce, allgenes = FALSE, pathway_name = NULL, genelists = GeneSwitches:::gs_genelists,
 20 |                                genetype = c("Surface proteins", "TFs"), zero_pct = 0.9,
 21 |                                r2cutoff = 0.03, direction = c("up", "down"), topnum = 100000) {
 22 |   if (allgenes == TRUE) {
 23 |     toplotgl <- rowData(sce)
 24 |     toplotgl$feature_type <- "All genes"
 25 |   } else if(!is.null(pathway_name)) {
 26 |     gl <- c()
 27 |     for (pn in pathway_name) {
 28 |       pgl <- data.frame(msigdb_h_c2_c5[pn], pn, stringsAsFactors = FALSE)
 29 |       colnames(pgl) <- c("feature_name", "feature_type")
 30 |       gl <- rbind(gl, pgl)
 31 |     }
 32 |     multi <- gl$feature_name[duplicated(gl$feature_name)]
 33 |     if (length(multi) > 0) {
 34 |       gl <- ddply(gl,.(feature_name),paste)[,c(1,3)]
 35 |       rownames(gl) <- gl$feature_name
 36 |       colnames(gl)[2] <- "types"
 37 |       gl$feature_type <- gl$types
 38 |       gl[multi,]$feature_type <- "Multiple"
 39 |       genestoplot <- intersect(rownames(sce), gl$feature_name)
 40 |       toplotgl <- rowData(sce)[genestoplot, ]
 41 |       toplotgl$feature_type <- gl[genestoplot, ]$feature_type
 42 |       toplotgl$types <- gl[genestoplot, ]$types
 43 |     } else {
 44 |       rownames(gl) <- gl$feature_name
 45 |       genestoplot <- intersect(rownames(sce), gl$feature_name)
 46 |       toplotgl <- rowData(sce)[genestoplot, ]
 47 |       toplotgl$feature_type <- gl[genestoplot, ]$feature_type
 48 |     }
 49 |   } else {
 50 |     genelists_sub <- genelists[genelists$genetypes %in% genetype, ]
 51 |     genelists_sub <- genelists_sub[!duplicated(genelists_sub$genenames), ]
 52 |     rownames(genelists_sub) <- genelists_sub$genenames
 53 |     genestoplot <- intersect(rownames(sce), genelists_sub$genenames)
 54 |     toplotgl <- rowData(sce)[genestoplot, ]
 55 |     toplotgl$feature_type <- genelists_sub[genestoplot, ]$genetypes
 56 |   }
 57 | 
 58 |   toplotgl_sub <- toplotgl[toplotgl$zerop_gene < zero_pct & toplotgl$prd_quality == 1 & toplotgl$pseudoR2s > r2cutoff &
 59 |                            toplotgl$direction %in% direction, ]
 60 |   if (nrow(toplotgl_sub) > topnum) {
 61 |     toplotgl_sub <- toplotgl_sub[order(toplotgl_sub$pseudoR2s, decreasing = TRUE),]
 62 |     toplotgl_sub <- toplotgl_sub[1:topnum,]
 63 |   }
 64 |   return(toplotgl_sub)
 65 | }
 66 | 
 67 | #' @title Plot switching genes
 68 | #'
 69 | #' @description This function plots switching genes on the pseudo-timeline
 70 | #'
 71 | #' @param tml switching genes
 72 | #' @param timedata pseudotime for cells
 73 | #' @param iffulltml if plot the full timeline
 74 | #' @param txtsize text size for gene names
 75 | #' @param color_by the cell attribute (e.g. the column of tml) to map to each cell's color
 76 | #' @return
 77 | #'
 78 | #' @import ggplot2
 79 | #' @importFrom ggrepel geom_text_repel
 80 | #' @export
 81 | #'
 82 | plot_timeline_ggplot <- function(tml, timedata, iffulltml = TRUE, txtsize = 3.5, color_by = "feature_type") {
 83 |   tml <- as.data.frame(tml)
 84 |   tml <- tml[order(tml$switch_at_time), ]
 85 |   tml$direction_num <- -1
 86 |   if ("up" %in% tml$direction) {
 87 |     tml[tml$direction == "up", ]$direction_num <- 1
 88 |   }
 89 |   tml$color_by <- as.factor(tml[,color_by])
 90 |   tml$feature_name <- rownames(tml)
 91 |   head(tml)
 92 | 
 93 |   if (iffulltml) {
 94 |     pseudotime_step <- (max(timedata) - min(timedata))/4
 95 |     pseudotime_range <- seq(min(timedata), max(timedata), by = pseudotime_step)
 96 |     pseudotime_df <- data.frame(pseudotime_range, pseudotime_format = round(pseudotime_range, 1))
 97 |   } else {
 98 |     pseudotime_step <- (max(tml$switch_at_time) - min(tml$switch_at_time))/4
 99 |     pseudotime_range <- seq(min(tml$switch_at_time), max(tml$switch_at_time), by = pseudotime_step)
100 |     pseudotime_df <- data.frame(pseudotime_range, pseudotime_format = round(pseudotime_range, 1))
101 |   }
102 | 
103 |   tml_plot <- ggplot(tml, aes(x = switch_at_time, y = pseudoR2s * direction_num, col = color_by, label = feature_name)) +
104 |     geom_point(size = txtsize/3) + xlab("Pseudo-timeline") + ylab("Quality of fitting (R^2)")
105 |   tml_plot <- tml_plot + theme_classic()
106 |   # Plot horizontal black line for timeline
107 |   tml_plot <- tml_plot + geom_hline(yintercept = 0, color = "black", size = 0.6)
108 |   tml_plot <- tml_plot + geom_label(data = pseudotime_df, aes(x = pseudotime_range, y = 0, label = pseudotime_format), size = (txtsize-0.5),
109 |                                     color = "black")
110 | 
111 |   tml_plot <- tml_plot + geom_text_repel(aes(x = switch_at_time, y = pseudoR2s * direction_num, label = feature_name),
112 |                                          size = txtsize, fontface = "bold", show.legend = FALSE)
113 | 
114 |   tml_plot <- tml_plot + theme(legend.position = "bottom", legend.title = element_blank(), legend.key.size = unit(10, "pt"),
115 |                                text = element_text(size = 12, family = "Helvetica"))
116 | 
117 |   return(tml_plot)
118 | }
119 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | indent: true
  4 | ---
  5 | 
  6 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  7 | 
  8 | ```{r, include = FALSE}
  9 | knitr::opts_chunk$set(
 10 |   collapse = TRUE,
 11 |   comment = "#>",
 12 |   fig.path = "man/figures/README-"
 13 |   # out.width = "100%"
 14 | )
 15 | ```
 16 | # GeneSwitches
 17 | 
 18 | <!-- badges: start -->
 19 | <!-- badges: end -->
 20 | 
 21 |   The goal of GeneSwitches is to discover the order of gene-expression and 
 22 | functional events during cell state transitions at a single-cell resolution.
 23 | It works on any single-cell trajectory or pseudo-time ordering of cells to
 24 | discover the genes that act as on/off switches between cell states and 
 25 | importantly the ordering at which these switches take place.
 26 | 
 27 | ## Installation
 28 | 
 29 | ### Check and install required packages
 30 | 
 31 | Users may use following codes to check and install all the required packages.
 32 | 
 33 | ``` r
 34 | list.of.packages <- c("SingleCellExperiment", "Biobase", "fastglm", "ggplot2", "monocle",
 35 |                       "plyr", "RColorBrewer", "ggrepel", "ggridges", "gridExtra", "devtools",
 36 |                       "mixtools")
 37 | 
 38 | ## for package "fastglm", "ggplot2", "plyr", "RColorBrewer", "ggrepel", "ggridges", "gridExtra", "mixtools"
 39 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
 40 | if(length(new.packages)) install.packages(new.packages)
 41 | 
 42 | ## for package "SingleCellExperiment", "Biobase"
 43 | if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
 44 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
 45 | if(length(new.packages)) BiocManager::install(new.packages)
 46 | ```
 47 | 
 48 | ### Install GeneSwitches
 49 | 
 50 | The source code of GeneSwitches can be installed from [GitHub](https://github.com/) with:
 51 | 
 52 | ``` r
 53 | devtools::install_github("SGDDNB/GeneSwitches")
 54 | ```
 55 | 
 56 | 
 57 | ## Input datasets
 58 | 
 59 | GeneSwitches requires two inputs, namely a gene expression matrix and corresponding pseudo-time ordering of each cell. We convert these input datasets in a `SingleCellExperiment` object (Lun and Risso 2017) and below you will find a full “start-to-finish” workflow to realise the potential of this analyis. Any additions, suggestions or comments are welcome in the github repository.
 60 | 
 61 | ```{r load library, message=FALSE}
 62 | ## load libraries
 63 | library(GeneSwitches)
 64 | library(SingleCellExperiment)
 65 | ```
 66 | 
 67 | For this vignette, we will work with a published dataset of single-cell RNA-seq data from the differentiation of human embryonic stem cells (hESC) to cardiomyocytes (CM) (Friedman et al., 2018). For the purpose of demonstration, we have subset the original dataset into 3000 cells and run Monocle2 to generate trajectories. Both the Log-normalized gene expression and Monocle2 inferred trajectory results can be downloaded from here as [logexpdata.RData](http://files.ddnetbio.com/logexpdata.RData), and [cardiac_monocle2.RData](http://files.ddnetbio.com/cardiac_monocle2.RData). This dataset was chosen in part because it shows a bifurcating cell fate of cardiac hESC differentiation that gives rise to definitive cardiomyocytes (Path1) or non-contractile cardiac derivatives (Path2) that allow for all aspect of GeneSwitches to be applied. 
 68 | 
 69 | ```{r load data, message=FALSE}
 70 | ## Download example files to current directory
 71 | get_example_inputData()
 72 | ## Load input data log-normalized gene expression
 73 | load("./logexpdata.RData")
 74 | ## Load Monocle2 object with pseudo-time and dimensionality reduction
 75 | load("./cardiac_monocle2.RData")
 76 | ```
 77 | 
 78 | ### Direct input (NOT run)
 79 | Users can input the gene expression (`logexpdata`; recommend for log-normalized expression), pseudo-time (`cell_pseudotime`) and dimensionality reductions (`rd_PCA`; optional and only for gene expression plots) into SingleCellExperiment object as follows.
 80 | ```r
 81 | ### create SingleCellExperiment object with log-normalized single cell data
 82 | #sce <- SingleCellExperiment(assays = List(expdata = logexpdata))
 83 | ### add pseudo-time information
 84 | #colData(sce)$Pseudotime <- cell_pseudotime
 85 | ### add dimensionality reductions, e.g. PCA, UMAP, tSNE
 86 | #pca <- prcomp(t(assays(sce)$expdata), scale. = FALSE)
 87 | #rd_PCA <- pca$x[,1:2]
 88 | #reducedDims(sce) <- SimpleList(PCA = rd_PCA)
 89 | ```
 90 | ### Convert from trajectory results
 91 | Alternatively, GeneSwitches provides functions to convert Monocle2 or Slingshot results into SingleCellExperiment object directly. For Monocle2 trajectory, users need to indicate the states of the desired path, which can be checked by plotting the trajectory using Monocle2 function `plot_cell_trajectory` or the following function.
 92 | 
 93 | ```{r plot_monocle_trajectory, fig.height = 4, fig.width = 8, fig.align = "center"} 
 94 | ## plot Monocle2 trajectory colored by State
 95 | # monocle:::plot_cell_trajectory(cardiac_monocle2, color_by = "State")
 96 | plot_monocle_State(cardiac_monocle2)
 97 | ```
 98 | 
 99 | Based on the marker genes, the pseudo-time trajectory starts from State 3, which are hESC cells. Definitive CM cells are in State 1 and non-contractile cardiac derivatives are in State 5. Therefore, we focus on Path1 with cells in states 3, 2, 1 and Path2 with cells in states 3, 2, 5, and extract these two paths from Monocle2 object.
100 | 
101 | ```{r Monocle2 input, message=FALSE}
102 | ## Input log-normalized gene expression, Monocle2 pseudo-time and dimensionality reduction
103 | ## Path1 containing cells in states 3,2,1
104 | sce_p1 <- convert_monocle2(monocle2_obj = cardiac_monocle2, 
105 |                            states = c(3,2,1), expdata = logexpdata)
106 | ## Path2 containing cells in states 3,2,5
107 | sce_p2 <- convert_monocle2(monocle2_obj = cardiac_monocle2, 
108 |                            states = c(3,2,5), expdata = logexpdata)
109 | ```
110 | 
111 | If we are only interested in the trajectory within a certain range of pseudotime, function `subset_pseudotime` can be used to subset the SingleCellExperiment object accordingly, followed by filtering out lowly expressed genes.
112 | ```{r}
113 | ### Subset cells to pseudotime range from 10 to 25
114 | #sce_p1_subset <- subset_pseudotime(sce_p1, min_time = 10, max_time = 25, minexp = 0, mincells = 10)
115 | ```
116 | In Part I, we will apply GeneSwitches on a single trajectory, Path1, to demonstrate the general workflow and functions. Comparison of GeneSwitches results from two trajectories (Path1 & 2) will be shown in Part II.
117 | 
118 | 
119 | ## PART I. GeneSwitches on a single trajectory
120 | 
121 | ### I-1. Binarize gene expression
122 | 
123 | Since we focus on the genes that are either switched on or off, we first binarize the gene expression data into 1(on) or 0(off) state. To achieve this, for each gene, we fit a mixture model of two gaussian distributions to the input gene expression to calculates gene-specific thresholds for binarization. Prior to fitting, we add gaussian noise with zero mean and 0.1 standard deviation to the gene expression, which ensures numerical stability in the fitting of the gene expression. Genes that do not have a distinct bimodal “on-off” distribution are then removed. This step may take 2 minutes for 2000 cells using 3 cores. 
124 | 
125 | ```{r binarization1}
126 | ### binarize gene expression using gene-specific thresholds
127 | sce_p1 <- binarize_exp(sce_p1, ncores = 3)
128 | ```
129 | 
130 | Alternatively, we can use a global threshold for fast binarization. We plot a histogram of expression of all the genes in all cells and look for a break between the zero and expressed distributions to identify the global threshold.
131 | 
132 | ```{r binarization_threshold, fig.height = 4, fig.width = 5, fig.align = "center"}
133 | ### check the threshold for binarization
134 | #h <- hist(assays(sce_p1)$expdata, breaks = 200, plot = FALSE)
135 | #{plot(h, freq = FALSE, xlim = c(0,2), ylim = c(0,1), main = "Histogram of gene expression",
136 | #xlab = "Gene expression", col = "darkgoldenrod2", border = "grey")
137 | #abline(v=0.2, col="blue")}
138 | 
139 | ###In this example, we choose 0.2 (blue line, also set as default) as the threshold.
140 | # bn_cutoff <- 0.2
141 | # sce_p1 <- binarize_exp(sce_p1, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff)
142 | ```
143 | 
144 | ### I-2. Fit logistic regression & estimate switching time
145 | 
146 | Logistic regression is applied to model the binary states (on or off) of gene
147 | expression. Then the switching pseudo-time point is determined by the time at
148 | which the fitted line crosses the probability threshold 0.5. 
149 | We use random downsampling of zero expressions (downsample = TRUE) to rescue
150 | the prediction of switching time for genes with high zero inflation. 
151 | 
152 | ```{r, message=FALSE}
153 | ## fit logistic regression and find the switching pseudo-time point for each gene
154 | ## with downsampling. This step takes less than 1 mins
155 | sce_p1 <- find_switch_logistic_fastglm(sce_p1, downsample = TRUE, show_warning = FALSE)
156 | ```
157 | 
158 | 
159 | ### I-3. Visualize ordering of switching genes
160 | 
161 | First, we filter poorly fitted genes based on zero-expression percentage (>90%), FDR (>0.05) and McFadden's Pseudo R^2 (<0.03). We can then the number of top best fitting (high McFadden's Pseudo R^2) genes to plot. One can also extract specific gene type(s) to plot, with provided gene type lists containing surface proteins (downloaded from [here](http://wlab.ethz.ch/cspa/)) and transcription factors (TFs, downloaded from [here](http://humantfs.ccbr.utoronto.ca/)). Users are allowed to pass their own gene type lists as a data frame to parameter genelists, with rows as genes (non-duplicated) and two columns with name `genenames` and `genetypes`.
162 | 
163 | ```{r, message=FALSE}
164 | ## filter top 15 best fitting switching genes among all the genes
165 | sg_allgenes <- filter_switchgenes(sce_p1, allgenes = TRUE, topnum = 15)
166 | ## filter top 15 best fitting switching genes among surface proteins and TFs only
167 | sg_gtypes <- filter_switchgenes(sce_p1, allgenes = FALSE, topnum = 20,
168 |                                 genelists = gs_genelists, genetype = c("Surface proteins", "TFs"))
169 | ## combine switching genes and remove duplicated genes from sg_allgenes
170 | sg_vis <- rbind(sg_gtypes, sg_allgenes[setdiff(rownames(sg_allgenes), rownames(sg_gtypes)),])
171 | 
172 | ```
173 | 
174 | Finally, plot the selected genes along the pseudo-timeline. Genes that are switched on are plotted above the line, while those switching off are below the line.
175 | 
176 | ```{r timeline example, message=FALSE, fig.height = 4.2, fig.width = 6, fig.align = "center"}
177 | plot_timeline_ggplot(sg_vis, timedata = sce_p1$Pseudotime, txtsize = 3)
178 | ```
179 | 
180 | It is possible to use the dimensionality reduction provided from the user to visualise the gene expression and logistic regression fitting plots if needed.
181 | 
182 | ```{r plotexp, fig.height = 2.7, fig.width = 3.5, fig.align = "center"}
183 | plot_gene_exp(sce_p1, gene = "VIM", reduction = "monocleRD", downsample = F)
184 | ```
185 | 
186 | 
187 | ### I-4. Order pathways along the pseudo-timeline
188 | 
189 | GeneSwitches can be used to order pathways or genesets as well. We include the pathways provided by MSigDB hallmark (Liberzon,A. et al., 2015), C2 curated and C5 gene ontology geneset collections.
190 | A Hypergeometric test is first applied to extract the pathways that are significantly overrepresented amongst those that are changing along the trajectory. The Switching time of the pathway is then determined by the median switching time of genes in that pathway.
191 | 
192 | ```{r pathways}
193 | ## filter genes for pathway analysis using r^2 cutoff 0.1
194 | sg_pw <- filter_switchgenes(sce_p1, allgenes = TRUE, r2cutoff = 0.1)
195 | ## apply hypergeometric test and determine the switching time
196 | switch_pw <- find_switch_pathway(rowData(sce_p1), sig_FDR = 0.05,
197 |                                  pathways = msigdb_h_c2_c5, sg_pw)
198 | ## remove redundant pathways
199 | switch_pw_reduce <- reduce_pathways(switch_pw, pathways = msigdb_h_c2_c5, 
200 |                                     redundant_pw_rate = 0.8)
201 | ```
202 | 
203 | To better visualise the functional changes ridge plots of pathways genes show the density of switching genes along the pseudo-time. Top 10 significantly changed pathways are plotted here, ordered by the switching time.
204 | 
205 | ```{r pathways_ridge_plots, fig.height = 3, fig.width = 8, fig.align = "center"}
206 | plot_pathway_density(switch_pw_reduce[1:10,], sg_pw, orderbytime = TRUE)
207 | ```
208 | 
209 | We can also select specific pathway(s) to plot the switching genes in it. Among top 10 significantly changed pathways, we plot genes related to myogenesis and cardiac muscle tissue development. 
210 | 
211 | ```{r plot pathway genes, fig.height = 4.5, fig.width = 7, fig.align = "center"}
212 | sg_vis <- filter_switchgenes(sce_p1, topnum = 50, pathway_name = c("HALLMARK_MYOGENESIS",
213 |                                                                 "GO_CARDIAC_MUSCLE_TISSUE_DEVELOPMENT"))
214 | plot_timeline_ggplot(sg_vis, timedata=sce_p1$Pseudotime, txtsize=3)
215 | ```
216 | 
217 | "Multiple" lables the genes in more than one pathways.
218 | 
219 | 
220 | ## PART II. Comparing switching genes from two trajectories
221 | 
222 | Before comparison, we need to apply same steps in I-1 and I-2 on the cells from Path2 to identify switching pseudo-time point for each gene.
223 | 
224 | ```{r for_path2}
225 | sce_p2 <- binarize_exp(sce_p2)
226 | sce_p2 <- find_switch_logistic_fastglm(sce_p2, downsample = TRUE, show_warnings = FALSE)
227 | ```
228 | 
229 | And we filter out poorly fitted genes for both paths using the same cutoff.
230 | 
231 | ```{r filter out poorly fitted genes}
232 | sg_p1 <- filter_switchgenes(sce_p1, allgenes = TRUE, r2cutoff = 0.03)
233 | sg_p2 <- filter_switchgenes(sce_p2, allgenes = TRUE, r2cutoff = 0.03)
234 | ```
235 | 
236 | We then plot common switching genes between two paths to compare their ordering.
237 | 
238 | ```{r plot common genes, fig.height = 4.5, fig.width = 7, fig.align = "center"}
239 | sg_com <- common_genes(sg_p1, sg_p2, r2cutoff = 0.4,
240 |                        path1name = "Definitive CM", path2name = "non-contractile")
241 | common_genes_plot(sg_com, timedata = sce_p1$Pseudotime)
242 | ```
243 | 
244 | More importantly, we can plot the distinct switching genes of the two paths.
245 | 
246 | ```{r plot distinct genes, fig.height = 3.5, fig.width = 7, fig.align = "center"}
247 | sg_disgs <- distinct_genes(sg_p1, sg_p2, r2cutoff = 0.52,
248 |                            path1name = "Definitive CM", path2name = "non-contractile",
249 |                            path1time = sce_p1$Pseudotime, path2time = sce_p2$Pseudotime)
250 | plot_timeline_ggplot(sg_disgs, timedata = sce_p1$Pseudotime, color_by = "Paths", 
251 |                      iffulltml = FALSE, txtsize = 3)
252 | ```
253 | 
254 | We can also scale the timelines to be the same length (default number of bins is 100) so that differences are based on percentage of the trajectory covered rather than pseudo-time. 
255 | 
256 | ```{r plot scale timeline, fig.height = 3.5, fig.width = 7, fig.align = "center"}
257 | sg_disgs_scale <- distinct_genes(sg_p1, sg_p2, r2cutoff = 0.52, 
258 |                                  path1name = "Definitive CM", path2name = "non-contractile",
259 |                                  path1time = sce_p1$Pseudotime, path2time = sce_p2$Pseudotime, 
260 |                                  scale_timeline = T, bin = 100)
261 | # timedata need to be 1 to (number of bins)
262 | plot_timeline_ggplot(sg_disgs_scale, timedata = 1:100, color_by = "Paths", 
263 |                      iffulltml = FALSE, txtsize = 3)
264 | ```
265 | 
266 | These two plots for distinct switching genes only show a range of pseudo-timeline in which there are switching events happening. This range is actually at the end of trajectories, while common genes are mostly at the early period (common gene plot).  
267 | 
268 | 
269 | Similarly, we can check the gene expression plots for the two paths.
270 | 
271 | ```{r plot_exp1, fig.height = 4, fig.width = 12, fig.align = "center"}
272 | gn <- "DCN"
273 | p <- plot_gene_exp(sce_p1, gene = gn, reduction = "monocleRD", 
274 |                    downsample = FALSE, fitting = TRUE)
275 | p <- plot_gene_exp(sce_p2, gene = gn, reduction = "monocleRD", 
276 |                    downsample = FALSE, fitting = TRUE)
277 | ```
278 | 
279 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # GeneSwitches
  5 | 
  6 | <!-- badges: start -->
  7 | 
  8 | <!-- badges: end -->
  9 | 
 10 | The goal of GeneSwitches is to discover the order of gene-expression and
 11 | functional events during cell state transitions at a single-cell
 12 | resolution. It works on any single-cell trajectory or pseudo-time
 13 | ordering of cells to discover the genes that act as on/off switches
 14 | between cell states and importantly the ordering at which these switches
 15 | take place.
 16 | 
 17 | ## Installation
 18 | 
 19 | ### Check and install required packages
 20 | 
 21 | Users may use following codes to check and install all the required
 22 | packages.
 23 | 
 24 | ``` r
 25 | list.of.packages <- c("SingleCellExperiment", "Biobase", "fastglm", "ggplot2", "monocle",
 26 |                       "plyr", "RColorBrewer", "ggrepel", "ggridges", "gridExtra", "devtools",
 27 |                       "mixtools")
 28 | 
 29 | ## for package "fastglm", "ggplot2", "plyr", "RColorBrewer", "ggrepel", "ggridges", "gridExtra", "mixtools"
 30 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
 31 | if(length(new.packages)) install.packages(new.packages)
 32 | 
 33 | ## for package "SingleCellExperiment", "Biobase"
 34 | if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
 35 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
 36 | if(length(new.packages)) BiocManager::install(new.packages)
 37 | ```
 38 | 
 39 | ### Install GeneSwitches
 40 | 
 41 | The source code of GeneSwitches can be installed from
 42 | [GitHub](https://github.com/) with:
 43 | 
 44 | ``` r
 45 | devtools::install_github("SGDDNB/GeneSwitches")
 46 | ```
 47 | 
 48 | ## Input datasets
 49 | 
 50 | GeneSwitches requires two inputs, namely a gene expression matrix and
 51 | corresponding pseudo-time ordering of each cell. We convert these input
 52 | datasets in a `SingleCellExperiment` object (Lun and Risso 2017) and
 53 | below you will find a full “start-to-finish” workflow to realise the
 54 | potential of this analyis. Any additions, suggestions or comments are
 55 | welcome in the github repository.
 56 | 
 57 | ``` r
 58 | ## load libraries
 59 | library(GeneSwitches)
 60 | library(SingleCellExperiment)
 61 | ```
 62 | 
 63 | For this vignette, we will work with a published dataset of single-cell
 64 | RNA-seq data from the differentiation of human embryonic stem cells
 65 | (hESC) to cardiomyocytes (CM) (Friedman et al., 2018). For the purpose
 66 | of demonstration, we have subset the original dataset into 3000 cells
 67 | and run Monocle2 to generate trajectories. Both the Log-normalized gene
 68 | expression and Monocle2 inferred trajectory results can be downloaded
 69 | from here as
 70 | [logexpdata.RData](http://files.ddnetbio.com/logexpdata.RData), and
 71 | [cardiac\_monocle2.RData](http://files.ddnetbio.com/cardiac_monocle2.RData).
 72 | This dataset was chosen in part because it shows a bifurcating cell fate
 73 | of cardiac hESC differentiation that gives rise to definitive
 74 | cardiomyocytes (Path1) or non-contractile cardiac derivatives (Path2)
 75 | that allow for all aspect of GeneSwitches to be applied.
 76 | 
 77 | ``` r
 78 | ## Download example files to current directory
 79 | get_example_inputData()
 80 | ## Load input data log-normalized gene expression
 81 | load("./logexpdata.RData")
 82 | ## Load Monocle2 object with pseudo-time and dimensionality reduction
 83 | load("./cardiac_monocle2.RData")
 84 | ```
 85 | 
 86 | ### Direct input (NOT run)
 87 | 
 88 | Users can input the gene expression (`logexpdata`; recommend for
 89 | log-normalized expression), pseudo-time (`cell_pseudotime`) and
 90 | dimensionality reductions (`rd_PCA`; optional and only for gene
 91 | expression plots) into SingleCellExperiment object as
 92 | follows.
 93 | 
 94 | ``` r
 95 | ### create SingleCellExperiment object with log-normalized single cell data
 96 | #sce <- SingleCellExperiment(assays = List(expdata = logexpdata))
 97 | ### add pseudo-time information
 98 | #colData(sce)$Pseudotime <- cell_pseudotime
 99 | ### add dimensionality reductions, e.g. PCA, UMAP, tSNE
100 | #pca <- prcomp(t(assays(sce)$expdata), scale. = FALSE)
101 | #rd_PCA <- pca$x[,1:2]
102 | #reducedDims(sce) <- SimpleList(PCA = rd_PCA)
103 | ```
104 | 
105 | ### Convert from trajectory results
106 | 
107 | Alternatively, GeneSwitches provides functions to convert Monocle2 or
108 | Slingshot results into SingleCellExperiment object directly. For
109 | Monocle2 trajectory, users need to indicate the states of the desired
110 | path, which can be checked by plotting the trajectory using Monocle2
111 | function `plot_cell_trajectory` or the following function.
112 | 
113 | ``` r
114 | ## plot Monocle2 trajectory colored by State
115 | # monocle:::plot_cell_trajectory(cardiac_monocle2, color_by = "State")
116 | plot_monocle_State(cardiac_monocle2)
117 | ```
118 | 
119 | <img src="man/figures/README-plot_monocle_trajectory-1.png" style="display: block; margin: auto;" />
120 | 
121 | Based on the marker genes, the pseudo-time trajectory starts from State
122 | 3, which are hESC cells. Definitive CM cells are in State 1 and
123 | non-contractile cardiac derivatives are in State 5. Therefore, we focus
124 | on Path1 with cells in states 3, 2, 1 and Path2 with cells in states 3,
125 | 2, 5, and extract these two paths from Monocle2
126 | object.
127 | 
128 | ``` r
129 | ## Input log-normalized gene expression, Monocle2 pseudo-time and dimensionality reduction
130 | ## Path1 containing cells in states 3,2,1
131 | sce_p1 <- convert_monocle2(monocle2_obj = cardiac_monocle2, 
132 |                            states = c(3,2,1), expdata = logexpdata)
133 | ## Path2 containing cells in states 3,2,5
134 | sce_p2 <- convert_monocle2(monocle2_obj = cardiac_monocle2, 
135 |                            states = c(3,2,5), expdata = logexpdata)
136 | ```
137 | 
138 | If we are only interested in the trajectory within a certain range of
139 | pseudotime, function `subset_pseudotime` can be used to subset the
140 | SingleCellExperiment object accordingly, followed by filtering out lowly
141 | expressed genes.
142 | 
143 | ``` r
144 | ### Subset cells to pseudotime range from 10 to 25
145 | #sce_p1_subset <- subset_pseudotime(sce_p1, min_time = 10, max_time = 25, minexp = 0, mincells = 10)
146 | ```
147 | 
148 | In Part I, we will apply GeneSwitches on a single trajectory, Path1, to
149 | demonstrate the general workflow and functions. Comparison of
150 | GeneSwitches results from two trajectories (Path1 & 2) will be shown in
151 | Part II.
152 | 
153 | ## PART I. GeneSwitches on a single trajectory
154 | 
155 | ### I-1. Binarize gene expression
156 | 
157 | Since we focus on the genes that are either switched on or off, we first
158 | binarize the gene expression data into 1(on) or 0(off) state. To achieve
159 | this, for each gene, we fit a mixture model of two gaussian
160 | distributions to the input gene expression to calculates gene-specific
161 | thresholds for binarization. Prior to fitting, we add gaussian noise
162 | with zero mean and 0.1 standard deviation to the gene expression, which
163 | ensures numerical stability in the fitting of the gene expression. Genes
164 | that do not have a distinct bimodal “on-off” distribution are then
165 | removed. This step may take 2 minutes for 2000 cells using 3 cores.
166 | 
167 | ``` r
168 | ### binarize gene expression using gene-specific thresholds
169 | sce_p1 <- binarize_exp(sce_p1, ncores = 3)
170 | ```
171 | 
172 | Alternatively, we can use a global threshold for fast binarization. We
173 | plot a histogram of expression of all the genes in all cells and look
174 | for a break between the zero and expressed distributions to identify the
175 | global threshold.
176 | 
177 | ``` r
178 | ### check the threshold for binarization
179 | #h <- hist(assays(sce_p1)$expdata, breaks = 200, plot = FALSE)
180 | #{plot(h, freq = FALSE, xlim = c(0,2), ylim = c(0,1), main = "Histogram of gene expression",
181 | #xlab = "Gene expression", col = "darkgoldenrod2", border = "grey")
182 | #abline(v=0.2, col="blue")}
183 | 
184 | ###In this example, we choose 0.2 (blue line, also set as default) as the threshold.
185 | # bn_cutoff <- 0.2
186 | # sce_p1 <- binarize_exp(sce_p1, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff)
187 | ```
188 | 
189 | ### I-2. Fit logistic regression & estimate switching time
190 | 
191 | Logistic regression is applied to model the binary states (on or off) of
192 | gene expression. Then the switching pseudo-time point is determined by
193 | the time at which the fitted line crosses the probability threshold 0.5.
194 | We use random downsampling of zero expressions (downsample = TRUE) to
195 | rescue the prediction of switching time for genes with high zero
196 | inflation.
197 | 
198 | ``` r
199 | ## fit logistic regression and find the switching pseudo-time point for each gene
200 | ## with downsampling. This step takes less than 1 mins
201 | sce_p1 <- find_switch_logistic_fastglm(sce_p1, downsample = TRUE, show_warning = FALSE)
202 | ```
203 | 
204 | ### I-3. Visualize ordering of switching genes
205 | 
206 | First, we filter poorly fitted genes based on zero-expression percentage
207 | (\>90%), FDR (\>0.05) and McFadden’s Pseudo R^2 (\<0.03). We can then
208 | the number of top best fitting (high McFadden’s Pseudo R^2) genes to
209 | plot. One can also extract specific gene type(s) to plot, with provided
210 | gene type lists containing surface proteins (downloaded from
211 | [here](http://wlab.ethz.ch/cspa/)) and transcription factors (TFs,
212 | downloaded from [here](http://humantfs.ccbr.utoronto.ca/)). Users are
213 | allowed to pass their own gene type lists as a data frame to parameter
214 | genelists, with rows as genes (non-duplicated) and two columns with name
215 | `genenames` and `genetypes`.
216 | 
217 | ``` r
218 | ## filter top 15 best fitting switching genes among all the genes
219 | sg_allgenes <- filter_switchgenes(sce_p1, allgenes = TRUE, topnum = 15)
220 | ## filter top 15 best fitting switching genes among surface proteins and TFs only
221 | sg_gtypes <- filter_switchgenes(sce_p1, allgenes = FALSE, topnum = 20,
222 |                                 genelists = gs_genelists, genetype = c("Surface proteins", "TFs"))
223 | ## combine switching genes and remove duplicated genes from sg_allgenes
224 | sg_vis <- rbind(sg_gtypes, sg_allgenes[setdiff(rownames(sg_allgenes), rownames(sg_gtypes)),])
225 | ```
226 | 
227 | Finally, plot the selected genes along the pseudo-timeline. Genes that
228 | are switched on are plotted above the line, while those switching off
229 | are below the
230 | line.
231 | 
232 | ``` r
233 | plot_timeline_ggplot(sg_vis, timedata = sce_p1$Pseudotime, txtsize = 3)
234 | ```
235 | 
236 | <img src="man/figures/README-timeline example-1.png" style="display: block; margin: auto;" />
237 | 
238 | It is possible to use the dimensionality reduction provided from the
239 | user to visualise the gene expression and logistic regression fitting
240 | plots if
241 | needed.
242 | 
243 | ``` r
244 | plot_gene_exp(sce_p1, gene = "VIM", reduction = "monocleRD", downsample = F)
245 | ```
246 | 
247 | <img src="man/figures/README-plotexp-1.png" style="display: block; margin: auto;" />
248 | 
249 | ### I-4. Order pathways along the pseudo-timeline
250 | 
251 | GeneSwitches can be used to order pathways or genesets as well. We
252 | include the pathways provided by MSigDB hallmark (Liberzon,A. et al.,
253 | 2015), C2 curated and C5 gene ontology geneset collections. A
254 | Hypergeometric test is first applied to extract the pathways that are
255 | significantly overrepresented amongst those that are changing along the
256 | trajectory. The Switching time of the pathway is then determined by the
257 | median switching time of genes in that pathway.
258 | 
259 | ``` r
260 | ## filter genes for pathway analysis using r^2 cutoff 0.1
261 | sg_pw <- filter_switchgenes(sce_p1, allgenes = TRUE, r2cutoff = 0.1)
262 | ## apply hypergeometric test and determine the switching time
263 | switch_pw <- find_switch_pathway(rowData(sce_p1), sig_FDR = 0.05,
264 |                                  pathways = msigdb_h_c2_c5, sg_pw)
265 | ## remove redundant pathways
266 | switch_pw_reduce <- reduce_pathways(switch_pw, pathways = msigdb_h_c2_c5, 
267 |                                     redundant_pw_rate = 0.8)
268 | ```
269 | 
270 | To better visualise the functional changes ridge plots of pathways genes
271 | show the density of switching genes along the pseudo-time. Top 10
272 | significantly changed pathways are plotted here, ordered by the
273 | switching time.
274 | 
275 | ``` r
276 | plot_pathway_density(switch_pw_reduce[1:10,], sg_pw, orderbytime = TRUE)
277 | #> Picking joint bandwidth of 2.49
278 | ```
279 | 
280 | <img src="man/figures/README-pathways_ridge_plots-1.png" style="display: block; margin: auto;" />
281 | 
282 | We can also select specific pathway(s) to plot the switching genes in
283 | it. Among top 10 significantly changed pathways, we plot genes related
284 | to myogenesis and cardiac muscle tissue
285 | development.
286 | 
287 | ``` r
288 | sg_vis <- filter_switchgenes(sce_p1, topnum = 50, pathway_name = c("HALLMARK_MYOGENESIS",
289 |                                                                 "GO_CARDIAC_MUSCLE_TISSUE_DEVELOPMENT"))
290 | plot_timeline_ggplot(sg_vis, timedata=sce_p1$Pseudotime, txtsize=3)
291 | ```
292 | 
293 | <img src="man/figures/README-plot pathway genes-1.png" style="display: block; margin: auto;" />
294 | 
295 | “Multiple” lables the genes in more than one pathways.
296 | 
297 | ## PART II. Comparing switching genes from two trajectories
298 | 
299 | Before comparison, we need to apply same steps in I-1 and I-2 on the
300 | cells from Path2 to identify switching pseudo-time point for each gene.
301 | 
302 | ``` r
303 | sce_p2 <- binarize_exp(sce_p2)
304 | sce_p2 <- find_switch_logistic_fastglm(sce_p2, downsample = TRUE, show_warnings = FALSE)
305 | ```
306 | 
307 | And we filter out poorly fitted genes for both paths using the same
308 | cutoff.
309 | 
310 | ``` r
311 | sg_p1 <- filter_switchgenes(sce_p1, allgenes = TRUE, r2cutoff = 0.03)
312 | sg_p2 <- filter_switchgenes(sce_p2, allgenes = TRUE, r2cutoff = 0.03)
313 | ```
314 | 
315 | We then plot common switching genes between two paths to compare their
316 | ordering.
317 | 
318 | ``` r
319 | sg_com <- common_genes(sg_p1, sg_p2, r2cutoff = 0.4,
320 |                        path1name = "Definitive CM", path2name = "non-contractile")
321 | common_genes_plot(sg_com, timedata = sce_p1$Pseudotime)
322 | ```
323 | 
324 | <img src="man/figures/README-plot common genes-1.png" style="display: block; margin: auto;" />
325 | 
326 | More importantly, we can plot the distinct switching genes of the two
327 | paths.
328 | 
329 | ``` r
330 | sg_disgs <- distinct_genes(sg_p1, sg_p2, r2cutoff = 0.52,
331 |                            path1name = "Definitive CM", path2name = "non-contractile",
332 |                            path1time = sce_p1$Pseudotime, path2time = sce_p2$Pseudotime)
333 | plot_timeline_ggplot(sg_disgs, timedata = sce_p1$Pseudotime, color_by = "Paths", 
334 |                      iffulltml = FALSE, txtsize = 3)
335 | ```
336 | 
337 | <img src="man/figures/README-plot distinct genes-1.png" style="display: block; margin: auto;" />
338 | 
339 | We can also scale the timelines to be the same length (default number of
340 | bins is 100) so that differences are based on percentage of the
341 | trajectory covered rather than pseudo-time.
342 | 
343 | ``` r
344 | sg_disgs_scale <- distinct_genes(sg_p1, sg_p2, r2cutoff = 0.52, 
345 |                                  path1name = "Definitive CM", path2name = "non-contractile",
346 |                                  path1time = sce_p1$Pseudotime, path2time = sce_p2$Pseudotime, 
347 |                                  scale_timeline = T, bin = 100)
348 | # timedata need to be 1 to (number of bins)
349 | plot_timeline_ggplot(sg_disgs_scale, timedata = 1:100, color_by = "Paths", 
350 |                      iffulltml = FALSE, txtsize = 3)
351 | ```
352 | 
353 | <img src="man/figures/README-plot scale timeline-1.png" style="display: block; margin: auto;" />
354 | 
355 | These two plots for distinct switching genes only show a range of
356 | pseudo-timeline in which there are switching events happening. This
357 | range is actually at the end of trajectories, while common genes are
358 | mostly at the early period (common gene plot).
359 | 
360 | Similarly, we can check the gene expression plots for the two paths.
361 | 
362 | ``` r
363 | gn <- "DCN"
364 | p <- plot_gene_exp(sce_p1, gene = gn, reduction = "monocleRD", 
365 |                    downsample = FALSE, fitting = TRUE)
366 | #> Warning: glm.fit: algorithm did not converge
367 | ```
368 | 
369 | <img src="man/figures/README-plot_exp1-1.png" style="display: block; margin: auto;" />
370 | 
371 | ``` r
372 | p <- plot_gene_exp(sce_p2, gene = gn, reduction = "monocleRD", 
373 |                    downsample = FALSE, fitting = TRUE)
374 | ```
375 | 
376 | <img src="man/figures/README-plot_exp1-2.png" style="display: block; margin: auto;" />
377 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman


--------------------------------------------------------------------------------
/data-raw/DATASET.R:
--------------------------------------------------------------------------------
  1 | ## code to prepare `DATASET` dataset goes here
  2 | 
  3 | ###pathways ----------------------------------
  4 | if (!requireNamespace("BiocManager", quietly = TRUE))
  5 |   install.packages("BiocManager")
  6 | BiocManager::install("fgsea")
  7 | # BiocManager::install("fgsea", version = "3.8")
  8 | library(fgsea)
  9 | pathways1 <- gmtPathways("~/Documents/geneswitch/pathways/msigdb/h.all.v6.2.symbols.gmt")
 10 | pathways2 <- gmtPathways("~/Documents/geneswitch/pathways/msigdb/c2.cp.kegg.v6.2.symbols.gmt")
 11 | pathways3 <- gmtPathways("~/Documents/geneswitch/pathways/msigdb/c5.all.v6.2.symbols.gmt")
 12 | msigdb_h_c2_c5 <- c(pathways1, pathways2, pathways3);length(pathways)
 13 | # save(pathways, file = "../shiny/pathways.RData")
 14 | usethis::use_data(msigdb_h_c2_c5, internal = FALSE)
 15 | 
 16 | ###genelists ----------------------------------
 17 | load("~/Documents/general_scripts/genelists.RData")
 18 | table(genelists$genetype)
 19 | genelists <- genelists[genelists$genetype %in% c("TF", "surface"),];dim(genelists)
 20 | genelists[genelists$genetype == "surface", ]$genetype <- "Surface protein"
 21 | table(genelists$genetype)
 22 | # usethis::use_data(genelists, internal = TRUE, overwrite = TRUE)
 23 | genelists <- genelists[genelists$genetype == "TF",];dim(genelists)
 24 | genelists$genetype <- "TFs"
 25 | head(genelists)
 26 | 
 27 | gl <- read.table("~/Documents/general_scripts/CellSurfaceAtlas.txt", header = TRUE)
 28 | head(gl);class(gl);dim(gl)
 29 | # length(setdiff(genelists[genelists$genetype == "Surface protein",]$genenames, gl$entrez_symbol))
 30 | # length(setdiff(gl$entrez_symbol, genelists[genelists$genetype == "Surface protein",]$genenames))
 31 | gl2 <- data.frame(genenames = unique(gl$entrez_symbol), genetype = "Surface proteins")
 32 | rownames(gl2) <- gl2$genenames;dim(gl2)
 33 | head(gl2)
 34 | com <- intersect(rownames(gl2), rownames(genelists))
 35 | genelists[com,]$genetype <- "TF&Surface proteins"
 36 | gl2 <- gl2[!rownames(gl2) %in% genelists$genenames,];dim(gl2)
 37 | genelists <- rbind(gl2, genelists)
 38 | head(genelists);table(genelists$genetype)
 39 | gs_genelists <- genelists
 40 | colnames(gs_genelists)[2] <- "genetypes"
 41 | head(gs_genelists);table(gs_genelists$genetype)
 42 | usethis::use_data(gs_genelists, internal = FALSE, overwrite = TRUE)
 43 | 
 44 | genelists3 <- genelists
 45 | save(genelists3, file = "~/Documents/general_scripts/genelists3(new).RData")
 46 | 
 47 | gl <- read.table("~/Documents/general_scripts/ECM_proteins.txt", sep = "\t", header = TRUE)
 48 | head(gl);class(gl);dim(gl)
 49 | # length(setdiff(genelists[genelists$genetype == "Surface protein",]$genenames, gl$entrez_symbol))
 50 | # length(setdiff(gl$entrez_symbol, genelists[genelists$genetype == "Surface protein",]$genenames))
 51 | gl2 <- data.frame(genenames = unique(gl$entrez_symbol), genetype = "ECM")
 52 | rownames(gl2) <- gl2$genenames;dim(gl2)
 53 | head(gl2)
 54 | com <- intersect(rownames(gl2), rownames(genelists))
 55 | genelists[com,]
 56 | genelists <- genelists[!genelists$genenames %in% rownames(gl2),];dim(genelists)
 57 | genelists <- rbind(gl2, genelists)
 58 | head(genelists);table(genelists$genetype)
 59 | 
 60 | gl <- read.table("~/Documents/general_scripts/CytoKineRegistry.txt", sep = "\t", header = TRUE)
 61 | head(gl);class(gl);dim(gl)
 62 | # length(setdiff(genelists[genelists$genetype == "Surface protein",]$genenames, gl$entrez_symbol))
 63 | # length(setdiff(gl$entrez_symbol, genelists[genelists$genetype == "Surface protein",]$genenames))
 64 | gl2 <- data.frame(genenames = unique(gl$EntrezGeneSymbol), genetype = "Cytokine")
 65 | rownames(gl2) <- gl2$genenames;dim(gl2)
 66 | head(gl2)
 67 | com <- intersect(rownames(gl2), rownames(genelists));length(com)
 68 | genelists[com,]
 69 | genelists <- genelists[!genelists$genenames %in% rownames(gl2),];dim(genelists)
 70 | genelists <- rbind(gl2, genelists)
 71 | head(genelists);table(genelists$genetype)
 72 | genelists2 <- genelists
 73 | save(genelists2, file = "~/Documents/general_scripts/genelists2(new).RData")
 74 | 
 75 | ###time monocle ----------------------------------
 76 | library(monocle)
 77 | 
 78 | seu3obj <- readRDS("~/Documents/geneswitch/cardiomyocytes/cardiacSeurat.rds")
 79 | allexpdata <- as.matrix(GetAssayData(object = seu3obj, slot = "counts"));dim(allexpdata)
 80 | cellinfo <- seu3obj@meta.data;dim(cellinfo)
 81 | all(rownames(cellinfo) == colnames(allexpdata))
 82 | 
 83 | #####subset cells
 84 | cellinfo_sub <- cellinfo[sample(nrow(cellinfo), 3000),];dim(cellinfo_sub)
 85 | table(cellinfo_sub$library)
 86 | head(cellinfo_sub)
 87 | expdata <- allexpdata[,rownames(cellinfo_sub)];dim(expdata)
 88 | all(rownames(cellinfo_sub) == colnames(expdata))
 89 | 
 90 | genenames <- as.data.frame(rownames(expdata));head(genenames)
 91 | colnames(genenames)[1] <- "gene_short_name"
 92 | rownames(genenames) <- genenames$gene_short_name;head(genenames)
 93 | cells <- cellinfo_sub
 94 | all(rownames(cells) == colnames(expdata))
 95 | all(rownames(genenames) == rownames(expdata))
 96 | ###################
 97 | 
 98 | pd <- new("AnnotatedDataFrame",data=cells)
 99 | fd <- new("AnnotatedDataFrame",data=genenames)
100 | # First create a CellDataSet from the relative expression levels
101 | ORMM <- newCellDataSet(as.matrix(expdata), phenoData = pd, featureData =fd,
102 |                        lowerDetectionLimit = 1, expressionFamily = negbinomial.size())
103 | 
104 | ORMM <- estimateSizeFactors(ORMM)
105 | ORMM <- estimateDispersions(ORMM)
106 | ##Filtering low-quality cells
107 | ORMM<- detectGenes(ORMM, min_expr = 0.1)
108 | print(head(fData(ORMM)))
109 | expressed_genes <- row.names(subset(fData(ORMM), num_cells_expressed >=10))
110 | length(expressed_genes)
111 | print(head(pData(ORMM)));dim(pData(ORMM))
112 | 
113 | ##good to look at the distribution of mRNA totals across the cells
114 | pData(ORMM)$Total_mRNAs <- Matrix::colSums(exprs(ORMM))
115 | 
116 | ORMM <- ORMM[,pData(ORMM)$Total_mRNAs < 1e6]
117 | 
118 | upper_bound <- 10^(mean(log10(pData(ORMM)$Total_mRNAs)) +
119 |                      2*sd(log10(pData(ORMM)$Total_mRNAs)))
120 | lower_bound <- 10^(mean(log10(pData(ORMM)$Total_mRNAs)) -
121 |                      2*sd(log10(pData(ORMM)$Total_mRNAs)))
122 | pdf(paste(pid,"_1total_mRNAs_distribution.pdf",sep = ""))
123 | qplot(Total_mRNAs, data = pData(ORMM), color = Characteristics.inferred.lineage., geom = "density") +
124 |   geom_vline(xintercept = lower_bound) +
125 |   geom_vline(xintercept = upper_bound)
126 | dev.off()
127 | ##removed the few cells with either very low mRNA recovery or far more mRNA that the typical cell
128 | ORMM <- ORMM[,pData(ORMM)$Total_mRNAs > lower_bound &
129 |                pData(ORMM)$Total_mRNAs < upper_bound]
130 | ORMM <- detectGenes(ORMM, min_expr = 0.1)
131 | # Log-transform each value in the expression matrix.
132 | L <- log(exprs(ORMM[expressed_genes,]))
133 | #install.packages("reshape2")
134 | library(reshape2)
135 | # Standardize each gene, so that they are all on the same scale,
136 | # Then melt the data with plyr so we can plot it easily
137 | melted_dens_df <- melt(Matrix::t(scale(Matrix::t(L))))
138 | 
139 | # Plot the distribution of the standardized gene expression values.
140 | pdf(paste(pid,"_2standardized_gene_exp.pdf",sep = ""))
141 | qplot(value, geom = "density", data = melted_dens_df) +
142 |   stat_function(fun = dnorm, size = 0.5, color = 'red') +
143 |   xlab("Standardized log(FPKM)") +
144 |   ylab("Density")
145 | dev.off()
146 | 
147 | #####Ordering based on genes that differ between clusters
148 | disp_table <- dispersionTable(ORMM)
149 | unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)
150 | ORMM <- setOrderingFilter(ORMM, unsup_clustering_genes$gene_id)
151 | pdf(paste(pid,"_3ordering_genes.pdf",sep = ""))
152 | plot_ordering_genes(ORMM)
153 | dev.off()
154 | pdf(paste(pid,"_4pc_variance.pdf",sep = ""))
155 | plot_pc_variance_explained(ORMM, return_all = F) # norm_method='log'
156 | dev.off()
157 | ORMM <- reduceDimension(ORMM, max_components = 2, norm_method = 'log', num_dim = 5,
158 |                         residualModelFormulaStr = "~num_genes_expressed",
159 |                         reduction_method = 'tSNE', verbose = T)
160 | ORMM <- clusterCells(ORMM, verbose = T)
161 | 
162 | ##Constructing Single Cell Trajectories
163 | diff_test_res <- differentialGeneTest(ORMM[expressed_genes,],
164 |                                       fullModelFormulaStr = "~library",
165 |                                       cores = 5, verbose = TRUE)
166 | ordering_genes <- row.names (subset(diff_test_res, qval < 0.01))
167 | ORMM <- setOrderingFilter(ORMM, ordering_genes)
168 | pdf(paste(pid,"_5ordering_genes_fortree.pdf",sep = ""))
169 | plot_ordering_genes(ORMM)
170 | dev.off()
171 | ORMM <- reduceDimension(ORMM, max_components=2, method = 'DDRTree')
172 | ORMM <- orderCells(ORMM)#, num_paths=2)
173 | # save(ORMM, file = "/home/yiqun/Documents/geneswitch/cardiomyocytes/monocle2/ORMM(monocle_DDRTree).RData")
174 | expressed_genes <- row.names(subset(fData(ORMM), num_cells_expressed >=10))
175 | ORMM_filtered <- ORMM[expressed_genes,]
176 | my_genes <- row.names(subset(fData(ORMM_filtered),
177 |                              gene_short_name %in% c("POU5F1","SOX17","EOMES","ISL1","TNNI1","MYL7","THY1")))
178 | cds_subset <- ORMM_filtered[my_genes,]
179 | plot_cell_trajectory(ORMM, color_by = "Pseudotime", markers = "SOX17")
180 | 
181 | 
182 | cardiac_monocle2 <- ORMM
183 | cardiac_monocle2@reducedDimS[,1:5]
184 | cardiac_monocle2@reducedDimS[1,] <- -(cardiac_monocle2@reducedDimS[1,])
185 | cardiac_monocle2@reducedDimK[1,] <- -(cardiac_monocle2@reducedDimK[1,])
186 | head(pData(cardiac_monocle2))
187 | # colnames(pData(cardiac_monocle2))[7] <- "Clusters"
188 | tiff("monocle_pseudotime.tiff", units="in", width=5, height=5, res=300)
189 | plot_cell_trajectory(cardiac_monocle2, color_by = "Pseudotime", cell_size = 1)
190 | dev.off()
191 | plot_cell_trajectory(cardiac_monocle2, color_by = "library", cell_size = 1)
192 | 
193 | pdf(paste("monocle_","trajectory_cardiac",".pdf",sep = ""))
194 | plot_cell_trajectory(cardiac_monocle2, color_by = "library")
195 | plot_cell_trajectory(cardiac_monocle2, color_by = "RNA_snn_res.0.1")
196 | plot_cell_trajectory(cardiac_monocle2, color_by = "Pseudotime")
197 | # plot_genes_in_pseudotime(cds_subset, color_by = "library")
198 | dev.off()
199 | save(cardiac_monocle2, file = "cardiac_monocle2.RData")
200 | 
201 | mcells <- pData(cardiac_monocle2);head(mcells)
202 | head(mcells);dim(mcells)
203 | 
204 | seu3obj <- readRDS("~/Documents/geneswitch/cardiomyocytes/cardiacSeurat.rds")
205 | logexpdata <- as.matrix(GetAssayData(object = seu3obj, slot = "data")[,rownames(mcells)]);dim(logexpdata)
206 | all(rownames(mcells) == colnames(logexpdata))
207 | logexpdata[1:5,1:5]
208 | save(logexpdata, file = "logexpdata.RData")
209 | 
210 | # load("~/Documents/geneswitch/cardiomyocytes/monocle2/cardiac_monocle2.RData")
211 | # load("~/Documents/geneswitch/cardiomyocytes/monocle2/logexpdata.RData")
212 | # usethis::use_data(logexpdata, cardiac_monocle2, genelists, internal = TRUE, overwrite = TRUE)
213 | usethis::use_data(genelists, internal = TRUE, overwrite = TRUE)
214 | 
215 | 
216 | # You can install the released version of GeneSwitches from [CRAN](https://CRAN.R-project.org) with:
217 | 
218 | #``` r
219 | #install.packages("GeneSwitches")
220 | #```
221 | #You can install the development version from [GitHub](https://github.com/) with:
222 | # user  system elapsed
223 | # 205.137   5.573 210.613
224 | # user  system elapsed
225 | # 141.616   2.425 143.954
226 | # user   system  elapsed
227 | # 1956.396  242.979 2197.818
228 | 


--------------------------------------------------------------------------------
/data/gs_genelists.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/data/gs_genelists.rda


--------------------------------------------------------------------------------
/data/msigdb_h_c2_c5.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/data/msigdb_h_c2_c5.rda


--------------------------------------------------------------------------------
/man/binarize_exp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/binarization.R
 3 | \name{binarize_exp}
 4 | \alias{binarize_exp}
 5 | \title{Binarize gene expression}
 6 | \usage{
 7 | binarize_exp(sce, fix_cutoff = FALSE, binarize_cutoff = 0.2,
 8 |   ncores = 3)
 9 | }
10 | \arguments{
11 | \item{sce}{SingleCellExperiment}
12 | 
13 | \item{fix_cutoff}{Logical. if use fixed global cutoff for binarization, default FALSE}
14 | 
15 | \item{binarize_cutoff}{fixed global cutoff for binarization, default 0.2}
16 | 
17 | \item{ncores}{number of cores}
18 | }
19 | \value{
20 | 
21 | }
22 | \description{
23 | This function generates on/off binarized data for gene expression
24 | }
25 | 


--------------------------------------------------------------------------------
/man/common_genes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compare_trajectories.R
 3 | \name{common_genes}
 4 | \alias{common_genes}
 5 | \title{Identify common switching genes between paths}
 6 | \usage{
 7 | common_genes(toplotgl_Rsub1, toplotgl_Rsub2, path1name = "Path1Genes",
 8 |   path2name = "Path2Genes", r2cutoff = 0.05)
 9 | }
10 | \arguments{
11 | \item{toplotgl_Rsub1}{switching genes of path1}
12 | 
13 | \item{toplotgl_Rsub2}{switching genes of path2}
14 | 
15 | \item{path1name}{name of path1 given by user}
16 | 
17 | \item{path2name}{name of path2 given by user}
18 | 
19 | \item{r2cutoff}{pseudo R^2 cutoff}
20 | }
21 | \value{
22 | 
23 | }
24 | \description{
25 | This function identifies common switching genes between two paths
26 | }
27 | 


--------------------------------------------------------------------------------
/man/common_genes_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compare_trajectories.R
 3 | \name{common_genes_plot}
 4 | \alias{common_genes_plot}
 5 | \title{Plot common switching genes between paths}
 6 | \usage{
 7 | common_genes_plot(ggData, timedata)
 8 | }
 9 | \arguments{
10 | \item{ggData}{data frame for common genes}
11 | 
12 | \item{timedata}{timedata to show on plot}
13 | }
14 | \value{
15 | 
16 | }
17 | \description{
18 | This function plots common switching genes between two paths
19 | }
20 | 


--------------------------------------------------------------------------------
/man/convert_monocle2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert_TI.R
 3 | \name{convert_monocle2}
 4 | \alias{convert_monocle2}
 5 | \title{Convert monocle2 output into GeneSwitches object}
 6 | \usage{
 7 | convert_monocle2(monocle2_obj, states, expdata)
 8 | }
 9 | \arguments{
10 | \item{monocle2_obj}{monocle2 output object}
11 | 
12 | \item{states}{a vector of states (path) that are interested in}
13 | 
14 | \item{logexpdata}{log-normal gene expression}
15 | }
16 | \value{
17 | 
18 | }
19 | \description{
20 | This function converts monocle2 output into GeneSwitches object
21 | }
22 | 


--------------------------------------------------------------------------------
/man/convert_slingshot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert_TI.R
 3 | \name{convert_slingshot}
 4 | \alias{convert_slingshot}
 5 | \title{Convert slingshot output into GeneSwitches object}
 6 | \usage{
 7 | convert_slingshot(sce_slingshot, pseudotime_idx, assayname = "expdata")
 8 | }
 9 | \arguments{
10 | \item{sce_slingshot}{slingshot SingleCellExperiment output object}
11 | 
12 | \item{pseudotime_idx}{name of desired pseudotime path to apply GeneSwitches}
13 | 
14 | \item{assayname}{expression assay to use}
15 | }
16 | \value{
17 | 
18 | }
19 | \description{
20 | This function converts slingshot output into GeneSwitches object
21 | }
22 | 


--------------------------------------------------------------------------------
/man/distinct_genes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compare_trajectories.R
 3 | \name{distinct_genes}
 4 | \alias{distinct_genes}
 5 | \title{Identify distinct switching genes for each path}
 6 | \usage{
 7 | distinct_genes(toplotgl_Rsub1, toplotgl_Rsub2, path1name = "Path1Genes",
 8 |   path2name = "Path2Genes", r2cutoff = 0.05, scale_timeline = FALSE,
 9 |   path1time = NULL, path2time = NULL, bin = 100)
10 | }
11 | \arguments{
12 | \item{toplotgl_Rsub1}{switching genes of path1}
13 | 
14 | \item{toplotgl_Rsub2}{switching genes of path2}
15 | 
16 | \item{path1name}{name of path1 given by user}
17 | 
18 | \item{path2name}{name of path2 given by user}
19 | 
20 | \item{r2cutoff}{pseudo R^2 cutoff}
21 | }
22 | \value{
23 | 
24 | }
25 | \description{
26 | This function identifies distinct switching genes for each path
27 | }
28 | 


--------------------------------------------------------------------------------
/man/downsample_zeros.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fit_logistic_regression.R
 3 | \name{downsample_zeros}
 4 | \alias{downsample_zeros}
 5 | \title{Random downsampling of zero expression}
 6 | \usage{
 7 | downsample_zeros(glmdata, ratio_ds = 0.7)
 8 | }
 9 | \arguments{
10 | \item{glmdata}{binary data of one gene}
11 | 
12 | \item{ratio_ds}{downsampling zeros to this proportion}
13 | }
14 | \value{
15 | 
16 | }
17 | \description{
18 | This function does randomly downsampling of cells with zero expression for one gene
19 | }
20 | 


--------------------------------------------------------------------------------
/man/figures/README-binarization cutoff-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-binarization cutoff-1.png


--------------------------------------------------------------------------------
/man/figures/README-binarization threshold-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-binarization threshold-1.png


--------------------------------------------------------------------------------
/man/figures/README-binarization_threshold-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-binarization_threshold-1.png


--------------------------------------------------------------------------------
/man/figures/README-convert slingshot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-convert slingshot-1.png


--------------------------------------------------------------------------------
/man/figures/README-distinct genes-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-distinct genes-1.png


--------------------------------------------------------------------------------
/man/figures/README-pathways ridge plots-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-pathways ridge plots-1.png


--------------------------------------------------------------------------------
/man/figures/README-pathways-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-pathways-1.png


--------------------------------------------------------------------------------
/man/figures/README-pathways_ridge_plots-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-pathways_ridge_plots-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot common genes-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot common genes-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot distinct genes-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot distinct genes-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot exp1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot exp1-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot exp1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot exp1-2.png


--------------------------------------------------------------------------------
/man/figures/README-plot exp2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot exp2-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot pathway genes-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot pathway genes-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot pathways-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot pathways-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot scale timeline-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot scale timeline-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot_exp1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot_exp1-1.png


--------------------------------------------------------------------------------
/man/figures/README-plot_exp1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot_exp1-2.png


--------------------------------------------------------------------------------
/man/figures/README-plot_monocle_trajectory-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plot_monocle_trajectory-1.png


--------------------------------------------------------------------------------
/man/figures/README-plotexp-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-plotexp-1.png


--------------------------------------------------------------------------------
/man/figures/README-scale timeline-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-scale timeline-1.png


--------------------------------------------------------------------------------
/man/figures/README-timeline example-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-timeline example-1.png


--------------------------------------------------------------------------------
/man/figures/README-timeline example-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-timeline example-2.png


--------------------------------------------------------------------------------
/man/figures/README-unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SGDDNB/GeneSwitches/92cd922aac6f1ed6d760c783b43a818c724edeab/man/figures/README-unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/man/filter_switchgenes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_timeline.R
 3 | \name{filter_switchgenes}
 4 | \alias{filter_switchgenes}
 5 | \title{Extract switching gene list of interesting}
 6 | \usage{
 7 | filter_switchgenes(sce, allgenes = FALSE, pathway_name = NULL,
 8 |   genelists = GeneSwitches:::genelists,
 9 |   genetype = c("Surface proteins", "TFs"), zero_pct = 0.9,
10 |   r2cutoff = 0.03, direction = c("up", "down"), topnum = 1e+05)
11 | }
12 | \arguments{
13 | \item{sce}{switching genes}
14 | 
15 | \item{allgenes}{if use all genes}
16 | 
17 | \item{pathway_name}{a list of pathway name(s) to plot}
18 | 
19 | \item{genelists}{a gene list to plot}
20 | 
21 | \item{genetype}{specific gene type to plot c("EMT", "reprogramming", "stem", "surface", "TF")}
22 | 
23 | \item{zero_pct}{zero-expression percentage cut off for significant genes}
24 | 
25 | \item{r2cutoff}{pseudo R^2 cutoff}
26 | 
27 | \item{direction}{switching direction, up or down}
28 | 
29 | \item{topnum}{number of top genes ordered by pseudo R^2 value}
30 | }
31 | \value{
32 | 
33 | }
34 | \description{
35 | This function extract a list of significant switching genes
36 | }
37 | 


--------------------------------------------------------------------------------
/man/find_switch_logistic_fastglm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fit_logistic_regression.R
 3 | \name{find_switch_logistic_fastglm}
 4 | \alias{find_switch_logistic_fastglm}
 5 | \title{Fit fast logistic regression and find switching timepoint}
 6 | \usage{
 7 | find_switch_logistic_fastglm(sce, downsample = FALSE, ds_cutoff = 0.7,
 8 |   zero_ratio = 0.7, sig_FDR = 0.05, show_warnings = TRUE)
 9 | }
10 | \arguments{
11 | \item{sce}{SingleCellExperiment}
12 | 
13 | \item{downsample}{Logical. if do random downsampling of zeros}
14 | 
15 | \item{ds_cutoff}{only do downsampling if zero percentage is over this cutoff}
16 | 
17 | \item{zero_ratio}{downsampling zeros to this proportion}
18 | 
19 | \item{sig_FDR}{FDR cut off for significant genes}
20 | }
21 | \value{
22 | 
23 | }
24 | \description{
25 | This function fits fast logistic regression and find switching timepoint for each gene
26 | }
27 | 


--------------------------------------------------------------------------------
/man/find_switch_pathway.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_switch_pathway.R
 3 | \name{find_switch_pathway}
 4 | \alias{find_switch_pathway}
 5 | \title{Find significantly changed pathways and switching timepoint}
 6 | \usage{
 7 | find_switch_pathway(scerowdata, pathways = msigdb_h_c2_c5, toplotgl_sig,
 8 |   sig_FDR = 0.05)
 9 | }
10 | \arguments{
11 | \item{pathways}{a list of pathways with genes}
12 | 
13 | \item{sig_FDR}{FDR cut off for significant pathways}
14 | 
15 | \item{sce}{SingleCellExperiment}
16 | 
17 | \item{toplotgl_ptw}{swiching genes to plot}
18 | }
19 | \value{
20 | 
21 | }
22 | \description{
23 | This function finds significantly changed pathways and determine
24 |     the switching timepoint for each pathway
25 | }
26 | 


--------------------------------------------------------------------------------
/man/get_example_inputData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_example_inputData.R
 3 | \name{get_example_inputData}
 4 | \alias{get_example_inputData}
 5 | \title{Download example input data}
 6 | \usage{
 7 | get_example_inputData()
 8 | }
 9 | \value{
10 | 
11 | }
12 | \description{
13 | This function checks if example input files are in the current directory and if not download them
14 | }
15 | 


--------------------------------------------------------------------------------
/man/gs_genelists.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{gs_genelists}
 5 | \alias{gs_genelists}
 6 | \title{An example of gene lists}
 7 | \format{A data frame with 3071 rows and 2 columns. rows are genes, and columns are "genenames" and "genetypes".}
 8 | \source{
 9 | \url{http://wlab.ethz.ch/cspa/, http://humantfs.ccbr.utoronto.ca/}
10 | }
11 | \usage{
12 | gs_genelists
13 | }
14 | \description{
15 | Provided gene lists with surface proteins and transctription factors
16 | }
17 | \keyword{datasets}
18 | 


--------------------------------------------------------------------------------
/man/merge_pathways.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_switch_pathway.R
 3 | \name{merge_pathways}
 4 | \alias{merge_pathways}
 5 | \title{Merge redundant pathways}
 6 | \usage{
 7 | merge_pathways(switch_pw, pathways, ratio)
 8 | }
 9 | \arguments{
10 | \item{switch_pw}{a data frame with significantly changed pathways}
11 | 
12 | \item{pathways}{a list of pathways with genes}
13 | 
14 | \item{ratio}{cutoff ratio for merging redundant pathways}
15 | }
16 | \value{
17 | 
18 | }
19 | \description{
20 | Merge significant pathways that are with same genes over certain ratio
21 | }
22 | 


--------------------------------------------------------------------------------
/man/msigdb_h_c2_c5.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{msigdb_h_c2_c5}
 5 | \alias{msigdb_h_c2_c5}
 6 | \title{An example of gene sets}
 7 | \format{A list with 6153 pathways, and genes in each pathway}
 8 | \source{
 9 | \url{http://software.broadinstitute.org/gsea/msigdb/collections.jsp}
10 | }
11 | \usage{
12 | msigdb_h_c2_c5
13 | }
14 | \description{
15 | Pathways of GSEA MSigDB hallmark, c2 and c5 gene sets
16 | }
17 | \keyword{datasets}
18 | 


--------------------------------------------------------------------------------
/man/phyper_pathway.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_switch_pathway.R
 3 | \name{phyper_pathway}
 4 | \alias{phyper_pathway}
 5 | \title{Find significantly changed pathways and switching timepoint}
 6 | \usage{
 7 | phyper_pathway(N, pathways, toplotgl_ptw, sig_FDR = 0.05,
 8 |   direction = c("up", "down"))
 9 | }
10 | \arguments{
11 | \item{N}{expressed genes in pathways}
12 | 
13 | \item{pathways}{a list of pathways with genes}
14 | 
15 | \item{toplotgl_ptw}{swiching genes to plot}
16 | 
17 | \item{sig_FDR}{FDR cut off for significant pathways}
18 | 
19 | \item{direction}{switching direction, up or down}
20 | }
21 | \value{
22 | 
23 | }
24 | \description{
25 | Apply hypergeometric test to determine significantly changed
26 |     pathways and determine the switching timepoint for each pathway
27 | }
28 | 


--------------------------------------------------------------------------------
/man/plot_gene_exp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_gene_exp.R
 3 | \name{plot_gene_exp}
 4 | \alias{plot_gene_exp}
 5 | \title{Plot gene expression}
 6 | \usage{
 7 | plot_gene_exp(sce, geneofi, reduction, downsample = FALSE,
 8 |   ds_cutoff = 0.7, zero_ratio = 0.7, ptsize = 0.7, fitting = FALSE)
 9 | }
10 | \arguments{
11 | \item{sce}{SingleCellExperiment}
12 | 
13 | \item{geneofi}{one gene of interest}
14 | 
15 | \item{reduction}{dimensional reduction method}
16 | 
17 | \item{downsample}{if do random downsampling of zeros}
18 | 
19 | \item{ds_cutoff}{only do downsampling if zero percentage is over this cutoff}
20 | 
21 | \item{zero_ratio}{downsampling zeros to this proportion}
22 | 
23 | \item{ptsize}{point size}
24 | 
25 | \item{fitting}{if plot logistic regression fitting}
26 | }
27 | \value{
28 | 
29 | }
30 | \description{
31 | This function plot gene expression on two-dimensional space
32 | }
33 | 


--------------------------------------------------------------------------------
/man/plot_monocle_State.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert_TI.R
 3 | \name{plot_monocle_State}
 4 | \alias{plot_monocle_State}
 5 | \title{plot monocle2 trajectory colored by State}
 6 | \usage{
 7 | plot_monocle_State(monocle2_obj)
 8 | }
 9 | \arguments{
10 | \item{monocle2_obj}{monocle2 output object}
11 | }
12 | \value{
13 | 
14 | }
15 | \description{
16 | This function plots monocle2 trajectory with "State" colors
17 | }
18 | 


--------------------------------------------------------------------------------
/man/plot_pathway_density.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_switch_pathway.R
 3 | \name{plot_pathway_density}
 4 | \alias{plot_pathway_density}
 5 | \title{Pathways ridge plot}
 6 | \usage{
 7 | plot_pathway_density(switch_pw_re, toplotgl_sig, pw_direction = c("up",
 8 |   "down"), orderbytime = TRUE)
 9 | }
10 | \arguments{
11 | \item{switch_pw_re}{significant pathways to plot}
12 | 
13 | \item{toplotgl_sig}{switching genes}
14 | 
15 | \item{orderbytime}{order the pathways by switching time (mean time of switching genes) if TRUE,
16 | order the pathways by FDR if FALSE}
17 | 
18 | \item{direction}{switching direction of the pathway, up or down}
19 | }
20 | \value{
21 | 
22 | }
23 | \description{
24 | This function generates pathways ridge plots
25 | }
26 | 


--------------------------------------------------------------------------------
/man/plot_timeline_ggplot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_timeline.R
 3 | \name{plot_timeline_ggplot}
 4 | \alias{plot_timeline_ggplot}
 5 | \title{Plot switching genes}
 6 | \usage{
 7 | plot_timeline_ggplot(tml, timedata, iffulltml = TRUE, txtsize = 3.5,
 8 |   color_by = "feature_type")
 9 | }
10 | \arguments{
11 | \item{tml}{switching genes}
12 | 
13 | \item{timedata}{pseudotime for cells}
14 | 
15 | \item{iffulltml}{if plot the full timeline}
16 | 
17 | \item{txtsize}{text size for gene names}
18 | 
19 | \item{color_by}{the cell attribute (e.g. the column of tml) to map to each cell's color}
20 | }
21 | \value{
22 | 
23 | }
24 | \description{
25 | This function plots switching genes on the pseudo-timeline
26 | }
27 | 


--------------------------------------------------------------------------------
/man/reduce_pathways.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_switch_pathway.R
 3 | \name{reduce_pathways}
 4 | \alias{reduce_pathways}
 5 | \title{Reduce redundant pathways}
 6 | \usage{
 7 | reduce_pathways(switch_pw, pathways, redundant_pw_rate = 0.8)
 8 | }
 9 | \arguments{
10 | \item{switch_pw}{a data frame with significantly changed pathways}
11 | 
12 | \item{pathways}{a list of pathways with genes}
13 | 
14 | \item{redundant_pw_rate}{cutoff ratio for merging redundant pathways}
15 | }
16 | \value{
17 | 
18 | }
19 | \description{
20 | Reduce significant pathways that are with same genes over certain ratio,
21 |     do up- and down- regulated pathways separately
22 | }
23 | 


--------------------------------------------------------------------------------
/man/subset_pseudotime.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert_TI.R
 3 | \name{subset_pseudotime}
 4 | \alias{subset_pseudotime}
 5 | \title{Subset GeneSwitches object based on the range of pseudotime}
 6 | \usage{
 7 | subset_pseudotime(sce, min_time, max_time, assayname = "expdata",
 8 |   minexp = 0, mincells = 10)
 9 | }
10 | \arguments{
11 | \item{sce}{GeneSwitches object which is a SingleCellExperiment object}
12 | 
13 | \item{min_time}{lower bound of pseudotime}
14 | 
15 | \item{max_time}{upper bound of pseudotime}
16 | 
17 | \item{assayname}{expression assay to use}
18 | 
19 | \item{minexp}{minimun expression to filer genes}
20 | 
21 | \item{mincells}{minimun cells with expression}
22 | }
23 | \value{
24 | 
25 | }
26 | \description{
27 | This function subsets GeneSwitches object based on the range of pseudotime
28 | }
29 | 


--------------------------------------------------------------------------------