├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── cnvFunction.R ├── scAnnotation.R ├── scCombination.R ├── scStatistics.R └── utils.R ├── README.md ├── inst ├── rds │ ├── cellTypeTemplates.RDS │ ├── cnvRef_Data-HM.RDS │ ├── cnvRef_Data-boneMarrow-MS.RDS │ ├── cnvRef_SNN-HM.RDS │ └── cnvRef_SNN-boneMarrow-MS.RDS ├── rmd │ ├── SoupX.Rmd │ ├── cellCalling.Rmd │ ├── cellCycle.Rmd │ ├── cellInteraction.Rmd │ ├── cellTypePred.Rmd │ ├── contamination.Rmd │ ├── diffExpr.Rmd │ ├── doublet.Rmd │ ├── exprProgram.Rmd │ ├── filterCell.Rmd │ ├── filterGene.Rmd │ ├── geneSets.Rmd │ ├── main-scAnno.Rmd │ ├── main-scAnnoComb.Rmd │ ├── main-scStat.Rmd │ ├── malign-comb.Rmd │ ├── malignancy.Rmd │ ├── stemness.Rmd │ └── umap.Rmd └── txt │ ├── PairsLigRec.txt │ ├── cellCycle-genes.txt │ ├── diss-genes.txt │ ├── gene-chr-hg19.txt │ ├── gene-chr-hg38.txt │ ├── gene-chr-mm10.txt │ ├── gene.chr.txt │ ├── hallmark-pathways.txt │ ├── hg-mm-HomologyGenes.txt │ └── pcbc-stemsig.tsv ├── man ├── Read10Xdata.Rd ├── checkAnnoArguments.Rd ├── checkCombArguments.Rd ├── checkStatArguments.Rd ├── clusterBarPlot.Rd ├── extractFiles.Rd ├── genAnnoReport.Rd ├── genStatReport.Rd ├── generate10Xdata.Rd ├── getCellTypeColor.Rd ├── getDefaultColors.Rd ├── getDefaultGeneSets.Rd ├── getDefaultMarkers.Rd ├── getTumorCluster.Rd ├── ggplot_config.Rd ├── markerPlot.Rd ├── plotCellInteraction.Rd ├── plotExprProgram.Rd ├── plotGeneSet.Rd ├── plotMalignancy.Rd ├── plotSeurat.Rd ├── pointDRPlot.Rd ├── predCellType.Rd ├── prepareData.Rd ├── prepareSeurat.Rd ├── runCellClassify.Rd ├── runCellCycle.Rd ├── runCellInteraction.Rd ├── runDoublet.Rd ├── runExprProgram.Rd ├── runGeneSets.Rd ├── runMalignancy.Rd ├── runScAnnotation.Rd ├── runScCombination.Rd ├── runScStatistics.Rd ├── runSeurat.Rd ├── runStemness.Rd └── runSurvival.Rd ├── scCancer.Rproj └── vignettes ├── .gitignore └── scCancer.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | 
^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^doc$ 4 | ^Meta$ 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | doc 3 | Meta 4 | .Rproj.user 5 | .Rhistory 6 | rprof.log -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: scCancer 2 | Type: Package 3 | Title: A package for automated processing of single cell RNA-seq data in cancer 4 | Version: 2.2.1 5 | Author: G-Lab 6 | Maintainer: Wenbo Guo 7 | Description: The package 'scCancer' focuses on processing and analyzing 8 | scRNA-seq data for cancer research. In addition to routine data processing steps, 9 | it also integrates several cancer-specific analyses, for example, 10 | more comprehensive quality control, cancer microenvironment cell type 11 | prediction, cell malignancy estimation, gene set signature score calculation, 12 | and expression program identification. After all steps, the package can 13 | generate a user-friendly graphic report. 
14 | License: GPL-3 15 | Encoding: UTF-8 16 | LazyData: true 17 | RoxygenNote: 7.1.1 18 | Suggests: 19 | rmarkdown, 20 | DropletUtils 21 | VignetteBuilder: knitr 22 | Imports: 23 | cowplot, 24 | dplyr, 25 | ggExtra, 26 | ggplot2, 27 | grid, 28 | gridExtra, 29 | GSVA, 30 | knitr, 31 | markdown, 32 | Matrix, 33 | methods, 34 | NNLM, 35 | pheatmap, 36 | R.utils, 37 | reshape2, 38 | scds, 39 | Seurat (>= 3.0), 40 | SingleCellExperiment, 41 | SoupX, 42 | stringr, 43 | survival, 44 | survminer, 45 | harmony, 46 | liger 47 | Depends: R (>= 3.5.0) 48 | biocViews: 49 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(Read10Xdata) 4 | export(checkAnnoArguments) 5 | export(checkCombArguments) 6 | export(checkStatArguments) 7 | export(clusterBarPlot) 8 | export(extractFiles) 9 | export(genAnnoReport) 10 | export(genStatReport) 11 | export(generate10Xdata) 12 | export(getCellTypeColor) 13 | export(getDefaultColors) 14 | export(getDefaultGeneSets) 15 | export(getDefaultMarkers) 16 | export(getTumorCluster) 17 | export(ggplot_config) 18 | export(markerPlot) 19 | export(plotCellInteraction) 20 | export(plotExprProgram) 21 | export(plotGeneSet) 22 | export(plotMalignancy) 23 | export(plotSeurat) 24 | export(pointDRPlot) 25 | export(predCellType) 26 | export(prepareData) 27 | export(prepareSeurat) 28 | export(runCellClassify) 29 | export(runCellCycle) 30 | export(runCellInteraction) 31 | export(runDoublet) 32 | export(runExprProgram) 33 | export(runGeneSets) 34 | export(runMalignancy) 35 | export(runScAnnotation) 36 | export(runScCombination) 37 | export(runScStatistics) 38 | export(runSeurat) 39 | export(runStemness) 40 | export(runSurvival) 41 | import(Matrix) 42 | import(R.utils) 43 | import(Seurat) 44 | import(SoupX) 45 | import(ggplot2) 46 | import(harmony) 47 | import(knitr) 48 | 
import(liger) 49 | import(survival) 50 | import(survminer) 51 | importFrom(GSVA,gsva) 52 | importFrom(NNLM,nnmf) 53 | importFrom(SingleCellExperiment,SingleCellExperiment) 54 | importFrom(cowplot,get_legend) 55 | importFrom(cowplot,plot_grid) 56 | importFrom(dplyr,"%>%") 57 | importFrom(dplyr,group_by) 58 | importFrom(dplyr,top_n) 59 | importFrom(ggExtra,ggMarginal) 60 | importFrom(grDevices,boxplot.stats) 61 | importFrom(grDevices,colorRampPalette) 62 | importFrom(grid,grid.draw) 63 | importFrom(grid,grid.newpage) 64 | importFrom(grid,unit.c) 65 | importFrom(gridExtra,arrangeGrob) 66 | importFrom(gridExtra,grid.arrange) 67 | importFrom(markdown,markdownToHTML) 68 | importFrom(methods,as) 69 | importFrom(pheatmap,pheatmap) 70 | importFrom(reshape2,melt) 71 | importFrom(scds,bcds) 72 | importFrom(scds,cxds) 73 | importFrom(stats,cor) 74 | importFrom(stats,density) 75 | importFrom(stats,filter) 76 | importFrom(stats,median) 77 | importFrom(stats,quantile) 78 | importFrom(stats,sd) 79 | importFrom(stringr,str_c) 80 | importFrom(utils,read.delim) 81 | importFrom(utils,read.table) 82 | importFrom(utils,write.csv) 83 | importFrom(utils,write.table) 84 | -------------------------------------------------------------------------------- /R/cnvFunction.R: --------------------------------------------------------------------------------
# prepareCNV (internal)
#
# Assemble the inputs of the CNV inference steps.
#
# Arguments:
#   expr.data        Gene-by-cell expression matrix of the observed cells.
#   gene.manifest    Table that is row-indexed by the row names of `expr.data`
#                    and provides an `EnsemblID` column.
#   cell.annotation  Table providing the columns `barcodes` and `Cell.Type`.
#   ref.data         Gene-by-cell matrix of reference cells. When NULL, the
#                    reference bundled with the package for `species` is read.
#   species, genome  Select the bundled gene position table
#                    (human: hg19 / hg38, mouse: mm10).
#   hg.mm.mix        Accepted for interface compatibility; not used here.
#
# Returns a list with
#   expr.data  observed and reference cells side by side, rows named by
#              Ensembl ID and ordered like `gene.chr`;
#   gene.chr   gene position table restricted to the retained genes;
#   cell.anno  data.frame(cellName, cellAnno) where reference cells carry the
#              annotation "Reference".
#
# Stops when `species`/`genome` is not supported or when the observed and
# reference matrices share no gene.
prepareCNV <- function(expr.data,
                       gene.manifest,
                       cell.annotation,
                       ref.data = NULL,
                       species = "human",
                       genome = "hg19",
                       hg.mm.mix = F){
    ## gene.chr: one lookup table instead of three copy-pasted read.table calls
    chr.files <- list(
        human = c(hg38 = "gene-chr-hg38.txt", hg19 = "gene-chr-hg19.txt"),
        mouse = c(mm10 = "gene-chr-mm10.txt")
    )
    if(!(species %in% names(chr.files))){
        stop("Error in 'runInferCNV': ", species, " is not allowed for 'species'.\n")
    }
    if(!(genome %in% names(chr.files[[species]]))){
        stop("Error in 'runInferCNV': ", genome, " is not allowed for 'genome'.\n")
    }
    gene.chr <- read.table(system.file("txt", chr.files[[species]][[genome]], package = "scCancer"),
                           col.names = c("EnsemblID", "CHR", "C_START", "C_STOP"),
                           stringsAsFactors = F)

    ## reference.data
    if(is.null(ref.data)){
        ref.files <- c(human = "cnvRef_Data-HM.RDS",
                       mouse = "cnvRef_Data-boneMarrow-MS.RDS")
        ref.data <- readRDS(system.file("rds", ref.files[[species]], package = "scCancer"))
    }
    ref.anno <- data.frame(cellName = colnames(ref.data),
                           cellAnno = "Reference",
                           stringsAsFactors = F)

    ## combine data
    com.genes <- intersect(rownames(expr.data), rownames(ref.data))
    if(length(com.genes) == 0){
        stop("Error in 'prepareCNV': 'expr.data' and 'ref.data' share no gene names.\n")
    }
    # drop = FALSE keeps the matrix shape for a single gene or a single cell
    ref.data <- ref.data[com.genes, , drop = FALSE]
    expr.data <- expr.data[com.genes, , drop = FALSE]
    expr.data <- cbind(as.matrix(expr.data), ref.data)
    rownames(expr.data) <- gene.manifest[rownames(expr.data), ]$EnsemblID

    ## combine cell.anno
    cell.anno <- data.frame(cellName = cell.annotation$barcodes,
                            cellAnno = cell.annotation$Cell.Type,
                            stringsAsFactors = F)
    cell.anno <- rbind(cell.anno, ref.anno)
    rownames(cell.anno) <- cell.anno$cellName

    ## common genes between expr.data and gene.chr
    com.genes <- intersect(rownames(expr.data), gene.chr$EnsemblID)
    gene.chr <- subset(gene.chr, EnsemblID %in% com.genes)

    # gene.chr <- gene.chr[with(gene.chr, order(CHR, C_START, C_STOP)), ]

    # rows follow the order of the position table, not of the input matrix
    expr.data <- expr.data[gene.chr$EnsemblID, , drop = FALSE]
    rownames(gene.chr) <- gene.chr$EnsemblID

    return(list(expr.data = expr.data,
                gene.chr = gene.chr,
                cell.anno = cell.anno))
}


# ---------------------------------------------------------------------------
# Internal helpers of the CNV inference pipeline.
#
# Every helper takes and returns a `cnvList`: a list with `expr.data`
# (gene-by-cell matrix), `gene.chr` (gene position table with a `CHR` column)
# and `cell.anno` (data.frame with `cellName` and `cellAnno`, where reference
# cells are annotated as "Reference").
# ---------------------------------------------------------------------------


# Keep the genes whose mean value over all cells is at least `cutoff` and that
# are non-zero in at least `minCell` cells. `expr.data` and `gene.chr` are
# filtered together so that their rows stay aligned.
rmGeneForCNV <- function(cnvList, cutoff = 0.1, minCell = 3){
    gene.mean <- Matrix::rowMeans(cnvList$expr.data)
    gene.sum <- Matrix::rowSums(cnvList$expr.data > 0)
    genes.sel <- rownames(cnvList$expr.data)[gene.mean >= cutoff & gene.sum >= minCell]

    # drop = FALSE keeps the matrix shape when a single gene survives
    cnvList$expr.data <- cnvList$expr.data[genes.sel, , drop = FALSE]
    cnvList$gene.chr <- cnvList$gene.chr[genes.sel, , drop = FALSE]

    return(cnvList)
}


# Scale every cell (column) to the same total, then multiply by the power of
# ten closest (on the log10 scale) to the mean column sum.
normalizeDataForCNV <- function(cnvList){
    expr.data <- cnvList$expr.data

    cs <- Matrix::colSums(expr.data)
    expr.data <- t(t(expr.data) / cs)
    normalize_factor <- 10^round(log10(mean(cs)))
    expr.data <- expr.data * normalize_factor

    cnvList$expr.data <- expr.data
    return(cnvList)
}



# Apply the transform 2 * sqrt(x + 3/8) to the expression values.
anscombeTransform <- function(cnvList){
    cnvList$expr.data <- 2 * sqrt(cnvList$expr.data + 3/8)
    return(cnvList)
}



# Apply log2(x + 1) to the expression values.
logForCNV <- function(cnvList){
    cnvList$expr.data <- log2(cnvList$expr.data + 1)
    return(cnvList)
}



# Return one threshold: the mean of the absolute values of the average
# per-cell minimum and the average per-cell maximum.
getAverageBounds <- function(cnvList){
    lower.bound <- mean(apply(cnvList$expr.data, 2, min))
    upper.bound <- mean(apply(cnvList$expr.data, 2, max))
    threshold <- mean(abs(c(lower.bound, upper.bound)))
    return(threshold)
}



# Clip the expression values to the interval [-threshold, threshold].
boundForCNV <- function(cnvList, threshold){
    cnvList$expr.data[cnvList$expr.data > threshold] <- threshold
    cnvList$expr.data[cnvList$expr.data < (-1 * threshold)] <- -1 * threshold
    return(cnvList)
}



# Smooth one numeric vector with a triangular (pyramid) kernel of width
# `window.len`. The vector is zero-padded on both sides and the weighted sum is
# divided by the sum of the weights that fall on real data, so the ends are
# averaged over a truncated window instead of being pulled towards zero.
#
# `window.len` is expected to be odd (smoothByChr() guarantees this). The
# default used to be the self-referential `window.len = window.len`, which
# fails with "promise already under evaluation" as soon as the argument is
# omitted; it now defaults to 101, the value used by smoothByChr().
#
# Returns a vector of the same length as `ori.data`.
smoothOne <- function(ori.data, window.len = 101){
    half.window <- (window.len - 1) / 2

    pad.data <- c(rep(0, half.window), ori.data, rep(0, half.window))
    bool.data <- c(rep(0, half.window), rep(1, length(ori.data)), rep(0, half.window))

    # seq_len() (rather than 1:half.window) keeps the kernel correct when
    # half.window is 0
    ramp <- seq_len(half.window)
    kernel.vec <- c(ramp, half.window + 1, rev(ramp))

    # stats::filter is named explicitly so that an attached dplyr::filter can
    # never be picked up instead
    sum.data <- stats::filter(pad.data, kernel.vec, sides = 2)
    num.data <- stats::filter(bool.data, kernel.vec, sides = 2)
    # the positions that fall into the padding come back as NA
    sum.data <- sum.data[!is.na(sum.data)]
    num.data <- num.data[!is.na(num.data)]

    smo.data <- sum.data / num.data
    return(smo.data)
}


# Smooth the expression of every cell along each chromosome with smoothOne().
# Genes are taken in the row order of `expr.data`, which prepareCNV() aligns
# with `gene.chr`. Chromosomes with a single gene are left untouched. A window
# shorter than 2 returns the input unchanged; an even window is enlarged by one.
smoothByChr <- function(cnvList, window.len = 101){
    chrList <- cnvList$gene.chr$CHR
    chrs <- as.character(unique(cnvList$gene.chr$CHR))

    if(window.len < 2){
        cat("- Warning in 'smoothBychr': Window length < 2, returning original data.\n")
        return(cnvList)
    }
    # the kernel is symmetric around a centre element, so the length must be odd
    if(window.len %% 2 == 0){
        window.len <- window.len + 1
        cat("- Warning in 'smoothBychr': Window length is even, adding one to 'window.len'.\n")
    }

    expr.data <- cnvList$expr.data
    for(chr in chrs) {
        cur.genes.ix <- which(chrList == chr)
        if(length(cur.genes.ix) > 1) {
            cur.data <- expr.data[cur.genes.ix, , drop = FALSE]

            smooth.data <- apply(cur.data, 2, smoothOne, window.len = window.len)
            rownames(smooth.data) <- rownames(cur.data)
            colnames(smooth.data) <- colnames(cur.data)

            expr.data[cur.genes.ix, ] <- smooth.data
        }
    }
    cnvList$expr.data <- expr.data
    return(cnvList)
}



# Centre every cell (column) by subtracting its median (method = "median") or,
# for any other `method`, its mean. NA values are ignored when the centre is
# computed.
#
# sweep() replaces the former t(apply(x, 1, "-", centre)), which returned a
# transposed result for a matrix with a single column.
centerAcrossChr <- function(cnvList, method = "median"){
    expr.data <- cnvList$expr.data
    if (method == "median") {
        col.center <- apply(expr.data, 2, function(x) { median(x, na.rm = TRUE) })
    } else {
        col.center <- apply(expr.data, 2, function(x) { mean(x, na.rm = TRUE) })
    }
    cnvList$expr.data <- sweep(expr.data, 2, col.center, "-")
    return(cnvList)
}



# Subtract, gene by gene, the average of the reference cells from all cells.
# With `inv_log = TRUE` the average is taken after undoing log2(x + 1) and is
# transformed back afterwards.
subtractRefExpr <- function(cnvList, inv_log = TRUE){
    ref.cellNames <- subset(cnvList$cell.anno, cellAnno == "Reference")$cellName
    # drop = FALSE so that rowMeans() also works with a single reference cell
    ref.expr <- cnvList$expr.data[, ref.cellNames, drop = FALSE]

    if (inv_log) {
        ref.means <- log2(Matrix::rowMeans(2^ref.expr - 1) + 1)
    } else {
        ref.means <- Matrix::rowMeans(ref.expr)
    }

    # ref.means has one entry per gene and is recycled down every column
    cnvList$expr.data <- cnvList$expr.data - ref.means
    return(cnvList)
}


# Replace the expression values x by 2^x.
invertLog2 <- function(cnvList){
    cnvList$expr.data <- 2^cnvList$expr.data
    return(cnvList)
}



# Set every value that lies strictly inside
#   mean(reference) +/- sd_amplifier * mean(per-reference-cell sd)
# to the reference mean.
denoiseByRefMeanSd <- function(cnvList, sd_amplifier=1.5){
    expr.data <- cnvList$expr.data
    ref.cellNames <- subset(cnvList$cell.anno, cellAnno == "Reference")$cellName
    ref.expr <- expr.data[, ref.cellNames, drop = FALSE]

    mean.ref.vals <- mean(ref.expr)
    mean.ref.sd <- mean(apply(ref.expr, 2, function(x) sd(x, na.rm = TRUE))) * sd_amplifier

    up.bound <- mean.ref.vals + mean.ref.sd
    low.bound <- mean.ref.vals - mean.ref.sd

    expr.data[expr.data > low.bound & expr.data < up.bound] <- mean.ref.vals

    cnvList$expr.data <- expr.data

    return(cnvList)
}



# Clip the values to [average per-cell minimum, average per-cell maximum].
removeOutliers <- function(cnvList){
    expr.data <- cnvList$expr.data
    up.bound <- mean(apply(expr.data, 2, max))
    low.bound <- mean(apply(expr.data, 2, min))

    expr.data[expr.data < low.bound] <- low.bound
    expr.data[expr.data > up.bound] <- up.bound

    cnvList$expr.data <- expr.data

    return(cnvList)
}



# Run the whole CNV inference pipeline and return the final `cnvList`.
# See prepareCNV() for the data arguments and rmGeneForCNV() for `cutoff` and
# `minCell`. The steps below are applied strictly in this order.
runCNV <- function(expr.data,
                   gene.manifest,
                   cell.annotation,
                   cutoff = 0.1, minCell = 3,
                   ref.data = NULL,
                   species = "human",
                   genome = "hg19",
                   hg.mm.mix = F){

    cnvList <- prepareCNV(expr.data = expr.data,
                          gene.manifest = gene.manifest,
                          cell.annotation = cell.annotation,
                          ref.data = ref.data,
                          species = species,
                          genome = genome,
                          hg.mm.mix = hg.mm.mix)
    cnvList <- rmGeneForCNV(cnvList, cutoff = cutoff, minCell = minCell)
    cnvList <- normalizeDataForCNV(cnvList)
    cnvList <- anscombeTransform(cnvList)
    cnvList <- logForCNV(cnvList)
    threshold <- getAverageBounds(cnvList)
    cnvList <- boundForCNV(cnvList, threshold)
    cnvList <- smoothByChr(cnvList, window.len = 101)
    cnvList <- centerAcrossChr(cnvList, method = "median")
    cnvList <- subtractRefExpr(cnvList)
    cnvList <- invertLog2(cnvList)
    cnvList <- denoiseByRefMeanSd(cnvList, sd_amplifier = 1.0)
    cnvList <- removeOutliers(cnvList)

    return(cnvList)
}


# Malignancy score of every cell: the mean squared deviation of its CNV
# profile from 1.
#
#   cell.type  "Observation" (all cells not annotated "Reference") or
#              "Reference".
#   method     "direct" scores each profile as is. "smooth" first mixes each
#              profile with those of its neighbours in `adjMat`: weight 0.5 for
#              the cell itself and 0.5 shared equally among the neighbours whose
#              adjacency value exceeds a quantile threshold.
#   adjMat     Cell-by-cell adjacency matrix with an `@x` slot (e.g. a sparse
#              matrix). NOTE(review): the code relies on its row/column order
#              matching the column order of the selected cells -- confirm at
#              the call sites. When NULL, "smooth" falls back to "direct".
#
# Returns a numeric vector named by cell. Stops on an unknown `cell.type` or
# `method` (formerly these ended in an "object not found" error).
getMalignScore <- function(cnvList, cell.type = "Observation", method = "smooth", adjMat = NULL){
    if(cell.type == "Observation"){
        cell.names <- subset(cnvList$cell.anno, cellAnno != "Reference")$cellName
    }else if(cell.type == "Reference"){
        cell.names <- subset(cnvList$cell.anno, cellAnno == "Reference")$cellName
    }else{
        stop("Error in 'getMalignScore': ", cell.type, " is not allowed for 'cell.type'.\n")
    }

    # drop = FALSE keeps a one-column matrix when a single cell is selected
    cur.data <- cnvList$expr.data[, cell.names, drop = FALSE]

    if(is.null(adjMat) && method == "smooth"){
        cat("- Warning in 'getMalignScore': Adjacent matrix is not provided, and use 'direct' method instead.\n")
        method <- "direct"
    }
    if(method == "smooth"){
        # threshold chosen so that about 10 entries per cell lie above it
        thres <- quantile(adjMat@x, 1- (dim(adjMat)[1] * 10 / length(adjMat@x)))

        indexes <- as.matrix((adjMat > thres) + 0)
        n.cell <- dim(indexes)[1]

        # per-row weight of each neighbour; rows whose only entry is the cell
        # itself give 0.5 / 0 = Inf and are treated as having no neighbour
        tt <- 0.5 / (rowSums(indexes) - 1)
        tt[is.infinite(tt)] <- 0

        indexes <- indexes * tt
        indexes <- indexes * (1 - diag(n.cell))
        diagValue <- rep(0.5, n.cell)
        diagValue[tt == 0] <- 1

        # nrow is given explicitly: diag() of a length-one vector would
        # otherwise be read as a matrix size
        indexes <- t(indexes + diag(diagValue, nrow = n.cell))

        new.cur.data <- as.matrix(cur.data) %*% indexes
        malignScore <- colSums((new.cur.data - 1)^2)
        malignScore <- malignScore / dim(new.cur.data)[1]

    }else if(method == "direct"){
        malignScore <- colSums((cur.data - 1)^2)
        malignScore <- malignScore / dim(cur.data)[1]
    }else{
        stop("Error in 'getMalignScore': ", method, " is not allowed for 'method'.\n")
    }

    names(malignScore) <- colnames(cur.data)

    return(malignScore)
}


# Overlaid histograms of the malignancy scores of the observed and the
# reference cells. When `malign.thres` is given, it is drawn as a dashed red
# vertical line. Returns the ggplot object.
malignPlot <- function(obserScore, referScore, malign.thres = NULL){
    scoreDF <- data.frame(malignScore = c(obserScore, referScore),
                          sets = c(rep("Observation", length(obserScore)),
                                   rep("Reference", length(referScore))))
    # two separate layers so that the histograms overlap instead of stacking
    p <- ggplot() +
        geom_histogram(data = subset(scoreDF, sets == "Observation"),
                       mapping = aes(x = malignScore, fill = "Observation"),
                       bins = 150, alpha = 0.6) +
        geom_histogram(data = subset(scoreDF, sets == "Reference"),
                       mapping = aes(x = malignScore, fill = "Reference"),
                       bins = 150, alpha = 0.6) +
        labs(x = "Malignancy score", y = "Droplets count") +
        scale_fill_manual(name = "Cells sets", guide = "legend",
                          values = c("Observation"="#2e68b7", "Reference"="grey")) +
        theme_classic() +
        ggplot_config(base.size = 7) +
        theme(legend.justification = c(1.12,1.12), legend.position = c(1,1))
    if(!is.null(malign.thres)){
        p <- p + geom_vline(xintercept = malign.thres, colour = "red", linetype = "dashed")
    }
    return(p)
}



# Look for a bimodal shape in the kernel density of `scores`.
#
# The local extrema of the density are located through sign changes of its
# first difference. Neighbouring extrema whose heights differ by less than
# 0.1% of the highest extremum are discarded as flat ripples. If at least
# three extrema remain and the second-highest one is prominent enough, the
# position of the lowest extremum between the two highest ones is returned as
# threshold. Otherwise NULL is returned.
getBimodalThres <- function(scores){
    x.density <- density(scores)
    d.x.density <- diff(x.density$y)
    d.sign <- (d.x.density > 0) + 0

    # NOTE(review): index k marks a slope change between steps k and k + 1, so
    # the extremum itself appears to sit at grid point k + 1. The one-step
    # offset is kept as is so that existing thresholds do not shift.
    ext.pos <- which(diff(d.sign) != 0)
    ext.density <- x.density$y[ext.pos]

    if(length(ext.pos) >= 3){
        # computed only here: max() of an empty vector would raise a warning
        y.max <- max(ext.density)
        del.ix <- c()
        for(ei in 2:length(ext.density)){
            if(abs(ext.density[ei] - ext.density[ei - 1]) < y.max * 0.001){
                del.ix <- c(del.ix, ei - 1, ei)
            }
        }
        sel.ix <- !(1:length(ext.density) %in% unique(del.ix))
        ext.density <- ext.density[sel.ix]
        ext.pos <- ext.pos[sel.ix]
    }

    threshold <- NULL
    if(length(ext.pos) >= 3){
        # prominence of an extremum: the smaller height difference to its two
        # neighbours, with the density taken as 0 beyond both ends
        t.ext.density <- c(0, ext.density, 0)
        ext.height <- sapply(2:(length(ext.pos) + 1), FUN = function(x){
            return(min(abs(t.ext.density[x] - t.ext.density[x-1]),
                       abs(t.ext.density[x] - t.ext.density[(x+1)])))
        })
        ext <- data.frame(x = ext.pos, y = ext.density, height = ext.height)
        max.ix <- order(ext.density, decreasing = T)
        # isTRUE() guards against a NaN ratio (0 / 0), which would otherwise
        # abort the `if`
        if(isTRUE(ext.height[max.ix[2]] / ext.height[max.ix[1]] > 0.01)){
            cut.df <- ext[c(max.ix[2]:max.ix[1]), ]
            threshold <- x.density$x[cut.df[which.min(cut.df$y), ]$x]
        }
    }

    return(threshold)
}


# Earlier version of getBimodalThres(), kept for reference (no removal of flat
# extrema, prominence ratio 0.1 instead of 0.01).
#
# getBimodalThres <- function(scores){
#     x.density <- density(scores)
#     d.x.density <- diff(x.density$y)
#     d.sign <- (d.x.density > 0) + 0
#
#     ext.pos <- which(d.sign[2:length(d.sign)] - d.sign[1:(length(d.sign)-1)] != 0)
#     if(length(ext.pos) >= 3){
#         ext.density <- x.density$y[ext.pos]
#         t.ext.density <- c(0, ext.density, 0)
#         ext.height <- sapply(2:(length(ext.pos) + 1), FUN = function(x){
#             return(min(abs(t.ext.density[x] - t.ext.density[x-1]), abs(t.ext.density[x] - t.ext.density[(x+1)])))
#         })
#         ext <- data.frame(x = ext.pos, y = ext.density, height = ext.height)
#
#         max.ix <- order(ext.density, decreasing = T)
#         if(ext.height[max.ix[2]] / ext.height[max.ix[1]] > 0.1){
#             cut.df <- ext[c(max.ix[2]:max.ix[1]), ]
#             threshold <- x.density$x[cut.df[which.min(cut.df$y), ]$x]
#         }else{
#             threshold <- NULL
#         }
#     }else{
#         threshold <- NULL
#     }
#     return(threshold)
# }



#' plotMalignancy
#'
#' Draw the malignancy type and score on the 2D embedding and the malignancy
#' type composition as a bar plot.
#'
#' @param cell.annotation A data.frame of cells' annotation containing the cells'
#' malignancy score (`Malign.score`) and type (`Malign.type`).
#' @param savePath A path to save the figures. The default is NULL, and nothing
#' is written. Otherwise the figures are saved in its `figures` sub-directory,
#' which is created if it does not exist.
#' @inheritParams runScAnnotation
#'
#' @return A plot list.
#' @export
#'
plotMalignancy <- function(cell.annotation,
                           coor.names = c("tSNE_1", "tSNE_2"),
                           savePath = NULL){
    type.colors <- c("malignant" = "#f57e87", "nonMalignant" = "#66d5a5")

    ## scatter plot of malignancy
    p.malignType.Point <- pointDRPlot(cell.annotation, value = "Malign.type",
                                      coor.names = coor.names,
                                      colors = type.colors,
                                      legend.position = "right",
                                      legend.title = "Malignancy\n type")

    p.malignScore.Point <- pointDRPlot(cell.annotation, value = "Malign.score",
                                       coor.names = coor.names,
                                       colors = c("white", "#f57e87"),
                                       discrete = F,
                                       limit.quantile = 0.1,
                                       legend.position = "right",
                                       legend.title = "Malignancy\n score")

    p.malignType.bar <- clusterBarPlot(cell.annotation = cell.annotation,
                                       cell.colors = type.colors,
                                       sel.col = "Malign.type",
                                       legend.title = "Malignancy type")

    ## save
    if(!is.null(savePath)){
        # ggsave() does not create missing directories
        if(!dir.exists(file.path(savePath, "figures/"))){
            dir.create(file.path(savePath, "figures/"), recursive = T)
        }
        ggsave(filename = file.path(savePath, "figures/malignType-point.png"),
               p.malignType.Point, width = 5, height = 3.8, dpi = 300)
        ggsave(filename = file.path(savePath, "figures/malignScore-point.png"),
               p.malignScore.Point, width = 5, height = 3.8, dpi = 300)
        ggsave(filename = file.path(savePath, "figures/malignType-bar.png"),
               p.malignType.bar, width = 6, height = 3, dpi = 300)
    }

    return(list(p.malignType.Point = p.malignType.Point,
                p.malignScore.Point = p.malignScore.Point,
                p.malignType.bar = p.malignType.bar))
}




#' runMalignancy
#'
#' Infer a CNV profile for every cell against a normal reference, turn it into
#' a malignancy score, and label the cells as "malignant" or "nonMalignant"
#' according to the bimodality of the score distribution.
#'
#' @param expr A Seurat object.
#' @param gene.manifest A data.frame of genes' manifest.
#' @param cell.annotation A data.frame of cells' annotation.
#' @param cutoff The cut-off for the minimum average read counts per gene,
#' computed over the observed and reference cells together. The default is 0.1.
#' @param minCell An integer number used to filter gene. The default is 3.
#' @param p.value.cutoff The p-value to decide whether the distribution of
#' malignancy score is bimodality. It is kept for compatibility and is
#' currently not used by the implementation.
#' @param ref.data An expression matrix of gene by cell, which is used as the normal reference.
#' The default is NULL, and an immune cells or bone marrow cells expression matrix will be used for human or mouse species, respectively.
#' @param referAdjMat An adjacent matrix for the normal reference data.
#' The larger the value, the closer the cell pair is.
#' The default is NULL, and a SNN matrix of the default ref.data will be used.
#' @inheritParams runScAnnotation
#'
#' @return A list of cnvList, referScore (the reference malignancy score),
#' expr (the Seurat object), cell.annotation, ju.exist.malign, malign.thres,
#' and p.results (all generated plots).
#' @export
#'
runMalignancy <- function(expr,
                          gene.manifest,
                          cell.annotation,
                          savePath,
                          cutoff = 0.1, minCell = 3,
                          p.value.cutoff = 0.5,
                          coor.names = c("tSNE_1", "tSNE_2"),
                          ref.data = NULL,
                          referAdjMat = NULL,
                          species = "human",
                          genome = "hg19",
                          hg.mm.mix = F){
    # both output directories are needed below; `figures/` used to be assumed
    for(sub.dir in c("malignancy/", "figures/")){
        if(!dir.exists(file.path(savePath, sub.dir))){
            dir.create(file.path(savePath, sub.dir), recursive = T)
        }
    }

    # accessor instead of direct slot access, as done elsewhere in the package
    expr.data <- GetAssayData(object = expr, assay = "RNA", slot = "counts")
    cnvList <- runCNV(expr.data = expr.data,
                      gene.manifest = gene.manifest,
                      cell.annotation = cell.annotation,
                      cutoff = cutoff, minCell = minCell,
                      ref.data = ref.data,
                      species = species,
                      genome = genome,
                      hg.mm.mix = hg.mm.mix)

    # the bundled reference data comes with its own bundled SNN matrix
    if(is.null(ref.data)){
        if(species == "human"){
            referAdjMat <- readRDS(system.file("rds", "cnvRef_SNN-HM.RDS", package = "scCancer"))
        }else if(species == "mouse"){
            referAdjMat <- readRDS(system.file("rds", "cnvRef_SNN-boneMarrow-MS.RDS", package = "scCancer"))
        }
    }
    referScore.smooth <- getMalignScore(cnvList, "Reference", method = "smooth", adjMat = referAdjMat)
    obserScore.smooth <- getMalignScore(cnvList, "Observation", method = "smooth",
                                        adjMat = expr@graphs$RNA_snn)

    # rescale both score sets by the 0.5% - 99.5% range of the reference scores
    up.refer <- quantile(referScore.smooth, 0.995)
    low.refer <- quantile(referScore.smooth, 0.005)
    referScore.smooth <- (referScore.smooth - low.refer) / (up.refer - low.refer)
    obserScore.smooth <- (obserScore.smooth - low.refer) / (up.refer - low.refer)

    all.thres <- getBimodalThres(scores = c(referScore.smooth, obserScore.smooth))
    malign.thres <- getBimodalThres(scores = obserScore.smooth)

    ju.exist.malign <- !is.null(all.thres) || !is.null(malign.thres)

    ## malignancy type
    # Without a threshold among the observed cells, all of them get one label:
    # "malignant" if reference + observed cells together are bimodal, otherwise
    # "nonMalignant". With a threshold, cells are split at `malign.thres`.
    if(!is.null(all.thres)){
        malign.type <- rep("malignant", length(obserScore.smooth))
        names(malign.type) <- names(obserScore.smooth)
        if(!is.null(malign.thres)){
            malign.type[names(obserScore.smooth)[obserScore.smooth < malign.thres]] <- "nonMalignant"
        }
    }else{
        malign.type <- rep("nonMalignant", length(obserScore.smooth))
        names(malign.type) <- names(obserScore.smooth)
        if(!is.null(malign.thres)){
            malign.type[names(obserScore.smooth)[obserScore.smooth >= malign.thres]] <- "malignant"
        }
    }
    p.malignScore <- malignPlot(obserScore.smooth, referScore.smooth,
                                malign.thres = malign.thres)

    ## add score and type to cell.annotation
    cell.annotation$Malign.score <- obserScore.smooth[rownames(cell.annotation)]
    cell.annotation$Malign.type <- malign.type[rownames(cell.annotation)]
    expr[["Malign.score"]] <- cell.annotation$Malign.score
    expr[["Malign.type"]] <- cell.annotation$Malign.type

    ## plot
    p.results <- plotMalignancy(cell.annotation = cell.annotation,
                                coor.names = coor.names,
                                savePath = savePath)
    p.results[["p.malignScore"]] <- p.malignScore
    ggsave(filename = file.path(savePath, "figures/malignScore.png"),
           p.malignScore, width = 5, height = 4, dpi = 300)

    ## save results
    write.table(cnvList$expr.data[, names(obserScore.smooth)],
                file = file.path(savePath, "malignancy/inferCNV-observation.txt"),
                quote = F, sep = "\t", row.names = T)
    write.table(cnvList$expr.data[, names(referScore.smooth)],
                file = file.path(savePath, "malignancy/inferCNV-reference.txt"),
                quote = F, sep = "\t", row.names = T)
    write.table(data.frame(referScore.smooth),
                file = file.path(savePath, "malignancy/refer-malignScore.txt"),
                quote = F, sep = "\t", row.names = T)

    results <- list(
        cnvList = cnvList,
        referScore = referScore.smooth,
        expr = expr,
        cell.annotation = cell.annotation,
        ju.exist.malign = ju.exist.malign,
        # bimodal.pvalue = bimodal.pvalue,
        malign.thres = malign.thres,
        p.results = p.results
    )
    return(results)
}


-------------------------------------------------------------------------------- /R/scCombination.R: -------------------------------------------------------------------------------- 1 | 2 | #' runScCombination 3 | #' 4 | #' Perform multi-samples analyses. 5 | #' 6 | #' @param single.savePaths A vecotr of paths containing the results files of step 'runScAnnotation' for each sample. 7 | #' @param sampleNames A vector of labels for all samples. 8 | #' @param combName A label for the combined samples. 9 | #' @param comb.method The method to combine samples. The default is "NormalMNN". "Harmony", "NormalMNN", "SeuratMNN", "Raw", "Regression" and "LIGER" are optional. 10 | #' @param harmony.theta The parameter 'theta' of function "RunHarmony" in the harmony package. 11 | #' @param harmony.lambda The parameter 'lambda' of function "RunHarmony" in the harmony package. 12 | #' @param harmony.sigma The parameter 'sigma' of function "RunHarmony" in the harmony package. 13 | #' @param sample.colors The colors used for samples. 
The default is NULL, and the pre-set colors will be used. 14 | #' @inheritParams runScAnnotation 15 | #' 16 | #' @return A results list with all useful objects used in the function. 17 | #' @export 18 | #' 19 | #' @import harmony liger 20 | #' 21 | runScCombination <- function(single.savePaths, sampleNames, savePath, combName, 22 | authorName = NULL, 23 | comb.method = "NormalMNN", 24 | harmony.theta = NULL, 25 | harmony.lambda = NULL, 26 | harmony.sigma = 0.1, 27 | vars.to.regress = c("nCount_RNA", "mito.percent", "ribo.percent"), 28 | pc.use = 30, 29 | resolution = 0.8, 30 | clusterStashName = "comb.cluster", 31 | show.features = NULL, bool.add.features = T, 32 | bool.runDiffExpr = T, 33 | n.markers = 5, 34 | sample.colors = NULL, 35 | species = "human", 36 | genome = "hg19", 37 | hg.mm.mix = F, 38 | bool.runCellClassify = T, 39 | ct.templates = NULL, 40 | coor.names = c("tSNE_1", "tSNE_2"), 41 | bool.runMalignancy = T, 42 | cnv.ref.data = NULL, 43 | cnv.referAdjMat = NULL, 44 | cutoff = 0.1, 45 | p.value.cutoff = 0.5, 46 | bool.intraTumor = T, 47 | bool.runCellCycle = T, 48 | bool.runStemness = T, 49 | bool.runGeneSets = T, 50 | geneSets = NULL, 51 | geneSet.method = "average", 52 | bool.runExprProgram = T, 53 | nmf.rank = 50, 54 | genReport = T){ 55 | 56 | message("[", Sys.time(), "] START: RUN ScCombination") 57 | results <- as.list(environment()) 58 | checkCombArguments(results) 59 | 60 | if(species == "mouse" & genome == "hg19"){ 61 | genome <- "mm10" 62 | } 63 | 64 | if(!dir.exists(file.path(savePath, "figures/"))){ 65 | dir.create(file.path(savePath, "figures/"), recursive = T) 66 | } 67 | suppressWarnings( savePath <- normalizePath(savePath, "/") ) 68 | results[["savePath"]] <- savePath 69 | 70 | 71 | message("[", Sys.time(), "] -----: sample data combination") 72 | expr.list <- list() 73 | sample.ident <- c() 74 | for(i in 1:length(sampleNames)){ 75 | sampleName <- sampleNames[i] 76 | cur.path <- single.savePaths[i] 77 | print(sampleName) 78 | 
expr.list[[sampleName]] <- readRDS(paste0(cur.path, "/expr.RDS")) 79 | sample.ident <- c(sample.ident, rep(sampleName, dim(expr.list[[sampleName]])[2])) 80 | } 81 | sample.ident <- as.factor(sample.ident) 82 | 83 | bool.plotHVG = T 84 | if(comb.method == "SeuratMNN"){ 85 | message("[", Sys.time(), "] -----: combine data by Seurat MNN") 86 | suppressWarnings( expr.anchors <- FindIntegrationAnchors(object.list = expr.list, 87 | dims = 1:pc.use) ) 88 | expr <- IntegrateData(anchorset = expr.anchors, 89 | dims = 1:pc.use, verbose = F) 90 | expr <- ScaleData(expr, verbose = FALSE) 91 | DefaultAssay(expr) <- "integrated" 92 | expr[["sample.ident"]] <- sample.ident 93 | bool.plotHVG = F 94 | 95 | saveRDS(expr.anchors@anchors, file = file.path(savePath, "anchors.RDS")) 96 | 97 | }else if(comb.method == "Raw"){ 98 | message("[", Sys.time(), "] -----: combine raw matrix data") 99 | suppressWarnings( expr <- merge(expr.list[[1]], expr.list[2:length(expr.list)]) ) 100 | expr <- FindVariableFeatures(expr, selection.method = "vst", nfeatures = 2000, verbose = F) 101 | expr <- ScaleData(object = expr, vars.to.regress = vars.to.regress, verbose = F) 102 | expr[["sample.ident"]] <- sample.ident 103 | 104 | }else if(comb.method == "Regression"){ 105 | message("[", Sys.time(), "] -----: combine data and regress out sample source") 106 | suppressWarnings( expr <- merge(expr.list[[1]], expr.list[2:length(expr.list)]) ) 107 | expr <- FindVariableFeatures(expr, selection.method = "vst", nfeatures = 2000, verbose = F) 108 | expr[["sample.ident"]] <- sample.ident 109 | expr <- ScaleData(object = expr, 110 | vars.to.regress = c("sample.ident", vars.to.regress), 111 | verbose = F) 112 | 113 | }else if(comb.method == "Harmony"){ 114 | message("[", Sys.time(), "] -----: combine data by Harmony") 115 | 116 | items <- unique(unlist(lapply(names(expr.list), function(x){ 117 | grep("^GS__", names(expr.list[[x]]@meta.data), value = T) 118 | }))) 119 | items <- c("doublet.score", "Cell.Type", 
"Malign.score", 120 | "Malign.type", "CellCycle.score", "Stemness.score", items) 121 | 122 | ju.mat <- sapply(names(expr.list), function(x){ 123 | !(items %in% names(expr.list[[x]]@meta.data)) 124 | }) 125 | comb.metadata <- lapply(items[rowSums(ju.mat) == 0], function(x){ 126 | tmp <- do.call(c, lapply(names(expr.list), function(y){ 127 | expr.list[[y]]@meta.data[[x]] 128 | })) 129 | }) 130 | names(comb.metadata) <- items[rowSums(ju.mat) == 0] 131 | comb.metadata <- data.frame(comb.metadata) 132 | 133 | share.genes <- Reduce(intersect, lapply(expr.list, rownames)) 134 | for(s.name in names(expr.list)){ 135 | expr.list[[s.name]] <- GetAssayData(expr.list[[s.name]], slot = "counts")[share.genes, ] 136 | } 137 | comb.data <- do.call(cbind, expr.list) 138 | rm(expr.list) 139 | 140 | expr <- CreateSeuratObject(counts = comb.data, min.cells = 5) %>% 141 | Seurat::NormalizeData(verbose = FALSE) %>% 142 | FindVariableFeatures(selection.method = "vst", nfeatures = 2000, verbose = F) %>% 143 | ScaleData(verbose = FALSE) %>% 144 | RunPCA(pc.genes = expr@var.genes, verbose = FALSE) 145 | expr[["sample.ident"]] <- sample.ident 146 | expr <- expr %>% RunHarmony("sample.ident", plot_convergence = TRUE, 147 | theta = harmony.theta, 148 | lambda = harmony.lambda, # was misspelled 'lambad', so the user-supplied harmony.lambda never reached RunHarmony's 'lambda' argument 149 | sigma = harmony.sigma, 150 | verbose = F) 151 | 152 | expr@meta.data <- cbind(expr@meta.data, comb.metadata) 153 | 154 | bool.plotHVG <- F 155 | 156 | }else if(comb.method == "LIGER"){ 157 | message("[", Sys.time(), "] -----: combine data by LIGER") 158 | 159 | items <- unique(unlist(lapply(names(expr.list), function(x){ 160 | grep("^GS__", names(expr.list[[x]]@meta.data), value = T) 161 | }))) 162 | items <- c("doublet.score", "Cell.Type", "Malign.score", 163 | "Malign.type", "CellCycle.score", "Stemness.score", items) 164 | 165 | ju.mat <- sapply(names(expr.list), function(x){ 166 | !(items %in% 
names(expr.list[[x]]@meta.data)) 167 | }) 168 | comb.metadata <- lapply(items[rowSums(ju.mat) == 0], function(x){ 169 | 
tmp <- do.call(c, lapply(names(expr.list), function(y){ 170 | expr.list[[y]]@meta.data[[x]] 171 | })) 172 | }) 173 | names(comb.metadata) <- items[rowSums(ju.mat) == 0] 174 | comb.metadata <- data.frame(comb.metadata) 175 | 176 | for(e.i in 1:length(expr.list)){ 177 | s.name <- names(expr.list)[e.i] 178 | expr.list[[s.name]] <- RenameCells(expr.list[[s.name]], 179 | new.names = paste0(colnames(expr.list[[s.name]]), "-", e.i)) 180 | expr.list[[s.name]] <- GetAssayData(expr.list[[s.name]], slot = "counts") 181 | } 182 | expr = createLiger(expr.list) 183 | expr = normalize(expr) 184 | expr = selectGenes(expr, var.thresh = 0.1) 185 | expr = scaleNotCenter(expr) 186 | 187 | expr = optimizeALS(expr, k = 20) 188 | expr = quantileAlignSNF(expr) 189 | expr = runTSNE(expr) 190 | expr = ligerToSeurat(expr, use.liger.genes = T) 191 | 192 | expr = ScaleData(expr, verbose = FALSE) 193 | expr[["sample.ident"]] <- sample.ident 194 | expr@reductions$inmf@assay.used <- "RNA" 195 | 196 | expr@meta.data <- cbind(expr@meta.data, comb.metadata) 197 | 198 | bool.plotHVG = F 199 | 200 | }else if(comb.method == "NormalMNN"){ 201 | message("[", Sys.time(), "] -----: combine data by normal cell MNN") 202 | suppressWarnings( expr.anchors <- FindIntegrationAnchors(object.list = expr.list, 203 | dims = 1:pc.use) ) 204 | anchors <- expr.anchors@anchors 205 | 206 | anchors$cellType1 <- "NULL" 207 | anchors$cellType2 <- "NULL" 208 | anchors$malignType1 <- "NULL" 209 | anchors$malignType2 <- "NULL" 210 | anchors$malignScore1 <- -1 211 | anchors$malignScore2 <- -1 212 | for(oi in expr.anchors@reference.objects){ 213 | cur.ix <- which(anchors$dataset1 == oi) 214 | anchors$cellType1[cur.ix] <- expr.list[[oi]]@meta.data$Cell.Type[anchors$cell1[cur.ix]] 215 | anchors$malignType1[cur.ix] <- expr.list[[oi]]@meta.data$Malign.type[anchors$cell1[cur.ix]] 216 | anchors$malignScore1[cur.ix] <- expr.list[[oi]]@meta.data$Malign.score[anchors$cell1[cur.ix]] 217 | 218 | cur.ix <- which(anchors$dataset2 == oi) 219 
| anchors$cellType2[cur.ix] <- expr.list[[oi]]@meta.data$Cell.Type[anchors$cell2[cur.ix]] 220 | anchors$malignType2[cur.ix] <- expr.list[[oi]]@meta.data$Malign.type[anchors$cell2[cur.ix]] 221 | anchors$malignScore2[cur.ix] <- expr.list[[oi]]@meta.data$Malign.score[anchors$cell2[cur.ix]] 222 | } 223 | 224 | anchors.new <- subset(anchors, cellType1 != "Epithelial" & cellType1 != "Unknown" & cellType2 != "Epithelial" & cellType2 != "Unknown") 225 | if(nrow(anchors.new) == 0){ # test the FILTERED set: fall back to all anchors only when no normal-cell anchor survived the filter 226 | anchors.new <- anchors 227 | cat("- Warning in 'runScCombination': Cannot find the nomral cell anchors, and use initial anchors instead.\n") 228 | } 229 | expr.anchors@anchors <- anchors.new 230 | 231 | expr <- IntegrateData(anchorset = expr.anchors, 232 | dims = 1:pc.use, verbose = F) 233 | expr <- ScaleData(expr, verbose = FALSE) 234 | DefaultAssay(expr) <- "integrated" 235 | expr[["sample.ident"]] <- sample.ident 236 | bool.plotHVG = F 237 | 238 | saveRDS(anchors.new, file = file.path(savePath, "anchors.RDS")) 239 | } 240 | results[["bool.plotHVG"]] <- bool.plotHVG 241 | 242 | ## --------- seurat --------- 243 | t.results <- runSeurat( 244 | expr = expr, 245 | savePath = savePath, 246 | pc.use = pc.use, 247 | resolution = resolution, 248 | clusterStashName = clusterStashName, 249 | bool.runDiffExpr = bool.runDiffExpr, 250 | comb.method = comb.method 251 | ) 252 | expr = t.results$expr 253 | cell.annotation = t.results$cell.annotation 254 | results[["diff.expr.genes"]] = t.results$diff.expr.genes 255 | rm(t.results) 256 | gc() 257 | 258 | for(item in c("doublet.score", "Cell.Type", "Malign.score", 259 | "Malign.type", "CellCycle.score", "Stemness.score")){ 260 | if(item %in% names(expr@meta.data)){ 261 | cell.annotation[[item]] <- expr@meta.data[[item]] 262 | } 263 | } 264 | for(item in grep("^GS__", names(expr@meta.data), value = T)){ 265 | cell.annotation[[item]] <- expr@meta.data[[item]] 266 | } 
267 | 268 | results[["seurat.plots"]] <- plotSeurat( 269 | expr = expr, 270 | cell.annotation 
= cell.annotation, 271 | show.features = show.features, 272 | bool.add.features = bool.add.features, 273 | coor.names = coor.names, 274 | bool.plotHVG = bool.plotHVG, 275 | 276 | bool.runDiffExpr = bool.runDiffExpr, 277 | diff.expr.genes = results[["diff.expr.genes"]], 278 | n.markers = n.markers, 279 | 280 | species = species, 281 | savePath = savePath 282 | ) 283 | 284 | results[["DEplot.height"]] <- 0.5 + 0.1 * n.markers * length(unique(cell.annotation$Cluster)) 285 | results[["markersPlot.height"]] <- 2 * ceiling(length(results[["seurat.plots"]]$ps.markers) / 4) 286 | 287 | 288 | ## --------- sample source --------- 289 | message("[", Sys.time(), "] -----: plot sample source") 290 | cell.annotation$sample <- expr@meta.data$sample.ident 291 | if(is.null(sample.colors)){ 292 | sample.colors <- getDefaultColors(n = length(unique(cell.annotation$sample)), 293 | type = 2) 294 | } 295 | 296 | if(setequal(sampleNames, unique(cell.annotation$sample))){ 297 | cell.annotation$sample <- factor(cell.annotation$sample, levels = sampleNames) 298 | }else{ 299 | cell.annotation$sample <- factor(cell.annotation$sample) 300 | } 301 | p.sample <- pointDRPlot(cell.annotation, value = "sample", 302 | coor.names = coor.names, 303 | colors = sample.colors, 304 | point.type = 2, 305 | legend.position = "right", 306 | legend.title = "Sample") 307 | p.bar.sample <- clusterBarPlot(cell.annotation = cell.annotation, 308 | cell.colors = sample.colors, 309 | sel.col = "sample", 310 | legend.position = "bottom", 311 | legend.title = "Sample") 312 | 313 | ggsave(filename = file.path(savePath, "figures/sampleSource-point.png"), 314 | p.sample, width = 7, height = 5, dpi = 300) 315 | ggsave(filename = file.path(savePath, "figures/sampleSource-bar.png"), 316 | p.bar.sample, width = 6, height = 3, dpi = 300) 317 | results[["p.sample"]] <- p.sample 318 | results[["p.bar.sample"]] <- p.bar.sample 319 | 320 | 321 | ## --------- cell type --------- 322 | if(bool.runCellClassify){ 323 | t.results <- 
runCellClassify(expr, cell.annotation, 324 | coor.names = coor.names, 325 | savePath = savePath, 326 | ct.templates = ct.templates, 327 | species = species) 328 | expr <- t.results$expr 329 | cell.annotation <- t.results$cell.annotation 330 | results[["cellType.plot"]] <- t.results$p.results 331 | rm(t.results) 332 | } 333 | 334 | 335 | ## --------- malignancy --------- 336 | if(bool.runMalignancy){ 337 | if(!(all(c("Malign.score", "Malign.type") %in% names(cell.annotation)))){ 338 | message("[", Sys.time(), "] -----: cells malignancy annotation") 339 | for(i in 1:length(sampleNames)){ 340 | cur.manifest <- read.table(paste0(single.savePaths[i], "/geneManifest.txt"), 341 | header = T, sep = "\t", stringsAsFactors = F) 342 | if(i == 1){ 343 | gene.manifest <- cur.manifest 344 | }else{ 345 | new.genes <- subset(cur.manifest, !(EnsemblID %in% gene.manifest$EnsemblID)) 346 | gene.manifest <- rbind(gene.manifest, new.genes) 347 | } 348 | } 349 | # rownames(gene.manifest) <- gene.manifest$EnsemblID 350 | rownames(gene.manifest) <- gene.manifest$Symbol 351 | t.results <- runMalignancy(expr = expr, 352 | gene.manifest = gene.manifest, 353 | cell.annotation = cell.annotation, 354 | savePath = savePath, 355 | cutoff = cutoff, minCell = 3, 356 | p.value.cutoff = p.value.cutoff, 357 | coor.names = coor.names, 358 | ref.data = cnv.ref.data, 359 | referAdjMat = cnv.referAdjMat, 360 | species = species, 361 | genome = genome, 362 | hg.mm.mix = hg.mm.mix) 363 | expr <- t.results$expr 364 | cell.annotation <- t.results$cell.annotation 365 | results[["cnvList"]] <- t.results$cnvList 366 | results[["referScore"]] <- t.results$referScore 367 | results[["ju.exist.malign"]] <- t.results$ju.exist.malign 368 | results[["malign.thres"]] <- t.results$malign.thres 369 | results[["bimodal.pvalue"]] <- t.results$bimodal.pvalue 370 | results[["malign.plot"]] <- t.results$p.results 371 | rm(t.results) 372 | }else{ 373 | message("[", Sys.time(), "] -----: cells malignancy combination") 374 | 
results[["malign.plot"]] <- plotMalignancy(cell.annotation = cell.annotation, 375 | coor.names = coor.names, 376 | savePath = savePath) 377 | } 378 | } 379 | 380 | 381 | ## --------- select tumor clusters --------- 382 | if(bool.intraTumor){ 383 | tumor.clusters <- getTumorCluster(cell.annotation = cell.annotation) 384 | results[["tumor.clusters"]] <- tumor.clusters 385 | 386 | if(is.null(tumor.clusters)){ 387 | sel.clusters <- unique(cell.annotation$Cluster) 388 | sel.clusters <- sel.clusters[order(sel.clusters)] 389 | }else{ 390 | sel.clusters <- tumor.clusters 391 | } 392 | }else{ 393 | sel.clusters <- unique(cell.annotation$Cluster) 394 | sel.clusters <- sel.clusters[order(sel.clusters)] 395 | } 396 | 397 | 398 | ## --------- cell cycle --------- 399 | if(bool.runCellCycle){ 400 | if(!("CellCycle.score" %in% names(cell.annotation))){ 401 | CellCycle.score <- runCellCycle(expr, species = species) 402 | cell.annotation$CellCycle.score <- CellCycle.score 403 | expr[["CellCycle.score"]] <- CellCycle.score 404 | }else{ 405 | message("[", Sys.time(), "] -----: cell cycle score combination") 406 | } 407 | 408 | # CellCycle.score <- runCellCycle(expr, species = species) 409 | # cell.annotation$CellCycle.score <- CellCycle.score 410 | # expr[["CellCycle.score"]] <- CellCycle.score 411 | 412 | results[["cellCycle.plot"]] <- 413 | pointDRPlot(cell.annotation, 414 | sel.clusters = sel.clusters, 415 | value = "CellCycle.score", 416 | coor.names = coor.names, 417 | colors = c("white", "#009b45"), 418 | discrete = F, 419 | legend.position = "right", 420 | legend.title = "Cell cycle score") 421 | ggsave(filename = file.path(savePath, "figures/cellCycle-point.png"), 422 | results[["cellCycle.plot"]], width = 5, height = 4, dpi = 300) 423 | } 424 | 425 | 426 | ## --------- stemness --------- 427 | if(bool.runStemness){ 428 | if(!("Stemness.score" %in% names(cell.annotation))){ 429 | stem.scores <- runStemness(X = GetAssayData(object = expr, slot = "scale.data"), species = 
species) 430 | cell.annotation[["Stemness.score"]] <- stem.scores 431 | expr[["Stemness.score"]] <- stem.scores 432 | }else{ 433 | message("[", Sys.time(), "] -----: stemness score combination") 434 | } 435 | 436 | results[["stemness.plot"]] <- 437 | pointDRPlot(cell.annotation, 438 | sel.clusters = sel.clusters, 439 | value = "Stemness.score", 440 | coor.names = coor.names, 441 | colors = c("white", "#ff9000"), 442 | discrete = F, 443 | legend.position = "right", 444 | legend.title = "Stemness") 445 | ggsave(filename = file.path(savePath, "figures/stemness-point.png"), 446 | results[["stemness.plot"]], width = 5, height = 4, dpi = 300) 447 | } 448 | 449 | 450 | ## --------- gene sets ---------- 451 | if(bool.runGeneSets){ 452 | if(is.null(geneSets)){ 453 | geneSets <- getDefaultGeneSets(species = species) 454 | } 455 | if(geneSet.method == "GSVA" | !all(paste0("GS__", names(geneSets)) %in% names(cell.annotation))){ 456 | t.scores <- runGeneSets(expr = expr, geneSets = geneSets, method = geneSet.method) 457 | if(!is.null(t.scores)){ 458 | cell.annotation <- cbind(cell.annotation, t.scores) 459 | } 460 | }else{ 461 | message("[", Sys.time(), "] -----: gene set signatures combination") 462 | t.scores <- cell.annotation[, paste0("GS__", names(geneSets))] 463 | } 464 | 465 | if(!is.null(t.scores)){ 466 | bool.limit <- T 467 | if(geneSet.method == "GSVA"){ 468 | bool.limit <- F 469 | } 470 | results[["geneSet.plot"]] <- 471 | plotGeneSet(subset(cell.annotation, Cluster %in% sel.clusters), 472 | prefix = "GS__", 473 | bool.limit = bool.limit, 474 | savePath = savePath) 475 | results[["geneSetPlot.height"]] <- 0.5 + 0.11 * dim(t.scores)[2] 476 | rm(t.scores) 477 | }else{ 478 | bool.runGeneSets = FALSE 479 | } 480 | } 481 | 482 | 483 | ## ---------- expression programs ---------- 484 | if(bool.runExprProgram){ 485 | results[["exprProgram.results"]] <- runExprProgram(expr, rank = nmf.rank, 486 | sel.clusters = sel.clusters, 487 | clusterStashName = clusterStashName, 488 | 
savePath = savePath) 489 | results[["exprProgram.plot"]] <- plotExprProgram(H = results[["exprProgram.results"]]$H, 490 | cell.annotation, 491 | sel.clusters = sel.clusters, 492 | savePath = savePath) 493 | results[["exprProgPlot.height"]] <- 0.5 + 0.11 * dim(results[["exprProgram.results"]]$H)[1] 494 | } 495 | results[["expr"]] <- expr 496 | results[["cell.annotation"]] <- cell.annotation 497 | 498 | 499 | ## -------- save --------- 500 | saveRDS(expr, file = file.path(savePath, "expr.RDS")) 501 | write.table(cell.annotation, file = file.path(savePath, "cellAnnotation.txt"), 502 | quote = F, sep = "\t", row.names = F) 503 | 504 | if(genReport){ 505 | message("[", Sys.time(), "] -----: report generating") 506 | if(!dir.exists(file.path(savePath, 'report-figures/'))){ 507 | dir.create(file.path(savePath, 'report-figures/'), recursive = T) 508 | } 509 | suppressWarnings( 510 | knit(system.file("rmd", "main-scAnnoComb.Rmd", package = "scCancer"), 511 | file.path(savePath,'report-scAnnoComb.md'), quiet = T) 512 | ) 513 | markdownToHTML(file.path(savePath,'report-scAnnoComb.md'), 514 | file.path(savePath, 'report-scAnnoComb.html')) 515 | } 516 | 517 | message("[", Sys.time(), "] END: Finish ScCombination\n\n") 518 | 519 | return(results) 520 | } 521 | 522 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | get10Xpath <- function (samplePath, raw.data = F){ 3 | prefix <- ifelse(raw.data, 'raw', 'filtered') 4 | cur.path <- paste0(samplePath, '/') 5 | res.path <- paste0(cur.path, prefix, '_feature_bc_matrix') 6 | if (!dir.exists(res.path)){ 7 | res.path <- paste0(cur.path, prefix, '_gene_bc_matrices/hg19/') 8 | } 9 | if (!dir.exists(res.path)){ 10 | res.path <- paste0(cur.path, prefix, '_gene_bc_matrices/hg38/') 11 | } 12 | if (!dir.exists(res.path)){ 13 | res.path <- paste0(cur.path, prefix, '_gene_bc_matrices/mm10/') 14 | } 15 | 
if(!dir.exists(res.path)){ 16 | res.path <- NULL 17 | } 18 | return(res.path) 19 | } 20 | 21 | 22 | ExtractField <- function (string, field = 1, delim = "_"){ 23 | fields <- as.numeric(x = unlist(x = strsplit( 24 | x = as.character(x = field), split = ","))) 25 | if (length(x = fields) == 1) { 26 | return(strsplit(x = string, split = delim)[[1]][field]) 27 | } 28 | return(paste(strsplit(x = string, split = delim)[[1]][fields], collapse = delim)) 29 | } 30 | 31 | 32 | getCRversion <- function(data.path){ 33 | version <- "Cell Ranger (version 2)" 34 | if(grepl("feature_bc_matrix", data.path)){ 35 | version <- "Cell Ranger (version >= 3)" 36 | } 37 | return(version) 38 | } 39 | 40 | 41 | getBarcodes <- function(data.path){ 42 | barcode.loc <- paste0(data.path, "/barcodes.tsv") 43 | if(grepl("feature_bc_matrix", data.path)){ 44 | barcode.loc <- paste0(barcode.loc, ".gz") 45 | } 46 | cell.names <- readLines(barcode.loc) 47 | if (all(grepl(pattern = "\\-1$", x = cell.names))) { 48 | cell.names <- as.vector(x = as.character( 49 | x = sapply( 50 | X = cell.names, 51 | FUN = ExtractField, 52 | field = 1, 53 | delim = "-" 54 | ) 55 | )) 56 | } 57 | return(cell.names) 58 | } 59 | 60 | 61 | #' Read10Xdata 62 | #' 63 | #' Read expression matrix data from 10X. This function is modified from Seurat package. 64 | #' 65 | #' @param data.dir Directory containing the matrix.mtx, genes.tsv (or features.tsv), and barcodes.tsv files provided by 10X. 66 | #' A vector or named vector can be given in order to load several data directories. 67 | #' If a named vector is given, the cell barcode names will be prefixed with the name. 68 | #' @param gene.column An integer indicating which column of genes.tsv or features.tsv to use for gene names; default is 2. 69 | #' @param unique.features Make feature names unique (default TRUE). 70 | #' @param only.expr Whether to read expression data only if have multiple features (default TRUE). 
71 | #' 72 | #' @return If the 10X data only has expression data or the argument 'only.expr' is TRUE, 73 | #' a sparse matrix containing the expression data will be returned. 74 | #' Otherwise, if the 10X data has multiple data types, 75 | #' a list containing a sparse matrix of the data from each type will be returned. 76 | #' 77 | #' @export 78 | #' 79 | Read10Xdata <- function (data.dir = NULL, gene.column = 2, 80 | unique.features = TRUE, only.expr = TRUE) { 81 | full.data <- list() 82 | for (i in seq_along(data.dir)) { 83 | run <- data.dir[i] 84 | if (!dir.exists(paths = run)) { 85 | stop("Directory provided does not exist") 86 | } 87 | if (!grepl("\\/$", run)) { 88 | run <- paste(run, "/", sep = "") 89 | } 90 | barcode.loc <- file.path(run, "barcodes.tsv") 91 | gene.loc <- file.path(run, "genes.tsv") 92 | features.loc <- file.path(run, "features.tsv.gz") 93 | matrix.loc <- file.path(run, "matrix.mtx") 94 | pre_ver_3 <- file.exists(gene.loc) 95 | if (!pre_ver_3) { 96 | addgz <- function(s) { 97 | return(paste0(s, ".gz")) 98 | } 99 | barcode.loc <- addgz(s = barcode.loc) 100 | matrix.loc <- addgz(s = matrix.loc) 101 | } 102 | if (!file.exists(barcode.loc)) { 103 | stop("Barcode file missing") 104 | } 105 | if (!pre_ver_3 && !file.exists(features.loc)) { 106 | stop("Gene name or features file missing") 107 | } 108 | if (!file.exists(matrix.loc)) { 109 | stop("Expression matrix file missing") 110 | } 111 | data <- readMM(file = matrix.loc) 112 | cell.names <- readLines(barcode.loc) 113 | if (all(grepl(pattern = "\\-1$", x = cell.names))) { 114 | cell.names <- as.vector(x = as.character( 115 | x = sapply( 116 | X = cell.names, 117 | FUN = ExtractField, 118 | field = 1, 119 | delim = "-" 120 | ) 121 | )) 122 | } 123 | if (is.null(x = names(x = data.dir))) { 124 | if (i < 2) { 125 | colnames(x = data) <- cell.names 126 | } else { 127 | colnames(x = data) <- paste0(i, "_", cell.names) 128 | } 129 | } else { 130 | colnames(x = data) <- 131 | paste0(names(x = 
data.dir)[i], "_", cell.names) 132 | } 133 | feature.names <- read.delim( 134 | file = ifelse( 135 | test = pre_ver_3, 136 | yes = gene.loc, 137 | no = features.loc 138 | ), 139 | header = FALSE, 140 | stringsAsFactors = FALSE 141 | ) 142 | if (any(is.na(x = feature.names[, gene.column]))) { 143 | warning("Some features names are NA. Replacing NA names with ID from the opposite column requested", 144 | call. = FALSE, immediate. = TRUE) 145 | na.features <- which(x = is.na(x = feature.names[, 146 | gene.column])) 147 | replacement.column <- ifelse(test = gene.column == 148 | 2, yes = 1, no = 2) 149 | feature.names[na.features, gene.column] <- feature.names[na.features, 150 | replacement.column] 151 | } 152 | if (unique.features) { 153 | fcols = ncol(x = feature.names) 154 | if (fcols < gene.column) { 155 | stop(paste0("gene.column was set to ", gene.column, 156 | " but feature.tsv.gz (or genes.tsv) only has ", 157 | fcols, " columns.", " Try setting the gene.column argument to a value <= to ", 158 | fcols, ".")) 159 | } 160 | rownames(x = data) <- make.unique(names = feature.names[, 161 | gene.column]) 162 | } 163 | # In cell ranger 3.0, a third column specifying the type of data was added 164 | # and we will return each type of data as a separate matrix 165 | if (ncol(x = feature.names) > 2) { 166 | data_types <- factor(x = feature.names$V3) 167 | lvls <- levels(x = data_types) 168 | if (length(x = lvls) > 1 && length(x = full.data) == 169 | 0) { 170 | message("10X data contains more than one type and is being returned as a list containing matrices of each type.") 171 | } 172 | expr_name <- "Gene Expression" 173 | if (expr_name %in% lvls) { 174 | # Return Gene Expression first 175 | lvls <- c(expr_name, lvls[-which(x = lvls == expr_name)]) 176 | } 177 | data <- lapply( 178 | X = lvls, 179 | FUN = function(l) { 180 | return(data[data_types == l, ]) 181 | } 182 | ) 183 | names(x = data) <- lvls 184 | } else{ 185 | data <- list(data) 186 | } 187 | full.data[[length(x 
= full.data) + 1]] <- data 188 | } 189 | # Combine all the data from different directories into one big matrix, note this 190 | # assumes that all data directories essentially have the same features files 191 | list_of_data <- list() 192 | for (j in 1:length(x = full.data[[1]])) { 193 | list_of_data[[j]] <- 194 | do.call(cbind, lapply(X = full.data, FUN = `[[`, j)) 195 | list_of_data[[j]] <- 196 | as(object = list_of_data[[j]], Class = "CsparseMatrix") 197 | } 198 | names(x = list_of_data) <- names(x = full.data[[1]]) 199 | 200 | if (only.expr){ 201 | return(list_of_data[[1]]) 202 | }else{ 203 | # If multiple features, only return a list, otherwise a matrix. 204 | if (length(x = list_of_data) == 1) { 205 | return(list_of_data[[1]]) 206 | } else { 207 | return(list_of_data) 208 | } 209 | } 210 | } 211 | 212 | 213 | 214 | #' ggplot_config 215 | #' 216 | #' @param base.size The size of text. 217 | #' 218 | #' @return A theme. 219 | #' @export 220 | #' 221 | ggplot_config <- function(base.size = 8){ 222 | p <- theme_classic() + 223 | theme(plot.title = element_text(size = 2 * base.size), 224 | axis.title.x = element_text(size = 2 * base.size, vjust = -0.2), 225 | axis.title.y = element_text(size = 2 * base.size, vjust = 0.2), 226 | axis.text.x = element_text(size = 2 * base.size), 227 | axis.text.y = element_text(size = 2 * base.size), 228 | panel.grid.major = element_blank(), 229 | panel.grid.minor = element_blank(), 230 | legend.title = element_text(size = 2 * base.size - 2), 231 | legend.text = element_text(size = 1.5 * base.size) 232 | ) 233 | return(p) 234 | } 235 | 236 | 237 | 238 | getOutliers <- function(x){ 239 | x.med <- median(x) 240 | outs <- boxplot.stats(x)$out 241 | outliers <- subset(outs, outs > x.med) 242 | return(outliers) 243 | } 244 | 245 | 246 | 247 | getCellix <- function(cell.manifest, filter.thres, arg){ 248 | ixs <- lapply(arg, FUN = function(x) { 249 | ix <- which(cell.manifest[[x]] >= filter.thres[x, 'Low.threshold'] & 250 | 
cell.manifest[[x]] < filter.thres[x, 'High.threshold']) 251 | return(ix) 252 | }) 253 | res.ix <- ixs[[1]] 254 | for(i in 1:length(arg)){ 255 | res.ix <- intersect(res.ix, ixs[[i]]) 256 | } 257 | return(res.ix) 258 | } 259 | 260 | 261 | 262 | grid_arrange_shared_legend <- function(..., all.p, ncol = length(list(...)), nrow = 1, position = c("bottom", "right")) { 263 | 264 | plots <- list(...) 265 | position <- match.arg(position) 266 | g <- ggplotGrob(all.p + theme(legend.position = position))$grobs 267 | legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]] 268 | lheight <- sum(legend$height) 269 | lwidth <- sum(legend$width) 270 | gl <- lapply(plots, function(x) x + theme(legend.position="none")) 271 | gl <- c(gl, ncol = ncol, nrow = nrow) 272 | 273 | combined <- switch(position, 274 | "bottom" = arrangeGrob(do.call(arrangeGrob, gl), 275 | legend, 276 | ncol = 1, 277 | heights = unit.c(unit(1, "npc") - lheight, lheight)), 278 | "right" = arrangeGrob(do.call(arrangeGrob, gl), 279 | legend, 280 | ncol = 2, 281 | widths = unit.c(unit(1, "npc") - lwidth, lwidth))) 282 | grid.newpage() 283 | grid.draw(combined) 284 | 285 | # return gtable invisibly 286 | invisible(combined) 287 | } 288 | 289 | 290 | 291 | #' getDefaultMarkers 292 | #' 293 | #' Return default markers of several common cell types. 294 | #' 295 | #' @inheritParams runScAnnotation 296 | #' 297 | #' @return A list of default markers of several common cell types. 
298 | #' @export 299 | #' 300 | getDefaultMarkers <- function(species = "human"){ 301 | # feature.def <- list( 302 | # "T cell" = c("CD3D"), 303 | # "B cell" = c("CD79A"), 304 | # "NK cell" = c("NKG7"), 305 | # "Monocyte" = c("LYZ"), 306 | # "Endothelial" = c("PLVAP"), 307 | # "Myofibroblast" = c("ACTA2"), 308 | # "Epithelial" = c("EPCAM", "KRT8")) 309 | 310 | if(species == "human"){ 311 | feature.def <- list( 312 | "T cell" = c("PTPRC", "CD3D", "CD4", "CD8A", "CD8B"), 313 | "B cell" = c("CD79A"), 314 | "NK cell" = c("NKG7"), 315 | "Myeloid cell" = c("LYZ"), 316 | "Endothelial" = c("PLVAP"), 317 | "Fibroblast" = c("ACTA2"), 318 | "Epithelial" = c("EPCAM", "KRT8")) 319 | }else if(species == "mouse"){ 320 | feature.def <- list( 321 | "T cell" = c("Ptprc", "Cd3d", "Cd4", "Cd8a", "Cd8b"), 322 | "B cell" = c("Cd79a"), 323 | "NK cell" = c("Nkg7"), 324 | "Myeloid cell" = c("Lyz1", "Lyz2"), 325 | "Endothelial" = c("Plvap"), 326 | "Fibroblast" = c("Acta2"), 327 | "Epithelial" = c("Epcam", "Krt8")) 328 | } 329 | 330 | return(feature.def) 331 | } 332 | 333 | 334 | 335 | #' getDefaultColors 336 | #' 337 | #' @param n The number of colors. 338 | #' @param type The type of color style. Only 1, 2, or 3 is allowed. 339 | #' 340 | #' @return A vector of colors. 
341 | #' @export 342 | #' 343 | getDefaultColors <- function(n = NULL, type = 1){ 344 | if(type == 1){ 345 | colors <- c("#cb7c77", "#68d359", "#6a7dc9", "#c9d73d", "#c555cb", 346 | "#d7652d", "#7cd5c8", "#c49a3f", "#507d41", "#5d8d9c", 347 | "#90353b", "#674c2a", "#1B9E77", "#c5383c", "#0081d1", 348 | "#ffd900", "#502e71", "#c8b693", "#aed688", "#f6a97a", 349 | "#c6a5cc", "#798234", "#6b42c8", "#cf4c8b", "#666666", 350 | "#feb308", "#ff1a1a", "#1aff1a", "#1a1aff", "#ffff1a") 351 | }else if(type == 2){ 352 | if(is.null(n) || n <= 8){ # n = NULL (the default) now selects the 8-colour palette instead of failing on 'NULL <= 8' 353 | colors <- c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", 354 | "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3") 355 | }else if(n <= 14){ 356 | colors <- c("#437BFE", "#FEC643", "#43FE69", "#FE6943", "#C643FE", 357 | "#43D9FE", "#B87A3D", "#679966", "#993333", "#7F6699", 358 | "#E78AC3", "#333399", "#A6D854", "#E5C494") 359 | } 360 | else if(n <= 20){ 361 | colors <- c("#87b3d4", "#d5492f", "#6bd155", "#683ec2", "#c9d754", 362 | "#d04dc7", "#81d8ae", "#d34a76", "#607d3a", "#6d76cb", 363 | "#ce9d3f", "#81357a", "#d3c3a4", "#3c2f5a", "#b96f49", 364 | "#4e857e", "#6e282c", "#d293c8", "#393a2a", "#997579") 365 | }else if(n <= 30){ 366 | colors <- c("#628bac", "#ceda3f", "#7e39c9", "#72d852", "#d849cc", 367 | "#5e8f37", "#5956c8", "#cfa53f", "#392766", "#c7da8b", 368 | "#8d378c", "#68d9a3", "#dd3e34", "#8ed4d5", "#d84787", 369 | "#498770", "#c581d3", "#d27333", "#6680cb", "#83662e", 370 | "#cab7da", "#364627", "#d16263", "#2d384d", "#e0b495", 371 | "#4b272a", "#919071", "#7b3860", "#843028", "#bb7d91") 372 | }else{ 373 | colors <- c("#982f29", "#5ddb53", "#8b35d6", "#a9e047", "#4836be", 374 | "#e0dc33", "#d248d5", "#61a338", "#9765e5", "#69df96", 375 | "#7f3095", "#d0d56a", "#371c6b", "#cfa738", "#5066d1", 376 | "#e08930", "#6a8bd3", "#da4f1e", "#83e6d6", "#df4341", 377 | "#6ebad4", "#e34c75", "#50975f", "#d548a4", "#badb97", 378 | 
"#b377cf", "#899140", "#564d8b", "#ddb67f", "#292344", 379 | "#d0cdb8", "#421b28", "#5eae99", "#a03259", "#406024", 380 | 
"#e598d7", "#343b20", "#bbb5d9", "#975223", "#576e8b", 381 | "#d97f5e", "#253e44", "#de959b", "#417265", "#712b5b", 382 | "#8c6d30", "#a56c95", "#5f3121", "#8f846e", "#8f5b5c") 383 | } 384 | }else if(type == 3){ 385 | # colors <- c("#07a2a4", "#9a7fd1", "#588dd5", "#f5994e", 386 | # "#c05050", "#59678c", "#c9ab00", "#7eb00a") 387 | colors <- c("#c14089", "#6f5553", "#E5C494", "#738f4c", "#bb6240", 388 | "#66C2A5", "#2dfd29", "#0c0fdc") 389 | } 390 | if(!is.null(n)){ 391 | if(n <= length(colors)){ 392 | colors <- colors[seq_len(n)] # seq_len() so that n = 0 yields an empty vector rather than colors[c(1, 0)] 393 | }else{ 394 | step <- 16777200 %/% (n - length(colors)) - 2 395 | add.colors <- paste0("#", format(as.hexmode(seq(from = sample(1:step, 1), 396 | by = step, length.out = (n-length(colors)))), width = 6)) # zero-pad to 6 hex digits so every generated value is a valid "#RRGGBB" colour 397 | colors <- c(colors, add.colors) 398 | } 399 | } 400 | return(colors) 401 | } 402 | 403 | 404 | #' getCellTypeColor 405 | #' 406 | #' @param cell.types A vector of cell types. 407 | #' 408 | #' @return A vector of colors. 
409 | #' @export 410 | #' 411 | getCellTypeColor <- function(cell.types){ 412 | cell.colors <- c( 413 | "T.cells.CD4" = "#07a2a4", 414 | "T.cells.CD8" = "#9a7fd1", 415 | "B.cells" = "#588dd5", 416 | "NK.cells" = "#f5994e", 417 | "Myeloid.cells" = "#c05050", 418 | "Endothelial" = "#59678c", 419 | "Fibroblast" = "#c9ab00", 420 | "Epithelial" = "#7eb00a", 421 | "Unknown" = "grey") 422 | cti = 1 423 | new.types <- setdiff(cell.types, names(cell.colors)) 424 | for(ct in new.types){ 425 | cell.colors[ct] <- getDefaultColors(n = length(new.types), type = 3)[cti] 426 | cti = cti + 1 427 | } 428 | return(cell.colors) 429 | } 430 | 431 | 432 | limitData <- function(data, min = NULL, max = NULL){ 433 | data2 <- data 434 | if(!is.null(min)){ 435 | data2[data2 < min] <- min 436 | } 437 | if(!is.null(max)){ 438 | data2[data2 > max] <- max 439 | } 440 | return(data2) 441 | } 442 | 443 | 444 | 445 | getClusterInfo <- function(cell.annotation){ 446 | cluster.info <- cell.annotation[order(cell.annotation$Cluster), 'Cluster', drop = F] 447 | 
cluster.info$Cluster <- as.factor(cluster.info$Cluster) 448 | 449 | num.cluster <- table(cluster.info$Cluster) 450 | # num.cluster <- num.cluster[as.character(1 : length(num.cluster))] 451 | num.cluster <- num.cluster[as.character(unique(cluster.info$Cluster))] 452 | cluster.pos <- cumsum(num.cluster) 453 | 454 | def.colors <- getDefaultColors() 455 | clusters <- unique(cell.annotation$Cluster) 456 | clusters <- sort(clusters) 457 | cluster.colors <- c() 458 | for(i in 1:length(clusters)){ 459 | cluster.colors[as.character(clusters[i])] = def.colors[clusters[i]] 460 | } 461 | cluster.colors = list(Cluster = cluster.colors) 462 | 463 | return(list(cluster.info = cluster.info, 464 | cluster.colors = cluster.colors, 465 | cluster.pos = cluster.pos)) 466 | } 467 | 468 | 469 | 470 | getMouseGene <- function(hg.genes, bool.name = F, deduplicate = T){ 471 | hg.mm.HomologyGenes <- read.table(system.file("txt", "hg-mm-HomologyGenes.txt", package = "scCancer"), 472 | header = T, stringsAsFactors = F) 473 | hg.mm.HomologyGenes <- subset(hg.mm.HomologyGenes, hgGenes %in% hg.genes) 474 | 475 | if(deduplicate){ 476 | hg.num <- table(hg.mm.HomologyGenes$hgGenes) 477 | hg.mm.HomologyGenes <- subset(hg.mm.HomologyGenes, !(hgGenes %in% names(hg.num)[hg.num > 1])) 478 | mm.num <- table(hg.mm.HomologyGenes$mmGenes) 479 | hg.mm.HomologyGenes <- subset(hg.mm.HomologyGenes, !(mmGenes %in% names(mm.num)[mm.num > 1])) 480 | } 481 | 482 | mm.genes <- hg.mm.HomologyGenes$mmGenes 483 | 484 | if(bool.name){ 485 | names(mm.genes) <- hg.mm.HomologyGenes$hgGenes 486 | } 487 | return(mm.genes) 488 | } 489 | 490 | 491 | 492 | 493 | #' runSurvival 494 | #' 495 | #' According to the marker genes or signatures expression high/low levels, 496 | #' patient are divided into two groups and then survival analysis is performed. 497 | #' The survival curves can be plotted. 498 | #' 499 | #' @param features The names of marker genes or signatures to be analyzed. 
#' @param data The data used to perform survival analysis.
#' It should be an expression or signature matrix with gene or signature by patient.
#' The row names are the features' names. The columns are patients' labels.
#' @param surv.time The survival time of patients. It should be in accord with the columns of data.
#' @param surv.event The status indicator of patients. 0=alive, 1=dead. It should be in accord with the columns of data.
#' @param cut.off The percentage threshold to divide patients into two groups.
#' The default is 0.5, which means the patients are divided by median.
#' Other values, such as 0.4, means the first 40 percent patients are set "Low" group
#' and the last 40 percent are set "High" group (the median 20 percent are discarded).
#' @param savePath The path to save the survival plots of genes or signatures (the default is NULL and the plots will be returned without saving).
#'
#'
#' @return A list of survival curves plots, named by feature. Features that are
#' not row names of 'data' are reported on the console and skipped.
#' @export
#'
#' @import survival survminer
#'
runSurvival <- function(features, data, surv.time, surv.event, cut.off = 0.5, savePath = NULL){
    data <- as.matrix(data)
    # Work with the lower tail fraction, so 0.4 and 0.6 mean the same split.
    cut.off <- min(cut.off, 1 - cut.off)

    ps <- list()
    for(feat in features){
        if(!(feat %in% rownames(data))){
            cat("- Warning in 'runSurvival':", feat, "not found.\n")
            next
        }

        feat.values <- data[feat, ]
        dw.thres <- quantile(feat.values, cut.off)
        up.thres <- quantile(feat.values, 1 - cut.off)

        p.df <- data.frame(sample = colnames(data),
                           surv.time = surv.time,
                           surv.event = surv.event)
        # "High" is tested first, so with cut.off = 0.5 (both thresholds equal
        # to the median) every patient lands in "High" or "Low".
        p.df$expr <- ifelse(feat.values >= up.thres, "High",
                            ifelse(feat.values < dw.thres, "Low", "Med"))
        surv.df <- subset(p.df, expr != "Med")

        # Fit from the columns of surv.df and hand the same data to ggsurvplot(),
        # so nothing has to be exported to the global environment
        # (the previous code used `<<-` for that).
        fit <- survfit(Surv(time = surv.time, event = surv.event) ~ expr, data = surv.df)
        p.surv <- ggsurvplot(fit, data = surv.df, pval = TRUE,
                             palette = c("#f57e87", "#66d5a5"),
                             legend.title = paste0(feat, ":"))

        if(!is.null(savePath)){
            if(!dir.exists(savePath)){
                dir.create(savePath, recursive = TRUE)
            }
            # file.path() keeps the file inside savePath even when savePath
            # has no trailing slash.
            ggsave(filename = file.path(savePath, paste0("surv-", feat, ".png")), p.surv$plot,
                   width = 3.5, height = 3.5, dpi = 300)
        }
        ps[[feat]] <- p.surv$plot
    }
    return(ps)
}


#' generate10Xdata
#'
#' Generate a 10X-like data folder based on the data matrix and gene information,
#' which can be used directly to perform scCancer analysis.
#'
#' @param matrix A gene-cell matrix or data.frame.
#' @param gene.info A data.frame of gene information. It should contain two columns,
#' the first is gene Ensembl ID, and the second is gene symbol.
#' The order of the genes should be consistent with the row order of 'matrix'.
#' @param outPath A path to save the output files.
#' @param overwrite If TRUE and the output file already exists, the file is
#' silently overwritten, otherwise an exception is thrown. The default is "FALSE".
#'
#' @return NULL
#' @export
#'
#' @import Matrix R.utils
#'
generate10Xdata <- function(matrix, gene.info, outPath, overwrite = F){
    out.dir <- file.path(outPath, "filtered_feature_bc_matrix")
    mtx.file <- file.path(out.dir, "matrix.mtx")
    mtx.gz.file <- paste0(mtx.file, ".gz")

    # Fail before anything is written. Previously the error was only raised by
    # gzip() at the very end, after barcodes/features had already been replaced.
    if(!isTRUE(overwrite) && file.exists(mtx.gz.file)){
        stop("The output file already exists: ", mtx.gz.file,
             ". Set 'overwrite = TRUE' to replace it.\n")
    }

    if(!dir.exists(out.dir)){
        dir.create(out.dir, recursive = TRUE)
    }

    # Write 'x' as a gzipped, tab-separated table without header or row names.
    # on.exit() closes the connection even if write.table() fails.
    writeGzTable <- function(x, file.name){
        con <- gzfile(file.path(out.dir, file.name), "w")
        on.exit(close(con))
        write.table(x, con, quote = FALSE, col.names = FALSE, row.names = FALSE, sep = "\t")
    }

    writeGzTable(colnames(matrix), "barcodes.tsv.gz")

    # Third column of features.tsv: the feature type.
    gene.info[, 3] <- "Gene Expression"
    writeGzTable(gene.info, "features.tsv.gz")

    # Matrix-package objects are converted directly, so sparse input is not
    # expanded to a dense matrix; everything else goes through as.matrix() as before.
    if(!inherits(matrix, "Matrix")){
        matrix <- as.matrix(matrix)
    }
    writeMM(as(matrix, "CsparseMatrix"), file = mtx.file)
    gzip(mtx.file, overwrite = overwrite)
}



#' extractFiles
#'
#' Extract files from each sample's folder and rename them with sample's name.
#'
#' @param savePath A path of samples' result folder.
#' @param sampleNames A vector of samples' names (the subfolder names in 'savePath').
#' @param outputPath A path to save the extracted reports.
#' @param files The name of files you want to extract. The default is c("report-scStat.html", "report-scAnno.html").
#' @param subfolders The name of subfolders for the files you want to extract. The default is NULL.
#' It can be a character string, which means all files are under the subfolder.
#' It can also be a character string vector with same length as "files", which are corresponding to "files".
#'
#' @return NULL. A warning is raised for every sample with files that could not be copied.
#' @export
#'
extractFiles <- function(savePath, sampleNames, outputPath,
                         files = c("report-scStat.html", "report-scAnno.html"),
                         subfolders = NULL){
    message("[", Sys.time(), "] -----: extract files")
    # 'subfolders' must be NULL, a single value, or one value per file.
    if(!is.null(subfolders) && length(subfolders) != 1 && length(subfolders) != length(files)){
        stop("The lengths of files and subfolders are not equal.")
    }

    if(!dir.exists(file.path(outputPath))){
        dir.create(file.path(outputPath), recursive = TRUE)
    }

    for(sampleName in sampleNames){
        cur.path <- paste0(savePath, "/", sampleName, "/")
        # paste0() recycles 'subfolders' over 'files'; a NULL 'subfolders' contributes nothing.
        ori.files <- paste0(cur.path, subfolders, "/", files)
        new.files <- paste0(outputPath, "/", sampleName, "-", files)
        copied <- file.copy(ori.files, new.files, overwrite = TRUE)
        # file.copy() only reports failures through its return value; surface
        # them instead of dropping them silently.
        if(!all(copied)){
            warning("In 'extractFiles', sample '", sampleName, "': cannot copy ",
                    paste(ori.files[!copied], collapse = ", "), ".\n")
        }
    }
    invisible(NULL)
}



#' checkStatArguments
#'
#' Stop with an error if 'dataPath', 'species' or 'hg.mm.thres' is invalid.
#'
#' @param argList A list of arguments passed into 'runScStatistics'.
#'
#' @return NULL
#' @export
#'
checkStatArguments <- function(argList){
    if(!dir.exists(argList$dataPath)){
        stop("No such directory for the 'dataPath':",argList$dataPath ,".\n")
    }

    if(!(argList$species %in% c("human", "mouse"))){
        stop("The parameter 'species' should be one of the c(\"human\", \"mouse\").\n")
    }

    if(!is.numeric(argList$hg.mm.thres)){
        stop("The parameter 'hg.mm.thres' should be a float-point number within [0.5, 1].\n")
    }else if(argList$hg.mm.thres < 0.5 || argList$hg.mm.thres > 1){
        stop("The parameter 'hg.mm.thres' should be within [0.5, 1].\n")
    }
}


#' checkAnnoArguments
#'
#' @param argList A list of arguments passed into 'runScAnnotation'.
#'
#' @return NULL
#' @export
#'
checkAnnoArguments <- function(argList){
    # Input folders must exist on disk.
    data.dir <- argList$dataPath
    if(!dir.exists(data.dir)){
        stop("No such directory for the 'dataPath':",data.dir ,".\n")
    }

    stat.dir <- argList$statPath
    if(!dir.exists(stat.dir)){
        stop("No such directory for the 'statPath':",stat.dir ,".\n")
    }

    # Single-choice options.
    valid.species <- c("human", "mouse")
    if(!(argList$species %in% valid.species)){
        stop("The parameter 'species' should be one of the c(\"human\", \"mouse\").\n")
    }

    valid.genomes <- c("hg19", "hg38", "mm10")
    if(!(argList$genome %in% valid.genomes)){
        stop("The parameter 'genome' should be one of the c(\"hg19\", \"hg38\", \"mm10\").\n")
    }

    # 'anno.filter' may be NULL or any subset of the three gene categories.
    valid.filters <- c("mitochondrial", "ribosome", "dissociation")
    filters.ok <- all(argList$anno.filter %in% valid.filters)
    if(!is.null(argList$anno.filter) && !filters.ok){
        stop("The parameter 'anno.filter' should be some of c(\"mitochondrial\", \"ribosome\", \"dissociation\") or NULL.\n")
    }

    valid.doublet <- c("cxds", "bcds")
    if(!(argList$doublet.method %in% valid.doublet)){
        stop("The parameter 'doublet.method' should be one of the c(\"cxds\", \"bcds\").\n")
    }

    # Coordinates must be the t-SNE pair or the UMAP pair.
    is.tsne <- all(argList$coor.names == c("tSNE_1", "tSNE_2"))
    is.umap <- all(argList$coor.names == c("UMAP_1", "UMAP_2"))
    if(!is.tsne && !is.umap){
        stop("The parameter 'coor.names' should be c(\"tSNE_1\", \"tSNE_2\") or c(\"UMAP_1\", \"UMAP_2\").\n")
    }

    valid.gs.methods <- c("average", "GSVA")
    if(!(argList$geneSet.method %in% valid.gs.methods)){
        stop("The parameter 'geneSet.method' should be one of the c(\"average\", \"GSVA\").\n")
    }
}



#' checkCombArguments
#'
#' @param argList A list of arguments passed into 'runScCombination'.
708 | #' 709 | #' @return NULL 710 | #' @export 711 | #' 712 | checkCombArguments <- function(argList){ 713 | if(length(argList$single.savePaths) != length(argList$sampleNames)){ 714 | stop("The length of parameter 'single.savePaths' and 'sampleNames' should be equal.\n") 715 | } 716 | if(!(argList$comb.method %in% c("Harmony", "NormalMNN", "SeuratMNN", "Raw", "Regression", "LIGER"))){ 717 | stop("The parameter 'comb.method' should be one of the c(\"Harmony\", \"NormalMNN\", \"SeuratMNN\", \"Raw\", \"Regression\", \"LIGER\").\n") 718 | } 719 | } 720 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scCancer 2 | 3 | ## Introduction 4 | 5 | The `scCancer` package focuses on processing and analyzing droplet-based scRNA-seq data for cancer research. Besides basic data processing steps, this package takes several special considerations for cancer-specific features. 6 | 7 | The workflow of `scCancer` mainly consists of three modules: `scStatistics`, `scAnnotation`, and `scCombination`. 8 | * The `scStatistics` performs basic statistical analyses of raw data and quality control. 9 | * The `scAnnotation` performs functional data analyses and visualizations, such as low dimensional representation, clustering, cell type classification, cell malignancy estimation, cellular phenotype analyses, gene signature analyses, cell-cell interaction analyses, etc. 10 | * The `scCombination` performs multi-sample data integration, batch effect correction and analyses visualization. 11 | 12 | After the computational analyses, detailed and graphical reports are generated in user-friendly HTML format.
13 | 14 | scCancer-workflow 15 | 16 | ([Click to view larger workflow picture](http://lifeome.net/software/sccancer/scCancer-workflow.png)) 17 | 18 | 19 | ## System Requirements 20 | * R version: >= 3.5.0 (**suggest:** R 3.6, **not 4.0**) 21 | * **Hint: For R (version>=4.0) under Windows system**, the Rtools needs to be updated to version 4.0 from https://cran.r-project.org/bin/windows/Rtools/. So, if you are not familiar with R environment configuration, we **don't** suggest to use R (>=4.0). 22 | 23 | ## Current version 24 | 25 | * scCancer 2.2.1 (update at 2021.03.02) 26 | * [All version log](https://github.com/wguo-research/scCancer/wiki/Version-Log) 27 | 28 | ## Installation 29 | 30 | The detailed installation instruction can be found in the project [wiki]( https://github.com/wguo-research/scCancer/wiki/2.-Installation). 31 | 32 | 33 | ## Usage 34 | 35 | The vignette of `scCancer` can be found in the project [wiki]( https://github.com/wguo-research/scCancer/wiki). 36 | 37 | * [Quick start](https://github.com/wguo-research/scCancer/wiki/3.-Quick-start) 38 | * [Step by step introduction](https://github.com/wguo-research/scCancer/wiki/4.-Step-by-step-introduction) 39 | * [Other personalized settings](https://github.com/wguo-research/scCancer/wiki/5.-Other-personalized-settings) 40 | 41 | We provide an [example data](http://lifeome.net/software/sccancer/KC-example.tar.gz) of kidney cancer from 10X Genomics, and following are the generated HTML reports: 42 | 43 | * [`report-scStat.html`](http://lifeome.net/software/sccancer/KC-example-report-scStat.html) 44 | * [`report-scAnno.html`](http://lifeome.net/software/sccancer/KC-example-report-scAnno.html) 45 | 46 | For multi-datasets, following is a generated HTML report for three kidney cancer samples integration analysis: 47 | 48 | * [`report-scAnnoComb.html`](http://lifeome.net/software/sccancer/KC123-report-scAnnoComb.html) 49 | 50 | 51 | ## Citation 52 | Please use the following citation: 53 | 54 | [1] Wenbo Guo, 
Dongfang Wang, Shicheng Wang, Yiran Shan, Changyi Liu, Jin Gu, scCancer: a package for automated processing of single-cell RNA-seq data in cancer, _Briefings in Bioinformatics_, bbaa127, [https://doi.org/10.1093/bib/bbaa127](https://doi.org/10.1093/bib/bbaa127) 55 | 56 | [2] Zeyu Chen, Yuxin Miao, Zhiyuan Tan, Qifan Hu, Yanhong Wu, Xinqi Li, Wenbo Guo, Jin Gu, scCancer2: data-driven in-depth annotations of the tumor microenvironment at single-level resolution, Bioinformatics, Volume 40, Issue 2, February 2024, btae028, [https://doi.org/10.1093/bioinformatics/btae028](https://doi.org/10.1093/bioinformatics/btae028) 57 | 58 | ## License 59 | GPL-3 60 | -------------------------------------------------------------------------------- /inst/rds/cellTypeTemplates.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cellTypeTemplates.RDS -------------------------------------------------------------------------------- /inst/rds/cnvRef_Data-HM.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cnvRef_Data-HM.RDS -------------------------------------------------------------------------------- /inst/rds/cnvRef_Data-boneMarrow-MS.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cnvRef_Data-boneMarrow-MS.RDS -------------------------------------------------------------------------------- /inst/rds/cnvRef_SNN-HM.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cnvRef_SNN-HM.RDS 
-------------------------------------------------------------------------------- /inst/rds/cnvRef_SNN-boneMarrow-MS.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cnvRef_SNN-boneMarrow-MS.RDS -------------------------------------------------------------------------------- /inst/rmd/SoupX.Rmd: -------------------------------------------------------------------------------- 1 | 2 | ### `r h.i`.`r h.ii`.2 Ambient RNAs contamination fraction estimation 3 | 4 | We refer to the algorithm of [`SoupX`](https://github.com/constantAmateur/SoupX) 5 | to estimate the contamination fraction of ambient RNAs from lysed cells. 6 | 7 | The estimated contamination fraction is ` `r results$contamination.frac * 100`% `. 8 | 9 | -------------------------------------------------------------------------------- /inst/rmd/cellCalling.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | * The number of droplets containing UMI (nUMI > 0) is ` `r results$nList[1]` `. 4 | 5 | ```{r echo=FALSE, results='asis', eval=!results$run.emptydrop} 6 | cat("* Using the supplied cell calling results(filtered data), `", results$nList[2], "` cells are identified (min.nUMI = `", results$min.nUMI, "`).\n", sep = "") 7 | ``` 8 | 9 | ```{r echo=FALSE, results='asis', eval=results$run.emptydrop} 10 | cat("* Cell calling result(fitered data) cannot be found, so we use the R package [`DropletUtils`](https://bioconductor.org/packages/release/bioc/html/DropletUtils.html)` to identify cell.\n") 11 | cat("* After cell calling, `", results$nList[2], "` cells are identified (min.nUMI = `", results$min.nUMI, "`).\n", sep = "") 12 | ``` 13 | 14 | * Following are two plots showing the distribution of `nUMI` for cells and empty droplets identified. 
15 | 16 | ```{r nUMI, echo=F, message=F, warning=F, dpi=300, fig.height=3, fig.width=8} 17 | plot_grid(results$p.cells.1, results$p.cells.2, ncol = 2) 18 | ``` 19 |

(Hi-res image: left, right)

20 | -------------------------------------------------------------------------------- /inst/rmd/cellCycle.Rmd: -------------------------------------------------------------------------------- 1 | 2 | ### `r h.i`.`r h.ii` Cell cycle estimation 3 | The estimated cell cycles can be found in the column `CellCycle.score` of the table file 4 | [cellAnnotation.txt](./cellAnnotation.txt). 5 | 6 | Here is the scatter plot colored by estimated cell cycle score. 7 | 8 | ```{r cellCyclePlot, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='60%'} 9 | results$cellCycle.plot 10 | ``` 11 |

(Hi-res image: view)

12 | 13 | ```{r echo=F} 14 | h.ii <- h.ii + 1 15 | ``` 16 | -------------------------------------------------------------------------------- /inst/rmd/cellInteraction.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### `r h.i`.`r h.ii` Cell interaction analysis 4 | In order to analyze the ligand-receptor interactions between the various cell types in cancer micro-environment, 5 | we use a ligand-receptor database [`FANTOM5`](http://fantom.gsc.riken.jp/data/), and estimate the interaction scores among cell sets (the default is clusters). 6 | 7 | The interaction scores between all ligand-receptor pairs and all cell sets can be found in the table file 8 | [InteractionScore.txt](./InteractionScore.txt). 9 | 10 | Here is a plot showing the number of ligand-receptor pairs with score larger than 0.1 among clusters. 11 | The size of point means the number of ligand-receptor pairs with scores larger than 0.1. 12 | The color of point means the sum of the ligand-receptor pairs scores. 13 | To compare conveniently, the bottom subplot shows the predicted cell type fraction of each cell set. 14 | 15 | ```{r cellInteractionPlot, echo=F, message=F, warning=F, dpi=300, fig.width=7, fig.height=6.5, fig.align="center", out.width='80%'} 16 | grid::grid.draw(results$inter.plot) 17 | ``` 18 | 19 |

(Hi-res image: view)

20 | 21 | 22 | Following are the top 10 scoring ligand-receptor pairs. 23 | ```{r interaction.summary, echo=F} 24 | format(head(results$interaction.score, 10), digits = 4) 25 | ``` 26 | 27 | 28 | ```{r echo=F} 29 | h.ii <- h.ii + 1 30 | ``` 31 | -------------------------------------------------------------------------------- /inst/rmd/cellTypePred.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### `r h.i`.`r h.ii` Cancer micro-environmental cell types annotation 4 | In order to annotate major microenvironment cell types, including endothelial cells, 5 | fibroblasts, and immune cells (CD4+ T cells, CD8+ T cells, B cells, natural killer cells, 6 | and myeloid cells), we use a one-class logistic regression (OCLR) model to perform prediction. 7 | The predicted cell type results can be found in the column `Cell.Type` of the table file 8 | [cellAnnotation.txt](./cellAnnotation.txt). 9 | And the correlation coefficients with cell type templates are in columns `*.corr`. 10 | 11 | Here is the scatter plot colored by predicted cell types. 12 | 13 | ```{r cellTypePlot, echo=F, message=F, warning=F, dpi=500, fig.width=5.2, fig.height=4, fig.align="center", out.width='80%'} 14 | results$cellType.plot$p.type 15 | ``` 16 |

(Hi-res image: view)

17 | 18 | 19 | Here is a bar plot showing the relationship between cell cluster and cell type annotation. 20 | 21 | ```{r barPlot, echo=F, message=F, warning=F, dpi=300, fig.width=6, fig.height=4, fig.align="center", out.width='80%'} 22 | results$cellType.plot$p.bar 23 | ``` 24 |

(Hi-res image: view)

25 | 26 | 27 | 28 | ```{r echo=F} 29 | h.ii <- h.ii + 1 30 | ``` 31 | -------------------------------------------------------------------------------- /inst/rmd/contamination.Rmd: -------------------------------------------------------------------------------- 1 | 2 | * **Ambient RNAs contamination removing.** Taking advantage of the algorithm of [`SoupX`](https://github.com/constantAmateur/SoupX), we use the contamination fraction ` `r results$contamination.frac * 100`% ` to reduce the ambient RNAs' influence. 3 | -------------------------------------------------------------------------------- /inst/rmd/diffExpr.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### `r h.i`.`r h.ii` Differential expression analysis 4 | In order to extract the features of each cluster, we perform differential expression analysis by running [`Seurat`](https://satijalab.org/seurat/) functions. 5 | 6 | Here is a heatmap plot showing the top `r results$n.markers` differentially expressed genes for every cluster compared to all remaining cells. All information of differentially expressed genes can be found [here](./diff.expr.genes/). 7 | 8 | ```{r DEplot, echo=F, message=F, warning=F, dpi=800, fig.width=8, fig.height=results$DEplot.height} 9 | results$seurat.plots$p.de.heatmap 10 | ``` 11 |

(Hi-res image: view)

12 | 13 | ```{r echo=F} 14 | h.ii <- h.ii + 1 15 | ``` 16 | -------------------------------------------------------------------------------- /inst/rmd/doublet.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### `r h.i`.`r h.ii` Doublet score estimation 4 | 5 | ```{r echo=FALSE, results='asis'} 6 | if(results$doublet.method == "bcds"){ 7 | cat("We estimate doublet score by using the binary classification based algorithm `bcds` in R package [scds](https://bioconductor.org/packages/release/bioc/html/scds.html).\n") 8 | }else if(results$doublet.method == "cxds"){ 9 | cat("We estimate doublet score by using the co-expression based algorithm `cxds` in R package [scds](https://bioconductor.org/packages/release/bioc/html/scds.html).\n") 10 | }else if(results$doublet.method == "DoubletFinder"){ 11 | cat("We estimate doublet score by using the algorithm in R package [DoubletFinder](https://github.com/chris-mcginnis-ucsf/DoubletFinder).\n") 12 | } 13 | ``` 14 | 15 | The estimated doublet scores can be found in the column `doublet.score` of the table file 16 | [cellAnnotation.txt](./cellAnnotation.txt). 17 | 18 | Here is the scatter plot colored by the number of UMIs(left) and the estimated doublet scores(right). 19 | 20 | ```{r doubletPlot, echo=F, message=F, warning=F, dpi=500, fig.width=10, fig.height=4, fig.align="center"} 21 | plot_grid(results$nUMI.plot, results$doublet.plot, ncol = 2) 22 | ``` 23 |

(Hi-res image: left, right)

24 | 25 | 26 | ```{r echo=F} 27 | h.ii <- h.ii + 1 28 | ``` 29 | -------------------------------------------------------------------------------- /inst/rmd/exprProgram.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### `r h.i`.`r h.ii` Expression programs identification 4 | In order to identify potential expression program signatures in an unsupervised manner, 5 | we apply non-negative matrix factorization (NMF) to the centered and non-negative transformed expression matrix. 6 | The input for number of identified programs (the rank in NMF) is ` `r results$nmf.rank` `. 7 | 8 | Following is the heatmap for identified cells' expression programs. 9 | 10 | ```{r exprProgramplot, echo=F, message=F, warning=F, dpi=500, fig.width=10, fig.height=results$exprProgPlot.height} 11 | results$exprProgram.plot 12 | ``` 13 |

(Hi-res image: view)

14 | 15 | 16 | After this step, `scCancer` saved following results files to the folder '[expr.programs](./expr.programs/)': 17 | * The left matrix `W` (genes * programs): [W-gene-program.txt](./expr.programs/W-gene-program.txt). 18 | * The right matrix `H` (programs * cells): [H-program-cell.txt](./expr.programs/H-program-cell.txt). 19 | * The relative genes of each programs: [program.gene.value.txt](./expr.programs/program.gene.value.txt). 20 | 21 | 22 | ```{r echo=F} 23 | h.ii <- h.ii + 1 24 | ``` 25 | -------------------------------------------------------------------------------- /inst/rmd/filterCell.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | * **Cell QC.** According to the thresholds in [cell.QC.thres.txt](./cell.QC.thres.txt) (as shown below), 4 | we get ` `r dim(results$cell.annotation)[1]` ` cells. 5 | 6 |
7 | ```{r thresTable, echo=F, warning=F} 8 | # results$filter.thres %>% knitr::kable("html") 9 | kable(results$filter.thres) 10 | ``` 11 |
12 | 13 | -------------------------------------------------------------------------------- /inst/rmd/filterGene.Rmd: -------------------------------------------------------------------------------- 1 | 2 | * **Gene QC.** After filtering the mitochondrial, ribosome, dissociation genes and genes expressed in too few (`nCell < `r results$nCell.min` `) or too many (`background percent >= `r results$bgPercent.max` `) cells, we get ` `r dim(results$gene.manifest)[1]` ` genes ([the filtered genes list file](./gene.manifest.filter.txt)). 3 | -------------------------------------------------------------------------------- /inst/rmd/geneSets.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### `r h.i`.`r h.ii` Gene set signature scores calculation 4 | In order to analyze cells' expression at the gene set (signature) level, we calculate the signature scores for each cell. 5 | 6 | ```{r echo=FALSE, results='asis'} 7 | if(is.null(results$geneSets)){ 8 | cat("* Gene sets: the default 50 hallmark gene sets from [MSigDB](http://software.broadinstitute.org/gsea/msigdb/).\n") 9 | }else{ 10 | cat("* Gene sets: the input gene sets list.\n") 11 | } 12 | if(results$geneSet.method == "average"){ 13 | cat("* Method: relative average expression levels.") 14 | }else if(results$geneSet.method == "GSVA"){ 15 | cat("* Method: [GSVA](https://www.bioconductor.org/packages/release/bioc/html/GSVA.html).") 16 | } 17 | ``` 18 | 19 | The calculated gene set signature scores can be found in the column `GS__*` of the table file 20 | [cellAnnotation.txt](./cellAnnotation.txt). 21 | 22 | Following is the heatmap for these signatures scores. 23 | 24 | ```{r geneSetPlot, echo=F, message=F, warning=F, dpi=500, fig.width=10, fig.height=results$geneSetPlot.height} 25 | results$geneSet.plot 26 | ``` 27 |

(Hi-res image: view)

28 | 29 | ```{r echo=F} 30 | h.ii <- h.ii + 1 31 | ``` 32 | -------------------------------------------------------------------------------- /inst/rmd/main-scAnno.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "scCancer" 3 | author: "G-Lab" 4 | date: "2019/6/11" 5 | output: html_document 6 | --- 7 | 8 | 52 | 53 | 54 | ```{r setting, include=FALSE} 55 | options(knitr.table.format = "html") 56 | options(scipen=10) 57 | knitr::opts_chunk$set(echo = TRUE, fig.path = file.path(results$savePath, 'report-figures//')) 58 | 59 | title <- "scCancer" 60 | if(!is.null(results$sampleName)){ 61 | title <- paste0(results$sampleName, " - ", title) 62 | } 63 | 64 | if(!is.null(results$authorName)){ 65 | userName <- results$authorName 66 | }else{ 67 | userName <- Sys.getenv("USERNAME") 68 | } 69 | reportMark <- Sys.time() 70 | if(userName != ""){ 71 | reportMark <- paste0(userName, " , ", reportMark) 72 | } 73 | 74 | h.i <- 1 75 | h.ii <- 1 76 | ``` 77 | 78 | 79 | # `r title` 80 | -------------------------------- 81 |

`r reportMark`

82 | 83 | 84 | 85 | 86 | ## `r h.i` Read data 87 | Read the expression data and filter cells and genes according to quality control steps. 88 | 89 | ```{r contamination, child=system.file("rmd", "contamination.Rmd", package = "scCancer"), eval = results$bool.rmContamination} 90 | ``` 91 | 92 | ```{r filterCell, child=system.file("rmd", "filterCell.Rmd", package = "scCancer"), eval = results$bool.filter.cell} 93 | ``` 94 | 95 | ```{r filterGene, child=system.file("rmd", "filterGene.Rmd", package = "scCancer"), eval = results$bool.filter.gene} 96 | ``` 97 | 98 | ```{r echo=F} 99 | h.i <- h.i + 1 100 | ``` 101 | 102 | 103 | 104 | 105 | ## `r h.i` Data preprocessing 106 | 107 | After the quality control, we perform the following preprocessing steps based on some functions of the R package [`Seurat V3`](https://satijalab.org/seurat/). 108 | 109 | * **Normalization.** Normalize the raw counts data to TPMs (transcripts-per-million) and log-transform them. 110 | * **Scale data.** Remove unwanted sources of variations (` `r results$vars.to.regress` `) by regression and center the resulting residuals. 111 | * **Highly variable genes.** Calculate the average expression and dispersion of each gene across all cells to select highly variable genes (HVGs). 112 | 113 | ```{r hvgPlot, echo=F, message=F, warning=F, dpi=500, fig.width=8, fig.height=4, fig.align="center", out.width='70%'} 114 | results$seurat.plots$p.hvg 115 | ``` 116 |

(Hi-res image: view)

117 | * **PCA.** Perform principal component analysis (PCA) and select PCs to perform clustering and visualization. 118 | * **Visualization.** Using t-SNE or UMAP to present each single cell in two-dimensional space. 119 | 120 | 121 | 122 | ```{r echo=F} 123 | h.i <- h.i + 1 124 | ``` 125 | 126 | 127 | 128 | 129 | 130 | ## `r h.i` Cells annotation 131 | 132 | 133 | 134 | ### `r h.i`.`r h.ii` Markers expression profile 135 | Here are the scatter plots colored by the normalized expression of some cell type markers. 136 | 137 |
138 | 139 | ```{r echo=FALSE, results='asis'} 140 | if(results$bool.add.features){ 141 | if(results$species == "human"){ 142 | cat("| Cell Type | Markers |\n", sep="") 143 | cat("| :-------------- | :----------------------- |\n", sep="") 144 | cat("| T cells (CD4+) | PTPRC, CD3D, CD4 |\n", sep="") 145 | cat("| T cells (CD8+) | PTPRC, CD3D, CD8A, CD8B |\n", sep="") 146 | cat("| B cells | PTPRC, CD79A |\n", sep="") 147 | cat("| NK cell | PTPRC, NKG7 |\n", sep="") 148 | cat("| Myeloid cells | PTPRC, LYZ |\n", sep="") 149 | cat("| Endothelial | PLVAP |\n", sep="") 150 | cat("| Fibroblast | ACTA2 |\n", sep="") 151 | cat("| Epithelial | EPCAM, KRT8 |\n", sep="") 152 | }else{ 153 | cat("| Cell Type | Markers |\n", sep="") 154 | cat("| :-------------- | :----------------------- |\n", sep="") 155 | cat("| T cells (CD4+) | Ptprc, Cd3d, Cd4 |\n", sep="") 156 | cat("| T cells (CD8+) | Ptprc, Cd3d, Cd8a, Cd8b |\n", sep="") 157 | cat("| B cells | Ptprc, Cd79a |\n", sep="") 158 | cat("| NK cell | Ptprc, Nkg7 |\n", sep="") 159 | cat("| Myeloid cells | Ptprc, Lyz1, Lyz2 |\n", sep="") 160 | cat("| Endothelial | Plvap |\n", sep="") 161 | cat("| Fibroblast | Acta2 |\n", sep="") 162 | cat("| Epithelial | Epcam, Krt8 |\n", sep="") 163 | } 164 | } 165 | if(!is.null(results$show.features)){ 166 | cat("| Input genes | ", paste(results$show.features, collapse=", "), " |\n", sep="") 167 | } 168 | ``` 169 | 170 |
171 | 172 | ```{r markersPlot, eval=!is.null(results$seurat.plots$p.markers.all), echo=F, message=F, warning=F, dpi=500, fig.width=8, fig.height=results$markersPlot.height} 173 | results$seurat.plots$p.markers.all 174 | ``` 175 |

(Hi-res image: view, view single)

176 | 177 | 178 | Following are some statistical indicators of these genes. 179 | ```{r, echo=F, message=F, warning=F} 180 | final.genes <- names(results$seurat.plots$ps.markers) 181 | gene.manifest <- read.table(file.path(statPath, 'geneManifest.txt'), header = T, sep = "\t") 182 | final.gene.manifest <- subset(gene.manifest, Symbol %in% final.genes) 183 | if("bg.percent" %in% colnames(final.gene.manifest)){ 184 | show.cols <- c("Symbol", "EnsemblID", "nCell", "bg.percent", "detect.rate", "prop.median") 185 | }else{ 186 | show.cols <- c("Symbol", "EnsemblID", "nCell", "detect.rate", "prop.median") 187 | } 188 | final.gene.manifest <- final.gene.manifest[order(final.gene.manifest$Symbol), show.cols] 189 | rownames(final.gene.manifest) <- final.gene.manifest$Symbol 190 | rm(gene.manifest) 191 | print(format(final.gene.manifest, digits = 3, scientific = T)) 192 | ``` 193 | 194 | ```{r echo=F} 195 | h.ii <- h.ii + 1 196 | ``` 197 | 198 | 199 | 200 | 201 | 202 | ### `r h.i`.`r h.ii` Clustering 203 | In order to identify clusters of all single cells, we perform a graph-based clustering by running [`Seurat`](https://satijalab.org/seurat/) functions. 204 | The cluster information can be found in the column `Cluster` of the table file 205 | [cellAnnotation.txt](./cellAnnotation.txt). 206 | 207 | Here is the t-SNE plot colored by cell clusters. 208 | 209 | ```{r clusterPlotTsne, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='80%'} 210 | results$seurat.plots$p.cluster.tsne 211 | ``` 212 |

(Hi-res image: view)

213 | 214 | 215 | ```{r umap, child=system.file("rmd", "umap.Rmd", package = "scCancer"), eval = !is.null(results$seurat.plots$p.cluster.umap)} 216 | ``` 217 | 218 | 219 | ```{r echo=F} 220 | h.ii <- h.ii + 1 221 | ``` 222 | 223 | 224 | 225 | ```{r doublet, child=system.file("rmd", "doublet.Rmd", package = "scCancer"), eval = results$bool.runDoublet} 226 | ``` 227 | 228 | 229 | 230 | ```{r diffExpr, child=system.file("rmd", "diffExpr.Rmd", package = "scCancer"), eval = results$bool.runDiffExpr} 231 | ``` 232 | 233 | 234 | 235 | 236 | ```{r cellType, child=system.file("rmd", "cellTypePred.Rmd", package = "scCancer"), eval = results$bool.runCellClassify} 237 | ``` 238 | 239 | 240 | 241 | 242 | ```{r malignancy, child=system.file("rmd", "malignancy.Rmd", package = "scCancer"), eval = results$bool.runMalignancy} 243 | ``` 244 | 245 | 246 | -------------------------------- 247 | ```{r echo=FALSE, results='asis', eval = results$bool.intraTumor} 248 | # if(is.null(results$tumor.clusters)){ 249 | # cat("#### According to the results of cell type prediction and cell malignancy estimation, ", 250 | # "we couldn't identify tumor clusters, ", 251 | # "so we use all clusters to perform following heterogeneity analyses.\n", sep = "") 252 | # }else{ 253 | # # cat("#### According to the results of cell type prediction and cell malignancy estimation, we identify the clusters `", 254 | # # str_c(results$tumor.clusters, collapse = ", "), 255 | # # "` as tumor clusters, and following intra-tumor heterogeneity analyses mainly focus on them.\n", sep = "") 256 | # cat("#### According to the results of cell type prediction and cell malignancy estimation, we identify the tumor clusters, ", 257 | # "and following intra-tumor heterogeneity analyses mainly focus on them.\n", sep = "") 258 | # } 259 | cat("#### In order to analyze intra-tumor heterogeneity, we select tumor clusters firstly based on the results of cell type prediction and cell malignancy estimation.\n") 260 | 
if(is.null(results$tumor.clusters)){ 261 | cat("#### Warning: Here, we couldn't identify the tumor clusters, so we use all clusters to perform following analyses.\n") 262 | }else{ 263 | cat("#### Here, we identify cluster `", str_c(results$tumor.clusters, collapse = ", "), 264 | "` as tumor cells. And following analyses mainly focus on them.\n", sep = "") 265 | } 266 | ``` 267 | 268 | 269 | 270 | 271 | ```{r cellCycle, child=system.file("rmd", "cellCycle.Rmd", package = "scCancer"), eval = results$bool.runCellCycle} 272 | ``` 273 | 274 | 275 | 276 | 277 | ```{r stemness, child=system.file("rmd", "stemness.Rmd", package = "scCancer"), eval = results$bool.runStemness} 278 | ``` 279 | 280 | 281 | 282 | 283 | ```{r geneSets, child=system.file("rmd", "geneSets.Rmd", package = "scCancer"), eval = results$bool.runGeneSets} 284 | ``` 285 | 286 | 287 | 288 | 289 | ```{r exprProgram, child=system.file("rmd", "exprProgram.Rmd", package = "scCancer"), eval = results$bool.runExprProgram} 290 | ``` 291 | 292 | 293 | 294 | 295 | ```{r cellInteraction, child=system.file("rmd", "cellInteraction.Rmd", package = "scCancer"), eval = results$bool.runInteraction} 296 | ``` 297 | 298 | 299 | 300 | 301 | ```{r echo=F} 302 | h.i <- h.i + 1 303 | ``` 304 | 305 | 306 | ## `r h.i` Output 307 | 308 | ```{r echo=F} 309 | r.i <- 7 310 | ``` 311 | 312 | Running this script generates following files: 313 | 314 | 1. **Html report** : 315 | [report-scAnno.html](./report-scAnno.html). 316 | 2. **Markdown report** : 317 | [report-scAnno.md](./report-scAnno.md). 318 | 3. **Figure files** : 319 | [figures/](./figures/). 320 | 4. **Figures used in the report** : 321 | [report-figures/](./report-figures/). 322 | 5. **Seurat object** : 323 | [expr.RDS](./). 324 | 6. **Annotation of cells** : 325 | [cellAnnotation.txt](./cellAnnotation.txt). 326 | ```{r echo=FALSE, results='asis', eval=results$bool.runDiffExpr} 327 | cat(r.i, ". 
**Differentially expressed genes' information for all clusters** : ", sep = "") 328 | cat("[diff.expr.genes/](./diff.expr.genes/).\n", sep = "") 329 | r.i <- r.i + 1 330 | ``` 331 | ```{r echo=FALSE, results='asis', eval=results$bool.runMalignancy} 332 | cat(r.i, ". **Results of malignancy estimation** : [malignancy/](./malignancy/).\n", sep = "") 333 | r.i <- r.i + 1 334 | ``` 335 | ```{r echo=FALSE, results='asis', eval=results$bool.runExprProgram} 336 | cat(r.i, ". **Results of expression programs identification** : [expr.programs/](./expr.programs/).\n", sep = "") 337 | r.i <- r.i + 1 338 | ``` 339 | ```{r echo=FALSE, results='asis', eval=results$bool.runInteraction} 340 | cat(r.i, ". **Cell clusters interactions scores** : [InteractionScore.txt](./InteractionScore.txt).\n", sep = "") 341 | r.i <- r.i + 1 342 | ``` 343 | 344 | 345 | 346 |
347 | 348 | -------------------------------------- 349 | © [G-Lab](http://lifeome.net/glab/jgu/), [Tsinghua University](http://www.tsinghua.edu.cn) 350 | 351 | -------------------------------------------------------------------------------- /inst/rmd/main-scAnnoComb.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "scCancer" 3 | author: "G-Lab" 4 | date: "2019/6/11" 5 | output: html_document 6 | --- 7 | 8 | 52 | 53 | 54 | ```{r setting, include=FALSE} 55 | options(knitr.table.format = "html") 56 | options(scipen=10) 57 | knitr::opts_chunk$set(echo = TRUE, fig.path = file.path(results$savePath, 'report-figures//')) 58 | 59 | title <- "scCancer" 60 | if(!is.null(results$combName)){ 61 | title <- paste0(results$combName, " - ", title) 62 | } 63 | 64 | if(!is.null(results$authorName)){ 65 | userName <- results$authorName 66 | }else{ 67 | userName <- Sys.getenv("USERNAME") 68 | } 69 | reportMark <- Sys.time() 70 | if(userName != ""){ 71 | reportMark <- paste0(userName, " , ", reportMark) 72 | } 73 | 74 | h.i <- 1 75 | h.ii <- 1 76 | ``` 77 | 78 | 79 | # `r title` 80 | -------------------------------- 81 |

`r reportMark`

82 | 83 | 84 | 85 | ## `r h.i` Read data 86 | 87 | The input samples are: 88 | 89 |
90 | 91 | ```{r echo=FALSE, results='asis'} 92 | cat("| Sample name | #cells after QC |\n", sep="") 93 | cat("| :-------------- | :----------------------- |\n", sep="") 94 | for(s in results$sampleNames){ 95 | cat("| ", s, " | ", sum(results$cell.annotation$sample == s), " |\n", sep = "") 96 | } 97 | ``` 98 | 99 |
100 | 101 | 102 | ```{r echo=F} 103 | h.i <- h.i + 1 104 | ``` 105 | 106 | 107 | 108 | 109 | 110 | ## `r h.i` Data preprocessing 111 | 112 | After the quality control, we perform following preprocessing steps based on some functions of the R package [`Seurat V3`](https://satijalab.org/seurat/). 113 | 114 | * **Normalization.** Normalize the raw counts data to TPMs (transcripts-per-million) and log-transform them. 115 | * **Scale data.** Remove unwanted sources of variations (` `r results$vars.to.regress` `) by regression and center the resulting residuals. 116 | * **Highly variable genes.** Calculate the average expression and dispersion of each gene across all cells to select highly variable genes (HVGs). 117 | 118 | ```{r hvgPlot, echo=F, message=F, warning=F, eval=results$bool.plotHVG, dpi=500, fig.width=8, fig.height=4, fig.align="center", out.width='70%'} 119 | results$seurat.plots$p.hvg 120 | ``` 121 | ```{r eval = results$bool.plotHVG, echo = F, results='asis'} 122 | cat("

(Hi-res image: view)

\n") 123 | ``` 124 | * **PCA.** Perform principal component analysis (PCA) and select PCs to perform clustering and visualization. 125 | * **Visualization.** Using t-SNE or UMAP to present each single cell in two-dimensional space. 126 | 127 | 128 | 129 | ```{r echo=F} 130 | h.i <- h.i + 1 131 | ``` 132 | 133 | 134 | 135 | 136 | 137 | ## `r h.i` Cells annotation 138 | 139 | 140 | 141 | ### `r h.i`.`r h.ii` Markers expression profile 142 | Here are the scatter plots colored by the normalized expression of some cell type markers. 143 | 144 |
```{r echo=FALSE, results='asis'}
# Print the marker table as a markdown pipe table (results='asis').
#   results$bool.add.features : whether the built-in marker rows are listed
#   results$species           : "human" selects the human symbols, anything
#                               else falls back to the mouse symbols
#   results$show.features     : optional user-supplied genes ("Input genes" row)
bool.default.markers <- isTRUE(results$bool.add.features)
bool.input.genes <- !is.null(results$show.features)

# A pipe table needs its header and delimiter rows. Emit them whenever at
# least one body row follows, so the "Input genes" row still renders as a
# table when the built-in markers are switched off.
if(bool.default.markers || bool.input.genes){
    cat("| Cell Type | Markers |\n", sep="")
    cat("| :-------------- | :----------------------- |\n", sep="")
}

if(bool.default.markers){
    if(results$species == "human"){
        cat("| T cells (CD4+) | PTPRC, CD3D, CD4 |\n", sep="")
        cat("| T cells (CD8+) | PTPRC, CD3D, CD8A, CD8B |\n", sep="")
        cat("| B cells | PTPRC, CD79A |\n", sep="")
        cat("| NK cell | PTPRC, NKG7 |\n", sep="")
        cat("| Myeloid cells | PTPRC, LYZ |\n", sep="")
        cat("| Endothelial | PLVAP |\n", sep="")
        cat("| Fibroblast | ACTA2 |\n", sep="")
        cat("| Epithelial | EPCAM, KRT8 |\n", sep="")
    }else{
        cat("| T cells (CD4+) | Ptprc, Cd3d, Cd4 |\n", sep="")
        cat("| T cells (CD8+) | Ptprc, Cd3d, Cd8a, Cd8b |\n", sep="")
        cat("| B cells | Ptprc, Cd79a |\n", sep="")
        cat("| NK cell | Ptprc, Nkg7 |\n", sep="")
        cat("| Myeloid cells | Ptprc, Lyz1, Lyz2 |\n", sep="")
        cat("| Endothelial | Plvap |\n", sep="")
        cat("| Fibroblast | Acta2 |\n", sep="")
        cat("| Epithelial | Epcam, Krt8 |\n", sep="")
    }
}

if(bool.input.genes){
    cat("| Input genes | ", paste(results$show.features, collapse=", "), " |\n", sep="")
}
```
178 | 179 | ```{r markersPlot, eval=!is.null(results$seurat.plots$p.markers.all), echo=F, message=F, warning=F, dpi=500, fig.width=8, fig.height=results$markersPlot.height} 180 | results$seurat.plots$p.markers.all 181 | ``` 182 |

(Hi-res image: view, view single)

183 | 184 | 185 | ```{r echo=F} 186 | h.ii <- h.ii + 1 187 | ``` 188 | 189 | 190 | 191 | 192 | ### `r h.i`.`r h.ii` Clustering 193 | In order to identify clusters of all single cells, we perform a graph-based clustering by running [`Seurat`](https://satijalab.org/seurat/) functions. 194 | The cluster information can be found in the column `Cluster` of the table file 195 | [cellAnnotation.txt](./cellAnnotation.txt). 196 | 197 | Here is the t-SNE plot colored by cell clusters. 198 | 199 | ```{r clusterPlotTsne, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='80%'} 200 | results$seurat.plots$p.cluster.tsne 201 | ``` 202 |

(Hi-res image: view)

203 | 204 | 205 | ```{r umap, child=system.file("rmd", "umap.Rmd", package = "scCancer"), eval = !is.null(results$seurat.plots$p.cluster.umap)} 206 | ``` 207 | 208 | 209 | ```{r echo=F} 210 | h.ii <- h.ii + 1 211 | ``` 212 | 213 | 214 | 215 | 216 | ### `r h.i`.`r h.ii` Sample source 217 | The sample source information can be found in the column `sample` of the table file 218 | [cellAnnotation.txt](./cellAnnotation.txt). The method of batch effect correction is ` `r results$comb.method` `. 219 | 220 | Here is the scatter plot colored by sample source. 221 | 222 | ```{r samplePlot, echo=F, message=F, warning=F, dpi=500, fig.width=7, fig.height=5, fig.align="center", out.width='80%'} 223 | results$p.sample 224 | ``` 225 |

(Hi-res image: view)

226 | 227 | Here is a bar plot showing the relationship between cell cluster and sample source. 228 | 229 | ```{r sampleBarPlot, echo=F, message=F, warning=F, dpi=300, fig.width=6, fig.height=3, fig.align="center", out.width='80%'} 230 | results$p.bar.sample 231 | ``` 232 |

(Hi-res image: view)

233 | 234 | ```{r echo=F} 235 | h.ii <- h.ii + 1 236 | ``` 237 | 238 | 239 | 240 | 241 | ```{r diffExpr, child=system.file("rmd", "diffExpr.Rmd", package = "scCancer"), eval = results$bool.runDiffExpr} 242 | ``` 243 | 244 | 245 | 246 | 247 | ```{r cellType, child=system.file("rmd", "cellTypePred.Rmd", package = "scCancer"), eval = results$bool.runCellClassify} 248 | ``` 249 | 250 | 251 | 252 | 253 | ```{r malignancy, child=system.file("rmd", "malignancy.Rmd", package = "scCancer"), eval = (results$bool.runMalignancy & ("cnvList" %in% names(results)))} 254 | ``` 255 | 256 | ```{r malignancyPlot, child=system.file("rmd", "malign-comb.Rmd", package = "scCancer"), eval = (results$bool.runMalignancy & !("cnvList" %in% names(results)))} 257 | ``` 258 | 259 | 260 | 261 | 262 | -------------------------------- 263 | ```{r echo=FALSE, results='asis', eval = results$bool.intraTumor} 264 | # if(is.null(results$tumor.clusters)){ 265 | # cat("#### According to the results of cell type prediction and cell malignancy estimation, ", 266 | # "we couldn't identify tumor clusters, ", 267 | # "so we use all clusters to perform following heterogeneity analyses.\n", sep = "") 268 | # }else{ 269 | # # cat("#### According to the results of cell type prediction and cell malignancy estimation, we identify the clusters `", 270 | # # str_c(results$tumor.clusters, collapse = ", "), 271 | # # "` as tumor clusters, and following intra-tumor heterogeneity analyses mainly focus on them.\n", sep = "") 272 | # cat("#### According to the results of cell type prediction and cell malignancy estimation, we identify the tumor clusters, ", 273 | # "and following intra-tumor heterogeneity analyses mainly focus on them.\n", sep = "") 274 | # } 275 | cat("#### In order to analyze intra-tumor heterogeneity, we select tumor clusters firstly based on the results of cell type prediction.\n") 276 | if(is.null(results$tumor.clusters)){ 277 | cat("#### Warning: Here, we couldn't identify the tumor clusters, so we 
use all clusters to perform following analyses.\n") 278 | }else{ 279 | cat("#### Here, we identify cluster `", str_c(results$tumor.clusters, collapse = ", "), 280 | "` as tumor cells. And following analyses mainly focus on them.\n", sep = "") 281 | } 282 | ``` 283 | 284 | 285 | 286 | 287 | ```{r cellCycle, child=system.file("rmd", "cellCycle.Rmd", package = "scCancer"), eval = results$bool.runCellCycle} 288 | ``` 289 | 290 | 291 | 292 | 293 | ```{r stemness, child=system.file("rmd", "stemness.Rmd", package = "scCancer"), eval = results$bool.runStemness} 294 | ``` 295 | 296 | 297 | 298 | 299 | ```{r geneSets, child=system.file("rmd", "geneSets.Rmd", package = "scCancer"), eval = results$bool.runGeneSets} 300 | ``` 301 | 302 | 303 | 304 | 305 | ```{r exprProgram, child=system.file("rmd", "exprProgram.Rmd", package = "scCancer"), eval = results$bool.runExprProgram} 306 | ``` 307 | 308 | 309 | 310 | 311 | ```{r echo=F} 312 | h.i <- h.i + 1 313 | ``` 314 | 315 | 316 | ## `r h.i` Output 317 | 318 | ```{r echo=F} 319 | r.i <- 7 320 | ``` 321 | 322 | Running this script generates following files: 323 | 324 | 1. **Html report** : 325 | [report-scAnnoComb.html](./report-scAnnoComb.html). 326 | 2. **Markdown report** : 327 | [report-scAnnoComb.md](./report-scAnnoComb.md). 328 | 3. **Figure files** : 329 | [figures/](./figures/). 330 | 4. **Figures used in the report** : 331 | [report-figures/](./report-figures/). 332 | 5. **Seurat object** : 333 | [expr.RDS](./). 334 | 6. **Annotation of cells** : 335 | [cellAnnotation.txt](./cellAnnotation.txt). 336 | ```{r echo=FALSE, results='asis', eval=(results$comb.method=="SeuratMNN" | results$comb.method=="NormalMNN")} 337 | cat(r.i, ". **Anchors for batch correction** : ", sep = "") 338 | cat("[anchors.RDS](./anchors.RDS).\n", sep = "") 339 | r.i <- r.i + 1 340 | ``` 341 | ```{r echo=FALSE, results='asis', eval=results$bool.runDiffExpr} 342 | cat(r.i, ". 
**Differentially expressed genes' information for all clusters** : ", sep = "") 343 | cat("[diff.expr.genes/](./diff.expr.genes/).\n", sep = "") 344 | r.i <- r.i + 1 345 | ``` 346 | ```{r echo=FALSE, results='asis', eval=results$bool.runExprProgram} 347 | cat(r.i, ". **Results of expression programs identification** : [expr.programs/](./expr.programs/).\n", sep = "") 348 | r.i <- r.i + 1 349 | ``` 350 | 351 | 352 | 353 |
354 | 355 | -------------------------------------- 356 | © [G-Lab](http://lifeome.net/glab/jgu/), [Tsinghua University](http://www.tsinghua.edu.cn) 357 | 358 | -------------------------------------------------------------------------------- /inst/rmd/main-scStat.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "scCancer" 3 | author: "wguo" 4 | date: "2019/6/13" 5 | output: html_document 6 | --- 7 | 8 | 52 | 53 | 54 | 55 | ```{r setting, include=FALSE} 56 | options(knitr.table.format = "html") 57 | options(scipen=10) 58 | knitr::opts_chunk$set(echo = TRUE, fig.path = file.path(results$savePath, 'report-figures//')) 59 | 60 | title <- "scCancer" 61 | if(!is.null(results$sampleName)){ 62 | title <- paste0(results$sampleName, " - ", title) 63 | } 64 | 65 | if(!is.null(results$authorName)){ 66 | userName <- results$authorName 67 | }else{ 68 | userName <- Sys.getenv("USERNAME") 69 | } 70 | reportMark <- Sys.time() 71 | if(userName != ""){ 72 | reportMark <- paste0(userName, " , ", reportMark) 73 | } 74 | 75 | h.i <- 1 76 | h.ii <- 1 77 | ``` 78 | 79 | 80 | # `r title` 81 | -------------------------------- 82 |

`r reportMark`

83 | 84 | 85 | ## `r h.i` Cell statistics 86 | 87 | * The input of `scCancer` pipeline is the matrix generated by [` `r results$cr.version` `](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger). 88 | 89 | ```{r echo=F, results='asis'} 90 | if(file.exists(file.path(results$dataPath, "web_summary.html"))){ 91 | file.copy(file.path(results$dataPath, "web_summary.html"), file.path(results$savePath, "report-cellRanger.html"), overwrite = T) 92 | cat("* Here is the [summary report](./report-cellRanger.html) from `Cell Ranger`.", sep = "") 93 | } 94 | ``` 95 | 96 | 97 | ### `r h.i`.`r h.ii` Cell calling 98 | 99 | ```{r CR-Calling-T, child=system.file("rmd", "cellCalling.Rmd", package = "scCancer"), eval = results$raw.data} 100 | ``` 101 | 102 | ```{r CR-Calling-F, echo=FALSE, results='asis', eval=!results$raw.data} 103 | cat("* Raw data (containing all barcodes) cannot be found, and only filtered data are supplied. So cell-calling doesn't be performed and the analyses for background distribution is omitted.\n") 104 | cat("* For the filtered data, ", results$nList[2], " cells are identified (min.nUMI = `", results$min.nUMI, "`).\n", sep="") 105 | ``` 106 | 107 | ```{r echo=F} 108 | h.ii <- h.ii + 1 109 | # print(results$raw.data) 110 | ``` 111 | 112 | 113 | 114 | 115 | ### `r h.i`.`r h.ii` The number of UMIs and detected genes in cells 116 | 117 | After the cell calling by ` `r results$cr.version` `, we further perform quality control to 118 | filter droplets with low quality cells according to `nUMI` (total number of UMIs) and `nGene` (total number of detected genes). 119 | 120 | For `nUMI` : 121 | * Suggested threshold to filter cells with extremely large `nUMI` : ` `r results$cell.threshold$nUMI` `. 122 | + Using this threshold, ` `r sum(results$cell.manifest$nUMI >= results$cell.threshold$nUMI)` ` cells will be filtered. 
123 | 124 | For `nGene` : 125 | * Suggested threshold to filter cells with extremely large `nGene` : ` `r results$cell.threshold$nGene` `. 126 | + Using this threshold, ` `r sum(results$cell.manifest$nGene >= results$cell.threshold$nGene)` ` cells will be filtered. 127 | * Suggested threshold to filter cells with extremely small `nGene` : `200`. 128 | + Using this threshold, ` `r sum(results$cell.manifest$nGene < 200)` ` cells will be filtered. 129 | 130 | **Comment**: The suggested thresholds (except the lower bound of `nGene`, which is set by convention) are determined based on their distributions. Using them, the outliers identified will be filtered. The same applies below. 131 | 132 | 133 | ```{r filter, echo=F, message=F, warning=F, dpi=300, fig.height=2.5, fig.width=8} 134 | plot_grid(results$p.nUMI, results$p.nGene, ncol = 2) 135 | ``` 136 |

(Hi-res image: left, right)

137 | 138 | 139 | ```{r echo=F} 140 | h.i <- h.i + 1 141 | h.ii <- 1 142 | ``` 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | ## `r h.i` Gene statistics 151 | The number of genes expressed in at least one cell : ` `r sum(results$gene.manifest$nCell > 0)` `. 152 | 153 | 154 | ### `r h.i`.`r h.ii` Mitochondrial genes 155 | Summary of mitochondrial genes percentage (`mito.percent`) in cells: 156 | ```{r mito.summary, echo=F} 157 | format(summary(results$cell.manifest$mito.percent), digits = 3) 158 | ``` 159 | * Suggested threshold to filter cells with high mitochondrial genes percentage : ` `r round(results$cell.threshold$mito.percent, 3)` `. 160 | + Using this threshold, ` `r sum(results$cell.manifest$mito.percent >= results$cell.threshold$mito.percent)` ` cells will be filtered. 161 | 162 | ```{r mito, echo=FALSE, message=F, dpi=300, fig.height=4, fig.width=4, fig.align="center", out.width='40%'} 163 | results$p.mito 164 | ``` 165 |

(Hi-res image: view)

166 | 167 | 168 | ```{r echo=F} 169 | h.ii <- h.ii + 1 170 | ``` 171 | 172 | 173 | 174 | ### `r h.i`.`r h.ii` Ribosome genes 175 | Summary of ribosome genes percentage (`ribo.percent`) in cells: 176 | ```{r ribo.summary, echo=F} 177 | format(summary(results$cell.manifest$ribo.percent), digits = 3) 178 | ``` 179 | * Suggested threshold to filter cells with high ribosome genes percentage : ` `r round(results$cell.threshold$ribo.percent, 3)` `. 180 | + Using this threshold, ` `r sum(results$cell.manifest$ribo.percent >= results$cell.threshold$ribo.percent)` ` cells will be filtered. 181 | 182 | ```{r ribo, echo=FALSE, message=F, dpi=300, fig.height=4, fig.width=4, fig.align="center", out.width='40%'} 183 | results$p.ribo 184 | ``` 185 |

(Hi-res image: view)

186 | 187 | ```{r echo=F} 188 | h.ii <- h.ii + 1 189 | ``` 190 | 191 | 192 | 193 | ### `r h.i`.`r h.ii` Dissociation associated genes 194 | Summary of dissociation associated genes percentage (`diss.percent`) in cells: 195 | ```{r diss.summary, echo=F} 196 | format(summary(results$cell.manifest$diss.percent), digits = 3) 197 | ``` 198 | * Suggested threshold to filter cells with high dissociation genes percentage : ` `r round(results$cell.threshold$diss.percent, 3)` `. 199 | + Using this threshold, ` `r sum(results$cell.manifest$diss.percent >= results$cell.threshold$diss.percent)` ` cells will be filtered. 200 | 201 | ```{r diss, echo=FALSE, message=F, dpi=300, fig.height=4, fig.width=4, fig.align="center", out.width='40%'} 202 | results$p.diss 203 | ``` 204 |

(Hi-res image: view)

205 | 206 | ```{r echo=F} 207 | h.ii <- h.ii + 1 208 | ``` 209 | 210 | 211 | 212 | 213 | ### `r h.i`.`r h.ii` Ambient RNAs 214 | 215 | ### `r h.i`.`r h.ii`.1 Highly-expressed genes 216 | In order to analyze the gene expression profiles in detail and identify 217 | highly-expressed genes in background mRNAs from lysed cells, 218 | we calculate some metrics as shown below. 219 | ```{r echo=F, results='asis'} 220 | if("bg.percent" %in% colnames(results$gene.manifest)){ 221 | cat("* `bg.percent` : the expression proportion for each gene in background distribution (all droplets with `nUMI <= 10`).\n") 222 | } 223 | ``` 224 | * `prop.median` : the median of expression proportions for a gene in each cell. 225 | * `detect.rate` : the detected (`#UMI > 0`) rate for a gene in all cells. 226 | 227 | Here is a plot showing the distributions of gene proportion in cells for the first 100 genes (ordered by their proportion in background `bg.percent`). And the points (genes) are colored according to whether they belong to mitochondrial, ribosome, or dissociation associated genes. 228 | ```{r echo=F, results='asis'} 229 | if("bg.percent" %in% colnames(results$gene.manifest)){ 230 | cat("The red star signs mark the genes’ proportion in background.\n") 231 | } 232 | ``` 233 | 234 | ```{r genePropPlot, echo=F, message=F, warning=F, dpi=300, fig.width=8, fig.height=8, fig.align="center"} 235 | grid::grid.draw(results$p.geneProp) 236 | ``` 237 |

(Hi-res image: view)

238 | 239 | 240 | ```{r echo=F, results='asis'} 241 | if("bg.percent" %in% colnames(results$gene.manifest)){ 242 | cat("The plot below shows the relationship between `bg.percent` and `prop.median`, `bg.percent` and `detect.rate`.\n") 243 | } 244 | ``` 245 | 246 | ```{r gene.plot, echo=F, message=F, warning=F, dpi=300, fig.height=4, fig.width=8} 247 | if("bg.percent" %in% colnames(results$gene.manifest)){ 248 | plot_grid(results$p.bg.cell, results$p.bg.detect, ncol = 2) 249 | } 250 | ``` 251 | 252 | ```{r echo=F, results='asis'} 253 | if("bg.percent" %in% colnames(results$gene.manifest)){ 254 | cat("

(Hi-res image: left, right)

\n") 255 | # cat("(Hi-res image\n") 256 | } 257 | ``` 258 | 259 | ```{r echo=F} 260 | h.ii <- h.ii + 1 261 | ``` 262 | 263 | ```{r soupx, child=system.file("rmd", "SoupX.Rmd", package = "scCancer"), eval = results$bool.runSoupx} 264 | ``` 265 | 266 | 267 | 268 | ```{r echo=F} 269 | h.i <- h.i + 1 270 | h.ii <- 1 271 | ``` 272 | 273 | 274 | 275 | 276 | ## `r h.i` Output 277 | 278 | ### `r h.i`.`r h.ii` Thresholds to filter droplets 279 | According to the results of statistics and visualization, we propose following thresholds to filter cells: 280 | 281 |
282 | ```{r thresholds, echo=F, warning=F} 283 | # results$filter.thres %>% knitr::kable("html") 284 | kable(results$filter.thres) 285 | ``` 286 |
* **Hint**: In general, `Cell Ranger` can filter the droplets with low nUMI. So here we set `Low.threshold` for nUMI as `0`.
The users need to use the identification results of `Cell Ranger` or set a suitable threshold first to filter the possible empty droplets with fewer UMIs.


Using these thresholds, the number of cells varies as follows:

` `r paste0("Raw : ", results$nList[1])` ` ->
` `r paste0("cellranger3 : ", results$nList[2])` ` ->
` `r paste0("nUMI<", results$cell.threshold$nUMI, " : ", results$nList[3])` ` ->
` `r paste0("nGene>=200 : ", results$nList[4])` ` ->
` `r paste0("nGene<", results$cell.threshold$nGene, " : ", results$nList[5])` ` ->
` `r paste0("mito.percent<", round(results$cell.threshold$mito.percent, 3), " : ", results$nList[6])` ` ->
` `r paste0("ribo.percent<", round(results$cell.threshold$ribo.percent, 3), " : ", results$nList[7])` ` ->
` `r paste0("diss.percent<", round(results$cell.threshold$diss.percent, 3), " : ", results$nList[8])` `

```{r echo=F}
# Advance the sub-section counter. Resetting it to 1 here would give
# "Output files" the same number as "Thresholds to filter droplets".
h.ii <- h.ii + 1
```


### `r h.i`.`r h.ii` Output files
Running this script generates following files:

```{r echo=F}
# Running index for the optional list items below; items 1-7 are fixed.
r.i <- 8
```

1. **Html report** :
[report-scStat.html](./report-scStat.html).
2. **Markdown report** :
[report-scStat.md](./report-scStat.md).
3. **Figure files** :
[figures/](./figures/).
4. **Figures used in the report**:
[report-figures/](./report-figures/).
5. **Text file with cell manifest** :
[cellManifest-all.txt](./cellManifest-all.txt).
6. **Text file with suggested thresholds as above** :
[cell.QC.thres.txt](./cell.QC.thres.txt).
7. **Text file with gene manifest** :
[geneManifest.txt](./geneManifest.txt).
```{r echo=F, results='asis', eval=results$bool.runSoupx}
# Optional item: listed only when the SoupX step was run.
# The trailing newline keeps the next list item on its own line.
cat(r.i, ". **RDS file with SoupX object** :[soupx-object.RDS](./).\n",
    sep = "")
r.i <- r.i + 1
```
```{r echo=F, results='asis'}
# Optional item: listed only when the Cell Ranger summary exists in the data folder.
# Numbered from the running index so the list stays consecutive whether or
# not the SoupX item above was printed.
if(file.exists(file.path(results$dataPath, "web_summary.html"))){
    cat(r.i, ". **Cell ranger html report** (Copy from the source data folder):\n", sep = "")
    cat("[report-cellRanger.html](./report-cellRanger.html).\n", sep = "")
    r.i <- r.i + 1
}
```
345 | 346 | -------------------------------------- 347 | © [G-Lab](http://lifeome.net/glab/jgu/), [Tsinghua University](http://www.tsinghua.edu.cn) 348 | -------------------------------------------------------------------------------- /inst/rmd/malign-comb.Rmd: -------------------------------------------------------------------------------- 1 | 2 | ### `r h.i`.`r h.ii` Cell malignancy estimation 3 | Using the cell malignancy results from each sample, we can get following plots. 4 | 5 | Here is the t-SNE plot colored by malignancy score (left) and type (right). 6 | 7 | ```{r malignPointPlot, echo=F, message=F, warning=F, dpi=300, fig.width=10, fig.height=4, fig.align="center"} 8 | plot_grid(results$malign.plot$p.malignScore.Point, 9 | results$malign.plot$p.malignType.Point, ncol = 2) 10 | ``` 11 |

(Hi-res image: left, right)

12 | 13 | 14 | 15 | Here is a bar plot showing the relationship between cell cluster and cell malignancy type. 16 | 17 | ```{r malignBarPlot, echo=F, message=F, warning=F, dpi=300, fig.width=6, fig.height=3, fig.align="center", out.width='80%'} 18 | results$malign.plot$p.malignType.bar 19 | ``` 20 |

(Hi-res image: view)

21 | 22 | 23 | The estimated cell malignancy scores and types can be found in the column `Malign.score` and `Malign.type` of the table file 24 | [cellAnnotation.txt](./cellAnnotation.txt). 25 | 26 | 27 | 28 | ```{r echo=F} 29 | h.ii <- h.ii + 1 30 | ``` 31 | -------------------------------------------------------------------------------- /inst/rmd/malignancy.Rmd: -------------------------------------------------------------------------------- 1 | 2 | ### `r h.i`.`r h.ii` Cell malignancy estimation 3 | 4 | In order to distinguish malignant and non-malignant cells, we infer copy number alterations (CNV) from tumor single cell RNA-Seq data referring to the method of R package [`infercnv`](https://github.com/broadinstitute/inferCNV/wiki). Then we calculate a smoothed malignancy score based on the CNV profile. 5 | 6 | Following is the malignancy scores distribution plot for observation cells in the sample (blue) and reference cells (grey). 7 | By detecting the bimodality in the malignancy score distribution, 8 | ```{r echo=FALSE, results='asis'} 9 | if(!is.null(results$malign.thres)){ 10 | cat("we get the bimodal boundary is nearly `", format(results$malign.thres, digits = 3, scientific = T), "` (red dash line).", sep = "") 11 | }else{ 12 | cat("we cannot think the distribution is bimodality.", sep = "") 13 | } 14 | ``` 15 | 16 | ```{r malignScorePlot, echo=F, message=F, warning=F, dpi=300, fig.width=5, fig.height=4, fig.align="center", out.width='50%'} 17 | results$malign.plot$p.malignScore 18 | ``` 19 |

(Hi-res image: view)

20 | 21 | 22 | 23 | Here is the t-SNE plot colored by malignancy score (left) and type (right). 24 | 25 | ```{r malignPointPlot, echo=F, message=F, warning=F, dpi=300, fig.width=10, fig.height=4, fig.align="center"} 26 | plot_grid(results$malign.plot$p.malignScore.Point, 27 | results$malign.plot$p.malignType.Point, ncol = 2) 28 | ``` 29 |

(Hi-res image: left, right)

30 | 31 | 32 | 33 | Here is a bar plot showing the relationship between cell cluster and cell malignancy type. 34 | 35 | ```{r malignBarPlot, echo=F, message=F, warning=F, dpi=300, fig.width=6, fig.height=3, fig.align="center", out.width='80%'} 36 | results$malign.plot$p.malignType.bar 37 | ``` 38 |

(Hi-res image: view)

39 | 40 | 41 | 42 | The estimated cell malignancy scores and types can be found in the column `Malign.score` and `Malign.type` of the table file 43 | [cellAnnotation.txt](./cellAnnotation.txt). 44 | 45 | 46 | After this step, `scCancer` saved following results files to the folder '[malignancy/](./malignancy/)': 47 | * Estimated CNV profile of reference cells: [inferCNV-reference.txt](./malignancy/inferCNV-reference.txt). 48 | * Estimated CNV profile of sample cells: [inferCNV-observation.txt](./malignancy/inferCNV-observation.txt). 49 | * Malignancy scores of reference cells: [refer-malignScore.txt](./malignancy/refer-malignScore.txt). 50 | 51 | 52 | ```{r echo=F} 53 | h.ii <- h.ii + 1 54 | ``` 55 | -------------------------------------------------------------------------------- /inst/rmd/stemness.Rmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### `r h.i`.`r h.ii` Cell stemness estimation 4 | 5 | We train a stemness signature and use it to estimate stemness scores. 6 | The predicted stemness scores can be found in the column `Stemness.score` of the table file 7 | [cellAnnotation.txt](./cellAnnotation.txt). 8 | 9 | Here is the scatter plot colored by estimated stemness scores. 10 | 11 | ```{r stemnessPlot, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='60%'} 12 | results$stemness.plot 13 | ``` 14 |

(Hi-res image: view)

15 | 16 | 17 | ```{r echo=F} 18 | h.ii <- h.ii + 1 19 | ``` 20 | -------------------------------------------------------------------------------- /inst/rmd/umap.Rmd: -------------------------------------------------------------------------------- 1 | 2 | Here is the UMAP plot colored by cell clusters. 3 | 4 | ```{r clusterPlotUmap, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='80%'} 5 | results$seurat.plots$p.cluster.umap 6 | ``` 7 |

(Hi-res image: view)

8 | -------------------------------------------------------------------------------- /inst/txt/cellCycle-genes.txt: -------------------------------------------------------------------------------- 1 | MCM5 2 | PCNA 3 | TYMS 4 | FEN1 5 | MCM2 6 | MCM4 7 | RRM1 8 | UNG 9 | GINS2 10 | MCM6 11 | CDCA7 12 | DTL 13 | PRIM1 14 | UHRF1 15 | MLF1IP 16 | HELLS 17 | RFC2 18 | RPA2 19 | NASP 20 | RAD51AP1 21 | GMNN 22 | WDR76 23 | SLBP 24 | CCNE2 25 | UBR7 26 | POLD3 27 | MSH2 28 | ATAD2 29 | RAD51 30 | RRM2 31 | CDC45 32 | CDC6 33 | EXO1 34 | TIPIN 35 | DSCC1 36 | BLM 37 | CASP8AP2 38 | USP1 39 | CLSPN 40 | POLA1 41 | CHAF1B 42 | BRIP1 43 | E2F8 44 | HMGB2 45 | CDK1 46 | NUSAP1 47 | UBE2C 48 | BIRC5 49 | TPX2 50 | TOP2A 51 | NDC80 52 | CKS2 53 | NUF2 54 | CKS1B 55 | MKI67 56 | TMPO 57 | CENPF 58 | TACC3 59 | FAM64A 60 | SMC4 61 | CCNB2 62 | CKAP2L 63 | CKAP2 64 | AURKB 65 | BUB1 66 | KIF11 67 | ANP32E 68 | TUBB4B 69 | GTSE1 70 | KIF20B 71 | HJURP 72 | CDCA3 73 | HN1 74 | CDC20 75 | TTK 76 | CDC25C 77 | KIF2C 78 | RANGAP1 79 | NCAPD2 80 | DLGAP5 81 | CDCA2 82 | CDCA8 83 | ECT2 84 | KIF23 85 | HMMR 86 | AURKA 87 | PSRC1 88 | ANLN 89 | LBR 90 | CKAP5 91 | CENPE 92 | CTCF 93 | NEK2 94 | G2E3 95 | GAS2L3 96 | CBX5 97 | CENPA 98 | -------------------------------------------------------------------------------- /inst/txt/diss-genes.txt: -------------------------------------------------------------------------------- 1 | ACTG1 2 | ANKRD1 3 | ARID5A 4 | ATF3 5 | ATF4 6 | BAG3 7 | BHLHE40 8 | BRD2 9 | BTG1 10 | BTG2 11 | CCNL1 12 | CCRN4L 13 | CEBPB 14 | CEBPD 15 | CEBPG 16 | CSRNP1 17 | CXCL1 18 | CYR61 19 | DCN 20 | DDX3X 21 | DDX5 22 | DES 23 | DNAJA1 24 | DNAJB1 25 | DNAJB4 26 | DUSP1 27 | DUSP8 28 | EGR1 29 | EGR2 30 | EIF1 31 | EIF5 32 | ERF 33 | ERRFI1 34 | FAM132B 35 | FOS 36 | FOSB 37 | FOSL2 38 | GADD45A 39 | GADD45G 40 | GCC1 41 | GEM 42 | H3F3B 43 | HIPK3 44 | HSP90AA1 45 | HSP90AB1 46 | HSPA1A 47 | HSPA1B 48 | HSPA5 49 | HSPA8 50 | HSPB1 51 | HSPE1 52 | HSPH1 53 | ID3 54 | 
IDI1 55 | IER2 56 | IER3 57 | IER5 58 | IFRD1 59 | IL6 60 | IRF1 61 | IRF8 62 | ITPKC 63 | JUN 64 | JUNB 65 | JUND 66 | KCNE4 67 | KLF2 68 | KLF4 69 | KLF6 70 | KLF9 71 | LITAF 72 | LMNA 73 | MAFF 74 | MAFK 75 | MCL1 76 | MIDN 77 | MIR22HG 78 | MT1 79 | MT2 80 | MYADM 81 | MYC 82 | MYD88 83 | NCKAP5L 84 | NCOA7 85 | NFKBIA 86 | NFKBIZ 87 | NOP58 88 | NPPC 89 | NR4A1 90 | ODC1 91 | OSGIN1 92 | OXNAD1 93 | PCF11 94 | PDE4B 95 | PER1 96 | PHLDA1 97 | PNP 98 | PNRC1 99 | PPP1CC 100 | PPP1R15A 101 | PXDC1 102 | RAP1B 103 | RASSF1 104 | RHOB 105 | RHOH 106 | RIPK1 107 | SAT1 108 | SBNO2 109 | SDC4 110 | SERPINE1 111 | SKIL 112 | SLC10A6 113 | SLC38A2 114 | SLC41A1 115 | SOCS3 116 | SQSTM1 117 | SRF 118 | SRSF5 119 | SRSF7 120 | STAT3 121 | TAGLN2 122 | TIPARP 123 | TNFAIP3 124 | TNFAIP6 125 | TPM3 126 | TPPP3 127 | TRA2A 128 | TRA2B 129 | TRIB1 130 | TUBB4B 131 | TUBB6 132 | UBC 133 | USP2 134 | WAC 135 | ZC3H12A 136 | ZFAND5 137 | ZFP36 138 | ZFP36L1 139 | ZFP36L2 140 | ZYX 141 | -------------------------------------------------------------------------------- /man/Read10Xdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{Read10Xdata} 4 | \alias{Read10Xdata} 5 | \title{Read10Xdata} 6 | \usage{ 7 | Read10Xdata( 8 | data.dir = NULL, 9 | gene.column = 2, 10 | unique.features = TRUE, 11 | only.expr = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{data.dir}{Directory containing the matrix.mtx, genes.tsv (or features.tsv), and barcodes.tsv files provided by 10X. 16 | A vector or named vector can be given in order to load several data directories. 
17 | If a named vector is given, the cell barcode names will be prefixed with the name.} 18 | 19 | \item{gene.column}{An integer indicating which column of genes.tsv or features.tsv to use for gene names; default is 2.} 20 | 21 | \item{unique.features}{Make feature names unique (default TRUE).} 22 | 23 | \item{only.expr}{Whether to read only the expression data when the input has multiple feature types (default TRUE).} 24 | } 25 | \value{ 26 | If the 10X data only has expression data or the argument 'only.expr' is TRUE, 27 | a sparse matrix containing the expression data will be returned. 28 | Otherwise, if the 10X data has multiple data types, 29 | a list containing a sparse matrix of the data from each type will be returned. 30 | } 31 | \description{ 32 | Read expression matrix data from 10X. This function is modified from the Seurat package. 33 | } 34 | -------------------------------------------------------------------------------- /man/checkAnnoArguments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{checkAnnoArguments} 4 | \alias{checkAnnoArguments} 5 | \title{checkAnnoArguments} 6 | \usage{ 7 | checkAnnoArguments(argList) 8 | } 9 | \arguments{ 10 | \item{argList}{A list of arguments passed into 'runScAnnotation'.} 11 | } 12 | \description{ 13 | checkAnnoArguments 14 | } 15 | -------------------------------------------------------------------------------- /man/checkCombArguments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{checkCombArguments} 4 | \alias{checkCombArguments} 5 | \title{checkCombArguments} 6 | \usage{ 7 | checkCombArguments(argList) 8 | } 9 | \arguments{ 10 | \item{argList}{A list of arguments passed into 'runScCombination'.} 11 | } 12 | \description{ 13 | checkCombArguments 14 | } 15 | 
-------------------------------------------------------------------------------- /man/checkStatArguments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{checkStatArguments} 4 | \alias{checkStatArguments} 5 | \title{checkStatArguments} 6 | \usage{ 7 | checkStatArguments(argList) 8 | } 9 | \arguments{ 10 | \item{argList}{A list of arguments passed into 'runScStatistics'.} 11 | } 12 | \description{ 13 | checkStatArguments 14 | } 15 | -------------------------------------------------------------------------------- /man/clusterBarPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{clusterBarPlot} 4 | \alias{clusterBarPlot} 5 | \title{clusterBarPlot} 6 | \usage{ 7 | clusterBarPlot( 8 | cell.annotation, 9 | sel.col = "Cell.Type", 10 | cell.colors = NULL, 11 | legend.title = NULL, 12 | legend.position = "bottom", 13 | legend.ncol = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{cell.annotation}{A data.frame of cells' annotation containing the cells' Cluster and other information to be colored.} 18 | 19 | \item{sel.col}{The column name of cell.annotation, which indicates the type of cells.} 20 | 21 | \item{cell.colors}{An array of colors used to show the cells' type. If NULL, the default colors will be used.} 22 | 23 | \item{legend.title}{The title of legends. If NULL, the value of "sel.col" will be used.} 24 | 25 | \item{legend.position}{The position of legends ("none", "left", "right", "bottom", "top", or two-element numeric vector).} 26 | 27 | \item{legend.ncol}{The number of columns of legends.} 28 | } 29 | \value{ 30 | A bar plot. 
31 | } 32 | \description{ 33 | clusterBarPlot 34 | } 35 | -------------------------------------------------------------------------------- /man/extractFiles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{extractFiles} 4 | \alias{extractFiles} 5 | \title{extractFiles} 6 | \usage{ 7 | extractFiles( 8 | savePath, 9 | sampleNames, 10 | outputPath, 11 | files = c("report-scStat.html", "report-scAnno.html"), 12 | subfolders = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{savePath}{A path of samples' result folder.} 17 | 18 | \item{sampleNames}{A vector of samples' names (the subfolder names in 'savePath').} 19 | 20 | \item{outputPath}{A path to saving the extracted reports.} 21 | 22 | \item{files}{The name of files you want to extract. The default is c("report-scStat.html", "report-scAnno.html").} 23 | 24 | \item{subfolders}{The name of subfolders for the files you want to extract. The default is NULL. 25 | It can be a character string, which means all files are under the subfolder. 26 | It can also be a character string vector with same length as "files", which are corresponding to "files".} 27 | } 28 | \description{ 29 | Extract files from each sample's folder and rename them with sample's name. 30 | } 31 | -------------------------------------------------------------------------------- /man/genAnnoReport.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{genAnnoReport} 4 | \alias{genAnnoReport} 5 | \title{genAnnoReport} 6 | \usage{ 7 | genAnnoReport(results, savePath) 8 | } 9 | \arguments{ 10 | \item{results}{A list generated by 'runScAnnotation'} 11 | 12 | \item{savePath}{A path to save the results files. 
If NULL, the 'statPath' will be used instead.} 13 | } 14 | \description{ 15 | genAnnoReport 16 | } 17 | -------------------------------------------------------------------------------- /man/genStatReport.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scStatistics.R 3 | \name{genStatReport} 4 | \alias{genStatReport} 5 | \title{genStatReport} 6 | \usage{ 7 | genStatReport(results, savePath) 8 | } 9 | \arguments{ 10 | \item{results}{A list generated by 'runScStatistics'} 11 | 12 | \item{savePath}{A path to save the results files (creating a folder named after the sample is suggested).} 13 | } 14 | \description{ 15 | genStatReport 16 | } 17 | -------------------------------------------------------------------------------- /man/generate10Xdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{generate10Xdata} 4 | \alias{generate10Xdata} 5 | \title{generate10Xdata} 6 | \usage{ 7 | generate10Xdata(matrix, gene.info, outPath, overwrite = F) 8 | } 9 | \arguments{ 10 | \item{matrix}{A gene-cell matrix or data.frame.} 11 | 12 | \item{gene.info}{A data.frame of gene information. It should contain two columns, 13 | the first is gene Ensembl ID, and the second is gene symbol. 14 | The order of the genes should be consistent with the row order of 'matrix'.} 15 | 16 | \item{outPath}{A path to save the output files.} 17 | 18 | \item{overwrite}{If TRUE and the output file already exists, the file is 19 | silently overwritten, otherwise an exception is thrown. The default is "FALSE".} 20 | } 21 | \description{ 22 | Generate a 10X-like data folder based on the data matrix and gene information, 23 | which can be used directly to perform scCancer analysis. 
24 | } 25 | -------------------------------------------------------------------------------- /man/getCellTypeColor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{getCellTypeColor} 4 | \alias{getCellTypeColor} 5 | \title{getCellTypeColor} 6 | \usage{ 7 | getCellTypeColor(cell.types) 8 | } 9 | \arguments{ 10 | \item{cell.types}{A vector of cell types.} 11 | } 12 | \value{ 13 | A vector of colors. 14 | } 15 | \description{ 16 | getCellTypeColor 17 | } 18 | -------------------------------------------------------------------------------- /man/getDefaultColors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{getDefaultColors} 4 | \alias{getDefaultColors} 5 | \title{getDefaultColors} 6 | \usage{ 7 | getDefaultColors(n = NULL, type = 1) 8 | } 9 | \arguments{ 10 | \item{n}{The number of colors.} 11 | 12 | \item{type}{The type of color style. Only 1, 2, or 3 is allowed.} 13 | } 14 | \value{ 15 | A vector of colors. 16 | } 17 | \description{ 18 | getDefaultColors 19 | } 20 | -------------------------------------------------------------------------------- /man/getDefaultGeneSets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{getDefaultGeneSets} 4 | \alias{getDefaultGeneSets} 5 | \title{getDefaultGeneSets} 6 | \usage{ 7 | getDefaultGeneSets(species = "human") 8 | } 9 | \arguments{ 10 | \item{species}{A character string indicating what species the sample belong to. 11 | Only "human"(default) or "mouse" are allowed.} 12 | } 13 | \value{ 14 | A list of gene sets (50 hallmark gene sets). 
15 | } 16 | \description{ 17 | getDefaultGeneSets 18 | } 19 | -------------------------------------------------------------------------------- /man/getDefaultMarkers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{getDefaultMarkers} 4 | \alias{getDefaultMarkers} 5 | \title{getDefaultMarkers} 6 | \usage{ 7 | getDefaultMarkers(species = "human") 8 | } 9 | \arguments{ 10 | \item{species}{A character string indicating what species the sample belongs to. 11 | Only "human"(default) or "mouse" are allowed.} 12 | } 13 | \value{ 14 | A list of default markers of several common cell types. 15 | } 16 | \description{ 17 | Return default markers of several common cell types. 18 | } 19 | -------------------------------------------------------------------------------- /man/getTumorCluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{getTumorCluster} 4 | \alias{getTumorCluster} 5 | \title{getTumorCluster} 6 | \usage{ 7 | getTumorCluster(cell.annotation, epi.thres = 0.6, malign.thres = 0.8) 8 | } 9 | \arguments{ 10 | \item{cell.annotation}{A data.frame of cells' annotation containing predicted cell types and estimated cell malignant type.} 11 | 12 | \item{epi.thres}{A threshold for epithelial cell percent to decide putative tumor clusters.} 13 | 14 | \item{malign.thres}{A threshold for malignant cell percent to decide putative tumor clusters.} 15 | } 16 | \value{ 17 | A list of identified tumor clusters. If no clusters are found, return NULL. 18 | } 19 | \description{ 20 | Identify tumor clusters according to the results of cell type prediction and cell malignancy estimation. 
21 | } 22 | -------------------------------------------------------------------------------- /man/ggplot_config.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{ggplot_config} 4 | \alias{ggplot_config} 5 | \title{ggplot_config} 6 | \usage{ 7 | ggplot_config(base.size = 8) 8 | } 9 | \arguments{ 10 | \item{base.size}{The size of text.} 11 | } 12 | \value{ 13 | A theme. 14 | } 15 | \description{ 16 | ggplot_config 17 | } 18 | -------------------------------------------------------------------------------- /man/markerPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{markerPlot} 4 | \alias{markerPlot} 5 | \title{markerPlot} 6 | \usage{ 7 | markerPlot( 8 | expr.data, 9 | coor.df, 10 | coor.names = c("tSNE_1", "tSNE_2"), 11 | features = NULL, 12 | add = T, 13 | species = "human", 14 | font.size = 4, 15 | color = "blue" 16 | ) 17 | } 18 | \arguments{ 19 | \item{expr.data}{A matrix of expression (gene by cell)} 20 | 21 | \item{coor.df}{A data.frame which contains cells' 2D coordinates.} 22 | 23 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.} 24 | 25 | \item{features}{A vector of genes to plot.} 26 | 27 | \item{add}{A logical value indicating whether to present the default markers.} 28 | 29 | \item{species}{A character string indicating what species the sample belongs to. 30 | Only "human"(default) or "mouse" are allowed.} 31 | 32 | \item{font.size}{The size of labels.} 33 | 34 | \item{color}{The color of point.} 35 | } 36 | \value{ 37 | A list of ggplot objects, one for each marker gene. 38 | } 39 | \description{ 40 | Generate plots of the expression profiles of genes of interest. 
41 | } 42 | -------------------------------------------------------------------------------- /man/plotCellInteraction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{plotCellInteraction} 4 | \alias{plotCellInteraction} 5 | \title{plotCellInteraction} 6 | \usage{ 7 | plotCellInteraction(stat.df, cell.annotation) 8 | } 9 | \arguments{ 10 | \item{stat.df}{A data.frame of cell sets interaction result.} 11 | 12 | \item{cell.annotation}{A data.frame of cells' annotation containing the cells' cluster and type.} 13 | } 14 | \value{ 15 | A plot showing the result of cell interaction. 16 | } 17 | \description{ 18 | plotCellInteraction 19 | } 20 | -------------------------------------------------------------------------------- /man/plotExprProgram.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{plotExprProgram} 4 | \alias{plotExprProgram} 5 | \title{plotExprProgram} 6 | \usage{ 7 | plotExprProgram( 8 | H, 9 | cell.annotation, 10 | bool.limit = T, 11 | sel.clusters = NULL, 12 | savePath = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{H}{The decomposed right matrix H.} 17 | 18 | \item{cell.annotation}{A data.frame of cells' annotation containing cluster information.} 19 | 20 | \item{bool.limit}{A logical value indicating whether to set upper and lower limit when plot heatmap.} 21 | 22 | \item{sel.clusters}{A vector of selected clusters to analyze. The default is NULL and all clusters will be used.} 23 | 24 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 25 | } 26 | \value{ 27 | A heatmap for cells' expression programs. 
28 | } 29 | \description{ 30 | plotExprProgram 31 | } 32 | -------------------------------------------------------------------------------- /man/plotGeneSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{plotGeneSet} 4 | \alias{plotGeneSet} 5 | \title{plotGeneSet} 6 | \usage{ 7 | plotGeneSet(cell.annotation, prefix = "GS__", bool.limit = T, savePath = NULL) 8 | } 9 | \arguments{ 10 | \item{cell.annotation}{A data.frame of cells' annotation containing gene set signature scores.} 11 | 12 | \item{prefix}{A prefix string of column names for gene sets.} 13 | 14 | \item{bool.limit}{A logical value indicating whether to set upper and lower limit when plot heatmap.} 15 | 16 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 17 | } 18 | \value{ 19 | A heatmap for gene set signature scores. 20 | } 21 | \description{ 22 | plotGeneSet 23 | } 24 | -------------------------------------------------------------------------------- /man/plotMalignancy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnvFunction.R 3 | \name{plotMalignancy} 4 | \alias{plotMalignancy} 5 | \title{plotMalignancy} 6 | \usage{ 7 | plotMalignancy( 8 | cell.annotation, 9 | coor.names = c("tSNE_1", "tSNE_2"), 10 | savePath = NULL 11 | ) 12 | } 13 | \arguments{ 14 | \item{cell.annotation}{A data.frame of cells' annotation containing the cells' 15 | malignancy score (`Malign.score`) and type (`Malign.type`).} 16 | 17 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.} 18 | 19 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 20 | } 21 | \value{ 22 | A plot list. 
23 | } 24 | \description{ 25 | plotMalignancy 26 | } 27 | -------------------------------------------------------------------------------- /man/plotSeurat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{plotSeurat} 4 | \alias{plotSeurat} 5 | \title{plotSeurat} 6 | \usage{ 7 | plotSeurat( 8 | expr, 9 | cell.annotation = cell.annotation, 10 | show.features = NULL, 11 | bool.add.features = T, 12 | coor.names = c("tSNE_1", "tSNE_2"), 13 | bool.plotHVG = T, 14 | bool.runDiffExpr = T, 15 | diff.expr.genes = NULL, 16 | n.markers = 5, 17 | species = "human", 18 | savePath 19 | ) 20 | } 21 | \arguments{ 22 | \item{expr}{A Seurat object.} 23 | 24 | \item{cell.annotation}{A data.frame of cells' annotation.} 25 | 26 | \item{show.features}{A list or vector for genes to be plotted in 'markerPlot'.} 27 | 28 | \item{bool.add.features}{A logical value indicating whether to add default features to 'show.features' or not.} 29 | 30 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.} 31 | 32 | \item{bool.plotHVG}{A logical value indicating whether to plot highly variable genes.} 33 | 34 | \item{bool.runDiffExpr}{A logical value indicating whether to perform differential expression analysis.} 35 | 36 | \item{diff.expr.genes}{A data.frame of differentially expressed genes.} 37 | 38 | \item{n.markers}{An integer indicating the number of differentially expressed genes shown in the plot. The default is 5.} 39 | 40 | \item{species}{A character string indicating what species the sample belongs to. 41 | Only "human"(default) or "mouse" are allowed.} 42 | 43 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 44 | } 45 | \value{ 46 | A list of all plots generated by Seurat analyses. 47 | } 48 | \description{ 49 | Construct and save plots of Seurat analysis. 
50 | } 51 | -------------------------------------------------------------------------------- /man/pointDRPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{pointDRPlot} 4 | \alias{pointDRPlot} 5 | \title{pointDRPlot} 6 | \usage{ 7 | pointDRPlot( 8 | cell.annotation, 9 | value, 10 | sel.clusters = NULL, 11 | coor.names = c("tSNE_1", "tSNE_2"), 12 | colors = NULL, 13 | discrete = T, 14 | limit.quantile = 0, 15 | point.type = 1, 16 | legend.position = "right", 17 | legend.title = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{cell.annotation}{A data.frame of cells' annotation containing the cells' coordinates and index to be colored.} 22 | 23 | \item{value}{The column name of cell.annotation, which is mapped to the colors of points.} 24 | 25 | \item{sel.clusters}{An array of selected clusters to present. (The default is NULL and all clusters will be used.)} 26 | 27 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.} 28 | 29 | \item{colors}{An array of colors used to show the gradients or type of points. If NULL, the default colors will be used.} 30 | 31 | \item{discrete}{A logical value indicating whether the value column is discrete or not.} 32 | 33 | \item{limit.quantile}{A quantile threshold to limit the data and reduce the influence of outliers.} 34 | 35 | \item{point.type}{A number indicating the shape type of points. "1" (default) means the point has a lightgrey border, and "2" means not.} 36 | 37 | \item{legend.position}{The position of legends ("none", "left", "right", "bottom", "top", or two-element numeric vector).} 38 | 39 | \item{legend.title}{The title of legends.} 40 | } 41 | \value{ 42 | A ggplot object for the scatter plot. 43 | } 44 | \description{ 45 | Plot scatter for cells. 
46 | } 47 | -------------------------------------------------------------------------------- /man/predCellType.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{predCellType} 4 | \alias{predCellType} 5 | \title{predCellType} 6 | \usage{ 7 | predCellType(X.test, ct.templates = NULL, species = "human") 8 | } 9 | \arguments{ 10 | \item{X.test}{A cells expression matrix (row for genes, column for cells).} 11 | 12 | \item{ct.templates}{A list of gene weight vectors for each cell type.} 13 | 14 | \item{species}{A character string indicating what species the sample belong to. 15 | Only "human"(default) or "mouse" are allowed.} 16 | } 17 | \value{ 18 | A list of predicted cell types and the relative correlations. 19 | } 20 | \description{ 21 | predCellType 22 | } 23 | -------------------------------------------------------------------------------- /man/prepareData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scStatistics.R 3 | \name{prepareData} 4 | \alias{prepareData} 5 | \title{prepareData} 6 | \usage{ 7 | prepareData( 8 | samplePath, 9 | species = "human", 10 | hg.mm.mix = F, 11 | hg.mm.thres = 0.9, 12 | mix.anno = c(human = "hg19", mouse = "mm10") 13 | ) 14 | } 15 | \arguments{ 16 | \item{samplePath}{A path containing the cell ranger processed data.} 17 | 18 | \item{species}{A character string indicating what species the sample belong to. 19 | Must be one of "human"(default) and "mouse".} 20 | 21 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of 22 | human cells and mouse cells(such as PDX sample). 
23 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.} 24 | 25 | \item{hg.mm.thres}{A floating-point threshold within [0.5, 1] to identify human and mouse cells. 26 | Cells with UMI percentage of single species larger than the threshold are labeled human or mouse cells. 27 | The default is 0.9.} 28 | 29 | \item{mix.anno}{A vector to indicate the prefix of genes from different species. 30 | The default is c("human" = "hg19", "mouse" = "mm10").} 31 | } 32 | \value{ 33 | A list of expr.data, cell.manifest, gene.manifest, raw.data, min.nUMI, cr.version and run.emptydrop 34 | } 35 | \description{ 36 | prepareData 37 | } 38 | -------------------------------------------------------------------------------- /man/prepareSeurat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{prepareSeurat} 4 | \alias{prepareSeurat} 5 | \title{prepareSeurat} 6 | \usage{ 7 | prepareSeurat( 8 | dataPath, 9 | statPath, 10 | savePath, 11 | sampleName = "sc", 12 | bool.filter.cell = T, 13 | bool.filter.gene = T, 14 | anno.filter = c("mitochondrial", "ribosome", "dissociation"), 15 | nCell.min = 3, 16 | bgPercent.max = 1, 17 | hg.mm.mix = F, 18 | bool.rmContamination = T, 19 | vars.add.meta = c("mito.percent", "ribo.percent", "diss.percent"), 20 | vars.to.regress = c("nCount_RNA", "mito.percent", "ribo.percent") 21 | ) 22 | } 23 | \arguments{ 24 | \item{dataPath}{A path containing the cell ranger processed data. 25 | Under this path, folders 'filtered_feature_bc_matrix' and 'raw_feature_bc_matrix' exist generally.} 26 | 27 | \item{statPath}{A path containing the results files of step 'runScStatistics'.} 28 | 29 | \item{savePath}{A path to save the results files. 
If NULL, the 'statPath' will be used instead.} 30 | 31 | \item{sampleName}{A character string giving a label for this sample.} 32 | 33 | \item{bool.filter.cell}{A logical value indicating whether to filter the cells 34 | according to the QC of 'scStatistics'.} 35 | 36 | \item{bool.filter.gene}{A logical value indicating whether to filter the genes 37 | according to the QC of 'scStatistics'.} 38 | 39 | \item{anno.filter}{A vector indicating the types of genes to be filtered. 40 | Must be some of c("mitochondrial", "ribosome", "dissociation")(default) or NULL.} 41 | 42 | \item{nCell.min}{An integer number used to filter gene. The default is 3. 43 | Genes with the number of expressed cells less than this threshold will be filtered.} 44 | 45 | \item{bgPercent.max}{A float number used to filter gene. The default is 1 (no filtering). 46 | Genes with the background percentage larger than this threshold will be filtered.} 47 | 48 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of 49 | human cells and mouse cells(such as PDX sample). 50 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.} 51 | 52 | \item{bool.rmContamination}{A logical value indicating whether to remove ambient RNA contamination based on 'SoupX'.} 53 | 54 | \item{vars.add.meta}{A vector indicating the variables to be added to Seurat object's meta.data. 55 | The default is c("mito.percent", "ribo.percent", "diss.percent").} 56 | 57 | \item{vars.to.regress}{A vector indicating the variables to regress out in R package Seurat. 58 | The default is c("nCount_RNA", "mito.percent", "ribo.percent").} 59 | } 60 | \value{ 61 | A list of Seurat object and gene.manifest. 62 | The Seurat object is after log-normalization, highly variable genes identification, scaling data. 63 | } 64 | \description{ 65 | According to the QC results of scStatistics, filter cells and genes. 66 | Prepare a Seurat object. 
67 | } 68 | -------------------------------------------------------------------------------- /man/runCellClassify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runCellClassify} 4 | \alias{runCellClassify} 5 | \title{runCellClassify} 6 | \usage{ 7 | runCellClassify( 8 | expr, 9 | cell.annotation, 10 | coor.names = c("tSNE_1", "tSNE_2"), 11 | savePath, 12 | ct.templates = NULL, 13 | species = "human" 14 | ) 15 | } 16 | \arguments{ 17 | \item{expr}{A Seurat object.} 18 | 19 | \item{cell.annotation}{A data.frame of cells' annotation.} 20 | 21 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.} 22 | 23 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 24 | 25 | \item{ct.templates}{A list of vectors of several cell type templates. 26 | The default is NULL and the templates prepared in this package will be used.} 27 | 28 | \item{species}{A character string indicating what species the sample belong to. 29 | Only "human"(default) or "mouse" are allowed.} 30 | } 31 | \value{ 32 | A list of updated Seurat object, cell.annotation, and the plots for cell type annotation. 33 | } 34 | \description{ 35 | Use a one-class logistic regression (OCLR) model to predict cancer microenvironment cell types. 
36 | } 37 | -------------------------------------------------------------------------------- /man/runCellCycle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runCellCycle} 4 | \alias{runCellCycle} 5 | \title{runCellCycle} 6 | \usage{ 7 | runCellCycle(expr, species = "human") 8 | } 9 | \arguments{ 10 | \item{expr}{A Seurat object.} 11 | 12 | \item{species}{A character string indicating what species the sample belongs to. 13 | Only "human"(default) or "mouse" are allowed.} 14 | } 15 | \value{ 16 | An array of cell cycle scores. 17 | } 18 | \description{ 19 | Estimate cell cycle scores. 20 | } 21 | -------------------------------------------------------------------------------- /man/runCellInteraction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runCellInteraction} 4 | \alias{runCellInteraction} 5 | \title{runCellInteraction} 6 | \usage{ 7 | runCellInteraction( 8 | expr, 9 | cellSetName = "default", 10 | species = "human", 11 | savePath = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{expr}{A Seurat object.} 16 | 17 | \item{cellSetName}{The column name of `expr`'s `meta.data`, used to indicate the cell set annotation.} 18 | 19 | \item{species}{A character string indicating what species the sample belongs to. 20 | Only "human"(default) or "mouse" are allowed.} 21 | 22 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 23 | } 24 | \value{ 25 | A data frame which contains the cell sets ligand-receptor pairs and their scores.
26 | } 27 | \description{ 28 | runCellInteraction 29 | } 30 | -------------------------------------------------------------------------------- /man/runDoublet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runDoublet} 4 | \alias{runDoublet} 5 | \title{runDoublet} 6 | \usage{ 7 | runDoublet(expr, method = "cxds", pc.use = 30) 8 | } 9 | \arguments{ 10 | \item{expr}{A Seurat object.} 11 | 12 | \item{method}{The method to estimate doublet score. The default is "cxds".} 13 | 14 | \item{pc.use}{An integer number indicating the number of PCs to use as input features. The default is 30.} 15 | } 16 | \value{ 17 | An array of doublet scores. 18 | } 19 | \description{ 20 | runDoublet 21 | } 22 | -------------------------------------------------------------------------------- /man/runExprProgram.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runExprProgram} 4 | \alias{runExprProgram} 5 | \title{runExprProgram} 6 | \usage{ 7 | runExprProgram( 8 | expr, 9 | rank = 50, 10 | sel.clusters = NULL, 11 | clusterStashName = "default", 12 | savePath = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{expr}{A Seurat object.} 17 | 18 | \item{rank}{An integer of decomposition rank used in NMF.} 19 | 20 | \item{sel.clusters}{A vector of selected clusters to analyze. The default is NULL and all clusters will be used.} 21 | 22 | \item{clusterStashName}{A character string used as the name of cluster identities. The default is "default".} 23 | 24 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 25 | } 26 | \value{ 27 | A list of decomposed matrices (W and H), and the relative genes of each program.
28 | } 29 | \description{ 30 | Perform non-negative matrix factorization (NMF) to identify expression programs. 31 | } 32 | -------------------------------------------------------------------------------- /man/runGeneSets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runGeneSets} 4 | \alias{runGeneSets} 5 | \title{runGeneSets} 6 | \usage{ 7 | runGeneSets(expr, geneSets, method = "average") 8 | } 9 | \arguments{ 10 | \item{expr}{A Seurat object.} 11 | 12 | \item{geneSets}{A list of gene sets to be analyzed. The default is NULL and 50 hallmark gene sets from MSigDB will be used.} 13 | 14 | \item{method}{The method to be used in calculate gene set scores. Currently, only "average" and "GSVA" are allowed.} 15 | } 16 | \value{ 17 | A data.frame of calculated gene set signature scores. 18 | } 19 | \description{ 20 | Calculate gene set signature scores for cells. 21 | } 22 | -------------------------------------------------------------------------------- /man/runMalignancy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnvFunction.R 3 | \name{runMalignancy} 4 | \alias{runMalignancy} 5 | \title{runMalignancy} 6 | \usage{ 7 | runMalignancy( 8 | expr, 9 | gene.manifest, 10 | cell.annotation, 11 | savePath, 12 | cutoff = 0.1, 13 | minCell = 3, 14 | p.value.cutoff = 0.5, 15 | coor.names = c("tSNE_1", "tSNE_2"), 16 | ref.data = NULL, 17 | referAdjMat = NULL, 18 | species = "human", 19 | genome = "hg19", 20 | hg.mm.mix = F 21 | ) 22 | } 23 | \arguments{ 24 | \item{expr}{A Seurat object.} 25 | 26 | \item{gene.manifest}{A data.frame of genes' manifest.} 27 | 28 | \item{cell.annotation}{A data.frame of cells' annotation.} 29 | 30 | \item{savePath}{A path to save the results files. 
If NULL, the 'statPath' will be used instead.} 31 | 32 | \item{cutoff}{The cut-off for min average read counts per gene among 33 | reference cells. The default is 0.1.} 34 | 35 | \item{minCell}{An integer number used to filter gene. The default is 3.} 36 | 37 | \item{p.value.cutoff}{The p-value to decide whether the distribution of 38 | malignancy score is bimodal.} 39 | 40 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.} 41 | 42 | \item{ref.data}{An expression matrix of gene by cell, which is used as the normal reference. 43 | The default is NULL, and an immune cells or bone marrow cells expression matrix will be used for human or mouse species, respectively.} 44 | 45 | \item{referAdjMat}{An adjacency matrix for the normal reference data. 46 | The larger the value, the closer the cell pair is. 47 | The default is NULL, and a SNN matrix of the default ref.data will be used.} 48 | 49 | \item{species}{A character string indicating what species the sample belongs to. 50 | Only "human"(default) or "mouse" are allowed.} 51 | 52 | \item{genome}{A character string indicating the version of the reference gene annotation information. 53 | This information is mainly used to infer CNV profile and estimate malignancy. 54 | Only 'hg19' (default) or 'hg38' are allowed for "human" species, and only "mm10" is allowed for "mouse" species.} 55 | 56 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of 57 | human cells and mouse cells(such as PDX sample). 58 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.} 59 | } 60 | \value{ 61 | A list of cnvList, reference malignancy score, seurat object, 62 | cell.annotation, bimodal.pvalue, malign.thres, and all generated plots.
63 | } 64 | \description{ 65 | runMalignancy 66 | } 67 | -------------------------------------------------------------------------------- /man/runScAnnotation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runScAnnotation} 4 | \alias{runScAnnotation} 5 | \title{runScAnnotation} 6 | \usage{ 7 | runScAnnotation( 8 | dataPath, 9 | statPath, 10 | savePath = NULL, 11 | authorName = NULL, 12 | sampleName = "sc", 13 | bool.filter.cell = T, 14 | bool.filter.gene = T, 15 | anno.filter = c("mitochondrial", "ribosome", "dissociation"), 16 | nCell.min = 3, 17 | bgPercent.max = 1, 18 | bool.rmContamination = F, 19 | vars.add.meta = c("mito.percent", "ribo.percent", "diss.percent"), 20 | vars.to.regress = c("nCount_RNA", "mito.percent", "ribo.percent"), 21 | pc.use = 30, 22 | resolution = 0.8, 23 | clusterStashName = "default", 24 | show.features = NULL, 25 | bool.add.features = T, 26 | bool.runDiffExpr = T, 27 | n.markers = 5, 28 | species = "human", 29 | genome = "hg19", 30 | hg.mm.mix = F, 31 | bool.runDoublet = T, 32 | doublet.method = "bcds", 33 | bool.runCellClassify = T, 34 | ct.templates = NULL, 35 | coor.names = c("tSNE_1", "tSNE_2"), 36 | bool.runMalignancy = T, 37 | cnv.ref.data = NULL, 38 | cnv.referAdjMat = NULL, 39 | cutoff = 0.1, 40 | p.value.cutoff = 0.5, 41 | bool.intraTumor = T, 42 | bool.runCellCycle = T, 43 | bool.runStemness = T, 44 | bool.runGeneSets = T, 45 | geneSets = NULL, 46 | geneSet.method = "average", 47 | bool.runExprProgram = T, 48 | nmf.rank = 50, 49 | bool.runInteraction = T, 50 | genReport = T 51 | ) 52 | } 53 | \arguments{ 54 | \item{dataPath}{A path containing the cell ranger processed data. 
55 | Under this path, folders 'filtered_feature_bc_matrix' and 'raw_feature_bc_matrix' exist generally.} 56 | 57 | \item{statPath}{A path containing the results files of step 'runScStatistics'.} 58 | 59 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 60 | 61 | \item{authorName}{A character string for authors name and will be shown in the report.} 62 | 63 | \item{sampleName}{A character string giving a label for this sample.} 64 | 65 | \item{bool.filter.cell}{A logical value indicating whether to filter the cells 66 | according to the QC of 'scStatistics'.} 67 | 68 | \item{bool.filter.gene}{A logical value indicating whether to filter the genes 69 | according to the QC of 'scStatistics'.} 70 | 71 | \item{anno.filter}{A vector indicating the types of genes to be filtered. 72 | Must be some of c("mitochondrial", "ribosome", "dissociation")(default) or NULL.} 73 | 74 | \item{nCell.min}{An integer number used to filter gene. The default is 3. 75 | Genes with the number of expressed cells less than this threshold will be filtered.} 76 | 77 | \item{bgPercent.max}{A float number used to filter gene. The default is 1 (no filtering). 78 | Genes with the background percentage larger than this threshold will be filtered.} 79 | 80 | \item{bool.rmContamination}{A logical value indicating whether to remove ambient RNA contamination based on 'SoupX'.} 81 | 82 | \item{vars.add.meta}{A vector indicating the variables to be added to Seurat object's meta.data. 83 | The default is c("mito.percent", "ribo.percent", "diss.percent").} 84 | 85 | \item{vars.to.regress}{A vector indicating the variables to regress out in R package Seurat. 86 | The default is c("nCount_RNA", "mito.percent", "ribo.percent").} 87 | 88 | \item{pc.use}{An integer number indicating the number of PCs to use as input features. The default is 30.} 89 | 90 | \item{resolution}{A float number used in function 'FindClusters' in Seurat. 
The default is 0.8.} 91 | 92 | \item{clusterStashName}{A character string used as the name of cluster identities. The default is "default".} 93 | 94 | \item{show.features}{A list or vector for genes to be plotted in 'markerPlot'.} 95 | 96 | \item{bool.add.features}{A logical value indicating whether to add default features to 'show.features' or not.} 97 | 98 | \item{bool.runDiffExpr}{A logical value indicating whether to perform differential expression analysis.} 99 | 100 | \item{n.markers}{An integer indicating the number of differentially expressed genes shown in the plot. The default is 5.} 101 | 102 | \item{species}{A character string indicating what species the sample belongs to. 103 | Only "human"(default) or "mouse" are allowed.} 104 | 105 | \item{genome}{A character string indicating the version of the reference gene annotation information. 106 | This information is mainly used to infer CNV profile and estimate malignancy. 107 | Only 'hg19' (default) or 'hg38' are allowed for "human" species, and only "mm10" is allowed for "mouse" species.} 108 | 109 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of 110 | human cells and mouse cells(such as PDX sample). 111 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.} 112 | 113 | \item{bool.runDoublet}{A logical value indicating whether to estimate doublet scores.} 114 | 115 | \item{doublet.method}{The method to estimate doublet score. The default is "bcds". 116 | "cxds"(co-expression based doublet scoring) and "bcds"(binary classification based doublet scoring) are allowed. 117 | These methods are from R package "scds".} 118 | 119 | \item{bool.runCellClassify}{A logical value indicating whether to predict the usual cell type. The default is TRUE.} 120 | 121 | \item{ct.templates}{A list of vectors of several cell type templates.
122 | The default is NULL and the templates prepared in this package will be used.} 123 | 124 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.} 125 | 126 | \item{bool.runMalignancy}{A logical value indicating whether to estimate malignancy.} 127 | 128 | \item{cnv.ref.data}{An expression matrix of gene by cell, which is used as the normal reference during estimating malignancy. 129 | The default is NULL, and an immune cells or bone marrow cells expression matrix will be used for human or mouse species, respectively.} 130 | 131 | \item{cnv.referAdjMat}{An adjacency matrix for the normal reference data. 132 | The larger the value, the closer the cell pair is. 133 | The default is NULL, and a SNN matrix of the default ref.data will be used.} 134 | 135 | \item{cutoff}{A threshold used in the CNV inference.} 136 | 137 | \item{p.value.cutoff}{A threshold to decide whether the bimodal distribution of malignancy score is significant.} 138 | 139 | \item{bool.intraTumor}{A logical value indicating whether to use the identified tumor clusters to 140 | perform following intra-tumor heterogeneity analyses.} 141 | 142 | \item{bool.runCellCycle}{A logical value indicating whether to estimate cell cycle scores.} 143 | 144 | \item{bool.runStemness}{A logical value indicating whether to estimate stemness scores.} 145 | 146 | \item{bool.runGeneSets}{A logical value indicating whether to estimate gene sets signature scores.} 147 | 148 | \item{geneSets}{A list of gene sets to be analyzed. The default is NULL and 50 hallmark gene sets from MSigDB will be used.} 149 | 150 | \item{geneSet.method}{The method to be used to calculate gene set scores.
Currently, only "average" and "GSVA" are allowed.} 151 | 152 | \item{bool.runExprProgram}{A logical value indicating whether to run non-negative matrix factorization (NMF) to identify expression programs.} 153 | 154 | \item{nmf.rank}{An integer of decomposition rank used in NMF.} 155 | 156 | \item{bool.runInteraction}{A logical value indicating whether to run cell set ligand-receptor interaction analysis.} 157 | 158 | \item{genReport}{A logical value indicating whether to generate a .html/.md report (suggest to set TRUE).} 159 | } 160 | \value{ 161 | A results list with all useful objects used in the function. 162 | } 163 | \description{ 164 | According to the results of 'runScStatistics', perform cell and gene quality control. 165 | Use the R package Seurat to perform basic operations (normalization, log-transformation, 166 | highly variable genes identification, removing unwanted variance, scaling, centering, 167 | dimension reduction, clustering, and differential expression analysis). 168 | Perform some cancer-specific analyses: cancer micro-environmental cell type classification, 169 | cell malignancy estimation, cell cycle analysis, cell stemness analysis, 170 | gene set signature analysis, expression programs identification, and so on.
171 | } 172 | -------------------------------------------------------------------------------- /man/runScCombination.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scCombination.R 3 | \name{runScCombination} 4 | \alias{runScCombination} 5 | \title{runScCombination} 6 | \usage{ 7 | runScCombination( 8 | single.savePaths, 9 | sampleNames, 10 | savePath, 11 | combName, 12 | authorName = NULL, 13 | comb.method = "NormalMNN", 14 | harmony.theta = NULL, 15 | harmony.lambda = NULL, 16 | harmony.sigma = 0.1, 17 | vars.to.regress = c("nCount_RNA", "mito.percent", "ribo.percent"), 18 | pc.use = 30, 19 | resolution = 0.8, 20 | clusterStashName = "comb.cluster", 21 | show.features = NULL, 22 | bool.add.features = T, 23 | bool.runDiffExpr = T, 24 | n.markers = 5, 25 | sample.colors = NULL, 26 | species = "human", 27 | genome = "hg19", 28 | hg.mm.mix = F, 29 | bool.runCellClassify = T, 30 | ct.templates = NULL, 31 | coor.names = c("tSNE_1", "tSNE_2"), 32 | bool.runMalignancy = T, 33 | cnv.ref.data = NULL, 34 | cnv.referAdjMat = NULL, 35 | cutoff = 0.1, 36 | p.value.cutoff = 0.5, 37 | bool.intraTumor = T, 38 | bool.runCellCycle = T, 39 | bool.runStemness = T, 40 | bool.runGeneSets = T, 41 | geneSets = NULL, 42 | geneSet.method = "average", 43 | bool.runExprProgram = T, 44 | nmf.rank = 50, 45 | genReport = T 46 | ) 47 | } 48 | \arguments{ 49 | \item{single.savePaths}{A vector of paths containing the results files of step 'runScAnnotation' for each sample.} 50 | 51 | \item{sampleNames}{A vector of labels for all samples.} 52 | 53 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 54 | 55 | \item{combName}{A label for the combined samples.} 56 | 57 | \item{authorName}{A character string for authors name and will be shown in the report.} 58 | 59 | \item{comb.method}{The method to combine samples.
The default is "NormalMNN". "Harmony", "NormalMNN", "SeuratMNN", "Raw", "Regression" and "LIGER" are optional.} 60 | 61 | \item{harmony.theta}{The parameter 'theta' of function "RunHarmony" in the harmony package.} 62 | 63 | \item{harmony.lambda}{The parameter 'lambda' of function "RunHarmony" in the harmony package.} 64 | 65 | \item{harmony.sigma}{The parameter 'sigma' of function "RunHarmony" in the harmony package.} 66 | 67 | \item{vars.to.regress}{A vector indicating the variables to regress out in R package Seurat. 68 | The default is c("nCount_RNA", "mito.percent", "ribo.percent").} 69 | 70 | \item{pc.use}{An integer number indicating the number of PCs to use as input features. The default is 30.} 71 | 72 | \item{resolution}{A float number used in function 'FindClusters' in Seurat. The default is 0.8.} 73 | 74 | \item{clusterStashName}{A character string used as the name of cluster identities. The default is "comb.cluster".} 75 | 76 | \item{show.features}{A list or vector for genes to be plotted in 'markerPlot'.} 77 | 78 | \item{bool.add.features}{A logical value indicating whether to add default features to 'show.features' or not.} 79 | 80 | \item{bool.runDiffExpr}{A logical value indicating whether to perform differential expression analysis.} 81 | 82 | \item{n.markers}{An integer indicating the number of differentially expressed genes shown in the plot. The default is 5.} 83 | 84 | \item{sample.colors}{The colors used for samples. The default is NULL, and the pre-set colors will be used.} 85 | 86 | \item{species}{A character string indicating what species the sample belongs to. 87 | Only "human"(default) or "mouse" are allowed.} 88 | 89 | \item{genome}{A character string indicating the version of the reference gene annotation information. 90 | This information is mainly used to infer CNV profile and estimate malignancy.
91 | Only 'hg19' (default) or 'hg38' are allowed for "human" species, and only "mm10" is allowed for "mouse" species.} 92 | 93 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of 94 | human cells and mouse cells(such as PDX sample). 95 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.} 96 | 97 | \item{bool.runCellClassify}{A logical value indicating whether to predict the usual cell type. The default is TRUE.} 98 | 99 | \item{ct.templates}{A list of vectors of several cell type templates. 100 | The default is NULL and the templates prepared in this package will be used.} 101 | 102 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.} 103 | 104 | \item{bool.runMalignancy}{A logical value indicating whether to estimate malignancy.} 105 | 106 | \item{cnv.ref.data}{An expression matrix of gene by cell, which is used as the normal reference during estimating malignancy. 107 | The default is NULL, and an immune cells or bone marrow cells expression matrix will be used for human or mouse species, respectively.} 108 | 109 | \item{cnv.referAdjMat}{An adjacency matrix for the normal reference data. 110 | The larger the value, the closer the cell pair is.
111 | The default is NULL, and a SNN matrix of the default ref.data will be used.} 112 | 113 | \item{cutoff}{A threshold used in the CNV inference.} 114 | 115 | \item{p.value.cutoff}{A threshold to decide whether the bimodal distribution of malignancy score is significant.} 116 | 117 | \item{bool.intraTumor}{A logical value indicating whether to use the identified tumor clusters to 118 | perform following intra-tumor heterogeneity analyses.} 119 | 120 | \item{bool.runCellCycle}{A logical value indicating whether to estimate cell cycle scores.} 121 | 122 | \item{bool.runStemness}{A logical value indicating whether to estimate stemness scores.} 123 | 124 | \item{bool.runGeneSets}{A logical value indicating whether to estimate gene sets signature scores.} 125 | 126 | \item{geneSets}{A list of gene sets to be analyzed. The default is NULL and 50 hallmark gene sets from MSigDB will be used.} 127 | 128 | \item{geneSet.method}{The method to be used to calculate gene set scores. Currently, only "average" and "GSVA" are allowed.} 129 | 130 | \item{bool.runExprProgram}{A logical value indicating whether to run non-negative matrix factorization (NMF) to identify expression programs.} 131 | 132 | \item{nmf.rank}{An integer of decomposition rank used in NMF.} 133 | 134 | \item{genReport}{A logical value indicating whether to generate a .html/.md report (suggest to set TRUE).} 135 | } 136 | \value{ 137 | A results list with all useful objects used in the function. 138 | } 139 | \description{ 140 | Perform multi-samples analyses.
141 | } 142 | -------------------------------------------------------------------------------- /man/runScStatistics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scStatistics.R 3 | \name{runScStatistics} 4 | \alias{runScStatistics} 5 | \title{runScStatistics} 6 | \usage{ 7 | runScStatistics( 8 | dataPath, 9 | savePath, 10 | authorName = NULL, 11 | sampleName = "sc", 12 | species = "human", 13 | hg.mm.mix = F, 14 | hg.mm.thres = 0.6, 15 | mix.anno = c(human = "hg19", mouse = "mm10"), 16 | bg.spec.genes = NULL, 17 | bool.runSoupx = F, 18 | genReport = T 19 | ) 20 | } 21 | \arguments{ 22 | \item{dataPath}{A path containing the cell ranger processed data. 23 | Under this path, folders 'filtered_feature_bc_matrix' and 'raw_feature_bc_matrix' exist generally.} 24 | 25 | \item{savePath}{A path to save the results files(suggest to create a folder named by sample name).} 26 | 27 | \item{authorName}{A character string for authors name and will be shown in the report.} 28 | 29 | \item{sampleName}{A character string giving a label for this sample.} 30 | 31 | \item{species}{A character string indicating what species the sample belongs to. 32 | Must be one of "human"(default) and "mouse".} 33 | 34 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of 35 | human cells and mouse cells(such as PDX sample). 36 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.} 37 | 38 | \item{hg.mm.thres}{A float-point threshold within [0.5, 1] to identify human and mouse cells. 39 | Cells with UMI percentage of single species larger than the threshold are labeled human or mouse cells. 40 | The default is 0.6.} 41 | 42 | \item{mix.anno}{A vector to indicate the prefix of genes from different species.
43 | The default is c("human" = "hg19", "mouse" = "mm10").} 44 | 45 | \item{bg.spec.genes}{A list of background specific genes, which are used to remove ambient genes' influence.} 46 | 47 | \item{bool.runSoupx}{A logical value indicating whether to estimate contamination fraction by SoupX.} 48 | 49 | \item{genReport}{A logical value indicating whether to generate a .html/.md report (suggest to set TRUE).} 50 | } 51 | \value{ 52 | A results list with all useful objects used in the function. 53 | } 54 | \description{ 55 | Perform cell QC, gene QC, visualization and give suggested thresholds. 56 | } 57 | -------------------------------------------------------------------------------- /man/runSeurat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runSeurat} 4 | \alias{runSeurat} 5 | \title{runSeurat} 6 | \usage{ 7 | runSeurat( 8 | expr, 9 | savePath, 10 | pc.use = 30, 11 | resolution = 0.8, 12 | clusterStashName = "default", 13 | bool.runDiffExpr = T, 14 | comb.method = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{expr}{A Seurat object returned by prepareSeurat.} 19 | 20 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.} 21 | 22 | \item{pc.use}{An integer number indicating the number of PCs to use as input features. The default is 30.} 23 | 24 | \item{resolution}{A float number used in function 'FindClusters' in Seurat. The default is 0.8.} 25 | 26 | \item{clusterStashName}{A character string used as the name of cluster identities. The default is "default".} 27 | 28 | \item{bool.runDiffExpr}{A logical value indicating whether to perform differential expression analysis.} 29 | 30 | \item{comb.method}{The method used in combining samples.
It worked only for multi-sample analysis.} 31 | } 32 | \value{ 33 | A list containing a Seurat object, differential expressed genes and annotation information for cells. 34 | } 35 | \description{ 36 | Perform usual Seurat step and cell type prediction. 37 | } 38 | -------------------------------------------------------------------------------- /man/runStemness.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scAnnotation.R 3 | \name{runStemness} 4 | \alias{runStemness} 5 | \title{runStemness} 6 | \usage{ 7 | runStemness(X, stem.sig = NULL, species = "human") 8 | } 9 | \arguments{ 10 | \item{X}{An expression matrix of gene by cell to estimate stemness.} 11 | 12 | \item{stem.sig}{An array of stemness signature. The default is NULL, and a prepared signature will be used.} 13 | 14 | \item{species}{A character string indicating what species the sample belong to. 15 | Only "human"(default) or "mouse" are allowed.} 16 | } 17 | \value{ 18 | An array of cell stemness scores. 19 | } 20 | \description{ 21 | Estimate cell stemness according to the Spearman correlation with stemness signature. 22 | } 23 | -------------------------------------------------------------------------------- /man/runSurvival.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{runSurvival} 4 | \alias{runSurvival} 5 | \title{runSurvival} 6 | \usage{ 7 | runSurvival( 8 | features, 9 | data, 10 | surv.time, 11 | surv.event, 12 | cut.off = 0.5, 13 | savePath = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{features}{The names of marker genes or signatures to be analyzed.} 18 | 19 | \item{data}{The data used to perform survival analysis. 20 | It should be an expression or signature matrix with gene or signature by patient. 
21 | The row names are the features' names. The columns are patients' labels.} 22 | 23 | \item{surv.time}{The survival time of patients. It should be in accord with the columns of data.} 24 | 25 | \item{surv.event}{The status indicator of patients. 0=alive, 1=dead. It should be in accord with the columns of data.} 26 | 27 | \item{cut.off}{The percentage threshold to divide patients into two groups. 28 | The default is 0.5, which means the patients are divided by median. 29 | Other values, such as 0.4, means the first 40 percent patients are set "Low" group 30 | and the last 40 percent are set "High" group (the median 20 percent are discarded).} 31 | 32 | \item{savePath}{The path to save the survival plots of genes or signatures (the default is NULL and the plots will be returned without saving).} 33 | } 34 | \value{ 35 | A list of survival curves plots. 36 | } 37 | \description{ 38 | According to the marker genes or signatures expression high/low levels, 39 | patients are divided into two groups and then survival analysis is performed. 40 | The survival curves can be plotted.
41 | } 42 | -------------------------------------------------------------------------------- /scCancer.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | --------------------------------------------------------------------------------