├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── cnvFunction.R
    ├── scAnnotation.R
    ├── scCombination.R
    ├── scStatistics.R
    └── utils.R
├── README.md
├── inst
    ├── rds
    │   ├── cellTypeTemplates.RDS
    │   ├── cnvRef_Data-HM.RDS
    │   ├── cnvRef_Data-boneMarrow-MS.RDS
    │   ├── cnvRef_SNN-HM.RDS
    │   └── cnvRef_SNN-boneMarrow-MS.RDS
    ├── rmd
    │   ├── SoupX.Rmd
    │   ├── cellCalling.Rmd
    │   ├── cellCycle.Rmd
    │   ├── cellInteraction.Rmd
    │   ├── cellTypePred.Rmd
    │   ├── contamination.Rmd
    │   ├── diffExpr.Rmd
    │   ├── doublet.Rmd
    │   ├── exprProgram.Rmd
    │   ├── filterCell.Rmd
    │   ├── filterGene.Rmd
    │   ├── geneSets.Rmd
    │   ├── main-scAnno.Rmd
    │   ├── main-scAnnoComb.Rmd
    │   ├── main-scStat.Rmd
    │   ├── malign-comb.Rmd
    │   ├── malignancy.Rmd
    │   ├── stemness.Rmd
    │   └── umap.Rmd
    └── txt
    │   ├── PairsLigRec.txt
    │   ├── cellCycle-genes.txt
    │   ├── diss-genes.txt
    │   ├── gene-chr-hg19.txt
    │   ├── gene-chr-hg38.txt
    │   ├── gene-chr-mm10.txt
    │   ├── gene.chr.txt
    │   ├── hallmark-pathways.txt
    │   ├── hg-mm-HomologyGenes.txt
    │   └── pcbc-stemsig.tsv
├── man
    ├── Read10Xdata.Rd
    ├── checkAnnoArguments.Rd
    ├── checkCombArguments.Rd
    ├── checkStatArguments.Rd
    ├── clusterBarPlot.Rd
    ├── extractFiles.Rd
    ├── genAnnoReport.Rd
    ├── genStatReport.Rd
    ├── generate10Xdata.Rd
    ├── getCellTypeColor.Rd
    ├── getDefaultColors.Rd
    ├── getDefaultGeneSets.Rd
    ├── getDefaultMarkers.Rd
    ├── getTumorCluster.Rd
    ├── ggplot_config.Rd
    ├── markerPlot.Rd
    ├── plotCellInteraction.Rd
    ├── plotExprProgram.Rd
    ├── plotGeneSet.Rd
    ├── plotMalignancy.Rd
    ├── plotSeurat.Rd
    ├── pointDRPlot.Rd
    ├── predCellType.Rd
    ├── prepareData.Rd
    ├── prepareSeurat.Rd
    ├── runCellClassify.Rd
    ├── runCellCycle.Rd
    ├── runCellInteraction.Rd
    ├── runDoublet.Rd
    ├── runExprProgram.Rd
    ├── runGeneSets.Rd
    ├── runMalignancy.Rd
    ├── runScAnnotation.Rd
    ├── runScCombination.Rd
    ├── runScStatistics.Rd
    ├── runSeurat.Rd
    ├── runStemness.Rd
    └── runSurvival.Rd
├── scCancer.Rproj
└── vignettes
    ├── .gitignore
    └── scCancer.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^doc$
4 | ^Meta$
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | inst/doc
2 | doc
3 | Meta
4 | .Rproj.user
5 | .Rhistory
6 | rprof.log


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: scCancer
 2 | Type: Package
 3 | Title: A package for automated processing of single cell RNA-seq data in cancer
 4 | Version: 2.2.1
 5 | Author: G-Lab
 6 | Maintainer: Wenbo Guo <gwb17@mails.tsinghua.edu.cn>
 7 | Description: The package 'scCancer' focuses on processing and analyzing 
 8 |     scRNA-seq data for cancer research. Besides routine data processing steps, 
 9 |     it also integrates several cancer-specific analyses. For example, 
10 |     more comprehensive quality control, cancer microenvironment cell types 
11 |     prediction, cell malignancy estimation, gene set signature scores calculation,
12 |     and expression programs identification. After all steps, the package can
13 |     generate a user-friendly graphic report. 
14 | License: GPL-3
15 | Encoding: UTF-8
16 | LazyData: true
17 | RoxygenNote: 7.1.1
18 | Suggests: 
19 |     rmarkdown,
20 |     DropletUtils
21 | VignetteBuilder: knitr
22 | Imports: 
23 |     cowplot,
24 |     dplyr,
25 |     ggExtra,
26 |     ggplot2,
27 |     grid,
28 |     gridExtra,
29 |     GSVA,
30 |     knitr,
31 |     markdown,
32 |     Matrix,
33 |     methods,
34 |     NNLM,
35 |     pheatmap,
36 |     R.utils,
37 |     reshape2,
38 |     scds,
39 |     Seurat (>= 3.0),
40 |     SingleCellExperiment,
41 |     SoupX,
42 |     stringr,
43 |     survival,
44 |     survminer,
45 |     harmony,
46 |     liger
47 | Depends: R (>= 3.5.0)
48 | biocViews:
49 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(Read10Xdata)
 4 | export(checkAnnoArguments)
 5 | export(checkCombArguments)
 6 | export(checkStatArguments)
 7 | export(clusterBarPlot)
 8 | export(extractFiles)
 9 | export(genAnnoReport)
10 | export(genStatReport)
11 | export(generate10Xdata)
12 | export(getCellTypeColor)
13 | export(getDefaultColors)
14 | export(getDefaultGeneSets)
15 | export(getDefaultMarkers)
16 | export(getTumorCluster)
17 | export(ggplot_config)
18 | export(markerPlot)
19 | export(plotCellInteraction)
20 | export(plotExprProgram)
21 | export(plotGeneSet)
22 | export(plotMalignancy)
23 | export(plotSeurat)
24 | export(pointDRPlot)
25 | export(predCellType)
26 | export(prepareData)
27 | export(prepareSeurat)
28 | export(runCellClassify)
29 | export(runCellCycle)
30 | export(runCellInteraction)
31 | export(runDoublet)
32 | export(runExprProgram)
33 | export(runGeneSets)
34 | export(runMalignancy)
35 | export(runScAnnotation)
36 | export(runScCombination)
37 | export(runScStatistics)
38 | export(runSeurat)
39 | export(runStemness)
40 | export(runSurvival)
41 | import(Matrix)
42 | import(R.utils)
43 | import(Seurat)
44 | import(SoupX)
45 | import(ggplot2)
46 | import(harmony)
47 | import(knitr)
48 | import(liger)
49 | import(survival)
50 | import(survminer)
51 | importFrom(GSVA,gsva)
52 | importFrom(NNLM,nnmf)
53 | importFrom(SingleCellExperiment,SingleCellExperiment)
54 | importFrom(cowplot,get_legend)
55 | importFrom(cowplot,plot_grid)
56 | importFrom(dplyr,"%>%")
57 | importFrom(dplyr,group_by)
58 | importFrom(dplyr,top_n)
59 | importFrom(ggExtra,ggMarginal)
60 | importFrom(grDevices,boxplot.stats)
61 | importFrom(grDevices,colorRampPalette)
62 | importFrom(grid,grid.draw)
63 | importFrom(grid,grid.newpage)
64 | importFrom(grid,unit.c)
65 | importFrom(gridExtra,arrangeGrob)
66 | importFrom(gridExtra,grid.arrange)
67 | importFrom(markdown,markdownToHTML)
68 | importFrom(methods,as)
69 | importFrom(pheatmap,pheatmap)
70 | importFrom(reshape2,melt)
71 | importFrom(scds,bcds)
72 | importFrom(scds,cxds)
73 | importFrom(stats,cor)
74 | importFrom(stats,density)
75 | importFrom(stats,filter)
76 | importFrom(stats,median)
77 | importFrom(stats,quantile)
78 | importFrom(stats,sd)
79 | importFrom(stringr,str_c)
80 | importFrom(utils,read.delim)
81 | importFrom(utils,read.table)
82 | importFrom(utils,write.csv)
83 | importFrom(utils,write.table)
84 | 


--------------------------------------------------------------------------------
/R/cnvFunction.R:
--------------------------------------------------------------------------------
  1 | prepareCNV <- function(expr.data,
  2 |                        gene.manifest,
  3 |                        cell.annotation,
  4 |                        ref.data = NULL,
  5 |                        species = "human",
  6 |                        genome = "hg19",
  7 |                        hg.mm.mix = F){
  8 |     # Assemble the CNV pipeline input: observed counts merged with a normal
  9 |     # reference, a per-cell annotation table and the gene coordinate table.
 10 |     # Returns list(expr.data, gene.chr, cell.anno); expr.data and gene.chr
 11 |     # are keyed by Ensembl ID and share the row order of the coordinate table.
 12 |     # 'hg.mm.mix' is accepted but not used in this function.
 13 | 
 14 |     ## gene.chr: pick the coordinate file for the species/genome pair
 15 |     if(species == "human"){
 16 |         if(genome == "hg38"){
 17 |             chr.file <- "gene-chr-hg38.txt"
 18 |         }else if(genome == "hg19"){
 19 |             chr.file <- "gene-chr-hg19.txt"
 20 |         }else{
 21 |             stop("Error in 'runInferCNV': ", genome, " is not allowed for 'genome'.\n")
 22 |         }
 23 |     }else if(species == "mouse"){
 24 |         if(genome == "mm10"){
 25 |             chr.file <- "gene-chr-mm10.txt"
 26 |         }else{
 27 |             stop("Error in 'runInferCNV': ", genome, " is not allowed for 'genome'.\n")
 28 |         }
 29 |     }else{
 30 |         stop("Error in 'runInferCNV': ", species, " is not allowed for 'species'.\n")
 31 |     }
 32 |     gene.chr <- read.table(system.file("txt", chr.file, package = "scCancer"),
 33 |                            col.names = c("EnsemblID", "CHR", "C_START", "C_STOP"),
 34 |                            stringsAsFactors = F)
 35 | 
 36 |     ## reference.data: fall back to the bundled matrix when none is supplied
 37 |     if(is.null(ref.data)){
 38 |         if(species == "human"){
 39 |             ref.data <- readRDS(system.file("rds", "cnvRef_Data-HM.RDS", package = "scCancer"))
 40 |         }else if(species == "mouse"){
 41 |             ref.data <- readRDS(system.file("rds", "cnvRef_Data-boneMarrow-MS.RDS", package = "scCancer"))
 42 |         }
 43 |     }
 44 | 
 45 |     ## cell.anno: observed cells first, then the reference cells
 46 |     obs.anno <- data.frame(cellName = cell.annotation$barcodes,
 47 |                            cellAnno = cell.annotation$Cell.Type,
 48 |                            stringsAsFactors = F)
 49 |     ref.anno <- data.frame(cellName = colnames(ref.data),
 50 |                            cellAnno = "Reference",
 51 |                            stringsAsFactors = F)
 52 |     cell.anno <- rbind(obs.anno, ref.anno)
 53 |     rownames(cell.anno) <- cell.anno$cellName
 54 | 
 55 |     ## merge observed and reference counts on their shared genes and
 56 |     ## relabel the rows with the Ensembl IDs looked up in gene.manifest
 57 |     shared.genes <- intersect(rownames(expr.data), rownames(ref.data))
 58 |     merged <- cbind(as.matrix(expr.data[shared.genes, ]), ref.data[shared.genes, ])
 59 |     rownames(merged) <- gene.manifest[rownames(merged), ]$EnsemblID
 60 | 
 61 |     ## keep the genes that have coordinates, in coordinate-table order
 62 |     gene.chr <- subset(gene.chr, EnsemblID %in% rownames(merged))
 63 |     # gene.chr <- gene.chr[with(gene.chr, order(CHR, C_START, C_STOP)), ]
 64 |     rownames(gene.chr) <- gene.chr$EnsemblID
 65 |     merged <- merged[gene.chr$EnsemblID, ]
 66 | 
 67 |     ## bundle the three pieces
 68 |     return(list(expr.data = merged,
 69 |                 gene.chr = gene.chr,
 70 |                 cell.anno = cell.anno))
 71 | }
 72 | # Drop lowly expressed genes before CNV inference.
 73 | # Keeps genes whose mean count is >= 'cutoff' and that are detected (> 0) in
 74 | # at least 'minCell' cells; both expr.data and gene.chr are subset.
 75 | rmGeneForCNV <- function(cnvList, cutoff = 0.1, minCell = 3){
 76 |     gene.mean <- Matrix::rowMeans(cnvList$expr.data)
 77 |     gene.sum <- Matrix::rowSums(cnvList$expr.data > 0)
 78 |     genes.sel <- rownames(cnvList$expr.data)[gene.mean >= cutoff & gene.sum >= minCell]
 79 | 
 80 |     # drop = FALSE keeps the matrix/data.frame shape when one gene survives
 81 |     cnvList$expr.data <- cnvList$expr.data[genes.sel, , drop = FALSE]
 82 |     cnvList$gene.chr <- cnvList$gene.chr[genes.sel, , drop = FALSE]
 83 |     return(cnvList)
 84 | }
 85 | # Library-size normalisation: each cell (column) is divided by its total
 86 | # count and rescaled by the power of ten nearest to the mean library size.
 87 | normalizeDataForCNV <- function(cnvList){
 88 |     counts <- cnvList$expr.data
 89 |     lib.size <- Matrix::colSums(counts)
 90 | 
 91 |     # nearest power of ten (on the log10 scale) to the average library size
 92 |     scale.factor <- 10^round(log10(mean(lib.size)))
 93 | 
 94 |     # t(t(x) / v) divides column j by v[j]
 95 |     cnvList$expr.data <- t(t(counts) / lib.size) * scale.factor
 96 |     return(cnvList)
 97 | }
 98 | 
 99 | # Anscombe-style variance stabilisation: x -> 2 * sqrt(x + 3/8).
100 | anscombeTransform <- function(cnvList){
101 |     shifted <- cnvList$expr.data + 3/8
102 |     cnvList$expr.data <- 2 * sqrt(shifted)
103 |     return(cnvList)
104 | }
105 | 
106 | # log2(x + 1) transform of the expression matrix.
107 | logForCNV <- function(cnvList){
108 |     pseudo.count <- 1
109 |     cnvList$expr.data <- log2(cnvList$expr.data + pseudo.count)
110 |     return(cnvList)
111 | }
112 | 
113 | # Clipping threshold: the mean of |average column minimum| and
114 | # |average column maximum| of the expression matrix.
115 | getAverageBounds <- function(cnvList){
116 |     mat <- cnvList$expr.data
117 |     # average per-cell minimum first, average per-cell maximum second
118 |     avg.extremes <- c(mean(apply(mat, 2, min)), mean(apply(mat, 2, max)))
119 |     return(mean(abs(avg.extremes)))
120 | }
121 | 
122 | # Clip the expression matrix to the interval [-threshold, threshold].
123 | boundForCNV <- function(cnvList, threshold){
124 |     too.high <- cnvList$expr.data > threshold
125 |     cnvList$expr.data[too.high] <- threshold
126 |     cnvList$expr.data[cnvList$expr.data < -threshold] <- -threshold
127 |     return(cnvList)
128 | }
129 | # Smooth one cell's expression profile along a chromosome with a pyramid
130 | # (triangular) weighted moving average of odd length 'window.len'.
131 | # Edge positions are averaged over the weights that fall inside the profile.
132 | smoothOne <- function(ori.data, window.len = 101){
133 |     half.window <- (window.len - 1) / 2
134 |     # nothing to smooth (also avoids the descending sequence 1:0 below)
135 |     if(half.window < 1){ return(ori.data) }
136 | 
137 |     pad.data <- c(rep(0, half.window), ori.data, rep(0, half.window))
138 |     bool.data <- c(rep(0, half.window), rep(1, length(ori.data)), rep(0, half.window))
139 |     kernel.vec <- c(1:half.window, half.window + 1, half.window:1)
140 | 
141 |     # weighted sums and the matching sums of weights; padded ends come back NA
142 |     sum.data <- stats::filter(pad.data, kernel.vec, sides = 2)
143 |     num.data <- stats::filter(bool.data, kernel.vec, sides = 2)
144 |     valid <- !is.na(num.data)
145 |     smo.data <- sum.data[valid] / num.data[valid]
146 |     return(smo.data)
147 | }
148 | # Smooth every cell's profile chromosome by chromosome with smoothOne().
149 | # An even 'window.len' is raised to the next odd number; < 2 disables smoothing.
150 | smoothByChr <- function(cnvList, window.len = 101){
151 |     if(window.len < 2){
152 |         cat("- Warning in 'smoothBychr': Window length < 2, returning original data.\n")
153 |         return(cnvList)
154 |     }
155 | 
156 |     gene.chrs <- cnvList$gene.chr$CHR
157 |     smoothed <- cnvList$expr.data
158 |     for(chr in as.character(unique(gene.chrs))){
159 |         rows <- which(gene.chrs == chr)
160 |         # a chromosome with a single gene is left untouched
161 |         if(length(rows) <= 1){
162 |             next
163 |         }
164 | 
165 |         if(window.len %% 2 == 0){
166 |             window.len = window.len + 1
167 |             cat("- Warning in 'smoothBychr': Window length is even, adding one to 'window.len'.\n")
168 |         }
169 | 
170 |         # smoothOne runs on each column (cell) of this chromosome's rows
171 |         chr.block <- smoothed[rows, , drop=F]
172 |         chr.smooth <- apply(chr.block, 2, smoothOne, window.len = window.len)
173 |         rownames(chr.smooth) <- rownames(chr.block)
174 |         colnames(chr.smooth) <- colnames(chr.block)
175 |         smoothed[rows, ] <- chr.smooth
176 |     }
177 | 
178 |     cnvList$expr.data <- smoothed
179 |     return(cnvList)
180 | }
181 | 
182 | # Centre every cell (column) by subtracting its own median (default) or,
183 | # for any other 'method', its mean, computed across all genes.
184 | centerAcrossChr <- function(cnvList, method = "median"){
185 |     center.fun <- if (method == "median") median else mean
186 |     mat <- cnvList$expr.data
187 | 
188 |     # one centre per cell; NAs are ignored
189 |     cell.center <- apply(mat, 2, function(x) { center.fun(x, na.rm=T) } )
190 | 
191 |     # subtract the per-cell centres from every gene row; apply() returns the
192 |     # rows as columns, hence the transpose
193 |     cnvList$expr.data <- t(apply(mat, 1, "-", cell.center))
194 | 
195 |     return(cnvList)
196 | }
197 | # Subtract the reference cells' per-gene average from every cell.
198 | # With inv_log = TRUE the average is taken on the un-logged scale,
199 | # log2(mean(2^x - 1) + 1); otherwise it is the plain mean of the values.
200 | subtractRefExpr <- function(cnvList, inv_log = TRUE){
201 |     ref.cellNames <- subset(cnvList$cell.anno, cellAnno == "Reference")$cellName
202 |     if(length(ref.cellNames) == 0){
203 |         stop("Error in 'subtractRefExpr': no cells are annotated as 'Reference'.\n")
204 |     }
205 |     # drop = FALSE: a single reference cell must still be a one-column matrix
206 |     ref.data <- cnvList$expr.data[, ref.cellNames, drop = FALSE]
207 |     ref.means <- if(inv_log) log2(Matrix::rowMeans(2^ref.data - 1) + 1) else Matrix::rowMeans(ref.data)
208 | 
209 |     cnvList$expr.data <- cnvList$expr.data - ref.means
210 |     return(cnvList)
211 | }
212 | # Undo the log2 transform: x -> 2^x.
213 | invertLog2 <- function(cnvList){
214 |     log.values <- cnvList$expr.data
215 |     cnvList$expr.data <- 2^log.values
216 |     return(cnvList)
217 | }
218 | # Suppress noise: every value lying within 'sd_amplifier' average reference
219 | # standard deviations of the reference mean is replaced by that mean.
220 | # (The mean is over all reference values; the sd is averaged over reference cells.)
221 | denoiseByRefMeanSd <- function(cnvList, sd_amplifier=1.5){
222 |     expr.data <- cnvList$expr.data
223 |     ref.cellNames <- subset(cnvList$cell.anno, cellAnno == "Reference")$cellName
224 | 
225 |     # drop = FALSE keeps a single reference cell as a matrix so apply() works
226 |     ref.data <- expr.data[, ref.cellNames, drop = FALSE]
227 |     mean.ref.vals <- mean(ref.data)
228 |     mean.ref.sd <- mean(apply(ref.data, 2, function(x) sd(x, na.rm=T))) * sd_amplifier
229 | 
230 |     up.bound <- mean.ref.vals + mean.ref.sd
231 |     low.bound <- mean.ref.vals - mean.ref.sd
232 |     expr.data[expr.data > low.bound & expr.data < up.bound] <- mean.ref.vals
233 | 
234 |     cnvList$expr.data <- expr.data
235 |     return(cnvList)
236 | }
237 | 
238 | # Winsorise the matrix: values are clipped to the average of the per-cell
239 | # minima (lower bound) and the average of the per-cell maxima (upper bound).
240 | removeOutliers <- function(cnvList){
241 |     mat <- cnvList$expr.data
242 | 
243 |     # both bounds are derived from the unclipped matrix
244 |     floor.val <- mean(apply(mat, 2, min))
245 |     ceil.val <- mean(apply(mat, 2, max))
246 | 
247 |     mat[mat < floor.val] <- floor.val
248 |     mat[mat > ceil.val] <- ceil.val
249 |     cnvList$expr.data <- mat
250 |     return(cnvList)
251 | }
252 | # Run the full expression-based CNV inference: merge with the reference,
253 | # filter genes and normalise cells, smooth along each chromosome, centre each
254 | # cell, subtract the reference profile and denoise. Returns the final cnvList.
255 | runCNV <- function(expr.data,
256 |                    gene.manifest,
257 |                    cell.annotation,
258 |                    cutoff = 0.1, minCell = 3,
259 |                    ref.data = NULL,
260 |                    species = "human",
261 |                    genome = "hg19",
262 |                    hg.mm.mix = F){
263 |     ## 1. assemble input
264 |     cnv <- prepareCNV(expr.data = expr.data,
265 |                       gene.manifest = gene.manifest,
266 |                       cell.annotation = cell.annotation,
267 |                       ref.data = ref.data,
268 |                       species = species,
269 |                       genome = genome,
270 |                       hg.mm.mix = hg.mm.mix)
271 |     ## 2. gene filtering and per-cell normalisation
272 |     cnv <- rmGeneForCNV(cnv, cutoff = cutoff, minCell = minCell)
273 |     cnv <- normalizeDataForCNV(cnv)
274 |     ## 3. variance stabilisation, log scale, clipping
275 |     cnv <- logForCNV(anscombeTransform(cnv))
276 |     cnv <- boundForCNV(cnv, threshold = getAverageBounds(cnv))
277 |     ## 4. smooth along chromosomes, centre cells, remove reference signal
278 |     cnv <- smoothByChr(cnv, window.len = 101)
279 |     cnv <- centerAcrossChr(cnv, method = "median")
280 |     cnv <- subtractRefExpr(cnv)
281 |     ## 5. back to linear scale, denoise, clip outliers
282 |     cnv <- invertLog2(cnv)
283 |     cnv <- denoiseByRefMeanSd(cnv, sd_amplifier = 1.0)
284 |     return(removeOutliers(cnv))
285 | }
286 | # Malignancy score per cell: mean over genes of (CNV value - 1)^2, optionally
287 | # after mixing every cell with its strongest neighbours in 'adjMat'.
288 | getMalignScore <- function(cnvList, cell.type = "Observation", method = "smooth", adjMat = NULL){
289 |     if(cell.type == "Observation"){
290 |         cell.names <- subset(cnvList$cell.anno, cellAnno != "Reference")$cellName
291 |     }else if(cell.type == "Reference"){
292 |         cell.names <- subset(cnvList$cell.anno, cellAnno == "Reference")$cellName
293 |     }else{
294 |         stop("Error in 'getMalignScore': ", cell.type, " is not allowed for 'cell.type'.\n")
295 |     }
296 |     # drop = FALSE: a single selected cell must stay a one-column matrix
297 |     cur.data <- cnvList$expr.data[, cell.names, drop = FALSE]
298 |     if(is.null(adjMat) && method == "smooth"){
299 |         cat("- Warning in 'getMalignScore': Adjacent matrix is not provided, and use 'direct' method instead.\n")
300 |         method <- "direct"
301 |     }
302 |     if(method == "smooth"){
303 |         # keep on average ~10 of the strongest stored edges per cell; the
304 |         # probability is clamped at 0 so small graphs cannot break quantile()
305 |         thres <- quantile(adjMat@x, max(0, 1 - (dim(adjMat)[1] * 10 / length(adjMat@x))))
306 |         indexes <- as.matrix((adjMat > thres) + 0)
307 |         # the kept neighbours share a total weight of 0.5; cells without any
308 |         # neighbour get weight 0 here and keep their full own value below
309 |         tt <- 0.5 / (rowSums(indexes) - 1)
310 |         tt[!is.finite(tt) | tt < 0] <- 0
311 |         indexes <- indexes * tt
312 |         indexes <- indexes * (1 - diag(rep(1, dim(indexes)[1])))
313 |         diagValue <- rep(0.5, dim(indexes)[1])
314 |         diagValue[tt == 0] <- 1
315 |         # nrow is explicit because diag(0.5) would build a 0 x 0 matrix
316 |         indexes <- t(indexes + diag(diagValue, nrow = length(diagValue)))
317 | 
318 |         new.cur.data <- as.matrix(cur.data) %*% indexes
319 |         malignScore <- colSums((new.cur.data - 1)^2) / dim(new.cur.data)[1]
320 |     }else if(method == "direct"){
321 |         malignScore <- colSums((cur.data - 1)^2) / dim(cur.data)[1]
322 |     }else{
323 |         stop("Error in 'getMalignScore': ", method, " is not allowed for 'method'.\n")
324 |     }
325 |     names(malignScore) <- colnames(cur.data)
326 |     return(malignScore)
327 | }
328 | # Overlaid histograms of the malignancy scores of observed and reference
329 | # cells; a dashed red vertical line marks 'malign.thres' when it is given.
330 | # Returns the ggplot object.
331 | malignPlot <- function(obserScore, referScore, malign.thres = NULL){
332 |     scoreDF <- data.frame(malignScore = c(obserScore, referScore),
333 |                           sets = rep(c("Observation", "Reference"),
334 |                                      times = c(length(obserScore), length(referScore))))
335 |     obser.df <- subset(scoreDF, sets == "Observation")
336 |     refer.df <- subset(scoreDF, sets == "Reference")
337 |     fill.colors <- c("Observation"="#2e68b7", "Reference"="grey")
338 | 
339 |     p <- ggplot() +
340 |         geom_histogram(data = obser.df, mapping = aes(x = malignScore, fill = "Observation"),
341 |                        bins = 150, alpha = 0.6) +
342 |         geom_histogram(data = refer.df, mapping = aes(x = malignScore, fill = "Reference"),
343 |                        bins = 150, alpha = 0.6) +
344 |         labs(x = "Malignancy score", y = "Droplets count") +
345 |         scale_fill_manual(name = "Cells sets", guide = "legend", values = fill.colors) +
346 |         theme_classic() +
347 |         ggplot_config(base.size = 7) +
348 |         theme(legend.justification = c(1.12,1.12), legend.position = c(1,1))
349 |     ## optional threshold marker
350 |     if(is.null(malign.thres)){ return(p) }
351 |     p + geom_vline(xintercept = malign.thres, colour = "red", linetype = "dashed")
352 | }
353 | # Find a cut-off between the two highest extrema of the kernel density of
354 | # 'scores'. Returns the score at the lowest extremum between them, or NULL
355 | # when fewer than three extrema remain or the second one is negligible.
356 | getBimodalThres <- function(scores){
357 |     x.density <- density(scores)
358 |     d.x.density <- diff(x.density$y)
359 |     d.sign <- (d.x.density > 0) + 0
360 |     # positions where the slope sign flips mark the local extrema
361 |     ext.pos <- which(d.sign[2:length(d.sign)] - d.sign[1:(length(d.sign)-1)] != 0)
362 |     ext.density <- x.density$y[ext.pos]  # NOTE(review): a flip between slopes i and i+1 is stored as i, one grid point before y[i+1] - confirm intended
363 |     y.max <- max(ext.density)
364 |     if(length(ext.pos) >= 3){  # drop neighbouring extrema whose densities differ by < 0.1% of the maximum
365 |         del.ix <- c()
366 |         for(ei in 2:length(ext.density)){
367 |             if(abs(ext.density[ei] - ext.density[ei - 1]) < y.max * 0.001){
368 |                 del.ix <- c(del.ix, ei - 1, ei)
369 |             }
370 |         }
371 |         sel.ix <- !(1:length(ext.density) %in% unique(del.ix))
372 |         ext.density <- ext.density[sel.ix]
373 |         ext.pos <- ext.pos[sel.ix]
374 |     }
375 |     # a threshold is only searched for when at least three extrema are left
376 |     if(length(ext.pos) >= 3){
377 |         t.ext.density <- c(0, ext.density, 0)  # zero-padded so the end extrema have two neighbours
378 |         ext.height <- sapply(2:(length(ext.pos) + 1), FUN = function(x){  # height = smaller density gap to the two neighbours
379 |             return(min(abs(t.ext.density[x] - t.ext.density[x-1]), abs(t.ext.density[x] - t.ext.density[(x+1)])))
380 |         })
381 |         ext <- data.frame(x = ext.pos, y = ext.density, height = ext.height)
382 |         max.ix <- order(ext.density, decreasing = T)  # extrema ranked by density, highest first
383 |         if(ext.height[max.ix[2]] / ext.height[max.ix[1]] > 0.01){  # second-highest must reach 1% of the highest one's height
384 |             cut.df <- ext[c(max.ix[2]:max.ix[1]), ]  # all extrema from one of the two to the other, inclusive
385 |             threshold <- x.density$x[cut.df[which.min(cut.df$y), ]$x]  # score at the lowest-density extremum in that span
386 |         }else{
387 |             threshold <- NULL
388 |         }
389 |     }else{
390 |         threshold <- NULL
391 |     }
392 | 
393 |     return(threshold)
394 | }
395 | 
396 | 
397 | #
398 | # getBimodalThres <- function(scores){
399 | #     x.density <- density(scores)
400 | #     d.x.density <- diff(x.density$y)
401 | #     d.sign <- (d.x.density > 0) + 0
402 | #
403 | #     ext.pos <- which(d.sign[2:length(d.sign)] - d.sign[1:(length(d.sign)-1)] != 0)
404 | #     if(length(ext.pos) >= 3){
405 | #         ext.density <- x.density$y[ext.pos]
406 | #         t.ext.density <- c(0, ext.density, 0)
407 | #         ext.height <- sapply(2:(length(ext.pos) + 1), FUN = function(x){
408 | #             return(min(abs(t.ext.density[x] - t.ext.density[x-1]), abs(t.ext.density[x] - t.ext.density[(x+1)])))
409 | #         })
410 | #         ext <- data.frame(x = ext.pos, y = ext.density, height = ext.height)
411 | #
412 | #         max.ix <- order(ext.density, decreasing = T)
413 | #         if(ext.height[max.ix[2]] / ext.height[max.ix[1]] > 0.1){
414 | #             cut.df <- ext[c(max.ix[2]:max.ix[1]), ]
415 | #             threshold <- x.density$x[cut.df[which.min(cut.df$y), ]$x]
416 | #         }else{
417 | #             threshold <- NULL
418 | #         }
419 | #     }else{
420 | #         threshold <- NULL
421 | #     }
422 | #     return(threshold)
423 | # }
424 | 
425 | 
426 | 
427 | #' plotMalignancy
428 | #'
429 | #' Draw the malignancy scatter plots (type and score) and the malignancy type bar plot.
430 | #'
431 | #' @param cell.annotation A data.frame of cells' annotation containing the cells'
432 | #' malignancy score (`Malign.score`) and type (`Malign.type`).
433 | #' @param savePath A folder to save the figures in (under `figures/`, created
434 | #' if missing). The default is NULL, and nothing is saved.
435 | #' @inheritParams runScAnnotation
436 | #'
437 | #' @return A plot list.
438 | #' @export
439 | #'
440 | plotMalignancy <- function(cell.annotation,
441 |                            coor.names = c("tSNE_1", "tSNE_2"),
442 |                            savePath = NULL){
443 |     malign.colors <- c("malignant" = "#f57e87", "nonMalignant" = "#66d5a5")
444 | 
445 |     ## scatter plot of malignancy
446 |     p.malignType.Point <- pointDRPlot(cell.annotation, value = "Malign.type",
447 |                                       coor.names = coor.names,
448 |                                       colors = malign.colors,
449 |                                       legend.position = "right",
450 |                                       legend.title = "Malignancy\n type")
451 |     p.malignScore.Point <- pointDRPlot(cell.annotation, value = "Malign.score",
452 |                                        coor.names = coor.names,
453 |                                        colors = c("white", "#f57e87"),
454 |                                        discrete = F,
455 |                                        limit.quantile = 0.1,
456 |                                        legend.position = "right",
457 |                                        legend.title = "Malignancy\n score")
458 |     p.malignType.bar <- clusterBarPlot(cell.annotation = cell.annotation,
459 |                                        cell.colors = malign.colors,
460 |                                        sel.col = "Malign.type",
461 |                                        legend.title = "Malignancy type")
462 | 
463 |     ## save
464 |     if(!is.null(savePath)){
465 |         # make sure the output folder exists before ggsave() writes into it
466 |         fig.dir <- file.path(savePath, "figures")
467 |         if(!dir.exists(fig.dir)){ dir.create(fig.dir, recursive = T) }
468 |         ggsave(filename = file.path(fig.dir, "malignType-point.png"), p.malignType.Point, width = 5, height = 3.8, dpi = 300)
469 |         ggsave(filename = file.path(fig.dir, "malignScore-point.png"), p.malignScore.Point, width = 5, height = 3.8, dpi = 300)
470 |         ggsave(filename = file.path(fig.dir, "malignType-bar.png"), p.malignType.bar, width = 6, height = 3, dpi = 300)
471 |     }
472 |     return(list(p.malignType.Point = p.malignType.Point,
473 |                 p.malignScore.Point = p.malignScore.Point,
474 |                 p.malignType.bar = p.malignType.bar))
475 | }
476 | 
475 | 
476 | 
477 | #' runMalignancy
478 | #'
479 | #' Infer CNV profiles from expression, score each cell's malignancy and label the cells.
480 | #'
481 | #' @param expr A Seurat object.
482 | #' @param gene.manifest A data.frame of genes' manifest.
483 | #' @param cell.annotation A data.frame of cells' annotation.
484 | #' @param cutoff The cut-off for min average read counts per gene among
485 | #' all cells (observed and reference). The default is 0.1.
486 | #' @param minCell An integer number used to filter gene. The default is 3.
487 | #' @param p.value.cutoff The p-value to decide whether the distribution of
488 | #' malignancy score is bimodal. It is currently not used by this function.
489 | #' @param ref.data An expression matrix of gene by cell, which is used as the normal reference.
490 | #' The default is NULL, and an immune cells or bone marrow cells expression matrix will be used for human or mouse species, respectively.
491 | #' @param referAdjMat An adjacent matrix for the normal reference data.
492 | #' The larger the value, the closer the cell pair is.
493 | #' When `ref.data` is NULL, the SNN matrix of the default reference is loaded instead.
494 | #' @inheritParams runScAnnotation
495 | #'
496 | #' @return A list of cnvList, reference malignancy score, seurat object,
497 | #' cell.annotation, ju.exist.malign, malign.thres, and all generated plots.
498 | #' @export
499 | #'
500 | runMalignancy <- function(expr,
501 |                           gene.manifest,
502 |                           cell.annotation,
503 |                           savePath,
504 |                           cutoff = 0.1, minCell = 3,
505 |                           p.value.cutoff = 0.5,
506 |                           coor.names = c("tSNE_1", "tSNE_2"),
507 |                           ref.data = NULL,
508 |                           referAdjMat = NULL,
509 |                           species = "human",
510 |                           genome = "hg19",
511 |                           hg.mm.mix = F){
512 |     ## output folders: CNV tables go to 'malignancy/', plots to 'figures/'
513 |     for(sub.dir in c("malignancy", "figures")){
514 |         if(!dir.exists(file.path(savePath, sub.dir))){
515 |             dir.create(file.path(savePath, sub.dir), recursive = T)
516 |         }
517 |     }
518 | 
519 |     expr.data <- expr@assays$RNA@counts
520 |     cnvList <- runCNV(expr.data = expr.data,
521 |                       gene.manifest = gene.manifest,
522 |                       cell.annotation = cell.annotation,
523 |                       cutoff = cutoff, minCell = minCell,
524 |                       ref.data = ref.data,
525 |                       species = species,
526 |                       genome = genome,
527 |                       hg.mm.mix = hg.mm.mix)
528 | 
529 |     if(is.null(ref.data)){
530 |         if(species == "human"){
531 |             referAdjMat <- readRDS(system.file("rds", "cnvRef_SNN-HM.RDS", package = "scCancer"))
532 |         }else if(species == "mouse"){
533 |             referAdjMat <- readRDS(system.file("rds", "cnvRef_SNN-boneMarrow-MS.RDS", package = "scCancer"))
534 |         }
535 |     }
536 |     referScore.smooth <- getMalignScore(cnvList, "Reference", method = "smooth", adjMat = referAdjMat)
537 |     obserScore.smooth <- getMalignScore(cnvList, "Observation", method = "smooth",
538 |                                         adjMat = expr@graphs$RNA_snn)
539 |     up.refer <- quantile(referScore.smooth, 0.995)
540 |     low.refer <- quantile(referScore.smooth, 0.005)
541 |     referScore.smooth <- (referScore.smooth - low.refer) / (up.refer - low.refer)
542 |     obserScore.smooth <- (obserScore.smooth - low.refer) / (up.refer - low.refer)
543 | 
544 |     all.thres <- getBimodalThres(scores = c(referScore.smooth, obserScore.smooth))
545 |     malign.thres <- getBimodalThres(scores = obserScore.smooth)
546 |     ju.exist.malign <- !is.null(all.thres) || !is.null(malign.thres)
547 | 
548 |     ## malignancy type
549 |     if(!is.null(all.thres)){
550 |         malign.type <- rep("malignant", length(obserScore.smooth))
551 |         names(malign.type) <- names(obserScore.smooth)
552 |         if(!is.null(malign.thres)){
553 |             malign.type[names(obserScore.smooth)[obserScore.smooth < malign.thres]] <- "nonMalignant"
554 |         }
555 |     }else{
556 |         malign.type <- rep("nonMalignant", length(obserScore.smooth))
557 |         names(malign.type) <- names(obserScore.smooth)
558 |         if(!is.null(malign.thres)){
559 |             malign.type[names(obserScore.smooth)[obserScore.smooth >= malign.thres]] <- "malignant"
560 |         }
561 |     }
562 |     p.malignScore <- malignPlot(obserScore.smooth, referScore.smooth,
563 |                                 malign.thres = malign.thres)
564 | 
565 |     ## add score and type to cell.annotation
566 |     cell.annotation$Malign.score <- obserScore.smooth[rownames(cell.annotation)]
567 |     cell.annotation$Malign.type <- malign.type[rownames(cell.annotation)]
568 |     expr[["Malign.score"]] <- cell.annotation$Malign.score
569 |     expr[["Malign.type"]] <- cell.annotation$Malign.type
570 | 
571 |     ## plot
572 |     p.results <- plotMalignancy(cell.annotation = cell.annotation,
573 |                                 coor.names = coor.names,
574 |                                 savePath = savePath)
575 |     p.results[["p.malignScore"]] <- p.malignScore
576 |     ggsave(filename = file.path(savePath, "figures/malignScore.png"),
577 |            p.malignScore, width = 5, height = 4, dpi = 300)
578 | 
579 |     ## save results
580 |     write.table(cnvList$expr.data[, names(obserScore.smooth)],
581 |                 file = file.path(savePath, "malignancy/inferCNV-observation.txt"),
582 |                 quote = F, sep = "\t", row.names = T)
583 |     write.table(cnvList$expr.data[, names(referScore.smooth)],
584 |                 file = file.path(savePath, "malignancy/inferCNV-reference.txt"),
585 |                 quote = F, sep = "\t", row.names = T)
586 |     write.table(data.frame(referScore.smooth),
587 |                 file = file.path(savePath, "malignancy/refer-malignScore.txt"),
588 |                 quote = F, sep = "\t", row.names = T)
589 | 
590 |     results <- list(
591 |         cnvList = cnvList,
592 |         referScore = referScore.smooth,
593 |         expr = expr,
594 |         cell.annotation = cell.annotation,
595 |         ju.exist.malign = ju.exist.malign,
596 |         malign.thres = malign.thres,
597 |         p.results = p.results
598 |     )
599 |     return(results)
600 | }


--------------------------------------------------------------------------------
/R/scCombination.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' runScCombination
  3 | #'
  4 | #' Perform multi-samples analyses.
  5 | #'
  6 | #' @param single.savePaths A vecotr of paths containing the results files of step 'runScAnnotation' for each sample.
  7 | #' @param sampleNames A vector of labels for all samples.
  8 | #' @param combName A label for the combined samples.
  9 | #' @param comb.method The method to combine samples. The default is "NormalMNN". "Harmony", "NormalMNN", "SeuratMNN", "Raw", "Regression" and "LIGER" are optional.
 10 | #' @param harmony.theta The parameter 'theta' of function "RunHarmony" in the harmony package.
 11 | #' @param harmony.lambda The parameter 'lambda' of function "RunHarmony" in the harmony package.
 12 | #' @param harmony.sigma The parameter 'sigma' of function "RunHarmony" in the harmony package.
 13 | #' @param sample.colors The colors used for samples. The default is NULL, and the pre-set colors will be used.
 14 | #' @inheritParams runScAnnotation
 15 | #'
 16 | #' @return A results list with all useful objects used in the function.
 17 | #' @export
 18 | #'
 19 | #' @import harmony liger
 20 | #'
 21 | runScCombination <- function(single.savePaths, sampleNames, savePath, combName,
 22 |                              authorName = NULL,
 23 |                              comb.method = "NormalMNN",
 24 |                              harmony.theta = NULL,
 25 |                              harmony.lambda = NULL,
 26 |                              harmony.sigma = 0.1,
 27 |                              vars.to.regress = c("nCount_RNA", "mito.percent", "ribo.percent"),
 28 |                              pc.use = 30,
 29 |                              resolution = 0.8,
 30 |                              clusterStashName = "comb.cluster",
 31 |                              show.features = NULL, bool.add.features = T,
 32 |                              bool.runDiffExpr = T,
 33 |                              n.markers = 5,
 34 |                              sample.colors = NULL,
 35 |                              species = "human",
 36 |                              genome = "hg19",
 37 |                              hg.mm.mix = F,
 38 |                              bool.runCellClassify = T,
 39 |                              ct.templates = NULL,
 40 |                              coor.names = c("tSNE_1", "tSNE_2"),
 41 |                              bool.runMalignancy = T,
 42 |                              cnv.ref.data = NULL,
 43 |                              cnv.referAdjMat = NULL,
 44 |                              cutoff = 0.1,
 45 |                              p.value.cutoff = 0.5,
 46 |                              bool.intraTumor = T,
 47 |                              bool.runCellCycle = T,
 48 |                              bool.runStemness = T,
 49 |                              bool.runGeneSets = T,
 50 |                              geneSets = NULL,
 51 |                              geneSet.method = "average",
 52 |                              bool.runExprProgram = T,
 53 |                              nmf.rank = 50,
 54 |                              genReport = T){
 55 | 
 56 |     message("[", Sys.time(), "] START: RUN ScCombination")
 57 |     results <- as.list(environment())
 58 |     checkCombArguments(results)
 59 | 
 60 |     if(species == "mouse" & genome == "hg19"){
 61 |         genome <- "mm10"
 62 |     }
 63 | 
 64 |     if(!dir.exists(file.path(savePath, "figures/"))){
 65 |         dir.create(file.path(savePath, "figures/"), recursive = T)
 66 |     }
 67 |     suppressWarnings( savePath <- normalizePath(savePath, "/") )
 68 |     results[["savePath"]] <- savePath
 69 | 
 70 | 
 71 |     message("[", Sys.time(), "] -----: sample data combination")
 72 |     expr.list <- list()
 73 |     sample.ident <- c()
 74 |     for(i in 1:length(sampleNames)){
 75 |         sampleName <- sampleNames[i]
 76 |         cur.path <- single.savePaths[i]
 77 |         print(sampleName)
 78 |         expr.list[[sampleName]] <- readRDS(paste0(cur.path, "/expr.RDS"))
 79 |         sample.ident <- c(sample.ident, rep(sampleName, dim(expr.list[[sampleName]])[2]))
 80 |     }
 81 |     sample.ident <- as.factor(sample.ident)
 82 | 
 83 |     bool.plotHVG = T
 84 |     if(comb.method == "SeuratMNN"){
 85 |         message("[", Sys.time(), "] -----: combine data by Seurat MNN")
 86 |         suppressWarnings( expr.anchors <- FindIntegrationAnchors(object.list = expr.list,
 87 |                                                                  dims = 1:pc.use) )
 88 |         expr <- IntegrateData(anchorset = expr.anchors,
 89 |                               dims = 1:pc.use, verbose = F)
 90 |         expr <- ScaleData(expr, verbose = FALSE)
 91 |         DefaultAssay(expr) <- "integrated"
 92 |         expr[["sample.ident"]] <- sample.ident
 93 |         bool.plotHVG = F
 94 | 
 95 |         saveRDS(expr.anchors@anchors, file = file.path(savePath, "anchors.RDS"))
 96 | 
 97 |     }else if(comb.method == "Raw"){
 98 |         message("[", Sys.time(), "] -----: combine raw matrix data")
 99 |         suppressWarnings( expr <- merge(expr.list[[1]], expr.list[2:length(expr.list)]) )
100 |         expr <- FindVariableFeatures(expr, selection.method = "vst", nfeatures = 2000, verbose = F)
101 |         expr <- ScaleData(object = expr, vars.to.regress = vars.to.regress, verbose = F)
102 |         expr[["sample.ident"]] <- sample.ident
103 | 
104 |     }else if(comb.method == "Regression"){
105 |         message("[", Sys.time(), "] -----: combine data and regress out sample source")
106 |         suppressWarnings( expr <- merge(expr.list[[1]], expr.list[2:length(expr.list)]) )
107 |         expr <- FindVariableFeatures(expr, selection.method = "vst", nfeatures = 2000, verbose = F)
108 |         expr[["sample.ident"]] <- sample.ident
109 |         expr <- ScaleData(object = expr,
110 |                           vars.to.regress = c("sample.ident", vars.to.regress),
111 |                           verbose = F)
112 | 
113 |     }else if(comb.method == "Harmony"){
114 |         message("[", Sys.time(), "] -----: combine data by Harmony")
115 | 
116 |         items <- unique(unlist(lapply(names(expr.list), function(x){
117 |             grep("^GS__", names(expr.list[[x]]@meta.data), value = T)
118 |         })))
119 |         items <- c("doublet.score", "Cell.Type", "Malign.score",
120 |                    "Malign.type", "CellCycle.score", "Stemness.score", items)
121 | 
122 |         ju.mat <- sapply(names(expr.list), function(x){
123 |             !(items %in% names(expr.list[[x]]@meta.data))
124 |         })
125 |         comb.metadata <- lapply(items[rowSums(ju.mat) == 0], function(x){
126 |             tmp <- do.call(c, lapply(names(expr.list), function(y){
127 |                 expr.list[[y]]@meta.data[[x]]
128 |             }))
129 |         })
130 |         names(comb.metadata) <- items[rowSums(ju.mat) == 0]
131 |         comb.metadata <- data.frame(comb.metadata)
132 | 
133 |         share.genes <- Reduce(intersect,  lapply(expr.list, rownames))
134 |         for(s.name in names(expr.list)){
135 |             expr.list[[s.name]] <- GetAssayData(expr.list[[s.name]], slot = "counts")[share.genes, ]
136 |         }
137 |         comb.data <- do.call(cbind, expr.list)
138 |         rm(expr.list)
139 | 
140 |         expr <- CreateSeuratObject(counts = comb.data,  min.cells = 5) %>%
141 |             Seurat::NormalizeData(verbose = FALSE) %>%
142 |             FindVariableFeatures(selection.method = "vst", nfeatures = 2000, verbose = F) %>%
143 |             ScaleData(verbose = FALSE) %>%
144 |             RunPCA(pc.genes = expr@var.genes, verbose = FALSE)
145 |         expr[["sample.ident"]] <- sample.ident
146 |         expr <- expr %>% RunHarmony("sample.ident", plot_convergence = TRUE,
147 |                                     theta = harmony.theta,
148 |                                     lambad = harmony.lambda,
149 |                                     sigma = harmony.sigma,
150 |                                     verbose = F)
151 | 
152 |         expr@meta.data <- cbind(expr@meta.data, comb.metadata)
153 | 
154 |         bool.plotHVG <- F
155 | 
156 |     }else if(comb.method == "LIGER"){
157 |         message("[", Sys.time(), "] -----: combine data by LIGER")
158 | 
159 |         items <- unique(unlist(lapply(names(expr.list), function(x){
160 |             grep("^GS__", names(expr.list[[x]]@meta.data), value = T)
161 |         })))
162 |         items <- c("doublet.score", "Cell.Type", "Malign.score",
163 |                    "Malign.type", "CellCycle.score", "Stemness.score", items)
164 | 
165 |         ju.mat <- sapply(names(expr.list), function(x){
166 |             !(items %in% names(expr.list[[x]]@meta.data))
167 |         })
168 |         comb.metadata <- lapply(items[rowSums(ju.mat) == 0], function(x){
169 |             tmp <- do.call(c, lapply(names(expr.list), function(y){
170 |                 expr.list[[y]]@meta.data[[x]]
171 |             }))
172 |         })
173 |         names(comb.metadata) <- items[rowSums(ju.mat) == 0]
174 |         comb.metadata <- data.frame(comb.metadata)
175 | 
176 |         for(e.i in 1:length(expr.list)){
177 |             s.name <- names(expr.list)[e.i]
178 |             expr.list[[s.name]] <- RenameCells(expr.list[[s.name]],
179 |                                                new.names = paste0(colnames(expr.list[[s.name]]), "-", e.i))
180 |             expr.list[[s.name]] <- GetAssayData(expr.list[[s.name]], slot = "counts")
181 |         }
182 |         expr = createLiger(expr.list)
183 |         expr = normalize(expr)
184 |         expr = selectGenes(expr, var.thresh = 0.1)
185 |         expr = scaleNotCenter(expr)
186 | 
187 |         expr = optimizeALS(expr, k = 20)
188 |         expr = quantileAlignSNF(expr)
189 |         expr = runTSNE(expr)
190 |         expr = ligerToSeurat(expr, use.liger.genes = T)
191 | 
192 |         expr = ScaleData(expr, verbose = FALSE)
193 |         expr[["sample.ident"]] <- sample.ident
194 |         expr@reductions$inmf@assay.used <- "RNA"
195 | 
196 |         expr@meta.data <- cbind(expr@meta.data, comb.metadata)
197 | 
198 |         bool.plotHVG = F
199 | 
200 |     }else if(comb.method == "NormalMNN"){
201 |         message("[", Sys.time(), "] -----: combine data by normal cell MNN")
202 |         suppressWarnings( expr.anchors <- FindIntegrationAnchors(object.list = expr.list,
203 |                                                                  dims = 1:pc.use) )
204 |         anchors <- expr.anchors@anchors
205 | 
206 |         anchors$cellType1 <- "NULL"
207 |         anchors$cellType2 <- "NULL"
208 |         anchors$malignType1 <- "NULL"
209 |         anchors$malignType2 <- "NULL"
210 |         anchors$malignScore1 <- -1
211 |         anchors$malignScore2 <- -1
212 |         for(oi in expr.anchors@reference.objects){
213 |             cur.ix <- which(anchors$dataset1 == oi)
214 |             anchors$cellType1[cur.ix] <- expr.list[[oi]]@meta.data$Cell.Type[anchors$cell1[cur.ix]]
215 |             anchors$malignType1[cur.ix] <- expr.list[[oi]]@meta.data$Malign.type[anchors$cell1[cur.ix]]
216 |             anchors$malignScore1[cur.ix] <- expr.list[[oi]]@meta.data$Malign.score[anchors$cell1[cur.ix]]
217 | 
218 |             cur.ix <- which(anchors$dataset2 == oi)
219 |             anchors$cellType2[cur.ix] <- expr.list[[oi]]@meta.data$Cell.Type[anchors$cell2[cur.ix]]
220 |             anchors$malignType2[cur.ix] <- expr.list[[oi]]@meta.data$Malign.type[anchors$cell2[cur.ix]]
221 |             anchors$malignScore2[cur.ix] <- expr.list[[oi]]@meta.data$Malign.score[anchors$cell2[cur.ix]]
222 |         }
223 | 
224 |         anchors.new <- subset(anchors, cellType1 != "Epithelial" & cellType1 != "Unknown" & cellType2 != "Epithelial" & cellType2 != "Unknown")
225 |         if(dim(anchors)[1] == 0){
226 |             anchors.new <- anchors
227 |             cat("- Warning in 'runScCombination': Cannot find the nomral cell anchors, and use initial anchors instead.\n")
228 |         }
229 |         expr.anchors@anchors <- anchors.new
230 | 
231 |         expr <- IntegrateData(anchorset = expr.anchors,
232 |                               dims = 1:pc.use, verbose = F)
233 |         expr <- ScaleData(expr, verbose = FALSE)
234 |         DefaultAssay(expr) <- "integrated"
235 |         expr[["sample.ident"]] <- sample.ident
236 |         bool.plotHVG = F
237 | 
238 |         saveRDS(anchors.new, file = file.path(savePath, "anchors.RDS"))
239 |     }
240 |     results[["bool.plotHVG"]] <- bool.plotHVG
241 | 
242 |     ## --------- seurat ---------
243 |     t.results <- runSeurat(
244 |         expr = expr,
245 |         savePath = savePath,
246 |         pc.use = pc.use,
247 |         resolution = resolution,
248 |         clusterStashName = clusterStashName,
249 |         bool.runDiffExpr = bool.runDiffExpr,
250 |         comb.method = comb.method
251 |     )
252 |     expr = t.results$expr
253 |     cell.annotation = t.results$cell.annotation
254 |     results[["diff.expr.genes"]] = t.results$diff.expr.genes
255 |     rm(t.results)
256 |     gc()
257 | 
258 |     for(item in c("doublet.score", "Cell.Type", "Malign.score",
259 |                   "Malign.type", "CellCycle.score", "Stemness.score")){
260 |         if(item %in% names(expr@meta.data)){
261 |             cell.annotation[[item]] <- expr@meta.data[[item]]
262 |         }
263 |     }
264 |     for(item in grep("^GS__", names(expr@meta.data), value = T)){
265 |         cell.annotation[[item]] <- expr@meta.data[[item]]
266 |     }
267 | 
268 |     results[["seurat.plots"]] <- plotSeurat(
269 |         expr = expr,
270 |         cell.annotation = cell.annotation,
271 |         show.features = show.features,
272 |         bool.add.features = bool.add.features,
273 |         coor.names = coor.names,
274 |         bool.plotHVG = bool.plotHVG,
275 | 
276 |         bool.runDiffExpr = bool.runDiffExpr,
277 |         diff.expr.genes = results[["diff.expr.genes"]],
278 |         n.markers = n.markers,
279 | 
280 |         species = species,
281 |         savePath = savePath
282 |     )
283 | 
284 |     results[["DEplot.height"]] <- 0.5 + 0.1 * n.markers * length(unique(cell.annotation$Cluster))
285 |     results[["markersPlot.height"]] <- 2 * ceiling(length(results[["seurat.plots"]]$ps.markers) / 4)
286 | 
287 | 
288 |     ## --------- sample source ---------
289 |     message("[", Sys.time(), "] -----: plot sample source")
290 |     cell.annotation$sample <- expr@meta.data$sample.ident
291 |     if(is.null(sample.colors)){
292 |         sample.colors <- getDefaultColors(n = length(unique(cell.annotation$sample)),
293 |                                           type = 2)
294 |     }
295 | 
296 |     if(setequal(sampleNames, unique(cell.annotation$sample))){
297 |         cell.annotation$sample <- factor(cell.annotation$sample, levels = sampleNames)
298 |     }else{
299 |         cell.annotation$sample <- factor(cell.annotation$sample)
300 |     }
301 |     p.sample <- pointDRPlot(cell.annotation, value = "sample",
302 |                             coor.names = coor.names,
303 |                             colors = sample.colors,
304 |                             point.type = 2,
305 |                             legend.position = "right",
306 |                             legend.title = "Sample")
307 |     p.bar.sample <- clusterBarPlot(cell.annotation = cell.annotation,
308 |                                    cell.colors = sample.colors,
309 |                                    sel.col = "sample",
310 |                                    legend.position = "bottom",
311 |                                    legend.title = "Sample")
312 | 
313 |     ggsave(filename = file.path(savePath, "figures/sampleSource-point.png"),
314 |            p.sample, width = 7, height = 5, dpi = 300)
315 |     ggsave(filename = file.path(savePath, "figures/sampleSource-bar.png"),
316 |            p.bar.sample, width = 6, height = 3, dpi = 300)
317 |     results[["p.sample"]] <- p.sample
318 |     results[["p.bar.sample"]] <- p.bar.sample
319 | 
320 | 
321 |     ## --------- cell type ---------
322 |     if(bool.runCellClassify){
323 |         t.results <- runCellClassify(expr, cell.annotation,
324 |                                      coor.names = coor.names,
325 |                                      savePath = savePath,
326 |                                      ct.templates = ct.templates,
327 |                                      species = species)
328 |         expr <- t.results$expr
329 |         cell.annotation <- t.results$cell.annotation
330 |         results[["cellType.plot"]] <- t.results$p.results
331 |         rm(t.results)
332 |     }
333 | 
334 | 
335 |     ## --------- malignancy ---------
336 |     if(bool.runMalignancy){
337 |         if(!(all(c("Malign.score", "Malign.type") %in% names(cell.annotation)))){
338 |             message("[", Sys.time(), "] -----: cells malignancy annotation")
339 |             for(i in 1:length(sampleNames)){
340 |                 cur.manifest <- read.table(paste0(single.savePaths[i], "/geneManifest.txt"),
341 |                                            header = T, sep = "\t", stringsAsFactors = F)
342 |                 if(i == 1){
343 |                     gene.manifest <- cur.manifest
344 |                 }else{
345 |                     new.genes <- subset(cur.manifest, !(EnsemblID %in% gene.manifest$EnsemblID))
346 |                     gene.manifest <- rbind(gene.manifest, new.genes)
347 |                 }
348 |             }
349 |             # rownames(gene.manifest) <- gene.manifest$EnsemblID
350 |             rownames(gene.manifest) <- gene.manifest$Symbol
351 |             t.results <- runMalignancy(expr = expr,
352 |                                        gene.manifest = gene.manifest,
353 |                                        cell.annotation = cell.annotation,
354 |                                        savePath = savePath,
355 |                                        cutoff = cutoff, minCell = 3,
356 |                                        p.value.cutoff = p.value.cutoff,
357 |                                        coor.names = coor.names,
358 |                                        ref.data = cnv.ref.data,
359 |                                        referAdjMat = cnv.referAdjMat,
360 |                                        species = species,
361 |                                        genome = genome,
362 |                                        hg.mm.mix = hg.mm.mix)
363 |             expr <- t.results$expr
364 |             cell.annotation <- t.results$cell.annotation
365 |             results[["cnvList"]] <- t.results$cnvList
366 |             results[["referScore"]] <- t.results$referScore
367 |             results[["ju.exist.malign"]] <- t.results$ju.exist.malign
368 |             results[["malign.thres"]] <- t.results$malign.thres
369 |             results[["bimodal.pvalue"]] <- t.results$bimodal.pvalue
370 |             results[["malign.plot"]] <- t.results$p.results
371 |             rm(t.results)
372 |         }else{
373 |             message("[", Sys.time(), "] -----: cells malignancy combination")
374 |             results[["malign.plot"]] <- plotMalignancy(cell.annotation = cell.annotation,
375 |                                                        coor.names = coor.names,
376 |                                                        savePath = savePath)
377 |         }
378 |     }
379 | 
380 | 
381 |     ## --------- select tumor clusters ---------
382 |     if(bool.intraTumor){
383 |         tumor.clusters <- getTumorCluster(cell.annotation = cell.annotation)
384 |         results[["tumor.clusters"]] <- tumor.clusters
385 | 
386 |         if(is.null(tumor.clusters)){
387 |             sel.clusters <- unique(cell.annotation$Cluster)
388 |             sel.clusters <- sel.clusters[order(sel.clusters)]
389 |         }else{
390 |             sel.clusters <- tumor.clusters
391 |         }
392 |     }else{
393 |         sel.clusters <- unique(cell.annotation$Cluster)
394 |         sel.clusters <- sel.clusters[order(sel.clusters)]
395 |     }
396 | 
397 | 
398 |     ## --------- cell cycle ---------
399 |     if(bool.runCellCycle){
400 |         if(!("CellCycle.score" %in% names(cell.annotation))){
401 |             CellCycle.score <- runCellCycle(expr, species = species)
402 |             cell.annotation$CellCycle.score <- CellCycle.score
403 |             expr[["CellCycle.score"]] <- CellCycle.score
404 |         }else{
405 |             message("[", Sys.time(), "] -----: cell cycle score combination")
406 |         }
407 | 
408 |         # CellCycle.score <- runCellCycle(expr, species = species)
409 |         # cell.annotation$CellCycle.score <- CellCycle.score
410 |         # expr[["CellCycle.score"]] <- CellCycle.score
411 | 
412 |         results[["cellCycle.plot"]] <-
413 |             pointDRPlot(cell.annotation,
414 |                         sel.clusters = sel.clusters,
415 |                         value = "CellCycle.score",
416 |                         coor.names = coor.names,
417 |                         colors = c("white", "#009b45"),
418 |                         discrete = F,
419 |                         legend.position = "right",
420 |                         legend.title = "Cell cycle score")
421 |         ggsave(filename = file.path(savePath, "figures/cellCycle-point.png"),
422 |                results[["cellCycle.plot"]], width = 5, height = 4, dpi = 300)
423 |     }
424 | 
425 | 
426 |     ## --------- stemness ---------
427 |     if(bool.runStemness){
428 |         if(!("Stemness.score" %in% names(cell.annotation))){
429 |             stem.scores <- runStemness(X = GetAssayData(object = expr, slot = "scale.data"), species = species)
430 |             cell.annotation[["Stemness.score"]] <- stem.scores
431 |             expr[["Stemness.score"]] <- stem.scores
432 |         }else{
433 |             message("[", Sys.time(), "] -----: stemness score combination")
434 |         }
435 | 
436 |         results[["stemness.plot"]] <-
437 |             pointDRPlot(cell.annotation,
438 |                         sel.clusters = sel.clusters,
439 |                         value = "Stemness.score",
440 |                         coor.names = coor.names,
441 |                         colors = c("white", "#ff9000"),
442 |                         discrete = F,
443 |                         legend.position = "right",
444 |                         legend.title = "Stemness")
445 |         ggsave(filename = file.path(savePath, "figures/stemness-point.png"),
446 |                results[["stemness.plot"]], width = 5, height = 4, dpi = 300)
447 |     }
448 | 
449 | 
450 |     ## --------- gene sets ----------
451 |     if(bool.runGeneSets){
452 |         if(is.null(geneSets)){
453 |             geneSets <- getDefaultGeneSets(species = species)
454 |         }
455 |         if(geneSet.method == "GSVA" | !all(paste0("GS__", names(geneSets)) %in% names(cell.annotation))){
456 |             t.scores <- runGeneSets(expr = expr, geneSets = geneSets, method = geneSet.method)
457 |             if(!is.null(t.scores)){
458 |                 cell.annotation <- cbind(cell.annotation, t.scores)
459 |             }
460 |         }else{
461 |             message("[", Sys.time(), "] -----: gene set signatures combination")
462 |             t.scores <- cell.annotation[, paste0("GS__", names(geneSets))]
463 |         }
464 | 
465 |         if(!is.null(t.scores)){
466 |             bool.limit <- T
467 |             if(geneSet.method == "GSVA"){
468 |                 bool.limit <- F
469 |             }
470 |             results[["geneSet.plot"]] <-
471 |                 plotGeneSet(subset(cell.annotation, Cluster %in% sel.clusters),
472 |                             prefix = "GS__",
473 |                             bool.limit = bool.limit,
474 |                             savePath = savePath)
475 |             results[["geneSetPlot.height"]] <- 0.5 + 0.11 * dim(t.scores)[2]
476 |             rm(t.scores)
477 |         }else{
478 |             bool.runGeneSets = FALSE
479 |         }
480 |     }
481 | 
482 | 
483 |     ## ---------- expression programs ----------
484 |     if(bool.runExprProgram){
485 |         results[["exprProgram.results"]] <- runExprProgram(expr, rank = nmf.rank,
486 |                                                            sel.clusters = sel.clusters,
487 |                                                            clusterStashName = clusterStashName,
488 |                                                            savePath = savePath)
489 |         results[["exprProgram.plot"]] <- plotExprProgram(H = results[["exprProgram.results"]]$H,
490 |                                                          cell.annotation,
491 |                                                          sel.clusters = sel.clusters,
492 |                                                          savePath = savePath)
493 |         results[["exprProgPlot.height"]] <- 0.5 + 0.11 * dim(results[["exprProgram.results"]]$H)[1]
494 |     }
495 |     results[["expr"]] <- expr
496 |     results[["cell.annotation"]] <- cell.annotation
497 | 
498 | 
499 |     ## -------- save ---------
500 |     saveRDS(expr, file = file.path(savePath, "expr.RDS"))
501 |     write.table(cell.annotation, file = file.path(savePath, "cellAnnotation.txt"),
502 |                 quote = F, sep = "\t", row.names = F)
503 | 
504 |     if(genReport){
505 |         message("[", Sys.time(), "] -----: report generating")
506 |         if(!dir.exists(file.path(savePath, 'report-figures/'))){
507 |             dir.create(file.path(savePath, 'report-figures/'), recursive = T)
508 |         }
509 |         suppressWarnings(
510 |             knit(system.file("rmd", "main-scAnnoComb.Rmd", package = "scCancer"),
511 |                  file.path(savePath,'report-scAnnoComb.md'), quiet = T)
512 |         )
513 |         markdownToHTML(file.path(savePath,'report-scAnnoComb.md'),
514 |                        file.path(savePath, 'report-scAnnoComb.html'))
515 |     }
516 | 
517 |     message("[", Sys.time(), "] END: Finish ScCombination\n\n")
518 | 
519 |     return(results)
520 | }
521 | 
522 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | 
  2 | get10Xpath <- function (samplePath, raw.data = F){
  3 |     prefix <- ifelse(raw.data, 'raw', 'filtered')
  4 |     cur.path <- paste0(samplePath, '/')
  5 |     res.path <- paste0(cur.path, prefix, '_feature_bc_matrix')
  6 |     if (!dir.exists(res.path)){
  7 |         res.path <- paste0(cur.path, prefix, '_gene_bc_matrices/hg19/')
  8 |     }
  9 |     if (!dir.exists(res.path)){
 10 |         res.path <- paste0(cur.path, prefix, '_gene_bc_matrices/hg38/')
 11 |     }
 12 |     if (!dir.exists(res.path)){
 13 |         res.path <- paste0(cur.path, prefix, '_gene_bc_matrices/mm10/')
 14 |     }
 15 |     if(!dir.exists(res.path)){
 16 |         res.path <- NULL
 17 |     }
 18 |     return(res.path)
 19 | }
 20 | 
 21 | 
 22 | ExtractField <- function (string, field = 1, delim = "_"){
 23 |     fields <- as.numeric(x = unlist(x = strsplit(
 24 |         x = as.character(x = field), split = ",")))
 25 |     if (length(x = fields) == 1) {
 26 |         return(strsplit(x = string, split = delim)[[1]][field])
 27 |     }
 28 |     return(paste(strsplit(x = string, split = delim)[[1]][fields], collapse = delim))
 29 | }
 30 | 
 31 | 
 32 | getCRversion <- function(data.path){
 33 |     version <- "Cell Ranger (version 2)"
 34 |     if(grepl("feature_bc_matrix", data.path)){
 35 |         version <- "Cell Ranger (version >= 3)"
 36 |     }
 37 |     return(version)
 38 | }
 39 | 
 40 | 
 41 | getBarcodes <- function(data.path){
 42 |     barcode.loc <- paste0(data.path, "/barcodes.tsv")
 43 |     if(grepl("feature_bc_matrix", data.path)){
 44 |         barcode.loc <- paste0(barcode.loc, ".gz")
 45 |     }
 46 |     cell.names <- readLines(barcode.loc)
 47 |     if (all(grepl(pattern = "\\-1$", x = cell.names))) {
 48 |         cell.names <- as.vector(x = as.character(
 49 |             x = sapply(
 50 |                 X = cell.names,
 51 |                 FUN = ExtractField,
 52 |                 field = 1,
 53 |                 delim = "-"
 54 |             )
 55 |         ))
 56 |     }
 57 |     return(cell.names)
 58 | }
 59 | 
 60 | 
 61 | #' Read10Xdata
 62 | #'
 63 | #' Read expression matrix data from 10X. This function is modified from Seurat package.
 64 | #'
 65 | #' @param data.dir Directory containing the matrix.mtx, genes.tsv (or features.tsv), and barcodes.tsv files provided by 10X.
 66 | #' A vector or named vector can be given in order to load several data directories.
 67 | #' If a named vector is given, the cell barcode names will be prefixed with the name.
 68 | #' @param gene.column An integer indicating which column of genes.tsv or features.tsv to use for gene names; default is 2.
 69 | #' @param unique.features Make feature names unique (default TRUE).
 70 | #' @param only.expr Whether to read expression data only if have multiple features (default TRUE).
 71 | #'
 72 | #' @return If the 10X data only has expression data or the argument 'only.expr' is TRUE,
 73 | #' a sparse matrix containing the expression data will be returned.
 74 | #' Otherwise, if the 10X data has multiple data types,
 75 | #' a list containing a sparse matrix of the data from each type will be returned.
 76 | #'
 77 | #' @export
 78 | #'
 79 | Read10Xdata <- function (data.dir = NULL, gene.column = 2,
 80 |                          unique.features = TRUE, only.expr = TRUE)  {
 81 |     full.data <- list()
 82 |     for (i in seq_along(data.dir)) {
 83 |         run <- data.dir[i]
 84 |         if (!dir.exists(paths = run)) {
 85 |             stop("Directory provided does not exist")
 86 |         }
 87 |         if (!grepl("\\/$", run)) {
 88 |             run <- paste(run, "/", sep = "")
 89 |         }
 90 |         barcode.loc <- file.path(run, "barcodes.tsv")
 91 |         gene.loc <- file.path(run, "genes.tsv")
 92 |         features.loc <- file.path(run, "features.tsv.gz")
 93 |         matrix.loc <- file.path(run, "matrix.mtx")
 94 |         pre_ver_3 <- file.exists(gene.loc)
 95 |         if (!pre_ver_3) {
 96 |             addgz <- function(s) {
 97 |                 return(paste0(s, ".gz"))
 98 |             }
 99 |             barcode.loc <- addgz(s = barcode.loc)
100 |             matrix.loc <- addgz(s = matrix.loc)
101 |         }
102 |         if (!file.exists(barcode.loc)) {
103 |             stop("Barcode file missing")
104 |         }
105 |         if (!pre_ver_3 && !file.exists(features.loc)) {
106 |             stop("Gene name or features file missing")
107 |         }
108 |         if (!file.exists(matrix.loc)) {
109 |             stop("Expression matrix file missing")
110 |         }
111 |         data <- readMM(file = matrix.loc)
112 |         cell.names <- readLines(barcode.loc)
113 |         if (all(grepl(pattern = "\\-1$", x = cell.names))) {
114 |             cell.names <- as.vector(x = as.character(
115 |                 x = sapply(
116 |                     X = cell.names,
117 |                     FUN = ExtractField,
118 |                     field = 1,
119 |                     delim = "-"
120 |                 )
121 |             ))
122 |         }
123 |         if (is.null(x = names(x = data.dir))) {
124 |             if (i < 2) {
125 |                 colnames(x = data) <- cell.names
126 |             } else {
127 |                 colnames(x = data) <- paste0(i, "_", cell.names)
128 |             }
129 |         } else {
130 |             colnames(x = data) <-
131 |                 paste0(names(x = data.dir)[i], "_", cell.names)
132 |         }
133 |         feature.names <- read.delim(
134 |             file = ifelse(
135 |                 test = pre_ver_3,
136 |                 yes = gene.loc,
137 |                 no = features.loc
138 |             ),
139 |             header = FALSE,
140 |             stringsAsFactors = FALSE
141 |         )
142 |         if (any(is.na(x = feature.names[, gene.column]))) {
143 |             warning("Some features names are NA. Replacing NA names with ID from the opposite column requested",
144 |                     call. = FALSE, immediate. = TRUE)
145 |             na.features <- which(x = is.na(x = feature.names[,
146 |                                                              gene.column]))
147 |             replacement.column <- ifelse(test = gene.column ==
148 |                                              2, yes = 1, no = 2)
149 |             feature.names[na.features, gene.column] <- feature.names[na.features,
150 |                                                                      replacement.column]
151 |         }
152 |         if (unique.features) {
153 |             fcols = ncol(x = feature.names)
154 |             if (fcols < gene.column) {
155 |                 stop(paste0("gene.column was set to ", gene.column,
156 |                             " but feature.tsv.gz (or genes.tsv) only has ",
157 |                             fcols, " columns.", " Try setting the gene.column argument to a value <= to ",
158 |                             fcols, "."))
159 |             }
160 |             rownames(x = data) <- make.unique(names = feature.names[,
161 |                                                                     gene.column])
162 |         }
163 |         # In cell ranger 3.0, a third column specifying the type of data was added
164 |         # and we will return each type of data as a separate matrix
165 |         if (ncol(x = feature.names) > 2) {
166 |             data_types <- factor(x = feature.names$V3)
167 |             lvls <- levels(x = data_types)
168 |             if (length(x = lvls) > 1 && length(x = full.data) ==
169 |                 0) {
170 |                 message("10X data contains more than one type and is being returned as a list containing matrices of each type.")
171 |             }
172 |             expr_name <- "Gene Expression"
173 |             if (expr_name %in% lvls) {
174 |                 # Return Gene Expression first
175 |                 lvls <- c(expr_name, lvls[-which(x = lvls == expr_name)])
176 |             }
177 |             data <- lapply(
178 |                 X = lvls,
179 |                 FUN = function(l) {
180 |                     return(data[data_types == l, ])
181 |                 }
182 |             )
183 |             names(x = data) <- lvls
184 |         } else{
185 |             data <- list(data)
186 |         }
187 |         full.data[[length(x = full.data) + 1]] <- data
188 |     }
189 |     # Combine all the data from different directories into one big matrix, note this
190 |     # assumes that all data directories essentially have the same features files
191 |     list_of_data <- list()
192 |     for (j in 1:length(x = full.data[[1]])) {
193 |         list_of_data[[j]] <-
194 |             do.call(cbind, lapply(X = full.data, FUN = `[[`, j))
195 |         list_of_data[[j]] <-
196 |             as(object = list_of_data[[j]], Class = "CsparseMatrix")
197 |     }
198 |     names(x = list_of_data) <- names(x = full.data[[1]])
199 | 
200 |     if (only.expr){
201 |         return(list_of_data[[1]])
202 |     }else{
203 |         # If multiple features, only return a list, otherwise a matrix.
204 |         if (length(x = list_of_data) == 1) {
205 |             return(list_of_data[[1]])
206 |         } else {
207 |             return(list_of_data)
208 |         }
209 |     }
210 | }
211 | 
212 | 
213 | 
#' ggplot_config
#'
#' Build the plotting theme: a classic theme without grid lines whose text
#' sizes are all derived from a single base size.
#'
#' @param base.size The size of text.
#'
#' @return A theme.
#' @export
#'
ggplot_config <- function(base.size = 8){
    # Titles and axis text share one size; legend sizes are derived from base.size too.
    main.size <- 2 * base.size
    text.settings <- theme(
        plot.title = element_text(size = main.size),
        axis.title.x = element_text(size = main.size, vjust = -0.2),
        axis.title.y = element_text(size = main.size, vjust = 0.2),
        axis.text.x = element_text(size = main.size),
        axis.text.y = element_text(size = main.size),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.title = element_text(size = main.size - 2),
        legend.text = element_text(size = 1.5 * base.size)
    )
    p <- theme_classic() + text.settings
    return(p)
}
235 | 
236 | 
237 | 
# Return the boxplot outliers of `x` that lie above its median,
# i.e. only the upper-side outliers reported by boxplot.stats().
getOutliers <- function(x){
    center <- median(x)
    candidates <- boxplot.stats(x)$out
    # which() drops NA comparisons, matching the behaviour of subset().
    upper <- candidates[which(candidates > center)]
    return(upper)
}
244 | 
245 | 
246 | 
# Return the indices of the cells whose value for every metric named in `arg`
# lies in [Low.threshold, High.threshold) of the matching row of `filter.thres`.
getCellix <- function(cell.manifest, filter.thres, arg){
    per.metric <- lapply(arg, FUN = function(metric) {
        values <- cell.manifest[[metric]]
        lower <- filter.thres[metric, 'Low.threshold']
        upper <- filter.thres[metric, 'High.threshold']
        which(values >= lower & values < upper)
    })
    # Start from the first metric and intersect with every metric in turn.
    kept <- Reduce(intersect, per.metric, per.metric[[1]])
    return(kept)
}
259 | 
260 | 
261 | 
# Arrange the plots passed through `...` in a grid and attach one shared legend,
# taken from `all.p`, below ("bottom") or beside ("right") the grid.
# The combined layout is drawn on a new page and returned invisibly.
grid_arrange_shared_legend <- function(..., all.p, ncol = length(list(...)), nrow = 1, position = c("bottom", "right")) {

    plots <- list(...)
    position <- match.arg(position)

    # Extract the legend grob ("guide-box") from the reference plot.
    ref.grobs <- ggplotGrob(all.p + theme(legend.position = position))$grobs
    grob.names <- sapply(ref.grobs, function(x) x$name)
    shared.legend <- ref.grobs[[which(grob.names == "guide-box")]]
    legend.h <- sum(shared.legend$height)
    legend.w <- sum(shared.legend$width)

    # Individual panels are drawn without their own legends.
    panel.args <- lapply(plots, function(x) x + theme(legend.position="none"))
    panel.args <- c(panel.args, ncol = ncol, nrow = nrow)
    panel.grid <- do.call(arrangeGrob, panel.args)

    if(position == "bottom"){
        combined <- arrangeGrob(panel.grid,
                                shared.legend,
                                ncol = 1,
                                heights = unit.c(unit(1, "npc") - legend.h, legend.h))
    }else{
        combined <- arrangeGrob(panel.grid,
                                shared.legend,
                                ncol = 2,
                                widths = unit.c(unit(1, "npc") - legend.w, legend.w))
    }
    grid.newpage()
    grid.draw(combined)

    # return gtable invisibly
    invisible(combined)
}
288 | 
289 | 
290 | 
#' getDefaultMarkers
#'
#' Return default markers of several common cell types.
#' Only "human" and "mouse" are supported; any other value of 'species'
#' raises an error.
#'
#' @inheritParams runScAnnotation
#'
#' @return A list of default markers of several common cell types.
#' @export
#'
getDefaultMarkers <- function(species = "human"){
    # feature.def <- list(
    #     "T cell" = c("CD3D"),
    #     "B cell" = c("CD79A"),
    #     "NK cell" = c("NKG7"),
    #     "Monocyte" = c("LYZ"),
    #     "Endothelial" = c("PLVAP"),
    #     "Myofibroblast" = c("ACTA2"),
    #     "Epithelial" = c("EPCAM", "KRT8"))

    if(species == "human"){
        feature.def <- list(
            "T cell" = c("PTPRC", "CD3D", "CD4", "CD8A", "CD8B"),
            "B cell" = c("CD79A"),
            "NK cell" = c("NKG7"),
            "Myeloid cell" = c("LYZ"),
            "Endothelial" = c("PLVAP"),
            "Fibroblast" = c("ACTA2"),
            "Epithelial" = c("EPCAM", "KRT8"))
    }else if(species == "mouse"){
        feature.def <- list(
            "T cell" = c("Ptprc", "Cd3d", "Cd4", "Cd8a", "Cd8b"),
            "B cell" = c("Cd79a"),
            "NK cell" = c("Nkg7"),
            "Myeloid cell" = c("Lyz1", "Lyz2"),
            "Endothelial" = c("Plvap"),
            "Fibroblast" = c("Acta2"),
            "Epithelial" = c("Epcam", "Krt8"))
    }else{
        # Previously this fell through to "object 'feature.def' not found".
        stop("The parameter 'species' should be one of the c(\"human\", \"mouse\").\n")
    }

    return(feature.def)
}
332 | 
333 | 
334 | 
#' getDefaultColors
#'
#' Return a vector of default colors. If more colors are requested than the
#' selected palette holds, the palette is extended with additional
#' "#rrggbb" colors generated from a random starting value.
#'
#' @param n The number of colors. If NULL (default), the whole palette is returned;
#' for type 2, whose palette depends on 'n', the largest palette is returned.
#' @param type The type of color style. Only 1, 2, or 3 is allowed.
#'
#' @return A vector of colors.
#' @export
#'
getDefaultColors <- function(n = NULL, type = 1){
    if(!(length(type) == 1 && type %in% c(1, 2, 3))){
        stop("The parameter 'type' should be one of c(1, 2, 3).\n")
    }

    if(type == 1){
        colors <- c("#cb7c77", "#68d359", "#6a7dc9", "#c9d73d", "#c555cb",
                    "#d7652d", "#7cd5c8", "#c49a3f", "#507d41", "#5d8d9c",
                    "#90353b", "#674c2a", "#1B9E77", "#c5383c", "#0081d1",
                    "#ffd900", "#502e71", "#c8b693", "#aed688", "#f6a97a",
                    "#c6a5cc", "#798234", "#6b42c8", "#cf4c8b", "#666666",
                    "#feb308", "#ff1a1a", "#1aff1a", "#1a1aff", "#ffff1a")
    }else if(type == 2){
        # The type-2 palette is chosen by the requested size; with no 'n' given,
        # use the largest palette instead of failing on 'NULL <= 8'.
        n.pick <- if(is.null(n)) Inf else n
        if(n.pick <= 8){
            colors <- c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3",
                        "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3")
        }else if(n.pick <= 14){
            colors <- c("#437BFE", "#FEC643", "#43FE69", "#FE6943", "#C643FE",
                        "#43D9FE", "#B87A3D", "#679966", "#993333", "#7F6699",
                        "#E78AC3", "#333399", "#A6D854", "#E5C494")
        }else if(n.pick <= 20){
            colors <- c("#87b3d4", "#d5492f", "#6bd155", "#683ec2", "#c9d754",
                        "#d04dc7", "#81d8ae", "#d34a76", "#607d3a", "#6d76cb",
                        "#ce9d3f", "#81357a", "#d3c3a4", "#3c2f5a", "#b96f49",
                        "#4e857e", "#6e282c", "#d293c8", "#393a2a", "#997579")
        }else if(n.pick <= 30){
            colors <- c("#628bac", "#ceda3f", "#7e39c9", "#72d852", "#d849cc",
                        "#5e8f37", "#5956c8", "#cfa53f", "#392766", "#c7da8b",
                        "#8d378c", "#68d9a3", "#dd3e34", "#8ed4d5", "#d84787",
                        "#498770", "#c581d3", "#d27333", "#6680cb", "#83662e",
                        "#cab7da", "#364627", "#d16263", "#2d384d", "#e0b495",
                        "#4b272a", "#919071", "#7b3860", "#843028", "#bb7d91")
        }else{
            colors <- c("#982f29", "#5ddb53", "#8b35d6", "#a9e047", "#4836be",
                        "#e0dc33", "#d248d5", "#61a338", "#9765e5", "#69df96",
                        "#7f3095", "#d0d56a", "#371c6b", "#cfa738", "#5066d1",
                        "#e08930", "#6a8bd3", "#da4f1e", "#83e6d6", "#df4341",
                        "#6ebad4", "#e34c75", "#50975f", "#d548a4", "#badb97",
                        "#b377cf", "#899140", "#564d8b", "#ddb67f", "#292344",
                        "#d0cdb8", "#421b28", "#5eae99", "#a03259", "#406024",
                        "#e598d7", "#343b20", "#bbb5d9", "#975223", "#576e8b",
                        "#d97f5e", "#253e44", "#de959b", "#417265", "#712b5b",
                        "#8c6d30", "#a56c95", "#5f3121", "#8f846e", "#8f5b5c")
        }
    }else{
        # type == 3
        # colors <- c("#07a2a4", "#9a7fd1", "#588dd5", "#f5994e",
        #             "#c05050", "#59678c", "#c9ab00", "#7eb00a")
        colors <- c("#c14089", "#6f5553", "#E5C494", "#738f4c", "#bb6240",
                    "#66C2A5", "#2dfd29", "#0c0fdc")
    }

    if(!is.null(n)){
        if(n <= length(colors)){
            # seq_len() returns an empty selection for n = 0 (1:0 would pick one color).
            colors <- colors[seq_len(n)]
        }else{
            # Spread the missing colors evenly over the 24-bit RGB range,
            # starting from a random offset.
            n.extra <- n - length(colors)
            step <- 16777200 %/% n.extra - 2
            extra.values <- seq(from = sample(1:step, 1),
                                by = step, length.out = n.extra)
            # Zero-pad to six hex digits so that every value is a valid "#rrggbb" color.
            add.colors <- paste0("#", format(as.hexmode(extra.values), width = 6))
            colors <- c(colors, add.colors)
        }
    }
    return(colors)
}
402 | 
403 | 
#' getCellTypeColor
#'
#' Return the colors of the predefined cell types, extended with colors for any
#' cell type in 'cell.types' that has no predefined color.
#'
#' @param cell.types A vector of cell types.
#'
#' @return A named vector of colors: all predefined cell types first,
#' followed by the additional types found in 'cell.types'.
#' @export
#'
getCellTypeColor <- function(cell.types){
    cell.colors <- c(
        "T.cells.CD4" = "#07a2a4",
        "T.cells.CD8" = "#9a7fd1",
        "B.cells" = "#588dd5",
        "NK.cells" = "#f5994e",
        "Myeloid.cells" = "#c05050",
        "Endothelial" = "#59678c",
        "Fibroblast" = "#c9ab00",
        "Epithelial" = "#7eb00a",
        "Unknown" = "grey")

    new.types <- setdiff(cell.types, names(cell.colors))
    if(length(new.types) > 0){
        # Request the palette once: getDefaultColors() draws random extra colors
        # when more are needed than the palette holds, so calling it once per
        # type would pick each color from a different random draw.
        new.colors <- getDefaultColors(n = length(new.types), type = 3)
        cell.colors <- c(cell.colors, setNames(new.colors, new.types))
    }
    return(cell.colors)
}
430 | 
431 | 
# Clamp the values of `data` to the range [min, max].
# A bound left as NULL is not applied; the input object itself is not modified.
limitData <- function(data, min = NULL, max = NULL){
    clipped <- data
    if(!is.null(min)){
        too.low <- clipped < min
        clipped[too.low] <- min
    }
    if(!is.null(max)){
        too.high <- clipped > max
        clipped[too.high] <- max
    }
    return(clipped)
}
442 | 
443 | 
444 | 
# Collect cluster annotation helpers from `cell.annotation`:
#   cluster.info   - the 'Cluster' column (as a factor) with rows ordered by cluster,
#   cluster.colors - list(Cluster = <named color vector>), one color per cluster,
#   cluster.pos    - cumulative cell counts per cluster.
# NOTE(review): colors are looked up by indexing the default palette with the
# cluster value itself, which presumably expects positive integer cluster ids.
getClusterInfo <- function(cell.annotation){
    row.order <- order(cell.annotation$Cluster)
    cluster.info <- cell.annotation[row.order, 'Cluster', drop = F]
    cluster.info$Cluster <- as.factor(cluster.info$Cluster)

    # Cells per cluster, in order of appearance in the sorted annotation.
    cluster.sizes <- table(cluster.info$Cluster)
    # cluster.sizes <- cluster.sizes[as.character(1 : length(cluster.sizes))]
    cluster.sizes <- cluster.sizes[as.character(unique(cluster.info$Cluster))]
    cluster.pos <- cumsum(cluster.sizes)

    color.pool <- getDefaultColors()
    cluster.ids <- sort(unique(cell.annotation$Cluster))
    color.map <- c()
    for(k in 1:length(cluster.ids)){
        cur.id <- cluster.ids[k]
        color.map[as.character(cur.id)] = color.pool[cur.id]
    }

    return(list(cluster.info = cluster.info,
                cluster.colors = list(Cluster = color.map),
                cluster.pos = cluster.pos))
}
467 | 
468 | 
469 | 
# Map human gene symbols to mouse gene symbols using the homology table
# shipped with the package ("txt/hg-mm-HomologyGenes.txt").
#   hg.genes    - human gene symbols to look up.
#   bool.name   - if TRUE, name the result with the matching human symbols.
#   deduplicate - if TRUE, drop human genes listed more than once and then
#                 mouse genes listed more than once.
getMouseGene <- function(hg.genes, bool.name = F, deduplicate = T){
    homology.file <- system.file("txt", "hg-mm-HomologyGenes.txt", package = "scCancer")
    homology <- read.table(homology.file, header = T, stringsAsFactors = F)
    homology <- subset(homology, hgGenes %in% hg.genes)

    if(deduplicate){
        # First remove ambiguous human genes ...
        hg.count <- table(homology$hgGenes)
        repeated.hg <- names(hg.count)[hg.count > 1]
        homology <- subset(homology, !(hgGenes %in% repeated.hg))
        # ... then ambiguous mouse genes among the remaining pairs.
        mm.count <- table(homology$mmGenes)
        repeated.mm <- names(mm.count)[mm.count > 1]
        homology <- subset(homology, !(mmGenes %in% repeated.mm))
    }

    mm.genes <- homology$mmGenes
    if(bool.name){
        names(mm.genes) <- homology$hgGenes
    }
    return(mm.genes)
}
489 | 
490 | 
491 | 
492 | 
#' runSurvival
#'
#' According to the marker genes or signatures expression high/low levels,
#' patients are divided into two groups and then survival analysis is performed.
#' The survival curves can be plotted.
#'
#' @param features The names of marker genes or signatures to be analyzed.
#' @param data The data used to perform survival analysis.
#' It should be an expression or signature matrix with gene or signature by patient.
#' The row names are the features' names. The columns are patients' labels.
#' @param surv.time The survival time of patients. It should be in accord with the columns of data.
#' @param surv.event The status indicator of patients. 0=alive, 1=dead. It should be in accord with the columns of data.
#' @param cut.off The percentage threshold to divide patients into two groups.
#' The default is 0.5, which means the patients are divided by median.
#' Other values, such as 0.4, means the first 40 percent patients are set "Low" group
#' and the last 40 percent are set "High" group (the median 20 percent are discarded).
#' @param savePath The path to save the survival plots of genes or signatures (the default is NULL and the plots will be return without saving).
#'
#'
#' @return A list of survival curves plots, named by feature.
#' Features not found in the row names of 'data' are reported and skipped.
#' @export
#'
#' @import survival survminer
#'
runSurvival <- function(features, data, surv.time, surv.event, cut.off = 0.5, savePath = NULL){
    data <- as.matrix(data)
    # Use the smaller tail so that cut.off and 1 - cut.off are interchangeable.
    cut.off <- min(cut.off, 1 - cut.off)

    ps <- list()
    for(feat in features){
        if(!(feat %in% rownames(data))){
            cat("- Warning in 'runSurvival':", feat, "not found.\n")
            next
        }

        feat.values <- data[feat, ]
        dw.thres <- quantile(feat.values, cut.off)
        up.thres <- quantile(feat.values, 1-cut.off)
        p.df <- data.frame(sample = colnames(data),
                           surv.time = surv.time,
                           surv.event = surv.event)
        p.df$expr <- unname(ifelse(feat.values >= up.thres, "High",
                                   ifelse(feat.values < dw.thres, "Low", "Med")))
        # Keep everything local: the model only refers to columns of 'surv.df',
        # which is passed explicitly to ggsurvplot(), so nothing has to be
        # assigned into the global environment.
        surv.df <- p.df[p.df$expr != "Med", , drop = FALSE]
        fit <- survfit(Surv(surv.time, surv.event) ~ expr, data = surv.df)
        p.surv <- ggsurvplot(fit, data = surv.df, pval = TRUE,
                             palette = c("#f57e87", "#66d5a5"),
                             legend.title = paste0(feat, ":"))
        if(!is.null(savePath)){
            if(!dir.exists(savePath)){
                dir.create(savePath, recursive = T)
            }
            # file.path() keeps the plot inside 'savePath' even without a trailing slash.
            ggsave(filename = file.path(savePath, paste0("surv-", feat, ".png")), p.surv$plot,
                   width = 3.5, height = 3.5, dpi = 300)
        }
        ps[[feat]] <- p.surv$plot
    }
    return(ps)
}
558 | 
559 | 
#' generate10Xdata
#'
#' Generate a 10X-like data folder based on the data matrix and gene information,
#' which can be used directly to perform scCancer analysis.
#' The files 'barcodes.tsv.gz', 'features.tsv.gz' and 'matrix.mtx.gz' are written
#' to the subfolder 'filtered_feature_bc_matrix' of 'outPath'.
#'
#' @param matrix A gene-cell matrix or data.frame.
#' @param gene.info A data.frame of gene information. It should contain two columns,
#' the first is gene Ensembl ID, and the second is gene symbol.
#' The order of the genes should be consistent with the row order of 'matrix'.
#' @param outPath A path to save the output files.
#' @param overwrite If TRUE and the output file already exists, the file is
#' silently overwritten, otherwise an exception is thrown. The default is "FALSE".
#' This is passed to the compression of 'matrix.mtx'; the barcode and feature
#' files are always rewritten.
#'
#' @return NULL
#' @export
#'
#' @import Matrix R.utils
#'
generate10Xdata <- function(matrix, gene.info, outPath, overwrite = F){
    # Validate before touching the file system: features.tsv.gz must describe
    # exactly the rows of the matrix, otherwise the folder would be inconsistent.
    if(is.null(ncol(gene.info)) || ncol(gene.info) < 2){
        stop("The parameter 'gene.info' should contain at least two columns (gene ID and gene symbol).\n")
    }
    if(!isTRUE(nrow(gene.info) == nrow(matrix))){
        stop("The number of rows of 'gene.info' should be equal to the number of rows of 'matrix'.\n")
    }

    out.dir <- file.path(outPath, "filtered_feature_bc_matrix")
    if(!dir.exists(out.dir)){
        dir.create(out.dir, recursive = T)
    }

    # Close each connection even if writing fails.
    barcode.gz <- gzfile(file.path(out.dir, "barcodes.tsv.gz"), "w")
    tryCatch(
        write.table(colnames(matrix), barcode.gz, quote = F, col.names = F, row.names = F, sep = "\t"),
        finally = close(barcode.gz)
    )

    # The third column holds the feature type.
    gene.info[, 3] <- "Gene Expression"
    feature.gz <- gzfile(file.path(out.dir, "features.tsv.gz"), "w")
    tryCatch(
        write.table(gene.info, feature.gz, quote = F, col.names = F, row.names = F, sep = "\t"),
        finally = close(feature.gz)
    )

    # Keep sparse input sparse instead of expanding it to a dense matrix first.
    if(inherits(matrix, "sparseMatrix")){
        sparse.mat <- as(matrix, "CsparseMatrix")
    }else{
        sparse.mat <- as(as.matrix(matrix), "CsparseMatrix")
    }
    mtx.file <- file.path(out.dir, "matrix.mtx")
    writeMM(sparse.mat, file = mtx.file)
    gzip(mtx.file, overwrite = overwrite)
}
595 | 
596 | 
597 | 
#' extractFiles
#'
#' Extract files from each sample's folder and rename them with sample's name.
#' Each file is copied to 'outputPath' as "<sampleName>-<file>"; existing files
#' are overwritten. A warning is given for files that could not be copied.
#'
#' @param savePath A path of samples' result folder.
#' @param sampleNames A vector of samples' names (the subfolder names in 'savePath').
#' @param outputPath A path to saving the extracted reports.
#' @param files The name of files you want to extract. The default is c("report-scStat.html", "report-scAnno.html").
#' @param subfolders The name of subfolders for the files you want to extract. The default is NULL.
#' It can be a character string, which means all files are under the subfolder.
#' It can also be a character string vector with same length as "files", which are corresponding to "files".
#'
#' @return NULL
#' @export
#'
extractFiles <- function(savePath, sampleNames, outputPath,
                         files = c("report-scStat.html", "report-scAnno.html"),
                         subfolders = NULL){
    message("[", Sys.time(), "] -----: extract files")
    if(!is.null(subfolders) && length(subfolders) != 1 && length(subfolders) != length(files)){
        stop("The lengths of files and subfolders are not equal.")
    }

    if(!dir.exists(file.path(outputPath))){
        dir.create(file.path(outputPath), recursive = T)
    }

    for(sampleName in sampleNames){
        cur.path <- file.path(savePath, sampleName)
        # file.path() returns an empty result if any argument is NULL,
        # so the no-subfolder case is handled separately.
        if(is.null(subfolders)){
            ori.files <- file.path(cur.path, files)
        }else{
            ori.files <- file.path(cur.path, subfolders, files)
        }
        new.files <- file.path(outputPath, paste0(sampleName, "-", files))
        copied <- file.copy(ori.files, new.files, overwrite = T)
        # file.copy() only reports failures through its return value.
        if(!all(copied)){
            warning("Failed to extract the following files: ",
                    paste(ori.files[!copied], collapse = ", "), call. = FALSE)
        }
    }
    invisible(NULL)
}
632 | 
633 | 
634 | 
#' checkStatArguments
#'
#' Validate the arguments of 'runScStatistics' and stop with an informative
#' message at the first invalid one.
#'
#' @param argList A list of arguments passed into 'runScStatistics'.
#'
#' @return NULL
#' @export
#'
checkStatArguments <- function(argList){
    # The input data folder must exist.
    data.path <- argList$dataPath
    if(!dir.exists(data.path)){
        stop("No such directory for the 'dataPath':", data.path, ".\n")
    }

    species.ok <- argList$species %in% c("human", "mouse")
    if(!species.ok){
        stop("The parameter 'species' should be one of the c(\"human\", \"mouse\").\n")
    }

    # The threshold must be numeric and lie within [0.5, 1].
    thres <- argList$hg.mm.thres
    if(!is.numeric(thres)){
        stop("The parameter 'hg.mm.thres' should be a float-point number within [0.5, 1].\n")
    }
    if(!(thres >= 0.5 & thres <= 1)){
        stop("The parameter 'hg.mm.thres' should be within [0.5, 1].\n")
    }
    invisible(NULL)
}
658 | 
659 | 
#' checkAnnoArguments
#'
#' Validate the arguments of 'runScAnnotation' and stop with an informative
#' message at the first invalid one.
#'
#' @param argList A list of arguments passed into 'runScAnnotation'.
#'
#' @return NULL
#' @export
#'
checkAnnoArguments <- function(argList){
    # Both input folders must exist.
    data.path <- argList$dataPath
    if(!dir.exists(data.path)){
        stop("No such directory for the 'dataPath':", data.path, ".\n")
    }

    stat.path <- argList$statPath
    if(!dir.exists(stat.path)){
        stop("No such directory for the 'statPath':", stat.path, ".\n")
    }

    species.ok <- argList$species %in% c("human", "mouse")
    if(!species.ok){
        stop("The parameter 'species' should be one of the c(\"human\", \"mouse\").\n")
    }

    genome.ok <- argList$genome %in% c("hg19", "hg38", "mm10")
    if(!genome.ok){
        stop("The parameter 'genome' should be one of the c(\"hg19\", \"hg38\", \"mm10\").\n")
    }

    # 'anno.filter' may be NULL or any subset of the three supported gene groups.
    anno.filter <- argList$anno.filter
    filter.ok <- all(anno.filter %in% c("mitochondrial", "ribosome", "dissociation")) |
        is.null(anno.filter)
    if(!filter.ok){
        stop("The parameter 'anno.filter' should be some of c(\"mitochondrial\", \"ribosome\", \"dissociation\") or NULL.\n")
    }

    doublet.ok <- argList$doublet.method %in% c("cxds", "bcds")
    if(!doublet.ok){
        stop("The parameter 'doublet.method' should be one of the c(\"cxds\", \"bcds\").\n")
    }

    # Only the tSNE or the UMAP coordinate pair is accepted.
    is.tsne <- all(argList$coor.names == c("tSNE_1", "tSNE_2"))
    is.umap <- all(argList$coor.names == c("UMAP_1", "UMAP_2"))
    if(!is.tsne & !is.umap){
        stop("The parameter 'coor.names' should be c(\"tSNE_1\", \"tSNE_2\") or c(\"UMAP_1\", \"UMAP_2\").\n")
    }

    geneSet.ok <- argList$geneSet.method %in% c("average", "GSVA")
    if(!geneSet.ok){
        stop("The parameter 'geneSet.method' should be one of the c(\"average\", \"GSVA\").\n")
    }
    invisible(NULL)
}
702 | 
703 | 
704 | 
#' checkCombArguments
#'
#' Validate the arguments of 'runScCombination' and stop with an informative
#' message at the first invalid one.
#'
#' @param argList A list of arguments passed into 'runScCombination'.
#'
#' @return NULL
#' @export
#'
checkCombArguments <- function(argList){
    # Every sample needs exactly one result path.
    n.paths <- length(argList$single.savePaths)
    n.samples <- length(argList$sampleNames)
    if(n.paths != n.samples){
        stop("The length of parameter 'single.savePaths' and 'sampleNames' should be equal.\n")
    }

    supported.methods <- c("Harmony", "NormalMNN", "SeuratMNN", "Raw", "Regression", "LIGER")
    method.ok <- argList$comb.method %in% supported.methods
    if(!method.ok){
        stop("The parameter 'comb.method' should be one of the c(\"Harmony\", \"NormalMNN\", \"SeuratMNN\", \"Raw\", \"Regression\", \"LIGER\").\n")
    }
    invisible(NULL)
}
720 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # scCancer
 2 | 
 3 | ## Introduction
 4 | 
 5 | The `scCancer` package focuses on processing and analyzing droplet-based scRNA-seq data for cancer research. In addition to the basic data processing steps, this package takes several special considerations for cancer-specific features.
 6 | 
 7 | The workflow of  `scCancer` mainly consists of three modules: `scStatistics`, `scAnnotation`, and `scCombination`.
 8 | * The `scStatistics` performs basic statistical analyses of raw data and quality control.
 9 | * The `scAnnotation` performs functional data analyses and visualizations, such as low dimensional representation, clustering, cell type classification, cell malignancy estimation, cellular phenotype analyses, gene signature analyses, cell-cell interaction analyses, etc.
10 | * The `scCombination` performs multi-sample data integration, batch effect correction, and visualization of the analysis results.
11 | 
12 | After the computational analyses, detailed and graphical reports are generated in user-friendly HTML format.
13 | 
14 | <img src="http://lifeome.net/software/sccancer/scCancer-workflow.png" width="70%" alt="scCancer-workflow" align=center>
15 | 
16 | ([Click to view larger workflow picture](http://lifeome.net/software/sccancer/scCancer-workflow.png))
17 | 
18 | 
19 | ## System Requirements
20 | * R version: >= 3.5.0 (**suggest:** R 3.6, **not 4.0**)
21 | * **Hint:  For R (version>=4.0) under Windows system**, the Rtools needs to be updated to version 4.0 from https://cran.r-project.org/bin/windows/Rtools/. So, if you are not familiar with R environment configuration, we **don't** recommend using R (>=4.0).
22 | 
23 | ## Current version
24 | 
25 | * scCancer 2.2.1 (update at 2021.03.02)
26 | * [All version log](https://github.com/wguo-research/scCancer/wiki/Version-Log)
27 | 
28 | ## Installation
29 | 
30 | The detailed installation instruction can be found in the project [wiki]( https://github.com/wguo-research/scCancer/wiki/2.-Installation).
31 | 
32 | 
33 | ## Usage
34 | 
35 | The vignette of `scCancer` can be found in the project [wiki]( https://github.com/wguo-research/scCancer/wiki).
36 | 
37 | * [Quick start](https://github.com/wguo-research/scCancer/wiki/3.-Quick-start)
38 | * [Step by step introduction](https://github.com/wguo-research/scCancer/wiki/4.-Step-by-step-introduction)
39 | * [Other personalized settings](https://github.com/wguo-research/scCancer/wiki/5.-Other-personalized-settings)
40 | 
41 | We provide an [example data](http://lifeome.net/software/sccancer/KC-example.tar.gz) of kidney cancer from 10X Genomics, and following are the generated HTML reports:
42 | 
43 | * [`report-scStat.html`](http://lifeome.net/software/sccancer/KC-example-report-scStat.html)
44 | * [`report-scAnno.html`](http://lifeome.net/software/sccancer/KC-example-report-scAnno.html)
45 | 
46 | For multi-datasets, following is a generated HTML report for three kidney cancer samples integration analysis:
47 | 
48 | * [`report-scAnnoComb.html`](http://lifeome.net/software/sccancer/KC123-report-scAnnoComb.html)
49 | 
50 | 
51 | ## Citation
52 | Please use the following citation:
53 | 
54 | [1] Wenbo Guo, Dongfang Wang, Shicheng Wang, Yiran Shan, Changyi Liu, Jin Gu, scCancer: a package for automated processing of single-cell RNA-seq data in cancer, _Briefings in Bioinformatics_,  bbaa127, [https://doi.org/10.1093/bib/bbaa127](https://doi.org/10.1093/bib/bbaa127)
55 | 
56 | [2] Zeyu Chen, Yuxin Miao, Zhiyuan Tan, Qifan Hu, Yanhong Wu, Xinqi Li, Wenbo Guo, Jin Gu, scCancer2: data-driven in-depth annotations of the tumor microenvironment at single-level resolution, Bioinformatics, Volume 40, Issue 2, February 2024, btae028, [https://doi.org/10.1093/bioinformatics/btae028](https://doi.org/10.1093/bioinformatics/btae028)
57 | 
58 | ## License
59 | GPL-3
60 | 


--------------------------------------------------------------------------------
/inst/rds/cellTypeTemplates.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cellTypeTemplates.RDS


--------------------------------------------------------------------------------
/inst/rds/cnvRef_Data-HM.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cnvRef_Data-HM.RDS


--------------------------------------------------------------------------------
/inst/rds/cnvRef_Data-boneMarrow-MS.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cnvRef_Data-boneMarrow-MS.RDS


--------------------------------------------------------------------------------
/inst/rds/cnvRef_SNN-HM.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cnvRef_SNN-HM.RDS


--------------------------------------------------------------------------------
/inst/rds/cnvRef_SNN-boneMarrow-MS.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wguo-research/scCancer/0858810b558e31fcb212057ca5a8688ec2353dad/inst/rds/cnvRef_SNN-boneMarrow-MS.RDS


--------------------------------------------------------------------------------
/inst/rmd/SoupX.Rmd:
--------------------------------------------------------------------------------
1 | 
2 | ### `r h.i`.`r h.ii`.2 Ambient RNAs contamination fraction estimation
3 | 
4 | We refer to the algorithm of [`SoupX`](https://github.com/constantAmateur/SoupX) 
5 | to estimate the contamination fraction of ambient RNAs from lysed cells.
6 | 
7 | The estimated contamination fraction is <span style="color:red">` `r results$contamination.frac * 100`% `</span>.
8 | 
9 | 


--------------------------------------------------------------------------------
/inst/rmd/cellCalling.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | * The number of droplets containing UMI (nUMI > 0) is ` `r results$nList[1]` `.
 4 | 
 5 | ```{r echo=FALSE, results='asis', eval=!results$run.emptydrop}
 6 | cat("* Using the supplied cell calling results(filtered data), `", results$nList[2], "` cells are identified (min.nUMI = `", results$min.nUMI, "`).\n", sep = "")
 7 | ```
 8 | 
 9 | ```{r echo=FALSE, results='asis', eval=results$run.emptydrop}
10 | # Rendered only when results$run.emptydrop is TRUE: report that DropletUtils was used and how many cells were called.
11 | cat("* Cell calling result(filtered data) cannot be found, so we use the R package [`DropletUtils`](https://bioconductor.org/packages/release/bioc/html/DropletUtils.html) to identify cells.\n", "* After cell calling, `", results$nList[2], "` cells are identified (min.nUMI = `", results$min.nUMI, "`).\n", sep = "")
12 | ```
13 | 
14 | * Following are two plots showing the distribution of `nUMI` for cells and empty droplets identified.
15 | 
16 | ```{r nUMI, echo=F, message=F, warning=F, dpi=300, fig.height=3, fig.width=8}
17 | plot_grid(results$p.cells.1, results$p.cells.2, ncol = 2)
18 | ```
19 | <p align="right">(Hi-res image: <a href="./figures/cells-distr-hist.png">left</a>, <a href="./figures/cells-distr-rank.png">right</a>)</p>
20 | 


--------------------------------------------------------------------------------
/inst/rmd/cellCycle.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | ### `r h.i`.`r h.ii` Cell cycle estimation
 3 | The estimated cell cycles can be found in the column `CellCycle.score` of the table file 
 4 | [cellAnnotation.txt](./cellAnnotation.txt). 
 5 | 
 6 | Here is the scatter plot colored by estimated cell cycle score. 
 7 | 
 8 | ```{r cellCyclePlot, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='60%'}
 9 | results$cellCycle.plot
10 | ```
11 | <p align="right">(Hi-res image: <a href="./figures/cellCycle-point.png">view</a>)</p>
12 | 
13 | ```{r echo=F}
14 | h.ii <- h.ii + 1
15 | ```
16 | 


--------------------------------------------------------------------------------
/inst/rmd/cellInteraction.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ### `r h.i`.`r h.ii` Cell interaction analysis
 4 | In order to analyze the ligand-receptor interactions between the various cell types in cancer micro-environment, 
 5 | we use a ligand-receptor database [`FANTOM5`](http://fantom.gsc.riken.jp/data/), and estimate the interaction scores among cell sets (the default is clusters). 
 6 | 
 7 | The interaction scores between all ligand-receptor pairs and all cell sets can be found in the table file 
 8 | [InteractionScore.txt](./InteractionScore.txt). 
 9 | 
10 | Here is a plot showing the number of ligand-receptor pairs with score larger than 0.1 among clusters. 
11 | The size of point means the number of ligand-receptor pairs with scores larger than 0.1.
12 | The color of point means the sum of the ligand-receptor pairs scores.
13 | To compare conveniently, the bottom subplot shows the predicted cell type fraction of each cell set.
14 | 
15 | ```{r cellInteractionPlot, echo=F, message=F, warning=F, dpi=300, fig.width=7, fig.height=6.5, fig.align="center", out.width='80%'}
16 | grid::grid.draw(results$inter.plot)
17 | ```
18 | 
19 | <p align="right">(Hi-res image: <a href="./figures/interaction-score.png">view</a>)</p>
20 | 
21 | 
22 | Following are the top 10 scoring ligand-receptor pairs.
23 | ```{r interaction.summary, echo=F}
24 | format(head(results$interaction.score, 10), digits = 4)
25 | ```
26 | 
27 | 
28 | ```{r echo=F}
29 | h.ii <- h.ii + 1
30 | ```
31 | 


--------------------------------------------------------------------------------
/inst/rmd/cellTypePred.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ### `r h.i`.`r h.ii` Cancer micro-environmental cell types annotation
 4 | In order to annotate major microenvironment cell types, including endothelial cells, 
 5 | fibroblast, and immune cells (CD4+ T cells, CD8+ T cells, B cells, natural killer cells, 
 6 | and myeloid cells), we use a one-class logistic regression (OCLR) model to perform prediction. 
 7 | The predicted cell type results can be found in the column `Cell.Type` of the table file 
 8 | [cellAnnotation.txt](./cellAnnotation.txt).
 9 | And the correlation coefficients with cell type templates are in columns `*.corr`.
10 | 
11 | Here is the scatter plot colored by predicted cell types.
12 | 
13 | ```{r cellTypePlot, echo=F, message=F, warning=F, dpi=500, fig.width=5.2, fig.height=4, fig.align="center", out.width='80%'}
14 | results$cellType.plot$p.type
15 | ```
16 | <p align="right">(Hi-res image: <a href="./figures/cellType-point.png">view</a>)</p>
17 | 
18 | 
19 | Here is a bar plot showing the relationship between cell cluster and cell type annotation.
20 | 
21 | ```{r barPlot, echo=F, message=F, warning=F, dpi=300, fig.width=6, fig.height=4, fig.align="center", out.width='80%'}
22 | results$cellType.plot$p.bar
23 | ```
24 | <p align="right">(Hi-res image: <a href="./figures/cellType-bar.png">view</a>)</p>
25 | 
26 | 
27 | 
28 | ```{r echo=F}
29 | h.ii <- h.ii + 1
30 | ```
31 | 


--------------------------------------------------------------------------------
/inst/rmd/contamination.Rmd:
--------------------------------------------------------------------------------
1 | 
2 | * **Ambient RNAs contamination removing.** Taking advantage of the algorithm of [`SoupX`](https://github.com/constantAmateur/SoupX), we use the contamination fraction ` `r results$contamination.frac * 100`% ` to reduce the ambient RNAs' influence.
3 | 


--------------------------------------------------------------------------------
/inst/rmd/diffExpr.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ### `r h.i`.`r h.ii` Differential expression analysis
 4 | In order to extract the features of each cluster, we perform differential expression analysis by running [`Seurat`](https://satijalab.org/seurat/) functions.
 5 | 
 6 | Here is a heatmap plot showing the top `r results$n.markers` differentially expressed genes for every cluster compared to all remaining cells. All information of differentially expressed genes can be found [here](./diff.expr.genes/).
 7 | 
 8 | ```{r DEplot, echo=F, message=F, warning=F, dpi=800, fig.width=8, fig.height=results$DEplot.height}
 9 | results$seurat.plots$p.de.heatmap
10 | ```
11 | <p align="right">(Hi-res image: <a href="./figures/DE-heatmap.png">view</a>)</p>
12 | 
13 | ```{r echo=F}
14 | h.ii <- h.ii + 1
15 | ```
16 | 


--------------------------------------------------------------------------------
/inst/rmd/doublet.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ### `r h.i`.`r h.ii` Doublet score estimation
 4 | 
 5 | ```{r echo=FALSE, results='asis'}
 6 | if(results$doublet.method == "bcds"){
 7 |     cat("We estimate doublet score by using the binary classification based algorithm `bcds` in R package [scds](https://bioconductor.org/packages/release/bioc/html/scds.html).\n")
 8 | }else if(results$doublet.method == "cxds"){
 9 |     cat("We estimate doublet score by using the co-expression based algorithm `cxds` in R package [scds](https://bioconductor.org/packages/release/bioc/html/scds.html).\n")
10 | }else if(results$doublet.method == "DoubletFinder"){
11 |     cat("We estimate doublet score by using the algorithm in R package [DoubletFinder](https://github.com/chris-mcginnis-ucsf/DoubletFinder).\n")
12 | }
13 | ```
14 | 
15 | The estimated doublet scores can be found in the column `doublet.score` of the table file 
16 | [cellAnnotation.txt](./cellAnnotation.txt).
17 | 
18 | Here is the scatter plot colored by the number of UMIs(left) and the estimated doublet scores(right).
19 | 
20 | ```{r doubletPlot, echo=F, message=F, warning=F, dpi=500, fig.width=10, fig.height=4, fig.align="center"}
21 | plot_grid(results$nUMI.plot, results$doublet.plot, ncol = 2)
22 | ```
23 | <p align="right">(Hi-res image: <a href="./figures/nUMI-point.png">left</a>, <a href="./figures/doublet-point.png">right</a>)</p>
24 | 
25 | 
26 | ```{r echo=F}
27 | h.ii <- h.ii + 1
28 | ```
29 | 


--------------------------------------------------------------------------------
/inst/rmd/exprProgram.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ### `r h.i`.`r h.ii` Expression programs identification
 4 | In order to identify potential expression program signatures in an unsupervised manner,
 5 | we apply non-negative matrix factorization (NMF) to the centered and non-negative transformed expression matrix.
 6 | The input for number of identified programs (the rank in NMF) is ` `r results$nmf.rank` `.
 7 | 
 8 | Following is the heatmap for identified cells' expression programs.
 9 | 
10 | ```{r exprProgramplot, echo=F, message=F, warning=F, dpi=500, fig.width=10, fig.height=results$exprProgPlot.height}
11 | results$exprProgram.plot
12 | ```
13 | <p align="right">(Hi-res image: <a href="./figures/exprProgram-heatmap.png">view</a>)</p>
14 | 
15 | 
16 | After this step, `scCancer` saved following results files to the folder '[expr.programs](./expr.programs/)':
17 |  * The left matrix `W` (genes * programs): [W-gene-program.txt](./expr.programs/W-gene-program.txt).
18 |  * The right matrix `H` (programs * cells): [H-program-cell.txt](./expr.programs/H-program-cell.txt).
19 |  * The relative genes of each programs: [program.gene.value.txt](./expr.programs/program.gene.value.txt).
20 |  
21 |  
22 | ```{r echo=F}
23 | h.ii <- h.ii + 1
24 | ```
25 | 


--------------------------------------------------------------------------------
/inst/rmd/filterCell.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | * **Cell QC.** According to the thresholds in [cell.QC.thres.txt](./cell.QC.thres.txt) (as shown below), 
 4 | we get ` `r dim(results$cell.annotation)[1]` ` cells.
 5 | 
 6 | <center>
 7 | ```{r thresTable, echo=F, warning=F}
 8 | # results$filter.thres %>% knitr::kable("html")
 9 | kable(results$filter.thres)
10 | ```
11 | </center>
12 | 
13 | 


--------------------------------------------------------------------------------
/inst/rmd/filterGene.Rmd:
--------------------------------------------------------------------------------
1 | 
2 | * **Gene QC.** After filtering the mitochondrial, ribosome, dissociation genes and genes expressed in too few (`nCell < `r results$nCell.min` `) or too many (`background percent >= `r results$bgPercent.max` `) cells, we get ` `r dim(results$gene.manifest)[1]` ` genes ([the filtered genes list file](./gene.manifest.filter.txt)).
3 | 


--------------------------------------------------------------------------------
/inst/rmd/geneSets.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ### `r h.i`.`r h.ii` Gene set signature scores calculation 
 4 | In order to analyze cells' expression in gene sets (signatures) level, we calculate the signature scores for each cell.
 5 | 
 6 | ```{r echo=FALSE, results='asis'}
 7 | if(is.null(results$geneSets)){
 8 |     cat("* Gene sets: the default 50 hallmark gene sets from [MSigDB](http://software.broadinstitute.org/gsea/msigdb/).\n")
 9 | }else{
10 |     cat("* Gene sets: the input gene sets list.\n")
11 | }
12 | if(results$geneSet.method == "average"){
13 |     cat("* Method: relative average expression levels.")
14 | }else if(results$geneSet.method == "GSVA"){
15 |     cat("* Method: [GSVA](https://www.bioconductor.org/packages/release/bioc/html/GSVA.html).")
16 | }
17 | ```
18 | 
19 | The calculated gene set signature scores can be found in the column `GS__*` of the table file 
20 | [cellAnnotation.txt](./cellAnnotation.txt). 
21 | 
22 | Following is the heatmap for these signatures scores.
23 | 
24 | ```{r geneSetPlot, echo=F, message=F, warning=F, dpi=500, fig.width=10, fig.height=results$geneSetPlot.height}
25 | results$geneSet.plot
26 | ```
27 | <p align="right">(Hi-res image: <a href="./figures/geneSet-heatmap.png">view</a>)</p>
28 | 
29 | ```{r echo=F}
30 | h.ii <- h.ii + 1
31 | ```
32 | 


--------------------------------------------------------------------------------
/inst/rmd/main-scAnno.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "scCancer"
  3 | author: "G-Lab"
  4 | date: "2019/6/11"
  5 | output: html_document
  6 | ---
  7 | 
  8 | <style type="text/css">
  9 |     body{
 10 |         font-size: 15px;
 11 |         line-height: 22px;
 12 |     }
 13 |     h1.title {
 14 |         font-size: 38px;
 15 |     }
 16 |     h1 {
 17 |         font-size: 28px;
 18 |         margin-top: 23px;
 19 |     }
 20 |     h2 {
 21 |         font-size: 24px;
 22 |         margin-top: 25px;
 23 |     }
 24 |     h3 {
 25 |       font-size: 20px;
 26 |         margin-top: 25px;
 27 |     }
 28 |     code.r{
 29 |         font-size: 13px;
 30 |     }
 31 |     pre {
 32 |         font-size: 14px;
 33 |     }
 34 |     p {
 35 |         margin-top:10px;
 36 |         margin-bottom:10px;
 37 |     }
 38 |     table { 
 39 |         width: 60%;
 40 |         border-collapse: collapse;
 41 |         font-family: Futura, Arial, sans-serif;
 42 |     }
 43 |     th,td {
 44 |         padding: 5px;
 45 |     }
 46 |     th,td {
 47 |         border-bottom: 1px solid #ddd;
 48 |         border-top: 1px solid #ddd;
 49 |         padding-right: 20px
 50 |     }
 51 | </style>
 52 | 
 53 | 
 54 | ```{r setting, include=FALSE}
 55 | options(knitr.table.format = "html") 
 56 | options(scipen=10)
 57 | knitr::opts_chunk$set(echo = TRUE, fig.path = file.path(results$savePath, 'report-figures//'))
 58 | 
 59 | title <- "scCancer"
 60 | if(!is.null(results$sampleName)){
 61 |   title <- paste0(results$sampleName, "  -  ", title)
 62 | }
 63 | # Report signature: the explicit author name if given, otherwise the login name from the environment.
 64 | if(!is.null(results$authorName)){
 65 |   userName <- results$authorName
 66 | }else{
 67 |   userName <- Sys.getenv("USERNAME", unset = Sys.getenv("USER"))  # USERNAME is typically set on Windows, USER on Unix-alikes
 68 | }
 69 | reportMark <- Sys.time()
 70 | if(userName != ""){
 71 |   reportMark <- paste0(userName, " , ", reportMark)
 72 | }
 73 | 
 74 | h.i <- 1
 75 | h.ii <- 1
 76 | ```
 77 | 
 78 | 
 79 | # `r title`
 80 | --------------------------------
 81 | <p align="right">`r reportMark`</p>
 82 | 
 83 | 
 84 | 
 85 | 
 86 | ## `r h.i` Read data
 87 | Read the expression data and filter cells and genes according to quality control steps.
 88 | 
 89 | ```{r contamination, child=system.file("rmd", "contamination.Rmd", package = "scCancer"), eval = results$bool.rmContamination}
 90 | ```
 91 | 
 92 | ```{r filterCell, child=system.file("rmd", "filterCell.Rmd", package = "scCancer"), eval = results$bool.filter.cell}
 93 | ```
 94 | 
 95 | ```{r filterGene, child=system.file("rmd", "filterGene.Rmd", package = "scCancer"), eval = results$bool.filter.gene}
 96 | ```
 97 | 
 98 | ```{r echo=F}
 99 | h.i <- h.i + 1
100 | ```
101 | 
102 | 
103 | 
104 | 
105 | ## `r h.i` Data preprocessing
106 | 
107 | After the quality control, we perform following preprocessing steps based on some functions of the R package [`Seurat V3`](https://satijalab.org/seurat/).
108 | 
109 | * **Normalization.** Normalize the raw counts data to TPMs (transcripts-per-million) and log-transform them.
110 | * **Scale data.** Remove unwanted sources of variations (` `r results$vars.to.regress` `) by regression and center the resulting residuals.
111 | * **Highly variable genes.** Calculate the average expression and dispersion of each gene across all cells to select highly variable genes (HVGs).
112 | 
113 | ```{r hvgPlot, echo=F, message=F, warning=F, dpi=500, fig.width=8, fig.height=4, fig.align="center", out.width='70%'}
114 | results$seurat.plots$p.hvg
115 | ```
116 | <p align="right">(Hi-res image: <a href="./figures/hvg.png">view</a>)</p>
117 | * **PCA.** Perform principal component analysis (PCA) and select PCs to perform clustering and visualization.
118 | * **Visualization.** Use t-SNE or UMAP to present each single cell in two-dimensional space.
119 | 
120 | 
121 | 
122 | ```{r echo=F}
123 | h.i <- h.i + 1
124 | ```
125 | 
126 | 
127 | 
128 | 
129 | 
130 | ## `r h.i` Cells annotation
131 | 
132 | 
133 | 
134 | ### `r h.i`.`r h.ii` Markers expression profile
135 | Here are the scatter plots colored by the normalized expression of some cell type markers. 
136 | 
137 | <center>
138 | 
139 | ```{r echo=FALSE, results='asis'}
140 | # A pipe table needs its header rows, so print them whenever any row follows (default markers and/or input genes).
141 | if(results$bool.add.features || !is.null(results$show.features)){
142 |   cat("| Cell Type       | Markers                  |\n", "| :-------------- | :----------------------- |\n", sep="")
143 | }
144 | if(results$bool.add.features){
145 |   if(results$species == "human"){
146 |     cat("| T cells (CD4+)  | PTPRC, CD3D, CD4         |\n", sep="")
147 |     cat("| T cells (CD8+)  | PTPRC, CD3D, CD8A, CD8B  |\n", sep="")
148 |     cat("| B cells         | PTPRC, CD79A             |\n", sep="")
149 |     cat("| NK cell         | PTPRC, NKG7              |\n", sep="")
150 |     cat("| Myeloid cells   | PTPRC, LYZ               |\n", sep="")
151 |     cat("| Endothelial     | PLVAP                    |\n", sep="")
152 |     cat("| Fibroblast      | ACTA2                    |\n", sep="")
153 |     cat("| Epithelial      | EPCAM, KRT8              |\n", sep="")
154 |   }else{
155 |     cat("| T cells (CD4+)  | Ptprc, Cd3d, Cd4         |\n", sep="")
156 |     cat("| T cells (CD8+)  | Ptprc, Cd3d, Cd8a, Cd8b  |\n", sep="")
157 |     cat("| B cells         | Ptprc, Cd79a             |\n", sep="")
158 |     cat("| NK cell         | Ptprc, Nkg7              |\n", sep="")
159 |     cat("| Myeloid cells   | Ptprc, Lyz1, Lyz2        |\n", sep="")
160 |     cat("| Endothelial     | Plvap                    |\n", sep="")
161 |     cat("| Fibroblast      | Acta2                    |\n", sep="")
162 |     cat("| Epithelial      | Epcam, Krt8              |\n", sep="")
163 |   }
164 | }
165 | if(!is.null(results$show.features)){
166 |   cat("| Input genes     | ", paste(results$show.features, collapse=", "), " |\n", sep="")
167 | }
168 | ```
169 | 
170 | </center>
171 | 
172 | ```{r markersPlot, eval=!is.null(results$seurat.plots$p.markers.all), echo=F, message=F, warning=F, dpi=500, fig.width=8, fig.height=results$markersPlot.height}
173 | results$seurat.plots$p.markers.all
174 | ```
175 | <p align="right" style="margin-top:1px">(Hi-res image: <a href="./figures/markers-all.png">view</a>, <a href="./figures/singleMarkerPlot/">view single</a>)</p>
176 | 
177 | 
178 | Following are some statistical indicators of these genes.
179 | ```{r, echo=F, message=F, warning=F}
180 | final.genes <- names(results$seurat.plots$ps.markers)
181 | gene.manifest <- read.table(file.path(statPath, 'geneManifest.txt'), header = T, sep = "\t")
182 | final.gene.manifest <- subset(gene.manifest, Symbol %in% final.genes)
183 | if("bg.percent" %in% colnames(final.gene.manifest)){
184 |   show.cols <- c("Symbol", "EnsemblID", "nCell", "bg.percent", "detect.rate", "prop.median")
185 | }else{
186 |   show.cols <- c("Symbol", "EnsemblID", "nCell", "detect.rate", "prop.median")
187 | }
188 | final.gene.manifest <- final.gene.manifest[order(final.gene.manifest$Symbol), show.cols]
189 | rownames(final.gene.manifest) <- final.gene.manifest$Symbol
190 | rm(gene.manifest)
191 | print(format(final.gene.manifest, digits = 3, scientific = T))
192 | ```
193 | 
194 | ```{r echo=F}
195 | h.ii <- h.ii + 1
196 | ```
197 | 
198 | 
199 | 
200 | 
201 | 
202 | ### `r h.i`.`r h.ii` Clustering
203 | In order to identify clusters of all single cells, we perform a graph-based clustering by running [`Seurat`](https://satijalab.org/seurat/) functions. 
204 | The cluster information can be found in the column `Cluster` of the table file 
205 | [cellAnnotation.txt](./cellAnnotation.txt). 
206 | 
207 | Here is the t-SNE plot colored by cell clusters. 
208 | 
209 | ```{r clusterPlotTsne, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='80%'}
210 | results$seurat.plots$p.cluster.tsne
211 | ```
212 | <p align="right">(Hi-res image: <a href="./figures/cluster-point-tsne.png">view</a>)</p>
213 | 
214 | 
215 | ```{r umap, child=system.file("rmd", "umap.Rmd", package = "scCancer"), eval = !is.null(results$seurat.plots$p.cluster.umap)}
216 | ```
217 | 
218 | 
219 | ```{r echo=F}
220 | h.ii <- h.ii + 1
221 | ```
222 | 
223 | 
224 | 
225 | ```{r doublet, child=system.file("rmd", "doublet.Rmd", package = "scCancer"), eval = results$bool.runDoublet}
226 | ```
227 | 
228 | 
229 | 
230 | ```{r diffExpr, child=system.file("rmd", "diffExpr.Rmd", package = "scCancer"), eval = results$bool.runDiffExpr}
231 | ```
232 | 
233 | 
234 | 
235 | 
236 | ```{r cellType, child=system.file("rmd", "cellTypePred.Rmd", package = "scCancer"), eval = results$bool.runCellClassify}
237 | ```
238 | 
239 | 
240 | 
241 | 
242 | ```{r malignancy, child=system.file("rmd", "malignancy.Rmd", package = "scCancer"), eval = results$bool.runMalignancy}
243 | ```
244 | 
245 | 
246 | --------------------------------
247 | ```{r echo=FALSE, results='asis', eval = results$bool.intraTumor}
248 | # if(is.null(results$tumor.clusters)){
249 | #     cat("#### According to the results of cell type prediction and cell malignancy estimation, ",
250 | #     "we couldn't identify tumor clusters, ", 
251 | #     "so we use all clusters to perform following heterogeneity analyses.\n", sep = "")
252 | # }else{
253 | #     # cat("#### According to the results of cell type prediction and cell malignancy estimation, we identify the clusters `",
254 | #     #     str_c(results$tumor.clusters, collapse = ", "),
255 | #     #     "` as tumor clusters, and following intra-tumor heterogeneity analyses mainly focus on them.\n", sep = "")
256 | #     cat("#### According to the results of cell type prediction and cell malignancy estimation, we identify the tumor clusters, ",
257 | #         "and following intra-tumor heterogeneity analyses mainly focus on them.\n", sep = "")
258 | # }
259 | cat("#### In order to analyze <span style='color:red'>intra-tumor heterogeneity</span>, we select tumor clusters firstly based on the results of cell type prediction and cell malignancy estimation.\n")
260 | if(is.null(results$tumor.clusters)){
261 |     cat("#### <span style='color:red;font-size:19px'>Warning:</span> Here, we couldn't identify the tumor clusters, so we use <span style='color:red'>all clusters</span> to perform following analyses.\n")
262 | }else{
263 |     cat("#### Here, we identify <span style='color:red'>cluster `", str_c(results$tumor.clusters, collapse = ", "),
264 |         "`</span> as tumor cells. And following analyses mainly focus on them.\n", sep = "")
265 | }
266 | ```
267 | 
268 | 
269 | 
270 | 
271 | ```{r cellCycle, child=system.file("rmd", "cellCycle.Rmd", package = "scCancer"), eval = results$bool.runCellCycle}
272 | ```
273 | 
274 | 
275 | 
276 | 
277 | ```{r stemness, child=system.file("rmd", "stemness.Rmd", package = "scCancer"), eval = results$bool.runStemness}
278 | ```
279 | 
280 | 
281 | 
282 | 
283 | ```{r geneSets, child=system.file("rmd", "geneSets.Rmd", package = "scCancer"), eval = results$bool.runGeneSets}
284 | ```
285 | 
286 | 
287 | 
288 | 
289 | ```{r exprProgram, child=system.file("rmd", "exprProgram.Rmd", package = "scCancer"), eval = results$bool.runExprProgram}
290 | ```
291 | 
292 | 
293 | 
294 | 
295 | ```{r cellInteraction, child=system.file("rmd", "cellInteraction.Rmd", package = "scCancer"), eval = results$bool.runInteraction}
296 | ```
297 | 
298 | 
299 | 
300 | 
301 | ```{r echo=F}
302 | h.i <- h.i + 1
303 | ```
304 | 
305 | 
306 | ## `r h.i` Output
307 | 
308 | ```{r echo=F}
309 | r.i <- 7
310 | ```
311 | 
312 | Running this script generates following files:
313 | 
314 | 1. **Html report** :
315 | [report-scAnno.html](./report-scAnno.html).
316 | 2. **Markdown report** :
317 | [report-scAnno.md](./report-scAnno.md).
318 | 3. **Figure files** :
319 | [figures/](./figures/).
320 | 4. **Figures used in the report** :
321 | [report-figures/](./report-figures/).
322 | 5. **Seurat object** :
323 | [expr.RDS](./).
324 | 6. **Annotation of cells** :
325 | [cellAnnotation.txt](./cellAnnotation.txt).
326 | ```{r echo=FALSE, results='asis', eval=results$bool.runDiffExpr}
327 | cat(r.i, ". **Differentially expressed genes' information for all clusters** : ", sep = "")
328 | cat("[diff.expr.genes/](./diff.expr.genes/).\n", sep = "")
329 | r.i <- r.i + 1
330 | ```
331 | ```{r echo=FALSE, results='asis', eval=results$bool.runMalignancy}
332 | cat(r.i, ". **Results of malignancy estimation** : [malignancy/](./malignancy/).\n", sep = "")
333 | r.i <- r.i + 1
334 | ```
335 | ```{r echo=FALSE, results='asis', eval=results$bool.runExprProgram}
336 | cat(r.i, ". **Results of expression programs identification** : [expr.programs/](./expr.programs/).\n", sep = "")
337 | r.i <- r.i + 1
338 | ```
339 | ```{r echo=FALSE, results='asis', eval=results$bool.runInteraction}
340 | cat(r.i, ". **Cell clusters interactions scores** : [InteractionScore.txt](./InteractionScore.txt).\n", sep = "")
341 | r.i <- r.i + 1
342 | ```
343 | 
344 | 
345 | 
346 | <br>
347 | 
348 | --------------------------------------
349 | &copy; [G-Lab](http://lifeome.net/glab/jgu/),   [Tsinghua University](http://www.tsinghua.edu.cn)
350 | 
351 | 


--------------------------------------------------------------------------------
/inst/rmd/main-scAnnoComb.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "scCancer"
  3 | author: "G-Lab"
  4 | date: "2019/6/11"
  5 | output: html_document
  6 | ---
  7 | 
  8 | <style type="text/css">
  9 |     body{
 10 |         font-size: 15px;
 11 |         line-height: 22px;
 12 |     }
 13 |     h1.title {
 14 |         font-size: 38px;
 15 |     }
 16 |     h1 {
 17 |         font-size: 28px;
 18 |         margin-top: 23px;
 19 |     }
 20 |     h2 {
 21 |         font-size: 24px;
 22 |         margin-top: 25px;
 23 |     }
 24 |     h3 {
 25 |       font-size: 20px;
 26 |         margin-top: 25px;
 27 |     }
 28 |     code.r{
 29 |         font-size: 13px;
 30 |     }
 31 |     pre {
 32 |         font-size: 14px;
 33 |     }
 34 |     p {
 35 |         margin-top:10px;
 36 |         margin-bottom:10px;
 37 |     }
 38 |     table { 
 39 |         width: 60%;
 40 |         border-collapse: collapse;
 41 |         font-family: Futura, Arial, sans-serif;
 42 |     }
 43 |     th,td {
 44 |         padding: 5px;
 45 |     }
 46 |     th,td {
 47 |         border-bottom: 1px solid #ddd;
 48 |         border-top: 1px solid #ddd;
 49 |         padding-right: 20px
 50 |     }
 51 | </style>
 52 | 
 53 | 
 54 | ```{r setting, include=FALSE}
 55 | options(knitr.table.format = "html") 
 56 | options(scipen=10)
 57 | knitr::opts_chunk$set(echo = TRUE, fig.path = file.path(results$savePath, 'report-figures//'))
 58 | 
 59 | title <- "scCancer"
 60 | if(!is.null(results$combName)){
 61 |   title <- paste0(results$combName, "  -  ", title)
 62 | }
 63 | # Report signature: the explicit author name if given, otherwise the login name from the environment.
 64 | if(!is.null(results$authorName)){
 65 |   userName <- results$authorName
 66 | }else{
 67 |   userName <- Sys.getenv("USERNAME", unset = Sys.getenv("USER"))  # USERNAME is typically set on Windows, USER on Unix-alikes
 68 | }
 69 | reportMark <- Sys.time()
 70 | if(userName != ""){
 71 |   reportMark <- paste0(userName, " , ", reportMark)
 72 | }
 73 | 
 74 | h.i <- 1
 75 | h.ii <- 1
 76 | ```
 77 | 
 78 | 
 79 | # `r title`
 80 | --------------------------------
 81 | <p align="right">`r reportMark`</p>
 82 | 
 83 | 
 84 | 
 85 | ## `r h.i` Read data
 86 | 
 87 | The input samples are:
 88 | 
 89 | <center>
 90 | 
 91 | ```{r echo=FALSE, results='asis'}
 92 | cat("| Sample name     | #cells after QC          |\n", sep="")
 93 | cat("| :-------------- | :----------------------- |\n", sep="")
 94 | for(s in results$sampleNames){
 95 |   cat("| ", s, " | ", sum(results$cell.annotation$sample == s), " |\n", sep = "")
 96 | }
 97 | ```
 98 | 
 99 | </center>
100 | 
101 | 
102 | ```{r echo=F}
103 | h.i <- h.i + 1
104 | ```
105 | 
106 | 
107 | 
108 | 
109 | 
110 | ## `r h.i` Data preprocessing
111 | 
112 | After the quality control, we perform following preprocessing steps based on some functions of the R package [`Seurat V3`](https://satijalab.org/seurat/).
113 | 
114 | * **Normalization.** Normalize the raw counts data to TPMs (transcripts-per-million) and log-transform them.
115 | * **Scale data.** Remove unwanted sources of variations (` `r results$vars.to.regress` `) by regression and center the resulting residuals.
116 | * **Highly variable genes.** Calculate the average expression and dispersion of each gene across all cells to select highly variable genes (HVGs).
117 | 
118 | ```{r hvgPlot, echo=F, message=F, warning=F, eval=results$bool.plotHVG, dpi=500, fig.width=8, fig.height=4, fig.align="center", out.width='70%'}
119 | results$seurat.plots$p.hvg
120 | ```
121 | ```{r eval = results$bool.plotHVG, echo = F, results='asis'}
122 | cat("<p align=\"right\">(Hi-res image: <a href=\"./figures/hvg.png\">view</a>)</p>\n")
123 | ```
124 | * **PCA.** Perform principal component analysis (PCA) and select PCs to perform clustering and visualization.
125 | * **Visualization.** Use t-SNE or UMAP to present each single cell in two-dimensional space.
126 | 
127 | 
128 | 
129 | ```{r echo=F}
130 | h.i <- h.i + 1
131 | ```
132 | 
133 | 
134 | 
135 | 
136 | 
137 | ## `r h.i` Cells annotation
138 | 
139 | 
140 | 
141 | ### `r h.i`.`r h.ii` Markers expression profile
142 | Here are the scatter plots colored by the normalized expression of some cell type markers. 
143 | 
144 | <center>
145 | 
146 | ```{r echo=FALSE, results='asis'}
147 | # A pipe table needs its header rows, so print them whenever any row follows (default markers and/or input genes).
148 | if(results$bool.add.features || !is.null(results$show.features)){
149 |   cat("| Cell Type       | Markers                  |\n", "| :-------------- | :----------------------- |\n", sep="")
150 | }
151 | if(results$bool.add.features){
152 |   if(results$species == "human"){
153 |     cat("| T cells (CD4+)  | PTPRC, CD3D, CD4         |\n", sep="")
154 |     cat("| T cells (CD8+)  | PTPRC, CD3D, CD8A, CD8B  |\n", sep="")
155 |     cat("| B cells         | PTPRC, CD79A             |\n", sep="")
156 |     cat("| NK cell         | PTPRC, NKG7              |\n", sep="")
157 |     cat("| Myeloid cells   | PTPRC, LYZ               |\n", sep="")
158 |     cat("| Endothelial     | PLVAP                    |\n", sep="")
159 |     cat("| Fibroblast      | ACTA2                    |\n", sep="")
160 |     cat("| Epithelial      | EPCAM, KRT8              |\n", sep="")
161 |   }else{
162 |     cat("| T cells (CD4+)  | Ptprc, Cd3d, Cd4         |\n", sep="")
163 |     cat("| T cells (CD8+)  | Ptprc, Cd3d, Cd8a, Cd8b  |\n", sep="")
164 |     cat("| B cells         | Ptprc, Cd79a             |\n", sep="")
165 |     cat("| NK cell         | Ptprc, Nkg7              |\n", sep="")
166 |     cat("| Myeloid cells   | Ptprc, Lyz1, Lyz2        |\n", sep="")
167 |     cat("| Endothelial     | Plvap                    |\n", sep="")
168 |     cat("| Fibroblast      | Acta2                    |\n", sep="")
169 |     cat("| Epithelial      | Epcam, Krt8              |\n", sep="")
170 |   }
171 | }
172 | if(!is.null(results$show.features)){
173 |   cat("| Input genes     | ", paste(results$show.features, collapse=", "), " |\n", sep="")
174 | }
175 | ```
176 | 
177 | </center>
178 | 
179 | ```{r markersPlot, eval=!is.null(results$seurat.plots$p.markers.all), echo=F, message=F, warning=F, dpi=500, fig.width=8, fig.height=results$markersPlot.height}
180 | results$seurat.plots$p.markers.all
181 | ```
182 | <p align="right" style="margin-top:1px">(Hi-res image: <a href="./figures/markers-all.png">view</a>, <a href="./figures/singleMarkerPlot/">view single</a>)</p>
183 | 
184 | 
185 | ```{r echo=F}
186 | h.ii <- h.ii + 1
187 | ```
188 | 
189 | 
190 | 
191 | 
192 | ### `r h.i`.`r h.ii` Clustering
193 | In order to identify clusters of all single cells, we perform a graph-based clustering by running [`Seurat`](https://satijalab.org/seurat/) functions. 
194 | The cluster information can be found in the column `Cluster` of the table file 
195 | [cellAnnotation.txt](./cellAnnotation.txt). 
196 | 
197 | Here is the t-SNE plot colored by cell clusters. 
198 | 
199 | ```{r clusterPlotTsne, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='80%'}
200 | results$seurat.plots$p.cluster.tsne
201 | ```
202 | <p align="right">(Hi-res image: <a href="./figures/cluster-point.png">view</a>)</p>
203 | 
204 | 
205 | ```{r umap, child=system.file("rmd", "umap.Rmd", package = "scCancer"), eval = !is.null(results$seurat.plots$p.cluster.umap)}
206 | ```
207 | 
208 | 
209 | ```{r echo=F}
210 | h.ii <- h.ii + 1
211 | ```
212 | 
213 | 
214 | 
215 | 
216 | ### `r h.i`.`r h.ii` Sample source
217 | The sample source information can be found in the column `sample` of the table file 
218 | [cellAnnotation.txt](./cellAnnotation.txt). The method of batch effect correction is ` `r results$comb.method` `.
219 | 
220 | Here is the scatter plot colored by sample source.
221 | 
222 | ```{r samplePlot, echo=F, message=F, warning=F, dpi=500, fig.width=7, fig.height=5, fig.align="center", out.width='80%'}
223 | results$p.sample
224 | ```
225 | <p align="right">(Hi-res image: <a href="./figures/sampleSource-point.png">view</a>)</p>
226 | 
227 | Here is a bar plot showing the relationship between cell cluster and sample source.
228 | 
229 | ```{r sampleBarPlot, echo=F, message=F, warning=F, dpi=300, fig.width=6, fig.height=3, fig.align="center", out.width='80%'}
230 | results$p.bar.sample
231 | ```
232 | <p align="right">(Hi-res image: <a href="./figures/sampleSource-bar.png">view</a>)</p>
233 | 
234 | ```{r echo=F}
235 | h.ii <- h.ii + 1
236 | ```
237 | 
238 | 
239 | 
240 | 
241 | ```{r diffExpr, child=system.file("rmd", "diffExpr.Rmd", package = "scCancer"), eval = results$bool.runDiffExpr}
242 | ```
243 | 
244 | 
245 | 
246 | 
247 | ```{r cellType, child=system.file("rmd", "cellTypePred.Rmd", package = "scCancer"), eval = results$bool.runCellClassify}
248 | ```
249 | 
250 | 
251 | 
252 | 
253 | ```{r malignancy, child=system.file("rmd", "malignancy.Rmd", package = "scCancer"), eval = (results$bool.runMalignancy & ("cnvList" %in% names(results)))}
254 | ```
255 | 
256 | ```{r malignancyPlot, child=system.file("rmd", "malign-comb.Rmd", package = "scCancer"), eval = (results$bool.runMalignancy & !("cnvList" %in% names(results)))}
257 | ```
258 | 
259 | 
260 | 
261 | 
262 | --------------------------------
263 | ```{r echo=FALSE, results='asis', eval = results$bool.intraTumor}
264 | # if(is.null(results$tumor.clusters)){
265 | #     cat("#### According to the results of cell type prediction and cell malignancy estimation, ",
266 | #     "we couldn't identify tumor clusters, ", 
267 | #     "so we use all clusters to perform following heterogeneity analyses.\n", sep = "")
268 | # }else{
269 | #     # cat("#### According to the results of cell type prediction and cell malignancy estimation, we identify the clusters `",
270 | #     #     str_c(results$tumor.clusters, collapse = ", "),
271 | #     #     "` as tumor clusters, and following intra-tumor heterogeneity analyses mainly focus on them.\n", sep = "")
272 | #     cat("#### According to the results of cell type prediction and cell malignancy estimation, we identify the tumor clusters, ",
273 | #         "and following intra-tumor heterogeneity analyses mainly focus on them.\n", sep = "")
274 | # }
275 | cat("#### In order to analyze <span style='color:red'>intra-tumor heterogeneity</span>, we select tumor clusters firstly based on the results of cell type prediction.\n")
276 | if(is.null(results$tumor.clusters)){
277 |     cat("#### <span style='color:red;font-size:19px'>Warning:</span> Here, we couldn't identify the tumor clusters, so we use <span style='color:red'>all clusters</span> to perform following analyses.\n")
278 | }else{
279 |     cat("#### Here, we identify <span style='color:red'>cluster `", str_c(results$tumor.clusters, collapse = ", "),
280 |         "`</span> as tumor cells. And following analyses mainly focus on them.\n", sep = "")
281 | }
282 | ```
283 | 
284 | 
285 | 
286 | 
287 | ```{r cellCycle, child=system.file("rmd", "cellCycle.Rmd", package = "scCancer"), eval = results$bool.runCellCycle}
288 | ```
289 | 
290 | 
291 | 
292 | 
293 | ```{r stemness, child=system.file("rmd", "stemness.Rmd", package = "scCancer"), eval = results$bool.runStemness}
294 | ```
295 | 
296 | 
297 | 
298 | 
299 | ```{r geneSets, child=system.file("rmd", "geneSets.Rmd", package = "scCancer"), eval = results$bool.runGeneSets}
300 | ```
301 | 
302 | 
303 | 
304 | 
305 | ```{r exprProgram, child=system.file("rmd", "exprProgram.Rmd", package = "scCancer"), eval = results$bool.runExprProgram}
306 | ```
307 | 
308 | 
309 | 
310 | 
311 | ```{r echo=F}
312 | h.i <- h.i + 1
313 | ```
314 | 
315 | 
316 | ## `r h.i` Output
317 | 
318 | ```{r echo=F}
319 | r.i <- 7
320 | ```
321 | 
322 | Running this script generates the following files:
323 | 
324 | 1. **Html report** :
325 | [report-scAnnoComb.html](./report-scAnnoComb.html).
326 | 2. **Markdown report** :
327 | [report-scAnnoComb.md](./report-scAnnoComb.md).
328 | 3. **Figure files** :
329 | [figures/](./figures/).
330 | 4. **Figures used in the report** :
331 | [report-figures/](./report-figures/).
332 | 5. **Seurat object** :
333 | [expr.RDS](./).
334 | 6. **Annotation of cells** :
335 | [cellAnnotation.txt](./cellAnnotation.txt).
336 | ```{r echo=FALSE, results='asis', eval=(results$comb.method=="SeuratMNN" | results$comb.method=="NormalMNN")}
337 | cat(r.i, ". **Anchors for batch correction** : ", sep = "")
338 | cat("[anchors.RDS](./anchors.RDS).\n", sep = "")
339 | r.i <- r.i + 1
340 | ```
341 | ```{r echo=FALSE, results='asis', eval=results$bool.runDiffExpr}
342 | cat(r.i, ". **Differentially expressed genes' information for all clusters** : ", sep = "")
343 | cat("[diff.expr.genes/](./diff.expr.genes/).\n", sep = "")
344 | r.i <- r.i + 1
345 | ```
346 | ```{r echo=FALSE, results='asis', eval=results$bool.runExprProgram}
347 | cat(r.i, ". **Results of expression programs identification** : [expr.programs/](./expr.programs/).\n", sep = "")
348 | r.i <- r.i + 1
349 | ```
350 | 
351 | 
352 | 
353 | <br>
354 | 
355 | --------------------------------------
356 | &copy; [G-Lab](http://lifeome.net/glab/jgu/),   [Tsinghua University](http://www.tsinghua.edu.cn)
357 | 
358 | 


--------------------------------------------------------------------------------
/inst/rmd/main-scStat.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "scCancer"
  3 | author: "wguo"
  4 | date: "2019/6/13"
  5 | output: html_document
  6 | ---
  7 | 
  8 | <style type="text/css">
  9 |     body{
 10 |         font-size: 15px;
 11 |         line-height: 22px;
 12 |     }
 13 |     h1.title {
 14 |         font-size: 38px;
 15 |     }
 16 |     h1 {
 17 |         font-size: 28px;
 18 |         margin-top: 23px;
 19 |     }
 20 |     h2 {
 21 |         font-size: 24px;
 22 |         margin-top: 25px;
 23 |     }
 24 |     h3 {
 25 |       font-size: 20px;
 26 |         margin-top: 25px;
 27 |     }
 28 |     code.r{
 29 |         font-size: 13px;
 30 |     }
 31 |     pre {
 32 |         font-size: 14px;
 33 |     }
 34 |     p {
 35 |         margin-top:10px;
 36 |         margin-bottom:10px;
 37 |     }
 38 |     table { 
 39 |         width: 60%;
 40 |         border-collapse: collapse;
 41 |         font-family: Futura, Arial, sans-serif;
 42 |     }
 43 |     th,td {
 44 |         padding: 5px;
 45 |     }
 46 |     th,td {
 47 |         border-bottom: 1px solid #ddd;
 48 |         border-top: 1px solid #ddd;
 49 |         padding-right: 20px
 50 |     }
 51 | </style>
 52 | 
 53 | 
 54 | 
 55 | ```{r setting, include=FALSE}
 56 | options(knitr.table.format = "html") 
 57 | options(scipen=10)
 58 | knitr::opts_chunk$set(echo = TRUE, fig.path = file.path(results$savePath, 'report-figures//'))
 59 | 
 60 | title <- "scCancer"
 61 | if(!is.null(results$sampleName)){
 62 |   title <- paste0(results$sampleName, "  -  ", title)
 63 | }
 64 | 
 65 | if(!is.null(results$authorName)){  # an explicit author name takes precedence over the environment
 66 |   userName <- results$authorName
 67 | }else{
 68 |   userName <- Sys.getenv("USERNAME", unset = Sys.getenv("USER"))  # USERNAME is typically Windows-only; fall back to USER when it is unset
 69 | }
 70 | reportMark <- Sys.time()
 71 | if(userName != ""){  # prefix the timestamp with the user name only when one was found
 72 |   reportMark <- paste0(userName, " , ", reportMark)
 73 | }
 74 | 
 75 | h.i <- 1
 76 | h.ii <- 1
 77 | ```
 78 | 
 79 | 
 80 | # `r title`
 81 | --------------------------------
 82 | <p align="right">`r reportMark`</p>
 83 | 
 84 | 
 85 | ## `r h.i` Cell statistics
 86 | 
 87 | * The input of `scCancer` pipeline is the matrix generated by [` `r results$cr.version`  `](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger).
 88 | 
 89 | ```{r echo=F, results='asis'}
 90 | if(file.exists(file.path(results$dataPath, "web_summary.html"))){
 91 |   file.copy(file.path(results$dataPath, "web_summary.html"), file.path(results$savePath, "report-cellRanger.html"), overwrite = T)
 92 |   cat("* Here is the [summary report](./report-cellRanger.html) from `Cell Ranger`.", sep = "")
 93 | }
 94 | ```
 95 | 
 96 | 
 97 | ### `r h.i`.`r h.ii` Cell calling
 98 |  
 99 | ```{r CR-Calling-T, child=system.file("rmd", "cellCalling.Rmd", package = "scCancer"), eval = results$raw.data}
100 | ```
101 | 
102 | ```{r CR-Calling-F, echo=FALSE, results='asis', eval=!results$raw.data}
103 | cat("* Raw data (containing all barcodes) cannot be found, and only filtered data are supplied. So cell-calling doesn't be performed and the analyses for background distribution is omitted.\n")
104 | cat("* For the filtered data, ", results$nList[2], " cells are identified (min.nUMI = `", results$min.nUMI, "`).\n", sep="")
105 | ```
106 | 
107 | ```{r echo=F}
108 | h.ii <- h.ii + 1
109 | # print(results$raw.data)
110 | ```
111 | 
112 | 
113 | 
114 | 
115 | ### `r h.i`.`r h.ii` The number of UMIs and detected genes in cells
116 | 
117 | After the cell calling by ` `r results$cr.version` `, we further perform quality control to 
118 | filter droplets with low quality cells according to `nUMI` (total number of UMIs) and `nGene` (total number of detected genes).
119 | 
120 | For `nUMI` :
121 | * Suggested threshold to filter cells with extremely large `nUMI` : <span style="color:red">` `r results$cell.threshold$nUMI` `</span>.
122 |     + Using this threshold, ` `r sum(results$cell.manifest$nUMI >= results$cell.threshold$nUMI)` ` cells will be filtered.
123 | 
124 | For `nGene` :
125 | * Suggested threshold to filter cells with extremely large `nGene` : <span style="color:red">` `r results$cell.threshold$nGene` `</span>.
126 |     + Using this threshold, ` `r sum(results$cell.manifest$nGene >= results$cell.threshold$nGene)` ` cells will be filtered.
127 | * Suggested threshold to filter cells with extremely small `nGene` : <span style="color:red">`200`</span>.
128 |     + Using this threshold, ` `r sum(results$cell.manifest$nGene < 200)` ` cells will be filtered.
129 | 
130 | **Comment**: The suggested thresholds (except the lower bound of `nGene`, which is set by convention) are determined based on their distributions. Using them, the outliers identified will be filtered. The same below.
131 | 
132 | 
133 | ```{r filter, echo=F, message=F, warning=F, dpi=300, fig.height=2.5, fig.width=8}
134 | plot_grid(results$p.nUMI, results$p.nGene, ncol = 2)
135 | ```
136 | <p align='right' style='margin-top:3px'>(Hi-res image: <a href='./figures/nUMI-distr.png'>left</a>, <a href='./figures/nGene-distr.png'>right</a>)</p>
137 | 
138 | 
139 | ```{r echo=F}
140 | h.i <- h.i + 1
141 | h.ii <- 1
142 | ```
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | ## `r h.i` Gene statistics
151 | The number of genes expressed in at least one cell : ` `r sum(results$gene.manifest$nCell > 0)` `.
152 | 
153 | 
154 | ### `r h.i`.`r h.ii` Mitochondrial genes
155 | Summary of mitochondrial genes percentage (`mito.percent`) in cells:
156 | ```{r mito.summary, echo=F}
157 | format(summary(results$cell.manifest$mito.percent), digits = 3)
158 | ```
159 | * Suggested threshold to filter cells with high mitochondrial genes percentage : <span style="color:red">` `r round(results$cell.threshold$mito.percent, 3)` `</span>.
160 |     + Using this threshold, ` `r sum(results$cell.manifest$mito.percent >= results$cell.threshold$mito.percent)` ` cells will be filtered.
161 | 
162 | ```{r mito, echo=FALSE, message=F, dpi=300, fig.height=4, fig.width=4, fig.align="center", out.width='40%'}
163 | results$p.mito
164 | ```
165 | <p align="right">(Hi-res image: <a href="./figures/mito-distr.png">view</a>)</p>
166 | 
167 | 
168 | ```{r echo=F}
169 | h.ii <- h.ii + 1
170 | ```
171 | 
172 | 
173 | 
174 | ### `r h.i`.`r h.ii` Ribosome genes
175 | Summary of ribosome genes percentage (`ribo.percent`) in cells:
176 | ```{r ribo.summary, echo=F}
177 | format(summary(results$cell.manifest$ribo.percent), digits = 3)
178 | ```
179 | * Suggested threshold to filter cells with high ribosome genes percentage : <span style="color:red">` `r round(results$cell.threshold$ribo.percent, 3)` `</span>.
180 |     + Using this threshold, ` `r sum(results$cell.manifest$ribo.percent >= results$cell.threshold$ribo.percent)` ` cells will be filtered.
181 | 
182 | ```{r ribo, echo=FALSE, message=F, dpi=300, fig.height=4, fig.width=4, fig.align="center", out.width='40%'}
183 | results$p.ribo
184 | ```
185 | <p align="right">(Hi-res image: <a href="./figures/ribo-distr.png">view</a>)</p>
186 | 
187 | ```{r echo=F}
188 | h.ii <- h.ii + 1
189 | ```
190 | 
191 | 
192 | 
193 | ### `r h.i`.`r h.ii` Dissociation associated genes
194 | Summary of dissociation associated genes percentage (`diss.percent`) in cells:
195 | ```{r diss.summary, echo=F}
196 | format(summary(results$cell.manifest$diss.percent), digits = 3)
197 | ```
198 | * Suggested threshold to filter cells with high dissociation genes percentage : <span style="color:red">` `r round(results$cell.threshold$diss.percent, 3)` `</span>.
199 |     + Using this threshold, ` `r sum(results$cell.manifest$diss.percent >= results$cell.threshold$diss.percent)` ` cells will be filtered.
200 | 
201 | ```{r diss, echo=FALSE, message=F, dpi=300, fig.height=4, fig.width=4, fig.align="center", out.width='40%'}
202 | results$p.diss
203 | ```
204 | <p align="right">(Hi-res image: <a href="./figures/diss-distr.png">view</a>)</p>
205 | 
206 | ```{r echo=F}
207 | h.ii <- h.ii + 1
208 | ```
209 | 
210 | 
211 | 
212 | 
213 | ### `r h.i`.`r h.ii` Ambient RNAs 
214 | 
215 | ### `r h.i`.`r h.ii`.1 Highly-expressed genes
216 | In order to analyze the gene expression profiles in detail and identify  
217 | highly-expressed genes in background mRNAs from lysed cells, 
218 | we calculate some metrics as shown below.
219 | ```{r echo=F, results='asis'}
220 | if("bg.percent" %in% colnames(results$gene.manifest)){
221 |   cat("* `bg.percent` : the expression proportion for each gene in background distribution (all droplets with `nUMI <= 10`).\n")
222 | }
223 | ```
224 | * `prop.median`	: the median of expression proportions for a gene in each cell.
225 | * `detect.rate`	: the detected (`#UMI > 0`) rate for a gene in all cells.
226 | 
227 | Here is a plot showing the distributions of gene proportion in cells for the first 100 genes (ordered by their proportion in background `bg.percent`). And the points (genes) are colored according to whether they belong to mitochondrial, ribosome, or dissociation associated genes.
228 | ```{r echo=F, results='asis'}
229 | if("bg.percent" %in% colnames(results$gene.manifest)){
230 |   cat("The red star signs mark the genes’ proportion in background.\n")
231 | }
232 | ```
233 | 
234 | ```{r genePropPlot, echo=F, message=F, warning=F, dpi=300, fig.width=8, fig.height=8, fig.align="center"}
235 | grid::grid.draw(results$p.geneProp)
236 | ```
237 | <p align="right">(Hi-res image: <a href="./figures/geneProp.png">view</a>)</p>
238 | 
239 | 
240 | ```{r echo=F, results='asis'}
241 | if("bg.percent" %in% colnames(results$gene.manifest)){
242 |   cat("The plot below shows the relationship between `bg.percent` and `prop.median`, `bg.percent` and `detect.rate`.\n")
243 | }
244 | ```
245 | 
246 | ```{r gene.plot, echo=F, message=F, warning=F, dpi=300, fig.height=4, fig.width=8}
247 | if("bg.percent" %in% colnames(results$gene.manifest)){
248 |   plot_grid(results$p.bg.cell, results$p.bg.detect, ncol = 2)
249 | }
250 | ```
251 | 
252 | ```{r echo=F, results='asis'}
253 | if("bg.percent" %in% colnames(results$gene.manifest)){
254 |   cat("<p align=\"right\">(Hi-res image: <a href=\"./figures/bg-cell-scatter.png\">left</a>, <a href=\"./figures/bg-detect-scatter.png\">right</a>)</p>\n")
255 |   # cat("(Hi-res image\n")
256 | }
257 | ```
258 | 
259 | ```{r echo=F}
260 | h.ii <- h.ii + 1
261 | ```
262 | 
263 | ```{r soupx, child=system.file("rmd", "SoupX.Rmd", package = "scCancer"), eval = results$bool.runSoupx}
264 | ```
265 | 
266 | 
267 | 
268 | ```{r echo=F}
269 | h.i <- h.i + 1
270 | h.ii <- 1
271 | ```
272 | 
273 | 
274 | 
275 | 
276 | ## `r h.i` Output
277 | 
278 | ### `r h.i`.`r h.ii` Thresholds to filter droplets
279 | According to the results of statistics and visualization, we propose following thresholds to filter cells:
280 | 
281 | <center>
282 | ```{r thresholds, echo=F, warning=F}
283 | # results$filter.thres %>% knitr::kable("html")
284 | kable(results$filter.thres)
285 | ```
286 | </center>
287 | 
288 | 
289 | * **Hint**: In general, `Cell Ranger` can filter the droplets with low nUMI. So here we set `Low.threshold` for nUMI as `0`.
290 | The users need to use the identification results of `Cell Ranger` or set a suitable threshold first to filter the possible empty droplets with fewer UMIs.
291 | 
292 | 
293 | Using these thresholds, the number of cells varies as follows:
294 | 
295 | ` `r paste0("Raw : ", results$nList[1])` ` ->
296 | ` `r paste0("cellranger3 : ", results$nList[2])` ` ->
297 | ` `r paste0("nUMI<", results$cell.threshold$nUMI, " : ", results$nList[3])` ` ->
298 | ` `r paste0("nGene>=200 : ", results$nList[4])` ` ->
299 | ` `r paste0("nGene<", results$cell.threshold$nGene, " : ", results$nList[5])` ` ->
300 | ` `r paste0("mito.percent<", round(results$cell.threshold$mito.percent, 3), " : ", results$nList[6])` ` ->
301 | ` `r paste0("ribo.percent<", round(results$cell.threshold$ribo.percent, 3), " : ", results$nList[7])` ` ->
302 | ` `r paste0("diss.percent<", round(results$cell.threshold$diss.percent, 3), " : ", results$nList[8])` `
303 | 
304 | ```{r echo=F}
305 | h.ii <- h.ii + 1  # advance the sub-section counter; resetting it to 1 repeated the previous heading's number
306 | ```
307 | 
308 | 
309 | ### `r h.i`.`r h.ii` Output files
310 | Running this script generates the following files:
311 | 
312 | ```{r echo=F}
313 | r.i <- 8
314 | ```
315 | 
316 | 1. **Html report** :
317 | [report-scStat.html](./report-scStat.html).
318 | 2. **Markdown report** :
319 | [report-scStat.md](./report-scStat.md).
320 | 3. **Figure files** :
321 | [figures/](./figures/).
322 | 4. **Figures used in the report**:
323 | [report-figures/](./report-figures/).
324 | 5. **Text file with cell manifest** :
325 | [cellManifest-all.txt](./cellManifest-all.txt).
326 | 6. **Text file with suggested thresholds as above** :
327 | [cell.QC.thres.txt](./cell.QC.thres.txt).
328 | 7. **Text file with gene manifest** :
329 | [geneManifest.txt](./geneManifest.txt).
330 | ```{r echo=F, results='asis',  eval=results$bool.runSoupx}
331 | cat(r.i, ". **RDS file with SoupX object** :[soupx-object.RDS](./).", 
332 |     sep = "")
333 | r.i <- r.i + 1
334 | ```
335 | ```{r echo=F, results='asis'}
336 | if(file.exists(file.path(results$dataPath, "web_summary.html"))){  # list the copied report only when the source file exists
337 |   cat(r.i, ". **Cell ranger html report** (Copy from the source data folder):\n", sep = "")  # number from the running counter r.i instead of a fixed 9
338 |   cat("[report-cellRanger.html](./report-cellRanger.html).\n", sep = "")
339 | }
340 | ```
341 | 
342 | 
343 | 
344 | <br>
345 | 
346 | --------------------------------------
347 | &copy; [G-Lab](http://lifeome.net/glab/jgu/),   [Tsinghua University](http://www.tsinghua.edu.cn)
348 | 


--------------------------------------------------------------------------------
/inst/rmd/malign-comb.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | ### `r h.i`.`r h.ii` Cell malignancy estimation
 3 | Using the cell malignancy results from each sample, we can get following plots.
 4 | 
 5 | Here is the t-SNE plot colored by malignancy score (left) and type (right).
 6 | 
 7 | ```{r malignPointPlot, echo=F, message=F, warning=F, dpi=300, fig.width=10, fig.height=4, fig.align="center"}
 8 | plot_grid(results$malign.plot$p.malignScore.Point, 
 9 |           results$malign.plot$p.malignType.Point, ncol = 2)
10 | ```
11 | <p align='right' style='margin-top:3px'>(Hi-res image: <a href='./figures/malignScore-point.png'>left</a>, <a href='./figures/malignType-point.png'>right</a>)</p>
12 | 
13 | 
14 | 
15 | Here is a bar plot showing the relationship between cell cluster and cell malignancy type.
16 | 
17 | ```{r malignBarPlot, echo=F, message=F, warning=F, dpi=300, fig.width=6, fig.height=3, fig.align="center", out.width='80%'}
18 | results$malign.plot$p.malignType.bar
19 | ```
20 | <p align="right">(Hi-res image: <a href="./figures/malignType-bar.png">view</a>)</p>
21 | 
22 | 
23 | The estimated cell malignancy scores and types can be found in the column `Malign.score` and `Malign.type` of the table file 
24 | [cellAnnotation.txt](./cellAnnotation.txt). 
25 | 
26 | 
27 | 
28 | ```{r echo=F}
29 | h.ii <- h.ii + 1
30 | ```
31 | 


--------------------------------------------------------------------------------
/inst/rmd/malignancy.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | ### `r h.i`.`r h.ii` Cell malignancy estimation
 3 | 
 4 | In order to distinguish malignant and non-malignant cells, we infer copy number alterations (CNV) from tumor single cell RNA-Seq data referring to the method of R package [`infercnv`](https://github.com/broadinstitute/inferCNV/wiki). Then we calculate a smoothed malignancy score based on the CNV profile. 
 5 | 
 6 | Following is the malignancy scores distribution plot for observation cells in the sample (blue) and reference cells (grey).
 7 | By detecting the bimodality in the malignancy score distribution, 
 8 | ```{r echo=FALSE, results='asis'}
 9 | if(!is.null(results$malign.thres)){
10 |     cat("we get the bimodal boundary is nearly `", format(results$malign.thres, digits = 3, scientific = T), "` (red dash line).", sep = "")
11 | }else{
12 |     cat("we cannot think the distribution is bimodality.", sep = "")
13 | }
14 | ```
15 | 
16 | ```{r malignScorePlot, echo=F, message=F, warning=F, dpi=300, fig.width=5, fig.height=4, fig.align="center", out.width='50%'}
17 | results$malign.plot$p.malignScore
18 | ```
19 | <p align="right">(Hi-res image: <a href="./figures/malignScore.png">view</a>)</p>
20 | 
21 | 
22 | 
23 | Here is the t-SNE plot colored by malignancy score (left) and type (right).
24 | 
25 | ```{r malignPointPlot, echo=F, message=F, warning=F, dpi=300, fig.width=10, fig.height=4, fig.align="center"}
26 | plot_grid(results$malign.plot$p.malignScore.Point, 
27 |           results$malign.plot$p.malignType.Point, ncol = 2)
28 | ```
29 | <p align='right' style='margin-top:3px'>(Hi-res image: <a href='./figures/malignScore-point.png'>left</a>, <a href='./figures/malignType-point.png'>right</a>)</p>
30 | 
31 | 
32 | 
33 | Here is a bar plot showing the relationship between cell cluster and cell malignancy type.
34 | 
35 | ```{r malignBarPlot, echo=F, message=F, warning=F, dpi=300, fig.width=6, fig.height=3, fig.align="center", out.width='80%'}
36 | results$malign.plot$p.malignType.bar
37 | ```
38 | <p align="right">(Hi-res image: <a href="./figures/malignType-bar.png">view</a>)</p>
39 | 
40 | 
41 | 
42 | The estimated cell malignancy scores and types can be found in the column `Malign.score` and `Malign.type` of the table file 
43 | [cellAnnotation.txt](./cellAnnotation.txt). 
44 | 
45 | 
46 | After this step, `scCancer` saves the following result files to the folder '[malignancy/](./malignancy/)':
47 |  * Estimated CNV profile of reference cells: [inferCNV-reference.txt](./malignancy/inferCNV-reference.txt).
48 |  * Estimated CNV profile of sample cells: [inferCNV-observation.txt](./malignancy/inferCNV-observation.txt).
49 |  * Malignancy scores of reference cells: [refer-malignScore.txt](./malignancy/refer-malignScore.txt).
50 | 
51 | 
52 | ```{r echo=F}
53 | h.ii <- h.ii + 1
54 | ```
55 | 


--------------------------------------------------------------------------------
/inst/rmd/stemness.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ### `r h.i`.`r h.ii` Cell stemness estimation
 4 | 
 5 | We train a stemness signature and use it to estimate stemness scores. 
 6 | The predicted stemness scores can be found in the column `Stemness.score` of the table file 
 7 | [cellAnnotation.txt](./cellAnnotation.txt).
 8 | 
 9 | Here is the scatter plot colored by estimated stemness scores.
10 | 
11 | ```{r stemnessPlot, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='60%'}
12 | results$stemness.plot
13 | ```
14 | <p align="right">(Hi-res image: <a href="./figures/stemness-point.png">view</a>)</p>
15 | 
16 | 
17 | ```{r echo=F}
18 | h.ii <- h.ii + 1
19 | ```
20 | 


--------------------------------------------------------------------------------
/inst/rmd/umap.Rmd:
--------------------------------------------------------------------------------
1 | 
2 | Here is the UMAP plot colored by cell clusters.
3 | 
4 | ```{r clusterPlotUmap, echo=F, message=F, warning=F, dpi=500, fig.width=5, fig.height=4, fig.align="center", out.width='80%'}
5 | results$seurat.plots$p.cluster.umap
6 | ```
7 | <p align="right">(Hi-res image: <a href="./figures/cluster-point-umap.png">view</a>)</p>
8 | 


--------------------------------------------------------------------------------
/inst/txt/cellCycle-genes.txt:
--------------------------------------------------------------------------------
 1 | MCM5
 2 | PCNA
 3 | TYMS
 4 | FEN1
 5 | MCM2
 6 | MCM4
 7 | RRM1
 8 | UNG
 9 | GINS2
10 | MCM6
11 | CDCA7
12 | DTL
13 | PRIM1
14 | UHRF1
15 | MLF1IP
16 | HELLS
17 | RFC2
18 | RPA2
19 | NASP
20 | RAD51AP1
21 | GMNN
22 | WDR76
23 | SLBP
24 | CCNE2
25 | UBR7
26 | POLD3
27 | MSH2
28 | ATAD2
29 | RAD51
30 | RRM2
31 | CDC45
32 | CDC6
33 | EXO1
34 | TIPIN
35 | DSCC1
36 | BLM
37 | CASP8AP2
38 | USP1
39 | CLSPN
40 | POLA1
41 | CHAF1B
42 | BRIP1
43 | E2F8
44 | HMGB2
45 | CDK1
46 | NUSAP1
47 | UBE2C
48 | BIRC5
49 | TPX2
50 | TOP2A
51 | NDC80
52 | CKS2
53 | NUF2
54 | CKS1B
55 | MKI67
56 | TMPO
57 | CENPF
58 | TACC3
59 | FAM64A
60 | SMC4
61 | CCNB2
62 | CKAP2L
63 | CKAP2
64 | AURKB
65 | BUB1
66 | KIF11
67 | ANP32E
68 | TUBB4B
69 | GTSE1
70 | KIF20B
71 | HJURP
72 | CDCA3
73 | HN1
74 | CDC20
75 | TTK
76 | CDC25C
77 | KIF2C
78 | RANGAP1
79 | NCAPD2
80 | DLGAP5
81 | CDCA2
82 | CDCA8
83 | ECT2
84 | KIF23
85 | HMMR
86 | AURKA
87 | PSRC1
88 | ANLN
89 | LBR
90 | CKAP5
91 | CENPE
92 | CTCF
93 | NEK2
94 | G2E3
95 | GAS2L3
96 | CBX5
97 | CENPA
98 | 


--------------------------------------------------------------------------------
/inst/txt/diss-genes.txt:
--------------------------------------------------------------------------------
  1 | ACTG1
  2 | ANKRD1
  3 | ARID5A
  4 | ATF3
  5 | ATF4
  6 | BAG3
  7 | BHLHE40
  8 | BRD2
  9 | BTG1
 10 | BTG2
 11 | CCNL1
 12 | CCRN4L
 13 | CEBPB
 14 | CEBPD
 15 | CEBPG
 16 | CSRNP1
 17 | CXCL1
 18 | CYR61
 19 | DCN
 20 | DDX3X
 21 | DDX5
 22 | DES
 23 | DNAJA1
 24 | DNAJB1
 25 | DNAJB4
 26 | DUSP1
 27 | DUSP8
 28 | EGR1
 29 | EGR2
 30 | EIF1
 31 | EIF5
 32 | ERF
 33 | ERRFI1
 34 | FAM132B
 35 | FOS
 36 | FOSB
 37 | FOSL2
 38 | GADD45A
 39 | GADD45G
 40 | GCC1
 41 | GEM
 42 | H3F3B
 43 | HIPK3
 44 | HSP90AA1
 45 | HSP90AB1
 46 | HSPA1A
 47 | HSPA1B
 48 | HSPA5
 49 | HSPA8
 50 | HSPB1
 51 | HSPE1
 52 | HSPH1
 53 | ID3
 54 | IDI1
 55 | IER2
 56 | IER3
 57 | IER5
 58 | IFRD1
 59 | IL6
 60 | IRF1
 61 | IRF8
 62 | ITPKC
 63 | JUN
 64 | JUNB
 65 | JUND
 66 | KCNE4
 67 | KLF2
 68 | KLF4
 69 | KLF6
 70 | KLF9
 71 | LITAF
 72 | LMNA
 73 | MAFF
 74 | MAFK
 75 | MCL1
 76 | MIDN
 77 | MIR22HG
 78 | MT1
 79 | MT2
 80 | MYADM
 81 | MYC
 82 | MYD88
 83 | NCKAP5L
 84 | NCOA7
 85 | NFKBIA
 86 | NFKBIZ
 87 | NOP58
 88 | NPPC
 89 | NR4A1
 90 | ODC1
 91 | OSGIN1
 92 | OXNAD1
 93 | PCF11
 94 | PDE4B
 95 | PER1
 96 | PHLDA1
 97 | PNP
 98 | PNRC1
 99 | PPP1CC
100 | PPP1R15A
101 | PXDC1
102 | RAP1B
103 | RASSF1
104 | RHOB
105 | RHOH
106 | RIPK1
107 | SAT1
108 | SBNO2
109 | SDC4
110 | SERPINE1
111 | SKIL
112 | SLC10A6
113 | SLC38A2
114 | SLC41A1
115 | SOCS3
116 | SQSTM1
117 | SRF
118 | SRSF5
119 | SRSF7
120 | STAT3
121 | TAGLN2
122 | TIPARP
123 | TNFAIP3
124 | TNFAIP6
125 | TPM3
126 | TPPP3
127 | TRA2A
128 | TRA2B
129 | TRIB1
130 | TUBB4B
131 | TUBB6
132 | UBC
133 | USP2
134 | WAC
135 | ZC3H12A
136 | ZFAND5
137 | ZFP36
138 | ZFP36L1
139 | ZFP36L2
140 | ZYX
141 | 


--------------------------------------------------------------------------------
/man/Read10Xdata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{Read10Xdata}
 4 | \alias{Read10Xdata}
 5 | \title{Read10Xdata}
 6 | \usage{
 7 | Read10Xdata(
 8 |   data.dir = NULL,
 9 |   gene.column = 2,
10 |   unique.features = TRUE,
11 |   only.expr = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{data.dir}{Directory containing the matrix.mtx, genes.tsv (or features.tsv), and barcodes.tsv files provided by 10X.
16 | A vector or named vector can be given in order to load several data directories.
17 | If a named vector is given, the cell barcode names will be prefixed with the name.}
18 | 
19 | \item{gene.column}{An integer indicating which column of genes.tsv or features.tsv to use for gene names; default is 2.}
20 | 
21 | \item{unique.features}{Make feature names unique (default TRUE).}
22 | 
23 | \item{only.expr}{Whether to read expression data only if have multiple features (default TRUE).}
24 | }
25 | \value{
26 | If the 10X data only has expression data or the argument 'only.expr' is TRUE,
27 | a sparse matrix containing the expression data will be returned.
28 | Otherwise, if the 10X data has multiple data types,
29 | a list containing a sparse matrix of the data from each type will be returned.
30 | }
31 | \description{
32 | Read expression matrix data from 10X. This function is modified from Seurat package.
33 | }
34 | 


--------------------------------------------------------------------------------
/man/checkAnnoArguments.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{checkAnnoArguments}
 4 | \alias{checkAnnoArguments}
 5 | \title{checkAnnoArguments}
 6 | \usage{
 7 | checkAnnoArguments(argList)
 8 | }
 9 | \arguments{
10 | \item{argList}{A list of arguments passed into 'runScAnnotation'.}
11 | }
12 | \description{
13 | checkAnnoArguments
14 | }
15 | 


--------------------------------------------------------------------------------
/man/checkCombArguments.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{checkCombArguments}
 4 | \alias{checkCombArguments}
 5 | \title{checkCombArguments}
 6 | \usage{
 7 | checkCombArguments(argList)
 8 | }
 9 | \arguments{
10 | \item{argList}{A list of arguments passed into 'runScCombination'.}
11 | }
12 | \description{
13 | checkCombArguments
14 | }
15 | 


--------------------------------------------------------------------------------
/man/checkStatArguments.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{checkStatArguments}
 4 | \alias{checkStatArguments}
 5 | \title{checkStatArguments}
 6 | \usage{
 7 | checkStatArguments(argList)
 8 | }
 9 | \arguments{
10 | \item{argList}{A list of arguments passed into 'runScStatistics'.}
11 | }
12 | \description{
13 | checkStatArguments
14 | }
15 | 


--------------------------------------------------------------------------------
/man/clusterBarPlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{clusterBarPlot}
 4 | \alias{clusterBarPlot}
 5 | \title{clusterBarPlot}
 6 | \usage{
 7 | clusterBarPlot(
 8 |   cell.annotation,
 9 |   sel.col = "Cell.Type",
10 |   cell.colors = NULL,
11 |   legend.title = NULL,
12 |   legend.position = "bottom",
13 |   legend.ncol = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{cell.annotation}{A data.frame of cells' annotation containing the cells' Cluster and other information to be colored.}
18 | 
19 | \item{sel.col}{The column name of cell.annotation, which indicates the type of cells.}
20 | 
21 | \item{cell.colors}{An array of colors used to show the cells' type. If NULL, the default colors will be used.}
22 | 
23 | \item{legend.title}{The title of legends. If NULL, the value of "sel.col" will be used.}
24 | 
25 | \item{legend.position}{The position of legends ("none", "left", "right", "bottom", "top", or two-element numeric vector).}
26 | 
27 | \item{legend.ncol}{The number of columns of legends.}
28 | }
29 | \value{
30 | A bar plot.
31 | }
32 | \description{
33 | clusterBarPlot
34 | }
35 | 


--------------------------------------------------------------------------------
/man/extractFiles.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{extractFiles}
 4 | \alias{extractFiles}
 5 | \title{extractFiles}
 6 | \usage{
 7 | extractFiles(
 8 |   savePath,
 9 |   sampleNames,
10 |   outputPath,
11 |   files = c("report-scStat.html", "report-scAnno.html"),
12 |   subfolders = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{savePath}{A path of samples' result folder.}
17 | 
18 | \item{sampleNames}{A vector of samples' names (the subfolder names in 'savePath').}
19 | 
20 | \item{outputPath}{A path to save the extracted reports.}
21 | 
22 | \item{files}{The name of files you want to extract. The default is c("report-scStat.html", "report-scAnno.html").}
23 | 
24 | \item{subfolders}{The name of subfolders for the files you want to extract. The default is NULL.
25 | It can be a character string, which means all files are under the subfolder.
26 | It can also be a character string vector with same length as "files", which are corresponding to "files".}
27 | }
28 | \description{
29 | Extract files from each sample's folder and rename them with sample's name.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/genAnnoReport.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{genAnnoReport}
 4 | \alias{genAnnoReport}
 5 | \title{genAnnoReport}
 6 | \usage{
 7 | genAnnoReport(results, savePath)
 8 | }
 9 | \arguments{
10 | \item{results}{A list generated by 'runScAnnotation'}
11 | 
12 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
13 | }
14 | \description{
15 | genAnnoReport
16 | }
17 | 


--------------------------------------------------------------------------------
/man/genStatReport.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scStatistics.R
 3 | \name{genStatReport}
 4 | \alias{genStatReport}
 5 | \title{genStatReport}
 6 | \usage{
 7 | genStatReport(results, savePath)
 8 | }
 9 | \arguments{
10 | \item{results}{A list generated by 'runScStatistics'}
11 | 
12 | \item{savePath}{A path to save the results files (suggest to create a folder named by sample name).}
13 | }
14 | \description{
15 | genStatReport
16 | }
17 | 


--------------------------------------------------------------------------------
/man/generate10Xdata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{generate10Xdata}
 4 | \alias{generate10Xdata}
 5 | \title{generate10Xdata}
 6 | \usage{
 7 | generate10Xdata(matrix, gene.info, outPath, overwrite = F)
 8 | }
 9 | \arguments{
10 | \item{matrix}{A gene-cell matrix or data.frame.}
11 | 
12 | \item{gene.info}{A data.frame of gene information. It should contain two columns,
13 | the first is gene Ensembl ID, and the second is gene symbol.
14 | The order of the genes should be consistent with the row order of 'matrix'.}
15 | 
16 | \item{outPath}{A path to save the output files.}
17 | 
18 | \item{overwrite}{If TRUE and the output file already exists, the file is
19 | silently overwritten, otherwise an exception is thrown. The default is "FALSE".}
20 | }
21 | \description{
22 | Generate a 10X-like data folder based on the data matrix and gene information,
23 | which can be used directly to perform scCancer analysis.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/getCellTypeColor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{getCellTypeColor}
 4 | \alias{getCellTypeColor}
 5 | \title{getCellTypeColor}
 6 | \usage{
 7 | getCellTypeColor(cell.types)
 8 | }
 9 | \arguments{
10 | \item{cell.types}{A vector of cell types.}
11 | }
12 | \value{
13 | A vector of colors.
14 | }
15 | \description{
16 | getCellTypeColor
17 | }
18 | 


--------------------------------------------------------------------------------
/man/getDefaultColors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{getDefaultColors}
 4 | \alias{getDefaultColors}
 5 | \title{getDefaultColors}
 6 | \usage{
 7 | getDefaultColors(n = NULL, type = 1)
 8 | }
 9 | \arguments{
10 | \item{n}{The number of colors.}
11 | 
12 | \item{type}{The type of color style. Only 1, 2, or 3 is allowed.}
13 | }
14 | \value{
15 | A vector of colors.
16 | }
17 | \description{
18 | getDefaultColors
19 | }
20 | 


--------------------------------------------------------------------------------
/man/getDefaultGeneSets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{getDefaultGeneSets}
 4 | \alias{getDefaultGeneSets}
 5 | \title{getDefaultGeneSets}
 6 | \usage{
 7 | getDefaultGeneSets(species = "human")
 8 | }
 9 | \arguments{
10 | \item{species}{A character string indicating what species the sample belongs to.
11 | Only "human"(default) or "mouse" are allowed.}
12 | }
13 | \value{
14 | A list of gene sets (50 hallmark gene sets).
15 | }
16 | \description{
17 | getDefaultGeneSets
18 | }
19 | 


--------------------------------------------------------------------------------
/man/getDefaultMarkers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{getDefaultMarkers}
 4 | \alias{getDefaultMarkers}
 5 | \title{getDefaultMarkers}
 6 | \usage{
 7 | getDefaultMarkers(species = "human")
 8 | }
 9 | \arguments{
10 | \item{species}{A character string indicating what species the sample belongs to.
11 | Only "human"(default) or "mouse" are allowed.}
12 | }
13 | \value{
14 | A list of default markers of several common cell types.
15 | }
16 | \description{
17 | Return default markers of several common cell types.
18 | }
19 | 


--------------------------------------------------------------------------------
/man/getTumorCluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{getTumorCluster}
 4 | \alias{getTumorCluster}
 5 | \title{getTumorCluster}
 6 | \usage{
 7 | getTumorCluster(cell.annotation, epi.thres = 0.6, malign.thres = 0.8)
 8 | }
 9 | \arguments{
10 | \item{cell.annotation}{A data.frame of cells' annotation containing predicted cell types and estimated cell malignant type.}
11 | 
12 | \item{epi.thres}{A threshold for epithelial cell percent to decide putative tumor clusters.}
13 | 
14 | \item{malign.thres}{A threshold for malignant cell percent to decide putative tumor clusters.}
15 | }
16 | \value{
17 | A list of identified tumor clusters. If no clusters are found, return NULL.
18 | }
19 | \description{
20 | Identify tumor clusters according to the results of cell type prediction and cell malignancy estimation.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/ggplot_config.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{ggplot_config}
 4 | \alias{ggplot_config}
 5 | \title{ggplot_config}
 6 | \usage{
 7 | ggplot_config(base.size = 8)
 8 | }
 9 | \arguments{
10 | \item{base.size}{The size of text.}
11 | }
12 | \value{
13 | A theme.
14 | }
15 | \description{
16 | ggplot_config
17 | }
18 | 


--------------------------------------------------------------------------------
/man/markerPlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{markerPlot}
 4 | \alias{markerPlot}
 5 | \title{markerPlot}
 6 | \usage{
 7 | markerPlot(
 8 |   expr.data,
 9 |   coor.df,
10 |   coor.names = c("tSNE_1", "tSNE_2"),
11 |   features = NULL,
12 |   add = T,
13 |   species = "human",
14 |   font.size = 4,
15 |   color = "blue"
16 | )
17 | }
18 | \arguments{
19 | \item{expr.data}{A matrix of expression (gene by cell)}
20 | 
21 | \item{coor.df}{A data.frame which contains cells' 2D coordinates.}
22 | 
23 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.}
24 | 
25 | \item{features}{A vector of genes to plot.}
26 | 
27 | \item{add}{A logical value indicating whether to present the default markers.}
28 | 
29 | \item{species}{A character string indicating what species the sample belongs to.
30 | Only "human"(default) or "mouse" are allowed.}
31 | 
32 | \item{font.size}{The size of labels.}
33 | 
34 | \item{color}{The color of point.}
35 | }
36 | \value{
37 | A list of ggplot objects, one for each marker gene.
38 | }
39 | \description{
40 | Generate plots of interested genes' expression profile.
41 | }
42 | 


--------------------------------------------------------------------------------
/man/plotCellInteraction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{plotCellInteraction}
 4 | \alias{plotCellInteraction}
 5 | \title{plotCellInteraction}
 6 | \usage{
 7 | plotCellInteraction(stat.df, cell.annotation)
 8 | }
 9 | \arguments{
10 | \item{stat.df}{A data.frame of cell sets interaction result.}
11 | 
12 | \item{cell.annotation}{A data.frame of cells' annotation containing the cells' cluster and type.}
13 | }
14 | \value{
15 | A plot showing the result of cell interaction.
16 | }
17 | \description{
18 | plotCellInteraction
19 | }
20 | 


--------------------------------------------------------------------------------
/man/plotExprProgram.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{plotExprProgram}
 4 | \alias{plotExprProgram}
 5 | \title{plotExprProgram}
 6 | \usage{
 7 | plotExprProgram(
 8 |   H,
 9 |   cell.annotation,
10 |   bool.limit = T,
11 |   sel.clusters = NULL,
12 |   savePath = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{H}{The decomposed right matrix H.}
17 | 
18 | \item{cell.annotation}{A data.frame of cells' annotation containing cluster information.}
19 | 
20 | \item{bool.limit}{A logical value indicating whether to set upper and lower limit when plot heatmap.}
21 | 
22 | \item{sel.clusters}{A vector of selected clusters to analyze. The default is NULL and all clusters will be used.}
23 | 
24 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
25 | }
26 | \value{
27 | A heatmap for cells' expression programs.
28 | }
29 | \description{
30 | plotExprProgram
31 | }
32 | 


--------------------------------------------------------------------------------
/man/plotGeneSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{plotGeneSet}
 4 | \alias{plotGeneSet}
 5 | \title{plotGeneSet}
 6 | \usage{
 7 | plotGeneSet(cell.annotation, prefix = "GS__", bool.limit = T, savePath = NULL)
 8 | }
 9 | \arguments{
10 | \item{cell.annotation}{A data.frame of cells' annotation containing gene set signature scores.}
11 | 
12 | \item{prefix}{A prefix string of column names for gene sets.}
13 | 
14 | \item{bool.limit}{A logical value indicating whether to set upper and lower limit when plot heatmap.}
15 | 
16 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
17 | }
18 | \value{
19 | A heatmap for gene set signature scores.
20 | }
21 | \description{
22 | plotGeneSet
23 | }
24 | 


--------------------------------------------------------------------------------
/man/plotMalignancy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cnvFunction.R
 3 | \name{plotMalignancy}
 4 | \alias{plotMalignancy}
 5 | \title{plotMalignancy}
 6 | \usage{
 7 | plotMalignancy(
 8 |   cell.annotation,
 9 |   coor.names = c("tSNE_1", "tSNE_2"),
10 |   savePath = NULL
11 | )
12 | }
13 | \arguments{
14 | \item{cell.annotation}{A data.frame of cells' annotation containing the cells'
15 | malignancy score (`Malign.score`) and type (`Malign.type`).}
16 | 
17 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.}
18 | 
19 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
20 | }
21 | \value{
22 | A plot list.
23 | }
24 | \description{
25 | plotMalignancy
26 | }
27 | 


--------------------------------------------------------------------------------
/man/plotSeurat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{plotSeurat}
 4 | \alias{plotSeurat}
 5 | \title{plotSeurat}
 6 | \usage{
 7 | plotSeurat(
 8 |   expr,
 9 |   cell.annotation = cell.annotation,
10 |   show.features = NULL,
11 |   bool.add.features = T,
12 |   coor.names = c("tSNE_1", "tSNE_2"),
13 |   bool.plotHVG = T,
14 |   bool.runDiffExpr = T,
15 |   diff.expr.genes = NULL,
16 |   n.markers = 5,
17 |   species = "human",
18 |   savePath
19 | )
20 | }
21 | \arguments{
22 | \item{expr}{A Seurat object.}
23 | 
24 | \item{cell.annotation}{A data.frame of cells' annotation.}
25 | 
26 | \item{show.features}{A list or vector for genes to be plotted in 'markerPlot'.}
27 | 
28 | \item{bool.add.features}{A logical value indicating whether to add default features to 'show.features' or not.}
29 | 
30 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.}
31 | 
32 | \item{bool.plotHVG}{A logical value indicating whether to plot highly variable genes.}
33 | 
34 | \item{bool.runDiffExpr}{A logical value indicating whether to perform differential expressed analysis.}
35 | 
36 | \item{diff.expr.genes}{A data.frame of differential expressed genes.}
37 | 
38 | \item{n.markers}{An integer indicating the number of differential expressed genes shown in the plot. The default is 5.}
39 | 
40 | \item{species}{A character string indicating what species the sample belongs to.
41 | Only "human"(default) or "mouse" are allowed.}
42 | 
43 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
44 | }
45 | \value{
46 | A list of all plots generated by Seurat analyses.
47 | }
48 | \description{
49 | Construct and save plots of Seurat analysis.
50 | }
51 | 


--------------------------------------------------------------------------------
/man/pointDRPlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{pointDRPlot}
 4 | \alias{pointDRPlot}
 5 | \title{pointDRPlot}
 6 | \usage{
 7 | pointDRPlot(
 8 |   cell.annotation,
 9 |   value,
10 |   sel.clusters = NULL,
11 |   coor.names = c("tSNE_1", "tSNE_2"),
12 |   colors = NULL,
13 |   discrete = T,
14 |   limit.quantile = 0,
15 |   point.type = 1,
16 |   legend.position = "right",
17 |   legend.title = NULL
18 | )
19 | }
20 | \arguments{
21 | \item{cell.annotation}{A data.frame of cells' annotation containing the cells' coordinates and index to be colored.}
22 | 
23 | \item{value}{The column name of cell.annotation, which is mapped to the colors of points.}
24 | 
25 | \item{sel.clusters}{An array of selected clusters to present. (The default is NULL and all clusters will be used.)}
26 | 
27 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.}
28 | 
29 | \item{colors}{An array of colors used to show the gradients or type of points. If NULL, the default colors will be used.}
30 | 
31 | \item{discrete}{A logical value indicating whether the value column is discrete or not.}
32 | 
33 | \item{limit.quantile}{A quantile threshold to limit the data and reduce the influence of outliers.}
34 | 
35 | \item{point.type}{A number indicating the shape type of points. "1" (default) means the point has a lightgrey border, and "2" means not.}
36 | 
37 | \item{legend.position}{The position of legends ("none", "left", "right", "bottom", "top", or two-element numeric vector).}
38 | 
39 | \item{legend.title}{The title of legends.}
40 | }
41 | \value{
42 | A ggplot object for the scatter plot.
43 | }
44 | \description{
45 | Plot scatter for cells.
46 | }
47 | 


--------------------------------------------------------------------------------
/man/predCellType.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{predCellType}
 4 | \alias{predCellType}
 5 | \title{predCellType}
 6 | \usage{
 7 | predCellType(X.test, ct.templates = NULL, species = "human")
 8 | }
 9 | \arguments{
10 | \item{X.test}{A cells expression matrix (row for genes, column for cells).}
11 | 
12 | \item{ct.templates}{A list of gene weight vectors for each cell type.}
13 | 
14 | \item{species}{A character string indicating what species the sample belongs to.
15 | Only "human"(default) or "mouse" are allowed.}
16 | }
17 | \value{
18 | A list of predicted cell types and the relative correlations.
19 | }
20 | \description{
21 | predCellType
22 | }
23 | 


--------------------------------------------------------------------------------
/man/prepareData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scStatistics.R
 3 | \name{prepareData}
 4 | \alias{prepareData}
 5 | \title{prepareData}
 6 | \usage{
 7 | prepareData(
 8 |   samplePath,
 9 |   species = "human",
10 |   hg.mm.mix = F,
11 |   hg.mm.thres = 0.9,
12 |   mix.anno = c(human = "hg19", mouse = "mm10")
13 | )
14 | }
15 | \arguments{
16 | \item{samplePath}{A path containing the cell ranger processed data.}
17 | 
18 | \item{species}{A character string indicating what species the sample belongs to.
19 | Must be one of "human"(default) and "mouse".}
20 | 
21 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of
22 | human cells and mouse cells(such as PDX sample).
23 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.}
24 | 
25 | \item{hg.mm.thres}{A floating-point threshold within [0.5, 1] to identify human and mouse cells.
26 | Cells with UMI percentage of single species larger than the threshold are labeled human or mouse cells.
27 | The default is 0.9.}
28 | 
29 | \item{mix.anno}{A vector to indicate the prefix of genes from different species.
30 | The default is c("human" = "hg19", "mouse" = "mm10").}
31 | }
32 | \value{
33 | A list of expr.data, cell.manifest, gene.manifest, raw.data, min.nUMI, cr.version and run.emptydrop
34 | }
35 | \description{
36 | prepareData
37 | }
38 | 


--------------------------------------------------------------------------------
/man/prepareSeurat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{prepareSeurat}
 4 | \alias{prepareSeurat}
 5 | \title{prepareSeurat}
 6 | \usage{
 7 | prepareSeurat(
 8 |   dataPath,
 9 |   statPath,
10 |   savePath,
11 |   sampleName = "sc",
12 |   bool.filter.cell = T,
13 |   bool.filter.gene = T,
14 |   anno.filter = c("mitochondrial", "ribosome", "dissociation"),
15 |   nCell.min = 3,
16 |   bgPercent.max = 1,
17 |   hg.mm.mix = F,
18 |   bool.rmContamination = T,
19 |   vars.add.meta = c("mito.percent", "ribo.percent", "diss.percent"),
20 |   vars.to.regress = c("nCount_RNA", "mito.percent", "ribo.percent")
21 | )
22 | }
23 | \arguments{
24 | \item{dataPath}{A path containing the cell ranger processed data.
25 | Under this path, folders 'filtered_feature_bc_matrix' and 'raw_feature_bc_matrix' exist generally.}
26 | 
27 | \item{statPath}{A path containing the results files of step 'runScStatistics'.}
28 | 
29 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
30 | 
31 | \item{sampleName}{A character string giving a label for this sample.}
32 | 
33 | \item{bool.filter.cell}{A logical value indicating whether to filter the cells
34 | according to the QC of 'scStatistics'.}
35 | 
36 | \item{bool.filter.gene}{A logical value indicating whether to filter the genes
37 | according to the QC of 'scStatistics'.}
38 | 
39 | \item{anno.filter}{A vector indicating the types of genes to be filtered.
40 | Must be some of c("mitochondrial", "ribosome", "dissociation")(default) or NULL.}
41 | 
42 | \item{nCell.min}{An integer number used to filter gene. The default is 3.
43 | Genes with the number of expressed cells less than this threshold will be filtered.}
44 | 
45 | \item{bgPercent.max}{A float number used to filter gene. The default is 1 (no filtering).
46 | Genes with the background percentage larger than this threshold will be filtered.}
47 | 
48 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of
49 | human cells and mouse cells(such as PDX sample).
50 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.}
51 | 
52 | \item{bool.rmContamination}{A logical value indicating whether to remove ambient RNA contamination based on 'SoupX'.}
53 | 
54 | \item{vars.add.meta}{A vector indicating the variables to be added to Seurat object's meta.data.
55 | The default is c("mito.percent", "ribo.percent", "diss.percent").}
56 | 
57 | \item{vars.to.regress}{A vector indicating the variables to regress out in R package Seurat.
58 | The default is c("nCount_RNA", "mito.percent", "ribo.percent").}
59 | }
60 | \value{
61 | A list of Seurat object and gene.manifest.
62 | The Seurat object is after log-normalization, highly variable genes identification, scaling data.
63 | }
64 | \description{
65 | According to the QC results of scStatistics, filter cells and genes.
66 | Prepare a Seurat object.
67 | }
68 | 


--------------------------------------------------------------------------------
/man/runCellClassify.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{runCellClassify}
 4 | \alias{runCellClassify}
 5 | \title{runCellClassify}
 6 | \usage{
 7 | runCellClassify(
 8 |   expr,
 9 |   cell.annotation,
10 |   coor.names = c("tSNE_1", "tSNE_2"),
11 |   savePath,
12 |   ct.templates = NULL,
13 |   species = "human"
14 | )
15 | }
16 | \arguments{
17 | \item{expr}{A Seurat object.}
18 | 
19 | \item{cell.annotation}{A data.frame of cells' annotation.}
20 | 
21 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.}
22 | 
23 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
24 | 
25 | \item{ct.templates}{A list of vectors of several cell type templates.
26 | The default is NULL and the templates prepared in this package will be used.}
27 | 
28 | \item{species}{A character string indicating what species the sample belongs to.
29 | Only "human"(default) or "mouse" are allowed.}
30 | }
31 | \value{
32 | A list of updated Seurat object, cell.annotation, and the plots for cell type annotation.
33 | }
34 | \description{
35 | Use a one-class logistic regression (OCLR) model to predict cancer microenvironment cell types.
36 | }
37 | 


--------------------------------------------------------------------------------
/man/runCellCycle.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{runCellCycle}
 4 | \alias{runCellCycle}
 5 | \title{runCellCycle}
 6 | \usage{
 7 | runCellCycle(expr, species = "human")
 8 | }
 9 | \arguments{
10 | \item{expr}{A Seurat object.}
11 | 
12 | \item{species}{A character string indicating what species the sample belongs to.
13 | Only "human"(default) or "mouse" are allowed.}
14 | }
15 | \value{
16 | An array of cell cycle scores.
17 | }
18 | \description{
19 | Estimate cell cycle scores.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/runCellInteraction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{runCellInteraction}
 4 | \alias{runCellInteraction}
 5 | \title{runCellInteraction}
 6 | \usage{
 7 | runCellInteraction(
 8 |   expr,
 9 |   cellSetName = "default",
10 |   species = "human",
11 |   savePath = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{expr}{A Seurat object.}
16 | 
17 | \item{cellSetName}{The column name of `expr`'s `meta.data`, used to indicate the cell set annotation.}
18 | 
19 | \item{species}{A character string indicating what species the sample belongs to.
20 | Only "human"(default) or "mouse" are allowed.}
21 | 
22 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
23 | }
24 | \value{
25 | A data frame which contains the cell sets ligand-receptor pairs and their scores.
26 | }
27 | \description{
28 | runCellInteraction
29 | }
30 | 


--------------------------------------------------------------------------------
/man/runDoublet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{runDoublet}
 4 | \alias{runDoublet}
 5 | \title{runDoublet}
 6 | \usage{
 7 | runDoublet(expr, method = "cxds", pc.use = 30)
 8 | }
 9 | \arguments{
10 | \item{expr}{A Seurat object.}
11 | 
12 | \item{method}{The method to estimate doublet score. The default is "cxds".}
13 | 
14 | \item{pc.use}{An integer number indicating the number of PCs to use as input features. The default is 30.}
15 | }
16 | \value{
17 | An array of doublet scores.
18 | }
19 | \description{
20 | runDoublet
21 | }
22 | 


--------------------------------------------------------------------------------
/man/runExprProgram.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{runExprProgram}
 4 | \alias{runExprProgram}
 5 | \title{runExprProgram}
 6 | \usage{
 7 | runExprProgram(
 8 |   expr,
 9 |   rank = 50,
10 |   sel.clusters = NULL,
11 |   clusterStashName = "default",
12 |   savePath = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{expr}{A Seurat object.}
17 | 
18 | \item{rank}{An integer of decomposition rank used in NMF.}
19 | 
20 | \item{sel.clusters}{A vector of selected clusters to analyze. The default is NULL and all clusters will be used.}
21 | 
22 | \item{clusterStashName}{A character string used as the name of cluster identities. The default is "default".}
23 | 
24 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
25 | }
26 | \value{
27 | A list of decomposed matrices (W and H), and the relative genes of each program.
28 | }
29 | \description{
30 | Perform non-negative matrix factorization (NMF) to identify expression programs.
31 | }
32 | 


--------------------------------------------------------------------------------
/man/runGeneSets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{runGeneSets}
 4 | \alias{runGeneSets}
 5 | \title{runGeneSets}
 6 | \usage{
 7 | runGeneSets(expr, geneSets, method = "average")
 8 | }
 9 | \arguments{
10 | \item{expr}{A Seurat object.}
11 | 
12 | \item{geneSets}{A list of gene sets to be analyzed. The default is NULL and 50 hallmark gene sets from MSigDB will be used.}
13 | 
14 | \item{method}{The method used to calculate gene set scores. Currently, only "average" and "GSVA" are allowed.}
15 | }
16 | \value{
17 | A data.frame of calculated gene set signature scores.
18 | }
19 | \description{
20 | Calculate gene set signature scores for cells.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/runMalignancy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cnvFunction.R
 3 | \name{runMalignancy}
 4 | \alias{runMalignancy}
 5 | \title{runMalignancy}
 6 | \usage{
 7 | runMalignancy(
 8 |   expr,
 9 |   gene.manifest,
10 |   cell.annotation,
11 |   savePath,
12 |   cutoff = 0.1,
13 |   minCell = 3,
14 |   p.value.cutoff = 0.5,
15 |   coor.names = c("tSNE_1", "tSNE_2"),
16 |   ref.data = NULL,
17 |   referAdjMat = NULL,
18 |   species = "human",
19 |   genome = "hg19",
20 |   hg.mm.mix = F
21 | )
22 | }
23 | \arguments{
24 | \item{expr}{A Seurat object.}
25 | 
26 | \item{gene.manifest}{A data.frame of genes' manifest.}
27 | 
28 | \item{cell.annotation}{A data.frame of cells' annotation.}
29 | 
30 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
31 | 
32 | \item{cutoff}{The cut-off for min average read counts per gene among
33 | reference cells. The default is 0.1.}
34 | 
35 | \item{minCell}{An integer number used to filter gene. The default is 3.}
36 | 
37 | \item{p.value.cutoff}{The p-value to decide whether the distribution of
38 | malignancy score is bimodal.}
39 | 
40 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.}
41 | 
42 | \item{ref.data}{An expression matrix of gene by cell, which is used as the normal reference.
43 | The default is NULL, and an immune cells or bone marrow cells expression matrix will be used for human or mouse species, respectively.}
44 | 
45 | \item{referAdjMat}{An adjacency matrix for the normal reference data.
46 | The larger the value, the closer the cell pair is.
47 | The default is NULL, and a SNN matrix of the default ref.data will be used.}
48 | 
49 | \item{species}{A character string indicating what species the sample belongs to.
50 | Only "human"(default) or "mouse" are allowed.}
51 | 
52 | \item{genome}{A character string indicating the version of the reference gene annotation information.
53 | This information is mainly used to infer CNV profile and estimate malignancy.
54 | Only 'hg19' (default) or 'hg38' are allowed for "human" species, and only "mm10" is allowed for "mouse" species.}
55 | 
56 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of
57 | human cells and mouse cells(such as PDX sample).
58 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.}
59 | }
60 | \value{
61 | A list of cnvList, reference malignancy score, seurat object,
62 | cell.annotation, bimodal.pvalue, malign.thres, and all generated plots.
63 | }
64 | \description{
65 | runMalignancy
66 | }
67 | 


--------------------------------------------------------------------------------
/man/runScAnnotation.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/scAnnotation.R
  3 | \name{runScAnnotation}
  4 | \alias{runScAnnotation}
  5 | \title{runScAnnotation}
  6 | \usage{
  7 | runScAnnotation(
  8 |   dataPath,
  9 |   statPath,
 10 |   savePath = NULL,
 11 |   authorName = NULL,
 12 |   sampleName = "sc",
 13 |   bool.filter.cell = T,
 14 |   bool.filter.gene = T,
 15 |   anno.filter = c("mitochondrial", "ribosome", "dissociation"),
 16 |   nCell.min = 3,
 17 |   bgPercent.max = 1,
 18 |   bool.rmContamination = F,
 19 |   vars.add.meta = c("mito.percent", "ribo.percent", "diss.percent"),
 20 |   vars.to.regress = c("nCount_RNA", "mito.percent", "ribo.percent"),
 21 |   pc.use = 30,
 22 |   resolution = 0.8,
 23 |   clusterStashName = "default",
 24 |   show.features = NULL,
 25 |   bool.add.features = T,
 26 |   bool.runDiffExpr = T,
 27 |   n.markers = 5,
 28 |   species = "human",
 29 |   genome = "hg19",
 30 |   hg.mm.mix = F,
 31 |   bool.runDoublet = T,
 32 |   doublet.method = "bcds",
 33 |   bool.runCellClassify = T,
 34 |   ct.templates = NULL,
 35 |   coor.names = c("tSNE_1", "tSNE_2"),
 36 |   bool.runMalignancy = T,
 37 |   cnv.ref.data = NULL,
 38 |   cnv.referAdjMat = NULL,
 39 |   cutoff = 0.1,
 40 |   p.value.cutoff = 0.5,
 41 |   bool.intraTumor = T,
 42 |   bool.runCellCycle = T,
 43 |   bool.runStemness = T,
 44 |   bool.runGeneSets = T,
 45 |   geneSets = NULL,
 46 |   geneSet.method = "average",
 47 |   bool.runExprProgram = T,
 48 |   nmf.rank = 50,
 49 |   bool.runInteraction = T,
 50 |   genReport = T
 51 | )
 52 | }
 53 | \arguments{
 54 | \item{dataPath}{A path containing the cell ranger processed data.
 55 | Under this path, folders 'filtered_feature_bc_matrix' and 'raw_feature_bc_matrix' exist generally.}
 56 | 
 57 | \item{statPath}{A path containing the results files of step 'runScStatistics'.}
 58 | 
 59 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
 60 | 
 61 | \item{authorName}{A character string for authors name and will be shown in the report.}
 62 | 
 63 | \item{sampleName}{A character string giving a label for this sample.}
 64 | 
 65 | \item{bool.filter.cell}{A logical value indicating whether to filter the cells
 66 | according to the QC of 'scStatistics'.}
 67 | 
 68 | \item{bool.filter.gene}{A logical value indicating whether to filter the genes
 69 | according to the QC of 'scStatistics'.}
 70 | 
 71 | \item{anno.filter}{A vector indicating the types of genes to be filtered.
 72 | Must be some of c("mitochondrial", "ribosome", "dissociation")(default) or NULL.}
 73 | 
 74 | \item{nCell.min}{An integer number used to filter gene. The default is 3.
 75 | Genes with the number of expressed cells less than this threshold will be filtered.}
 76 | 
 77 | \item{bgPercent.max}{A float number used to filter gene. The default is 1 (no filtering).
 78 | Genes with the background percentage larger than this threshold will be filtered.}
 79 | 
 80 | \item{bool.rmContamination}{A logical value indicating whether to remove ambient RNA contamination based on 'SoupX'.}
 81 | 
 82 | \item{vars.add.meta}{A vector indicating the variables to be added to Seurat object's meta.data.
 83 | The default is c("mito.percent", "ribo.percent", "diss.percent").}
 84 | 
 85 | \item{vars.to.regress}{A vector indicating the variables to regress out in R package Seurat.
 86 | The default is c("nCount_RNA", "mito.percent", "ribo.percent").}
 87 | 
 88 | \item{pc.use}{An integer number indicating the number of PCs to use as input features. The default is 30.}
 89 | 
 90 | \item{resolution}{A float number used in function 'FindClusters' in Seurat. The default is 0.8.}
 91 | 
 92 | \item{clusterStashName}{A character string used as the name of cluster identities. The default is "default".}
 93 | 
 94 | \item{show.features}{A list or vector for genes to be plotted in 'markerPlot'.}
 95 | 
 96 | \item{bool.add.features}{A logical value indicating whether to add default features to 'show.features' or not.}
 97 | 
 98 | \item{bool.runDiffExpr}{A logical value indicating whether to perform differential expression analysis.}
 99 | 
100 | \item{n.markers}{An integer indicating the number of differentially expressed genes shown in the plot. The default is 5.}
101 | 
102 | \item{species}{A character string indicating what species the sample belongs to.
103 | Only "human"(default) or "mouse" are allowed.}
104 | 
105 | \item{genome}{A character string indicating the version of the reference gene annotation information.
106 | This information is mainly used to infer CNV profile and estimate malignancy.
107 | Only 'hg19' (default) or 'hg38' are allowed for "human" species, and only "mm10" is allowed for "mouse" species.}
108 | 
109 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of
110 | human cells and mouse cells(such as PDX sample).
111 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.}
112 | 
113 | \item{bool.runDoublet}{A logical value indicating whether to estimate doublet scores.}
114 | 
115 | \item{doublet.method}{The method to estimate doublet score. The default is "bcds".
116 | "cxds"(co-expression based doublet scoring) and "bcds"(binary classification based doublet scoring) are allowed.
117 | These methods are from R package "scds".}
118 | 
119 | \item{bool.runCellClassify}{A logical value indicating whether to predict the usual cell type. The default is TRUE.}
120 | 
121 | \item{ct.templates}{A list of vectors of several cell type templates.
122 | The default is NULL and the templates prepared in this package will be used.}
123 | 
124 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.}
125 | 
126 | \item{bool.runMalignancy}{A logical value indicating whether to estimate malignancy.}
127 | 
128 | \item{cnv.ref.data}{An expression matrix of gene by cell, which is used as the normal reference during estimating malignancy.
129 | The default is NULL, and an immune cells or bone marrow cells expression matrix will be used for human or mouse species, respectively.}
130 | 
131 | \item{cnv.referAdjMat}{An adjacency matrix for the normal reference data.
132 | The larger the value, the closer the cell pair is.
133 | The default is NULL, and a SNN matrix of the default ref.data will be used.}
134 | 
135 | \item{cutoff}{A threshold used in the CNV inference.}
136 | 
137 | \item{p.value.cutoff}{A threshold to decide whether the bimodality of the malignancy score distribution is significant.}
138 | 
139 | \item{bool.intraTumor}{A logical value indicating whether to use the identified tumor clusters to
140 | perform following intra-tumor heterogeneity analyses.}
141 | 
142 | \item{bool.runCellCycle}{A logical value indicating whether to estimate cell cycle scores.}
143 | 
144 | \item{bool.runStemness}{A logical value indicating whether to estimate stemness scores.}
145 | 
146 | \item{bool.runGeneSets}{A logical value indicating whether to estimate gene sets signature scores.}
147 | 
148 | \item{geneSets}{A list of gene sets to be analyzed. The default is NULL and 50 hallmark gene sets from MSigDB will be used.}
149 | 
150 | \item{geneSet.method}{The method used to calculate gene set scores. Currently, only "average" and "GSVA" are allowed.}
151 | 
152 | \item{bool.runExprProgram}{A logical value indicating whether to run non-negative matrix factorization (NMF) to identify expression programs.}
153 | 
154 | \item{nmf.rank}{An integer of decomposition rank used in NMF.}
155 | 
156 | \item{bool.runInteraction}{A logical value indicating whether to run cell set ligand-receptor interaction analysis.}
157 | 
158 | \item{genReport}{A logical value indicating whether to generate a .html/.md report (suggest to set TRUE).}
159 | }
160 | \value{
161 | A results list with all useful objects used in the function.
162 | }
163 | \description{
164 | According to the results of 'runScStatistics', perform cell and gene quality control.
165 | Using the R package Seurat to perform basic operations (normalization, log-transformation,
166 | highly variable genes identification, removing unwanted variance, scaling, centering,
167 | dimension reduction, clustering, and differential expression analysis).
168 | Perform some cancer-specific analyses: cancer micro-environmental cell type classification,
169 | cell malignancy estimation, cell cycle analysis, cell stemness analysis,
170 | gene set signature analysis, expression programs identification, and so on.
171 | }
172 | 


--------------------------------------------------------------------------------
/man/runScCombination.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/scCombination.R
  3 | \name{runScCombination}
  4 | \alias{runScCombination}
  5 | \title{runScCombination}
  6 | \usage{
  7 | runScCombination(
  8 |   single.savePaths,
  9 |   sampleNames,
 10 |   savePath,
 11 |   combName,
 12 |   authorName = NULL,
 13 |   comb.method = "NormalMNN",
 14 |   harmony.theta = NULL,
 15 |   harmony.lambda = NULL,
 16 |   harmony.sigma = 0.1,
 17 |   vars.to.regress = c("nCount_RNA", "mito.percent", "ribo.percent"),
 18 |   pc.use = 30,
 19 |   resolution = 0.8,
 20 |   clusterStashName = "comb.cluster",
 21 |   show.features = NULL,
 22 |   bool.add.features = T,
 23 |   bool.runDiffExpr = T,
 24 |   n.markers = 5,
 25 |   sample.colors = NULL,
 26 |   species = "human",
 27 |   genome = "hg19",
 28 |   hg.mm.mix = F,
 29 |   bool.runCellClassify = T,
 30 |   ct.templates = NULL,
 31 |   coor.names = c("tSNE_1", "tSNE_2"),
 32 |   bool.runMalignancy = T,
 33 |   cnv.ref.data = NULL,
 34 |   cnv.referAdjMat = NULL,
 35 |   cutoff = 0.1,
 36 |   p.value.cutoff = 0.5,
 37 |   bool.intraTumor = T,
 38 |   bool.runCellCycle = T,
 39 |   bool.runStemness = T,
 40 |   bool.runGeneSets = T,
 41 |   geneSets = NULL,
 42 |   geneSet.method = "average",
 43 |   bool.runExprProgram = T,
 44 |   nmf.rank = 50,
 45 |   genReport = T
 46 | )
 47 | }
 48 | \arguments{
 49 | \item{single.savePaths}{A vector of paths containing the results files of step 'runScAnnotation' for each sample.}
 50 | 
 51 | \item{sampleNames}{A vector of labels for all samples.}
 52 | 
 53 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
 54 | 
 55 | \item{combName}{A label for the combined samples.}
 56 | 
 57 | \item{authorName}{A character string for authors name and will be shown in the report.}
 58 | 
 59 | \item{comb.method}{The method to combine samples. The default is "NormalMNN". "Harmony", "NormalMNN", "SeuratMNN", "Raw", "Regression" and "LIGER" are optional.}
 60 | 
 61 | \item{harmony.theta}{The parameter 'theta' of function "RunHarmony" in the harmony package.}
 62 | 
 63 | \item{harmony.lambda}{The parameter 'lambda' of function "RunHarmony" in the harmony package.}
 64 | 
 65 | \item{harmony.sigma}{The parameter 'sigma' of function "RunHarmony" in the harmony package.}
 66 | 
 67 | \item{vars.to.regress}{A vector indicating the variables to regress out in R package Seurat.
 68 | The default is c("nCount_RNA", "mito.percent", "ribo.percent").}
 69 | 
 70 | \item{pc.use}{An integer number indicating the number of PCs to use as input features. The default is 30.}
 71 | 
 72 | \item{resolution}{A float number used in function 'FindClusters' in Seurat. The default is 0.8.}
 73 | 
 74 | \item{clusterStashName}{A character string used as the name of cluster identities. The default is "comb.cluster".}
 75 | 
 76 | \item{show.features}{A list or vector for genes to be plotted in 'markerPlot'.}
 77 | 
 78 | \item{bool.add.features}{A logical value indicating whether to add default features to 'show.features' or not.}
 79 | 
 80 | \item{bool.runDiffExpr}{A logical value indicating whether to perform differential expression analysis.}
 81 | 
 82 | \item{n.markers}{An integer indicating the number of differentially expressed genes shown in the plot. The default is 5.}
 83 | 
 84 | \item{sample.colors}{The colors used for samples. The default is NULL, and the pre-set colors will be used.}
 85 | 
 86 | \item{species}{A character string indicating what species the sample belongs to.
 87 | Only "human"(default) or "mouse" are allowed.}
 88 | 
 89 | \item{genome}{A character string indicating the version of the reference gene annotation information.
 90 | This information is mainly used to infer CNV profile and estimate malignancy.
 91 | Only 'hg19' (default) or 'hg38' are allowed for "human" species, and only "mm10" is allowed for "mouse" species.}
 92 | 
 93 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of
 94 | human cells and mouse cells(such as PDX sample).
 95 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.}
 96 | 
 97 | \item{bool.runCellClassify}{A logical value indicating whether to predict the usual cell type. The default is TRUE.}
 98 | 
 99 | \item{ct.templates}{A list of vectors of several cell type templates.
100 | The default is NULL and the templates prepared in this package will be used.}
101 | 
102 | \item{coor.names}{A vector indicating the names of two-dimension coordinate used in visualization.}
103 | 
104 | \item{bool.runMalignancy}{A logical value indicating whether to estimate malignancy.}
105 | 
106 | \item{cnv.ref.data}{An expression matrix of gene by cell, which is used as the normal reference during estimating malignancy.
107 | The default is NULL, and an immune cells or bone marrow cells expression matrix will be used for human or mouse species, respectively.}
108 | 
109 | \item{cnv.referAdjMat}{An adjacency matrix for the normal reference data.
110 | The larger the value, the closer the cell pair is.
111 | The default is NULL, and a SNN matrix of the default ref.data will be used.}
112 | 
113 | \item{cutoff}{A threshold used in the CNV inference.}
114 | 
115 | \item{p.value.cutoff}{A threshold to decide whether the bimodality of the malignancy score distribution is significant.}
116 | 
117 | \item{bool.intraTumor}{A logical value indicating whether to use the identified tumor clusters to
118 | perform following intra-tumor heterogeneity analyses.}
119 | 
120 | \item{bool.runCellCycle}{A logical value indicating whether to estimate cell cycle scores.}
121 | 
122 | \item{bool.runStemness}{A logical value indicating whether to estimate stemness scores.}
123 | 
124 | \item{bool.runGeneSets}{A logical value indicating whether to estimate gene sets signature scores.}
125 | 
126 | \item{geneSets}{A list of gene sets to be analyzed. The default is NULL and 50 hallmark gene sets from MSigDB will be used.}
127 | 
128 | \item{geneSet.method}{The method used to calculate gene set scores. Currently, only "average" and "GSVA" are allowed.}
129 | 
130 | \item{bool.runExprProgram}{A logical value indicating whether to run non-negative matrix factorization (NMF) to identify expression programs.}
131 | 
132 | \item{nmf.rank}{An integer of decomposition rank used in NMF.}
133 | 
134 | \item{genReport}{A logical value indicating whether to generate a .html/.md report (suggest to set TRUE).}
135 | }
136 | \value{
137 | A results list with all useful objects used in the function.
138 | }
139 | \description{
140 | Perform multi-samples analyses.
141 | }
142 | 


--------------------------------------------------------------------------------
/man/runScStatistics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scStatistics.R
 3 | \name{runScStatistics}
 4 | \alias{runScStatistics}
 5 | \title{runScStatistics}
 6 | \usage{
 7 | runScStatistics(
 8 |   dataPath,
 9 |   savePath,
10 |   authorName = NULL,
11 |   sampleName = "sc",
12 |   species = "human",
13 |   hg.mm.mix = F,
14 |   hg.mm.thres = 0.6,
15 |   mix.anno = c(human = "hg19", mouse = "mm10"),
16 |   bg.spec.genes = NULL,
17 |   bool.runSoupx = F,
18 |   genReport = T
19 | )
20 | }
21 | \arguments{
22 | \item{dataPath}{A path containing the cell ranger processed data.
23 | Under this path, folders 'filtered_feature_bc_matrix' and 'raw_feature_bc_matrix' exist generally.}
24 | 
25 | \item{savePath}{A path to save the results files (suggest to create a folder named by sample name).}
26 | 
27 | \item{authorName}{A character string for authors name and will be shown in the report.}
28 | 
29 | \item{sampleName}{A character string giving a label for this sample.}
30 | 
31 | \item{species}{A character string indicating what species the sample belongs to.
32 | Must be one of "human"(default) and "mouse".}
33 | 
34 | \item{hg.mm.mix}{A logical value indicating whether the sample is a mix of
35 | human cells and mouse cells(such as PDX sample).
36 | If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.}
37 | 
38 | \item{hg.mm.thres}{A float-point threshold within [0.5, 1] to identify human and mouse cells.
39 | Cells with UMI percentage of single species larger than the threshold are labeled human or mouse cells.
40 | The default is 0.6.}
41 | 
42 | \item{mix.anno}{A vector to indicate the prefix of genes from different species.
43 | The default is c("human" = "hg19", "mouse" = "mm10").}
44 | 
45 | \item{bg.spec.genes}{A list of background specific genes, which are used to remove ambient genes' influence.}
46 | 
47 | \item{bool.runSoupx}{A logical value indicating whether to estimate contamination fraction by SoupX.}
48 | 
49 | \item{genReport}{A logical value indicating whether to generate a .html/.md report (suggest to set TRUE).}
50 | }
51 | \value{
52 | A results list with all useful objects used in the function.
53 | }
54 | \description{
55 | Perform cell QC, gene QC, visualization and give suggested thresholds.
56 | }
57 | 


--------------------------------------------------------------------------------
/man/runSeurat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{runSeurat}
 4 | \alias{runSeurat}
 5 | \title{runSeurat}
 6 | \usage{
 7 | runSeurat(
 8 |   expr,
 9 |   savePath,
10 |   pc.use = 30,
11 |   resolution = 0.8,
12 |   clusterStashName = "default",
13 |   bool.runDiffExpr = T,
14 |   comb.method = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{expr}{A Seurat object returned by prepareSeurat.}
19 | 
20 | \item{savePath}{A path to save the results files. If NULL, the 'statPath' will be used instead.}
21 | 
22 | \item{pc.use}{An integer number indicating the number of PCs to use as input features. The default is 30.}
23 | 
24 | \item{resolution}{A float number used in function 'FindClusters' in Seurat. The default is 0.8.}
25 | 
26 | \item{clusterStashName}{A character string used as the name of cluster identities. The default is "default".}
27 | 
28 | \item{bool.runDiffExpr}{A logical value indicating whether to perform differential expression analysis.}
29 | 
30 | \item{comb.method}{The method used in combining samples. It works only for multi-sample analysis.}
31 | }
32 | \value{
33 | A list containing a Seurat object, differential expressed genes and annotation information for cells.
34 | }
35 | \description{
36 | Perform usual Seurat step and cell type prediction.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/runStemness.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scAnnotation.R
 3 | \name{runStemness}
 4 | \alias{runStemness}
 5 | \title{runStemness}
 6 | \usage{
 7 | runStemness(X, stem.sig = NULL, species = "human")
 8 | }
 9 | \arguments{
10 | \item{X}{An expression matrix of gene by cell to estimate stemness.}
11 | 
12 | \item{stem.sig}{An array of stemness signature. The default is NULL, and a prepared signature will be used.}
13 | 
14 | \item{species}{A character string indicating what species the sample belongs to.
15 | Only "human"(default) or "mouse" are allowed.}
16 | }
17 | \value{
18 | An array of cell stemness scores.
19 | }
20 | \description{
21 | Estimate cell stemness according to the Spearman correlation with stemness signature.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/runSurvival.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{runSurvival}
 4 | \alias{runSurvival}
 5 | \title{runSurvival}
 6 | \usage{
 7 | runSurvival(
 8 |   features,
 9 |   data,
10 |   surv.time,
11 |   surv.event,
12 |   cut.off = 0.5,
13 |   savePath = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{features}{The names of marker genes or signatures to be analyzed.}
18 | 
19 | \item{data}{The data used to perform survival analysis.
20 | It should be an expression or signature matrix with gene or signature by patient.
21 | The row names are the features' names. The columns are patients' labels.}
22 | 
23 | \item{surv.time}{The survival time of patients. It should be in accord with the columns of data.}
24 | 
25 | \item{surv.event}{The status indicator of patients. 0=alive, 1=dead. It should be in accord with the columns of data.}
26 | 
27 | \item{cut.off}{The percentage threshold to divide patients into two groups.
28 | The default is 0.5, which means the patients are divided by median.
29 | Other values, such as 0.4, means the first 40 percent patients are set "Low" group
30 | and the last 40 percent are set "High" group (the median 20 percent are discarded).}
31 | 
32 | \item{savePath}{The path to save the survival plots of genes or signatures (the default is NULL and the plots will be returned without saving).}
33 | }
34 | \value{
35 | A list of survival curves plots.
36 | }
37 | \description{
38 | According to the marker genes or signatures expression high/low levels,
39 | patients are divided into two groups and then survival analysis is performed.
40 | The survival curves can be plotted.
41 | }
42 | 


--------------------------------------------------------------------------------
/scCancer.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 4
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------