├── DESCRIPTION
├── NAMESPACE
├── R
    ├── DEG.R
    ├── FindLR.R
    ├── LRPlot.R
    ├── NetView.R
    ├── TimePlot.R
    ├── rawParse.R
    └── sysdata.rda
├── README.md
├── data
    └── LR_database.rda
├── example
    ├── example_code.r
    └── example_data.txt
└── man
    ├── DEG.Rd
    ├── DESeq2Test.Rd
    ├── DESingleTest.Rd
    ├── FindLR.Rd
    ├── LRPlot.Rd
    ├── MASTTest.Rd
    ├── MonocleTest.Rd
    ├── NetView.Rd
    ├── SCDETest.Rd
    ├── TimePlot.Rd
    ├── WilcoxTest.Rd
    ├── edgeRTest.Rd
    └── rawParse.Rd


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: iTALK
 2 | Type: Package
 3 | Title: Characterize and Illustrate Intercellular Communication 
 4 | Version: 0.1.0
 5 | Date: 2018-12-12
 6 | Author: Yuanxin Wang
 7 | Maintainer: Yuanxin Wang <ywang65@mdanderson.org>
 8 | Description: iTALK is a computational approach to characterize, compare, 
 9 |     and illustrate intercellular communication signals in the multicellular
10 |     ecosystem using either bulk RNA sequencing data or single cell RNAseq data. 
11 |     iTALK can in principle be used to dissect the complexity, diversity, and 
12 |     dynamics of cell-cell communication from a wide range of cellular processes.
13 | License: CC-BY-NC-SA
14 | Encoding: UTF-8
15 | LazyData: true
16 | Depends: R (>= 3.4.0)
17 | Imports:
18 |     progress,
19 |     pbapply,
20 |     dplyr,
21 |     tidyr,
22 |     graphics,
23 |     randomcoloR,
24 |     circlize,
25 |     ggplot2,
26 |     network,
27 |     igraph,
28 |     DESeq2,
29 |     edgeR, 
30 |     monocle, 
31 |     scde, 
32 |     DEsingle, 
33 |     MAST,
34 |     scater
35 | VignetteBuilder: knitr
36 | biocViews: RNASequencing 
37 | RoxygenNote: 6.1.1
38 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(DEG)
 4 | export(DESeq2Test)
 5 | export(DESingleTest)
 6 | export(FindLR)
 7 | export(LRPlot)
 8 | export(MASTTest)
 9 | export(MonocleTest)
10 | export(NetView)
11 | export(SCDETest)
12 | export(TimePlot)
13 | export(WilcoxTest)
14 | export(edgeRTest)
15 | export(rawParse)
16 | import(Biobase)
17 | import(DESeq2)
18 | import(DEsingle)
19 | import(MAST)
20 | import(circlize)
21 | import(dplyr)
22 | import(edgeR)
23 | import(ggplot2)
24 | import(graphics)
25 | import(igraph)
26 | import(monocle)
27 | import(network)
28 | import(randomcoloR)
29 | import(scde)
30 | import(tidyr)
31 | importFrom(pbapply,pbsapply)
32 | importFrom(progress,progress_bar)
33 | importFrom(stats,wilcox.test)
34 | importFrom(utils,installed.packages)
35 | 


--------------------------------------------------------------------------------
/R/DEG.R:
--------------------------------------------------------------------------------
  1 | #' Call DEGenes
  2 | #'
  3 | #' This function loads the data as a dataframe, and method as a string.
  4 | #' It assumes that each line contains gene expression profile of one single
  5 | #' cell, and each column contains the one single gene expression profile in
  6 | #' different cells. The dataframe should also contain the cell type information
  7 | #' with column name 'cell_type', as well as group information as 'compare_group'
  8 | #' Batch information as 'batch' is optional. If included, users may want to use
  9 | #' the raw count data for later analysis. Differential expressed genes will be
 10 | #' called within each cell type by the method users select. For bulk RNAseq,
 11 | #' we provide edgeR, DESeq2. And for scRNA-seq, popular methods in packages
 12 | #' scde, monocle, DEsingle and MAST are available.
 13 | #'
 14 | #' @param data Input raw or normalized count data with column 'cell_type'
 15 | #' and 'compare_group'
 16 | #' @param method Method used to call DEGenes. Available options are:
 17 | ##' \itemize{
 18 | ##'  \item{Wilcox}: Wilcoxon rank sum test
 19 | ##'  \item{DESeq2}: Negative binomial model based differential analysis
 20 | ##'  (Love et al, Genome Biology, 2014)
 21 | ##'  \item{SCDE}: Bayesian approach to single-cell differential
 22 | ##'  expression analysis (Kharchenko et al, Nature Method, 2014)
 23 | ##'  \item{monocle}: Census based differential analysis (Qiu et al,
 24 | ##'  Nature Methods, 2017)
 25 | ##'  \item{edgeR}: Negative binomial distributions, including empirical
 26 | ##'  Bayes estimation, exact tests, generalized linear models and
 27 | ##'  quasi-likelihood tests based differential analysis (McCarthy et al,
 28 | ##'  Nucleic Acids Research, 2012)
 29 | ##'  \item{DESingle}: Zero-Inflated Negative Binomial model to estimate
 30 | ##'  the proportion of real and dropout zeros and to define and detect
 31 | ##'  the 3 types of DE genes (Miao et al, Bioinformatics, 2018)
 32 | ##'  \item{MAST}: GLM-framework that treates cellular detection rate as a
 33 | ##'  covariate (Finak et al, Genome Biology, 2015)
 34 | ##'  }
 35 | #' @param min_gene_expressed Genes expressed in minimum number of cells
 36 | #' @param min_valid_cells Minimum number of genes detected in the cell
 37 | #' @param contrast String vector specifying the contrast to be
 38 | #' tested against the log2-fold-change threshold
 39 | #' @param q_cut Cut-off for q value
 40 | #' @param add Whether add genes that are not differentially expressed 
 41 | #' but highly expressed for finding the significant pairs later
 42 | #' @param top Same as in function rawParse
 43 | #' @param stats Same as in function rawParse
 44 | #' @return A matrix of the differential expressed genes
 45 | #' @importFrom utils installed.packages
 46 | #' @import dplyr
 47 | #' @import DESeq2
 48 | #' @import scde
 49 | #' @import monocle
 50 | #' @import edgeR
 51 | #' @import DEsingle
 52 | #' @import MAST
 53 | #' @import Biobase
 54 | #' @export
 55 | DEG<-function(data,method, min_gene_expressed=0, min_valid_cells=0,contrast=NULL,q_cut=0.05,add=TRUE,top=50,stats='mean',...){
 56 |   if(method %in% c('SCDE','monocle','DESingle','MAST') && dim(data)[1]>=400){
 57 |     print('Warning: It may take a long time. You can go and brew a cup of coffee...')
 58 |   }
 59 |   if(length(unique(data$cell_type))!=1){
 60 |     stop('Error: please compare data with sinlge cell type')
 61 |   }
 62 |   sub_data<-subset(data,select=-cell_type)
 63 |   combination<-combn(unique(sub_data$compare_group),2)
 64 |   res=NULL
 65 |   if(method=='Wilcox'){
 66 |     for(i in ncol(combination)){
 67 |       if(is.null(contrast)){
 68 |         contrast=c(combination[,i])
 69 |       }
 70 |       sub_data<-sub_data[sub_data$compare_group %in% combination[,i],]
 71 |       res<-rbind(res,WilcoxTest(sub_data,min_gene_expressed, min_valid_cells, contrast=contrast,...))
 72 |     }
 73 |   }else if(method=='DESeq2'){
 74 |     for(i in ncol(combination)){
 75 |       if(is.null(contrast)){
 76 |         contrast=c(combination[,i])
 77 |       }
 78 |       sub_data<-sub_data[sub_data$compare_group %in% combination[,i],]
 79 |       res<-rbind(res,DESeq2Test(sub_data,min_gene_expressed, min_valid_cells, contrast=contrast,...))
 80 |     }
 81 |   }else if(method=='SCDE'){
 82 |     for(i in ncol(combination)){
 83 |       if(is.null(contrast)){
 84 |         contrast=c(combination[,i])
 85 |       }
 86 |       sub_data<-sub_data[sub_data$compare_group %in% combination[,i],]
 87 |       res<-rbind(res,SCDETest(sub_data,min_gene_expressed, min_valid_cells, contrast=contrast,...))
 88 |     }
 89 |   }else if(method=='monocle'){
 90 |     for(i in ncol(combination)){
 91 |       if(is.null(contrast)){
 92 |         contrast=c(combination[,i])
 93 |       }
 94 |       sub_data<-sub_data[sub_data$compare_group %in% combination[,i],]
 95 |       res<-rbind(res,MonocleTest(sub_data,min_gene_expressed, min_valid_cells, contrast=contrast,...))
 96 |     }
 97 |   }else if(method=='edgeR'){
 98 |     for(i in ncol(combination)){
 99 |       if(is.null(contrast)){
100 |         contrast=c(combination[,i])
101 |       }
102 |       sub_data<-sub_data[sub_data$compare_group %in% combination[,i],]
103 |       res<-rbind(res,edgeRTest(sub_data,min_gene_expressed, min_valid_cells, contrast=contrast,...))
104 |     }
105 |   }else if(method=='DESingle'){
106 |     for(i in ncol(combination)){
107 |       if(is.null(contrast)){
108 |         contrast=c(combination[,i])
109 |       }
110 |       sub_data<-sub_data[sub_data$compare_group %in% combination[,i],]
111 |       res<-rbind(res,DESingleTest(sub_data,min_gene_expressed, min_valid_cells, contrast=contrast,...))
112 |     }
113 |   }else if(method=='MAST'){
114 |     for(i in ncol(combination)){
115 |       if(is.null(contrast)){
116 |         contrast=c(combination[,i])
117 |       }
118 |       sub_data<-sub_data[sub_data$compare_group %in% combination[,i],]
119 |       res<-rbind(res,MASTTest(sub_data,min_gene_expressed, min_valid_cells, contrast=contrast,...))
120 |     }
121 |   }else{
122 |     stop('Error: method currently not available')
123 |   }
124 |   cell_type<-unique(data$cell_type)
125 |   res<-data.frame(res,cell_type,stringsAsFactors = FALSE)
126 |   res <- res %>% dplyr::filter(q.value<q_cut)
127 |   if(add){
128 |     parsedData<-rawParse(data %>% select(-compare_group),top=top,stats=stats)%>% select(c(gene,cell_type))%>% dplyr::mutate(logFC=0.0001,p.value=NA,q.value=NA)
129 |     parsedData<-parsedData %>% anti_join(res,by=c('gene'='gene'))
130 |     res<-rbind(res,parsedData)
131 |   }    
132 |   return(res)
133 | }
134 | 
#' Differential expression using wilcox
#'
#' Identifies differentially expressed genes between two groups of cells using
#' a Wilcoxon Rank Sum test
#'
#' @param sub_data Data frame with one row per cell and one column per gene,
#' plus the column 'compare_group' (cell_type already removed, restricted to
#' the two groups to compare)
#' @param min_gene_expressed Genes are kept when their count summed over all
#' cells is at least this value
#' @param min_valid_cells Cells are kept when their count summed over all
#' genes is at least this value
#' @param contrast String vector of length two; the log fold change is
#' reported as the first group versus the second
#' @param datatype Type of data. Available options are:
#' \describe{
#'  \item{'raw count'}{Raw count data without any pre-processing; logFC is
#'  the log2 ratio of the group means}
#'  \item{'log count'}{Normalized and log-transformed data; logFC is the
#'  difference of the group means}
#' }
#' @param verbose Whether to show the progress of computing
#'
#' @return A data frame with columns gene, logFC, p.value and q.value
#' (Benjamini-Hochberg adjusted p value).
#'
#' @importFrom pbapply pbsapply
#' @importFrom stats wilcox.test
#'
#' @export
WilcoxTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                       contrast = unique(sub_data$compare_group),
                       datatype = 'raw count', verbose = 0) {
  # Reject a bad data type before spending time on the per-gene tests.
  if (length(datatype) != 1 || !datatype %in% c('raw count', 'log count')) {
    stop('Error: invalid data type')
  }
  if (!'compare_group' %in% colnames(sub_data)) {
    stop('Error: sub_data must contain a compare_group column')
  }
  contrast <- as.character(contrast)

  # genes x cells numeric matrix
  gene_cols <- setdiff(colnames(sub_data), 'compare_group')
  counts <- t(as.matrix(sub_data[, gene_cols, drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  # Both filters are evaluated on the unfiltered matrix.
  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  keep_cells <- which(colSums(counts) >= min_valid_cells)
  groups <- droplevels(as.factor(sub_data$compare_group[keep_cells]))
  # drop = FALSE keeps a matrix even when one gene or one cell remains.
  counts <- counts[keep_genes, keep_cells, drop = FALSE]

  gene_index <- seq_len(nrow(counts))
  test_gene <- function(i) {
    stats::wilcox.test(counts[i, ] ~ groups)$p.value
  }
  p_val <- if (verbose) {
    as.numeric(unlist(pbsapply(X = gene_index, FUN = test_gene)))
  } else {
    vapply(gene_index, test_gene, numeric(1))
  }

  mean_first <- rowMeans(counts[, which(groups == contrast[1]), drop = FALSE])
  mean_second <- rowMeans(counts[, which(groups == contrast[2]), drop = FALSE])
  logFC <- if (datatype == 'raw count') {
    log2(mean_first / mean_second)
  } else {
    mean_first - mean_second
  }

  res <- data.frame(gene = rownames(counts), logFC = logFC, p.value = p_val,
                    stringsAsFactors = FALSE)
  res$q.value <- stats::p.adjust(res$p.value, method = 'BH')
  res
}
186 | 
#' Differential expression using DESeq2
#'
#' Identifies differentially expressed genes between two groups of cells using
#' DESeq2
#'
#' @references Love MI, Huber W and Anders S (2014). "Moderated estimation of
#' fold change and dispersion for RNA-seq data with DESeq2." Genome Biology.
#' https://bioconductor.org/packages/release/bioc/html/DESeq2.html
#' @param sub_data Data frame with one row per sample and one column per
#' gene, plus the column 'compare_group' (cell_type already removed,
#' restricted to the two groups to compare). 'compare_group' may be at any
#' column position
#' @param min_gene_expressed Genes are kept when their count summed over all
#' samples is at least this value
#' @param min_valid_cells Samples are kept when their count summed over all
#' genes is at least this value
#' @param contrast String vector of length two; the log2 fold change is
#' reported as the first group versus the second
#' @param test either "Wald" or "LRT", which will then use either
#' Wald significance tests (defined by \code{\link{nbinomWaldTest}}),
#' or the likelihood ratio test on the difference in deviance between a
#' full and reduced model formula (defined by \code{\link{nbinomLRT}})
#' @param fitType either "parametric", "local", or "mean"
#' for the type of fitting of dispersions to the mean intensity.
#' See \code{\link{estimateDispersions}} for description.
#' @param sfType either "ratio", "poscounts", or "iterate"
#' for the type of size factor estimation. See
#' \code{\link{estimateSizeFactors}} for description.
#' @param betaPrior whether or not to put a zero-mean normal prior on
#' the non-intercept coefficients
#' See \code{\link{nbinomWaldTest}} for description of the calculation
#' of the beta prior. In versions \code{>=1.16}, the default is set
#' to \code{FALSE}, and shrunken LFCs are obtained afterwards using
#' \code{\link{lfcShrink}}.
#' @param quiet whether to print messages at each step
#' @param modelMatrixType either "standard" or "expanded", which describe
#' how the model matrix, X of the GLM formula is formed.
#' "standard" is as created by \code{model.matrix} using the
#' design formula. "expanded" includes an indicator variable for each
#' level of factors in addition to an intercept. For more information
#' see the Description of \code{\link{nbinomWaldTest}}.
#' betaPrior must be set to TRUE in order for expanded model matrices
#' to be fit.
#' @param minReplicatesForReplace the minimum number of replicates required
#' in order to use \code{\link{replaceOutliers}} on a
#' sample. If there are samples with so many replicates, the model will
#' be refit after these replacing outliers, flagged by Cook's distance.
#' Set to \code{Inf} in order to never replace outliers.
#' @param useT logical, passed to \code{\link{nbinomWaldTest}}, default is FALSE,
#' where Wald statistics are assumed to follow a standard Normal
#' @param minmu lower bound on the estimated count for fitting gene-wise dispersion
#' and for use with \code{nbinomWaldTest} and \code{nbinomLRT}
#' @param parallel if FALSE, no parallelization. if TRUE, parallel
#' execution using \code{BiocParallel}, see next argument \code{BPPARAM}.
#' A note on running in parallel using \code{BiocParallel}: it may be
#' advantageous to remove large, unneeded objects from your current
#' R environment before calling \code{DESeq},
#' as it is possible that R's internal garbage collection
#' will copy these files while running on worker nodes.
#' @param BPPARAM an optional parameter object passed internally
#' to \code{\link{bplapply}} when \code{parallel=TRUE}.
#' If not specified, the parameters last registered with
#' \code{\link{register}} will be used. The default calls \code{bpparam()},
#' which this package does not import itself, so BiocParallel has to be
#' attached for the default to resolve.
#' @import DESeq2
#'
#' @return A data frame with columns gene, logFC, p.value and q.value.
#'
#' @details
#' This test does not support pre-processed genes. To use this method, please
#' install DESeq2, using the instructions at
#' https://bioconductor.org/packages/release/bioc/html/DESeq2.html
#'
#' @importFrom utils installed.packages
#'
#' @export
DESeq2Test <- function(sub_data, min_gene_expressed, min_valid_cells,
                       contrast = unique(sub_data$compare_group), test = 'Wald',
                       fitType = 'parametric', sfType = 'ratio',
                       betaPrior = FALSE, quiet = FALSE,
                       modelMatrixType = 'standard',
                       minReplicatesForReplace = 7, useT = FALSE, minmu = 0.5,
                       parallel = FALSE, BPPARAM = bpparam()) {
  if (!'compare_group' %in% colnames(sub_data)) {
    stop('Error: sub_data must contain a compare_group column')
  }

  # Drop the grouping column by name; it is not guaranteed to be last.
  gene_cols <- setdiff(colnames(sub_data), 'compare_group')
  counts <- t(as.matrix(sub_data[, gene_cols, drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  # Both filters are evaluated on the unfiltered matrix.
  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  keep_cells <- which(colSums(counts) >= min_valid_cells)
  counts <- counts[keep_genes, keep_cells, drop = FALSE]

  coldata <- data.frame(
    compare_group = droplevels(as.factor(sub_data$compare_group[keep_cells])),
    row.names = rownames(sub_data)[keep_cells]
  )

  dds <- DESeqDataSetFromMatrix(countData = counts, colData = coldata,
                                design = ~ compare_group)
  dds <- DESeq(dds, test = test,
               fitType = fitType,
               sfType = sfType,
               betaPrior = betaPrior,
               quiet = quiet,
               minReplicatesForReplace = minReplicatesForReplace,
               modelMatrixType = modelMatrixType,
               useT = useT, minmu = minmu,
               parallel = parallel, BPPARAM = BPPARAM)

  de <- results(dds, contrast = c('compare_group', as.character(contrast)))
  data.frame(gene = rownames(counts),
             logFC = de$log2FoldChange,
             p.value = de$pvalue,
             q.value = de$padj,
             stringsAsFactors = FALSE)
}
284 | 
#' Differential expression using scde
#'
#' Identifies differentially expressed genes between two groups of cells using
#' scde
#'
#' @references "Bayesian approach to single-cell differential expression
#' analysis" (Kharchenko PV, Silberstein L, Scadden DT, Nature Methods,
#' doi:10.1038/nmeth.2967)
#' https://github.com/hms-dbmi/scde
#' @param sub_data Data frame with one row per cell and one column per gene,
#' plus the column 'compare_group' and optionally 'batch' (cell_type already
#' removed, restricted to the two groups to compare). Gene names are taken
#' unchanged from the column names
#' @param min_gene_expressed Genes are kept when their count summed over all
#' cells is at least this value
#' @param min_valid_cells Cells are kept when their count summed over all
#' genes is at least this value
#' @param contrast String vector of length two naming the groups to compare
#' @param batch \code{NULL} (default) for no batch correction. Any other
#' value switches batch correction on; the batch labels are then read from
#' the 'batch' column of \code{sub_data}
#' @param n.randomizations number of bootstrap randomizations to be performed
#' @param n.cores number of cores to utilize for the expression difference
#' step (error model fitting always uses 2 cores)
#' @param batch.models (optional) separate models for the batch data (if generated
#' using batch-specific group argument). Normally the same models are used.
#' Currently accepted for interface compatibility only; it is not used.
#' @param return.posteriors whether joint posterior matrices should be returned
#' @param verbose integer verbose level (1 for verbose)
#' @import scde
#' @return A data frame with columns gene, logFC, p.value and q.value.
#'
#' @details
#' This test does not support pre-processed genes. To use this method, please
#' install scde, using the instructions at
#' http://hms-dbmi.github.io/scde/tutorials.html
#'
#' @importFrom utils installed.packages
#'
#' @export
SCDETest <- function(sub_data, min_gene_expressed, min_valid_cells,
                     contrast = unique(sub_data$compare_group), batch = NULL,
                     n.randomizations = 150, n.cores = 10,
                     batch.models = models, return.posteriors = FALSE,
                     verbose = 1) {
  if (!'compare_group' %in% colnames(sub_data)) {
    stop('Error: sub_data must contain a compare_group column')
  }
  use_batch <- !is.null(batch)
  if (use_batch && !'batch' %in% colnames(sub_data)) {
    stop('Error: batch correction requested but sub_data has no batch column')
  }

  # Select gene columns by name: no dummy batch column is needed, gene names
  # are not rewritten, and an existing batch column never leaks into counts.
  gene_cols <- setdiff(colnames(sub_data), c('compare_group', 'batch'))
  counts <- t(as.matrix(sub_data[, gene_cols, drop = FALSE]))
  storage.mode(counts) <- 'integer'
  if (is.null(colnames(counts))) {
    colnames(counts) <- rownames(sub_data)
  }

  # Both filters are evaluated on the unfiltered matrix.
  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  keep_cells <- which(colSums(counts) >= min_valid_cells)
  counts <- counts[keep_genes, keep_cells, drop = FALSE]

  # Named by cell so the labels can follow the error models by name.
  groups <- droplevels(as.factor(sub_data$compare_group[keep_cells]))
  names(groups) <- colnames(counts)

  o.ifm <- scde.error.models(counts = counts, groups = groups, n.cores = 2,
                             threshold.segmentation = TRUE,
                             min.size.entries = 100,
                             save.crossfit.plots = FALSE,
                             save.model.plots = FALSE, verbose = verbose)
  # Discard cells with a poor fit and keep the group labels in step with the
  # remaining models.
  o.ifm <- o.ifm[which(o.ifm$corr.a > 0), ]
  groups <- groups[rownames(o.ifm)]
  o.prior <- scde.expression.prior(models = o.ifm, counts = counts)

  if (!use_batch) {
    ediff <- scde.expression.difference(o.ifm, counts, o.prior,
                                        groups = groups,
                                        n.randomizations = n.randomizations,
                                        n.cores = n.cores, verbose = verbose)
  } else {
    batch_labels <- as.factor(sub_data$batch[keep_cells])
    names(batch_labels) <- colnames(counts)
    batch_labels <- batch_labels[rownames(o.ifm)]
    ediff_batch <- scde.expression.difference(o.ifm, counts, o.prior,
                                              groups = groups,
                                              batch = batch_labels,
                                              n.randomizations = n.randomizations,
                                              n.cores = n.cores,
                                              return.posteriors = return.posteriors,
                                              verbose = verbose)
    ediff <- ediff_batch$batch.adjusted
  }

  p.value <- 2 * stats::pnorm(abs(ediff$Z), lower.tail = FALSE) # 2-tailed p-value
  q.value <- 2 * stats::pnorm(abs(ediff$cZ), lower.tail = FALSE) # Adjusted to control for FDR
  res <- data.frame(gene = rownames(counts), logFC = ediff$mle,
                    p.value = p.value, q.value = q.value,
                    stringsAsFactors = FALSE)
  # Flip the sign when the requested contrast is the reverse of the factor
  # level order.
  if (all(levels(groups) != as.character(contrast))) {
    res$logFC <- -res$logFC
  }
  res
}
356 | 
#' Differential expression using monocle
#'
#' Identifies differentially expressed genes between two groups of cells using
#' monocle
#'
#' @references Qiu X, Hill A, Packer J, Lin D, Ma Y, Trapnell C (2017).
#' "Single-cell mRNA quantification and differential analysis with Census."
#' Nature Methods.
#' https://github.com/cole-trapnell-lab/monocle-release
#' @param sub_data Data frame with one row per cell and one column per gene,
#' plus the column 'compare_group' and optionally 'batch' (cell_type already
#' removed, restricted to the two groups to compare). Gene names are taken
#' unchanged from the column names
#' @param min_gene_expressed Genes are kept when they are detected (non-zero)
#' in at least this many cells
#' @param min_valid_cells Cells are kept when at least this many genes are
#' detected (non-zero) in them
#' @param contrast String vector of length two; the log2 fold change is
#' reported as the first group versus the second
#' @param batch \code{NULL} (default) to ignore batches. Any other value
#' makes the function read batch labels from the 'batch' column of
#' \code{sub_data}
#' @param cores The number of cores to be used while testing each gene
#' for differential expression.
#' @import monocle
#' @return A data frame with columns gene, logFC, p.value and q.value.
#'
#' @details
#' This test does not support pre-processed genes. To use this method, please
#' install monocle, using the instructions at
#' https://bioconductor.org/packages/release/bioc/html/monocle.html
#'
#' @importFrom utils installed.packages
#'
#' @export
MonocleTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                        contrast = unique(sub_data$compare_group),
                        batch = NULL, cores = 4) {
  if (!'compare_group' %in% colnames(sub_data)) {
    stop('Error: sub_data must contain a compare_group column')
  }
  use_batch <- !is.null(batch)
  if (use_batch && !'batch' %in% colnames(sub_data)) {
    stop('Error: batch correction requested but sub_data has no batch column')
  }
  contrast <- as.character(contrast)

  # Select gene columns by name: no dummy batch column is needed and gene
  # names are not rewritten.
  gene_cols <- setdiff(colnames(sub_data), c('compare_group', 'batch'))
  counts <- t(as.matrix(sub_data[, gene_cols, drop = FALSE]))
  storage.mode(counts) <- 'numeric'
  if (is.null(colnames(counts))) {
    colnames(counts) <- rownames(sub_data)
  }

  # Per-cell annotation
  pheno_cols <- if (use_batch) c('compare_group', 'batch') else 'compare_group'
  pd <- as.data.frame(sub_data[, pheno_cols, drop = FALSE])
  pd$num_genes_expressed <- unname(colSums(counts != 0))
  rownames(pd) <- colnames(counts)

  # Per-gene annotation
  fd <- data.frame(gene_short_name = rownames(counts),
                   num_cells_expressed = unname(rowSums(counts != 0)),
                   row.names = rownames(counts),
                   stringsAsFactors = FALSE)

  cds <- newCellDataSet(counts,
                        phenoData = methods::new('AnnotatedDataFrame', data = pd),
                        featureData = methods::new('AnnotatedDataFrame', data = fd),
                        expressionFamily = negbinomial.size())

  expressed_genes <- rownames(fd)[which(fd$num_cells_expressed >= min_gene_expressed)]
  valid_cells <- rownames(pd)[which(pd$num_genes_expressed >= min_valid_cells)]
  cds <- cds[expressed_genes, valid_cells]
  cds <- estimateSizeFactors(cds)
  cds <- estimateDispersions(cds)

  if (use_batch) {
    # NOTE(review): batch only enters through reduceDimension here; the test
    # formula below does not contain a batch term - confirm this is intended.
    cds <- reduceDimension(cds, residualModelFormulaStr = "~batch",
                           verbose = TRUE)
  }
  diff_test_res <- differentialGeneTest(cds,
                                        fullModelFormulaStr = "~compare_group",
                                        cores = cores)

  # Divide every cell (column) by its own size factor. A plain
  # matrix / vector division would recycle the factors down the rows instead.
  norm_data <- t(t(exprs(cds)) / pData(cds)[, 'Size_Factor'])
  cell_group <- as.character(pData(cds)[, 'compare_group'])
  mean_first <- rowMeans(norm_data[, which(cell_group == contrast[1]), drop = FALSE])
  mean_second <- rowMeans(norm_data[, which(cell_group == contrast[2]), drop = FALSE])

  # Gene names come from the filtered data set so they line up with the
  # statistics even when genes were removed by the expression filter.
  gene <- rownames(norm_data)
  data.frame(gene = gene,
             logFC = log2(mean_first / mean_second),
             p.value = diff_test_res[gene, 'pval'],
             q.value = diff_test_res[gene, 'qval'],
             stringsAsFactors = FALSE)
}
434 | 
#' Differential expression using edgeR
#'
#' Identifies differentially expressed genes between two groups of cells using
#' edgeR
#'
#' @references McCarthy, J. D, Chen, Yunshun, Smyth, K. G (2012). "Differential
#' expression analysis of multifactor RNA-Seq experiments with respect to
#' biological variation." Nucleic Acids Research, 40(10), 4288-4297.
#' @references Robinson MD, McCarthy DJ, Smyth GK (2010). "edgeR: a Bioconductor
#' package for differential expression analysis of digital gene expression data."
#' Bioinformatics, 26(1), 139-140.
#' http://bioconductor.org/packages/release/bioc/html/edgeR.html
#' @param sub_data Data frame with one row per sample and one column per
#' gene, plus the column 'compare_group' (cell_type already removed,
#' restricted to the two groups to compare)
#' @param min_gene_expressed Genes are kept when their count summed over all
#' samples is at least this value
#' @param min_valid_cells Samples are kept when their count summed over all
#' genes is at least this value
#' @param contrast String vector of length two; the log fold change is
#' reported as the first group versus the second, matching the other tests
#' in this file
#' @param calcNormMethod normalization method to be used
#' @param trend.method method for estimating dispersion trend. Possible values
#' are "none", "movingave", "loess" and "locfit" (default).
#' @param tagwise logical, should the tagwise dispersions be estimated
#' @param robust logical, should the estimation of prior.df be robustified
#' against outliers
#' @import edgeR
#' @return A data frame with columns gene, logFC, p.value and q.value
#' (Benjamini-Hochberg adjusted p value).
#'
#' @details
#' This test does not support pre-processed genes. To use this method, please
#' install edgeR, using the instructions at
#' http://bioconductor.org/packages/release/bioc/html/edgeR.html
#'
#' @importFrom utils installed.packages
#'
#' @export
edgeRTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                      contrast = unique(sub_data$compare_group),
                      calcNormMethod = 'TMM', trend.method = 'locfit',
                      tagwise = TRUE, robust = FALSE) {
  if (!'compare_group' %in% colnames(sub_data)) {
    stop('Error: sub_data must contain a compare_group column')
  }
  contrast <- as.character(contrast)

  gene_cols <- setdiff(colnames(sub_data), 'compare_group')
  counts <- t(as.matrix(sub_data[, gene_cols, drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  # Both filters are evaluated on the unfiltered matrix.
  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  keep_cells <- which(colSums(counts) >= min_valid_cells)
  counts <- counts[keep_genes, keep_cells, drop = FALSE]
  groups <- droplevels(as.factor(sub_data$compare_group[keep_cells]))

  dgList <- DGEList(counts = counts, genes = rownames(counts), group = groups)
  dgList <- calcNormFactors(dgList, method = calcNormMethod)
  designMat <- stats::model.matrix(~groups)
  dgList <- estimateDisp(dgList, design = designMat,
                         trend.method = trend.method, tagwise = tagwise,
                         robust = robust)

  # exactTest treats the FIRST element of `pair` as the baseline, so the pair
  # is reversed to report contrast[1] relative to contrast[2].
  et <- exactTest(dgList, pair = rev(contrast))

  res <- et$table[, c('logFC', 'PValue')]
  colnames(res) <- c('logFC', 'p.value')
  res$q.value <- stats::p.adjust(res$p.value, method = "BH")
  data.frame(gene = rownames(counts), res, stringsAsFactors = FALSE)
}
490 | 
#' Differential expression using DEsingle
#'
#' Identifies differentially expressed genes between two groups of cells using
#' DEsingle
#'
#' @references Zhun Miao, Ke Deng, Xiaowo Wang, Xuegong Zhang (2018). DEsingle
#' for detecting three types of differential expression in single-cell RNA-seq
#' data. Bioinformatics, bty332. 10.1093/bioinformatics/bty332.
#'
#' @param sub_data Data frame with one row per cell and one column per gene,
#' plus the column 'compare_group' (cell_type already removed, restricted to
#' the two groups to compare)
#' @param min_gene_expressed Genes are kept when their count summed over all
#' cells is at least this value
#' @param min_valid_cells Cells are kept when their count summed over all
#' genes is at least this value
#' @param contrast String vector of length two naming the groups to compare
#' @param parallel If FALSE (default), no parallel computation is used;
#' if TRUE, parallel computation using \code{BiocParallel}, with argument
#' \code{BPPARAM}.
#' @param BPPARAM An optional parameter object passed internally to
#' \code{\link{bplapply}} when \code{parallel=TRUE}. If not specified,
#' \code{\link{bpparam}()} (default) will be used. This package does not
#' import \code{bpparam} itself, so BiocParallel has to be attached for the
#' default to resolve.
#' @import DEsingle
#' @return A data frame with columns gene, p.value, q.value and logFC.
#'
#' @details
#' This test does not support pre-processed genes. To use this method, please
#' install DEsingle, using the instructions at
#' https://github.com/miaozhun/DEsingle
#'
#' @importFrom utils installed.packages
#'
#' @export
DESingleTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                         contrast = unique(sub_data$compare_group),
                         parallel = FALSE, BPPARAM = bpparam()) {
  if (!'compare_group' %in% colnames(sub_data)) {
    stop('Error: sub_data must contain a compare_group column')
  }

  gene_cols <- setdiff(colnames(sub_data), 'compare_group')
  counts <- t(as.matrix(sub_data[, gene_cols, drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  # Both filters are evaluated on the unfiltered matrix.
  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  keep_cells <- which(colSums(counts) >= min_valid_cells)
  counts <- counts[keep_genes, keep_cells, drop = FALSE]
  groups <- droplevels(as.factor(sub_data$compare_group[keep_cells]))

  de <- DEsingle(counts = counts, group = groups, parallel = parallel,
                 BPPARAM = BPPARAM)

  # Prefer the gene names carried by the result itself so that statistics
  # stay attached to the right gene if DEsingle reorders or drops rows; fall
  # back to the input order when the result has no usable row names.
  gene <- rownames(de)
  if (is.null(gene) || !all(gene %in% rownames(counts))) {
    gene <- rownames(counts)
  }

  res <- data.frame(gene = gene,
                    p.value = de[, 'pvalue'],
                    q.value = de[, 'pvalue.adj.FDR'],
                    logFC = log2(de[, 'foldChange']),
                    stringsAsFactors = FALSE)
  # Flip the sign when the requested contrast is the reverse of the factor
  # level order.
  if (all(levels(groups) != as.character(contrast))) {
    res$logFC <- -res$logFC
  }
  res
}
542 | 
543 | #' Differential expression using MAST
544 | #'
545 | #' Identifies differentially expressed genes between two groups of cells using
546 | #' MAST, fitted on log2(count + 1) transformed expression.
547 | #'
548 | #' @references Finak G, McDavid A, Yajima M, Deng J, Gersuk V, Shalek AK,
549 | #' Slichter CK, et al. MAST: a flexible statistical framework for assessing
550 | #' transcriptional changes and characterizing heterogeneity in single-cell RNA
551 | #' sequencing data. Genome Biology 16 (1), 278.
552 | #'
553 | #' @param sub_data Count data (cells in rows, genes in columns) without the
554 | #' cell_type column, restricted to the two groups in column compare_group
555 | #' @param min_gene_expressed Minimum total count a gene needs to be kept
556 | #' @param min_valid_cells Minimum total count a cell needs to be kept
557 | #' @param contrast Vector giving the order of the two groups; the sign of
558 | #' logFC is flipped when the factor levels are in the opposite order
559 | #' @param method Character vector, either 'glm', 'glmer' or 'bayesglm'
560 | #' @param silent Passed to \code{zlm}: silence common problems with fitting some genes
561 | #' @param check_logged Set FALSE to override sanity checks that try to
562 | #' ensure that the default assay is log-transformed and has at least one
563 | #' exact zero
564 | #' @import MAST
565 | #' @return A data frame with columns gene, logFC, p.value and q.value (FDR).
566 | #'
567 | #' @details
568 | #' To use this method, please install MAST, using the instructions at
569 | #' https://github.com/RGLab/MAST
570 | #'
571 | #' @importFrom utils installed.packages
572 | #'
573 | #' @export
574 | MASTTest<-function(sub_data,min_gene_expressed,min_valid_cells,contrast=unique(sub_data$compare_group),
575 |                    method='glm',silent=FALSE,check_logged=TRUE){
576 |   counts<-t(subset(sub_data,select=-compare_group))
577 |   counts<-apply(counts,2,function(x) {storage.mode(x) <- 'numeric'; x})
578 |   expressed_genes <- rownames(subset(counts,rowSums(counts) >= min_gene_expressed))
579 |   valid_cells<-(colSums(counts)>=min_valid_cells)
580 |   groups<-as.factor(sub_data[valid_cells,'compare_group'])
581 |   counts<-counts[expressed_genes,valid_cells]
582 |   counts<-log(counts+1,2)
583 |   # Cell-level metadata: one row per cell that passed the valid_cells filter.
584 |   cdat<-as.data.frame(data.frame(rownames(sub_data[valid_cells,]),sub_data[valid_cells,'compare_group']),stringsAsFactors = FALSE)
585 |   # Name the rows after the retained cells only. Assigning every row name of
586 |   # sub_data failed with a length mismatch whenever a cell had been dropped.
587 |   rownames(cdat)<-rownames(sub_data)[valid_cells]
588 |   colnames(cdat)<-c('wellKey','compare_group')
589 |   fdat<-as.data.frame(rownames(counts),stringsAsFactors = FALSE)
590 |   colnames(fdat)<-'primerid'
591 |   sca <- FromMatrix(counts, cdat, fdat,check_logged=check_logged)
592 |   zlmCond <- zlm(~compare_group, sca, method=method, silent=silent)
593 |   summaryCond <- summary(zlmCond, logFC=TRUE, doLRT=TRUE)
594 |   summaryDt <- summaryCond$datatable
595 |   # Columns are picked by position; presumably 1 = primerid, 4 = Pr(>Chisq)
596 |   # and 7 = coef in MAST's summary table (TODO confirm).
597 |   p.val<-data.frame(summaryDt[summaryDt$component=='H',4],summaryDt[summaryDt$component=='H',1])
598 |   colnames(p.val)<-c('p.value','primerid')
599 |   log.FC<-data.frame(summaryDt[summaryDt$component=='logFC',7],summaryDt[summaryDt$component=='logFC',1])
600 |   colnames(log.FC)<-c('logFC','primerid')
601 |   res<-log.FC %>% inner_join(p.val,by=c('primerid'='primerid'))
602 |   res<-res %>% dplyr::mutate(q.value=p.adjust(res$p.value, 'fdr')) %>% tibble::column_to_rownames('primerid')
603 |   # Factor levels listed in the opposite order to `contrast`: flip the sign.
604 |   if(all(levels(groups)!=contrast)) res$logFC<- -res$logFC
605 |   # Gene ids come from the joined result (its row names are the primerids), so
606 |   # each id stays attached to its own statistics whatever order MAST returns.
607 |   res<-data.frame(gene=rownames(res),res,stringsAsFactors = FALSE)
608 |   return(res)
609 | }
610 | 


--------------------------------------------------------------------------------
/R/FindLR.R:
--------------------------------------------------------------------------------
 1 | #' Finding ligand-receptor pairs
 2 | #'
 3 | #' Maps highly expressed genes or differentially expressed genes, supplied as
 4 | #' data frames, onto a ligand-receptor database and returns every pair whose
 5 | #' ligand and receptor are both present in the input.
 6 | #'
 7 | #' @param data_1 Data frame used to find the ligand-receptor pairs. It needs
 8 | #' the columns gene and cell_type plus exprs (for "mean count") or logFC and
 9 | #' q.value (for "DEG").
10 | #' @param data_2 Second data frame with the same columns. If NULL (default),
11 | #' pairs are searched within data_1; otherwise between data_1 and data_2, in
12 | #' both directions.
13 | #' @param datatype Type of data used as input. Options are "mean count"
14 | #' and "DEG"; any other value raises an error.
15 | #' @param comm_type Communication type, matched against the Classification
16 | #' column of the database. Available options are "cytokine", "checkpoint",
17 | #' "growth factor", "other"
18 | #' @param database Database used to find ligand-receptor pairs, with columns
19 | #' Ligand.ApprovedSymbol, Receptor.ApprovedSymbol and Classification. If set
20 | #' NULL, the built-in database will be used.
21 | #' @import dplyr
22 | #' @references Cytokines, Inflammation and Pain. Zhang et al,2007.
23 | #' @references Cytokines, Chemokines and Their Receptors. Cameron et al, 2000-2013
24 | #' @references Robust prediction of response to immune checkpoint blockade therapy
25 | #' in metastatic melanoma. Auslander et al, 2018.
26 | #' @references A draft network of ligand-receptor-mediated multicellular signalling
27 | #' in human, Jordan A. Ramilowski, Nature Communications, 2015
28 | #' @return A data frame with one row per ligand-receptor pair: ligand, receptor,
29 | #' the ligand-side statistics and cell type (cell_from_mean_exprs, or
30 | #' cell_from_logFC and cell_from_q.value, then cell_from), the matching
31 | #' receptor-side columns prefixed cell_to, and comm_type. For "DEG" input, rows
32 | #' whose two logFC values both equal 0.0001 are removed.
33 | #' @export
34 | FindLR<-function(data_1,data_2=NULL,datatype,comm_type,database=NULL){
35 |   # Default to the package's internal database; keep the requested class only.
36 |   lr_db<-database
37 |   if(is.null(lr_db)){
38 |     lr_db<-iTALK:::database
39 |   }
40 |   lr_db<-lr_db[lr_db$Classification==comm_type,]
41 | 
42 |   # Statistic columns taken from the gene lists, and the suffixes they
43 |   # receive once prefixed with cell_from_ / cell_to_ in the output.
44 |   if(datatype=='mean count'){
45 |     stat_cols<-'exprs'
46 |     stat_suffix<-'mean_exprs'
47 |   }else if(datatype=='DEG'){
48 |     stat_cols<-c('logFC','q.value')
49 |     stat_suffix<-stat_cols
50 |   }else{
51 |     stop('Error: invalid data type')
52 |   }
53 | 
54 |   first_list<-data_1
55 |   second_list<-data_2
56 |   if(is.null(second_list)){
57 |     second_list<-first_list
58 |   }
59 | 
60 |   # Gene-list columns needed for one side of a pair, renamed up front so
61 |   # that the joins below need no renaming afterwards.
62 |   side_table<-function(gene_list,side){
63 |     picked<-gene_list[,c('gene',stat_cols,'cell_type')]
64 |     names(picked)<-c('gene',paste0(side,'_',stat_suffix),side)
65 |     picked
66 |   }
67 | 
68 |   # Database pairs whose ligand is in `senders` and whose receptor is in
69 |   # `receivers`, annotated with both sides' statistics.
70 |   pair_table<-function(senders,receivers){
71 |     has_ligand<-which(lr_db$Ligand.ApprovedSymbol %in% senders$gene)
72 |     has_receptor<-which(lr_db$Receptor.ApprovedSymbol %in% receivers$gene)
73 |     hits<-intersect(has_ligand,has_receptor)
74 |     pairs<-lr_db[hits,c('Ligand.ApprovedSymbol','Receptor.ApprovedSymbol')]
75 |     pairs<-left_join(pairs,side_table(senders,'cell_from'),by=c('Ligand.ApprovedSymbol'='gene'))
76 |     left_join(pairs,side_table(receivers,'cell_to'),by=c('Receptor.ApprovedSymbol'='gene'))
77 |   }
78 | 
79 |   # Both directions are searched; duplicates (e.g. when data_2 is NULL) go.
80 |   both_ways<-rbind(pair_table(first_list,second_list),pair_table(second_list,first_list))
81 |   both_ways<-both_ways[!duplicated(both_ways),]
82 |   res<-dplyr::rename(as.data.frame(both_ways),ligand=Ligand.ApprovedSymbol,receptor=Receptor.ApprovedSymbol)
83 |   # Drop pairs where both sides carry a logFC of exactly 0.0001 (presumably a
84 |   # placeholder assigned upstream -- TODO confirm where it is set).
85 |   if(datatype=='DEG'){
86 |     res<-res[!(res$cell_from_logFC==0.0001 & res$cell_to_logFC==0.0001),]
87 |   }
88 |   res<-mutate(res,comm_type=comm_type)
89 |   return(res)
90 | }
91 | 


--------------------------------------------------------------------------------
/R/LRPlot.R:
--------------------------------------------------------------------------------
  1 | #' Plotting ligand-receptor pairs
  2 | #'
  3 | #' This function loads the significant interactions as a dataframe. A circle
  4 | #' plot will be generated using package circlize. The width of the arrow
  5 | #' represents the expression level/log fold change of the ligand; while the
  6 | #' width of arrow head represents the expression level/log fold change of the
  7 | #' receptor. Different colors and types of arrow show whether the ligand
  8 | #' and/or receptor are upregulated or downregulated. Users can choose the colors
  9 | #' that represent the cell types themselves; by default they are chosen randomly.
 10 | #'
 11 | #' @references Gu, Z. (2014) circlize implements and enhances circular
 12 | #' visualization in R. Bioinformatics.
 13 | #' @param data A dataframe contains significant ligand-receptor pairs and related
 14 | #' information such as expression level/log fold change and cell type
 15 | #' @param datatype Type of data. Options are "mean count" and "DEG"
 16 | #' @param gene_col Colors used to represent different categories of genes.
 17 | #' @param transparency Transparency of link colors, 0 means no transparency and
 18 | #' 1 means full transparency. If transparency is already set in col or row.col
 19 | #' or column.col, this argument will be ignored. NA also ignores this argument.
 20 | #' @param link.arr.lwd line width of the single line link which is put in the
 21 | #' center of the belt.
 22 | #' @param link.arr.lty line type of the single line link which is put in the
 23 | #' center of the belt.
 24 | #' @param link.arr.col color of the single line link which is put in the center
 25 | #' of the belt.
 26 | #' @param link.arr.width size of the single arrow head link which is put in the
 27 | #' center of the belt.
 28 | #' @param link.arr.type Type of the arrows, pass to Arrowhead. Default value is
 29 | #' triangle. There is an additional option big.arrow
 30 | #' @param facing Facing of text.
 31 | #' @param cell_col Colors used to represent types of cells. If set NULL, it
 32 | #' will be generated randomly
 33 | #' @param print.cell Whether or not print the type of cells on the outer layer
 34 | #' of the graph.
 35 | #' @param track.height_1 height of the cell notation track
 36 | #' @param track.height_2 height of the gene notation track
 37 | #' @param annotation.height_1 Track height corresponding to values in annotationTrack.
 38 | #' @param annotation.height_2 Track height corresponding to values in annotationTrack.
 39 | #' @param text.vjust adjustment on 'vertical' (radial) direction. Besides to set it
 40 | #' as numeric values, the value can also be a string contain absolute unit, e.g.
 41 | #' "2.1mm", "-1 inche", but only "mm", "cm", "inches"/"inche" are allowed.
 42 | #' @param ... Further arguments passed on to \code{chordDiagram}
 43 | #' @import randomcoloR
 44 | #' @import graphics
 45 | #' @import circlize
 46 | #' @return A figure of the significant interactions
 47 | #' @export
 48 | LRPlot<-function(data,datatype,gene_col=NULL,transparency=0.5,link.arr.lwd=1,link.arr.lty=NULL,link.arr.col=NULL,link.arr.width=NULL,
 49 |                  link.arr.type=NULL,facing='clockwise',cell_col=NULL,print.cell=TRUE,track.height_1=uh(2,'mm'),track.height_2=uh(12,'mm'),
 50 |                  annotation.height_1=0.01,annotation.height_2=0.01,text.vjust = '0.4cm',...){
 51 |   cell_group<-unique(c(data$cell_from,data$cell_to))
 52 |   genes<-c(structure(data$ligand,names=data$cell_from),structure(data$receptor,names=data$cell_to))
 53 |   genes<-genes[!duplicated(paste(names(genes),genes))]
 54 |   genes<-genes[order(names(genes))]
 55 |   if(is.null(link.arr.lty)){
 56 |     if(datatype=='mean count'){
 57 |       link.arr.lty='solid'
 58 |     }else if(datatype=='DEG'){
 59 |       link.arr.lty=structure(ifelse(data$cell_from_logFC==0.0001,'dashed','solid'),names=paste(data$cell_from,data$receptor))
 60 |     }else{
 61 |       print('invalid datatype')
 62 |     }
 63 |   }
 64 |   # logFC==0.0001 is special-cased (presumably a placeholder set upstream): the colour then follows the other gene only.
 65 |   if(is.null(link.arr.col)){
 66 |     if(datatype=='mean count'){
 67 |       data<-data %>% mutate(link_col='black')
 68 |     }else if(datatype=='DEG'){
 69 |       data<-data %>% mutate(link_col=ifelse(cell_from_logFC==0.0001,ifelse(cell_to_logFC>0,'#d73027','#00ccff'),
 70 |                                             ifelse(cell_to_logFC==0.0001,ifelse(cell_from_logFC>0,'#d73027','#00ccff'),
 71 |                                                    ifelse(cell_from_logFC>0,ifelse(cell_to_logFC>0,'#d73027','#dfc27d'),
 72 |                                                           ifelse(cell_to_logFC>0,'#9933ff','#00ccff')))))
 73 |     }else{
 74 |       print('invalid datatype')
 75 |     }
 76 |   }else{
 77 |     data$link_col=link.arr.col
 78 |   }
 79 |   if(is.null(link.arr.type)){
 80 |     if(datatype=='mean count'){
 81 |       link.arr.type='triangle'
 82 |     }else if(datatype=='DEG'){
 83 |       link.arr.type=structure(ifelse(data$cell_to_logFC==0.0001,'ellipse','triangle'),names=paste(data$cell_from,data$receptor))
 84 |     }else{
 85 |       print('invalid datatype')
 86 |     }
 87 |   }
 88 |   if(is.null(gene_col)){
 89 |     comm_col<-structure(c('#99ff99','#99ccff','#ff9999','#ffcc99'),names=c('other','cytokine','checkpoint','growth factor'))
 90 |     gene_col<-structure(c(comm_col[data$comm_type],rep('#073c53',length(data$receptor))),names=c(data$ligand,data$receptor))
 91 |   }
 92 |   if(is.null(cell_col)){
 93 |     cell_col<-structure(randomColor(count=length(unique(names(genes))),luminosity='dark'),names=unique(names(genes)))
 94 |   }
 95 |   # Constant line widths are capped at 5; abs() has to wrap the value, not the comparison.
 96 |   if(is.null(link.arr.lwd)){
 97 |     data<-data %>% mutate(arr_width=1)
 98 |   }else if(max(abs(link.arr.lwd))-min(abs(link.arr.lwd))==0 && all(link.arr.lwd!=0.0001)){
 99 |     data<-data %>% mutate(arr_width=ifelse(abs(link.arr.lwd)<5,abs(link.arr.lwd),5))
100 |   }else{
101 |     data<-data %>% mutate(arr_width=ifelse(link.arr.lwd==0.0001,2,1+5/(max(abs(link.arr.lwd))-min(abs(link.arr.lwd)))*(abs(link.arr.lwd)-min(abs(link.arr.lwd)))))
102 |   }
103 |   # 1 degree between sectors of one cell type; 8 degrees close a cell type when there are several.
104 |   closing_gap<-if(length(cell_group)!=1) 8 else 1
105 |   gap.degree <- do.call("c", lapply(table(names(genes)), function(i) c(rep(1, i-1), closing_gap)))
106 |   circos.par(gap.degree = gap.degree)
107 |   # Reset the circos parameters on exit, including when plotting stops with an error.
108 |   on.exit(circos.clear(),add=TRUE)
109 |   if(length(gene_col)==1){
110 |     grid.col=gene_col
111 |   }else{
112 |     grid.col=gene_col[genes]
113 |     names(grid.col)<-paste(names(genes),genes)
114 |   }
115 |   if(is.null(link.arr.width)){
116 |     data<-data %>% mutate(link.arr.width=data$arr_width/10)
117 |   }else if(max(abs(link.arr.width))-min(abs(link.arr.width))==0 && all(link.arr.width!=0.0001)){
118 |     data<-data %>% mutate(link.arr.width=ifelse(abs(link.arr.width)<0.5,abs(link.arr.width),0.5))
119 |   }else{
120 |     data<-data %>% mutate(link.arr.width=ifelse(link.arr.width==0.0001,0.2,(1+5/(max(abs(link.arr.width))-min(abs(link.arr.width)))*(abs(link.arr.width)-min(abs(link.arr.width))))/10))
121 |   }
122 |   chordDiagram(as.data.frame(cbind(paste(data$cell_from,data$ligand),paste(data$cell_to,data$receptor))), order=paste(names(genes),genes),
123 |                grid.col=grid.col,transparency=transparency,directional=1,direction.type='arrows',link.arr.lwd=data$arr_width,link.arr.lty=link.arr.lty,
124 |                link.arr.type=link.arr.type,link.arr.width=data$link.arr.width,link.arr.col=data$link_col,col='#00000000',annotationTrack=c('grid'),preAllocateTracks = list(
125 |                  list(track.height = track.height_1),list(track.height = track.height_2)),annotationTrackHeight = c(annotation.height_1,annotation.height_2),...)
126 |   circos.trackPlotRegion(track.index = 2, panel.fun = function(x, y) {
127 |     xlim = get.cell.meta.data("xlim")
128 |     ylim = get.cell.meta.data("ylim")
129 |     sector.index = genes[get.cell.meta.data("sector.numeric.index")]
130 |     circos.text(mean(xlim),mean(ylim),sector.index, col = "black", cex = 0.7, facing = facing, niceFacing = TRUE)
131 |   }, bg.border = 0)
132 |   if(print.cell){
133 |     for(c in unique(names(genes))) {
134 |       gene = as.character(genes[names(genes) == c])
135 |       highlight.sector(sector.index = paste(c,gene), track.index = 1, col = ifelse(length(cell_col)==1,cell_col,cell_col[c]), text = c, text.vjust = text.vjust, niceFacing = TRUE,lwd=1)
136 |     }
137 |   }
138 |   invisible(NULL)
139 | }
140 | 


--------------------------------------------------------------------------------
/R/NetView.R:
--------------------------------------------------------------------------------
 1 | #' Network Viewing of cell-cell communication
 2 | #'
 3 | #' This function loads the significant interactions as a dataframe, and colors
 4 | #' represent different types of cells as a structure. The width of edges represent
 5 | #' the strength of the communication. Labels on the edges show exactly how many
 6 | #' interactions exist between two types of cells.
 7 | #'
 8 | #' @references Csardi G, Nepusz T: The igraph software package for complex network
 9 | #' research, InterJournal, Complex Systems 1695. 2006.
10 | #' http://igraph.org
11 | #' @param data A dataframe containing ligand-receptor pairs and corresponding
12 | #' cell types used to do the plotting
13 | #' @param col Colors for the cell types, preferably named by cell type
14 | #' @param label Whether or not shows the label of edges (number of connections
15 | #' between different cell types)
16 | #' @param edge.curved Specifies whether to draw curved edges, or not.
17 | #' This can be a logical or a numeric vector or scalar.
18 | #' First the vector is replicated to have the same length as the number of
19 | #' edges in the graph. Then it is interpreted for each edge separately.
20 | #' A numeric value specifies the curvature of the edge; zero curvature means
21 | #' straight edges, negative values means the edge bends clockwise, positive
22 | #' values the opposite. TRUE means curvature 0.5, FALSE means curvature zero
23 | #' @param shape The shape of the vertex, currently “circle”, “square”,
24 | #' “csquare”, “rectangle”, “crectangle”, “vrectangle”, “pie” (see
25 | #' vertex.shape.pie), ‘sphere’, and “none” are supported, and only by the
26 | #' plot.igraph command. “none” does not draw the vertices at all, although
27 | #' vertex label are plotted (if given). See shapes for details about vertex
28 | #' shapes and vertex.shape.pie for using pie charts as vertices.
29 | #' @param layout The layout specification. It must be a call to a layout
30 | #' specification function.
31 | #' @param vertex.size The size of vertex
32 | #' @param margin The amount of empty space below, over, at the left and right
33 | #'  of the plot, it is a numeric vector of length four. Usually values between
34 | #'  0 and 0.5 are meaningful, but negative values are also possible, that will
35 | #'  make the plot zoom in to a part of the graph. If it is shorter than four
36 | #'  then it is recycled.
37 | #' @param vertex.label.cex The label size of vertex
38 | #' @param vertex.label.color The color of label for vertex
39 | #' @param arrow.width The width of arrows
40 | #' @param edge.label.color The color for single arrow
41 | #' @param edge.label.cex The size of label for arrows
42 | #' @param edge.max.width The maximum arrow size
43 | #' @import network
44 | #' @import igraph
45 | #' @return A network graph of the significant interactions
46 | #' @export
47 | NetView<-function(data,col,label=TRUE,edge.curved=0.5,shape='circle',layout=nicely(),vertex.size=20,margin=0.2,
48 |                   vertex.label.cex=1.5,vertex.label.color='black',arrow.width=1.5,edge.label.color='black',edge.label.cex=1,edge.max.width=10){
49 |   net<-data %>% group_by(cell_from,cell_to) %>% dplyr::summarize(n=n())
50 |   net<-as.data.frame(net,stringsAsFactors=FALSE)
51 |   g<-graph.data.frame(net,directed=TRUE)
52 |   edge.start <- ends(g, es=E(g), names=FALSE)
53 |   coords<-layout_(g,layout)
54 |   coords_scale<-if(nrow(coords)!=1) scale(coords) else coords
55 |   loop.angle<-ifelse(coords_scale[V(g),1]>0,-atan(coords_scale[V(g),2]/coords_scale[V(g),1]),pi-atan(coords_scale[V(g),2]/coords_scale[V(g),1]))
56 |   V(g)$size<-vertex.size
57 |   # Colours are looked up by cell-type name so that a vertex keeps its colour
58 |   # whatever order the cell types appear in `data`. When `col` does not name
59 |   # every vertex the previous positional matching is used instead.
60 |   vertex_names<-V(g)$name
61 |   V(g)$color<-if(all(vertex_names %in% names(col))) unname(col[vertex_names]) else col[V(g)]
62 |   V(g)$label.color<-vertex.label.color
63 |   V(g)$label.cex<-vertex.label.cex
64 |   if(label){
65 |     E(g)$label<-E(g)$n
66 |   }
67 |   if(max(E(g)$n)==min(E(g)$n)){
68 |     E(g)$width<-2
69 |   }else{
70 |     E(g)$width<-1+edge.max.width/(max(E(g)$n)-min(E(g)$n))*(E(g)$n-min(E(g)$n))
71 |   }
72 |   E(g)$arrow.width<-arrow.width
73 |   E(g)$label.color<-edge.label.color
74 |   E(g)$label.cex<-edge.label.cex
75 |   E(g)$color<-V(g)$color[edge.start[,1]]
76 |   if(sum(edge.start[,2]==edge.start[,1])!=0){
77 |     E(g)$loop.angle[which(edge.start[,2]==edge.start[,1])]<-loop.angle[edge.start[which(edge.start[,2]==edge.start[,1]),1]]
78 |   }
79 |   plot(g,edge.curved=edge.curved,vertex.shape=shape,layout=coords_scale,margin=margin)
80 |   return(g)
81 | }
82 | 


--------------------------------------------------------------------------------
/R/TimePlot.R:
--------------------------------------------------------------------------------
 1 | #' Plotting a ligand-receptor pair over time
 2 | #'
 3 | #' Takes per-sample expression data together with the names of a ligand, a
 4 | #' receptor and the two interacting cell types. The ligand values of the sending
 5 | #' cell type and the receptor values of the receiving cell type are drawn for each
 6 | #' time point, with the mean and mean +/- sd overlaid as error bars.
 7 | #'
 8 | #' @param data A dataframe with one column per gene (including the chosen ligand
 9 | #' and receptor) plus the columns time and cell_type
10 | #' @param ligand String as selected ligand
11 | #' @param receptor String as selected receptor
12 | #' @param cell_from The cell type ligand gene belongs to
13 | #' @param cell_to The cell type receptor gene belongs to
14 | #' @param Time Time points to show on the plot; all time points in data if NULL
15 | #' @import tidyr
16 | #' @import ggplot2
17 | #' @return A ggplot object showing the paired interaction
18 | #' @export
19 | TimePlot<-function(data,ligand,receptor,cell_from,cell_to,Time=NULL){
20 |   if(is.null(Time)) Time<-unique(data$time)
21 |   # Keep the requested time points and only the columns that get plotted.
22 |   kept<-select(filter(data,time %in% Time),ligand,receptor,time,cell_type)
23 |   # Long format: one row per (sample, gene); keep ligand rows of the sending
24 |   # cell type and receptor rows of the receiving cell type.
25 |   long<-gather(kept, gene, value, c(ligand,receptor), factor_key=TRUE)
26 |   long<-filter(long,(cell_type==cell_from & gene==ligand) | (cell_type==cell_to & gene==receptor))
27 |   long$time<-as.factor(long$time)
28 |   dodge<-position_dodge(0.75)
29 |   g<-ggplot(long,aes(x=time,y=value,color=gene))+geom_point(position=dodge)+
30 |     stat_summary(fun.y=mean, aes(ymin=..y.., ymax=..y..), geom='errorbar', width=0.5,position=dodge)+
31 |     stat_summary(fun.ymin=function(x)(mean(x)-sd(x)), fun.ymax=function(x)(mean(x)+sd(x)),geom="errorbar", width=0.1,position=dodge)+
32 |     theme_minimal()+theme(axis.line=element_line(),plot.title=element_text(size=14,face='bold',hjust=0.5))+ylab('gene expression')+xlab('time')+ggtitle(paste0(ligand,'-',receptor))
33 |   return(g)
34 | }
35 | 


--------------------------------------------------------------------------------
/R/rawParse.R:
--------------------------------------------------------------------------------
 1 | #' Parsing the data to get top expressed genes
 2 | #'
 3 | #' This function loads the count data as a dataframe. It assumes that each line
 4 | #' contains gene expression profile of one single cell, and each column
 5 | #' contains the one single gene expression profile in different cells. The dataframe
 6 | #' should also contain the cell type information with column name 'cell_type'.
 7 | #' Every column other than 'cell_type' is converted to numeric and ranked as a
 8 | #' gene, so metadata columns such as 'compare_group' or 'batch' (needed later to
 9 | #' call differential expressed ligand-receptor pairs) are presumably best added
10 | #' after this step.
11 | #'
12 | #' @param data Input data, raw or normalized count with 'cell_type' column
13 | #' @param top_genes (scale 1 to 100) Top percent highly expressed genes used
14 | #' to find ligand-receptor pairs, default is 50
15 | #' @param stats Whether calculates the mean or the median of the data. Available
16 | #' options are 'mean' and 'median'; any other value raises an error.
17 | #' @importFrom progress progress_bar
18 | #' @return A dataframe with columns gene, exprs and cell_type
19 | #' @export
20 | rawParse<-function(data,top_genes=50,stats='mean'){
21 |   # Reject unknown options up front instead of printing and failing later in the loop.
22 |   if(!isTRUE(stats %in% c('mean','median'))) stop("error stats option: use 'mean' or 'median'")
23 |   cell_group<-unique(data$cell_type)
24 |   pb <- progress::progress_bar$new(total = length(cell_group))
25 |   pb$tick(0)
26 |   per_type<-list()
27 |   for(i in cell_group){
28 |     sub_data<-data[data$cell_type==i,]
29 |     counts<-t(subset(sub_data,select=-cell_type))
30 |     counts<-apply(counts,2,function(x) {storage.mode(x) <- 'numeric'; x})
31 |     centre<-if(stats=='mean') rowMeans(counts) else apply(counts, 1, FUN = median)
32 |     temp<-data.frame(centre,i,stringsAsFactors = FALSE)
33 |     temp<-temp[order(temp[,1],decreasing=TRUE),]
34 |     # seq_len() returns no rows for top_genes=0, where 1:0 selected row 1.
35 |     temp<-temp[seq_len(ceiling(nrow(temp)*top_genes/100)),]
36 |     temp<-temp %>% tibble::rownames_to_column()
37 |     per_type[[length(per_type)+1]]<-temp
38 |     pb$tick()
39 |   }
40 |   # Bind once after the loop instead of growing a data frame on every pass.
41 |   res<-do.call(rbind,per_type)
42 |   colnames(res)<-c('gene','exprs','cell_type')
43 |   return(res)
44 | }
45 | 


--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolgenome/iTALK/6d9b3907f00004fcac9514d2726ad68d524a952b/R/sysdata.rda


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # iTALK
 2 | `iTALK` is an R toolkit for characterizing and illustrating intercellular communication, developed and maintained by [Linghua Wang lab](https://www.mdanderson.org/research/departments-labs-institutes/labs/linghua-wang-laboratory.html) at the University of Texas MD Anderson Cancer Center. `iTALK` can be used to visualize the complexity, diversity and dynamics of cell-cell communication in a wide range of biological processes. For more information, please refer to [our manuscript](https://www.biorxiv.org/content/early/2019/01/04/507871).
 3 | 
 4 | # Installation
 5 | To install the development version from GitHub:
 6 | 
 7 | ```R
 8 | if(!require(devtools)) install.packages("devtools");
 9 | devtools::install_github("Coolgenome/iTALK", build_vignettes = TRUE)
10 | ```
11 | To load the installed `iTALK` in R:
12 | ```R
13 | library(iTALK)
14 | ```
15 | # Citation
16 | This package is intended for research use only. For any bugs, enhancement requests and other issues, please use the [`iTALK` GitHub issues tracker](https://github.com/Coolgenome/iTALk/issues) or email [Yuanxin Wang](mailto:ywang65@mdanderson.org). If you find iTALK useful and use iTALK in your publication, please cite the paper: [iTALK: an R Package to Characterize and Illustrate Intercellular Communication](https://www.biorxiv.org/content/early/2019/01/04/507871)
17 | 


--------------------------------------------------------------------------------
/data/LR_database.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolgenome/iTALK/6d9b3907f00004fcac9514d2726ad68d524a952b/data/LR_database.rda


--------------------------------------------------------------------------------
/example/example_code.r:
--------------------------------------------------------------------------------
 1 | 
 2 | # This example data is from 10x pbmc dataset. Samples are randomly selected from each cell type. And groups are randomly assigned to each sample to make the illustration.
 3 | 
 4 | library(iTALK)
 5 | 
 6 | # read the data
 7 | data<-read.table('example_data.txt',sep='\t',header=T,stringsAsFactors = F)
 8 | 
 9 | ## highly expressed ligand-receptor pairs
10 | 
11 | # find top 50 percent highly expressed genes
12 | highly_exprs_genes<-rawParse(data,top_genes=50,stats='mean')
13 | # find the ligand-receptor pairs from highly expressed genes
14 | comm_list<-c('growth factor','other','cytokine','checkpoint')
15 | cell_col<-structure(c('#4a84ad','#4a1dc6','#e874bf','#b79eed', '#ff636b', '#52c63b','#9ef49a'),names=unique(data$cell_type))
16 | par(mfrow=c(1,2))
17 | res<-NULL
18 | for(comm_type in comm_list){
19 |     res_cat<-FindLR(highly_exprs_genes,datatype='mean count',comm_type=comm_type)
20 |     res_cat<-res_cat[order(res_cat$cell_from_mean_exprs*res_cat$cell_to_mean_exprs,decreasing=T),]
21 |     #plot by ligand category
22 |     #overall network plot
23 |     NetView(res_cat,col=cell_col,vertex.label.cex=1,arrow.width=1,edge.max.width=5)
24 |     #top 20 ligand-receptor pairs
25 |     LRPlot(res_cat[1:20,],datatype='mean count',cell_col=cell_col,link.arr.lwd=res_cat$cell_from_mean_exprs[1:20],link.arr.width=res_cat$cell_to_mean_exprs[1:20])
26 |     title(comm_type)
27 |     res<-rbind(res,res_cat)
28 | }
29 | res<-res[order(res$cell_from_mean_exprs*res$cell_to_mean_exprs,decreasing=T),][1:20,]
30 | NetView(res,col=cell_col,vertex.label.cex=1,arrow.width=1,edge.max.width=5)
31 | LRPlot(res[1:20,],datatype='mean count',cell_col=cell_col,link.arr.lwd=res$cell_from_mean_exprs[1:20],link.arr.width=res$cell_to_mean_exprs[1:20])
32 | 
33 | ## significant ligand-receptor pairs between compare groups
34 | 
35 | # randomly assign the compare group to each sample
36 | data<-data %>% mutate(compare_group=sample(2,nrow(data),replace=TRUE))
37 | # find DEGenes of regulatory T cells and NK cells between these 2 groups
38 | deg_t<-DEG(data %>% filter(cell_type=='regulatory_t'),method='Wilcox',contrast=c(2,1))
39 | deg_nk<-DEG(data %>% filter(cell_type=='cd56_nk'),method='Wilcox',contrast=c(2,1))
40 | # find significant ligand-receptor pairs per communication type, plot them, and collect them in res
41 | par(mfrow=c(1,2))
42 | res<-NULL
43 | for(comm_type in comm_list){
44 |     res_cat<-FindLR(deg_t,deg_nk,datatype='DEG',comm_type=comm_type)
45 |     res_cat<-res_cat[order(res_cat$cell_from_logFC*res_cat$cell_to_logFC,decreasing=TRUE),]
46 |     #plot by ligand category: skip categories without pairs and cap the circle plot at the top 20 pairs
47 |     if(nrow(res_cat)==0){
48 |         next
49 |     }else if(nrow(res_cat)>=20){
50 |         LRPlot(res_cat[1:20,],datatype='DEG',cell_col=cell_col,link.arr.lwd=res_cat$cell_from_logFC[1:20],link.arr.width=res_cat$cell_to_logFC[1:20])
51 |     }else{
52 |         LRPlot(res_cat,datatype='DEG',cell_col=cell_col,link.arr.lwd=res_cat$cell_from_logFC,link.arr.width=res_cat$cell_to_logFC)
53 |     }
54 |     NetView(res_cat,col=cell_col,vertex.label.cex=1,arrow.width=1,edge.max.width=5)
55 |     title(comm_type)
56 |     res<-rbind(res,res_cat)
57 | }
58 | if(is.null(res)){
59 |     print('No significant pairs found')
60 | }else{
61 |     # with 20 or more collected pairs, keep only the 20 with the largest logFC product
62 |     if(nrow(res)>=20){
63 |         res<-res[order(res$cell_from_logFC*res$cell_to_logFC,decreasing=TRUE),][1:20,]
64 |     }
65 |     NetView(res,col=cell_col,vertex.label.cex=1,arrow.width=1,edge.max.width=5)
66 |     LRPlot(res,datatype='DEG',cell_col=cell_col,link.arr.lwd=res$cell_from_logFC,link.arr.width=res$cell_to_logFC)
67 | }
68 | # The compare groups were assigned to samples at random, purely to show how to use the package, so the groups have no biological difference.
69 | # Therefore no significant genes should be expected.
70 | 


--------------------------------------------------------------------------------
/man/DEG.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DEG.R
 3 | \name{DEG}
 4 | \alias{DEG}
 5 | \title{Call DEGenes}
 6 | \usage{
 7 | DEG(data, method, min_gene_expressed = 0, min_valid_cells = 0,
 8 |   contrast = NULL, q_cut = 0.05, ...)
 9 | }
10 | \arguments{
11 | \item{data}{Input raw or normalized count data with column 'cell_type'
12 | and 'compare_group'}
13 | 
14 | \item{method}{Method used to call DEGenes. Available options are:
15 | \itemize{
16 |  \item{Wilcox}: Wilcoxon rank sum test
17 |  \item{DESeq2}: Negative binomial model based differential analysis
18 |  (Love et al, Genome Biology, 2014)
19 |  \item{SCDE}: Bayesian approach to single-cell differential
20 |  expression analysis (Kharchenko et al, Nature Method, 2014)
21 |  \item{monocle}: Census based differential analysis (Qiu et al,
22 |  Nature Methods, 2017)
23 |  \item{edgeR}: Negative binomial distributions, including empirical
24 |  Bayes estimation, exact tests, generalized linear models and
25 |  quasi-likelihood tests based differential analysis (McCarthy et al,
26 |  Nucleic Acids Research, 2012)
27 |  \item{DESingle}: Zero-Inflated Negative Binomial model to estimate
28 |  the proportion of real and dropout zeros and to define and detect
29 |  the 3 types of DE genes (Miao et al, Bioinformatics, 2018)
30 |  \item{MAST}: GLM-framework that treats cellular detection rate as a
31 |  covariate (Finak et al, Genome Biology, 2015)
32 |  }}
33 | 
34 | \item{min_gene_expressed}{Genes expressed in minimum number of cells}
35 | 
36 | \item{min_valid_cells}{Minimum number of genes detected in the cell}
37 | 
38 | \item{contrast}{String vector specifying the contrast to be
39 | tested against the log2-fold-change threshold}
40 | 
41 | \item{q_cut}{Cut-off for q value}
42 | }
43 | \value{
44 | A matrix of the differentially expressed genes
45 | }
46 | \description{
47 | This function loads the data as a dataframe, and method as a string.
48 | It assumes that each line contains gene expression profile of one single
49 | cell, and each column contains the one single gene expression profile in
50 | different cells. The dataframe should also contain the cell type information
51 | with column name 'cell_type', as well as group information as 'compare_group'
52 | Batch information as 'batch' is optional. If included, users may want to use
53 | the raw count data for later analysis. Differential expressed genes will be
54 | called within each cell type by the method users select. For bulk RNAseq,
55 | we provide edgeR, DESeq2. And for scRNA-seq, popular methods in packages
56 | scde, monocle, DEsingle and MAST are available.
57 | }
58 | 


--------------------------------------------------------------------------------
/man/DESeq2Test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DEG.R
 3 | \name{DESeq2Test}
 4 | \alias{DESeq2Test}
 5 | \title{Differential expression using DESeq2}
 6 | \usage{
 7 | DESeq2Test(sub_data, min_gene_expressed, min_valid_cells,
 8 |   contrast = unique(sub_data$compare_group), test = "Wald",
 9 |   fitType = "parametric", sfType = "ratio", betaPrior = FALSE,
10 |   quiet = FALSE, modelMatrixType = "standard",
11 |   minReplicatesForReplace = 7, useT = FALSE, minmu = 0.5,
12 |   parallel = FALSE, BPPARAM = bpparam())
13 | }
14 | \arguments{
15 | \item{sub_data}{Count data removed cell_type and selected certain two
16 | compare_group}
17 | 
18 | \item{min_gene_expressed}{Genes expressed in minimum number of cells}
19 | 
20 | \item{min_valid_cells}{Minimum number of genes detected in the cell}
21 | 
22 | \item{contrast}{String vector specifying the contrast to be
23 | tested against the log2-fold-change threshold}
24 | 
25 | \item{test}{either "Wald" or "LRT", which will then use either
26 | Wald significance tests (defined by \code{\link{nbinomWaldTest}}),
27 | or the likelihood ratio test on the difference in deviance between a
28 | full and reduced model formula (defined by \code{\link{nbinomLRT}})}
29 | 
30 | \item{fitType}{either "parametric", "local", or "mean"
31 | for the type of fitting of dispersions to the mean intensity.
32 | See \code{\link{estimateDispersions}} for description.}
33 | 
34 | \item{sfType}{either "ratio", "poscounts", or "iterate"
35 | for the type of size factor estimation. See
36 | \code{\link{estimateSizeFactors}} for description.}
37 | 
38 | \item{betaPrior}{whether or not to put a zero-mean normal prior on
39 | the non-intercept coefficients
40 | See \code{\link{nbinomWaldTest}} for description of the calculation
41 | of the beta prior. In versions \code{>=1.16}, the default is set
42 | to \code{FALSE}, and shrunken LFCs are obtained afterwards using
43 | \code{\link{lfcShrink}}.}
44 | 
45 | \item{quiet}{whether to print messages at each step}
46 | 
47 | \item{modelMatrixType}{either "standard" or "expanded", which describe
48 | how the model matrix, X of the GLM formula is formed.
49 | "standard" is as created by \code{model.matrix} using the
50 | design formula. "expanded" includes an indicator variable for each
51 | level of factors in addition to an intercept. for more information
52 | see the Description of \code{\link{nbinomWaldTest}}.
53 | betaPrior must be set to TRUE in order for expanded model matrices
54 | to be fit.}
55 | 
56 | \item{minReplicatesForReplace}{the minimum number of replicates required
57 | in order to use \code{\link{replaceOutliers}} on a
58 | sample. If there are samples with so many replicates, the model will
59 | be refit after these replacing outliers, flagged by Cook's distance.
60 | Set to \code{Inf} in order to never replace outliers.}
61 | 
62 | \item{useT}{logical, passed to \code{\link{nbinomWaldTest}}, default is FALSE,
63 | where Wald statistics are assumed to follow a standard Normal}
64 | 
65 | \item{minmu}{lower bound on the estimated count for fitting gene-wise dispersion
66 | and for use with \code{nbinomWaldTest} and \code{nbinomLRT}}
67 | 
68 | \item{parallel}{if FALSE, no parallelization. if TRUE, parallel
69 | execution using \code{BiocParallel}, see next argument \code{BPPARAM}.
70 | A note on running in parallel using \code{BiocParallel}: it may be
71 | advantageous to remove large, unneeded objects from your current
72 | R environment before calling \code{DESeq},
73 | as it is possible that R's internal garbage collection
74 | will copy these files while running on worker nodes.}
75 | 
76 | \item{BPPARAM}{an optional parameter object passed internally
77 | to \code{\link{bplapply}} when \code{parallel=TRUE}.
78 | If not specified, the parameters last registered with
79 | \code{\link{register}} will be used.}
80 | }
81 | \value{
82 | A matrix of differentially expressed genes and related statistics.
83 | }
84 | \description{
85 | Identifies differentially expressed genes between two groups of cells using
86 | DESeq2
87 | }
88 | \details{
89 | This test does not support pre-processed genes. To use this method, please
90 | install DESeq2, using the instructions at
91 | https://bioconductor.org/packages/release/bioc/html/DESeq2.html
92 | }
93 | \references{
94 | Love MI, Huber W and Anders S (2014). "Moderated estimation of
95 | fold change and dispersion for RNA-seq data with DESeq2." Genome Biology.
96 | https://bioconductor.org/packages/release/bioc/html/DESeq2.html
97 | }
98 | 


--------------------------------------------------------------------------------
/man/DESingleTest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DEG.R
 3 | \name{DESingleTest}
 4 | \alias{DESingleTest}
 5 | \title{Differential expression using DEsingle}
 6 | \usage{
 7 | DESingleTest(sub_data, min_gene_expressed, min_valid_cells,
 8 |   contrast = unique(sub_data$compare_group), parallel = FALSE,
 9 |   BPPARAM = bpparam())
10 | }
11 | \arguments{
12 | \item{sub_data}{Count data removed cell_type and selected certain two
13 | compare_group}
14 | 
15 | \item{min_gene_expressed}{Genes expressed in minimum number of cells}
16 | 
17 | \item{min_valid_cells}{Minimum number of genes detected in the cell}
18 | 
19 | \item{contrast}{String vector specifying the contrast to be
20 | tested against the log2-fold-change threshold}
21 | 
22 | \item{parallel}{If FALSE (default), no parallel computation is used;
23 | if TRUE, parallel computation using \code{BiocParallel}, with argument
24 | \code{BPPARAM}.}
25 | 
26 | \item{BPPARAM}{An optional parameter object passed internally to
27 | \code{\link{bplapply}} when \code{parallel=TRUE}. If not specified,
28 | \code{\link{bpparam}()} (default) will be used.}
29 | }
30 | \value{
31 | A matrix of differentially expressed genes and related statistics.
32 | }
33 | \description{
34 | Identifies differentially expressed genes between two groups of cells using
35 | DEsingle
36 | }
37 | \details{
38 | This test does not support pre-processed genes. To use this method, please
39 | install DEsingle, using the instructions at
40 | https://github.com/miaozhun/DEsingle
41 | }
42 | \references{
43 | Zhun Miao, Ke Deng, Xiaowo Wang, Xuegong Zhang (2018). DEsingle
44 | for detecting three types of differential expression in single-cell RNA-seq
45 | data. Bioinformatics, bty332. 10.1093/bioinformatics/bty332.
46 | }
47 | 


--------------------------------------------------------------------------------
/man/FindLR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FindLR.R
 3 | \name{FindLR}
 4 | \alias{FindLR}
 5 | \title{Finding ligand-receptor pairs}
 6 | \usage{
 7 | FindLR(data_1, data_2 = NULL, datatype, comm_type, database = NULL)
 8 | }
 9 | \arguments{
10 | \item{data_1}{Data used to find the ligand-receptor pairs}
11 | 
12 | \item{data_2}{Second dataset used to find ligand-receptor pairs. If set NULL,
13 | pairs will be found within data_1. Otherwise, pairs will be found between
14 | data_1 and data_2. Default is NULL.}
15 | 
16 | \item{datatype}{Type of data used as input. Options are "mean count"
17 | and "DEG"}
18 | 
19 | \item{comm_type}{Communication type. Available options are "cytokine",
20 | "checkpoint", "growth factor", "other"}
21 | 
22 | \item{database}{Database used to find ligand-receptor pairs. If set NULL,
23 | the built-in database will be used.}
24 | }
25 | \value{
26 | A dataframe of the significant interactions
27 | }
28 | \description{
29 | This function loads the highly expressed genes or differentially expressed
30 | genes as a dataframe. Significant interactions are found through mapping
31 | these genes to our ligand-receptor database.
32 | }
33 | \references{
34 | Cytokines, Inflammation and Pain. Zhang et al,2007.
35 | 
36 | Cytokines, Chemokines and Their Receptors. Cameron et al, 2000-2013
37 | 
38 | Robust prediction of response to immune checkpoint blockade therapy
39 | in metastatic melanoma. Auslander et al, 2018.
40 | 
41 | A draft network of ligand-receptor-mediated multicellular signalling
42 | in human, Jordan A. Ramilowski, Nature Communications, 2015
43 | }
44 | 


--------------------------------------------------------------------------------
/man/LRPlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LRPlot.R
 3 | \name{LRPlot}
 4 | \alias{LRPlot}
 5 | \title{Plotting ligand-receptor pairs}
 6 | \usage{
 7 | LRPlot(data, datatype, gene_col = NULL, transparency = 0.5,
 8 |   link.arr.lwd = 1, link.arr.lty = NULL, link.arr.col = NULL,
 9 |   link.arr.width = NULL, link.arr.type = NULL, facing = "clockwise",
10 |   cell_col = NULL, print.cell = TRUE, track.height_1 = uh(2, "mm"),
11 |   track.height_2 = uh(12, "mm"), annotation.height_1 = 0.01,
12 |   annotation.height_2 = 0.01, text.vjust = "0.4cm", ...)
13 | }
14 | \arguments{
15 | \item{data}{A dataframe contains significant ligand-receptor pairs and related
16 | information such as expression level/log fold change and cell type}
17 | 
18 | \item{datatype}{Type of data. Options are "mean count" and "DEG"}
19 | 
20 | \item{gene_col}{Colors used to represent different categories of genes.}
21 | 
22 | \item{transparency}{Transparency of link colors, 0 means no transparency and
23 | 1 means full transparency. If transparency is already set in col or row.col
24 | or column.col, this argument will be ignored. NA also ignores this argument.}
25 | 
26 | \item{link.arr.lwd}{line width of the single line link which is put in the
27 | center of the belt.}
28 | 
29 | \item{link.arr.lty}{line type of the single line link which is put in the
30 | center of the belt.}
31 | 
32 | \item{link.arr.col}{color of the single line link which is put in the center
33 | of the belt.}
34 | 
35 | \item{link.arr.width}{size of the single arrow head link which is put in the
36 | center of the belt.}
37 | 
38 | \item{link.arr.type}{Type of the arrows, pass to Arrowhead. Default value is
39 | triangle. There is an additional option big.arrow}
40 | 
41 | \item{facing}{Facing of text.}
42 | 
43 | \item{cell_col}{Colors used to represent types of cells. If set NULL, it
44 | will be generated randomly}
45 | 
46 | \item{print.cell}{Whether or not print the type of cells on the outer layer
47 | of the graph.}
48 | 
49 | \item{track.height_1}{height of the cell notation track}
50 | 
51 | \item{track.height_2}{height of the gene notation track}
52 | 
53 | \item{annotation.height_1}{Track height corresponding to values in annotationTrack.}
54 | 
55 | \item{annotation.height_2}{Track height corresponding to values in annotationTrack.}
56 | 
57 | \item{text.vjust}{adjustment on ’vertical’ (radial) direction. Besides setting it
58 | as numeric values, the value can also be a string containing an absolute unit, e.g.
59 | "2.1mm", "-1 inche", but only "mm", "cm", "inches"/"inche" are allowed.}
60 | }
61 | \value{
62 | A figure of the significant interactions
63 | }
64 | \description{
65 | This function loads the significant interactions as a dataframe. A circle
66 | plot will be generated using package circlize. The width of the arrow
67 | represents the expression level/log fold change of the ligand; while the
68 | width of arrow head represents the expression level/log fold change of the
69 | receptor. Different color and the type of the arrow stands for whether
70 | the ligand and/or receptor are upregulated or downregulated. Users can select
71 | the colors represent the cell type by their own or chosen randomly by default.
72 | }
73 | \references{
74 | Gu, Z. (2014) circlize implements and enhances circular
75 | visualization in R. Bioinformatics.
76 | }
77 | 


--------------------------------------------------------------------------------
/man/MASTTest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DEG.R
 3 | \name{MASTTest}
 4 | \alias{MASTTest}
 5 | \title{Differential expression using MAST}
 6 | \usage{
 7 | MASTTest(sub_data, min_gene_expressed, min_valid_cells,
 8 |   contrast = unique(sub_data$compare_group), method = "glm",
 9 |   silent = FALSE, check_logged = TRUE)
10 | }
11 | \arguments{
12 | \item{sub_data}{Count data removed cell_type and selected certain two
13 | compare_group}
14 | 
15 | \item{min_gene_expressed}{Genes expressed in minimum number of cells}
16 | 
17 | \item{min_valid_cells}{Minimum number of genes detected in the cell}
18 | 
19 | \item{contrast}{String vector specifying the contrast to be
20 | tested against the log2-fold-change threshold}
21 | 
22 | \item{method}{Character vector, either ’glm’, ’glmer’ or ’bayesglm’}
23 | 
24 | \item{check_logged}{Set FALSE to override sanity checks that try to
25 | ensure that the default assay is log-transformed and has at least one
26 | exact zero}
27 | 
28 | \item{silent}{Silence common problems with fitting some genes}
29 | }
30 | \value{
31 | A matrix of differentially expressed genes and related statistics.
32 | }
33 | \description{
34 | Identifies differentially expressed genes between two groups of cells using
35 | MAST
36 | }
37 | \details{
38 | To use this method, please install MAST, using the instructions at
39 | https://github.com/RGLab/MAST
40 | }
41 | \references{
42 | MAST: a flexible statistical framework for assessing transcriptional
43 | changes and characterizing heterogeneity in single-cell RNA sequencing
44 | data G Finak, A McDavid, M Yajima, J Deng, V Gersuk, AK Shalek, CK Slichter
45 | et al Genome biology 16 (1), 278
46 | }
47 | 


--------------------------------------------------------------------------------
/man/MonocleTest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DEG.R
 3 | \name{MonocleTest}
 4 | \alias{MonocleTest}
 5 | \title{Differential expression using monocle}
 6 | \usage{
 7 | MonocleTest(sub_data, min_gene_expressed, min_valid_cells,
 8 |   contrast = unique(sub_data$compare_group), batch = NULL, cores = 4)
 9 | }
10 | \arguments{
11 | \item{sub_data}{Count data removed cell_type and selected certain two
12 | compare_group}
13 | 
14 | \item{min_gene_expressed}{Genes expressed in minimum number of cells}
15 | 
16 | \item{min_valid_cells}{Minimum number of genes detected in the cell}
17 | 
18 | \item{contrast}{String vector specifying the contrast to be
19 | tested against the log2-fold-change threshold}
20 | 
21 | \item{batch}{Different batch identifier}
22 | 
23 | \item{cores}{The number of cores to be used while testing each gene
24 | for differential expression.}
25 | }
26 | \value{
27 | A matrix of differentially expressed genes and related statistics.
28 | }
29 | \description{
30 | Identifies differentially expressed genes between two groups of cells using
31 | monocle
32 | }
33 | \details{
34 | This test does not support pre-processed genes. To use this method, please
35 | install monocle, using the instructions at
36 | https://bioconductor.org/packages/release/bioc/html/monocle.html
37 | }
38 | \references{
39 | Qiu X, Hill A, Packer J, Lin D, Ma Y, Trapnell C (2017).
40 | “Single-cell mRNA quantification and differential analysis with Census.”
41 | Nature Methods.
42 | https://github.com/cole-trapnell-lab/monocle-release
43 | }
44 | 


--------------------------------------------------------------------------------
/man/NetView.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/NetView.R
 3 | \name{NetView}
 4 | \alias{NetView}
 5 | \title{Network Viewing of cell-cell communication}
 6 | \usage{
 7 | NetView(data, col, label = TRUE, edge.curved = 0.5, shape = "circle",
 8 |   layout = nicely(), vertex.size = 20, margin = 0.2,
 9 |   vertex.label.cex = 1.5, vertex.label.color = "black",
10 |   arrow.width = 1.5, edge.label.color = "black", edge.label.cex = 1,
11 |   edge.max.width = 10)
12 | }
13 | \arguments{
14 | \item{data}{A dataframe containing ligand-receptor pairs and corresponding
15 | cell types used to do the plotting}
16 | 
17 | \item{col}{Colors used to represent different cell types}
18 | 
19 | \item{label}{Whether or not shows the label of edges (number of connections
20 | between different cell types)}
21 | 
22 | \item{edge.curved}{Specifies whether to draw curved edges, or not.
23 | This can be a logical or a numeric vector or scalar.
24 | First the vector is replicated to have the same length as the number of
25 | edges in the graph. Then it is interpreted for each edge separately.
26 | A numeric value specifies the curvature of the edge; zero curvature means
27 | straight edges, negative values means the edge bends clockwise, positive
28 | values the opposite. TRUE means curvature 0.5, FALSE means curvature zero}
29 | 
30 | \item{shape}{The shape of the vertex, currently “circle”, “square”,
31 | “csquare”, “rectangle”, “crectangle”, “vrectangle”, “pie” (see
32 | vertex.shape.pie), ‘sphere’, and “none” are supported, and only by the
33 | plot.igraph command. “none” does not draw the vertices at all, although
34 | vertex label are plotted (if given). See shapes for details about vertex
35 | shapes and vertex.shape.pie for using pie charts as vertices.}
36 | 
37 | \item{layout}{The layout specification. It must be a call to a layout
38 | specification function.}
39 | 
40 | \item{vertex.size}{The size of vertex}
41 | 
42 | \item{margin}{The amount of empty space below, over, at the left and right
43 | of the plot, it is a numeric vector of length four. Usually values between
44 | 0 and 0.5 are meaningful, but negative values are also possible, that will
45 | make the plot zoom in to a part of the graph. If it is shorter than four
46 | then it is recycled.}
47 | 
48 | \item{vertex.label.cex}{The label size of vertex}
49 | 
50 | \item{vertex.label.color}{The color of label for vertex}
51 | 
52 | \item{arrow.width}{The width of arrows}
53 | 
54 | \item{edge.label.color}{The color for single arrow}
55 | 
56 | \item{edge.label.cex}{The size of label for arrows}
57 | 
58 | \item{edge.max.width}{The maximum arrow size}
59 | }
60 | \value{
61 | A network graph of the significant interactions
62 | }
63 | \description{
64 | This function loads the significant interactions as a dataframe, and colors
65 | represent different types of cells as a structure. The width of edges represent
66 | the strength of the communication. Labels on the edges show exactly how many
67 | interactions exist between two types of cells.
68 | }
69 | \references{
70 | Csardi G, Nepusz T: The igraph software package for complex network
71 | research, InterJournal, Complex Systems 1695. 2006.
72 | http://igraph.org
73 | }
74 | 


--------------------------------------------------------------------------------
/man/SCDETest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DEG.R
 3 | \name{SCDETest}
 4 | \alias{SCDETest}
 5 | \title{Differential expression using scde}
 6 | \usage{
 7 | SCDETest(sub_data, min_gene_expressed, min_valid_cells,
 8 |   contrast = unique(sub_data$compare_group), batch = NULL,
 9 |   n.randomizations = 150, n.cores = 10, batch.models = models,
10 |   return.posteriors = FALSE, verbose = 1)
11 | }
12 | \arguments{
13 | \item{sub_data}{Count data removed cell_type and selected certain two
14 | compare_group}
15 | 
16 | \item{min_gene_expressed}{Genes expressed in minimum number of cells}
17 | 
18 | \item{min_valid_cells}{Minimum number of genes detected in the cell}
19 | 
20 | \item{contrast}{String vector specifying the contrast to be
21 | tested against the log2-fold-change threshold}
22 | 
23 | \item{batch}{Different batch identifier}
24 | 
25 | \item{n.cores}{number of cores to utilize}
26 | 
27 | \item{batch.models}{(optional) separate models for the batch data (if generated
28 | using batch-specific group argument). Normally the same models are used.}
29 | 
30 | \item{return.posteriors}{whether joint posterior matrices should be returned}
31 | 
32 | \item{verbose}{integer verbose level (1 for verbose)}
33 | 
34 | \item{n.randomizations}{number of bootstrap randomizations to be performed}
35 | }
36 | \value{
37 | A matrix of differentially expressed genes and related statistics.
38 | }
39 | \description{
40 | Identifies differentially expressed genes between two groups of cells using
41 | scde
42 | }
43 | \details{
44 | This test does not support pre-processed genes. To use this method, please
45 | install scde, using the instructions at
46 | http://hms-dbmi.github.io/scde/tutorials.html
47 | }
48 | \references{
49 | "Bayesian approach to single-cell differential expression
50 | analysis" (Kharchenko PV, Silberstein L, Scadden DT, Nature Methods,
51 | doi:10.1038/nmeth.2967)
52 | https://github.com/hms-dbmi/scde
53 | }
54 | 


--------------------------------------------------------------------------------
/man/TimePlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/TimePlot.R
 3 | \name{TimePlot}
 4 | \alias{TimePlot}
 5 | \title{Plotting ligand-receptor pairs}
 6 | \usage{
 7 | TimePlot(data, ligand, receptor, cell_from, cell_to, Time = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{A dataframe contains significant ligand-receptor pairs and related
11 | information such as expression level/log fold change and cell type}
12 | 
13 | \item{ligand}{String as selected ligand}
14 | 
15 | \item{receptor}{String as selected receptor}
16 | 
17 | \item{cell_from}{The cell type ligand gene belongs to}
18 | 
19 | \item{cell_to}{The cell type receptor gene belongs to}
20 | 
21 | \item{Time}{Different time points showing on the plot}
22 | }
23 | \value{
24 | A figure of the paired interactions
25 | }
26 | \description{
27 | This function loads count data as dataframe, ligand, receptor and two interactive
28 | cells' names as strings. The plot shows the expression level of ligand and
29 | receptor at different time points, thus illustrating the dynamic change of a ligand-receptor
30 | pair.
31 | }
32 | 


--------------------------------------------------------------------------------
/man/WilcoxTest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DEG.R
 3 | \name{WilcoxTest}
 4 | \alias{WilcoxTest}
 5 | \title{Differential expression using wilcox}
 6 | \usage{
 7 | WilcoxTest(sub_data, min_gene_expressed, min_valid_cells,
 8 |   contrast = unique(sub_data$compare_group), datatype = "raw count",
 9 |   verbose = 0)
10 | }
11 | \arguments{
12 | \item{sub_data}{Count data removed cell_type and selected certain two
13 | compare_group}
14 | 
15 | \item{min_gene_expressed}{Genes expressed in minimum number of cells}
16 | 
17 | \item{min_valid_cells}{Minimum number of genes detected in the cell}
18 | 
19 | \item{contrast}{String vector specifying the contrast to be
20 | tested against the log2-fold-change threshold}
21 | 
22 | \item{verbose}{Whether show the progress of computing}
23 | 
24 | \item{datatype}{Type of data. Available options are:
25 | \itemize{
26 |  \item{'raw count'}: Raw count data without any pre-processing
27 |  \item{'log count'}: Normalized and log-transformed data
28 | }}
29 | }
30 | \value{
31 | A matrix of differentially expressed genes and related statistics.
32 | }
33 | \description{
34 | Identifies differentially expressed genes between two groups of cells using
35 | a Wilcoxon Rank Sum test
36 | }
37 | 


--------------------------------------------------------------------------------
/man/edgeRTest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DEG.R
 3 | \name{edgeRTest}
 4 | \alias{edgeRTest}
 5 | \title{Differential expression using edgeR}
 6 | \usage{
 7 | edgeRTest(sub_data, min_gene_expressed, min_valid_cells,
 8 |   contrast = unique(sub_data$compare_group), calcNormMethod = "TMM",
 9 |   trend.method = "locfit", tagwise = TRUE, robust = FALSE)
10 | }
11 | \arguments{
12 | \item{sub_data}{Count data removed cell_type and selected certain two
13 | compare_group}
14 | 
15 | \item{min_gene_expressed}{Genes expressed in minimum number of cells}
16 | 
17 | \item{min_valid_cells}{Minimum number of genes detected in the cell}
18 | 
19 | \item{contrast}{String vector specifying the contrast to be
20 | tested against the log2-fold-change threshold}
21 | 
22 | \item{calcNormMethod}{normalization method to be used}
23 | 
24 | \item{trend.method}{method for estimating dispersion trend. Possible values
25 | are "none", "movingave", "loess" and "locfit" (default).}
26 | 
27 | \item{tagwise}{logical, should the tagwise dispersions be estimated}
28 | 
29 | \item{robust}{logical, should the estimation of prior.df be robustified
30 | against outliers}
31 | }
32 | \value{
33 | A matrix of differentially expressed genes and related statistics.
34 | }
35 | \description{
36 | Identifies differentially expressed genes between two groups of cells using
37 | edgeR
38 | }
39 | \details{
40 | This test does not support pre-processed genes. To use this method, please
41 | install edgeR, using the instructions at
42 | http://bioconductor.org/packages/release/bioc/html/edgeR.html
43 | }
44 | \references{
45 | McCarthy, J. D, Chen, Yunshun, Smyth, K. G (2012). “Differential
46 | expression analysis of multifactor RNA-Seq experiments with respect to
47 | biological variation.” Nucleic Acids Research, 40(10), 4288-4297.
48 | 
49 | Robinson MD, McCarthy DJ, Smyth GK (2010). “edgeR: a Bioconductor
50 | package for differential expression analysis of digital gene expression data.”
51 | Bioinformatics, 26(1), 139-140.
52 | https://bioconductor.org/packages/release/bioc/html/edgeR.html
53 | }
54 | 


--------------------------------------------------------------------------------
/man/rawParse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rawParse.R
 3 | \name{rawParse}
 4 | \alias{rawParse}
 5 | \title{Parsing the data to get top expressed genes}
 6 | \usage{
 7 | rawParse(data, top_genes = 50, stats = "mean")
 8 | }
 9 | \arguments{
10 | \item{data}{Input data, raw or normalized count with 'cell_type' column}
11 | 
12 | \item{top_genes}{(scale 1 to 100) Top percent highly expressed genes used
13 | to find ligand-receptor pairs, default is 50}
14 | 
15 | \item{stats}{Whether calculates the mean or the median of the data. Available
16 | options are 'mean' and 'median'.}
17 | }
18 | \value{
19 | A dataframe of the data
20 | }
21 | \description{
22 | This function loads the count data as a dataframe. It assumes that each line
23 | contains gene expression profile of one single cell, and each column
24 | contains the one single gene expression profile in different cells. The dataframe
25 | should also contain the cell type information with column name 'cell_type'.
26 | Group information should also be included as 'compare_group' if users want
27 | to call differentially expressed ligand-receptor pairs. Batch information as
28 | 'batch' is optional. If included, users may want to use the raw count data
29 | for later analysis.
30 | }
31 | 


--------------------------------------------------------------------------------