├── DESCRIPTION ├── NAMESPACE ├── R ├── DEG.R ├── FindLR.R ├── LRPlot.R ├── NetView.R ├── TimePlot.R ├── rawParse.R └── sysdata.rda ├── README.md ├── data └── LR_database.rda ├── example ├── example_code.r └── example_data.txt └── man ├── DEG.Rd ├── DESeq2Test.Rd ├── DESingleTest.Rd ├── FindLR.Rd ├── LRPlot.Rd ├── MASTTest.Rd ├── MonocleTest.Rd ├── NetView.Rd ├── SCDETest.Rd ├── TimePlot.Rd ├── WilcoxTest.Rd ├── edgeRTest.Rd └── rawParse.Rd /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: iTALK 2 | Type: Package 3 | Title: Characterize and Illustrate Intercellular Communication 4 | Version: 0.1.0 5 | Date: 2018-12-12 6 | Author: Yuanxin Wang 7 | Maintainer: Yuanxin Wang 8 | Description: iTALK, a computational approach to characterize, compare, 9 | and illustrate intercellular communication signals in the multicellular 10 | ecosystem using either bulk RNA sequencing data or single cell RNAseq data. 11 | iTALK can in principle be used to dissect the complexity, diversity, and 12 | dynamics of cell-cell communication from a wide range of cellular processes. 
13 | License: CC-BY-NC-SA 14 | Encoding: UTF-8 15 | LazyData: true 16 | Depends: R (>= 3.4.0) 17 | Imports: 18 | progress, 19 | pbapply, 20 | dplyr, 21 | tidyr, 22 | graphics, 23 | randomcoloR, 24 | circlize, 25 | ggplot2, 26 | network, 27 | igraph, 28 | DESeq2, 29 | edgeR, 30 | monocle, 31 | scde, 32 | DEsingle, 33 | MAST, 34 | scater 35 | VignetteBuilder: knitr 36 | biocViews: RNASequencing 37 | RoxygenNote: 6.1.1 38 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(DEG) 4 | export(DESeq2Test) 5 | export(DESingleTest) 6 | export(FindLR) 7 | export(LRPlot) 8 | export(MASTTest) 9 | export(MonocleTest) 10 | export(NetView) 11 | export(SCDETest) 12 | export(TimePlot) 13 | export(WilcoxTest) 14 | export(edgeRTest) 15 | export(rawParse) 16 | import(Biobase) 17 | import(DESeq2) 18 | import(DEsingle) 19 | import(MAST) 20 | import(circlize) 21 | import(dplyr) 22 | import(edgeR) 23 | import(ggplot2) 24 | import(graphics) 25 | import(igraph) 26 | import(monocle) 27 | import(network) 28 | import(randomcoloR) 29 | import(scde) 30 | import(tidyr) 31 | importFrom(pbapply,pbsapply) 32 | importFrom(progress,progress_bar) 33 | importFrom(stats,wilcox.test) 34 | importFrom(utils,installed.packages) 35 | -------------------------------------------------------------------------------- /R/DEG.R: -------------------------------------------------------------------------------- 1 | #' Call DEGenes 2 | #' 3 | #' This function loads the data as a dataframe, and method as a string. 4 | #' It assumes that each line contains gene expression profile of one single 5 | #' cell, and each column contains the one single gene expression profile in 6 | #' different cells. 
The dataframe should also contain the cell type information 7 | #' with column name 'cell_type', as well as group information as 'compare_group'. 8 | #' Batch information as 'batch' is optional. If included, users may want to use 9 | #' the raw count data for later analysis. Differentially expressed genes will be 10 | #' called within each cell type by the method users select. For bulk RNAseq, 11 | #' we provide edgeR, DESeq2. And for scRNA-seq, popular methods in packages 12 | #' scde, monocle, DEsingle and MAST are available. 13 | #' 14 | #' @param data Input raw or normalized count data with column 'cell_type' 15 | #' and 'compare_group' 16 | #' @param method Method used to call DEGenes. Available options are: 17 | ##' \itemize{ 18 | ##' \item{Wilcox}: Wilcoxon rank sum test 19 | ##' \item{DESeq2}: Negative binomial model based differential analysis 20 | ##' (Love et al, Genome Biology, 2014) 21 | ##' \item{SCDE}: Bayesian approach to single-cell differential 22 | ##' expression analysis (Kharchenko et al, Nature Methods, 2014) 23 | ##' \item{monocle}: Census based differential analysis (Qiu et al, 24 | ##' Nature Methods, 2017) 25 | ##' \item{edgeR}: Negative binomial distributions, including empirical 26 | ##' Bayes estimation, exact tests, generalized linear models and 27 | ##' quasi-likelihood tests based differential analysis (McCarthy et al, 28 | ##' Nucleic Acids Research, 2012) 29 | ##' \item{DESingle}: Zero-Inflated Negative Binomial model to estimate 30 | ##' the proportion of real and dropout zeros and to define and detect 31 | ##' the 3 types of DE genes (Miao et al, Bioinformatics, 2018) 32 | ##' \item{MAST}: GLM-framework that treats cellular detection rate as a 33 | ##' covariate (Finak et al, Genome Biology, 2015) 34 | ##' } 35 | #' @param min_gene_expressed Genes expressed in minimum number of cells 36 | #' @param min_valid_cells Minimum number of genes detected in the cell 37 | #' @param contrast String vector specifying the contrast to be 38 | #' 
#' tested against the log2-fold-change threshold
#' @param q_cut Cut-off for q value
#' @param add Whether add genes that are not differentially expressed
#' but highly expressed for finding the significant pairs later
#' @param top Same as in function rawParse
#' @param stats Same as in function rawParse
#' @param ... Additional arguments forwarded unchanged to the selected
#' test function (for example \code{datatype} for \code{WilcoxTest})
#' @return A data frame of the differentially expressed genes with the
#' columns gene, logFC, p.value, q.value and cell_type. When more than two
#' groups are present and \code{contrast} is NULL, the rows of every pairwise
#' comparison are stacked.
#' @importFrom utils installed.packages
#' @importFrom utils combn
#' @import dplyr
#' @import DESeq2
#' @import scde
#' @import monocle
#' @import edgeR
#' @import DEsingle
#' @import MAST
#' @import Biobase
#' @export
DEG <- function(data, method, min_gene_expressed = 0, min_valid_cells = 0,
                contrast = NULL, q_cut = 0.05, add = TRUE, top = 50,
                stats = 'mean', ...) {
  if (method %in% c('SCDE', 'monocle', 'DESingle', 'MAST') && nrow(data) >= 400) {
    # Informational only, so it goes to the message stream instead of stdout.
    message('Warning: It may take a long time. You can go and brew a cup of coffee...')
  }
  cell_type <- unique(data$cell_type)
  if (length(cell_type) != 1) {
    stop('Error: please compare data with sinlge cell type')
  }

  # One dispatch instead of seven copies of the same loop; the unnamed last
  # argument is the fall-through for an unknown method.
  test_fun <- switch(method,
    Wilcox = WilcoxTest,
    DESeq2 = DESeq2Test,
    SCDE = SCDETest,
    monocle = MonocleTest,
    edgeR = edgeRTest,
    DESingle = DESingleTest,
    MAST = MASTTest,
    stop('Error: method currently not available')
  )

  sub_data <- data[, colnames(data) != 'cell_type', drop = FALSE]

  # Each column of `pairs` is one comparison. An explicit contrast selects a
  # single comparison; otherwise every pair of groups is tested.
  if (is.null(contrast)) {
    group_names <- as.character(unique(sub_data$compare_group))
    if (length(group_names) < 2) {
      stop('Error: at least two groups are required in compare_group')
    }
    pairs <- utils::combn(group_names, 2)
  } else {
    if (length(contrast) != 2) {
      stop('Error: contrast must name exactly two groups')
    }
    pairs <- matrix(as.character(contrast), nrow = 2)
  }

  # The previous loop was `for (i in ncol(combination))`, which only visited
  # the last pair, and it overwrote `sub_data` / `contrast` so later pairs
  # could never have been tested correctly. Every pair now gets its own
  # subset and its own contrast.
  per_pair <- vector('list', ncol(pairs))
  for (i in seq_len(ncol(pairs))) {
    pair <- pairs[, i]
    pair_data <- sub_data[sub_data$compare_group %in% pair, , drop = FALSE]
    per_pair[[i]] <- test_fun(pair_data, min_gene_expressed, min_valid_cells,
                              contrast = pair, ...)
  }
  res <- do.call(rbind, per_pair)
  res$cell_type <- rep(cell_type, nrow(res))

  # NOTE(review): the statements from the q-value filter to the start of the
  # `add` branch were garbled in this copy of the file (dump lines 127-128 are
  # missing). They are reconstructed from the surviving fragments and the
  # documented `q_cut` / `add` arguments -- confirm against upstream.
  res <- res %>% dplyr::filter(q.value < q_cut)
  if (add) {
    # Highly expressed genes that are not significant are appended with a
    # tiny placeholder logFC so they remain available for pair finding.
    parsedData <- rawParse(data %>% dplyr::select(-compare_group),
                           top = top, stats = stats) %>%
      dplyr::select(c(gene, cell_type)) %>%
      dplyr::mutate(logFC = 0.0001, p.value = NA, q.value = NA)
    parsedData <- parsedData %>% dplyr::anti_join(res, by = c('gene' = 'gene'))
    res <- rbind(res, parsedData)
  }
  res
}

#' Differential expression using wilcox
#'
#' Identifies differentially expressed genes between two groups of cells using
#' a Wilcoxon Rank Sum test
#' @param sub_data Count data removed cell_type and selected certain two
#' compare_group
#' @param min_gene_expressed Minimum total count (summed over all cells) a
#' gene needs in order to be kept
#' @param min_valid_cells Minimum total count (summed over all genes) a cell
#' needs in order to be kept
#' @param contrast Vector of two group labels. The reported logFC is the
#' first group relative to the second.
#' @param datatype Type of data. Available options are:
##' \itemize{
##' \item{'raw count'}: Raw count data without any pre-processing; logFC is
##' the log2 ratio of the group means
##' \item{'log count'}: Normalized and log-transformed data; logFC is the
##' difference of the group means
##' }
#' @param verbose Whether show the progress of computing
#'
#' @return A data frame with one row per retained gene and the columns
#' gene, logFC, p.value and q.value (Benjamini-Hochberg adjusted).
#'
#' @importFrom pbapply pbsapply
#' @importFrom stats wilcox.test
#' @importFrom stats p.adjust
#'
#' @export
WilcoxTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                       contrast = unique(sub_data$compare_group),
                       datatype = 'raw count', verbose = 0) {
  # Reject an unknown datatype before doing any expensive work.
  if (!(datatype %in% c('raw count', 'log count'))) {
    stop('Error: invalid data type')
  }
  contrast <- as.character(contrast)

  # genes x cells matrix; coerced in place so dimnames survive even when
  # there is only one gene.
  counts <- t(as.matrix(sub_data[, colnames(sub_data) != 'compare_group',
                                 drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  valid_cells <- colSums(counts) >= min_valid_cells
  # droplevels: a factor column may still carry levels of groups that were
  # filtered out before this call.
  groups <- droplevels(as.factor(sub_data$compare_group[valid_cells]))
  # drop = FALSE keeps a matrix when a single gene or cell remains.
  counts <- counts[keep_genes, valid_cells, drop = FALSE]

  gene_index <- seq_len(nrow(counts))
  row_p_value <- function(i) {
    stats::wilcox.test(counts[i, ] ~ groups)$p.value
  }
  p_val <- if (verbose) {
    as.numeric(unlist(pbapply::pbsapply(gene_index, row_p_value)))
  } else {
    vapply(gene_index, row_p_value, numeric(1))
  }

  mean_first <- rowMeans(counts[, groups == contrast[1], drop = FALSE])
  mean_second <- rowMeans(counts[, groups == contrast[2], drop = FALSE])
  logFC <- if (datatype == 'raw count') {
    log2(mean_first / mean_second)
  } else {
    mean_first - mean_second
  }

  res <- data.frame(gene = as.character(rownames(counts)), logFC = logFC,
                    p.value = p_val, stringsAsFactors = FALSE)
  res$q.value <- stats::p.adjust(res$p.value, method = 'BH')
  res
}

#' Differential expression using DESeq2
#'
#' Identifies differentially expressed genes between two groups of cells using
#' DESeq2
#'
#' @references Love MI, Huber W and Anders S (2014). "Moderated estimation of
#' fold change and dispersion for RNA-seq data with DESeq2." Genome Biology.
#' https://bioconductor.org/packages/release/bioc/html/DESeq2.html
#' @param sub_data Count data removed cell_type and selected certain two
#' compare_group
#' @param min_gene_expressed Genes expressed in minimum number of cells
#' @param min_valid_cells Minimum number of genes detected in the cell
#' @param contrast String vector specifying the contrast to be
#' tested against the log2-fold-change threshold
#' @param test either "Wald" or "LRT", which will then use either
#' Wald significance tests (defined by \code{\link{nbinomWaldTest}}),
#' or the likelihood ratio test on the difference in deviance between a
#' full and reduced model formula (defined by \code{\link{nbinomLRT}})
#' @param fitType either "parametric", "local", or "mean"
#' for the type of fitting of dispersions to the mean intensity.
#' See \code{\link{estimateDispersions}} for description.
#' @param sfType either "ratio", "poscounts", or "iterate"
#' for the type of size factor estimation. See
#' \code{\link{estimateSizeFactors}} for description.
211 | #' @param betaPrior whether or not to put a zero-mean normal prior on 212 | #' the non-intercept coefficients 213 | #' See \code{\link{nbinomWaldTest}} for description of the calculation 214 | #' of the beta prior. In versions \code{>=1.16}, the default is set 215 | #' to \code{FALSE}, and shrunken LFCs are obtained afterwards using 216 | #' \code{\link{lfcShrink}}. 217 | #' @param quiet whether to print messages at each step 218 | #' @param modelMatrixType either "standard" or "expanded", which describe 219 | #' how the model matrix, X of the GLM formula is formed. 220 | #' "standard" is as created by \code{model.matrix} using the 221 | #' design formula. "expanded" includes an indicator variable for each 222 | #' level of factors in addition to an intercept. for more information 223 | #' see the Description of \code{\link{nbinomWaldTest}}. 224 | #' betaPrior must be set to TRUE in order for expanded model matrices 225 | #' to be fit. 226 | #' @param minReplicatesForReplace the minimum number of replicates required 227 | #' in order to use \code{\link{replaceOutliers}} on a 228 | #' sample. If there are samples with so many replicates, the model will 229 | #' be refit after these replacing outliers, flagged by Cook's distance. 230 | #' Set to \code{Inf} in order to never replace outliers. 231 | #' @param useT logical, passed to \code{\link{nbinomWaldTest}}, default is FALSE, 232 | #' where Wald statistics are assumed to follow a standard Normal 233 | #' @param minmu lower bound on the estimated count for fitting gene-wise dispersion 234 | #' and for use with \code{nbinomWaldTest} and \code{nbinomLRT} 235 | #' @param parallel if FALSE, no parallelization. if TRUE, parallel 236 | #' execution using \code{BiocParallel}, see next argument \code{BPPARAM}. 
#' A note on running in parallel using \code{BiocParallel}: it may be
#' advantageous to remove large, unneeded objects from your current
#' R environment before calling \code{DESeq},
#' as it is possible that R's internal garbage collection
#' will copy these files while running on worker nodes.
#' @param BPPARAM an optional parameter object passed internally
#' to \code{\link{bplapply}} when \code{parallel=TRUE}.
#' If not specified, the parameters last registered with
#' \code{\link{register}} will be used.
#' @import DESeq2
#'
#' @return A data frame with the columns gene, logFC, p.value and q.value,
#' one row per retained gene. logFC is the first entry of \code{contrast}
#' relative to the second.
#'
#' @details
#' This test does not support pre-processed genes. To use this method, please
#' install DESeq2, using the instructions at
#' https://bioconductor.org/packages/release/bioc/html/DESeq2.html
#'
#' @importFrom utils installed.packages
#'
#' @export
DESeq2Test <- function(sub_data, min_gene_expressed, min_valid_cells,
                       contrast = unique(sub_data$compare_group), test = 'Wald',
                       fitType = 'parametric', sfType = 'ratio', betaPrior = FALSE,
                       quiet = FALSE, modelMatrixType = 'standard',
                       minReplicatesForReplace = 7, useT = FALSE, minmu = 0.5,
                       parallel = FALSE, BPPARAM = bpparam()) {
  # NOTE(review): the default `bpparam()` comes from BiocParallel, which the
  # NAMESPACE shown for this package does not import; the default is only
  # evaluated if something downstream actually uses BPPARAM.

  # A factor contrast would be turned into level codes by c() on older R
  # versions, so work with the labels.
  contrast <- as.character(contrast)

  # Select the expression columns by name; the previous code dropped the
  # last column and silently assumed it was compare_group.
  counts <- t(as.matrix(sub_data[, colnames(sub_data) != 'compare_group',
                                 drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  valid_cells <- colSums(counts) >= min_valid_cells
  counts <- counts[keep_genes, valid_cells, drop = FALSE]

  # Sample annotation, one row per retained cell, in the column order of
  # `counts`. Unused levels are dropped so the design has no empty group.
  coldata <- data.frame(
    compare_group = droplevels(as.factor(sub_data$compare_group[valid_cells])),
    row.names = rownames(sub_data)[valid_cells]
  )

  dds <- DESeqDataSetFromMatrix(countData = counts, colData = coldata,
                                design = ~ compare_group)
  dds <- DESeq(dds, test = test,
               fitType = fitType,
               sfType = sfType,
               betaPrior = betaPrior,
               quiet = quiet,
               minReplicatesForReplace = minReplicatesForReplace,
               modelMatrixType = modelMatrixType,
               useT = useT, minmu = minmu,
               parallel = parallel, BPPARAM = BPPARAM)
  fit <- results(dds, contrast = c('compare_group', contrast))

  data.frame(gene = rownames(counts), logFC = fit$log2FoldChange,
             p.value = fit$pvalue, q.value = fit$padj,
             stringsAsFactors = FALSE)
}

#' Differential expression using scde
#'
#' Identifies differentially expressed genes between two groups of cells using
#' scde
#'
#' @references "Bayesian approach to single-cell differential expression
#' analysis" (Kharchenko PV, Silberstein L, Scadden DT, Nature Methods,
#' doi:10.1038/nmeth.2967)
#' https://github.com/hms-dbmi/scde
#' @param sub_data Count data removed cell_type and selected certain two
#' compare_group
#' @param min_gene_expressed Genes expressed in minimum number of cells
#' @param min_valid_cells Minimum number of genes detected in the cell
#' @param contrast String vector specifying the contrast to be
#' tested against the log2-fold-change threshold
#' @param batch Different batch identifier
#' @param n.randomizations number of bootstrap randomizations to be performed
#' @param n.cores number of cores to utilize
#' @param batch.models (optional) separate models for the batch data (if generated
#' using batch-specific group argument). Normally the same models are used.
#' @param return.posteriors whether joint posterior matrices should be returned
#' @param verbose integer verbose level (1 for verbose)
#' @import scde
#' @return A matrix of differentially expressed genes and related statistics.
#'
#' @details
#' This test does not support pre-processed genes.
#' To use this method, please
#' install scde, using the instructions at
#' http://hms-dbmi.github.io/scde/tutorials.html
#'
#' When \code{batch} is not NULL the batch labels are read from a 'batch'
#' column of \code{sub_data} if one exists; otherwise \code{batch} itself is
#' used and must hold one value per row of \code{sub_data}.
#' \code{batch.models} is accepted for interface compatibility but is not
#' used by this function.
#'
#' @importFrom utils installed.packages
#' @importFrom stats pnorm
#'
#' @export
SCDETest <- function(sub_data, min_gene_expressed, min_valid_cells,
                     contrast = unique(sub_data$compare_group), batch = NULL,
                     n.randomizations = 150, n.cores = 10, batch.models = models,
                     return.posteriors = FALSE, verbose = 1) {
  contrast <- as.character(contrast)

  use_batch <- !is.null(batch)
  batch_values <- NULL
  if (use_batch) {
    batch_values <- if ('batch' %in% colnames(sub_data)) sub_data$batch else batch
    if (length(batch_values) != nrow(sub_data)) {
      stop('Error: batch must provide one value per row of sub_data')
    }
  }

  # Everything except the annotation columns is expression data. Selecting
  # by name replaces the old trick of appending a NaN 'batch' column.
  is_gene <- !(colnames(sub_data) %in% c('compare_group', 'batch'))
  counts <- t(as.matrix(sub_data[, is_gene, drop = FALSE]))
  storage.mode(counts) <- 'integer'

  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  valid_cells <- colSums(counts) >= min_valid_cells
  counts <- counts[keep_genes, valid_cells, drop = FALSE]

  # Named by cell so the factor can be re-aligned with the model table below;
  # droplevels because scde insists on exactly two levels.
  groups <- droplevels(as.factor(sub_data$compare_group[valid_cells]))
  names(groups) <- colnames(counts)

  # NOTE(review): n.cores is fixed at 2 for model fitting (as before) and
  # does not follow the n.cores argument; `verbose` is now passed through,
  # its default of 1 matches the previous hard-coded value.
  o.ifm <- scde.error.models(counts = counts, groups = groups, n.cores = 2,
                             threshold.segmentation = TRUE,
                             min.size.entries = 100,
                             save.crossfit.plots = FALSE,
                             save.model.plots = FALSE, verbose = verbose)
  o.ifm <- o.ifm[o.ifm$corr.a > 0, ]
  o.prior <- scde.expression.prior(models = o.ifm, counts = counts)

  # Cells whose error model was rejected must also leave the grouping
  # factors, otherwise their length no longer matches the model table.
  groups <- groups[rownames(o.ifm)]

  if (!use_batch) {
    ediff <- scde.expression.difference(o.ifm, counts, o.prior, groups = groups,
                                        n.randomizations = n.randomizations,
                                        n.cores = n.cores, verbose = verbose)
  } else {
    batch_factor <- droplevels(as.factor(batch_values[valid_cells]))
    names(batch_factor) <- colnames(counts)
    batch_factor <- batch_factor[rownames(o.ifm)]
    ediff_batch <- scde.expression.difference(o.ifm, counts, o.prior,
                                              groups = groups,
                                              batch = batch_factor,
                                              n.randomizations = n.randomizations,
                                              n.cores = n.cores,
                                              return.posteriors = return.posteriors,
                                              verbose = verbose)
    ediff <- ediff_batch$batch.adjusted
  }

  p.value <- 2 * stats::pnorm(abs(ediff$Z), lower.tail = FALSE)   # 2-tailed p-value
  q.value <- 2 * stats::pnorm(abs(ediff$cZ), lower.tail = FALSE)  # Adjusted to control for FDR
  res <- data.frame(gene = rownames(counts), logFC = ediff$mle,
                    p.value = p.value, q.value = q.value,
                    stringsAsFactors = FALSE)

  # The estimate follows the factor level order; flip it when the requested
  # contrast is the reverse of that order.
  if (all(levels(groups) != contrast)) {
    res$logFC <- -res$logFC
  }
  res
}

#' Differential expression using monocle
#'
#' Identifies differentially expressed genes between two groups of cells using
#' monocle
#'
#' @references Qiu X, Hill A, Packer J, Lin D, Ma Y, Trapnell C (2017).
#' “Single-cell mRNA quantification and differential analysis with Census.”
#' Nature Methods.
#' https://github.com/cole-trapnell-lab/monocle-release
#' @param sub_data Count data removed cell_type and selected certain two
#' compare_group
#' @param min_gene_expressed Genes expressed in minimum number of cells
#' @param min_valid_cells Minimum number of genes detected in the cell
#' @param contrast String vector specifying the contrast to be
#' tested against the log2-fold-change threshold
#' @param batch Different batch identifier
#' @param cores The number of cores to be used while testing each gene
#' for differential expression.
#' @import monocle
#' @return A matrix of differentially expressed genes and related statistics.
#'
#' @details
#' This test does not support pre-processed genes.
#' To use this method, please
#' install monocle, using the instructions at
#' https://bioconductor.org/packages/release/bioc/html/monocle.html
#'
#' The reported logFC is the log2 ratio of the size-factor-normalised group
#' means, first entry of \code{contrast} over the second.
#'
#' @importFrom utils installed.packages
#' @importFrom methods new
#'
#' @export
MonocleTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                        contrast = unique(sub_data$compare_group),
                        batch = NULL, cores = 4) {
  contrast <- as.character(contrast)

  use_batch <- !is.null(batch)
  batch_values <- NULL
  if (use_batch) {
    # Prefer a 'batch' column of sub_data; fall back to the argument itself.
    batch_values <- if ('batch' %in% colnames(sub_data)) sub_data$batch else batch
    if (length(batch_values) != nrow(sub_data)) {
      stop('Error: batch must provide one value per row of sub_data')
    }
  }

  is_gene <- !(colnames(sub_data) %in% c('compare_group', 'batch'))
  counts <- t(as.matrix(sub_data[, is_gene, drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  # Per-cell annotation
  pheno <- data.frame(compare_group = sub_data$compare_group,
                      stringsAsFactors = FALSE)
  if (use_batch) {
    pheno$batch <- batch_values
  }
  pheno$num_genes_expressed <- colSums(counts != 0)
  rownames(pheno) <- colnames(counts)

  # Per-gene annotation
  feature <- data.frame(gene_short_name = rownames(counts),
                        num_cells_expressed = rowSums(counts != 0),
                        row.names = rownames(counts),
                        stringsAsFactors = FALSE)

  cds <- newCellDataSet(counts,
                        phenoData = methods::new('AnnotatedDataFrame', pheno),
                        featureData = methods::new('AnnotatedDataFrame', feature),
                        expressionFamily = negbinomial.size())

  gene_info <- fData(cds)
  cell_info <- pData(cds)
  expressed_genes <- rownames(gene_info)[gene_info$num_cells_expressed >= min_gene_expressed]
  valid_cells <- rownames(cell_info)[cell_info$num_genes_expressed >= min_valid_cells]
  cds <- cds[expressed_genes, valid_cells]
  cds <- estimateSizeFactors(cds)
  cds <- estimateDispersions(cds)

  if (use_batch) {
    cds <- reduceDimension(cds, residualModelFormulaStr = "~batch",
                           verbose = TRUE)
  }
  # `cores` was documented but never forwarded.
  diff_test_res <- differentialGeneTest(cds,
                                        fullModelFormulaStr = "~compare_group",
                                        cores = cores)

  # Size factors are per cell (column). A plain `matrix / vector` recycles
  # down the rows, so the division has to be swept over the columns.
  norm_data <- sweep(as.matrix(exprs(cds)), 2, pData(cds)[, 'Size_Factor'], '/')
  cell_group <- as.character(pData(cds)$compare_group)
  # Group means rather than sums, so unequal group sizes do not bias the
  # fold change.
  mean_first <- rowMeans(norm_data[, cell_group == contrast[1], drop = FALSE])
  mean_second <- rowMeans(norm_data[, cell_group == contrast[2], drop = FALSE])

  # Gene labels come from the filtered data set so they stay aligned with
  # the statistics when genes were removed.
  genes <- rownames(norm_data)
  data.frame(gene = genes,
             logFC = log2(mean_first / mean_second),
             p.value = diff_test_res[genes, 'pval'],
             q.value = diff_test_res[genes, 'qval'],
             stringsAsFactors = FALSE)
}

#' Differential expression using edgeR
#'
#' Identifies differentially expressed genes between two groups of cells using
#' edgeR
#'
#' @references McCarthy, J. D, Chen, Yunshun, Smyth, K. G (2012). “Differential
#' expression analysis of multifactor RNA-Seq experiments with respect to
#' biological variation.” Nucleic Acids Research, 40(10), 4288-4297.
#' @references Robinson MD, McCarthy DJ, Smyth GK (2010). “edgeR: a Bioconductor
#' package for differential expression analysis of digital gene expression data.”
#' Bioinformatics, 26(1), 139-140.
#' http://bioconductor.org/packages/release/bioc/html/edgeR.html
#' @param sub_data Count data removed cell_type and selected certain two
#' compare_group
#' @param min_gene_expressed Genes expressed in minimum number of cells
#' @param min_valid_cells Minimum number of genes detected in the cell
#' @param contrast String vector specifying the contrast to be
#' tested against the log2-fold-change threshold
#' @param calcNormMethod normalization method to be used
#' @param trend.method method for estimating dispersion trend.
#' Possible values
#' are "none", "movingave", "loess" and "locfit" (default).
#' @param tagwise logical, should the tagwise dispersions be estimated
#' @param robust logical, should the estimation of prior.df be robustified
#' against outliers
#' @import edgeR
#' @return A data frame with the columns gene, logFC, p.value and q.value
#' (Benjamini-Hochberg adjusted). logFC is the first entry of
#' \code{contrast} relative to the second.
#'
#' @details
#' This test does not support pre-processed genes. To use this method, please
#' install edgeR, using the instructions at
#' http://bioconductor.org/packages/release/bioc/html/edgeR.html
#'
#' @importFrom utils installed.packages
#' @importFrom stats model.matrix p.adjust
#'
#' @export
edgeRTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                      contrast = unique(sub_data$compare_group),
                      calcNormMethod = 'TMM', trend.method = 'locfit',
                      tagwise = TRUE, robust = FALSE) {
  contrast <- as.character(contrast)

  counts <- t(as.matrix(sub_data[, colnames(sub_data) != 'compare_group',
                                 drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  valid_cells <- colSums(counts) >= min_valid_cells
  counts <- counts[keep_genes, valid_cells, drop = FALSE]
  # Unused levels would add an all-zero column to the design matrix.
  groups <- droplevels(as.factor(sub_data$compare_group[valid_cells]))

  dgList <- DGEList(counts = counts, genes = rownames(counts), group = groups)
  dgList <- calcNormFactors(dgList, method = calcNormMethod)
  designMat <- stats::model.matrix(~groups)
  dgList <- estimateDisp(dgList, design = designMat,
                         trend.method = trend.method,
                         tagwise = tagwise, robust = robust)

  # exactTest() treats the FIRST group of `pair` as the baseline, i.e.
  # pair = c(A, B) reports B relative to A. The other tests in this file
  # report contrast[1] relative to contrast[2], so the pair is reversed.
  et <- exactTest(dgList, pair = rev(contrast))

  data.frame(gene = rownames(counts),
             logFC = et$table$logFC,
             p.value = et$table$PValue,
             q.value = stats::p.adjust(et$table$PValue, method = "BH"),
             stringsAsFactors = FALSE)
}

#' Differential expression using DEsingle
#'
#' Identifies differentially expressed genes between two groups
of cells using 494 | #' DEsingle 495 | #' 496 | #' @references Zhun Miao, Ke Deng, Xiaowo Wang, Xuegong Zhang (2018). DEsingle 497 | #' for detecting three types of differential expression in single-cell RNA-seq 498 | #' data. Bioinformatics, bty332. 10.1093/bioinformatics/bty332. 499 | #' 500 | #' @param sub_data Count data removed cell_type and selected certain two 501 | #' compare_group 502 | #' @param min_gene_expressed Genes expressed in minimum number of cells 503 | #' @param min_valid_cells Minimum number of genes detected in the cell 504 | #' @param contrast String vector specifying the contrast to be 505 | #' tested against the log2-fold-change threshold 506 | #' @param parallel If FALSE (default), no parallel computation is used; 507 | #' if TRUE, parallel computation using \code{BiocParallel}, with argument 508 | #' \code{BPPARAM}. 509 | #' @param BPPARAM An optional parameter object passed internally to 510 | #' \code{\link{bplapply}} when \code{parallel=TRUE}. If not specified, 511 | #' \code{\link{bpparam}()} (default) will be used. 512 | #' @import DEsingle 513 | #' @return A matrix of differentially expressed genes and related statistics. 514 | #' 515 | #' @details 516 | #' This test does not support pre-processed genes. 
#' To use this method, please
#' install DEsingle, using the instructions at
#' https://github.com/miaozhun/DEsingle
#'
#' @importFrom utils installed.packages
#'
#' @export
DESingleTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                         contrast = unique(sub_data$compare_group),
                         parallel = FALSE, BPPARAM = bpparam()) {
  # NOTE(review): the default `bpparam()` comes from BiocParallel, which the
  # NAMESPACE shown for this package does not import.
  contrast <- as.character(contrast)

  counts <- t(as.matrix(sub_data[, colnames(sub_data) != 'compare_group',
                                 drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  valid_cells <- colSums(counts) >= min_valid_cells
  counts <- counts[keep_genes, valid_cells, drop = FALSE]
  # Drop levels of groups that are not part of this comparison.
  groups <- droplevels(as.factor(sub_data$compare_group[valid_cells]))

  fit <- DEsingle(counts = counts, group = groups, parallel = parallel,
                  BPPARAM = BPPARAM)

  # Label rows with the genes the result itself reports, so the labels stay
  # aligned if the result is reordered or has genes removed.
  # NOTE(review): assumes the DEsingle result carries gene names as row
  # names -- falls back to the input order when it does not.
  gene <- rownames(fit)
  if (is.null(gene) || !all(gene %in% rownames(counts))) {
    gene <- rownames(counts)
  }

  res <- data.frame(gene = gene,
                    p.value = fit$pvalue,
                    q.value = fit$pvalue.adj.FDR,
                    logFC = log2(fit$foldChange),
                    stringsAsFactors = FALSE)

  # Flip the sign when the requested contrast is the reverse of the factor
  # level order.
  if (all(levels(groups) != contrast)) {
    res$logFC <- -res$logFC
  }
  res
}

#' Differential expression using MAST
#'
#' Identifies differentially expressed genes between two groups of cells using
#' MAST
#'
#' @references MAST: a flexible statistical framework for assessing transcriptional
#' changes and characterizing heterogeneity in single-cell RNA sequencing
#' data G Finak, A McDavid, M Yajima, J Deng, V Gersuk, AK Shalek, CK Slichter
#' et al Genome biology 16 (1), 278
#'
#' @param sub_data Count data removed cell_type and selected certain two
#' compare_group
#' @param min_gene_expressed Genes expressed in minimum number of cells
#' @param min_valid_cells Minimum number of genes detected in the cell
#' @param contrast String vector
#' specifying the contrast to be
#' tested against the log2-fold-change threshold
#' @param method Character vector, either ’glm’, ’glmer’ or ’bayesglm’
#' @param silent Silence common problems with fitting some genes
#' @param check_logged Set FALSE to override sanity checks that try to
#' ensure that the default assay is log-transformed and has at least one
#' exact zero
#' @import MAST
#' @return A data frame with the columns gene, logFC, p.value and q.value
#' (FDR adjusted), with the gene identifiers also used as row names. logFC
#' is the first entry of \code{contrast} relative to the second.
#'
#' @details
#' To use this method, please install MAST, using the instructions at
#' https://github.com/RGLab/MAST
#'
#' @importFrom utils installed.packages
#' @importFrom stats p.adjust
#'
#' @export
MASTTest <- function(sub_data, min_gene_expressed, min_valid_cells,
                     contrast = unique(sub_data$compare_group),
                     method = 'glm', silent = FALSE, check_logged = TRUE) {
  contrast <- as.character(contrast)

  counts <- t(as.matrix(sub_data[, colnames(sub_data) != 'compare_group',
                                 drop = FALSE]))
  storage.mode(counts) <- 'numeric'

  keep_genes <- which(rowSums(counts) >= min_gene_expressed)
  valid_cells <- colSums(counts) >= min_valid_cells
  groups <- droplevels(as.factor(sub_data$compare_group[valid_cells]))
  counts <- log2(counts[keep_genes, valid_cells, drop = FALSE] + 1)

  # Cell annotation restricted to the retained cells. The previous code used
  # the row names of ALL cells, which fails as soon as a cell is filtered.
  cell_ids <- rownames(sub_data)[valid_cells]
  cdat <- data.frame(wellKey = cell_ids, compare_group = groups,
                     row.names = cell_ids, stringsAsFactors = FALSE)
  fdat <- data.frame(primerid = rownames(counts), stringsAsFactors = FALSE)

  sca <- FromMatrix(counts, cdat, fdat, check_logged = check_logged)
  zlmCond <- zlm(~compare_group, sca, method = method, silent = silent)
  summaryDt <- summary(zlmCond, logFC = TRUE, doLRT = TRUE)$datatable

  # Columns are addressed by name instead of by position: hurdle p-values
  # are the 'H' rows of `Pr(>Chisq)`, fold changes the 'logFC' rows of `coef`.
  component <- summaryDt[['component']]
  primerid <- as.character(summaryDt[['primerid']])
  is_hurdle <- component == 'H'
  is_logfc <- component == 'logFC'

  # Inner join on primerid, keeping the order of the logFC rows.
  hit <- match(primerid[is_logfc], primerid[is_hurdle])
  matched <- !is.na(hit)
  res <- data.frame(gene = primerid[is_logfc][matched],
                    logFC = summaryDt[['coef']][is_logfc][matched],
                    p.value = summaryDt[['Pr(>Chisq)']][is_hurdle][hit[matched]],
                    stringsAsFactors = FALSE)
  res$q.value <- stats::p.adjust(res$p.value, 'fdr')
  rownames(res) <- res$gene

  # The model coefficient is the SECOND factor level relative to the first
  # (the reference level). The other tests in this file report contrast[1]
  # relative to contrast[2], so the sign is flipped when contrast[1] is the
  # reference level. The previous condition flipped in the opposite case.
  if (all(levels(groups) == contrast)) {
    res$logFC <- -res$logFC
  }
  res
}

--------------------------------------------------------------------------------
/R/FindLR.R:
--------------------------------------------------------------------------------
#' Finding ligand-receptor pairs
#'
#' This function loads the highly expressed genes or differential expressed
#' genes as a dataframe. Significant interactions are found through mapping
#' these genes to our ligand-receptor database.
#'
#' @param data_1 Data used to find the ligand-receptor pairs
#' @param data_2 Second dataset used to find ligand-receptor pairs. If set NULL,
#' pairs will be found within data_1. Otherwise, pairs will be found between
#' data_1 and data_2. Default is NULL.
#' @param datatype Type of data used as input. Options are "mean count"
#' and "DEG"
#' @param comm_type Communication type. Available options are "cytokine",
#' "checkpoint", "growth factor", "other"
#' @param database Database used to find ligand-receptor pairs. If set NULL,
#' the built-in database will be used.
#' @import dplyr
#' @references Cytokines, Inflammation and Pain. Zhang et al,2007.
#' @references Cytokines, Chemokines and Their Receptors. Cameron et al, 2000-2013
#' @references Robust prediction of response to immune checkpoint blockade therapy
#' in metastatic melanoma. Auslander et al, 2018.
#' @references A draft network of ligand-receptor-mediated multicellular signalling
#' in human, Jordan A. Ramilowski, Nature Communications, 2015
#' @return A dataframe of the significant interactions
#' @export
FindLR <- function(data_1, data_2 = NULL, datatype, comm_type, database = NULL) {
  if (is.null(database)) {
    database <- iTALK:::database
  }
  # Restrict the database to the requested communication class.
  database <- database[database$Classification == comm_type, ]

  # Without a second dataset, pairs are searched within data_1 itself.
  gene_list_1 <- data_1
  gene_list_2 <- if (is.null(data_2)) gene_list_1 else data_2

  lr_cols <- c('Ligand.ApprovedSymbol', 'Receptor.ApprovedSymbol')

  # Database rows whose ligand is listed by `senders` and whose receptor is
  # listed by `receivers`.
  matched_pairs <- function(senders, receivers) {
    from_rows <- which(database$Ligand.ApprovedSymbol %in% senders$gene)
    to_rows <- which(database$Receptor.ApprovedSymbol %in% receivers$gene)
    database[intersect(from_rows, to_rows), lr_cols]
  }

  # Pick the annotation step for the data type: it attaches the per-gene
  # statistics of the sending and the receiving side to each matched pair.
  if (datatype == 'mean count') {
    annotate_pairs <- function(pairs, senders, receivers) {
      with_sender <- pairs %>%
        left_join(senders[, c('gene', 'exprs', 'cell_type')],
                  by = c('Ligand.ApprovedSymbol' = 'gene')) %>%
        dplyr::rename(cell_from_mean_exprs = exprs, cell_from = cell_type)
      with_sender %>%
        left_join(receivers[, c('gene', 'exprs', 'cell_type')],
                  by = c('Receptor.ApprovedSymbol' = 'gene')) %>%
        dplyr::rename(cell_to_mean_exprs = exprs, cell_to = cell_type)
    }
  } else if (datatype == 'DEG') {
    annotate_pairs <- function(pairs, senders, receivers) {
      with_sender <- pairs %>%
        left_join(senders[, c('gene', 'logFC', 'q.value', 'cell_type')],
                  by = c('Ligand.ApprovedSymbol' = 'gene')) %>%
        dplyr::rename(cell_from_logFC = logFC, cell_from_q.value = q.value,
                      cell_from = cell_type)
      with_sender %>%
        left_join(receivers[, c('gene', 'logFC', 'q.value', 'cell_type')],
                  by = c('Receptor.ApprovedSymbol' = 'gene')) %>%
        dplyr::rename(cell_to_logFC = logFC, cell_to_q.value = q.value,
                      cell_to = cell_type)
    }
  } else {
    stop('Error: invalid data type')
  }

  # Search both directions: list 1 -> list 2, then list 2 -> list 1.
  forward <- annotate_pairs(matched_pairs(gene_list_1, gene_list_2),
                            gene_list_1, gene_list_2)
  backward <- annotate_pairs(matched_pairs(gene_list_2, gene_list_1),
                             gene_list_2, gene_list_1)
  FilterTable <- rbind(forward, backward)

  # Both directions coincide when a single dataset is used, so deduplicate.
  FilterTable <- FilterTable[!duplicated(FilterTable), ]
  res <- as.data.frame(FilterTable) %>%
    dplyr::rename(ligand = Ligand.ApprovedSymbol, receptor = Receptor.ApprovedSymbol)
  if (datatype == 'DEG') {
    # Drop pairs where both sides carry the value 0.0001.
    both_flagged <- res$cell_from_logFC == 0.0001 & res$cell_to_logFC == 0.0001
    res <- res[!both_flagged, ]
  }
  res <- res %>% mutate(comm_type = comm_type)
  return(res)
}

--------------------------------------------------------------------------------
/R/LRPlot.R:
--------------------------------------------------------------------------------
#' Plotting ligand-receptor pairs
#'
#' This function loads the significant interactions as a dataframe. A circle
#' plot will be generated using package circlize. The width of the arrow
#' represents the expression level/log fold change of the ligand; while the
#' width of arrow head represents the expression level/log fold change of the
#' receptor. Different color and the type of the arrow stands for whether
#' the ligand and/or receptor are upregulated or downregulated. Users can select
#' the colors represent the cell type by their own or chosen randomly by default.
#'
#' @references Gu, Z. (2014) circlize implements and enhances circular
#' visualization in R. Bioinformatics.
#' @param data A dataframe contains significant ligand-receptor pairs and related
#' information such as expression level/log fold change and cell type
#' @param datatype Type of data. Options are "mean count" and "DEG"
#' @param gene_col Colors used to represent different categories of genes.
#' @param transparency Transparency of link colors, 0 means no transparency and
#' 1 means full transparency. If transparency is already set in col or row.col
#' or column.col, this argument will be ignored. NA also ignores this argument.
#' @param link.arr.lwd line width of the single line link which is put in the
#' center of the belt.
#' @param link.arr.lty line type of the single line link which is put in the
#' center of the belt.
#' @param link.arr.col color of the single line link which is put in the center
#' of the belt.
#' @param link.arr.width size of the single arrow head link which is put in the
#' center of the belt.
#' @param link.arr.type Type of the arrows, pass to Arrowhead. Default value is
#' triangle. There is an additional option big.arrow
#' @param facing Facing of text.
#' @param cell_col Colors used to represent types of cells. If set NULL, it
#' will be generated randomly
#' @param print.cell Whether or not print the type of cells on the outer layer
#' of the graph.
#' @param track.height_1 height of the cell notation track
#' @param track.height_2 height of the gene notation track
#' @param annotation.height_1 Track height corresponding to values in annotationTrack.
#' @param annotation.height_2 Track height corresponding to values in annotationTrack.
#' @param text.vjust adjustment on 'vertical' (radial) direction. Besides to set it
#' as numeric values, the value can also be a string contain absolute unit, e.g.
#' "2.1mm", "-1 inche", but only "mm", "cm", "inches"/"inche" are allowed.
#' @param ... Further arguments passed on to \code{chordDiagram}.
#' @import randomcoloR
#' @import graphics
#' @import circlize
#'
#' @return A figure of the significant interactions
#' @export
LRPlot <- function(data, datatype, gene_col = NULL, transparency = 0.5, link.arr.lwd = 1,
                   link.arr.lty = NULL, link.arr.col = NULL, link.arr.width = NULL,
                   link.arr.type = NULL, facing = 'clockwise', cell_col = NULL,
                   print.cell = TRUE, track.height_1 = uh(2, 'mm'),
                   track.height_2 = uh(12, 'mm'), annotation.height_1 = 0.01,
                   annotation.height_2 = 0.01, text.vjust = '0.4cm', ...) {
  cell_group <- unique(c(data$cell_from, data$cell_to))
  # One sector per unique (cell type, gene) combination; names(genes) holds
  # the cell type, the value holds the gene. Sectors are ordered by cell type.
  genes <- c(structure(data$ligand, names = data$cell_from),
             structure(data$receptor, names = data$cell_to))
  genes <- genes[!duplicated(paste(names(genes), genes))]
  genes <- genes[order(names(genes))]

  # The value 0.0001 is compared against throughout as a sentinel
  # (NOTE(review): presumably the placeholder logFC of non-significant genes
  # -- confirm against DEG()).
  if (is.null(link.arr.lty)) {
    if (datatype == 'mean count') {
      link.arr.lty <- 'solid'
    } else if (datatype == 'DEG') {
      link.arr.lty <- structure(ifelse(data$cell_from_logFC == 0.0001, 'dashed', 'solid'),
                                names = paste(data$cell_from, data$receptor))
    } else {
      print('invalid datatype')
    }
  }
  if (is.null(link.arr.col)) {
    if (datatype == 'mean count') {
      data <- data %>% mutate(link_col = 'black')
    } else if (datatype == 'DEG') {
      # Colour encodes the direction of change of ligand and receptor.
      data <- data %>% mutate(link_col = ifelse(cell_from_logFC == 0.0001, ifelse(cell_to_logFC > 0, '#d73027', '#00ccff'),
                                         ifelse(cell_to_logFC == 0.0001, ifelse(cell_from_logFC > 0, '#d73027', '#00ccff'),
                                         ifelse(cell_from_logFC > 0, ifelse(cell_to_logFC > 0, '#d73027', '#dfc27d'),
                                         ifelse(cell_to_logFC > 0, '#9933ff', '#00ccff')))))
    } else {
      print('invalid datatype')
    }
  } else {
    data$link_col <- link.arr.col
  }
  if (is.null(link.arr.type)) {
    if (datatype == 'mean count') {
      link.arr.type <- 'triangle'
    } else if (datatype == 'DEG') {
      link.arr.type <- structure(ifelse(data$cell_to_logFC == 0.0001, 'ellipse', 'triangle'),
                                 names = paste(data$cell_from, data$receptor))
    } else {
      print('invalid datatype')
    }
  }
  if (is.null(gene_col)) {
    comm_col <- structure(c('#99ff99', '#99ccff', '#ff9999', '#ffcc99'),
                          names = c('other', 'cytokine', 'checkpoint', 'growth factor'))
    gene_col <- structure(c(comm_col[data$comm_type], rep('#073c53', length(data$receptor))),
                          names = c(data$ligand, data$receptor))
  }
  if (is.null(cell_col)) {
    cell_col <- structure(randomColor(count = length(unique(names(genes))), luminosity = 'dark'),
                          names = unique(names(genes)))
  }

  # Arrow line width: constant input is capped at 5, otherwise the absolute
  # values are rescaled to the range 1..6 (sentinel entries get width 2).
  if (is.null(link.arr.lwd)) {
    data <- data %>% mutate(arr_width = 1)
  } else if (max(abs(link.arr.lwd)) - min(abs(link.arr.lwd)) == 0 && all(link.arr.lwd != 0.0001)) {
    # Fixed: the comparison used to sit inside abs() -- abs(link.arr.lwd<5) --
    # so negative values were never capped.
    data <- data %>% mutate(arr_width = ifelse(abs(link.arr.lwd) < 5, abs(link.arr.lwd), 5))
  } else {
    data <- data %>% mutate(arr_width = ifelse(link.arr.lwd == 0.0001, 2, 1 + 5 / (max(abs(link.arr.lwd)) - min(abs(link.arr.lwd))) * (abs(link.arr.lwd) - min(abs(link.arr.lwd)))))
  }

  # Gap of 1 between genes of one cell type and, when there are several cell
  # types, a gap of 8 after the last gene of each type.
  if (length(cell_group) != 1) {
    gap.degree <- do.call("c", lapply(table(names(genes)), function(i) c(rep(1, i - 1), 8)))
  } else {
    gap.degree <- do.call("c", lapply(table(names(genes)), function(i) c(rep(1, i))))
  }
  # Always reset the circos state, even if plotting fails part-way, so a
  # later call does not run against stale parameters.
  on.exit(circos.clear(), add = TRUE)
  circos.par(gap.degree = gap.degree)

  if (length(gene_col) == 1) {
    grid.col <- gene_col
  } else {
    grid.col <- gene_col[genes]
    names(grid.col) <- paste(names(genes), genes)
  }

  # Arrow head width: same scheme as the line width, on a tenth of the scale.
  if (is.null(link.arr.width)) {
    data <- data %>% mutate(link.arr.width = data$arr_width / 10)
  } else if (max(abs(link.arr.width)) - min(abs(link.arr.width)) == 0 && all(link.arr.width != 0.0001)) {
    data <- data %>% mutate(link.arr.width = ifelse(abs(link.arr.width) < 0.5, abs(link.arr.width), 0.5))
  } else {
    data <- data %>% mutate(link.arr.width = ifelse(link.arr.width == 0.0001, 0.2, (1 + 5 / (max(abs(link.arr.width)) - min(abs(link.arr.width))) * (abs(link.arr.width) - min(abs(link.arr.width)))) / 10))
  }

  # The chord ribbons are fully transparent (col = '#00000000'); only the
  # centre-line arrows are visible.
  chordDiagram(as.data.frame(cbind(paste(data$cell_from, data$ligand), paste(data$cell_to, data$receptor))),
               order = paste(names(genes), genes),
               grid.col = grid.col, transparency = transparency, directional = 1,
               direction.type = 'arrows', link.arr.lwd = data$arr_width,
               link.arr.lty = link.arr.lty, link.arr.type = link.arr.type,
               link.arr.width = data$link.arr.width, link.arr.col = data$link_col,
               col = '#00000000', annotationTrack = c('grid'),
               preAllocateTracks = list(list(track.height = track.height_1),
                                        list(track.height = track.height_2)),
               annotationTrackHeight = c(annotation.height_1, annotation.height_2), ...)

  # Track 2: gene labels.
  circos.trackPlotRegion(track.index = 2, panel.fun = function(x, y) {
    xlim <- get.cell.meta.data("xlim")
    ylim <- get.cell.meta.data("ylim")
    sector.index <- genes[get.cell.meta.data("sector.numeric.index")]
    circos.text(mean(xlim), mean(ylim), sector.index, col = "black", cex = 0.7,
                facing = facing, niceFacing = TRUE)
  }, bg.border = 0)

  # Track 1: one highlighted band per cell type spanning all of its genes.
  if (print.cell) {
    for (cell in unique(names(genes))) {
      gene <- as.character(genes[names(genes) == cell])
      band_col <- if (length(cell_col) == 1) cell_col else cell_col[cell]
      highlight.sector(sector.index = paste(cell, gene), track.index = 1, col = band_col,
                       text = cell, text.vjust = text.vjust, niceFacing = TRUE, lwd = 1)
    }
  }
  invisible(NULL)
}

--------------------------------------------------------------------------------
/R/NetView.R:
--------------------------------------------------------------------------------
#' Network Viewing of cell-cell communication
#'
#' This function loads the significant interactions as a dataframe, and colors
#' represent different types of cells as a structure. The width of edges represent
#' the strength of the communication. Labels on the edges show exactly how many
#' interactions exist between two types of cells.
#'
#' @references Csardi G, Nepusz T: The igraph software package for complex network
#' research, InterJournal, Complex Systems 1695. 2006.
#' http://igraph.org
#' @param data A dataframe containing ligand-receptor pairs and corresponding
#' cell types used to do the plotting
#' @param col Colors used to represent different cell types. When the vector
#' is named and the names cover every cell type in \code{data}, colors are
#' looked up by cell type; otherwise they are assigned by vertex position.
#' @param label Whether or not shows the label of edges (number of connections
#' between different cell types)
#' @param edge.curved Specifies whether to draw curved edges, or not.
#' This can be a logical or a numeric vector or scalar.
#' First the vector is replicated to have the same length as the number of
#' edges in the graph. Then it is interpreted for each edge separately.
#' A numeric value specifies the curvature of the edge; zero curvature means
#' straight edges, negative values means the edge bends clockwise, positive
#' values the opposite. TRUE means curvature 0.5, FALSE means curvature zero
#' @param shape The shape of the vertex, currently “circle”, “square”,
#' “csquare”, “rectangle”, “crectangle”, “vrectangle”, “pie” (see
#' vertex.shape.pie), ‘sphere’, and “none” are supported, and only by the
#' plot.igraph command. “none” does not draw the vertices at all, although
#' vertex label are plotted (if given). See shapes for details about vertex
#' shapes and vertex.shape.pie for using pie charts as vertices.
#' @param layout The layout specification. It must be a call to a layout
#' specification function.
#' @param vertex.size The size of vertex
#' @param margin The amount of empty space below, over, at the left and right
#' of the plot, it is a numeric vector of length four. Usually values between
#' 0 and 0.5 are meaningful, but negative values are also possible, that will
#' make the plot zoom in to a part of the graph. If it is shorter than four
#' then it is recycled.
#' @param vertex.label.cex The label size of vertex
#' @param vertex.label.color The color of label for vertex
#' @param arrow.width The width of arrows
#' @param edge.label.color The color for single arrow
#' @param edge.label.cex The size of label for arrows
#' @param edge.max.width The maximum arrow size
#' @import network
#' @import igraph
#' @return A network graph of the significant interactions
#' @export
NetView <- function(data, col, label = TRUE, edge.curved = 0.5, shape = 'circle',
                    layout = nicely(), vertex.size = 20, margin = 0.2,
                    vertex.label.cex = 1.5, vertex.label.color = 'black',
                    arrow.width = 1.5, edge.label.color = 'black',
                    edge.label.cex = 1, edge.max.width = 10) {
  # One edge per (sender, receiver) cell-type pair; n counts the interactions.
  net <- data %>% group_by(cell_from, cell_to) %>% dplyr::summarize(n = n())
  net <- as.data.frame(net, stringsAsFactors = FALSE)
  g <- graph.data.frame(net, directed = TRUE)
  # Numeric vertex ids of both ends of every edge (column 1 = source).
  edge.start <- ends(g, es = E(g), names = FALSE)

  coords <- layout_(g, layout)
  # A single vertex cannot be standardized, so its coordinates are kept.
  if (nrow(coords) != 1) {
    coords_scale <- scale(coords)
  } else {
    coords_scale <- coords
  }
  # Angle used for self-loops, derived from the vertex position.
  loop.angle <- ifelse(coords_scale[V(g), 1] > 0,
                       -atan(coords_scale[V(g), 2] / coords_scale[V(g), 1]),
                       pi - atan(coords_scale[V(g), 2] / coords_scale[V(g), 1]))

  V(g)$size <- vertex.size
  # Vertices are created in order of appearance in `net`, which need not match
  # the order of `col`. Look colors up by cell-type name when `col` provides
  # names for all vertices; fall back to positional assignment otherwise.
  vertex_names <- V(g)$name
  if (!is.null(names(col)) && all(vertex_names %in% names(col))) {
    V(g)$color <- unname(col[vertex_names])
  } else {
    V(g)$color <- col[V(g)]
  }
  V(g)$label.color <- vertex.label.color
  V(g)$label.cex <- vertex.label.cex

  if (label) {
    E(g)$label <- E(g)$n
  }
  # Edge width scales linearly with the interaction count; a constant count
  # gets a fixed width to avoid dividing by zero.
  if (max(E(g)$n) == min(E(g)$n)) {
    E(g)$width <- 2
  } else {
    E(g)$width <- 1 + edge.max.width / (max(E(g)$n) - min(E(g)$n)) * (E(g)$n - min(E(g)$n))
  }
  E(g)$arrow.width <- arrow.width
  E(g)$label.color <- edge.label.color
  E(g)$label.cex <- edge.label.cex
  # Edges take the color of their source vertex.
  E(g)$color <- V(g)$color[edge.start[, 1]]

  is_loop <- edge.start[, 2] == edge.start[, 1]
  if (sum(is_loop) != 0) {
    E(g)$loop.angle[which(is_loop)] <- loop.angle[edge.start[which(is_loop), 1]]
  }
  plot(g, edge.curved = edge.curved, vertex.shape = shape, layout = coords_scale, margin = margin)
  return(g)
}

--------------------------------------------------------------------------------
/R/TimePlot.R:
--------------------------------------------------------------------------------
#' Plotting a ligand-receptor pair over time
#'
#' Takes count data as a dataframe plus the names of a ligand, a receptor and
#' the two interacting cell types. The plot shows the expression of the ligand
#' (in \code{cell_from}) and of the receptor (in \code{cell_to}) at each time
#' point, illustrating how the pair changes over time.
#'
#' @param data A dataframe with one column per gene plus the columns
#' \code{time} and \code{cell_type}
#' @param ligand String as selected ligand
#' @param receptor String as selected receptor
#' @param cell_from The cell type ligand gene belongs to
#' @param cell_to The cell type receptor gene belongs to
#' @param Time Time points shown on the plot; all time points in \code{data}
#' when NULL
#' @import tidyr
#' @import ggplot2
#' @return A figure of the paired interactions
#' @export
TimePlot <- function(data, ligand, receptor, cell_from, cell_to, Time = NULL) {
  if (is.null(Time)) {
    Time <- unique(data$time)
  }

  # Keep the requested time points and only the columns needed for plotting.
  selected <- data %>%
    filter(time %in% Time) %>%
    select(ligand, receptor, time, cell_type)

  # Long format: one row per (cell, gene); keep the ligand only for the
  # sending cell type and the receptor only for the receiving cell type.
  long_tbl <- gather(selected, gene, value, c(ligand, receptor), factor_key = TRUE)
  long_tbl <- long_tbl %>%
    filter((cell_type == cell_from & gene == ligand) | (cell_type == cell_to & gene == receptor))
  long_tbl$time <- as.factor(long_tbl$time)

  dodge_width <- 0.75
  points_layer <- geom_point(position = position_dodge(dodge_width))
  # Wide horizontal bar at the mean.
  mean_layer <- stat_summary(fun.y = mean, aes(ymin = ..y.., ymax = ..y..), geom = 'errorbar',
                             width = 0.5, position = position_dodge(dodge_width))
  # Narrow error bar spanning mean +/- one standard deviation.
  sd_layer <- stat_summary(fun.ymin = function(x) (mean(x) - sd(x)),
                           fun.ymax = function(x) (mean(x) + sd(x)),
                           geom = "errorbar", width = 0.1,
                           position = position_dodge(dodge_width))
  look <- theme_minimal() +
    theme(axis.line = element_line(),
          plot.title = element_text(size = 14, face = 'bold', hjust = 0.5))

  g <- ggplot(long_tbl, aes(x = time, y = value, color = gene)) +
    points_layer +
    mean_layer +
    sd_layer +
    look +
    ylab('gene expression') +
    xlab('time') +
    ggtitle(paste0(ligand, '-', receptor))
  return(g)
}

--------------------------------------------------------------------------------
/R/rawParse.R:
--------------------------------------------------------------------------------
#' Parsing the data to get top expressed genes
#'
#' This function loads the count data as a dataframe. It assumes that each line
#' contains gene expression profile of one single cell, and each column
#' contains the one single gene expression profile in different cells. The dataframe
#' should also contain the cell type information with column name 'cell_type'.
#' Group information should also be included as 'compare_group' if users want
#' to call differential expressed ligand-receptor pairs. Batch information as
#' 'batch' is optional. If included, users may want to use the raw count data
#' for later analysis.
#'
#' @param data Input data, raw or normalized count with 'cell_type' column
#' @param top_genes (scale 1 to 100) Top percent highly expressed genes used
#' to find ligand-receptor pairs, default is 50
#' @param stats Whether calculates the mean or the median of the data. Available
#' options are 'mean' and 'median'.
#' @importFrom progress progress_bar
#' @return A dataframe with columns 'gene', 'exprs' and 'cell_type' holding,
#' for every cell type, its top expressed genes
#' @export
rawParse <- function(data, top_genes = 50, stats = 'mean') {
  # Fail early with a clear message; previously an unknown option was only
  # printed and the function then crashed on an undefined object.
  if (length(stats) != 1 || !stats %in% c('mean', 'median')) {
    stop("invalid 'stats' option: expected 'mean' or 'median'", call. = FALSE)
  }

  cell_group <- unique(data$cell_type)
  pb <- progress::progress_bar$new(total = length(cell_group))
  pb$tick(0)

  # Collect one table per cell type and bind once at the end instead of
  # growing the result with rbind() on every iteration.
  per_cell <- vector('list', length(cell_group))
  idx <- 0L
  for (i in cell_group) {
    idx <- idx + 1L
    sub_data <- data[data$cell_type == i, ]
    # Genes in rows, cells in columns; set the storage mode on the matrix
    # directly so its shape survives even with a single gene.
    counts <- t(subset(sub_data, select = -cell_type))
    storage.mode(counts) <- 'numeric'

    if (stats == 'mean') {
      exprs <- rowMeans(counts)
    } else {
      exprs <- apply(counts, 1, FUN = median)
    }
    temp <- data.frame(gene = rownames(counts), exprs = unname(exprs), cell_type = i,
                       stringsAsFactors = FALSE)
    temp <- temp[order(temp$exprs, decreasing = TRUE), ]
    # seq_len() yields no rows when the requested share rounds to zero
    # (1:0 would have returned the first row).
    temp <- temp[seq_len(ceiling(nrow(temp) * top_genes / 100)), ]
    rownames(temp) <- NULL
    per_cell[[idx]] <- temp
    pb$tick()
  }
  res <- do.call(rbind, per_cell)
  return(res)
}

--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolgenome/iTALK/6d9b3907f00004fcac9514d2726ad68d524a952b/R/sysdata.rda
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# iTALK
`iTALK` is an R toolkit for characterizing and illustrating intercellular communication, developed and maintained by [Linghua Wang lab](https://www.mdanderson.org/research/departments-labs-institutes/labs/linghua-wang-laboratory.html) at the University of Texas MD Anderson Cancer Center. `iTALK` can be used to visualize the complexity, diversity and dynamics of cell-cell communication in a wide range of biological processes.
For more information, please refer to [our manuscript](https://www.biorxiv.org/content/early/2019/01/04/507871).

# Installation
To install the developmental version from GitHub:

```R
if(!require(devtools)) install.packages("devtools");
devtools::install_github("Coolgenome/iTALK", build_vignettes = TRUE)
```
To load the installed `iTALK` in R:
```R
library(iTALK)
```
# Citation
This package is intended for research use only. For any bugs, enhancement requests and other issues, please use the [`iTALK` GitHub issues tracker](https://github.com/Coolgenome/iTALk/issues) or email [Yuanxin Wang](mailto:ywang65@mdanderson.org). If you find iTALK useful and use iTALK in your publication, please cite the paper: [iTALK: an R Package to Characterize and Illustrate Intercellular Communication](https://www.biorxiv.org/content/early/2019/01/04/507871)

--------------------------------------------------------------------------------
/data/LR_database.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolgenome/iTALK/6d9b3907f00004fcac9514d2726ad68d524a952b/data/LR_database.rda
--------------------------------------------------------------------------------
/example/example_code.r:
--------------------------------------------------------------------------------

# This example data is from 10x pbmc dataset. Samples are randomly selected from each cell type. And groups are randomly assigned to each sample to make the illustration.

library(iTALK)
# %>%, mutate() and filter() are used below; iTALK imports dplyr internally
# but does not attach it for the user.
library(dplyr)

# read the data
data <- read.table('example_data.txt', sep = '\t', header = TRUE, stringsAsFactors = FALSE)

## highly expressed ligand-receptor pairs

# find top 50 percent highly expressed genes
highly_exprs_genes <- rawParse(data, top_genes = 50, stats = 'mean')
# find the ligand-receptor pairs from highly expressed genes
comm_list <- c('growth factor', 'other', 'cytokine', 'checkpoint')
cell_col <- structure(c('#4a84ad', '#4a1dc6', '#e874bf', '#b79eed', '#ff636b', '#52c63b', '#9ef49a'),
                      names = unique(data$cell_type))
par(mfrow = c(1, 2))
res <- NULL
for (comm_type in comm_list) {
  res_cat <- FindLR(highly_exprs_genes, datatype = 'mean count', comm_type = comm_type)
  res_cat <- res_cat[order(res_cat$cell_from_mean_exprs * res_cat$cell_to_mean_exprs, decreasing = TRUE), ]
  if (nrow(res_cat) == 0) {
    next
  }
  #plot by ligand category
  #overall network plot
  NetView(res_cat, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)
  #top 20 ligand-receptor pairs (fewer when less than 20 were found)
  top_cat <- head(res_cat, 20)
  LRPlot(top_cat, datatype = 'mean count', cell_col = cell_col,
         link.arr.lwd = top_cat$cell_from_mean_exprs, link.arr.width = top_cat$cell_to_mean_exprs)
  title(comm_type)
  res <- rbind(res, res_cat)
}
if (is.null(res)) {
  print('No ligand-receptor pairs found')
} else {
  res <- head(res[order(res$cell_from_mean_exprs * res$cell_to_mean_exprs, decreasing = TRUE), ], 20)
  NetView(res, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)
  LRPlot(res, datatype = 'mean count', cell_col = cell_col,
         link.arr.lwd = res$cell_from_mean_exprs, link.arr.width = res$cell_to_mean_exprs)
}

## significant ligand-receptor pairs between compare groups

# randomly assign the compare group to each sample
data <- data %>% mutate(compare_group = sample(2, nrow(data), replace = TRUE))
# find DEGenes of regulatory T cells and NK cells between these 2 groups
deg_t <- DEG(data %>% filter(cell_type == 'regulatory_t'), method = 'Wilcox', contrast = c(2, 1))
deg_nk <- DEG(data %>% filter(cell_type == 'cd56_nk'), method = 'Wilcox', contrast = c(2, 1))
# find significant ligand-receptor pairs and do the plotting
par(mfrow = c(1, 2))
res <- NULL
for (comm_type in comm_list) {
  res_cat <- FindLR(deg_t, deg_nk, datatype = 'DEG', comm_type = comm_type)
  res_cat <- res_cat[order(res_cat$cell_from_logFC * res_cat$cell_to_logFC, decreasing = TRUE), ]
  #plot by ligand category
  if (nrow(res_cat) == 0) {
    next
  }
  # at most the top 20 pairs; the previous check `nrow(res_cat>=20)` was
  # always true for a non-empty table and produced NA rows
  top_cat <- head(res_cat, 20)
  LRPlot(top_cat, datatype = 'DEG', cell_col = cell_col,
         link.arr.lwd = top_cat$cell_from_logFC, link.arr.width = top_cat$cell_to_logFC)
  NetView(res_cat, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)
  title(comm_type)
  res <- rbind(res, res_cat)
}
if (is.null(res)) {
  print('No significant pairs found')
} else {
  if (nrow(res) >= 20) {
    res <- res[order(res$cell_from_logFC * res$cell_to_logFC, decreasing = TRUE), ][1:20, ]
  }
  NetView(res, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)
  LRPlot(res, datatype = 'DEG', cell_col = cell_col,
         link.arr.lwd = res$cell_from_logFC, link.arr.width = res$cell_to_logFC)
}
# I just randomly assigned the compare group to samples which has no biological difference for showing how to use the package.
# So there should be no significant genes to be expected.
70 | -------------------------------------------------------------------------------- /man/DEG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DEG.R 3 | \name{DEG} 4 | \alias{DEG} 5 | \title{Call DEGenes} 6 | \usage{ 7 | DEG(data, method, min_gene_expressed = 0, min_valid_cells = 0, 8 | contrast = NULL, q_cut = 0.05, ...) 9 | } 10 | \arguments{ 11 | \item{data}{Input raw or normalized count data with column 'cell_type' 12 | and 'compare_group'} 13 | 14 | \item{method}{Method used to call DEGenes. Available options are: 15 | \itemize{ 16 | \item{Wilcox}: Wilcoxon rank sum test 17 | \item{DESeq2}: Negative binomial model based differential analysis 18 | (Love et al, Genome Biology, 2014) 19 | \item{SCDE}: Bayesian approach to single-cell differential 20 | expression analysis (Kharchenko et al, Nature Method, 2014) 21 | \item{monocle}: Census based differential analysis (Qiu et al, 22 | Nature Methods, 2017) 23 | \item{edgeR}: Negative binomial distributions, including empirical 24 | Bayes estimation, exact tests, generalized linear models and 25 | quasi-likelihood tests based differential analysis (McCarthy et al, 26 | Nucleic Acids Research, 2012) 27 | \item{DESingle}: Zero-Inflated Negative Binomial model to estimate 28 | the proportion of real and dropout zeros and to define and detect 29 | the 3 types of DE genes (Miao et al, Bioinformatics, 2018) 30 | \item{MAST}: GLM-framework that treates cellular detection rate as a 31 | covariate (Finak et al, Genome Biology, 2015) 32 | }} 33 | 34 | \item{min_gene_expressed}{Genes expressed in minimum number of cells} 35 | 36 | \item{min_valid_cells}{Minimum number of genes detected in the cell} 37 | 38 | \item{contrast}{String vector specifying the contrast to be 39 | tested against the log2-fold-change threshold} 40 | 41 | \item{q_cut}{Cut-off for q value} 42 | } 43 | \value{ 44 | A matrix of the 
differentially expressed genes 45 | } 46 | \description{ 47 | This function loads the data as a dataframe, and method as a string. 48 | It assumes that each line contains gene expression profile of one single 49 | cell, and each column contains the one single gene expression profile in 50 | different cells. The dataframe should also contain the cell type information 51 | with column name 'cell_type', as well as group information as 'compare_group'. 52 | Batch information as 'batch' is optional. If included, users may want to use 53 | the raw count data for later analysis. Differentially expressed genes will be 54 | called within each cell type by the method users select. For bulk RNAseq, 55 | we provide edgeR, DESeq2. And for scRNA-seq, popular methods in packages 56 | scde, monocle, DEsingle and MAST are available. 57 | } 58 | -------------------------------------------------------------------------------- /man/DESeq2Test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DEG.R 3 | \name{DESeq2Test} 4 | \alias{DESeq2Test} 5 | \title{Differential expression using DESeq2} 6 | \usage{ 7 | DESeq2Test(sub_data, min_gene_expressed, min_valid_cells, 8 | contrast = unique(sub_data$compare_group), test = "Wald", 9 | fitType = "parametric", sfType = "ratio", betaPrior = FALSE, 10 | quiet = FALSE, modelMatrixType = "standard", 11 | minReplicatesForReplace = 7, useT = FALSE, minmu = 0.5, 12 | parallel = FALSE, BPPARAM = bpparam()) 13 | } 14 | \arguments{ 15 | \item{sub_data}{Count data removed cell_type and selected certain two 16 | compare_group} 17 | 18 | \item{min_gene_expressed}{Genes expressed in minimum number of cells} 19 | 20 | \item{min_valid_cells}{Minimum number of genes detected in the cell} 21 | 22 | \item{contrast}{String vector specifying the contrast to be 23 | tested against the log2-fold-change threshold} 24 | 25 | \item{test}{either "Wald" or
"LRT", which will then use either 26 | Wald significance tests (defined by \code{\link{nbinomWaldTest}}), 27 | or the likelihood ratio test on the difference in deviance between a 28 | full and reduced model formula (defined by \code{\link{nbinomLRT}})} 29 | 30 | \item{fitType}{either "parametric", "local", or "mean" 31 | for the type of fitting of dispersions to the mean intensity. 32 | See \code{\link{estimateDispersions}} for description.} 33 | 34 | \item{sfType}{either "ratio", "poscounts", or "iterate" 35 | for the type of size factor estimation. See 36 | \code{\link{estimateSizeFactors}} for description.} 37 | 38 | \item{betaPrior}{whether or not to put a zero-mean normal prior on 39 | the non-intercept coefficients 40 | See \code{\link{nbinomWaldTest}} for description of the calculation 41 | of the beta prior. In versions \code{>=1.16}, the default is set 42 | to \code{FALSE}, and shrunken LFCs are obtained afterwards using 43 | \code{\link{lfcShrink}}.} 44 | 45 | \item{quiet}{whether to print messages at each step} 46 | 47 | \item{modelMatrixType}{either "standard" or "expanded", which describe 48 | how the model matrix, X of the GLM formula is formed. 49 | "standard" is as created by \code{model.matrix} using the 50 | design formula. "expanded" includes an indicator variable for each 51 | level of factors in addition to an intercept. for more information 52 | see the Description of \code{\link{nbinomWaldTest}}. 53 | betaPrior must be set to TRUE in order for expanded model matrices 54 | to be fit.} 55 | 56 | \item{minReplicatesForReplace}{the minimum number of replicates required 57 | in order to use \code{\link{replaceOutliers}} on a 58 | sample. If there are samples with so many replicates, the model will 59 | be refit after these replacing outliers, flagged by Cook's distance.
60 | Set to \code{Inf} in order to never replace outliers.} 61 | 62 | \item{useT}{logical, passed to \code{\link{nbinomWaldTest}}, default is FALSE, 63 | where Wald statistics are assumed to follow a standard Normal} 64 | 65 | \item{minmu}{lower bound on the estimated count for fitting gene-wise dispersion 66 | and for use with \code{nbinomWaldTest} and \code{nbinomLRT}} 67 | 68 | \item{parallel}{if FALSE, no parallelization. if TRUE, parallel 69 | execution using \code{BiocParallel}, see next argument \code{BPPARAM}. 70 | A note on running in parallel using \code{BiocParallel}: it may be 71 | advantageous to remove large, unneeded objects from your current 72 | R environment before calling \code{DESeq}, 73 | as it is possible that R's internal garbage collection 74 | will copy these files while running on worker nodes.} 75 | 76 | \item{BPPARAM}{an optional parameter object passed internally 77 | to \code{\link{bplapply}} when \code{parallel=TRUE}. 78 | If not specified, the parameters last registered with 79 | \code{\link{register}} will be used.} 80 | } 81 | \value{ 82 | A matrix of differentially expressed genes and related statistics. 83 | } 84 | \description{ 85 | Identifies differentially expressed genes between two groups of cells using 86 | DESeq2 87 | } 88 | \details{ 89 | This test does not support pre-processed genes. To use this method, please 90 | install DESeq2, using the instructions at 91 | https://bioconductor.org/packages/release/bioc/html/DESeq2.html 92 | } 93 | \references{ 94 | Love MI, Huber W and Anders S (2014). "Moderated estimation of 95 | fold change and dispersion for RNA-seq data with DESeq2." Genome Biology. 
96 | https://bioconductor.org/packages/release/bioc/html/DESeq2.html 97 | } 98 | -------------------------------------------------------------------------------- /man/DESingleTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DEG.R 3 | \name{DESingleTest} 4 | \alias{DESingleTest} 5 | \title{Differential expression using DEsingle} 6 | \usage{ 7 | DESingleTest(sub_data, min_gene_expressed, min_valid_cells, 8 | contrast = unique(sub_data$compare_group), parallel = FALSE, 9 | BPPARAM = bpparam()) 10 | } 11 | \arguments{ 12 | \item{sub_data}{Count data removed cell_type and selected certain two 13 | compare_group} 14 | 15 | \item{min_gene_expressed}{Genes expressed in minimum number of cells} 16 | 17 | \item{min_valid_cells}{Minimum number of genes detected in the cell} 18 | 19 | \item{contrast}{String vector specifying the contrast to be 20 | tested against the log2-fold-change threshold} 21 | 22 | \item{parallel}{If FALSE (default), no parallel computation is used; 23 | if TRUE, parallel computation using \code{BiocParallel}, with argument 24 | \code{BPPARAM}.} 25 | 26 | \item{BPPARAM}{An optional parameter object passed internally to 27 | \code{\link{bplapply}} when \code{parallel=TRUE}. If not specified, 28 | \code{\link{bpparam}()} (default) will be used.} 29 | } 30 | \value{ 31 | A matrix of differentially expressed genes and related statistics. 32 | } 33 | \description{ 34 | Identifies differentially expressed genes between two groups of cells using 35 | DEsingle 36 | } 37 | \details{ 38 | This test does not support pre-processed genes. To use this method, please 39 | install DEsingle, using the instructions at 40 | https://github.com/miaozhun/DEsingle 41 | } 42 | \references{ 43 | Zhun Miao, Ke Deng, Xiaowo Wang, Xuegong Zhang (2018). DEsingle 44 | for detecting three types of differential expression in single-cell RNA-seq 45 | data. 
Bioinformatics, bty332. 10.1093/bioinformatics/bty332. 46 | } 47 | -------------------------------------------------------------------------------- /man/FindLR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FindLR.R 3 | \name{FindLR} 4 | \alias{FindLR} 5 | \title{Finding ligand-receptor pairs} 6 | \usage{ 7 | FindLR(data_1, data_2 = NULL, datatype, comm_type, database = NULL) 8 | } 9 | \arguments{ 10 | \item{data_1}{Data used to find the ligand-receptor pairs} 11 | 12 | \item{data_2}{Second dataset used to find ligand-receptor pairs. If set NULL, 13 | pairs will be found within data_1. Otherwise, pairs will be found between 14 | data_1 and data_2. Default is NULL.} 15 | 16 | \item{datatype}{Type of data used as input. Options are "mean count" 17 | and "DEG"} 18 | 19 | \item{comm_type}{Communication type. Available options are "cytokine", 20 | "checkpoint", "growth factor", "other"} 21 | 22 | \item{database}{Database used to find ligand-receptor pairs. If set NULL, 23 | the built-in database will be used.} 24 | } 25 | \value{ 26 | A dataframe of the significant interactions 27 | } 28 | \description{ 29 | This function loads the highly expressed genes or differentially expressed 30 | genes as a dataframe. Significant interactions are found through mapping 31 | these genes to our ligand-receptor database. 32 | } 33 | \references{ 34 | Cytokines, Inflammation and Pain. Zhang et al, 2007. 35 | 36 | Cytokines, Chemokines and Their Receptors. Cameron et al, 2000-2013 37 | 38 | Robust prediction of response to immune checkpoint blockade therapy 39 | in metastatic melanoma. Auslander et al, 2018. 40 | 41 | A draft network of ligand-receptor-mediated multicellular signalling 42 | in human, Jordan A.
Ramilowski, Nature Communications, 2015 43 | } 44 | -------------------------------------------------------------------------------- /man/LRPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LRPlot.R 3 | \name{LRPlot} 4 | \alias{LRPlot} 5 | \title{Plotting ligand-receptor pairs} 6 | \usage{ 7 | LRPlot(data, datatype, gene_col = NULL, transparency = 0.5, 8 | link.arr.lwd = 1, link.arr.lty = NULL, link.arr.col = NULL, 9 | link.arr.width = NULL, link.arr.type = NULL, facing = "clockwise", 10 | cell_col = NULL, print.cell = TRUE, track.height_1 = uh(2, "mm"), 11 | track.height_2 = uh(12, "mm"), annotation.height_1 = 0.01, 12 | annotation.height_2 = 0.01, text.vjust = "0.4cm", ...) 13 | } 14 | \arguments{ 15 | \item{data}{A dataframe containing significant ligand-receptor pairs and related 16 | information such as expression level/log fold change and cell type} 17 | 18 | \item{datatype}{Type of data. Options are "mean count" and "DEG"} 19 | 20 | \item{gene_col}{Colors used to represent different categories of genes.} 21 | 22 | \item{transparency}{Transparency of link colors, 0 means no transparency and 23 | 1 means full transparency. If transparency is already set in col or row.col 24 | or column.col, this argument will be ignored. NA also ignores this argument.} 25 | 26 | \item{link.arr.lwd}{line width of the single line link which is put in the 27 | center of the belt.} 28 | 29 | \item{link.arr.lty}{line type of the single line link which is put in the 30 | center of the belt.} 31 | 32 | \item{link.arr.col}{color of the single line link which is put in the center 33 | of the belt.} 34 | 35 | \item{link.arr.width}{size of the single arrow head link which is put in the 36 | center of the belt.} 37 | 38 | \item{link.arr.type}{Type of the arrows, pass to Arrowhead. Default value is 39 | triangle.
There is an additional option big.arrow} 40 | 41 | \item{facing}{Facing of text.} 42 | 43 | \item{cell_col}{Colors used to represent types of cells. If set NULL, it 44 | will be generated randomly} 45 | 46 | \item{print.cell}{Whether or not to print the type of cells on the outer layer 47 | of the graph.} 48 | 49 | \item{track.height_1}{height of the cell notation track} 50 | 51 | \item{track.height_2}{height of the gene notation track} 52 | 53 | \item{annotation.height_1}{Track height corresponding to values in annotationTrack.} 54 | 55 | \item{annotation.height_2}{Track height corresponding to values in annotationTrack.} 56 | 57 | \item{text.vjust}{adjustment on ’vertical’ (radial) direction. Besides setting it 58 | as a numeric value, the value can also be a string containing an absolute unit, e.g. 59 | "2.1mm", "-1 inche", but only "mm", "cm", "inches"/"inche" are allowed.} 60 | } 61 | \value{ 62 | A figure of the significant interactions 63 | } 64 | \description{ 65 | This function loads the significant interactions as a dataframe. A circle 66 | plot will be generated using package circlize. The width of the arrow 67 | represents the expression level/log fold change of the ligand; while the 68 | width of arrow head represents the expression level/log fold change of the 69 | receptor. The color and the type of the arrow indicate whether 70 | the ligand and/or receptor are upregulated or downregulated. Users can choose 71 | the colors that represent the cell types themselves; by default they are chosen randomly. 72 | } 73 | \references{ 74 | Gu, Z. (2014) circlize implements and enhances circular 75 | visualization in R. Bioinformatics.
76 | } 77 | -------------------------------------------------------------------------------- /man/MASTTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DEG.R 3 | \name{MASTTest} 4 | \alias{MASTTest} 5 | \title{Differential expression using MAST} 6 | \usage{ 7 | MASTTest(sub_data, min_gene_expressed, min_valid_cells, 8 | contrast = unique(sub_data$compare_group), method = "glm", 9 | silent = FALSE, check_logged = TRUE) 10 | } 11 | \arguments{ 12 | \item{sub_data}{Count data removed cell_type and selected certain two 13 | compare_group} 14 | 15 | \item{min_gene_expressed}{Genes expressed in minimum number of cells} 16 | 17 | \item{min_valid_cells}{Minimum number of genes detected in the cell} 18 | 19 | \item{contrast}{String vector specifying the contrast to be 20 | tested against the log2-fold-change threshold} 21 | 22 | \item{method}{Character vector, either ’glm’, ’glmer’ or ’bayesglm’} 23 | 24 | \item{check_logged}{Set FALSE to override sanity checks that try to 25 | ensure that the default assay is log-transformed and has at least one 26 | exact zero} 27 | 28 | \item{silent}{Silence common problems with fitting some genes} 29 | } 30 | \value{ 31 | A matrix of differentially expressed genes and related statistics.
32 | } 33 | \description{ 34 | Identifies differentially expressed genes between two groups of cells using 35 | MAST 36 | } 37 | \details{ 38 | To use this method, please install MAST, using the instructions at 39 | https://github.com/RGLab/MAST 40 | } 41 | \references{ 42 | MAST: a flexible statistical framework for assessing transcriptional 43 | changes and characterizing heterogeneity in single-cell RNA sequencing 44 | data G Finak, A McDavid, M Yajima, J Deng, V Gersuk, AK Shalek, CK Slichter 45 | et al Genome biology 16 (1), 278 46 | } 47 | -------------------------------------------------------------------------------- /man/MonocleTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DEG.R 3 | \name{MonocleTest} 4 | \alias{MonocleTest} 5 | \title{Differential expression using monocle} 6 | \usage{ 7 | MonocleTest(sub_data, min_gene_expressed, min_valid_cells, 8 | contrast = unique(sub_data$compare_group), batch = NULL, cores = 4) 9 | } 10 | \arguments{ 11 | \item{sub_data}{Count data removed cell_type and selected certain two 12 | compare_group} 13 | 14 | \item{min_gene_expressed}{Genes expressed in minimum number of cells} 15 | 16 | \item{min_valid_cells}{Minimum number of genes detected in the cell} 17 | 18 | \item{contrast}{String vector specifying the contrast to be 19 | tested against the log2-fold-change threshold} 20 | 21 | \item{batch}{Different batch identifier} 22 | 23 | \item{cores}{The number of cores to be used while testing each gene 24 | for differential expression.} 25 | } 26 | \value{ 27 | A matrix of differentially expressed genes and related statistics. 28 | } 29 | \description{ 30 | Identifies differentially expressed genes between two groups of cells using 31 | monocle 32 | } 33 | \details{ 34 | This test does not support pre-processed genes. 
To use this method, please 35 | install monocle, using the instructions at 36 | https://bioconductor.org/packages/release/bioc/html/monocle.html 37 | } 38 | \references{ 39 | Qiu X, Hill A, Packer J, Lin D, Ma Y, Trapnell C (2017). 40 | “Single-cell mRNA quantification and differential analysis with Census.” 41 | Nature Methods. 42 | https://github.com/cole-trapnell-lab/monocle-release 43 | } 44 | -------------------------------------------------------------------------------- /man/NetView.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/NetView.R 3 | \name{NetView} 4 | \alias{NetView} 5 | \title{Network Viewing of cell-cell communication} 6 | \usage{ 7 | NetView(data, col, label = TRUE, edge.curved = 0.5, shape = "circle", 8 | layout = nicely(), vertex.size = 20, margin = 0.2, 9 | vertex.label.cex = 1.5, vertex.label.color = "black", 10 | arrow.width = 1.5, edge.label.color = "black", edge.label.cex = 1, 11 | edge.max.width = 10) 12 | } 13 | \arguments{ 14 | \item{data}{A dataframe containing ligand-receptor pairs and corresponding 15 | cell types used to do the plotting} 16 | 17 | \item{col}{Colors used to represent different cell types} 18 | 19 | \item{label}{Whether or not to show the label of edges (number of connections 20 | between different cell types)} 21 | 22 | \item{edge.curved}{Specifies whether to draw curved edges, or not. 23 | This can be a logical or a numeric vector or scalar. 24 | First the vector is replicated to have the same length as the number of 25 | edges in the graph. Then it is interpreted for each edge separately. 26 | A numeric value specifies the curvature of the edge; zero curvature means 27 | straight edges, negative values means the edge bends clockwise, positive 28 | values the opposite.
TRUE means curvature 0.5, FALSE means curvature zero} 29 | 30 | \item{shape}{The shape of the vertex, currently “circle”, “square”, 31 | “csquare”, “rectangle”, “crectangle”, “vrectangle”, “pie” (see 32 | vertex.shape.pie), ‘sphere’, and “none” are supported, and only by the 33 | plot.igraph command. “none” does not draw the vertices at all, although 34 | vertex label are plotted (if given). See shapes for details about vertex 35 | shapes and vertex.shape.pie for using pie charts as vertices.} 36 | 37 | \item{layout}{The layout specification. It must be a call to a layout 38 | specification function.} 39 | 40 | \item{vertex.size}{The size of vertex} 41 | 42 | \item{margin}{The amount of empty space below, over, at the left and right 43 | of the plot, it is a numeric vector of length four. Usually values between 44 | 0 and 0.5 are meaningful, but negative values are also possible, that will 45 | make the plot zoom in to a part of the graph. If it is shorter than four 46 | then it is recycled.} 47 | 48 | \item{vertex.label.cex}{The label size of vertex} 49 | 50 | \item{vertex.label.color}{The color of label for vertex} 51 | 52 | \item{arrow.width}{The width of arrows} 53 | 54 | \item{edge.label.color}{The color for single arrow} 55 | 56 | \item{edge.label.cex}{The size of label for arrows} 57 | 58 | \item{edge.max.width}{The maximum arrow size} 59 | } 60 | \value{ 61 | A network graph of the significant interactions 62 | } 63 | \description{ 64 | This function loads the significant interactions as a dataframe, and colors 65 | represent different types of cells as a structure. The width of edges represent 66 | the strength of the communication. Labels on the edges show exactly how many 67 | interactions exist between two types of cells. 68 | } 69 | \references{ 70 | Csardi G, Nepusz T: The igraph software package for complex network 71 | research, InterJournal, Complex Systems 1695. 2006. 
72 | http://igraph.org 73 | } 74 | -------------------------------------------------------------------------------- /man/SCDETest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DEG.R 3 | \name{SCDETest} 4 | \alias{SCDETest} 5 | \title{Differential expression using scde} 6 | \usage{ 7 | SCDETest(sub_data, min_gene_expressed, min_valid_cells, 8 | contrast = unique(sub_data$compare_group), batch = NULL, 9 | n.randomizations = 150, n.cores = 10, batch.models = models, 10 | return.posteriors = FALSE, verbose = 1) 11 | } 12 | \arguments{ 13 | \item{sub_data}{Count data removed cell_type and selected certain two 14 | compare_group} 15 | 16 | \item{min_gene_expressed}{Genes expressed in minimum number of cells} 17 | 18 | \item{min_valid_cells}{Minimum number of genes detected in the cell} 19 | 20 | \item{contrast}{String vector specifying the contrast to be 21 | tested against the log2-fold-change threshold} 22 | 23 | \item{batch}{Different batch identifier} 24 | 25 | \item{n.cores}{number of cores to utilize} 26 | 27 | \item{batch.models}{(optional) separate models for the batch data (if generated 28 | using batch-specific group argument). Normally the same models are used.} 29 | 30 | \item{return.posteriors}{whether joint posterior matrices should be returned} 31 | 32 | \item{verbose}{integer verbose level (1 for verbose)} 33 | 34 | \item{n.randomizations}{number of bootstrap randomizations to be performed} 35 | } 36 | \value{ 37 | A matrix of differentially expressed genes and related statistics. 38 | } 39 | \description{ 40 | Identifies differentially expressed genes between two groups of cells using 41 | scde 42 | } 43 | \details{ 44 | This test does not support pre-processed genes.
To use this method, please 45 | install scde, using the instructions at 46 | http://hms-dbmi.github.io/scde/tutorials.html 47 | } 48 | \references{ 49 | "Bayesian approach to single-cell differential expression 50 | analysis" (Kharchenko PV, Silberstein L, Scadden DT, Nature Methods, 51 | doi:10.1038/nmeth.2967) 52 | https://github.com/hms-dbmi/scde 53 | } 54 | -------------------------------------------------------------------------------- /man/TimePlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TimePlot.R 3 | \name{TimePlot} 4 | \alias{TimePlot} 5 | \title{Plotting ligand-receptor pairs} 6 | \usage{ 7 | TimePlot(data, ligand, receptor, cell_from, cell_to, Time = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{A dataframe contains significant ligand-receptor pairs and related 11 | information such as expression level/log fold change and cell type} 12 | 13 | \item{ligand}{String as selected ligand} 14 | 15 | \item{receptor}{String as selected receptor} 16 | 17 | \item{cell_from}{The cell type ligand gene belongs to} 18 | 19 | \item{cell_to}{The cell type receptor gene belongs to} 20 | 21 | \item{Time}{Different time points showing on the plot} 22 | } 23 | \value{ 24 | A figure of the paired interactions 25 | } 26 | \description{ 27 | This function loads count data as dataframe, ligand, receptor and two interactive 28 | cells' names as strings. The plot shows the expression level of ligand and 29 | receptor at different time, thus illustrates a dynamic change of a ligand-receptor 30 | pairs. 
31 | } 32 | -------------------------------------------------------------------------------- /man/WilcoxTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DEG.R 3 | \name{WilcoxTest} 4 | \alias{WilcoxTest} 5 | \title{Differential expression using wilcox} 6 | \usage{ 7 | WilcoxTest(sub_data, min_gene_expressed, min_valid_cells, 8 | contrast = unique(sub_data$compare_group), datatype = "raw count", 9 | verbose = 0) 10 | } 11 | \arguments{ 12 | \item{sub_data}{Count data removed cell_type and selected certain two 13 | compare_group} 14 | 15 | \item{min_gene_expressed}{Genes expressed in minimum number of cells} 16 | 17 | \item{min_valid_cells}{Minimum number of genes detected in the cell} 18 | 19 | \item{contrast}{String vector specifying the contrast to be 20 | tested against the log2-fold-change threshold} 21 | 22 | \item{verbose}{Whether to show the progress of computing} 23 | 24 | \item{datatype}{Type of data. Available options are: 25 | \itemize{ 26 | \item{'raw count'}: Raw count data without any pre-processing 27 | \item{'log count'}: Normalized and log-transformed data 28 | }} 29 | } 30 | \value{ 31 | A matrix of differentially expressed genes and related statistics.
32 | } 33 | \description{ 34 | Identifies differentially expressed genes between two groups of cells using 35 | a Wilcoxon Rank Sum test 36 | } 37 | -------------------------------------------------------------------------------- /man/edgeRTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DEG.R 3 | \name{edgeRTest} 4 | \alias{edgeRTest} 5 | \title{Differential expression using edgeR} 6 | \usage{ 7 | edgeRTest(sub_data, min_gene_expressed, min_valid_cells, 8 | contrast = unique(sub_data$compare_group), calcNormMethod = "TMM", 9 | trend.method = "locfit", tagwise = TRUE, robust = FALSE) 10 | } 11 | \arguments{ 12 | \item{sub_data}{Count data removed cell_type and selected certain two 13 | compare_group} 14 | 15 | \item{min_gene_expressed}{Genes expressed in minimum number of cells} 16 | 17 | \item{min_valid_cells}{Minimum number of genes detected in the cell} 18 | 19 | \item{contrast}{String vector specifying the contrast to be 20 | tested against the log2-fold-change threshold} 21 | 22 | \item{calcNormMethod}{normalization method to be used} 23 | 24 | \item{trend.method}{method for estimating dispersion trend. Possible values 25 | are "none", "movingave", "loess" and "locfit" (default).} 26 | 27 | \item{tagwise}{logical, should the tagwise dispersions be estimated} 28 | 29 | \item{robust}{logical, should the estimation of prior.df be robustified 30 | against outliers} 31 | } 32 | \value{ 33 | A matrix of differentially expressed genes and related statistics. 34 | } 35 | \description{ 36 | Identifies differentially expressed genes between two groups of cells using 37 | edgeR 38 | } 39 | \details{ 40 | This test does not support pre-processed genes. To use this method, please 41 | install edgeR, using the instructions at 42 | http://bioconductor.org/packages/release/bioc/html/edgeR.html 43 | } 44 | \references{ 45 | McCarthy, J. 
D, Chen, Yunshun, Smyth, K. G (2012). “Differential 46 | expression analysis of multifactor RNA-Seq experiments with respect to 47 | biological variation.” Nucleic Acids Research, 40(10), 4288-4297. 48 | 49 | Robinson MD, McCarthy DJ, Smyth GK (2010). “edgeR: a Bioconductor 50 | package for differential expression analysis of digital gene expression data.” 51 | Bioinformatics, 26(1), 139-140. 52 | https://bioconductor.org/packages/release/bioc/html/edgeR.html 53 | } 54 | -------------------------------------------------------------------------------- /man/rawParse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rawParse.R 3 | \name{rawParse} 4 | \alias{rawParse} 5 | \title{Parsing the data to get top expressed genes} 6 | \usage{ 7 | rawParse(data, top_genes = 50, stats = "mean") 8 | } 9 | \arguments{ 10 | \item{data}{Input data, raw or normalized count with 'cell_type' column} 11 | 12 | \item{top_genes}{(scale 1 to 100) Top percent highly expressed genes used 13 | to find ligand-receptor pairs, default is 50} 14 | 15 | \item{stats}{Whether to calculate the mean or the median of the data. Available 16 | options are 'mean' and 'median'.} 17 | } 18 | \value{ 19 | A dataframe of the data 20 | } 21 | \description{ 22 | This function loads the count data as a dataframe. It assumes that each line 23 | contains gene expression profile of one single cell, and each column 24 | contains the one single gene expression profile in different cells. The dataframe 25 | should also contain the cell type information with column name 'cell_type'. 26 | Group information should also be included as 'compare_group' if users want 27 | to call differentially expressed ligand-receptor pairs. Batch information as 28 | 'batch' is optional. If included, users may want to use the raw count data 29 | for later analysis.
30 | } 31 | --------------------------------------------------------------------------------