├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── R ├── AllClasses.R ├── AllGenerics.R ├── RcppExports.R ├── cellMarker.R ├── cells.R ├── data.R ├── misc.R └── zzz.R ├── README.md ├── _config.yml ├── data ├── human.rda ├── human_celltaxonomy.rda ├── mouse.rda ├── mouse_celltaxonomy.rda └── rat.rda ├── man ├── as.data.frame.cellResult.Rd ├── cellMarker.Rd ├── cellResult-class.Rd ├── cells.Rd ├── detail.Rd ├── dot-add.list.Rd ├── dot-getdata.Rd ├── getdetail.Rd ├── humancells.Rd ├── list.tissue.Rd ├── marker.Rd ├── mousecells.Rd ├── ratcells.Rd ├── rcellmarker-package.Rd ├── result.Rd └── reverseList.Rd ├── rcellmarker.Rproj └── src ├── RcppExports.cpp ├── hyper.cpp ├── name_table.cpp ├── sf.cpp └── unique.cpp /.travis.yml: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------- 2 | # Travis-CI configuration for R packages 3 | # 4 | # REFERENCES: 5 | # * Travis CI: https://docs.travis-ci.com/user/languages/r# 6 | # YAML validated using http://www.yamllint.com/ 7 | #---------------------------------------------------------------- 8 | language: r 9 | sudo: false 10 | cache: packages 11 | warnings_are_errors: false 12 | r_check_args: --as-cran 13 | latex: false 14 | 15 | matrix: 16 | include: 17 | - os: osx 18 | r_check_args: '--ignore-vignettes' 19 | r_build_args: '--no-build-vignettes' 20 | - dist: trusty 21 | r_check_args: '--ignore-vignettes' 22 | r_build_args: '--no-build-vignettes' 23 | 24 | addons: 25 | apt: 26 | update: true 27 | 28 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rcellmarker 2 | Type: Package 3 | Title: Identify cell type based on cell markers 4 | Version: 0.0.15 5 | Date: 2020-07-12 6 | Author: Kai Guo, Zhihan Wang, Pan Gao 7 | Maintainer: Kai Guo 8 | Description: Using over representation analysis method to 
identify cell type based on 9 | cell markers. Now only support human and mouse single cell sequencing. 10 | Depends: 11 | R (>= 3.5.0) 12 | License: GPL (>= 2) 13 | Imports: Rcpp (>= 1.0.5), 14 | dplyr, 15 | tidyr, 16 | purrr, 17 | magrittr, 18 | utils, 19 | stats, 20 | methods 21 | LinkingTo: Rcpp 22 | Suggests: 23 | knitr, 24 | testthat, 25 | rmarkdown 26 | RoxygenNote: 7.1.0 27 | biocViews: 28 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | exportPattern("^[[:alpha:]]+") 3 | importFrom(Rcpp, evalCpp) 4 | useDynLib(rcellmarker) 5 | S3method("$",cellResult) 6 | S3method("[",cellResult) 7 | S3method(as.data.frame,cellResult) 8 | S3method(detail,cellResult) 9 | S3method(dim,cellResult) 10 | S3method(head,cellResult) 11 | S3method(names,cellResult) 12 | S3method(result,cellResult) 13 | S3method(row.names,cellResult) 14 | S3method(tail,cellResult) 15 | export(cellMarker) 16 | export(cells) 17 | export(detail) 18 | export(getdetail) 19 | export(list.tissue) 20 | export(marker) 21 | export(result) 22 | export(reverseList) 23 | exportClasses(cellResult) 24 | importFrom(dplyr,distinct) 25 | importFrom(dplyr,filter) 26 | importFrom(dplyr,group_by) 27 | importFrom(dplyr,left_join) 28 | importFrom(dplyr,mutate) 29 | importFrom(dplyr,select) 30 | importFrom(dplyr,top_n) 31 | importFrom(magrittr,"%>%") 32 | importFrom(methods,new) 33 | importFrom(purrr,map) 34 | importFrom(purrr,safely) 35 | importFrom(stats,p.adjust) 36 | importFrom(tidyr,gather) 37 | importFrom(tidyr,nest) 38 | importFrom(tidyr,unnest) 39 | importFrom(utils,data) 40 | importFrom(utils,head) 41 | importFrom(utils,tail) 42 | -------------------------------------------------------------------------------- /R/AllClasses.R: -------------------------------------------------------------------------------- 1 | ##' Class "cellResult" 2 | 
##' This class represents the result of enrichment analysis. 3 | ##' 4 | ##' 5 | ##' @name cellResult-class 6 | ##' @aliases cellResult-class 7 | ##' show,cellResult-method plot,cellResult-method 8 | ##' summary,cellResult-method 9 | ##' 10 | ##' @docType class 11 | ##' @slot result enrichment analysis results 12 | ##' @slot detail genes included in significant terms and original information 13 | ##' @slot species species name 14 | ##' @slot pvalueCutoff cutoff pvalue 15 | ##' @slot pAdjustMethod pvalue adjust method 16 | ##' @slot padjCutoff pvalue adjust cutoff value 17 | ##' @slot gene Gene IDs 18 | ##' @slot keytype Gene ID type 19 | ##' @slot sep character string used to separate the genes when concatenating 20 | ##' @exportClass cellResult 21 | ##' @author Kai Guo 22 | ##' @keywords classes 23 | setClass("cellResult", 24 | representation=representation( 25 | result = "data.frame", 26 | detail = "data.frame", 27 | species = "character", 28 | pvalueCutoff = "numeric", 29 | pAdjustMethod = "character", 30 | padjCutoff = "numeric", 31 | gene = "character", 32 | keytype = "character", 33 | sep = "character" 34 | ) 35 | ) -------------------------------------------------------------------------------- /R/AllGenerics.R: -------------------------------------------------------------------------------- 1 | ##' detail generic 2 | ##' @param x cellResult object 3 | ##' @return detail return detial for these significant genes 4 | ##' @export 5 | detail<-function(x){ 6 | UseMethod("detail",x) 7 | } 8 | ##' result generic 9 | ##' @param x cellResult object 10 | ##' @return result return dataframe and print summary 11 | ##' @export 12 | result<-function(x){ 13 | UseMethod("result",x) 14 | } -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 
10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | hyper_bench_vector <- function(xin, yin, N, n) { 5 | .Call('_rcellmarker_hyper_bench_vector', PACKAGE = 'rcellmarker', xin, yin, N, n) 6 | } 7 | 8 | name_table <- function(lh) { 9 | .Call('_rcellmarker_name_table', PACKAGE = 'rcellmarker', lh) 10 | } 11 | 12 | fast_factor <- function(x) { 13 | .Call('_rcellmarker_fast_factor', PACKAGE = 'rcellmarker', x) 14 | } 15 | 16 | sf <- function(x) { 17 | .Call('_rcellmarker_sf', PACKAGE = 'rcellmarker', x) 18 | } 19 | 20 | uniq <- function(xa) { 21 | .Call('_rcellmarker_uniq', PACKAGE = 'rcellmarker', xa) 22 | } 23 | 24 | -------------------------------------------------------------------------------- /R/cellMarker.R: -------------------------------------------------------------------------------- 1 | #' assign cell type based on cell cluster results 2 | #' @importFrom magrittr %>% 3 | #' @importFrom purrr safely 4 | #' @importFrom purrr map 5 | #' @importFrom tidyr nest 6 | #' @importFrom tidyr unnest 7 | #' @importFrom tidyr gather 8 | #' @importFrom dplyr select 9 | #' @importFrom dplyr filter 10 | #' @importFrom dplyr group_by 11 | #' @importFrom dplyr mutate 12 | #' @importFrom dplyr top_n 13 | #' @param x input file for marker annotation 14 | #' @param species species for annotation 15 | #' @param type source of marker genes (seurat(default), cellranger, custom) 16 | #' @param keytype keytype for input genes 17 | #' @param weight weight threshold for marker filtering 18 | #' @param format file format for user supplied data.frame(long: Cluster, gene; 19 | #' wide: gene, following cluster name as column name) 20 | #' @param cluster clutser number (default: NULL for annotate all clusters) 21 | #' @param tissue tissue for annotation (default: NULL to use all tissues) 22 | #' @param topn the number of cell type to list fro each cluster 23 | #' @param padj adjust p value threshold 24 | #' @param minSize minimal number of genes included in significant cell type 25 | #' @param 
maxSize maximum number of genes included in significant cell type 26 | #' @param padj.method pvalue adjust method(default: "BH") 27 | #' @export 28 | #' @author Kai Guo 29 | cellMarker <- function(x, type = 'seurat', db="default", species="human", keytype = 'SYMBOL', 30 | weight = NULL, format="long", 31 | cluster = NULL,tissue = NULL, topn = 3, 32 | padj = 0.05, minSize=3,maxSize=500, 33 | padj.method = "BH"){ 34 | if (!(db %in% c("default", "celltax"))) { 35 | stop("Invalid value for 'db'. It must be either 'default' or 'celltax'.") 36 | } 37 | options(warn = -1) 38 | cells_ <- safely(cells, otherwise = .empty_class()) 39 | if(type == 'cellranger'){ 40 | if(is.null(weight)) weight <- 100 41 | colnames(x)[1:2]<-c('GeneID','GeneName') 42 | x <- x%>%select(GeneName,contains("Weight")) 43 | colnames(x) <- sub('_Weight','',gsub('[\\.| ]','_',colnames(x))) 44 | x <- x%>%gather(Cluster,val,-GeneName)%>%filter(val>=weight)%>% 45 | select(Cluster,GeneName)%>% 46 | group_by(Cluster)%>%nest() 47 | if(!is.null(cluster)){ 48 | cluster <- paste0("Cluster_",cluster) 49 | x <- x%>%filter(Cluster%in%cluster) 50 | } 51 | x <- x %>%mutate(cellType=map(data, 52 | function(y)result(cells_(y$GeneName,species=species, 53 | keytype=keytype,minSize=minSize,padj=padj, 54 | maxSize=maxSize,db=db, 55 | padj.method=padj.method)$result))) 56 | x <- x%>%select(Cluster,cellType)%>%unnest(cellType)%>% 57 | group_by(Cluster)%>%top_n(-topn,wt=Padj) 58 | }else if(type == "seurat"){ 59 | if(is.null(weight)) weight <- 1 60 | if("avg_logFC"%in%colnames(x)){ 61 | x <-x%>%filter(avg_logFC >= weight,p_val_adj% 62 | select(cluster,gene)%>%group_by(cluster)%>%nest() 63 | }else{ 64 | x <-x%>%filter(avg_log2FC >= weight,p_val_adj% 65 | select(cluster,gene)%>%group_by(cluster)%>%nest() 66 | } 67 | if(!is.null(cluster)){ 68 | cl <- cluster 69 | x <- x%>%filter(cluster%in%cl) 70 | } 71 | x <- x %>%mutate(cellType=map(data, 72 | function(y)result(cells_(y$gene,species=species, 73 | 
keytype=keytype,minSize=minSize,padj=padj, 74 | maxSize=maxSize,db=db, 75 | padj.method=padj.method)$result))) 76 | x <- x%>%select(cluster,cellType)%>%unnest(cellType)%>% 77 | group_by(cluster)%>%top_n(-topn,wt=Padj) 78 | }else{ 79 | colnames(x)[1]<-'gene' 80 | if(format=="wide"){ 81 | x <- x%>%gather(Cluster,val,-gene)%>%select(Cluster,gene) 82 | } 83 | x <- x%>%group_by(Cluster)%>%nest() 84 | if(!is.null(cluster)){ 85 | cl <- cluster 86 | x <- x%>%filter(Cluster%in%cl) 87 | } 88 | x <- x %>%mutate(cellType=map(data, 89 | function(y)result(cells_(y$gene,species=species, 90 | keytype=keytype,minSize=minSize,padj=padj, 91 | maxSize=maxSize,db=db, 92 | padj.method=padj.method)$result))) 93 | x <- x%>%select(Cluster,cellType)%>%unnest(cellType)%>% 94 | group_by(Cluster)%>%top_n(-topn,wt=Padj) 95 | } 96 | as.data.frame(x) 97 | } 98 | -------------------------------------------------------------------------------- /R/cells.R: -------------------------------------------------------------------------------- 1 | #' cells function for cell type identification 2 | #' @importFrom magrittr %>% 3 | #' @importFrom stats p.adjust 4 | #' @importFrom methods new 5 | #' @param x vector contains gene names 6 | #' @param species species name 7 | #' @param tissue tissue type (default NULL) 8 | #' @param pvalue cutoff pvalue 9 | #' @param padj cutoff p adjust value 10 | #' @param keytype keytype for input genes 11 | #' @param minSize minimal number of genes included in significant cell type 12 | #' @param maxSize maximum number of genes included in significant cell type 13 | #' @param padj.method pvalue adjust method(default:"BH") 14 | #' @param sep character string used to separate the genes when concatenating 15 | #' @export 16 | #' @author Kai Guo 17 | cells<-function(x,species='human',db='default',keytype="SYMBOL", tissue = NULL, padj=0.05, pvalue=NULL, 18 | minSize=3,maxSize=500, 19 | padj.method="BH",sep = ","){ 20 | annot <- .getdata(species=species,db=db) 21 | annot <- 
na.omit(annot) 22 | keytype <- toupper(keytype) 23 | if(!is.null(tissue)){ 24 | annot <- annot[grepl(tissue,annot$tissueType,ignore.case = T),] 25 | } 26 | annot <- annot[,c(keytype,'cellType')] 27 | if(sum(x%in%annot[,1])==0){ 28 | return(.empty_class()) 29 | } 30 | ao2gene<-sf(annot) 31 | ao2gene_num<-name_table(ao2gene) 32 | gene2ao<-sf(annot[,c(2,1)]) 33 | input=as.vector(x) 34 | fgene2ao=gene2ao[input] 35 | fao2gene=reverseList(fgene2ao) 36 | k=name_table(fao2gene) 37 | n=length(unique(unlist(fao2gene))) 38 | M=ao2gene_num[names(k)] 39 | N=length(unique(annot[,1])) 40 | rhs<-hyper_bench_vector(k,M,N,n) 41 | lhs<-p.adjust(rhs,method=padj.method) 42 | rhs_gene<-unlist(lapply(fao2gene, function(x)paste(unique(x),sep="",collapse = sep))) 43 | resultFis<-data.frame("cellType"=names(rhs),"Annotated"=M[names(rhs)], 44 | "Significant"=k[names(rhs)],"Pvalue"=as.vector(rhs),"Padj"=lhs, 45 | "GeneID"=rhs_gene[names(rhs)]) 46 | resultFis<-resultFis[order(resultFis$Pvalue),] 47 | if(!is.null(pvalue)){ 48 | resultFis<-resultFis[resultFis$Pvalue=minSize,] 56 | rownames(resultFis)<-resultFis$cellType 57 | gene<-strsplit(as.vector(resultFis$GeneID),split=sep) 58 | # names(gene)<-resultFis$cellType 59 | gened<-data.frame("cellType"=rep(resultFis$cellType,times=unlist(lapply(gene,length))), 60 | "GeneID"=unlist(gene),row.names=NULL, 61 | "Pvalue"=rep(resultFis$Pvalue,times=unlist(lapply(gene,length))), 62 | "Padj"=rep(resultFis$Padj,times=unlist(lapply(gene,length))) 63 | ) 64 | gened$GeneID<-as.character(gened$GeneID) 65 | result<-new("cellResult", 66 | result = resultFis, 67 | detail = gened, 68 | species = species, 69 | pvalueCutoff = pvalue, 70 | pAdjustMethod = padj.method, 71 | padjCutoff = padj, 72 | gene = input, 73 | keytype = keytype, 74 | sep = sep 75 | ) 76 | return(result) 77 | } 78 | #' show support tissues 79 | #' @importFrom utils data 80 | #' @param species species name 81 | #' @export 82 | #' @author Kai Guo 83 | list.tissue <- function(species='human'){ 84 | 
dat <- .getdata(species=species, db="default") # tissue names are listed from the default marker database 85 | data.frame('Tissue'=sort(unique(dat$tissueType))) # sorted unique tissues of the requested species 86 | } 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | ##' @title human 2 | ##' @description human cell markers for cell type identification 3 | ##' @format A list of data frame with five columns individually: 4 | ##' \describe{ 5 | ##' \item{SYMBOL}{SYMBOL gene IDs} 6 | ##' \item{cellType}{cell type} 7 | ##' \item{tissueType}{tissue Type} 8 | ##' \item{ENTREZID}{ENTREZID gene IDs} 9 | ##' \item{REFSEQ}{REFSEQ gene IDs} 10 | ##' \item{ENSEMBL}{ENSEMBL gene IDs} 11 | ##' } 12 | ##' @examples 13 | ##' data(human) 14 | ##' head(humancells) 15 | "humancells" 16 | ##' @title human_celltaxonomy 17 | ##' @description human cell markers (celltaxonomy; https://ngdc.cncb.ac.cn/celltaxonomy/download) for cell type identification 18 | ##' @format A list of data frame with five columns individually: 19 | ##' \describe{ 20 | ##' \item{SYMBOL}{SYMBOL gene IDs} 21 | ##' \item{cellType}{cell type} 22 | ##' \item{tissueType}{tissue Type} 23 | ##' \item{ENTREZID}{ENTREZID gene IDs} 24 | ##' \item{ENSEMBL}{ENSEMBL gene IDs} 25 | ##' } 26 | ##' @examples 27 | ##' data(human_celltaxonomy) 28 | ##' head(humancelltaxonomycells) 29 | "humancelltaxonomycells" 30 | ##' @title mouse 31 | ##' @description mouse cell markers for cell type identification 32 | ##' @format A list of data frame with five columns individually: 33 | ##' \describe{ 34 | ##' \item{SYMBOL}{SYMBOL gene IDs} 35 | ##' \item{cellType}{cell type} 36 | ##' \item{tissueType}{tissue Type} 37 | ##' \item{ENTREZID}{ENTREZID gene IDs} 38 | ##' \item{REFSEQ}{REFSEQ gene IDs} 39 | ##' \item{ENSEMBL}{ENSEMBL gene IDs} 40 | ##' } 41 | ##' @examples 42 | ##' data(mouse) 43 | ##' head(mousecells) 44 | "mousecells" 45 | ##' @title mouse_celltaxonomy 46 | ##' @description mouse cell 
markers (celltaxonomy; https://ngdc.cncb.ac.cn/celltaxonomy/download) for cell type identification 47 | ##' @format A list of data frame with five columns individually: 48 | ##' \describe{ 49 | ##' \item{SYMBOL}{SYMBOL gene IDs} 50 | ##' \item{cellType}{cell type} 51 | ##' \item{tissueType}{tissue Type} 52 | ##' \item{ENTREZID}{ENTREZID gene IDs} 53 | ##' \item{ENSEMBL}{ENSEMBL gene IDs} 54 | ##' } 55 | ##' @examples 56 | ##' data(mouse_celltaxonomy) 57 | ##' head(mousecelltaxonomycells) 58 | "mousecelltaxonomycells" 59 | ##' @title rat 60 | ##' @description rat cell markers for cell type identification 61 | ##' @format A list of data frame with five columns individually: 62 | ##' \describe{ 63 | ##' \item{SYMBOL}{SYMBOL gene IDs} 64 | ##' \item{cellType}{cell type} 65 | ##' \item{tissueType}{tissue Type} 66 | ##' \item{ENTREZID}{ENTREZID gene IDs} 67 | ##' \item{REFSEQ}{REFSEQ gene IDs} 68 | ##' \item{ENSEMBL}{ENSEMBL gene IDs} 69 | ##' } 70 | ##' @examples 71 | ##' data(rat) 72 | ##' head(ratcells) 73 | "ratcells" 74 | -------------------------------------------------------------------------------- /R/misc.R: -------------------------------------------------------------------------------- 1 | ##' @method as.data.frame cellResult 2 | ##' @export 3 | as.data.frame.cellResult <- function(x, ...) { 4 | as.data.frame(x@result, ...) 5 | } 6 | ##' @method row.names cellResult 7 | ##' @export 8 | row.names.cellResult <- function(x, ...) { 9 | row.names(x@result) 10 | } 11 | ##' @method names cellResult 12 | ##' @export 13 | names.cellResult <- function(x, ...) { 14 | names(x@result) 15 | } 16 | ##' @importFrom utils head 17 | ##' @method head cellResult 18 | ##' @export 19 | head.cellResult <- function(x, n=6L, ...) { 20 | head(x@result, n, ...) 21 | } 22 | ##' @importFrom utils tail 23 | ##' @method tail cellResult 24 | ##' @export 25 | tail.cellResult <- function(x, n=6L, ...) { 26 | tail(x@result, n, ...) 
27 | } 28 | ##' @method dim cellResult 29 | ##' @export 30 | dim.cellResult <- function(x) { 31 | dim(x@result) 32 | } 33 | ##' @method [ cellResult 34 | ##' @export 35 | `[.cellResult` <- function(x, i, j) { 36 | x@result[i,j] 37 | } 38 | ##' @method $ cellResult 39 | ##' @export 40 | `$.cellResult` <- function(x, name) { 41 | x@result[, name] 42 | } 43 | 44 | ##' @method detail cellResult 45 | ##' @export 46 | detail.cellResult<-function(x){ 47 | as.data.frame(x@detail) 48 | } 49 | ##' @method result cellResult 50 | ##' @export 51 | result.cellResult<-function(x){ 52 | as.data.frame(x@result) 53 | } 54 | ##' get detail from with cellResult and combine with other information 55 | ##' @importFrom dplyr left_join 56 | ##' @param x cellResult object from cells function 57 | ##' @param y a data frame with gene name and other information 58 | ##' @param sep cellResult object sep 59 | ##' @author Kai Guo 60 | ##' @export 61 | getdetail<-function(x,y,sep=","){ 62 | if(!is.data.frame(y)){ 63 | y=data.frame(gene=y) 64 | } 65 | if(!("gene"%in%colnames(y))){ 66 | y$gene=rownames(y) 67 | } 68 | if(is.data.frame(x)){ 69 | sep=sep 70 | result <- x 71 | }else{ 72 | sep = x@sep 73 | result <- x@result 74 | } 75 | gene<-strsplit(as.vector(x$GeneID),split=sep) 76 | gened<-data.frame("cellType"=rep(result$cellType,times=unlist(lapply(gene,length))), 77 | "GeneID"=unlist(gene),row.names=NULL, 78 | "Pvalue"=rep(result$Pvalue,times=unlist(lapply(gene,length))), 79 | "Padj"=rep(result$Padj,times=unlist(lapply(gene,length))) 80 | ) 81 | gened$GeneID<-as.character(gened$GeneID) 82 | res<-left_join(gened,y,by=c("GeneID"="gene")) 83 | return(res) 84 | } 85 | ##' Functions to coerce cellResult to data.frame 86 | ##' @method as.data.frame cellResult 87 | ##' @export 88 | as.data.frame.cellResult <- function(x, ...) { 89 | as.data.frame(x@result, ...) 
90 | } 91 | #' load the marker table for a species from the chosen database 92 | #' @param species species name ("human" or "mouse", case-insensitive); any other value loads the rat table 93 | #' @param db marker database, "default" or "celltax" (celltaxonomy); defaults to "default" 94 | #' @author Kai Guo 95 | .getdata <- function(species, db = "default") { 96 | species = tolower(species) # normalise once so both databases match the species case-insensitively 97 | if(db == "default") { 98 | if (species == 'human') { 99 | data(human) 100 | dat <- humancells 101 | } else if (species == "mouse") { 102 | data(mouse) 103 | dat <- mousecells 104 | } else { 105 | data(rat) 106 | dat <- ratcells 107 | } 108 | } else if(db == "celltax") { 109 | if (species == 'human') { 110 | data(human_celltaxonomy) 111 | dat <- humancelltaxonomycells 112 | } else if (species == "mouse") { 113 | data(mouse_celltaxonomy) 114 | dat <- mousecelltaxonomycells 115 | } else { 116 | data(rat) # no celltaxonomy table for rat is bundled; fall back to the default rat markers 117 | dat <- ratcells 118 | } 119 | } else { stop("Invalid value for 'db'. It must be either 'default' or 'celltax'.") } 120 | 121 | dat 122 | } 123 | 124 | 125 | #' reverse List 126 | #' @param lhs list with names 127 | #' @export 128 | #' @author Kai Guo 129 | reverseList<-function(lhs){ 130 | lhs_n<-rep(names(lhs),times=lapply(lhs,function(x)length(x))) 131 | res<-sf(as.data.frame(cbind(lhs_n,unlist(lhs)))) 132 | return(res) 133 | } 134 | .empty_class <- function(){ 135 | new("cellResult", 136 | result = data.frame(), 137 | detail = data.frame(), 138 | species = "", 139 | pvalueCutoff = 0.05, 140 | pAdjustMethod = "BH", 141 | padjCutoff = 0.05, 142 | gene = "", 143 | keytype = "", 144 | sep = "" 145 | ) 146 | } 147 | ##' get the unique cluster markers for each cluster 148 | ##' @importFrom dplyr distinct 149 | ##' @param x cellResult object or result from cellMarker 150 | ##' @param sep character string used to separate the genes in GeneID column 151 | ##' @param .unique filter duplicate gene id or not 152 | ##' @author Kai Guo 153 | ##' @export 154 | marker <- function(x,sep=",",.unique=TRUE){ 155 | x <- as.data.frame(x) 156 | gene<-strsplit(as.vector(x$GeneID),split=sep) 157 | res <- data.frame("Cluster"= rep(x[,1],times=unlist(lapply(gene,length))), 158 | 'cellType'=rep(x$cellType,times=unlist(lapply(gene,length))), 159 | "GeneID" = unlist(gene)) 
160 | if(isTRUE(.unique)){ 161 | res <- res%>%distinct(GeneID,.keep_all = T) 162 | } 163 | res 164 | } 165 | #' combine list together with name as one column 166 | .add.list<-function(x){ 167 | xname<-names(x) 168 | cluster<-rep(xname,times=unlist(lapply(x, nrow))) 169 | rr <- do.call(rbind,x) 170 | res<- cbind(cluster,rr) 171 | rownames(res)<-NULL 172 | res 173 | } 174 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | options(stringsAsFactors = FALSE) 3 | } 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rcellmarker 2 | # rcellmarker [![Project Status:](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) [![](https://img.shields.io/badge/devel%20version-0.0.15-green.svg)](https://github.com/guokai8/rcellmarker) ![Code Size:](https://img.shields.io/github/languages/code-size/guokai8/rcellmarker) 3 | ## Description 4 | _rcellmarker_ provides methods to identify cell types from single-cell sequencing data. Most existing approaches annotate cell types manually after clustering the single-cell RNA-seq data; such methods are labor-intensive and rely heavily on user expertise, which may lead to inconsistent results. Here, we present the _rcellmarker_ package -- an automatic tool to annotate cell types from single-cell RNA-seq data. Currently only human, mouse and rat are supported. 
5 | ## Installation 6 | ``` 7 | library(devtools) 8 | install_github("guokai8/rcellmarker") 9 | ``` 10 | ## Quick tour 11 | ```{r} 12 | set.seed(123) 13 | library(rcellmarker) 14 | data(human); gene=sample(unique(humancells$SYMBOL),20) 15 | res<-cells(gene,species = "human",keytype ="SYMBOL") 16 | head(res) 17 | # if you have a data frame named "single" that includes the cluster information, padj value and avg_logFC 18 | head(getdetail(res,single)) 19 | ## to show the markers for the cluster 20 | marker(res) 21 | ## if you have the results from Seurat or cellranger named as 'df' 22 | ## by default only the top 3 cell types are shown for each cluster; you can also set minSize, the minimal number of genes that must be included in a cell type 23 | ## You can use the parameter topn to change the number of cell types shown 24 | res <- cellMarker(df,type='seurat',species='human',keytype='SYMBOL',weight=1) #weight for avg_logFC 25 | # or 26 | res <- cellMarker(df,type='cellranger',species='human',keytype='SYMBOL',weight=100) 27 | ## only do the assignment for cluster 1 28 | res <- cellMarker(df,type='cellranger',species='human',keytype='SYMBOL', cluster=1, weight=100) 29 | # or if you just have two columns containing the cluster and gene name 30 | res <- cellMarker(df,type='custom',species='human',keytype='SYMBOL') 31 | ## to show the markers for each cluster 32 | marker(res) 33 | ``` 34 | ## Note 35 | The _rcellmarker_ package uses the __CellMarker__ and __PanglaoDB__ databases as the reference, with ID cleaning and merging. We also add information from the newest published papers in the single-cell field from NCBI. The package is still under development. Other species will be supported soon. 
36 | 37 | ## Contact information 38 | 39 | For any questions please contact guokai8@gmail.com 40 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /data/human.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guokai8/rcellmarker/43586ef9861ae9cbb7dc922273506cec5072e6b4/data/human.rda -------------------------------------------------------------------------------- /data/human_celltaxonomy.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guokai8/rcellmarker/43586ef9861ae9cbb7dc922273506cec5072e6b4/data/human_celltaxonomy.rda -------------------------------------------------------------------------------- /data/mouse.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guokai8/rcellmarker/43586ef9861ae9cbb7dc922273506cec5072e6b4/data/mouse.rda -------------------------------------------------------------------------------- /data/mouse_celltaxonomy.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guokai8/rcellmarker/43586ef9861ae9cbb7dc922273506cec5072e6b4/data/mouse_celltaxonomy.rda -------------------------------------------------------------------------------- /data/rat.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guokai8/rcellmarker/43586ef9861ae9cbb7dc922273506cec5072e6b4/data/rat.rda -------------------------------------------------------------------------------- /man/as.data.frame.cellResult.Rd: -------------------------------------------------------------------------------- 1 | % 
Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/misc.R 3 | \name{as.data.frame.cellResult} 4 | \alias{as.data.frame.cellResult} 5 | \title{Functions to coerce cellResult to data.frame} 6 | \usage{ 7 | \method{as.data.frame}{cellResult}(x, ...) 8 | } 9 | \description{ 10 | Functions to coerce cellResult to data.frame 11 | } 12 | -------------------------------------------------------------------------------- /man/cellMarker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cellMarker.R 3 | \name{cellMarker} 4 | \alias{cellMarker} 5 | \title{assign cell type based on cell cluster results} 6 | \usage{ 7 | cellMarker( 8 | x, 9 | type = "seurat", 10 | species = "human", 11 | keytype = "SYMBOL", 12 | weight = NULL, 13 | format = "long", 14 | cluster = NULL, 15 | tissue = NULL, 16 | topn = 3, 17 | padj = 0.05, 18 | minSize = 3, 19 | maxSize = 500, 20 | padj.method = "BH" 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{input file for marker annotation} 25 | 26 | \item{type}{source of marker genes (seurat(default), cellranger, custom)} 27 | 28 | \item{species}{species for annotation} 29 | 30 | \item{keytype}{keytype for input genes} 31 | 32 | \item{weight}{weight threshold for marker filtering} 33 | 34 | \item{format}{file format for user supplied data.frame(long: Cluster, gene; 35 | wide: gene, following cluster name as column name)} 36 | 37 | \item{cluster}{clutser number (default: NULL for annotate all clusters)} 38 | 39 | \item{tissue}{tissue for annotation (default: NULL to use all tissues)} 40 | 41 | \item{topn}{the number of cell type to list fro each cluster} 42 | 43 | \item{padj}{adjust p value threshold} 44 | 45 | \item{minSize}{minimal number of genes included in significant cell type} 46 | 47 | \item{maxSize}{maximum number of genes included in significant cell type} 48 | 49 | \item{padj.method}{pvalue adjust 
method(default: "BH")} 50 | } 51 | \description{ 52 | assign cell type based on cell cluster results 53 | } 54 | \author{ 55 | Kai Guo 56 | } 57 | -------------------------------------------------------------------------------- /man/cellResult-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllClasses.R 3 | \docType{class} 4 | \name{cellResult-class} 5 | \alias{cellResult-class} 6 | \alias{show,cellResult-method} 7 | \alias{plot,cellResult-method} 8 | \alias{summary,cellResult-method} 9 | \title{Class "cellResult" 10 | This class represents the result of enrichment analysis.} 11 | \description{ 12 | Class "cellResult" 13 | This class represents the result of enrichment analysis. 14 | } 15 | \section{Slots}{ 16 | 17 | \describe{ 18 | \item{\code{result}}{enrichment analysis results} 19 | 20 | \item{\code{detail}}{genes included in significant terms and original information} 21 | 22 | \item{\code{species}}{species name} 23 | 24 | \item{\code{pvalueCutoff}}{cutoff pvalue} 25 | 26 | \item{\code{pAdjustMethod}}{pvalue adjust method} 27 | 28 | \item{\code{padjCutoff}}{pvalue adjust cutoff value} 29 | 30 | \item{\code{gene}}{Gene IDs} 31 | 32 | \item{\code{keytype}}{Gene ID type} 33 | 34 | \item{\code{sep}}{character string used to separate the genes when concatenating} 35 | }} 36 | 37 | \author{ 38 | Kai Guo 39 | } 40 | \keyword{classes} 41 | -------------------------------------------------------------------------------- /man/cells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cells.R 3 | \name{cells} 4 | \alias{cells} 5 | \title{cells function for cell type identification} 6 | \usage{ 7 | cells( 8 | x, 9 | species = "human", 10 | keytype = "SYMBOL", 11 | tissue = NULL, 12 | padj = 0.05, 13 | pvalue = NULL, 14 | minSize 
= 3, 15 | maxSize = 500, 16 | padj.method = "BH", 17 | sep = "," 18 | ) 19 | } 20 | \arguments{ 21 | \item{x}{vector contains gene names} 22 | 23 | \item{species}{species name} 24 | 25 | \item{keytype}{keytype for input genes} 26 | 27 | \item{tissue}{tissue type (default NULL)} 28 | 29 | \item{padj}{cutoff p adjust value} 30 | 31 | \item{pvalue}{cutoff pvalue} 32 | 33 | \item{minSize}{minimal number of genes included in significant cell type} 34 | 35 | \item{maxSize}{maximum number of genes included in significant cell type} 36 | 37 | \item{padj.method}{pvalue adjust method(default:"BH")} 38 | 39 | \item{sep}{character string used to separate the genes when concatenating} 40 | } 41 | \description{ 42 | cells function for cell type identification 43 | } 44 | \author{ 45 | Kai Guo 46 | } 47 | -------------------------------------------------------------------------------- /man/detail.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R 3 | \name{detail} 4 | \alias{detail} 5 | \title{detail generic} 6 | \usage{ 7 | detail(x) 8 | } 9 | \arguments{ 10 | \item{x}{cellResult object} 11 | } 12 | \value{ 13 | detail return detial for these significant genes 14 | } 15 | \description{ 16 | detail generic 17 | } 18 | -------------------------------------------------------------------------------- /man/dot-add.list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/misc.R 3 | \name{.add.list} 4 | \alias{.add.list} 5 | \title{combine list together with name as one column} 6 | \usage{ 7 | .add.list(x) 8 | } 9 | \description{ 10 | combine list together with name as one column 11 | } 12 | -------------------------------------------------------------------------------- /man/dot-getdata.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/misc.R 3 | \name{.getdata} 4 | \alias{.getdata} 5 | \title{load the data based on the species name} 6 | \usage{ 7 | .getdata(species) 8 | } 9 | \arguments{ 10 | \item{species}{species name} 11 | } 12 | \description{ 13 | load the data based on the species name 14 | } 15 | \author{ 16 | Kai Guo 17 | } 18 | -------------------------------------------------------------------------------- /man/getdetail.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/misc.R 3 | \name{getdetail} 4 | \alias{getdetail} 5 | \title{get detail from a cellResult object and combine it with other information} 6 | \usage{ 7 | getdetail(x, y, sep = ",") 8 | } 9 | \arguments{ 10 | \item{x}{cellResult object from cells function} 11 | 12 | \item{y}{a data frame with gene name and other information} 13 | 14 | \item{sep}{cellResult object sep} 15 | } 16 | \description{ 17 | get detail from a cellResult object and combine it with other information 18 | } 19 | \author{ 20 | Kai Guo 21 | } 22 | -------------------------------------------------------------------------------- /man/humancells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{humancells} 5 | \alias{humancells} 6 | \title{human} 7 | \format{ 8 | A list of data frames with the following columns: 9 | \describe{ 10 | \item{SYMBOL}{SYMBOL gene IDs} 11 | \item{cellType}{cell type} 12 | \item{tissueType}{tissue Type} 13 | \item{ENTREZID}{ENTREZID gene IDs} 14 | \item{REFSEQ}{REFSEQ gene IDs} 15 | \item{ENSEMBL}{ENSEMBL gene IDs} 16 | } 17 | } 18 | \usage{ 19 | humancells 20 | } 21 | \description{ 22 | human cell markers
for cell type identification 23 | } 24 | \examples{ 25 | data(human) 26 | head(humancells) 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/list.tissue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cells.R 3 | \name{list.tissue} 4 | \alias{list.tissue} 5 | \title{show supported tissues} 6 | \usage{ 7 | list.tissue(species = "human") 8 | } 9 | \arguments{ 10 | \item{species}{species name} 11 | } 12 | \description{ 13 | show supported tissues 14 | } 15 | \author{ 16 | Kai Guo 17 | } 18 | -------------------------------------------------------------------------------- /man/marker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/misc.R 3 | \name{marker} 4 | \alias{marker} 5 | \title{get the unique cluster markers for each cluster} 6 | \usage{ 7 | marker(x, sep = ",", .unique = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{cellResult object or result from cellMarker} 11 | 12 | \item{sep}{character string used to separate the genes in GeneID column} 13 | 14 | \item{.unique}{filter duplicate gene id or not} 15 | } 16 | \description{ 17 | get the unique cluster markers for each cluster 18 | } 19 | \author{ 20 | Kai Guo 21 | } 22 | -------------------------------------------------------------------------------- /man/mousecells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{mousecells} 5 | \alias{mousecells} 6 | \title{mouse} 7 | \format{ 8 | A list of data frames with the following columns: 9 | \describe{ 10 | \item{SYMBOL}{SYMBOL gene IDs} 11 | \item{cellType}{cell type} 12 | \item{tissueType}{tissue Type}
13 | \item{ENTREZID}{ENTREZID gene IDs} 14 | \item{REFSEQ}{REFSEQ gene IDs} 15 | \item{ENSEMBL}{ENSEMBL gene IDs} 16 | } 17 | } 18 | \usage{ 19 | mousecells 20 | } 21 | \description{ 22 | mouse cell markers for cell type identification 23 | } 24 | \examples{ 25 | data(mouse) 26 | head(mousecells) 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/ratcells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{ratcells} 5 | \alias{ratcells} 6 | \title{rat} 7 | \format{ 8 | A list of data frames with the following columns: 9 | \describe{ 10 | \item{SYMBOL}{SYMBOL gene IDs} 11 | \item{cellType}{cell type} 12 | \item{tissueType}{tissue Type} 13 | \item{ENTREZID}{ENTREZID gene IDs} 14 | \item{REFSEQ}{REFSEQ gene IDs} 15 | \item{ENSEMBL}{ENSEMBL gene IDs} 16 | } 17 | } 18 | \usage{ 19 | ratcells 20 | } 21 | \description{ 22 | rat cell markers for cell type identification 23 | } 24 | \examples{ 25 | data(rat) 26 | head(ratcells) 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/rcellmarker-package.Rd: -------------------------------------------------------------------------------- 1 | \name{rcellmarker-package} 2 | \alias{rcellmarker-package} 3 | \alias{rcellmarker} 4 | \docType{package} 5 | \title{ 6 | Identify cell type based on cell markers 7 | } 8 | \description{ 9 | Uses over-representation analysis to identify cell types based on 10 | cell markers. 11 | } 12 | \details{ 13 | The main functions are \code{cells}, which identifies cell types from a vector of 14 | gene names, and \code{cellMarker}, which assigns cell types based on cell cluster results. 15 | } 16 | \author{ 17 | Kai Guo, Zhihan Wang, Pan Gao
18 | 19 | Maintainer: Kai Guo 20 | } 21 | \references{ 22 | This optional section can contain literature or other references for 23 | background information. 24 | } 25 | \keyword{ package } 26 | \seealso{ 27 | \code{\link{cells}}, \code{\link{cellMarker}} 28 | } 29 | \examples{ 30 | \dontrun{ 31 | ## Optional simple examples of the most important functions 32 | ## These can be in \dontrun{} and \donttest{} blocks. 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /man/result.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R 3 | \name{result} 4 | \alias{result} 5 | \title{result generic} 6 | \usage{ 7 | result(x) 8 | } 9 | \arguments{ 10 | \item{x}{cellResult object} 11 | } 12 | \value{ 13 | result returns a data frame and prints a summary 14 | } 15 | \description{ 16 | result generic 17 | } 18 | -------------------------------------------------------------------------------- /man/reverseList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/misc.R 3 | \name{reverseList} 4 | \alias{reverseList} 5 | \title{reverse List} 6 | \usage{ 7 | reverseList(lhs) 8 | } 9 | \arguments{ 10 | \item{lhs}{list with names} 11 | } 12 | \description{ 13 | reverse List 14 | } 15 | \author{ 16 | Kai Guo 17 | } 18 | -------------------------------------------------------------------------------- /rcellmarker.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include <Rcpp.h> 5 | 6 | using namespace Rcpp; 7 | 8 | // hyper_bench_vector 9 | NumericVector hyper_bench_vector(NumericVector& xin, NumericVector& yin, double N, double n); 10 | RcppExport SEXP _rcellmarker_hyper_bench_vector(SEXP xinSEXP, SEXP yinSEXP, SEXP NSEXP, SEXP nSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< NumericVector& >::type xin(xinSEXP); 15 | Rcpp::traits::input_parameter< NumericVector& >::type yin(yinSEXP); 16 | Rcpp::traits::input_parameter< double >::type N(NSEXP); 17 | Rcpp::traits::input_parameter< double >::type n(nSEXP); 18 | rcpp_result_gen = Rcpp::wrap(hyper_bench_vector(xin, yin, N, n)); 19 | return rcpp_result_gen; 20 | END_RCPP 21 | } 22 | // name_table 23 | NumericVector name_table(List& lh); 24 | RcppExport SEXP _rcellmarker_name_table(SEXP lhSEXP) { 25 | BEGIN_RCPP 26 | Rcpp::RObject rcpp_result_gen; 27 | Rcpp::RNGScope rcpp_rngScope_gen; 28 | Rcpp::traits::input_parameter< List& >::type lh(lhSEXP); 29 | rcpp_result_gen = Rcpp::wrap(name_table(lh)); 30 | return rcpp_result_gen; 31 | END_RCPP 32 | } 33 | // fast_factor 34 | SEXP fast_factor(SEXP x); 35 | RcppExport SEXP _rcellmarker_fast_factor(SEXP xSEXP) { 36 | BEGIN_RCPP 37 | Rcpp::RObject rcpp_result_gen; 38 | Rcpp::RNGScope rcpp_rngScope_gen; 39 | Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); 40 | rcpp_result_gen = Rcpp::wrap(fast_factor(x)); 41 | return rcpp_result_gen; 42 | END_RCPP 43 | } 44 | // sf 45 | List sf(DataFrame& x); 46 | RcppExport SEXP _rcellmarker_sf(SEXP xSEXP) { 47 | BEGIN_RCPP 48 | 
Rcpp::RObject rcpp_result_gen; 49 | Rcpp::RNGScope rcpp_rngScope_gen; 50 | Rcpp::traits::input_parameter< DataFrame& >::type x(xSEXP); 51 | rcpp_result_gen = Rcpp::wrap(sf(x)); 52 | return rcpp_result_gen; 53 | END_RCPP 54 | } 55 | // uniq 56 | StringVector uniq(StringVector& xa); 57 | RcppExport SEXP _rcellmarker_uniq(SEXP xaSEXP) { 58 | BEGIN_RCPP 59 | Rcpp::RObject rcpp_result_gen; 60 | Rcpp::RNGScope rcpp_rngScope_gen; 61 | Rcpp::traits::input_parameter< StringVector& >::type xa(xaSEXP); 62 | rcpp_result_gen = Rcpp::wrap(uniq(xa)); 63 | return rcpp_result_gen; 64 | END_RCPP 65 | } 66 | 67 | static const R_CallMethodDef CallEntries[] = { 68 | {"_rcellmarker_hyper_bench_vector", (DL_FUNC) &_rcellmarker_hyper_bench_vector, 4}, 69 | {"_rcellmarker_name_table", (DL_FUNC) &_rcellmarker_name_table, 1}, 70 | {"_rcellmarker_fast_factor", (DL_FUNC) &_rcellmarker_fast_factor, 1}, 71 | {"_rcellmarker_sf", (DL_FUNC) &_rcellmarker_sf, 1}, 72 | {"_rcellmarker_uniq", (DL_FUNC) &_rcellmarker_uniq, 1}, 73 | {NULL, NULL, 0} 74 | }; 75 | 76 | RcppExport void R_init_rcellmarker(DllInfo *dll) { 77 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 78 | R_useDynamicSymbols(dll, FALSE); 79 | } 80 | -------------------------------------------------------------------------------- /src/hyper.cpp: -------------------------------------------------------------------------------- 1 | #include <Rcpp.h> 2 | using namespace Rcpp; 3 | // Upper-tail hypergeometric probability for the packed arguments 4 | // xa = {k, M, NM, n}: forwards them, in that order, to R::phyper with 5 | // lower_tail = FALSE and log_p = FALSE. xa must hold at least four values. 6 | double hyp_test( std::vector<double>& xa) { 7 | double k=xa[0]; 8 | double M=xa[1]; 9 | double NM=xa[2]; 10 | double n=xa[3]; 11 | return R::phyper(k,M,NM,n,FALSE,FALSE); 12 | } 13 | // For every named element of xin, computes hyp_test({xin[name]-1, yin[name], N-yin[name], n}), 14 | // looking both xin and yin up by that name. Returns the values in xin's order, carrying xin's names. 15 | //[[Rcpp::export]] 16 | NumericVector hyper_bench_vector(NumericVector& xin,NumericVector& yin,double N,double n){ 17 | int xsize=xin.size(); 18 | // Size the result once; NumericVector::push_back reallocates and copies on every call. 19 | NumericVector res(xsize); 20 | StringVector xnames=xin.names(); 21 | for(int i=0;i<xsize;i++){ 22 | std::string tname=as<std::string>(xnames[i]); 23 | std::vector<double> xres; 24 | xres.push_back(xin[tname]-1.0); 25 | xres.push_back(yin[tname]); 26 | xres.push_back(N-yin[tname]); 27 | xres.push_back(n); 28 | res[i]=hyp_test(xres); 29 | } 30 | res.attr("names")=xnames; 31 | return res; 32 | }
33 | -------------------------------------------------------------------------------- /src/name_table.cpp: -------------------------------------------------------------------------------- 1 | #include <Rcpp.h> 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | NumericVector name_table(List& lh) { 6 | int n=lh.length(); 7 | StringVector names=lh.names(); 8 | NumericVector res(n); 9 | for(int i=0;i /* NOTE(review): the remainder of this loop and of name_table.cpp is missing from this copy (the text after the loop index was dropped); restore it from version control before building */ -------------------------------------------------------------------------------- /src/sf.cpp: -------------------------------------------------------------------------------- 1 | #include <Rcpp.h> 2 | using namespace Rcpp; 3 | using namespace std; 4 | // Builds an R factor from x: levels are the sorted unique values of x and 5 | // the integer codes come from match(x, levels). 6 | template <int RTYPE> 7 | SEXP fast_factor_template( const Vector<RTYPE>& x ) { 8 | Vector<RTYPE> levs = sort_unique(x); 9 | IntegerVector out = match(x, levs); 10 | out.attr("levels") = as<CharacterVector>(levs); 11 | out.attr("class") = "factor"; 12 | return out; 13 | } 14 | 15 | // Dispatches on the SEXP type of x (integer, double or character vectors); 16 | // any other type yields R_NilValue. 17 | //[[Rcpp::export]] 18 | SEXP fast_factor( SEXP x ) { //modified from RcppCore 19 | switch( TYPEOF(x) ) { 20 | case INTSXP: return fast_factor_template<INTSXP>(x); 21 | case REALSXP: return fast_factor_template<REALSXP>(x); 22 | case STRSXP: return fast_factor_template<STRSXP>(x); 23 | } 24 | return R_NilValue; 25 | } 26 | 27 | // Splits the first column of x by the second column (both read as character 28 | // vectors) by calling R's split() with the second column converted to a factor. 29 | //[[Rcpp::export]] 30 | List sf(DataFrame &x){ 31 | StringVector Gene=x(0); 32 | StringVector Fa=x(1); 33 | Function sp("split"); 34 | List res=sp(Gene,fast_factor(Fa)); 35 | return res; 36 | } 37 | -------------------------------------------------------------------------------- /src/unique.cpp: -------------------------------------------------------------------------------- 1 | #include <Rcpp.h> 2 | using namespace Rcpp; 3 | // Returns the distinct elements of xa via Rcpp's unique(); this overload carries the export attribute. 4 | //[[Rcpp::export]] 5 | StringVector uniq(StringVector& xa){ 6 | StringVector rhs=unique(xa); 7 | return(rhs); 8 | } 9 | // Numeric overload of uniq; it has no export attribute. 10 | NumericVector uniq(NumericVector& xa){ 11 | NumericVector rhs=unique(xa); 12 | return(rhs); 13 | } 14 | --------------------------------------------------------------------------------