├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── NEWS
├── R
    ├── buildAnntationHub.R
    ├── buildEnsembl.R
    ├── buildKEGG.R
    ├── buildNCBI.R
    ├── buildOwn.R
    ├── data.R
    ├── mergeDB.R
    ├── misc.R
    └── zzz.R
├── README.md
├── _config.yml
├── bioAnno.Rproj
├── data
    ├── ath.rda
    └── korg.rda
├── man
    ├── ath.Rd
    ├── dot-extratGO.Rd
    ├── dot-extratGene.Rd
    ├── dot-get.species.info.Rd
    ├── dot-getdbname.Rd
    ├── dot-getmartdb.Rd
    ├── dot-show.path.Rd
    ├── dot-show.tables.Rd
    ├── fromAnnHub.Rd
    ├── fromEnsembl.Rd
    ├── fromKEGG.Rd
    ├── fromNCBI.Rd
    ├── fromOwn.Rd
    ├── getTable.Rd
    ├── is_installed.Rd
    ├── korg.Rd
    ├── listSpecies.Rd
    ├── mergeDB.Rd
    └── simpleCap.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   └── test-bioAnno.R
└── vignettes
    └── bioAnno.Rmd


/.travis.yml:
--------------------------------------------------------------------------------
 1 | #----------------------------------------------------------------
 2 | # Travis-CI configuration for R packages
 3 | #
 4 | # REFERENCES:
 5 | # * Travis CI: https://docs.travis-ci.com/user/languages/r#
 6 | # YAML validated using http://www.yamllint.com/
 7 | #----------------------------------------------------------------
 8 | language: r
 9 | sudo: false
10 | cache: packages
11 | warnings_are_errors: false
12 | r_check_args: --as-cran
13 | latex: false
14 | env:
15 |   global:
16 |   # don't treat missing suggested packages as error
17 |   - _R_CHECK_FORCE_SUGGESTS_=false
18 | matrix:
19 |   include:
20 |     - dist: trusty
21 |       r_check_args: '--ignore-vignettes'
22 |       r_build_args: '--no-build-vignettes'
23 |     - os: osx
24 |       r_check_args: '--ignore-vignettes'
25 |       r_build_args: '--no-build-vignettes'
26 | 
27 | addons:
28 |   apt:
29 |     update: true
30 | 
31 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: bioAnno
 2 | Type: Package
 3 | Title: Build Annotation From Online Database
 4 | Version: 0.99.45
 5 | Authors@R: person("Kai", "Guo", email = "guokai8@gmail.com",
 6 |                   role = c("aut", "cre"))
 7 | Description: This package provides functions for building annotation
 8 |             packages from KEGG, NCBI and Ensembl information and returns
 9 |             OrgDb objects such as org.Hs.eg.db.
10 | License: GPL-2
11 | Encoding: UTF-8
12 | LazyData: true
13 | Depends: R (>= 4.0.0)
14 | Imports:
15 |       AnnotationForge,
16 |       AnnotationHub,
17 |       AnnotationDbi,
18 |       RSQLite,
19 |       KEGGREST,
20 |       biomaRt,
21 |       data.table,
22 |       dplyr,
23 |       jsonlite,
24 |       R.utils,
25 |       utils,
26 |       stats,
27 |       magrittr,
28 |       httr,
29 |       stringr
30 | Suggests:
31 |     GO.db,
32 |     knitr,
33 |     testthat,
34 |     rmarkdown
35 | VignetteBuilder: knitr
36 | biocViews: Software, Annotation, Clustering, GO, KEGG, Pathways, Reactome
37 | RoxygenNote: 7.2.3
38 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(fromAnnHub)
 4 | export(fromEnsembl)
 5 | export(fromKEGG)
 6 | export(fromNCBI)
 7 | export(fromOwn)
 8 | export(getTable)
 9 | export(listSpecies)
10 | export(mergeDB)
11 | importFrom(AnnotationDbi,columns)
12 | importFrom(AnnotationDbi,keys)
13 | importFrom(AnnotationDbi,keytypes)
14 | importFrom(AnnotationDbi,select)
15 | importFrom(AnnotationForge,makeOrgPackage)
16 | importFrom(AnnotationHub,AnnotationHub)
17 | importFrom(AnnotationHub,query)
18 | importFrom(KEGGREST,keggLink)
19 | importFrom(KEGGREST,keggList)
20 | importFrom(R.utils,gunzip)
21 | importFrom(RSQLite,SQLite)
22 | importFrom(RSQLite,dbConnect)
23 | importFrom(RSQLite,dbDisconnect)
24 | importFrom(RSQLite,dbGetQuery)
25 | importFrom(RSQLite,dbListTables)
26 | importFrom(RSQLite,dbReadTable)
27 | importFrom(biomaRt,getBM)
28 | importFrom(biomaRt,listAttributes)
29 | importFrom(biomaRt,listDatasets)
30 | importFrom(biomaRt,useDataset)
31 | importFrom(biomaRt,useEnsembl)
32 | importFrom(biomaRt,useMart)
33 | importFrom(data.table,":=")
34 | importFrom(data.table,fread)
35 | importFrom(dplyr,collect)
36 | importFrom(dplyr,distinct)
37 | importFrom(dplyr,left_join)
38 | importFrom(dplyr,pull)
39 | importFrom(dplyr,select_)
40 | importFrom(httr,GET)
41 | importFrom(httr,content)
42 | importFrom(jsonlite,fromJSON)
43 | importFrom(jsonlite,toJSON)
44 | importFrom(magrittr,"%>%")
45 | importFrom(stats,na.omit)
46 | importFrom(stringr,str_trim)
47 | importFrom(utils,data)
48 | importFrom(utils,download.file)
49 | importFrom(utils,install.packages)
50 | importFrom(utils,remove.packages)
51 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | Changes in version 0.99.17 (2020-4-21)
 2 | + fixed issues and added new function
 3 | Changes in version 0.99.15 (2020-4-16)
 4 | + use suppressWarnings to shut down warnings
 5 | Changes in version 0.99.9 (2020-3-6)
 6 | + fix examples issues
 7 | Changes in version 0.99.8 (2020-3-6)
 8 | + fix examples issues
 9 | Changes in version 0.99.7 (2020-3-6)
10 | + fix examples issues
11 | Changes in version 0.99.6 (2020-3-6)
12 | + fix examples issues
13 | Changes in version 0.99.2 (2020-3-5)
14 | + fix NCBI issue
15 | Changes in version 0.99.1 (2020-3-5)
16 | + fix examples issue
17 | Changes in version 0.99.0 (2020-3-4)
18 | + Submitted to Bioconductor
19 | 


--------------------------------------------------------------------------------
/R/buildAnntationHub.R:
--------------------------------------------------------------------------------
  1 | #' @title extract annotation database by using AnnotationHub
  2 | #' @description Searches AnnotationHub for an OrgDb record matching the
  3 | #'   species, extracts its gene tables and rebuilds them as an annotation
  4 | #'   package with AnnotationForge.
  5 | #' @importFrom AnnotationHub AnnotationHub
  6 | #' @importFrom AnnotationHub query
  7 | #' @importFrom AnnotationDbi select
  8 | #' @importFrom AnnotationDbi keys
  9 | #' @importFrom AnnotationDbi columns
 10 | #' @importFrom AnnotationForge makeOrgPackage
 11 | #' @importFrom RSQLite dbGetQuery
 12 | #' @importFrom utils remove.packages
 13 | #' @importFrom utils install.packages
 14 | #' @importFrom stats na.omit
 15 | #' @param species species name(common name,kegg.species.code or scientific name)
 16 | #' @param author author for the annotation package (default: "myself")
 17 | #' @param maintainer maintainer for the annotation package
 18 | #' @param tax_id taxonomy id for the species (default: taken from the
 19 | #'   metadata table of the selected AnnotationHub record)
 20 | #' @param genus genus for the species; accepted but not forwarded, the
 21 | #'   package is always built with an empty genus
 22 | #' @param version version for the annotation package (default: "0.0.1")
 23 | #' @param install install the package or not(default: TRUE)
 24 | #' @param pkgname package name you want to choose
 25 | #' @param outputDir temporary file path (default: tempdir())
 26 | #' @param rebuild rebuild the package or not(default: FALSE)
 27 | #' @examples
 28 | #' ## build annotation package for c elegans
 29 | #' fromAnnHub(species = "celegans", install = FALSE)
 30 | #' @author Kai Guo
 31 | #' @return annotation package
 32 | #' @export
 33 | fromAnnHub<-function(species, author = NULL,
 34 |         maintainer = NULL, tax_id = NULL, genus = NULL,
 35 |         version = NULL,install = TRUE, pkgname=NULL, outputDir = NULL, 
 36 |         rebuild = FALSE){
 37 |     dbi <- .getdbname(species)
 38 |     if(is.null(dbi)){
 39 |         # Fall back to the first two words of the scientific name known
 40 |         # for this species; NA marks a failed lookup.
 41 |         dbi <- tryCatch({
 42 |                 info <- .get.species.info(species = species)
 43 |                 sciname <- as.character(info["scientific.name"])
 44 |                 if(is.na(sciname)) stop("no scientific name")
 45 |                 paste0(unlist(strsplit(sciname, ' '))[c(1,2)], collapse = " ")},
 46 |                         error = function(e){
 47 |                                 return(NA)
 48 |                                 })
 49 |     }
 50 |     species <- gsub(' .*', '', species)
 51 |     if(!is.null(pkgname)){
 52 |         dbname <- paste0('org.', pkgname, '.eg.db')
 53 |     }else{
 54 |         dbname <- paste0('org.', species, '.eg.db')
 55 |     }
 56 |     # remove.packages() stops when the package is not installed, so only
 57 |     # call it when there is something to remove
 58 |     if(isTRUE(rebuild) && is_installed(dbname)){
 59 |         suppressMessages(remove.packages(dbname))
 60 |     }
 61 |     if(is_installed(dbname)){
 62 |         suppressMessages(requireNamespace(dbname, quietly = TRUE))
 63 |         cat("You alreay had the annotation package: ", dbname, " \n")
 64 |     }else{
 65 |         # temp cache for AnnotationHub; it may already exist from an
 66 |         # earlier call in this session, which is not worth a warning
 67 |         cachedir <- paste0(tempdir(),"/AnnotationHub/")
 68 |         dir.create(cachedir, showWarnings = FALSE)
 69 |         ah <- AnnotationHub(cache = cachedir, ask = FALSE)
 70 |         if(is.na(dbi)){
 71 |             dbi <- species
 72 |         }
 73 |         ah <- query(ah, dbi)
 74 |         ahdb <- ah$title
 75 |         names(ahdb) <- ah$ah_id
 76 |         ### only need org.xxx.eg.xxx or org.xxx.db.sqlite
 77 |         # '(eg|db)' is an alternation; '[eg|db]' would match any one of
 78 |         # the characters e, g, |, d, b
 79 |         idx <- grep('^org', grep('\\.(eg|db)', ahdb, value = TRUE), value = TRUE)
 80 |         if(length(idx) == 0){
 81 |             stop("No OrgDb record found in AnnotationHub for: ", dbi)
 82 |         }
 83 |         if(length(idx) > 1){
 84 |             cat("Please select which database you want to use (1,2,3,...): \n")
 85 |             for(i in seq_along(idx)){
 86 |                 cat(i, ":", idx[i], "\n")
 87 |             }
 88 |             idd <- readidx()
 89 |             idx <- idx[idd]
 90 |         }
 91 |         idn <- names(idx)
 92 |         res <- ah[[idn]]
 93 |         packinfo <- dbGetQuery(res$conn, "select * from metadata;")
 94 |         geneinfo <- select(res,keys = keys(res), columns = c("GENENAME"))
 95 |         geneinfo <- na.omit(geneinfo)
 96 |         colnames(geneinfo)[1] <- "GID"
 97 |         gene2entrezid <- data.frame("GID"=geneinfo$GID,"ENTREZID"= geneinfo$GID)
 98 |         gene2refseq <- select(res, keys = keys(res), columns = c("REFSEQ"))
 99 |         gene2refseq <- na.omit(gene2refseq)
100 |         colnames(gene2refseq)[1] <- "GID"
101 |         gene2symbol <- select(res, keys = keys(res), columns = c("SYMBOL"))
102 |         gene2symbol <- na.omit(gene2symbol)
103 |         colnames(gene2symbol)[1] <- "GID"
104 |         gene2go <- select(res, keys = keys(res), columns = c("GOALL", "EVIDENCEALL"))
105 |         gene2go <- gene2go[, c(1, 2, 3)]
106 |         gene2go <- gene2go[!duplicated(gene2go), ]
107 |         gene2go <- na.omit(gene2go)
108 |         colnames(gene2go) <- c("GID", "GO", "EVIDENCE")
109 |         # the PATH table is only added when the record provides it
110 |         pathway <- FALSE
111 |         if("PATH" %in% columns(res)){
112 |             gene2path <- select(res, keys = keys(res), columns = c("PATH"))
113 |             colnames(gene2path) <- c("GID", "PATH")
114 |             gene2path <- gene2path[!duplicated(gene2path), ]
115 |             gene2path <- na.omit(gene2path)
116 |             pathway <- TRUE
117 |         }
118 |         # placeholder row used when the record has no ENSEMBL column
119 |         gene2ensembl <- data.frame("GID"=keys(res)[1],"ENSEMBL"="")
120 |         if("ENSEMBL" %in% columns(res)){
121 |             gene2ensembl <- select(res, keys = keys(res), columns = c("ENSEMBL"))
122 |             colnames(gene2ensembl) <- c("GID", "ENSEMBL")
123 |             gene2ensembl <- gene2ensembl[!duplicated(gene2ensembl), ]
124 |             gene2ensembl <- na.omit(gene2ensembl)
125 |         }
126 |         if(is.null(version)){
127 |             version <- "0.0.1"
128 |         }
129 |         if(is.null(tax_id)){
130 |             tax_id <- packinfo[6,2]
131 |         }
132 |         if(is.null(author)){
133 |             author <- "myself"
134 |         }
135 |         if(is.null(maintainer)){
136 |             maintainer <- "myself<myself@email.com>"
137 |         }
138 |         if(!is.null(pkgname)){
139 |             species <- pkgname
140 |         }
141 |         if(is.null(outputDir)){
142 |             outputDir <- tempdir()
143 |         }
144 |         # NOTE(review): genus is deliberately left empty here, presumably so
145 |         # the generated package keeps the org.<species>.eg.db name computed
146 |         # above - confirm before forwarding the genus argument
147 |         if(isTRUE(pathway)){
148 |             package <- suppressWarnings(makeOrgPackage(
149 |                 gene_info = geneinfo,
150 |                 entrezid = gene2entrezid,
151 |                 refseq = gene2refseq,
152 |                 symbol = gene2symbol,
153 |                 ensembl = gene2ensembl,
154 |                 go = gene2go,
155 |                 path = gene2path,
156 |                 maintainer = maintainer,
157 |                 author = author,
158 |                 outputDir = outputDir,
159 |                 tax_id = as.character(tax_id),
160 |                 genus = "",
161 |                 species = species,
162 |                 version = version,
163 |                 verbose = FALSE,
164 |                 goTable = "go"))
165 |         }else{
166 |             package <- suppressWarnings(makeOrgPackage(
167 |                 gene_info = geneinfo,
168 |                 refseq = gene2refseq,
169 |                 symbol = gene2symbol,
170 |                 ensembl = gene2ensembl,
171 |                 entrezid = gene2entrezid,
172 |                 go = gene2go,
173 |                 maintainer = maintainer,
174 |                 author = author,
175 |                 outputDir = outputDir,
176 |                 tax_id = as.character(tax_id),
177 |                 genus = "",
178 |                 species = species,
179 |                 version = version,
180 |                 verbose = FALSE,
181 |                 goTable = "go"))
182 |         }
183 |         if(isTRUE(install)){
184 |             install.packages(package, repos = NULL, type = "source")
185 |             unlink(package, recursive = TRUE)
186 |         }else{
187 |             .show.path(package)
188 |             .show.tables(package)
189 |         }
190 |     }
191 | }
192 | 


--------------------------------------------------------------------------------
/R/buildEnsembl.R:
--------------------------------------------------------------------------------
  1 | #' @title build annotation from ensembl
  2 | #' @description build an OrgDb annotation package from Ensembl BioMart tables
  3 | #' @importFrom biomaRt useEnsembl listAttributes
  4 | #' @importFrom biomaRt useDataset
  5 | #' @importFrom biomaRt getBM
  6 | #' @importFrom AnnotationForge makeOrgPackage
  7 | #' @importFrom utils remove.packages
  8 | #' @importFrom utils install.packages
  9 | #' @importFrom stats na.omit
 10 | #' @param host the Ensembl API host; currently not used: plant = TRUE
 11 | #'             always connects to https://plants.ensembl.org and
 12 | #'             plant = FALSE connects with useEnsembl("ensembl")
 13 | #' @param species the species you want to search,
 14 | #'             you can use listSpecies to get the species name
 15 | #' @param anntype the type of function annotation(GO,KEGG,Reactome,PFAM,
 16 | #'             InterPro) you want to get from Ensembl
 17 | #' @param buildall include all possible annotation types listed in Ensembl
 18 | #' @param author author for the annotation package
 19 | #' @param maintainer maintainer for the annotation package
 20 | #' @param tax_id taxonomy id for the species
 21 | #' @param genus genus name for the annotation package
 22 | #' @param version version number for the annotation package
 23 | #' @param plant plant or animal species (TRUE/FALSE)
 24 | #' @param install install the package or not(default: TRUE)
 25 | #' @param pkgname package name you want to choose
 26 | #' @param rebuild rebuild the package or not(default: FALSE)
 27 | #' @param outputDir temporary output path
 28 | #' @examples
 29 | #' fromEnsembl(species = "Caenorhabditis elegans", anntype="GO")
 30 | #' @author Kai Guo
 31 | #' @return annotation package
 32 | #' @export
 33 | fromEnsembl <- function(species = "Caenorhabditis elegans",
 34 |                     host = NULL,
 35 |                     anntype = NULL, buildall = TRUE, author = NULL,
 36 |                     maintainer = NULL, tax_id = NULL, genus = NULL,
 37 |                     version = NULL, plant = FALSE,
 38 |                     install = TRUE, pkgname=NULL,outputDir = NULL, rebuild = FALSE){
 39 |     if(isTRUE(plant)){
 40 |         host = "https://plants.ensembl.org"
 41 |         mart = useEnsembl("plants_mart", host = host)
 42 |     }else{
 43 |         # NOTE(review): a user-supplied host is not applied here; the
 44 |         # mirror-based call was disabled, presumably on purpose - confirm
 45 |         mart = useEnsembl("ensembl")
 46 |     }
 47 |     dbinfo<-.getmartdb(species, mart)
 48 |     if(!is.null(pkgname)){
 49 |       dbname1 <- paste0('org.', pkgname, '.eg.db')
 50 |     }else{
 51 |       dbname1 <- paste0('org.', strsplit(species," ")[[1]][1], '.eg.db')
 52 |     }
 53 |     # remove.packages() stops on a package that is not installed
 54 |     if(isTRUE(rebuild) && is_installed(dbname1)){
 55 |         suppressMessages(remove.packages(dbname1))
 56 |     }
 57 |     if(is_installed(dbname1)){
 58 |         suppressMessages(requireNamespace(dbname1, quietly = TRUE))
 59 |         cat("You alreay had the annotation package: ", dbname1, " \n")
 60 |     }else{
 61 |     dbname <- as.character(dbinfo$dbname)
 62 |     dataset <- useDataset(dbname, mart = mart)
 63 |     attr <- listAttributes(dataset)$name
 64 |     if(is.null(anntype)){
 65 |         if(isTRUE(buildall)){
 66 |             anntype <- c("GO","KEGG","Reactome","PFAM","InterPro")
 67 |         }else{
 68 |             stop("You need to specific anntation type!\n")
 69 |         }
 70 |     }
 71 |     chr_values <- as.vector(unlist(dbinfo$chr_info$name))
 72 |     geneinfo <- getBM(attributes = c("ensembl_gene_id","description"),
 73 |                     filters ="chromosome_name", values = chr_values, dataset)
 74 |     colnames(geneinfo) <- c("GID", "GENENAME")
 75 |     geneinfo <- na.omit(geneinfo)
 76 |     gene2ensembl <- data.frame("GID"=geneinfo$GID,"ENSEMBL"= geneinfo$GID)
 77 |     gene2symbol <- getBM(attributes = c("ensembl_gene_id","external_gene_name"),
 78 |             filters = "chromosome_name", values = chr_values, dataset)
 79 |     # drop empty values only: one-character symbols and ids are valid
 80 |     gene2symbol <- gene2symbol[nchar(gene2symbol[,2])>0,]
 81 |     colnames(gene2symbol) <- c("GID","SYMBOL")
 82 |     gene2entrezid <- getBM(attributes = c("ensembl_gene_id","entrezgene_id"),
 83 |             filters ="chromosome_name",values = chr_values, dataset)
 84 |     gene2entrezid <- gene2entrezid[nchar(gene2entrezid[,2])>0, ]
 85 |     colnames(gene2entrezid) <- c("GID","ENTREZID")
 86 |     gene2entrezid <- na.omit(gene2entrezid)
 87 |     if(("GO" %in% anntype) && any(grepl('go_id',attr))){
 88 |         gene2go <- getBM(attributes = c("ensembl_gene_id", "go_id",
 89 |                     "go_linkage_type"),
 90 |                     filters ="chromosome_name", values = chr_values, dataset)
 91 |         gene2go <- gene2go[nchar(gene2go[,2])>1, ]
 92 |         gene2go <- gene2go[nchar(gene2go[,3])>1, ]
 93 |         colnames(gene2go) <- c("GID", "GO", "EVIDENCE")
 94 |         gene2go <- na.omit(gene2go)
 95 |         gene2go <- gene2go[!duplicated(gene2go), ]
 96 |     }else{
 97 |         gene2go <- data.frame("GID" = geneinfo$GID,"GO" = "GO:0008150",
 98 |         "EVIDENCE" = "IEA")
 99 |         cat("Gene Ontology are not list in your annotation database\n")
100 |     }
101 |     if(("KEGG" %in% anntype) && any(grepl('kegg_enzyme',attr))){
102 |         gene2path <- getBM(attributes = c("ensembl_gene_id", "kegg_enzyme"),
103 |                     filters = "chromosome_name", values = chr_values, dataset)
104 |         gene2path[,2] <- sub('\\+.*', '', gene2path[,2])
105 |         gene2path <- gene2path[nchar(gene2path[,2])>1, ]
106 |         colnames(gene2path) <- c("GID", "PATH")
107 |         gene2path <- na.omit(gene2path)
108 |         gene2path <- gene2path[!duplicated(gene2path), ]
109 |     }else{
110 |         gene2path <- data.frame("GID" = geneinfo$GID,"PATH" = "01100")
111 |         cat("KEGG Pathway are not list in your annotation database\n")
112 |     }
113 |     if(("PFAM" %in% anntype) && any(grepl('pfam',attr))){
114 |         gene2pfam <- getBM(attributes = c("ensembl_gene_id","pfam"),
115 |                     filters ="chromosome_name", values = chr_values, dataset)
116 |         gene2pfam <- gene2pfam[nchar(gene2pfam[,2])>1, ]
117 |         colnames(gene2pfam) <- c("GID", "PFAM")
118 |         gene2pfam <- na.omit(gene2pfam)
119 |     }else{
120 |         gene2pfam <- data.frame("GID" = geneinfo$GID, "PFAM" = "PF00001")
121 |         cat("Protein Family are not list in your annotation database\n")
122 |     }
123 |     if(("InterPro" %in% anntype) && any(grepl('interpro',attr))){
124 |         gene2interpro <- getBM(attributes = c("ensembl_gene_id", "interpro"),
125 |             filters = "chromosome_name", values = chr_values, dataset)
126 |         gene2interpro <- gene2interpro[nchar(gene2interpro[, 2])>1, ]
127 |         colnames(gene2interpro) <- c("GID", "INTERPRO")
128 |         gene2interpro <- na.omit(gene2interpro)
129 |     }else{
130 |         gene2interpro <- data.frame("GID" = geneinfo$GID,
131 |             "INTERPRO" = "IPR000001")
132 |         cat("InterPro are not list in your annotation database\n")
133 |     }
134 |     if(("Reactome" %in% anntype) && any(grepl('reactome',attr))){
135 |         if(isTRUE(plant)){
136 |             gene2reactome <- getBM(attributes = c("ensembl_gene_id",
137 |             "plant_reactome_pathway"), filters = "chromosome_name",
138 |             values = chr_values, dataset)
139 |         }else{
140 |             gene2reactome <- getBM(attributes = c("ensembl_gene_id", "reactome"),
141 |             filters = "chromosome_name", values = chr_values, dataset)
142 |         }
143 |         gene2reactome <- gene2reactome[nchar(gene2reactome[,2])>1,]
144 |         colnames(gene2reactome) <- c("GID","REACTOME")
145 |         gene2reactome <- na.omit(gene2reactome)
146 |     }else{
147 |         gene2reactome <- data.frame("GID" = geneinfo$GID,
148 |         "REACTOME" = "RSA0000000")
149 |         cat("Reactome Pathway are not list in your annotation database\n")
150 |     }
151 |     if(is.null(author)){
152 |         author <-"myself"
153 |     }
154 |     if(is.null(maintainer)){
155 |         maintainer <- "mysel<myself@gmail.com>"
156 |     }
157 |     if(is.null(tax_id)){
158 |         tax_id <- "123"
159 |     }
160 |     if(is.null(version)){
161 |         version <- "0.0.1"
162 |     }
163 |     if(is.null(genus)){
164 |         genus <- ""
165 |     }
166 |     if(is.null(outputDir)){
167 |         outputDir <- tempdir()
168 |     }
169 |     species <- gsub(' .*', '', species)
170 |     if(!is.null(pkgname)){
171 |       species <- pkgname
172 |     }
173 |     package <- suppressWarnings(makeOrgPackage(gene_info = geneinfo,
174 |         symbol = gene2symbol,
175 |         entrezid = gene2entrezid,
176 |         ensembl = gene2ensembl,
177 |         go = gene2go,
178 |         path = gene2path,
179 |         pfam = gene2pfam,
180 |         interpro = gene2interpro,
181 |         reactome = gene2reactome,
182 |         version = version,
183 |         maintainer = maintainer,
184 |         author = author,
185 |         outputDir = outputDir,
186 |         tax_id = tax_id,
187 |         genus = genus,
188 |         species = species,
189 |         verbose = FALSE,
190 |         goTable = "go"
191 |     ))
192 |     if(isTRUE(install)){
193 |         install.packages(package, repos = NULL, type = "source")
194 |         unlink(package, recursive = TRUE)
195 |     }else{
196 |         .show.path(package)
197 |         .show.tables(package)
198 |         #return(package)
199 |     }
200 |     }
201 | }
202 | ##' @title show the species (datasets) offered by Ensembl
203 | ##' @importFrom  biomaRt useMart
204 | ##' @importFrom  biomaRt listDatasets
205 | ##' @param host name of the Ensembl host site
206 | ##' @param plant query the plant database instead (default: FALSE)
207 | ##' @examples
208 | ##' listSpecies()
209 | ##' @author Kai Guo
210 | ##' @return data.frame describing the available species, or NULL when
211 | ##'   the datasets could not be listed
212 | ##' @export
213 | listSpecies <- function(host = "www", plant = FALSE){
214 |     cat("You could choose different host to get high speed!\n")
215 |     usePlants <- isTRUE(plant)
216 |     if(!usePlants){
217 |         cat("host: 'www', 'uswest', 'useast', 'asia'\n" )
218 |         mart <- useEnsembl("ensembl")
219 |     }else{
220 |         host <- "https://plants.ensembl.org"
221 |         mart <- useEnsembl("plants_mart", host = host)
222 |     }
223 |     # an error or a warning while listing the datasets both give NULL
224 |     toNull <- function(cond){
225 |         NULL
226 |     }
227 |     datasets <- tryCatch(listDatasets(mart), error = toNull, warning = toNull)
228 |     if(is.data.frame(datasets)){
229 |         colnames(datasets)[2] <- "species"
230 |     }
231 |     datasets
232 | }
233 | 
234 | 


--------------------------------------------------------------------------------
/R/buildKEGG.R:
--------------------------------------------------------------------------------
  1 | #' @title make annotation database from KEGG and GO from NCBI
  2 | #' @description Downloads the gene list, pathway links and KO links of a
  3 | #'   species from KEGG (and, on request, Gene Ontology from NCBI) and builds
  4 | #'   an annotation package with AnnotationForge.
  5 | #' @importFrom KEGGREST keggLink
  6 | #' @importFrom KEGGREST keggList
  7 | #' @importFrom AnnotationForge makeOrgPackage
  8 | #' @importFrom utils remove.packages
  9 | #' @importFrom utils install.packages
 10 | #' @importFrom stats na.omit
 11 | #' @importFrom utils data
 12 | #' @param species species name(common name,kegg.species.code or scientific name)
 13 | #' @param anntype the type of function annotation you want to get; the KEGG
 14 | #'             pathway and KO tables are always built, include "GO" to
 15 | #'             also download Gene Ontology from NCBI
 16 | #' @param author author for the annotation package
 17 | #' @param maintainer maintainer for the annotation package
 18 | #' @param tax_id taxonomy id for the species
 19 | #' @param genus genus for the species; accepted but not forwarded, the
 20 | #'   package is always built with an empty genus
 21 | #' @param version version for the annotation package
 22 | #' @param pkgname package name you want to choose
 23 | #' @param install install the package or not(default: TRUE)
 24 | #' @param rebuild rebuild the package or not(default: FALSE)
 25 | #' @param outputDir temporary output path
 26 | #' @examples
 27 | #' fromKEGG(species = "eco", install = FALSE)
 28 | #' @author Kai Guo
 29 | #' @return annotation package
 30 | #' @export
 31 | #'
 32 | fromKEGG <- function(species="ath", anntype=c("KEGG"), author=NULL,
 33 |                 maintainer=NULL,tax_id=NULL,genus=NULL,version=NULL,pkgname=NULL,
 34 |                 install=TRUE,outputDir=NULL,rebuild=FALSE){
 35 |     cat("#########################################################################\n")
 36 |     cat("The bioAnno package downloads and uses KEGG data.Non-academic uses may
 37 | require a KEGG license agreement (details at http://www.kegg.jp/kegg/legal.html)\n")
 38 |     cat("The Gene Ontology are downloaded from NCBI if you choose GO.\n")
 39 |     cat("#########################################################################\n")
 40 |     dbinfo <- .get.species.info(species)
 41 |     species <- dbinfo["kegg.code"]
 42 |     if(!is.null(pkgname)){
 43 |       dbname <- paste0('org.', pkgname, '.eg.db')
 44 |     }else{
 45 |       dbname <- paste0('org.', species, '.eg.db')
 46 |     }
 47 |     # remove.packages() stops when the package is not installed, so only
 48 |     # call it when there is something to remove
 49 |     if(isTRUE(rebuild) && is_installed(dbname)){
 50 |         suppressMessages(remove.packages(dbname))
 51 |     }
 52 |     if(is_installed(dbname)){
 53 |         suppressMessages(requireNamespace(dbname,quietly = TRUE))
 54 |         cat("You alreay had the annotation package: ", dbname, " \n")
 55 |     }else{
 56 |     tmp <- keggList(species)
 57 |     geneinfo <- data.frame("GID" = sub(paste0(species,":"), '',
 58 |                                        names(tmp)), "GENENAME" = "")
 59 |     rownames(geneinfo) <- NULL
 60 |     gene2entrezid <- data.frame("GID" = geneinfo$GID, "ENTREZID" = geneinfo$GID )
 61 |     gene2go <- data.frame()
 62 |     if("GO" %in% anntype){
 63 |         gene2go <- .extratGO(taxid = dbinfo["tax.id"])
 64 |         gene2go <- gene2go[!duplicated(gene2go), ]
 65 |         # For ath the GO gene ids are translated through the bundled `ath`
 66 |         # table (presumably NCBI gene id -> KEGG gene id; confirm against
 67 |         # data/ath.rda). Only the NCBI-derived table is translated: the
 68 |         # placeholder table below is already keyed by KEGG gene ids.
 69 |         if(species=="ath" && nrow(gene2go) > 0){
 70 |             if(!exists("ath")) data(ath)
 71 |             gene2go$GID <- ath[gene2go$GID, 1]
 72 |             gene2go <- na.omit(gene2go)
 73 |         }
 74 |     }
 75 |     if(nrow(gene2go) == 0){
 76 |         cat("No Gene Ontology information available !\n")
 77 |         # placeholder so that the go table is never empty
 78 |         gene2go <- data.frame("GID" = geneinfo$GID,
 79 |         "GO" = "GO:0008150", "EVIDENCE" = "IEA")
 80 |     }
 81 |     tmp <- keggLink('pathway', species)
 82 |     gene2path <- data.frame("GID" = sub(paste0(species,":"), '', names(tmp)),
 83 |         "PATH"= sub(species,'',sub('path:','',tmp)))
 84 |     tmp <- keggLink('ko', species)
 85 |     gene2ko <- data.frame("GID" = sub(paste0(species, ":"), '',
 86 |         names(tmp)), "KO" = sub('ko:', '', tmp))
 87 | 
 88 |     if(is.null(version)){
 89 |         version <- "0.0.1"
 90 |     }
 91 |     if(is.null(tax_id)){
 92 |         tax_id <- dbinfo["tax.id"]
 93 |     }
 94 |     if(is.null(author)){
 95 |         author <- "myself"
 96 |     }
 97 |     if(is.null(maintainer)){
 98 |         maintainer <- "myself<myself@email.com>"
 99 |     }
100 |     if(!is.null(pkgname)){
101 |       species <- pkgname
102 |     }
103 |     if(is.null(outputDir)){
104 |         outputDir <- tempdir()
105 |     }
106 |     # NOTE(review): genus is deliberately left empty here, presumably so the
107 |     # generated package keeps the org.<species>.eg.db name computed above -
108 |     # confirm before forwarding the genus argument
109 |     package <- suppressWarnings(makeOrgPackage(
110 |     gene_info = geneinfo,
111 |     path = gene2path,
112 |     entrezid = gene2entrezid,
113 |     ko = gene2ko,
114 |     go = gene2go,
115 |     maintainer = maintainer,
116 |     author = author,
117 |     outputDir = outputDir,
118 |     tax_id = tax_id,
119 |     genus = "",
120 |     species = species,
121 |     version = version,
122 |     verbose = FALSE,
123 |     goTable = "go"
124 |     ))
125 |     if(isTRUE(install)){
126 |         install.packages(package, repos = NULL, type="source")
127 |         unlink(package, recursive = TRUE)
128 |     }else{
129 |         .show.path(package)
130 |         .show.tables(package)
131 |         return(package)
132 |     }
133 |     }
134 | }
135 | 
136 | 
137 | 


--------------------------------------------------------------------------------
/R/buildNCBI.R:
--------------------------------------------------------------------------------
  1 | #' @title build annotation database from NCBI
  2 | #' @importFrom AnnotationForge makeOrgPackage
  3 | #' @importFrom utils remove.packages
  4 | #' @importFrom utils install.packages
  5 | #' @param species species name
  6 | #' @param author author for the annotation package
  7 | #' @param maintainer maintainer for the annotation package
  8 | #' @param tax_id taxonomy id for the species
  9 | #' @param genus genus for the species (currently not forwarded:
 10 | #'        makeOrgPackage is always called with an empty genus)
 11 | #' @param version version for the annotation package
 12 | #' @param install install the package or not(default: TRUE)
 13 | #' @param pkgname package name you want to choose; it replaces the species
 14 | #'        code in the package name 'org.<name>.eg.db'
 15 | #' @param rebuild rebuild the package or not(default: FALSE)
 16 | #' @param outputDir temporary output path
 17 | #' @examples
 18 | #' \donttest{
 19 | #' ## build annotation package for Ecoli
 20 | #' fromNCBI(species = "eco", install = FALSE)
 21 | #' }
 22 | #' @author Kai Guo
 23 | #' @return called for its side effects: the built package is either
 24 | #'         installed or its path and tables are reported
 25 | #' @export
 26 | fromNCBI <- function(species = "ath", author = NULL,
 27 |         maintainer = NULL, tax_id = NULL, genus=NULL, version = NULL,
 28 |         install = TRUE, pkgname = NULL, outputDir=NULL, rebuild = FALSE){
 29 |     dbinfo <- .get.species.info(species=species)
 30 |     species <- dbinfo["kegg.code"]
 31 |     # A user supplied pkgname replaces the species code in the package name,
 32 |     # so it has to drive the installed/rebuild checks below as well.
 33 |     if(!is.null(pkgname)){
 34 |         species <- pkgname
 35 |     }
 36 |     dbname <- paste0('org.',species,'.eg.db')
 37 |     # remove.packages() stops when the package is not installed, so only
 38 |     # an existing build is removed.
 39 |     if(isTRUE(rebuild) && is_installed(dbname)){
 40 |         suppressMessages(remove.packages(dbname))
 41 |     }
 42 |     if(is_installed(dbname)){
 43 |         suppressMessages(requireNamespace(dbname,quietly = TRUE))
 44 |         cat("You alreay had the annotation package: ",dbname," \n")
 45 |     }else{
 46 |         geneinfo <- .extratGene(taxid = dbinfo['tax.id'])
 47 |         gene2entrezid <- data.frame("GID"=geneinfo$GID,"ENTREZID"= geneinfo$GID)
 48 |         gene2entrezid <- gene2entrezid[!duplicated(gene2entrezid),]
 49 |         gene2symbol <- geneinfo[,c("GID","SYMBOL")]
 50 |         # keep the de-duplicated table; the result was previously discarded
 51 |         gene2symbol <- gene2symbol[!duplicated(gene2symbol),]
 52 |         geneinfo <- geneinfo[,c("GID","DESCRIPTION")]
 53 |         geneinfo <- geneinfo[!duplicated(geneinfo),]
 54 |         gene2go <- .extratGO(taxid = dbinfo['tax.id'])
 55 |         if(nrow(gene2go)==0){
 56 |             cat("No Gene Ontology information available !\n")
 57 |             # placeholder annotation: every gene gets the GO term below
 58 |             gene2go <- data.frame("GID" = geneinfo$GID, "GO" = "GO:0008150",
 59 |                         "EVIDENCE" = "IEA")
 60 |         }
 61 |         # fill in defaults for everything the caller left unset
 62 |         if(is.null(version)){
 63 |             version <- "0.0.1"
 64 |         }
 65 |         if(is.null(tax_id)){
 66 |             tax_id <- dbinfo["tax.id"]
 67 |         }
 68 |         if(is.null(author)){
 69 |             author <- "myself"
 70 |         }
 71 |         if(is.null(maintainer)){
 72 |             maintainer <- "myself<myself@email.com>"
 73 |         }
 74 |         if(is.null(outputDir)){
 75 |             outputDir <- tempdir()
 76 |         }
 77 |         # NOTE(review): the table is registered as "gene2symbol" here while
 78 |         # fromOwn() registers it as "symbol"; confirm which name consumers
 79 |         # of the generated database expect.
 80 |         package <- suppressWarnings(makeOrgPackage(
 81 |             gene_info = geneinfo,
 82 |             gene2symbol = gene2symbol,
 83 |             entrezid = gene2entrezid,
 84 |             go = gene2go,
 85 |             maintainer = maintainer,
 86 |             author = author,
 87 |             outputDir = outputDir,
 88 |             tax_id = tax_id,
 89 |             genus = "",
 90 |             species = species,
 91 |             version = version,
 92 |             verbose = FALSE,
 93 |             goTable = "go"
 94 |         ))
 95 |         if(isTRUE(install)){
 96 |             install.packages(package, repos = NULL, type = "source")
 97 |             unlink(package,recursive = TRUE)
 98 |         }else{
 99 |             .show.path(package)
100 |             .show.tables(package)
101 |         }
102 |     }
103 | }
104 | 


--------------------------------------------------------------------------------
/R/buildOwn.R:
--------------------------------------------------------------------------------
  1 | #' build Own annotation database with user defined annotation file
  2 | #' @importFrom AnnotationForge makeOrgPackage
  3 | #' @importFrom utils install.packages
  4 | #' @importFrom utils remove.packages
  5 | #' @importFrom stats na.omit
  6 | #' @importFrom stringr str_trim
  7 | #' @param geneinfo gene information table with two columns
  8 | #'                as default("GID","DESCRIPTION")
  9 | #' @param keytype key type for building the annotation db; when it is
 10 | #'               "SYMBOL", "ENSEMBL", "REFSEQ" or "ENTREZID" and the matching
 11 | #'               table is not supplied, the gene ids are used for that table
 12 | #' @param gene2go Gene Ontology information for genes
 13 | #' @param gene2path KEGG Pathway information for genes
 14 | #' @param gene2symbol SYMBOL information for genes
 15 | #' @param gene2refseq REFSEQ or KO information for genes
 16 | #' @param gene2ensembl ENSEMBL or KO information for genes
 17 | #' @param gene2pfam PFAM information for genes
 18 | #' @param gene2reactome REACTOME Pathway or KO information for genes
 19 | #' @param gene2ko KO information for genes
 20 | #' @param gene2interpro INTERPRO information for genes
 21 | #' @param gene2entrezid ENTREZID information for genes
 22 | #' @param gene2biocyc BIOCYC information for genes
 23 | #' @param gene2kd KEGG DISEASE information for genes
 24 | #' @param gene2gad GAD information for genes
 25 | #' @param gene2fundo FunDO information for genes
 26 | #' @param author author for the annotation package
 27 | #' @param maintainer maintainer for the annotation package
 28 | #' @param tax_id taxonomy id for the species
 29 | #' @param genus genus for the species
 30 | #' @param version version for the annotation package
 31 | #' @param species species name(common name,kegg.species.code or scientific name)
 32 | #' @param install install the package or not(default: TRUE)
 33 | #' @param pkgname package name you want to choose
 34 | #' @param rebuild remove an installed copy of the package first(default: FALSE)
 35 | #' @param outputDir temporary output path
 36 | #' @export
 37 | #' @examples
 38 | #' ## build your own annotation for Arabidopsis thaliana
 39 | #' data(ath)
 40 | #' fromOwn(geneinfo = ath, install = FALSE)
 41 | #' @return annotation package
 42 | #' @author Kai Guo
 43 | fromOwn <- function(geneinfo = geneinfo, keytype = NULL, gene2go = NULL, gene2path = NULL, 
 44 |                     gene2symbol = NULL, gene2refseq = NULL,  gene2ensembl = NULL,
 45 |                     gene2pfam = NULL, gene2reactome= NULL, gene2ko = NULL,
 46 |                     gene2interpro = NULL, gene2entrezid= NULL, gene2biocyc = NULL,
 47 |                     gene2kd = NULL,gene2fundo =NULL, gene2gad = NULL,
 48 |         version = NULL, maintainer = NULL, author = NULL, outputDir = NULL,
 49 |         tax_id = NULL, genus = NULL, species = NULL, install = TRUE, pkgname=NULL,rebuild=FALSE){
 50 | 
 51 |     cat("Please make sure you have Gene Ontology and KEGG pathway
 52 |         or KO data.frame ready.\n")
 53 |     # missing() comes first: the default 'geneinfo = geneinfo' refers to
 54 |     # itself and cannot be evaluated when the argument is omitted.
 55 |     if(missing(geneinfo) || is.null(geneinfo)){
 56 |         stop("You must have Gene information table")
 57 |     }
 58 |     # Resolve the name used for the package before anything is removed.
 59 |     if(is.null(species)){
 60 |         species <- "species"
 61 |     }
 62 |     if(!is.null(pkgname)){
 63 |         species <- pkgname
 64 |     }
 65 |     # NOTE(review): assumes the generated package is 'org.<species>.eg.db';
 66 |     # a non-empty genus may change that name - confirm against makeOrgPackage.
 67 |     dbname1 <- paste0('org.', species, '.eg.db')
 68 |     # remove.packages() stops for a package that is not installed, so only
 69 |     # an existing copy is removed.
 70 |     if(isTRUE(rebuild) && nzchar(system.file(package = dbname1))){
 71 |         suppressMessages(remove.packages(dbname1))
 72 |     }
 73 |     # gene table: unique, complete rows with trimmed ids and names
 74 |     colnames(geneinfo)[1] <- "GID"
 75 |     geneinfo <- geneinfo[!duplicated(geneinfo), ]
 76 |     geneinfo <- na.omit(geneinfo)
 77 |     colnames(geneinfo)<-c('GID','GENENAME')
 78 |     geneinfo$GID<-str_trim(geneinfo$GID,side = "both")
 79 |     geneinfo$GENENAME<-str_trim(geneinfo$GENENAME,side = "both")
 80 |     # GO table: a missing evidence column is filled with "IEA"; without any
 81 |     # GO input every gene gets the placeholder term below
 82 |     if(!is.null(gene2go)){
 83 |         if(ncol(gene2go) == 2){
 84 |             gene2go$EVIDENCE <- "IEA"
 85 |             colnames(gene2go)[c(1,2)] <- c("GID", "GO")
 86 |         }else{
 87 |             colnames(gene2go) <- c("GID", "GO", "EVIDENCE")
 88 |         }
 89 |         gene2go <- gene2go[!duplicated(gene2go), ]
 90 |         gene2go <- na.omit(gene2go)
 91 |     }else{
 92 |         gene2go <- data.frame("GID" = geneinfo$GID,
 93 |             "GO" = "GO:0008150", "EVIDENCE" = "IEA")
 94 |     }
 95 |     # Normalise one two-column annotation table or build its placeholder.
 96 |     #   tbl      user supplied data.frame, or NULL
 97 |     #   column   name given to the second column
 98 |     #   self_key if TRUE and keytype equals column, map every gene to itself
 99 |     #   all_ids  if TRUE the empty placeholder gets one row per gene,
100 |     #            otherwise a single row
101 |     .annoTable <- function(tbl, column, self_key = FALSE, all_ids = FALSE){
102 |         if(!is.null(tbl)){
103 |             if(ncol(tbl) != 2){
104 |                 stop("Dataframe must have only two columns")
105 |             }
106 |             colnames(tbl) <- c("GID", column)
107 |             tbl <- tbl[!duplicated(tbl), ]
108 |             return(na.omit(tbl))
109 |         }
110 |         # isTRUE() keeps the default keytype = NULL from breaking if():
111 |         # NULL == "x" is logical(0), which if() rejects.
112 |         if(self_key && isTRUE(keytype == column)){
113 |             placeholder <- data.frame(geneinfo$GID, geneinfo$GID)
114 |         }else if(all_ids){
115 |             placeholder <- data.frame(geneinfo$GID, "")
116 |         }else{
117 |             placeholder <- data.frame(geneinfo$GID[1], "")
118 |         }
119 |         colnames(placeholder) <- c("GID", column)
120 |         placeholder
121 |     }
122 |     gene2path     <- .annoTable(gene2path, "PATH", all_ids = TRUE)
123 |     gene2symbol   <- .annoTable(gene2symbol, "SYMBOL", self_key = TRUE)
124 |     gene2ensembl  <- .annoTable(gene2ensembl, "ENSEMBL", self_key = TRUE)
125 |     gene2refseq   <- .annoTable(gene2refseq, "REFSEQ", self_key = TRUE,
126 |                                 all_ids = TRUE)
127 |     gene2pfam     <- .annoTable(gene2pfam, "PFAM")
128 |     gene2interpro <- .annoTable(gene2interpro, "INTERPRO")
129 |     gene2reactome <- .annoTable(gene2reactome, "REACTOME")
130 |     gene2ko       <- .annoTable(gene2ko, "KO")
131 |     gene2entrezid <- .annoTable(gene2entrezid, "ENTREZID", self_key = TRUE)
132 |     gene2biocyc   <- .annoTable(gene2biocyc, "BIOCYC")
133 |     gene2kd       <- .annoTable(gene2kd, "KEGGDISEASE")
134 |     gene2gad      <- .annoTable(gene2gad, "GAD")
135 |     gene2fundo    <- .annoTable(gene2fundo, "FUNDO")
136 |     # defaults for the package metadata the caller left unset
137 |     if(is.null(version)){
138 |         version <- "0.0.1"
139 |     }
140 |     if(is.null(tax_id)){
141 |         tax_id <- "xxx"
142 |     }
143 |     if(is.null(author)){
144 |         author <- "myself"
145 |     }
146 |     if(is.null(maintainer)){
147 |         maintainer <- "myself<myself@email.com>"
148 |     }
149 |     if(is.null(genus)){
150 |         genus <- ""
151 |     }
152 |     if(is.null(outputDir)){
153 |         outputDir <- tempdir()
154 |     }
155 |     package <- suppressWarnings(makeOrgPackage(
156 |         gene_info = geneinfo,
157 |         symbol = gene2symbol,
158 |         refseq = gene2refseq,
159 |         entrezid = gene2entrezid,
160 |         go = gene2go,
161 |         path = gene2path,
162 |         ko = gene2ko,
163 |         pfam = gene2pfam,
164 |         interpro = gene2interpro,
165 |         reactome = gene2reactome,
166 |         ensembl = gene2ensembl,
167 |         biocyc = gene2biocyc,
168 |         disease = gene2kd,
169 |         gad = gene2gad,
170 |         fundo = gene2fundo,
171 |         version = version,
172 |         maintainer = maintainer,
173 |         author = author,
174 |         outputDir = outputDir,
175 |         tax_id = tax_id,
176 |         genus = genus,
177 |         species = species,
178 |         goTable = "go"
179 |     ))
180 |     # either install the built source package and clean it up, or report
181 |     # where it was written and which tables it contains
182 |     if(isTRUE(install)){
183 |         install.packages(package, repos = NULL, type = "source")
184 |         unlink(package, recursive = TRUE)
185 |     }else{
186 |         .show.path(package)
187 |         .show.tables(package)
188 |     }
189 | }
190 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | ##' @title korg
 2 | ##' @name korg
 3 | ##' @description korg includes species information from the KEGG database.
 4 | ##'              korg data was modified from
 5 | ##'              (https://pathview.uncc.edu/data/korg.tsv)
 6 | ##' @format A matrix with five columns:
 7 | ##' \describe{
 8 | ##' \item{ktax.id}{the KEGG taxonomy ID}
 9 | ##' \item{tax.id}{the NCBI taxonomy ID}
10 | ##' \item{kegg.code}{the KEGG species code}
11 | ##' \item{scientific.name}{Scientific name of species}
12 | ##' \item{common.name}{common name of species}
13 | ##' }
14 | ##' @examples
15 | ##' head(korg)
16 | ##'
17 | "korg"
18 | 
19 | ##' @title TAIR10 geneid to ENTREZID
20 | ##' @name ath
21 | ##' @description The 'ath' dataset includes the annotation information collected
22 | ##' from the TAIR10 database (https://arabidopsis.org/download/index-auto.jsp
23 | ##' %3Fdir%3D%252Fdownload_files%252FGenes%252FTAIR10_genome_release). 
24 | ##' @format A data.frame with two columns: 
25 | ##' \describe{
26 | ##' \item{GID}{The arabidopsis GENE ID}
27 | ##' \item{ENTREZID}{NCBI ENTREZID ID for the arabidopsis}
28 | ##' }
29 | ##' @examples
30 | ##' head(ath)
31 | ##'
32 | "ath"
33 | 


--------------------------------------------------------------------------------
/R/mergeDB.R:
--------------------------------------------------------------------------------
  1 | #' merge two orgDB with keys
  2 | #' @importFrom dplyr distinct
  3 | #' @importFrom AnnotationDbi keytypes keys 
  4 | #' @importFrom RSQLite dbGetQuery dbListTables
  5 | #' @importFrom AnnotationForge makeOrgPackage
  6 | #' @importFrom utils remove.packages
  7 | #' @importFrom utils install.packages
  8 | #' @importFrom stats na.omit
  9 | #' @importFrom utils remove.packages
 10 | #' @importFrom stringr str_trim
 11 | #' @param dbleft a character string naming the left orgDB
 12 | #' @param dbright a character string naming the right orgDB
 13 | #' @param keyleft the keytype use for merging in left orgDB
 14 | #' @param keyright the keytype use for merging in the right orgDB 
 15 | #' @param keytype the keytypes to be included in the merged orgDB ("GID","GENENAME")
 16 | #' @param keep the name of keytype you used if keyleft and keyright were not same
 17 | #' @param species the species name
 18 | #' @param author author for the annotation package
 19 | #' @param maintainer maintainer for the annotation package
 20 | #' @param tax_id taxonomy id for the species
 21 | #' @param genus genus name for the annotation package
 22 | #' @param version version number for the annotation package
 23 | #' @param pkgname package name you want to choose
 24 | #' @param install install the package or not(default: TRUE)
 25 | #' @param rebuild rebuild the package or not(default: FALSE)
 26 | #' @param outputDir temporary output path
 27 | #' @examples
 28 | #' fromKEGG(species = "hsa", anntype="KEGG")
 29 | #' fromAnnHub(species="human")
 30 | #' mergeDB("org.hsa.eg.db","org.human.eg.db",species="merge")
 31 | #' @export
 32 | #' @author Kai Guo
 33 | mergeDB<-function(dbleft,dbright,keyleft="GID",keyright="GID",keytype=NULL,keep = NULL,
 34 |                   species=NULL,author = NULL, 
 35 |                   maintainer = NULL, tax_id = NULL, genus = NULL,
 36 |                   version = NULL, pkgname=NULL,
 37 |                   install = TRUE, outputDir = NULL, rebuild = FALSE){
 38 |   ### extract keytype left
 39 |   if(!is.null(pkgname)){
 40 |     dbname <- paste0('org.', pkgname, '.eg.db')
 41 |   }else{
 42 |     dbname <- paste0('org.', species, '.eg.db')
 43 |   }
 44 |   if(isTRUE(rebuild)){
 45 |     suppressMessages(remove.packages(dbname))
 46 |   }
 47 |   if(is.null(keytype)){
 48 |     keytype=c("GID","GENENAME","SYMBOL")
 49 |   }
 50 |   #############################################
 51 |   #############################################
 52 |   keyleftl <- tolower(keyleft)
 53 |   keyrightl <- tolower(keyright)
 54 |   dbleft_name <- eval(parse(text=paste0(sub('\\.db','_dbconn()',dbleft))))
 55 |   dbright_name <- eval(parse(text=paste0(sub('\\.db','_dbconn()',dbright))))
 56 |   dblall<-dbListTables(dbleft_name)
 57 |   dbrall<-dbListTables(dbright_name)
 58 |   ##
 59 |   if(!keyleftl%in%dblall){
 60 |     keyleftl <-"genes"
 61 |   }
 62 |   if(!keyrightl%in%dbrall){
 63 |     keyrightl <-"genes"
 64 |   }
 65 |   ##
 66 |   dbl <-dbGetQuery(dbleft_name,paste0("SELECT * from"," ",keyleftl))
 67 |   dbr <-dbGetQuery(dbright_name,paste0("SELECT * from"," ",keyrightl))
 68 |   ktleft <- keytypes(eval(parse(text=dbleft)))
 69 |   ktright <- keytypes(eval(parse(text=dbright)))
 70 |   ### match the keytype in the left and right orgDB
 71 |   ksleft <- intersect(ktleft,keytype)
 72 |   ksright <- intersect(ktright,keytype)
 73 |   keys_left <- keys(eval(parse(text=dbleft)),keytype = keyleft )
 74 |   keys_right <- keys(eval(parse(text=dbleft)),keytype = keyright)
 75 |   gene2namel <- data.frame("GID" = keys_left, "GENENAME" = "")
 76 |   gene2namer <- data.frame("GID" = keys_right, "GENENAME" = "")
 77 |   gene2name <- rbind(gene2namel,gene2namer)
 78 |   if("GENENAME" %in% ksleft){
 79 |     gene2namel <- NULL
 80 |     gene2namel <- dbGetQuery(dbleft_name,"SELECT * from gene_info")
 81 |     gene2namel <- merge(dbl,gene2namel)
 82 |     gene2namel <- gene2namel[,2:3]
 83 |     colnames(gene2namel)<-c('GID','GENENAME')
 84 |   }
 85 |   if("GENENAME" %in% ksright){
 86 |     gene2namer <- NULL
 87 |     gene2namer <- dbGetQuery(dbright_name,"SELECT * from gene_info")
 88 |     gene2namer <- merge(dbr,gene2namer)
 89 |     gene2namer <- gene2namer[,2:3]
 90 |     colnames(gene2namer)<-c('GID','GENENAME')
 91 |   }
 92 |   if(!is.null(keep)){
 93 |     gene2namel[,keyleft] <- gene2namel[,1]
 94 |     gene2namer[,keyright] <- gene2namer[,1]
 95 |     colnames(gene2namel)[3] <- keep
 96 |     colnames(gene2namer)[3] <- keep
 97 |     ksleft <- setdiff(ksleft,keep)
 98 |     ksright <- setdiff(ksright,keep)
 99 |   }else{
100 |     gene2namel[,keyleft] <- gene2namel[,1]
101 |     gene2namer[,keyright] <- gene2namer[,1]
102 |   }
103 |   geneinfo<-rbind(gene2namel,gene2namer)
104 |   geneinfo<-na.omit(geneinfo)
105 |   geneinfo<-distinct(geneinfo)
106 |   colnames(geneinfo)[1:2]<-c('GID','GENENAME')
107 |   geneinfo$GID<-str_trim(geneinfo$GID,side = "both")
108 |   geneinfo$GENENAME<-str_trim(geneinfo$GENENAME,side = "both")
109 |   if(nrow(geneinfo)>1){
110 |     geneinfo <- geneinfo[geneinfo$GENENAME!="",]
111 |   }
112 |   gene2gol <- data.frame("GID" = geneinfo$GID[1], "GO" = "", 
113 |                         "EVIDENCE" = "IEA")
114 |   gene2gor <- data.frame("GID" = geneinfo$GID[1], "GO" = "", 
115 |                          "EVIDENCE" = "IEA")
116 |   if("GO" %in% ksleft){
117 |     gene2gol <- NULL
118 |     ####eval(parse(text=paste0("dbListTables(org.mac.eg","_dbconn())")))
119 |     gene2gol <- dbGetQuery(dbleft_name,"SELECT * from go_all")
120 |     gene2gol <- merge(dbl,gene2gol)
121 |     gene2gol <- gene2gol[,2:4]
122 |   }
123 |   if("GO" %in% ksright){
124 |     gene2gor <- NULL
125 |     gene2gor <- dbGetQuery(dbright_name,"SELECT * from go_all")
126 |     gene2gor <- merge(dbr,gene2gor)
127 |     gene2gor <- gene2gor[,2:4]
128 |   }
129 |   gene2go<-rbind(gene2gol,gene2gor)
130 |   gene2go<-na.omit(gene2go)
131 |   gene2go<-distinct(gene2go)
132 |   colnames(gene2go)<-c('GID','GO','EVIDENCE')
133 |   if(nrow(gene2go)>1){
134 |     gene2go<-gene2go[gene2go$GO!="",]
135 |   }
136 |   gene2pathl <- data.frame("GID" = geneinfo$GID[1],"PATH" = "01100")
137 |   gene2pathr <- data.frame("GID" = geneinfo$GID[1],"PATH" = "01100")
138 |   if("PATH" %in% ksleft){
139 |     gene2pathl <- NULL
140 |     if("path" %in% dblall){
141 |       gene2pathl <- dbGetQuery(dbleft_name,"SELECT * from path")
142 |     }else{
143 |       gene2pathl <- dbGetQuery(dbleft_name,"SELECT * from kegg")
144 |     }
145 |     gene2pathl <- merge(dbl,gene2pathl)
146 |     gene2pathl <- gene2pathl[,2:3]
147 |   }
148 |   if("PATH" %in% ksright){
149 |     gene2pathr <- NULL
150 |     if("path" %in% dbrall){
151 |       gene2pathr <- dbGetQuery(dbright_name,"SELECT * from path")
152 |     }else{
153 |       gene2pathr <- dbGetQuery(dbright_name,"SELECT * from kegg")
154 |     }
155 |     gene2pathr <- merge(dbl,gene2pathr)
156 |     gene2pathr <- gene2pathr[,2:3]
157 |     
158 |   }
159 |   gene2path<-rbind(gene2pathl,gene2pathr)
160 |   gene2path<-na.omit(gene2path)
161 |   gene2path<-distinct(gene2path)
162 |   colnames(gene2path)<-c('GID','PATH')
163 |   if(nrow(gene2path)>1){
164 |     gene2path<-gene2path[gene2path$PATH!="",]
165 |   }
166 |   gene2kol <- data.frame("GID" = geneinfo$GID[1],"KO" = "")
167 |   gene2kor <- data.frame("GID" = geneinfo$GID[1],"KO" = "")
168 |   if("KO" %in% ksleft){
169 |     gene2kol <- NULL
170 |     gene2kol <- dbGetQuery(dbleft_name,"SELECT * from ko")
171 |     gene2kol <- merge(dbl,gene2kol)
172 |     gene2kol <- gene2kol[,2:3]
173 |   }
174 |   if("KO" %in% ksright){
175 |     gene2kor <- NULL
176 |     gene2kor <- dbGetQuery(dbright_name,"SELECT * from ko")
177 |     gene2kor <- merge(dbl,gene2kor)
178 |     gene2kor <- gene2kor[,2:3]
179 |   }
180 |   gene2ko<-rbind(gene2kol,gene2kor)
181 |   gene2ko<-na.omit(gene2ko)
182 |   gene2ko<-distinct(gene2ko)
183 |   colnames(gene2ko)<-c('GID','KO')
184 |   if(nrow(gene2path)>1){
185 |     gene2ko<-gene2ko[gene2ko$KO!="",]
186 |   }
187 |   gene2refseql <- data.frame("GID" = geneinfo$GID[1], "REFSEQ" = "")
188 |   gene2refseqr <- data.frame("GID" = geneinfo$GID[1], "REFSEQ" = "")
189 |   if("REFSEQ" %in% ksleft){
190 |     gene2refseql <- NULL
191 |     gene2refseql <- dbGetQuery(dbleft_name,"SELECT * from refseq")
192 |     gene2refseql <- merge(dbl,gene2refseql)
193 |     gene2refseql <- gene2refseql[,2:3]
194 |   }
195 |   if("REFSEQ" %in% ksright){
196 |     gene2refseqr <- NULL
197 |     gene2refseqr <- dbGetQuery(dbright_name,"SELECT * from refseq")
198 |     gene2refseqr <- merge(dbl,gene2refseqr)
199 |     gene2refseqr <- gene2refseqr[,2:3]
200 |   }
201 |   gene2refseq<-rbind(gene2refseql,gene2refseqr)
202 |   colnames(gene2refseq)<-c('GID','REFSEQ')
203 |   if(nrow(gene2refseq)>1){
204 |     gene2refseq<-gene2refseq[gene2refseq$REFSEQ!="",]
205 |   }
206 |   if(keep == "REFSEQ"){
207 |     gene2refseq <- geneinfo[,c(1,3)]
208 |     geneinfo <- geneinfo[,1:2]
209 |   }
210 |   gene2refseq<-na.omit(gene2refseq)
211 |   gene2refseq<-distinct(gene2refseq)
212 |   gene2symboll <- data.frame("GID" = geneinfo$GID[1], "SYMBOL" = "")
213 |   gene2symbolr <- data.frame("GID" = geneinfo$GID[1], "SYMBOL" = "")
214 |   if("SYMBOL" %in% ksleft){
215 |     gene2symboll <- NULL
216 |     gene2symboll <- dbGetQuery(dbleft_name,"SELECT * from symbol")
217 |     gene2symboll <- merge(dbl,gene2symboll)
218 |     gene2symboll <- gene2symboll[,2:3]
219 |   }
220 |   if("SYMBOL" %in% ksright){
221 |     gene2symbolr <- NULL
222 |     gene2symbolr <- dbGetQuery(dbright_name,"SELECT * from symbol")
223 |     gene2symbolr <- merge(dbl,gene2symbolr)
224 |     gene2symbolr <- gene2symbolr[,2:3]
225 |   }
226 |   gene2symbol<-rbind(gene2symboll,gene2symbolr)
227 |   colnames(gene2symbol)<-c('GID','SYMBOL')
228 |   if(nrow(gene2symbol)>1){
229 |     gene2symbol<-gene2symbol[gene2symbol$SYMBOL!="",]
230 |   }
231 |   if(keep == "SYMBOL"){
232 |     gene2symbol <- geneinfo[,c(1,3)]
233 |     geneinfo <- geneinfo[,1:2]
234 |   }
235 |   gene2symbol<-na.omit(gene2symbol)
236 |   gene2symbol<-distinct(gene2symbol)
237 |   #
238 |   gene2ensembll <- data.frame("GID" = geneinfo$GID[1], "ENSEMBL" = "")
239 |   gene2ensemblr <- data.frame("GID" = geneinfo$GID[1], "ENSEMBL" = "")
240 |   if("ENSEMBL" %in% ksleft){
241 |     gene2ensembll <- NULL
242 |     gene2ensembll <- dbGetQuery(dbleft_name,"SELECT * from ensembl")
243 |     gene2ensembll <- merge(dbl,gene2ensembll)
244 |     gene2ensembll <- gene2ensembll[,2:3]
245 |   }
246 |   if("ENSEMBL" %in% ksright){
247 |     gene2ensemblr <- NULL
248 |     gene2ensemblr <- dbGetQuery(dbright_name,"SELECT * from ensembl")
249 |     gene2ensemblr <- merge(dbl,gene2ensemblr)
250 |     gene2ensemblr <- gene2ensemblr[,2:3]
251 |   }
252 |   gene2ensembl<-rbind(gene2ensembll,gene2ensemblr)
253 |   colnames(gene2ensembl)<-c('GID','ENSEMBL')
254 |   if(nrow(gene2ensembl)>1){
255 |     gene2ensembl<-gene2ensembl[gene2ensembl$ENSEMBL!="",]
256 |   }
257 |   #
258 |   if(keep == "ENSEMBL"){
259 |     gene2ensembl <- geneinfo[,c(1,3)]
260 |     geneinfo <- geneinfo[,1:2]
261 |   }
262 |   gene2ensembl<-na.omit(gene2ensembl)
263 |   gene2ensembl<-distinct(gene2ensembl)
264 |   #
265 |   gene2entrezidl <- data.frame("GID" = geneinfo$GID[1], "ENTREZID" = "")
266 |   gene2entrezidr <- data.frame("GID" = geneinfo$GID[1], "ENTREZID" = "")
267 |   if("ENTREZID" %in% ksleft){
268 |     gene2entrezidl <- NULL
269 |     gene2entrezidl <- dbGetQuery(dbleft_name,"SELECT * from entrezid")
270 |     gene2entrezidl <- merge(dbl,gene2entrezidl)
271 |     gene2entrezidl <- gene2entrezidl[,2:3]
272 |   }
273 |   if("ENTREZID" %in% ksright){
274 |     gene2entrezidr <- NULL
275 |     gene2entrezidr <- dbGetQuery(dbright_name,"SELECT * from entrezid")
276 |     gene2entrezidr <- merge(dbl,gene2entrezidr)
277 |     gene2entrezidr <- gene2entrezidr[,2:3]
278 |   }
279 |   gene2entrezid<-rbind(gene2entrezidl,gene2entrezidr)
280 |   colnames(gene2entrezid)<-c('GID','ENTREZID')
281 |   if(nrow(gene2entrezid)>1){
282 |     gene2entrezid<-gene2entrezid[gene2entrezid$ENTREZID!="",]
283 |   }
284 |   #
285 |   if(keep == "ENTREZID"){
286 |     gene2entrezid <- geneinfo[,c(1,3)]
287 |     geneinfo <- geneinfo[,1:2]
288 |   }
289 |   gene2entrezid<-na.omit(gene2entrezid)
290 |   gene2entrezid<-distinct(gene2entrezid)
291 |   gene2pfaml <- data.frame("GID" = geneinfo$GID[1], "PFAM" = "")
292 |   gene2pfamr <- data.frame("GID" = geneinfo$GID[1], "PFAM" = "")
293 |   if("PFAM" %in% ksleft){
294 |     gene2pfaml <- NULL
295 |     gene2pfaml <- dbGetQuery(dbleft_name,"SELECT * from pfam")
296 |     gene2pfaml <- merge(dbl,gene2pfaml)
297 |     gene2pfaml <- gene2pfaml[,2:3]
298 |   }
299 |   if("PFAM" %in% ksright){
300 |     gene2pfamr <- NULL
301 |     gene2pfamr <- dbGetQuery(dbright_name,"SELECT * from pfam")
302 |     gene2pfamr <- merge(dbl,gene2pfamr)
303 |     gene2pfamr <- gene2pfamr[,2:3]
304 |   }
305 |   gene2pfam<-rbind(gene2pfaml,gene2pfamr)
306 |   gene2pfam<-na.omit(gene2pfam)
307 |   gene2pfam<-distinct(gene2pfam)
308 |   colnames(gene2pfam)<-c('GID','PFAM')
309 |   if(nrow(gene2pfam)>1){
310 |     gene2pfam<-gene2pfam[gene2pfam$PFAM!="",]
311 |   }
312 |   gene2interprol <- data.frame("GID" = geneinfo$GID[1], "INTERPRO" = "")
313 |   gene2interpror <- data.frame("GID" = geneinfo$GID[1], "INTERPRO" = "")
314 |   if("INTERPRO" %in% ksleft){
315 |     gene2interprol <- NULL
316 |     gene2interprol <- dbGetQuery(dbleft_name,"SELECT * from interpro")
317 |     gene2interprol <- merge(dbl,gene2interprol)
318 |     gene2interprol <- gene2interprol[,2:3]
319 |   }
320 |   if("INTERPRO" %in% ksright){
321 |     gene2interpror <- NULL
322 |     gene2interpror <- dbGetQuery(dbright_name,"SELECT * from interpro")
323 |     gene2interpror <- merge(dbl,gene2interpror)
324 |     gene2interpror <- gene2interpror[,2:3]
325 |   }
326 |   gene2interpro<-rbind(gene2interprol,gene2interpror)
327 |   gene2interpro<-na.omit(gene2interpro)
328 |   gene2interpro<-distinct(gene2interpro)
329 |   colnames(gene2interpro)<-c('GID','INTERPRO')
330 |   if(nrow(gene2interpro)>1){
331 |     gene2interpro<-gene2interpro[gene2interpro$INTERPRO!="",]
332 |   }
333 |   #
334 |   gene2reactl <- data.frame("GID" = geneinfo$GID[1], "REACTOME" = "")
335 |   gene2reactr <- data.frame("GID" = geneinfo$GID[1], "REACTOME" = "")
336 |   if("REACTOME" %in% ksleft){
337 |     gene2reactl <- NULL
338 |     gene2reactl <- dbGetQuery(dbleft_name,"SELECT * from reactome")
339 |     gene2reactl <- merge(dbl,gene2reactl)
340 |     gene2reactl <- gene2reactl[,2:3]
341 |   }
342 |   if("REACTOME" %in% ksright){
343 |     gene2reactr <- NULL
344 |     gene2reactr <- dbGetQuery(dbright_name,"SELECT * from reactome")
345 |     gene2reactr <- merge(dbl,gene2reactr)
346 |     gene2reactr <- gene2reactr[,2:3]
347 |   }
348 |   gene2react<-rbind(gene2reactl,gene2reactr)
349 |   gene2react<-na.omit(gene2react)
350 |   gene2react<-distinct(gene2react)
351 |   colnames(gene2react)<-c('GID','REACTOME')
352 |   if(nrow(gene2react)>1){
353 |     gene2react<-gene2react[gene2react$REACTOME!="",]
354 |   }
355 |   #
356 |   gene2biocycl <- data.frame("GID" = geneinfo$GID[1], "BIOCYC" = "")
357 |   gene2biocycr <- data.frame("GID" = geneinfo$GID[1], "BIOCYC" = "")
358 |   if("BIOCYC" %in% ksleft){
359 |     gene2biocycl <- NULL
360 |     gene2biocycl <- dbGetQuery(dbleft_name,"SELECT * from biocyc")
361 |     gene2biocycl <- merge(dbl,gene2biocycl)
362 |     gene2biocycl <- gene2biocycl[,2:3]
363 |   }
364 |   if("BIOCYC" %in% ksright){
365 |     gene2biocycr <- NULL
366 |     gene2biocycr <- dbGetQuery(dbright_name,"SELECT * from biocyc")
367 |     gene2biocycr <- merge(dbl,gene2biocycr)
368 |     gene2biocycr <- gene2biocycr[,2:3]
369 |   }
370 |   gene2biocyc<-rbind(gene2biocycl,gene2biocycr)
371 |   gene2biocyc<-na.omit(gene2biocyc)
372 |   gene2biocyc<-distinct(gene2biocyc)
373 |   colnames(gene2biocyc)<-c('GID','BIOCYC')
374 |   if(nrow(gene2biocyc)>1){
375 |     gene2biocyc<-gene2biocyc[gene2biocyc$BIOCYC!="",]
376 |   }
377 |   ###
378 |   gene2kdl <- data.frame("GID" = geneinfo$GID[1], "KEGGDISEASE" = "")
379 |   gene2kdr <- data.frame("GID" = geneinfo$GID[1], "KEGGDISEASE" = "")
380 |   if("KEGGDISEASE" %in% ksleft){
381 |     gene2kdl <- NULL
382 |     gene2kdl <- dbGetQuery(dbleft_name,"SELECT * from disease")
383 |     gene2kdl <- merge(dbl,gene2kdl)
384 |     gene2kdl <- gene2kdl[,2:3]
385 |   }
386 |   if("KEGGDISEASE" %in% ksright){
387 |     gene2kdr <- NULL
388 |     gene2kdr <- dbGetQuery(dbright_name,"SELECT * from disease")
389 |     gene2kdr <- merge(dbl,gene2kdr)
390 |     gene2kdr <- gene2kdr[,2:3]
391 |   }
392 |   gene2kd<-rbind(gene2kdl,gene2kdr)
393 |   gene2kd<-na.omit(gene2kd)
394 |   gene2kd<-distinct(gene2kd)
395 |   colnames(gene2kd)<-c('GID','KEGGDISEASE')
396 |   if(nrow(gene2kd)>1){
397 |     gene2kd<-gene2kd[gene2kd$KEGGDISEASE!="",]
398 |   }
399 |   ####
400 |   gene2gadl <- data.frame("GID" = geneinfo$GID[1], "GAD" = "")
401 |   gene2gadr <- data.frame("GID" = geneinfo$GID[1], "GAD" = "")
402 |   if("GAD" %in% ksleft){
403 |     gene2gadl <- NULL
404 |     gene2gadl <- dbGetQuery(dbleft_name,"SELECT * from gad")
405 |     gene2gadl <- merge(dbl,gene2gadl)
406 |     gene2gadl <- gene2gadl[,2:3]
407 |   }
408 |   if("GAD" %in% ksright){
409 |     gene2gadr <- NULL
410 |     gene2gadr <- dbGetQuery(dbright_name,"SELECT * from gad")
411 |     gene2gadr <- merge(dbl,gene2gadr)
412 |     gene2gadr <- gene2gadr[,2:3]
413 |   }
414 |   gene2gad<-rbind(gene2gadl,gene2gadr)
415 |   gene2gad<-na.omit(gene2gad)
416 |   gene2gad<-distinct(gene2gad)
417 |   colnames(gene2gad)<-c('GID','GAD')
418 |   if(nrow(gene2gad)>1){
419 |     gene2gad<-gene2gad[gene2gad$GAD!="",]
420 |   }
421 |   ###
422 |   gene2fundol <- data.frame("GID" = geneinfo$GID[1], "FUNDO" = "")
423 |   gene2fundor <- data.frame("GID" = geneinfo$GID[1], "FUNDO" = "")
424 |   if("FUNDO" %in% ksleft){
425 |     gene2fundol <- NULL
426 |     gene2fundol <- dbGetQuery(dbleft_name,"SELECT * from fundo")
427 |     gene2fundol <- merge(dbl,gene2fundol)
428 |     gene2fundol <- gene2fundol[,2:3]
429 |   }
430 |   if("FUNDO" %in% ksright){
431 |     gene2fundor <- NULL
432 |     gene2fundor <- dbGetQuery(dbright_name,"SELECT * from fundo")
433 |     gene2fundor <- merge(dbl,gene2fundor)
434 |     gene2fundor <- gene2fundor[,2:3]
435 |   }
436 |   gene2fundo<-rbind(gene2fundol,gene2fundor)
437 |   gene2fundo<-na.omit(gene2fundo)
438 |   gene2fundo<-distinct(gene2fundo)
439 |   colnames(gene2fundo)<-c('GID','FUNDO')
440 |   if(nrow(gene2fundo)>1){
441 |     gene2fundo<-gene2fundo[gene2fundo$FUNDO!="",]
442 |   }
443 |   ###
444 |   if(is.null(author)){
445 |     author <-"myself"
446 |   }
447 |   if(is.null(maintainer)){
448 |     maintainer <- "mysel<myself@gmail.com>"
449 |   }
450 |   if(is.null(tax_id)){
451 |     tax_id <- "123"
452 |   }
453 |   if(is.null(species)){
454 |     species <- species
455 |   }
456 |   if(!is.null(pkgname)){
457 |     species <- pkgname
458 |   }
459 |   if(is.null(version)){
460 |     version <- "0.0.1"
461 |   }
462 |   if(is.null(genus)){
463 |     genus <- ""
464 |   }
465 |   if(is.null(outputDir)){
466 |     outputDir <- tempdir()
467 |   }
468 |   species <- gsub(' .*', '', species)
469 |   geneinfo<-na.omit(geneinfo)
470 |   geneinfo<-distinct(geneinfo)
471 |   package <- suppressWarnings(makeOrgPackage(gene_info = geneinfo,
472 |                                              symbol = gene2symbol,
473 |                                              entrezid = gene2entrezid,
474 |                                              refseq = gene2refseq,
475 |                                              ensembl = gene2ensembl,
476 |                                              go = gene2go,
477 |                                              path = gene2path,
478 |                                              ko = gene2ko,
479 |                                              pfam = gene2pfam,
480 |                                              interpro = gene2interpro,
481 |                                              reactome = gene2react,
482 |                                              biocyc = gene2biocyc,
483 |                                              disease = gene2kd,
484 |                                              gad = gene2gad,
485 |                                              fundo = gene2fundo,
486 |                                              version = version,
487 |                                              maintainer = maintainer,
488 |                                              author = author,
489 |                                              outputDir = outputDir,
490 |                                              tax_id = tax_id,
491 |                                              genus = genus,
492 |                                              species = species,
493 |                                              verbose = FALSE,
494 |                                              goTable = "go"
495 |   ))
496 |   if(isTRUE(install)){
497 |     install.packages(package, repos = NULL, type = "source")
498 |     unlink(package, recursive = TRUE)
499 |   }else{
500 |     .show.path(package)
501 |     .show.tables(package)
502 |     #return(package)
503 |   }
504 | }
505 | 


--------------------------------------------------------------------------------
/R/misc.R:
--------------------------------------------------------------------------------
  1 | ##' uppercase the first letter
  2 | ##' @param x string
  3 | ##' @return character with first letter uppercase
  4 | ##' @author Kai Guo
  5 | simpleCap <- function(x) {
  6 |     s <- strsplit(x, " ")[[1]]
  7 |     paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", 
  8 |         collapse = " ")
  9 | }
 10 | 
 11 | #' extract GO information from NCBI and filter by taxid
 12 | #' @importFrom data.table fread
 13 | #' @importFrom R.utils gunzip
 14 | #' @importFrom utils download.file
 15 | #' @importFrom data.table ":="
 16 | #' @param taxid taxonomy id for the species
 17 | #' @param species species name(common name,kegg.species.code or scientifc name)
 18 | #' @return dataframe with gene2go information
 19 | #' @author Kai Guo
 20 | .extratGO <- function(taxid = NULL, species = NULL){
 21 |     # temp file
 22 |     if(is.null(taxid)){
 23 |         taxid <- .get.species.info(species)['tax.id']
 24 |     }
 25 |     tmp <- paste(tempfile(), "gz", sep = ".")
 26 |     # import Gene to Gene Ontology from NCBI Gene database
 27 |     download.file("ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2go.gz",
 28 |         quiet = TRUE,destfile = tmp)
 29 |     # uncompress
 30 |     gunzip(tmp)
 31 |     # read the file (linux and windows)
 32 |     gene2go <- fread(sub("\\.gz", "", tmp), verbose = FALSE, 
 33 |                 showProgress = FALSE)
 34 |     # select columns and rename
 35 |     gene2go <- unique(gene2go[, c(seq_len(4)), with = FALSE])
 36 |     colnames(gene2go) <- c("taxid", "GID", "GO", "EVIDENCE")
 37 | 
 38 |     # convert columns in character
 39 |     gene2go[, `:=`(taxid = as.character(gene2go$taxid),
 40 |             GID = as.character(gene2go$GID))]
 41 |     # filter with taxid
 42 |     gene2go <- as.data.frame(gene2go)
 43 |     gene2go <- gene2go[gene2go$taxid == taxid,2:4]
 44 |     return(gene2go)
 45 | }
 46 | 
 47 | #' extract gene information from NCBI
 48 | #' @importFrom data.table fread
 49 | #' @importFrom R.utils gunzip
 50 | #' @importFrom utils download.file
 51 | #' @importFrom data.table ":="
 52 | #' @param taxid taxonomy id for the species
 53 | #' @param species species name(common name,kegg.species.code or scientifc name)
 54 | #' @author Kai Guo
 55 | .extratGene <- function(taxid = NULL, species = NULL){
 56 |     if(is.null(taxid)){
 57 |         taxid <- .get.species.info(species)['tax.id']
 58 |     }
 59 |     # temp file
 60 |     tmp <- paste(tempfile(), "gz", sep = ".")
 61 |     # import Gene to Gene Ontology from NCBI Gene database
 62 |     download.file("ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz",
 63 |         quiet = TRUE, destfile = tmp)
 64 |     # uncompress
 65 |     gunzip(tmp)
 66 |     # read the file (linux and windows)
 67 |     gene2info <- fread(sub("\\.gz", "", tmp), verbose=FALSE, 
 68 |             showProgress = FALSE)
 69 |     # select columns and rename
 70 |     gene2info <- unique(gene2info[, c(seq_len(3), 9), with = FALSE])
 71 |     colnames(gene2info) <- c("taxid", "GID", "SYMBOL", "GENENAME")
 72 |     # convert columns in character
 73 |     gene2info[, `:=`(taxid = as.character(gene2go$taxid),
 74 |             GID = as.character(gene2go$GID))]
 75 |     # filter with taxid
 76 |     gene2go <- gene2go[taxid == taxid, 2:4]
 77 |     return(as.data.frame(gene2info))
 78 | }
 79 | #' modified from pathview kegg.species.code
 80 | #' @importFrom utils data
 81 | #' @param species species name(common name,kegg.species.code or scientifc name)
 82 | #' @param na.rm TRUE/FALSE
 83 | #' @return character with species name
 84 | #' @author Kai Guo
 85 | .get.species.info <- function (species = "hsa", na.rm = FALSE){
 86 |     nspec <- length(species)
 87 |     if(!exists("korg")) data(korg)
 88 |     ridx <- match(species, korg[, seq_len(5)]) %% nrow(korg)
 89 |     nai <- is.na(ridx)
 90 |     if (sum(nai) > 0) {
 91 |     na.msg <- sprintf("Unknown species '%s'! in KEGG database, try others...", paste(species[nai],
 92 |         sep = "", collapse = "', '"))
 93 |     message("Note: ", na.msg)
 94 |     }
 95 |     if (sum(nai) == nspec) {
 96 |         stop.msg <- "All species are invalid!"
 97 |         stop(stop.msg)
 98 |     }
 99 |     if (any(ridx[!nai] == 0)) ridx[!nai & ridx == 0] <- nrow(korg)
100 |     if (isTRUE(na.rm)) ridx = ridx[!nai]
101 |     species.info <- korg[ridx, ]
102 |     return(species.info)
103 | }
104 | 
##' @title get species information in Ensembl
##' @importFrom dplyr select_
##' @importFrom dplyr collect
##' @importFrom dplyr pull
##' @importFrom magrittr %>%
##' @importFrom biomaRt listDatasets
##' @importFrom jsonlite fromJSON toJSON 
##' @importFrom httr content GET
##' @param species species
##' @param mart biomaRt mart
##' @return list with species information
##' (dbname, chr_info, chr_version, chr_assembly_date)
##' @author Kai Guo
.getmartdb <- function(species, mart){
    lhs <- listDatasets(mart)
    # Turn the species name into a pattern: capitalise each word, let the
    # spaces match any whitespace and require a trailing " genes".
    spe <- simpleCap(species)
    spe <- gsub(' ', '\\\\s', spe)
    spe <- paste0(spe,'\\s','genes')
    sel <- grepl(spe, lhs$description, ignore.case = FALSE)
    tmp <- lhs[sel, ]
    # Dataset name(s) of the matching rows. Plain column extraction replaces
    # the deprecated dplyr::select_() pipeline and yields the same vector.
    dataset <- tmp[["dataset"]]
    # Exactly one dataset must match, otherwise the species is ambiguous
    # or unknown.
    if(length(dataset) != 1){
        stop("Maybe you need first check the avaliable database by
            using listSpecies()\n")
    }
    # Derive the organism name used by the REST endpoint from the dataset name.
    organism <- gsub(' ', '_', sub('(_gene|_eg).*', '', dataset))
    if(organism == "Oryza_sativa_Japonica"){
        organism <- "Oryza_sativa"
    }
    # The plant and non-plant marts were both pointed at the same endpoint.
    pre_site <- "http://rest.ensembl.org/info/assembly/"
    api_error <- function(e) stop(
    "The API 'http://rest.ensembl.org' does not seem to work properly.
    Are you connected to the internet? Is the homepage
    'http://rest.ensembl.org' currently available?", call. = FALSE)
    # content_type() is not among the functions imported from httr, so it is
    # called with its namespace. Unqualified, the call failed and the failure
    # was reported as an API outage by the handler above.
    chr_d <- tryCatch(
        fromJSON(toJSON(content(GET(paste0(pre_site, organism, "?"),
            httr::content_type("application/json"))))),
        error = api_error)
    chr_info <- chr_d$top_level_region
    chr_version <- chr_d$assembly_name
    chr_assembly_date <- chr_d$assembly_date
    rhs <- list(dbname = dataset, chr_info = chr_info,
        chr_version = chr_version, chr_assembly_date = chr_assembly_date)
    return(rhs)
}
153 | 
#'@title get database name by using species name
#'@param species species name
#'@return character with database name
#'(NULL when the species has no known database)
#'@author Kai Guo
.getdbname <- function(species = species){
    # Alternative spellings that are mapped onto the canonical keys below.
    aliases <- c(ecolik12 = "ecoli", chimp = "chipm")
    # "worm" replaces the former "warm" typo, which made the worm alias
    # unreachable.
    choices <- c("anopheles",
        "arabidopsis", "bovine", "celegans", "canine", "fly", "zebrafish",
        "ecoli", "ecsakai", "chicken", "human", "mouse", "rhesus", "malaria",
        "chipm", "rat",
        "toxoplasma", "streptomyces", "pig", "yeast", "xenopus", "worm")
    # Everything that touches `species` stays inside tryCatch so that any
    # failure (unknown name, wrong type, missing argument) ends up as
    # "unsupported" and therefore as a NULL result.
    species <- tryCatch({
        if(length(species) == 1 && species %in% names(aliases)){
            species <- aliases[[species]]
        }
        match.arg(species, choices)
    }, error = function(cond){return("unsupported")})
    # Species that have a database file. Accepted names that are absent here
    # (e.g. "yeast") give NULL, as before.
    dbnames <- c(
        anopheles = "org.Ag.eg.db.sqlite",
        bovine    = "org.Bt.eg.db.sqlite",
        canine    = "org.Cf.eg.db.sqlite",
        worm      = "org.Ce.eg.db.sqlite",
        celegans  = "org.Ce.eg.db.sqlite",
        chicken   = "org.Gg.eg.db.sqlite",
        ecoli     = "org.EcK12.eg.db.sqlite",
        ecsakai   = "org.EcSakai.eg.db.sqlite",
        fly       = "org.Dm.eg.db.sqlite",
        human     = "org.Hs.eg.db.sqlite",
        chipm     = "org.Pt.eg.db.sqlite",
        mouse     = "org.Mm.eg.db.sqlite",
        pig       = "org.Ss.eg.db.sqlite",
        rat       = "org.Rn.eg.db.sqlite",
        rhesus    = "org.Mmu.eg.db.sqlite",
        xenopus   = "org.Xl.eg.db.sqlite",
        zebrafish = "org.Dr.eg.db.sqlite")
    if(species %in% names(dbnames)){
        dbname <- dbnames[[species]]
    }else{
        dbname <- NULL
    }
    return(dbname)
}
##' @author Kai Guo
# Prompt on the console for an index and return the typed text converted
# with as.integer().
readidx <- function()
{
    as.integer(readline(prompt = "Enter an index: "))
}
##' @title check package installed or not
##' @param pkg package name
##' @return TRUE/FALSE
##' @author Kai Guo
is_installed <- function(pkg) {
    # system.file() returns "" when the package cannot be found.
    install_dir <- system.file(package = pkg)
    nzchar(install_dir)
}
##' @title show the package path
##' @param package the full path of the package
##' @return whole path for the package
##' @author Kai Guo
.show.path <- function(package){
    rule <- "################################################################\n"
    # The install command the user can paste to install the built package.
    install_call <- paste0("install.packages(\"", package, '\"',
        ",repos = NULL,type='source')")
    cat(rule)
    cat("Please find your annotation package in ...\n")
    cat(package, "\n")
    cat("You can install it by using\n")
    cat(install_call, "\n")
    cat(rule)
}
##' @title show the package content
##' @importFrom RSQLite dbConnect
##' @importFrom RSQLite SQLite
##' @importFrom RSQLite dbListTables
##' @importFrom RSQLite dbReadTable
##' @importFrom RSQLite dbDisconnect
##' @param package the full path of the package
##' @return vector 
##' @author Kai Guo
.show.tables <- function(package){
    pkg <- basename(package)
    # The SQLite file is named after the package with the trailing ".db"
    # replaced by ".sqlite". The pattern is escaped and anchored so that a
    # "db" earlier in the package name cannot be replaced by mistake.
    path <- paste0(package, "/inst/extdata/", sub('\\.db$', '.sqlite', pkg))
    con <- dbConnect(SQLite(), path)
    # Close the connection even if listing the tables fails.
    on.exit(dbDisconnect(con), add = TRUE)
    cat("Here are the tables in the package", pkg, "...\n")
    dblist <- dbListTables(con)
    cat(dblist, "\n")
    cat("################################################################\n")
    # Same invisible TRUE that the trailing dbDisconnect() used to return.
    invisible(TRUE)
}
##' @title get annotataion table from temporary package
##' @importFrom RSQLite dbConnect
##' @importFrom RSQLite SQLite
##' @importFrom RSQLite dbReadTable
##' @importFrom RSQLite dbDisconnect
##' @importFrom dplyr left_join
##' @param path full path for the temporary package
##' @param table a character  indicate the table you want extract
##' @examples
##' data(ath)
##' pack <- fromOwn(geneinfo = ath, install = FALSE, species ="test")
##' # head(getTable(path = pack, table = "gene_info"))
##' @export
##' @return data.frame 
##' @author Kai Guo
getTable <- function(path, table = "go_all"){
    pkg <- basename(path = path)
    # The SQLite file is named after the package with the trailing ".db"
    # replaced by ".sqlite". The pattern is escaped and anchored so that a
    # "db" earlier in the package name cannot be replaced by mistake.
    db_file <- paste0(path, "/inst/extdata/", sub('\\.db$', '.sqlite', pkg))
    # dbConnect() would silently create an empty database for a missing file,
    # so fail early with the offending path instead.
    if(!file.exists(db_file)){
        stop("Cannot find the annotation database: ", db_file, call. = FALSE)
    }
    con <- dbConnect(SQLite(), db_file)
    # Close the connection even if one of the reads below fails.
    on.exit(dbDisconnect(con), add = TRUE)
    gene_info <- dbReadTable(con, "gene_info")
    anno <- dbReadTable(con, table)
    # Attach the requested annotation to every gene through the shared key.
    res <- left_join(gene_info, anno, by = c("X_id" = "X_id"))
    colnames(res)[1] <- "ID"
    return(res)
}
273 | 
274 | 
275 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
# Load hook: sets the session option `stringsAsFactors` to FALSE.
.onLoad <- function(libname, pkgname) {
    settings <- list(stringsAsFactors = FALSE)
    options(settings)
}
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # bioAnno <a href="https://travis-ci.org/guokai8/bioAnno"><img src="https://travis-ci.org/guokai8/bioAnno.svg" alt="Build status"></a>  [![Project Status:](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) [![](https://img.shields.io/badge/devel%20version-0.99.45-green.svg)](https://github.com/guokai8/bioAnno)  [![DOI](https://zenodo.org/badge/224717599.svg)](https://zenodo.org/badge/latestdoi/224717599)
 2 |   
 3 | 
 4 | Build annotation packages using information from __KEGG__, __NCBI__ and __Ensembl__, and return an OrgDb object such as org.Hs.eg.db. The _bioAnno_ package supports all organisms listed in __Ensembl__, __KEGG__ and __NCBI__.  
 5 | ## Description
 6 | With the increasing amount of high-throughput data being generated, having an
 7 | annotation package ready is necessary for people doing functional
 8 | enrichment analysis, ID conversion and other related types of analysis.
 9 | _bioAnno_ provides wrapper functions, including _fromKEGG_, _fromEnsembl_,
10 | _fromNCBI_ and _fromAnnHub_, to build annotation packages.
11 | You can easily build an annotation package with
12 | the KEGG species code (except _fromEnsembl_, which requires the scientific name).
13 | ## Installation
14 | ```
15 | library(devtools)
16 | install_github("guokai8/bioAnno")
17 | ``` 
18 | 
19 | ## Software Usage
20 | 
21 | ```
22 | library(bioAnno)
23 | ## build Annotation package by using fromKEGG
24 | fromKEGG(species="hsa")
25 | ## which will build and install package "org.hsa.eg.db" which will include KEGG, GO annotation 
26 | ## build Annotation package by using fromEnsembl 
27 | fromEnsembl(species="Human")    
28 | ## build from AnnotationHub   
29 | fromAnnHub(species="human")
30 | ```
31 | ### Main Functions
32 | --  _fromKEGG_ build annotation package by extracting annotation information 
33 |     from Kyoto Encyclopedia of Genes and Genomes database (KEGG). 
34 |     You can use kegg species code as query species name.
35 | 
36 | -- _fromNCBI_ build annotation package by extracting annotation information from
37 |     NCBI database.
38 | 
39 | -- _fromEnsembl_ builds an annotation package by extracting annotation information
40 |     from the Ensembl database. It can also build an annotation package for
41 |     plants with the parameter plant = TRUE   
42 |     
43 | -- _fromAnnHub_ builds an annotation package with the AnnotationHub package 
44 | 
45 | -- _mergeDB_ merge two annotation packages
46 | 
47 | ## Note
48 | _bioAnno_ provides wrapper functions that help users easily build annotation packages.
49 | 
50 | ## Contact information
51 | 
52 | For any questions please contact guokai8@gmail.com
53 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-slate


--------------------------------------------------------------------------------
/bioAnno.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/data/ath.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guokai8/bioAnno/3c8936033b806b9f40c7028faa80e4971f6d0859/data/ath.rda


--------------------------------------------------------------------------------
/data/korg.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guokai8/bioAnno/3c8936033b806b9f40c7028faa80e4971f6d0859/data/korg.rda


--------------------------------------------------------------------------------
/man/ath.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{ath}
 5 | \alias{ath}
 6 | \title{TAIR10 geneid to ENTREZID}
 7 | \format{
 8 | A data.frame with two columns: 
 9 | \describe{
10 | \item{GID}{The arabidopsis GENE ID}
11 | \item{ENTREZID}{NCBI ENTREZID ID for the arabidopsis}
12 | }
13 | }
14 | \usage{
15 | ath
16 | }
17 | \description{
18 | The 'ath' dataset includes the annotation information collected
19 | from the TAIR10 database (https://arabidopsis.org/download/index-auto.jsp
20 | %3Fdir%3D%252Fdownload_files%252FGenes%252FTAIR10_genome_release).
21 | }
22 | \examples{
23 | head(ath)
24 | 
25 | }
26 | \keyword{datasets}
27 | 


--------------------------------------------------------------------------------
/man/dot-extratGO.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{.extratGO}
 4 | \alias{.extratGO}
 5 | \title{extract GO information from NCBI and filter by taxid}
 6 | \usage{
 7 | .extratGO(taxid = NULL, species = NULL)
 8 | }
 9 | \arguments{
10 | \item{taxid}{taxonomy id for the species}
11 | 
12 | \item{species}{species name(common name,kegg.species.code or scientifc name)}
13 | }
14 | \value{
15 | dataframe with gene2go information
16 | }
17 | \description{
18 | extract GO information from NCBI and filter by taxid
19 | }
20 | \author{
21 | Kai Guo
22 | }
23 | 


--------------------------------------------------------------------------------
/man/dot-extratGene.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{.extratGene}
 4 | \alias{.extratGene}
 5 | \title{extract gene information from NCBI}
 6 | \usage{
 7 | .extratGene(taxid = NULL, species = NULL)
 8 | }
 9 | \arguments{
10 | \item{taxid}{taxonomy id for the species}
11 | 
12 | \item{species}{species name(common name,kegg.species.code or scientifc name)}
13 | }
14 | \description{
15 | extract gene information from NCBI
16 | }
17 | \author{
18 | Kai Guo
19 | }
20 | 


--------------------------------------------------------------------------------
/man/dot-get.species.info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{.get.species.info}
 4 | \alias{.get.species.info}
 5 | \title{modified from pathview kegg.species.code}
 6 | \usage{
 7 | .get.species.info(species = "hsa", na.rm = FALSE)
 8 | }
 9 | \arguments{
10 | \item{species}{species name(common name,kegg.species.code or scientifc name)}
11 | 
12 | \item{na.rm}{TRUE/FALSE}
13 | }
14 | \value{
15 | character with species name
16 | }
17 | \description{
18 | modified from pathview kegg.species.code
19 | }
20 | \author{
21 | Kai Guo
22 | }
23 | 


--------------------------------------------------------------------------------
/man/dot-getdbname.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{.getdbname}
 4 | \alias{.getdbname}
 5 | \title{get database name by using species name}
 6 | \usage{
 7 | .getdbname(species = species)
 8 | }
 9 | \arguments{
10 | \item{species}{species name}
11 | }
12 | \value{
13 | character with database name
14 | }
15 | \description{
16 | get database name by using species name
17 | }
18 | \author{
19 | Kai Guo
20 | }
21 | 


--------------------------------------------------------------------------------
/man/dot-getmartdb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{.getmartdb}
 4 | \alias{.getmartdb}
 5 | \title{get species information in Ensembl}
 6 | \usage{
 7 | .getmartdb(species, mart)
 8 | }
 9 | \arguments{
10 | \item{species}{species}
11 | 
12 | \item{mart}{biomaRt mart}
13 | }
14 | \value{
15 | list with species information
16 | }
17 | \description{
18 | get species information in Ensembl
19 | }
20 | \author{
21 | Kai Guo
22 | }
23 | 


--------------------------------------------------------------------------------
/man/dot-show.path.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{.show.path}
 4 | \alias{.show.path}
 5 | \title{show the package path}
 6 | \usage{
 7 | .show.path(package)
 8 | }
 9 | \arguments{
10 | \item{package}{the full path of the package}
11 | }
12 | \value{
13 | whole path for the package
14 | }
15 | \description{
16 | show the package path
17 | }
18 | \author{
19 | Kai Guo
20 | }
21 | 


--------------------------------------------------------------------------------
/man/dot-show.tables.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{.show.tables}
 4 | \alias{.show.tables}
 5 | \title{show the package content}
 6 | \usage{
 7 | .show.tables(package)
 8 | }
 9 | \arguments{
10 | \item{package}{the full path of the package}
11 | }
12 | \value{
13 | vector
14 | }
15 | \description{
16 | show the package content
17 | }
18 | \author{
19 | Kai Guo
20 | }
21 | 


--------------------------------------------------------------------------------
/man/fromAnnHub.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/buildAnntationHub.R
 3 | \name{fromAnnHub}
 4 | \alias{fromAnnHub}
 5 | \title{extract annotation database by using AnnotationHub}
 6 | \usage{
 7 | fromAnnHub(
 8 |   species,
 9 |   author = NULL,
10 |   maintainer = NULL,
11 |   tax_id = NULL,
12 |   genus = NULL,
13 |   version = NULL,
14 |   install = TRUE,
15 |   pkgname = NULL,
16 |   outputDir = NULL,
17 |   rebuild = FALSE
18 | )
19 | }
20 | \arguments{
21 | \item{species}{species name(common name,kegg.species.code or scientifc name)}
22 | 
23 | \item{author}{author for the annotation package}
24 | 
25 | \item{maintainer}{maintainer for the annotation package}
26 | 
27 | \item{tax_id}{taxonomy id for the species}
28 | 
29 | \item{genus}{genus for the species}
30 | 
31 | \item{version}{version for the annotation package}
32 | 
33 | \item{install}{install the package or not}
34 | 
35 | \item{pkgname}{package name you want to choose}
36 | 
37 | \item{outputDir}{temporary file path}
38 | 
39 | \item{rebuild}{rebuild the package or not(default: FALSE)}
40 | }
41 | \value{
42 | annotation package
43 | }
44 | \description{
45 | extract annotation database by using AnnotationHub
46 | }
47 | \examples{
48 | ## build annotation package for C. elegans
49 | fromAnnHub(species = "celegans", install = FALSE)
50 | }
51 | \author{
52 | Kai Guo
53 | }
54 | 


--------------------------------------------------------------------------------
/man/fromEnsembl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/buildEnsembl.R
 3 | \name{fromEnsembl}
 4 | \alias{fromEnsembl}
 5 | \title{build annotation from ensembl}
 6 | \usage{
 7 | fromEnsembl(
 8 |   species = "Caenorhabditis elegans",
 9 |   host = NULL,
10 |   anntype = NULL,
11 |   buildall = TRUE,
12 |   author = NULL,
13 |   maintainer = NULL,
14 |   tax_id = NULL,
15 |   genus = NULL,
16 |   version = NULL,
17 |   plant = FALSE,
18 |   install = TRUE,
19 |   pkgname = NULL,
20 |   outputDir = NULL,
21 |   rebuild = FALSE
22 | )
23 | }
24 | \arguments{
25 | \item{species}{the species you want to search,
26 | you can use listSpecies to get the species name}
27 | 
28 | \item{host}{the ensemble API host,for plant you can use
29 | plants.ensembl.org and for human
30 | and other species you can use uswest.ensembl.org}
31 | 
32 | \item{anntype}{the type of function annotation(GO,KEGG,PFAM,InterPro)
33 | you want get from ensemble}
34 | 
35 | \item{buildall}{include all possible annotation types listed in Ensembl}
36 | 
37 | \item{author}{author for the annotation package}
38 | 
39 | \item{maintainer}{maintainer for the annotation package}
40 | 
41 | \item{tax_id}{taxonomy id for the species}
42 | 
43 | \item{genus}{genus name for the annotation package}
44 | 
45 | \item{version}{version number for the annotation package}
46 | 
47 | \item{plant}{plant or animal species (TRUE/FALSE)}
48 | 
49 | \item{install}{install the package or not(default: TRUE)}
50 | 
51 | \item{pkgname}{package name you want to choose}
52 | 
53 | \item{outputDir}{temporary output path}
54 | 
55 | \item{rebuild}{rebuild the package or not(default: FALSE)}
56 | }
57 | \value{
58 | annotation package
59 | }
60 | \description{
61 | build annotation from ensembl
62 | 
63 | build annotation from ensembl
64 | }
65 | \examples{
66 | fromEnsembl(species = "Caenorhabditis elegans", anntype="GO")
67 | }
68 | \author{
69 | Kai Guo
70 | }
71 | 


--------------------------------------------------------------------------------
/man/fromKEGG.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/buildKEGG.R
 3 | \name{fromKEGG}
 4 | \alias{fromKEGG}
 5 | \title{make annotation database from KEGG and GO from NCBI}
 6 | \usage{
 7 | fromKEGG(
 8 |   species = "ath",
 9 |   anntype = c("KEGG"),
10 |   author = NULL,
11 |   maintainer = NULL,
12 |   tax_id = NULL,
13 |   genus = NULL,
14 |   version = NULL,
15 |   pkgname = NULL,
16 |   install = TRUE,
17 |   outputDir = NULL,
18 |   rebuild = FALSE
19 | )
20 | }
21 | \arguments{
22 | \item{species}{species name (common name, kegg.species.code or scientific name)}
23 | 
24 | \item{anntype}{the type of functional annotation (GO, KEGG)
25 | you want to get from KEGG}
26 | 
27 | \item{author}{author for the annotation package}
28 | 
29 | \item{maintainer}{maintainer for the annotation package}
30 | 
31 | \item{tax_id}{taxonomy id for the species}
32 | 
33 | \item{genus}{genus for the species}
34 | 
35 | \item{version}{version for the annotation package}
36 | 
37 | \item{pkgname}{package name you want to choose}
38 | 
39 | \item{install}{install the package or not(default: TRUE)}
40 | 
41 | \item{outputDir}{temporary output path}
42 | 
43 | \item{rebuild}{rebuild the package or not(default: FALSE)}
44 | }
45 | \value{
46 | annotation package
47 | }
48 | \description{
49 | make annotation database from KEGG and GO from NCBI
50 | }
51 | \examples{
52 | fromKEGG(species = "eco", install = FALSE)
53 | }
54 | \author{
55 | Kai Guo
56 | }
57 | 


--------------------------------------------------------------------------------
/man/fromNCBI.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/buildNCBI.R
 3 | \name{fromNCBI}
 4 | \alias{fromNCBI}
 5 | \title{build annotation database from NCBI}
 6 | \usage{
 7 | fromNCBI(
 8 |   species = "ath",
 9 |   author = NULL,
10 |   maintainer = NULL,
11 |   tax_id = NULL,
12 |   genus = NULL,
13 |   version = NULL,
14 |   install = TRUE,
15 |   pkgname = NULL,
16 |   outputDir = NULL,
17 |   rebuild = FALSE
18 | )
19 | }
20 | \arguments{
21 | \item{species}{species name}
22 | 
23 | \item{author}{author for the annotation package}
24 | 
25 | \item{maintainer}{maintainer for the annotation package}
26 | 
27 | \item{tax_id}{taxonomy id for the species}
28 | 
29 | \item{genus}{genus for the species}
30 | 
31 | \item{version}{version for the annotation package}
32 | 
33 | \item{install}{install the package or not(default: TRUE)}
34 | 
35 | \item{pkgname}{package name you want to choose}
36 | 
37 | \item{outputDir}{temporary output path}
38 | 
39 | \item{rebuild}{rebuild the package or not(default: FALSE)}
40 | }
41 | \value{
42 | annotation package
43 | }
44 | \description{
45 | build annotation database from NCBI
46 | }
47 | \examples{
48 | \donttest{
49 | ## build annotation package for E. coli
50 | fromNCBI(species = "eco", install = FALSE)
51 | }
52 | }
53 | \author{
54 | Kai Guo
55 | }
56 | 


--------------------------------------------------------------------------------
/man/fromOwn.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/buildOwn.R
  3 | \name{fromOwn}
  4 | \alias{fromOwn}
  5 | \title{build Own annotation database with user defined annotation file}
  6 | \usage{
  7 | fromOwn(
  8 |   geneinfo = geneinfo,
  9 |   keytype = NULL,
 10 |   gene2go = NULL,
 11 |   gene2path = NULL,
 12 |   gene2symbol = NULL,
 13 |   gene2refseq = NULL,
 14 |   gene2ensembl = NULL,
 15 |   gene2pfam = NULL,
 16 |   gene2reactome = NULL,
 17 |   gene2ko = NULL,
 18 |   gene2interpro = NULL,
 19 |   gene2entrezid = NULL,
 20 |   gene2biocyc = NULL,
 21 |   gene2kd = NULL,
 22 |   gene2fundo = NULL,
 23 |   gene2gad = NULL,
 24 |   version = NULL,
 25 |   maintainer = NULL,
 26 |   author = NULL,
 27 |   outputDir = NULL,
 28 |   tax_id = NULL,
 29 |   genus = NULL,
 30 |   species = NULL,
 31 |   install = TRUE,
 32 |   pkgname = NULL,
 33 |   rebuild = FALSE
 34 | )
 35 | }
 36 | \arguments{
 37 | \item{geneinfo}{gene information table with two columns
 38 | as default("GID","DESCRIPTION")}
 39 | 
 40 | \item{keytype}{key type for building the annotation db}
 41 | 
 42 | \item{gene2go}{Gene Ontology information for genes}
 43 | 
 44 | \item{gene2path}{KEGG Pathway information for genes}
 45 | 
 46 | \item{gene2symbol}{SYMBOL information for genes}
 47 | 
 48 | \item{gene2refseq}{REFSEQ or KO information for genes}
 49 | 
 50 | \item{gene2ensembl}{ENSEMBL or KO information for genes}
 51 | 
 52 | \item{gene2pfam}{PFAM information for genes}
 53 | 
 54 | \item{gene2reactome}{REACTOME Pathway or KO information for genes}
 55 | 
 56 | \item{gene2ko}{KO information for genes}
 57 | 
 58 | \item{gene2interpro}{INTERPRO information for genes}
 59 | 
 60 | \item{gene2entrezid}{ENTREZID information for genes}
 61 | 
 62 | \item{gene2biocyc}{BIOCYC information for genes}
 63 | 
 64 | \item{gene2kd}{KEGG DISEASE information for genes}
 65 | 
 66 | \item{gene2fundo}{FunDO information for genes}
 67 | 
 68 | \item{gene2gad}{GAD information for genes}
 69 | 
 70 | \item{version}{version for the annotation package}
 71 | 
 72 | \item{maintainer}{maintainer for the annotation package}
 73 | 
 74 | \item{author}{author for the annotation package}
 75 | 
 76 | \item{outputDir}{temporary output path}
 77 | 
 78 | \item{tax_id}{taxonomy id for the species}
 79 | 
 80 | \item{genus}{genus for the species}
 81 | 
 82 | \item{species}{species name (common name, kegg.species.code or scientific name)}
 83 | 
 84 | \item{install}{install the package or not(default: TRUE)}
 85 | 
 86 | \item{pkgname}{package name you want to choose}
 87 | }
 88 | \value{
 89 | annotation package
 90 | }
 91 | \description{
 92 | build Own annotation database with user defined annotation file
 93 | }
 94 | \examples{
 95 | ## build your own annotation for Arabidopsis thaliana
 96 | data(ath)
 97 | fromOwn(geneinfo = ath, install = FALSE)
 98 | }
 99 | \author{
100 | Kai Guo
101 | }
102 | 


--------------------------------------------------------------------------------
/man/getTable.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{getTable}
 4 | \alias{getTable}
 5 | \title{get annotation table from temporary package}
 6 | \usage{
 7 | getTable(path, table = "go_all")
 8 | }
 9 | \arguments{
10 | \item{path}{full path for the temporary package}
11 | 
12 | \item{table}{a character string indicating the table you want to extract}
13 | }
14 | \value{
15 | data.frame
16 | }
17 | \description{
18 | get annotation table from temporary package
19 | }
20 | \examples{
21 | data(ath)
22 | pack <- fromOwn(geneinfo = ath, install = FALSE, species ="test")
23 | # head(getTable(path = pack, table = "gene_info"))
24 | }
25 | \author{
26 | Kai Guo
27 | }
28 | 


--------------------------------------------------------------------------------
/man/is_installed.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{is_installed}
 4 | \alias{is_installed}
 5 | \title{check package installed or not}
 6 | \usage{
 7 | is_installed(pkg)
 8 | }
 9 | \arguments{
10 | \item{pkg}{package name}
11 | }
12 | \value{
13 | TRUE/FALSE
14 | }
15 | \description{
16 | check package installed or not
17 | }
18 | \author{
19 | Kai Guo
20 | }
21 | 


--------------------------------------------------------------------------------
/man/korg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{korg}
 5 | \alias{korg}
 6 | \title{korg}
 7 | \format{
 8 | A matrix with five columns:
 9 | \describe{
10 | \item{ktax.id}{the KEGG taxonomy ID}
11 | \item{tax.id}{the NCBI taxonomy ID}
12 | \item{kegg.code}{the KEGG species code}
13 | \item{scientific.name}{Scientific name of species}
14 | \item{common.name}{common name of species}
15 | }
16 | }
17 | \usage{
18 | korg
19 | }
20 | \description{
21 | korg includes species information from the KEGG database.
22 |              korg data was modified from
23 |              (https://pathview.uncc.edu/data/korg.tsv)
24 | }
25 | \examples{
26 | head(korg)
27 | 
28 | }
29 | \keyword{datasets}
30 | 


--------------------------------------------------------------------------------
/man/listSpecies.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/buildEnsembl.R
 3 | \name{listSpecies}
 4 | \alias{listSpecies}
 5 | \title{list species available in Ensembl}
 6 | \usage{
 7 | listSpecies(host = "www", plant = FALSE)
 8 | }
 9 | \arguments{
10 | \item{host}{Ensembl host site}
11 | 
12 | \item{plant}{use plant database or not (default: FALSE)}
13 | }
14 | \value{
15 | data.frame with species information
16 | }
17 | \description{
18 | list species available in Ensembl
19 | }
20 | \examples{
21 | listSpecies()
22 | }
23 | \author{
24 | Kai Guo
25 | }
26 | 


--------------------------------------------------------------------------------
/man/mergeDB.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mergeDB.R
 3 | \name{mergeDB}
 4 | \alias{mergeDB}
 5 | \title{merge two orgDB with keys}
 6 | \usage{
 7 | mergeDB(
 8 |   dbleft,
 9 |   dbright,
10 |   keyleft = "GID",
11 |   keyright = "GID",
12 |   keytype = NULL,
13 |   keep = NULL,
14 |   species = NULL,
15 |   author = NULL,
16 |   maintainer = NULL,
17 |   tax_id = NULL,
18 |   genus = NULL,
19 |   version = NULL,
20 |   pkgname = NULL,
21 |   install = TRUE,
22 |   outputDir = NULL,
23 |   rebuild = FALSE
24 | )
25 | }
26 | \arguments{
27 | \item{dbleft}{a character string indicating the left orgDB}
28 | 
29 | \item{dbright}{a character string indicating the right orgDB}
30 | 
31 | \item{keyleft}{the keytype used for merging in the left orgDB}
32 | 
33 | \item{keyright}{the keytype used for merging in the right orgDB}
34 | 
35 | \item{keytype}{the keytypes to be included in the merged orgDB ("GID","GENENAME")}
36 | 
37 | \item{keep}{the name of the keytype to use if keyleft and keyright are not the same}
38 | 
39 | \item{species}{the species name}
40 | 
41 | \item{author}{author for the annotation package}
42 | 
43 | \item{maintainer}{maintainer for the annotation package}
44 | 
45 | \item{tax_id}{taxonomy id for the species}
46 | 
47 | \item{genus}{genus name for the annotation package}
48 | 
49 | \item{version}{version number for the annotation package}
50 | 
51 | \item{pkgname}{package name you want to choose}
52 | 
53 | \item{install}{install the package or not(default: TRUE)}
54 | 
55 | \item{outputDir}{temporary output path}
56 | 
57 | \item{rebuild}{rebuild the package or not(default: FALSE)}
58 | }
59 | \description{
60 | merge two orgDB with keys
61 | }
62 | \examples{
63 | fromKEGG(species = "hsa", anntype="KEGG")
64 | fromAnnHub(species="human")
65 | mergeDB("org.hsa.eg.db","org.human.eg.db",species="merge")
66 | }
67 | \author{
68 | Kai Guo
69 | }
70 | 


--------------------------------------------------------------------------------
/man/simpleCap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{simpleCap}
 4 | \alias{simpleCap}
 5 | \title{uppercase the first letter}
 6 | \usage{
 7 | simpleCap(x)
 8 | }
 9 | \arguments{
10 | \item{x}{string}
11 | }
12 | \value{
13 | character with first letter uppercase
14 | }
15 | \description{
16 | uppercase the first letter
17 | }
18 | \author{
19 | Kai Guo
20 | }
21 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(bioAnno)
3 | test_check("bioAnno")
4 | 


--------------------------------------------------------------------------------
/tests/testthat/test-bioAnno.R:
--------------------------------------------------------------------------------
1 | library(bioAnno)
2 | expect_silent(1+1)
3 | 


--------------------------------------------------------------------------------
/vignettes/bioAnno.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "The bioAnno package"
  3 | author:
  4 | - name: Kai Guo
  5 |   affiliation:
  6 |   - Biomedical Sciences, University of North Dakota
  7 | date: "`r Sys.Date()`"
  8 | output:
  9 |   html_document:
 10 |     df_print: paged
 11 |   word_document:
 12 |     toc: yes
 13 |     toc_depth: '6'
 14 |   rmarkdown::html_vignette: default
 15 |   pdf_document:
 16 |     toc: yes
 17 |     toc_depth: 6
 18 | vignette: |
 19 |   \usepackage[utf8]{inputenc}
 20 |   %\VignetteIndexEntry{bioAnno}
 21 |   %\VignetteEngine{knitr::knitr}
 22 | ---
 23 | __bioAnno__ is an R package for building annotation packages by using
 24 | information from __KEGG__, __NCBI__ and __Ensembl__; it returns an OrgDb object
 25 | such as org.Hs.eg.db.   
 26 | 
 27 | ## 1. Introduction
 28 | 
 29 | With the increasing amount of high-throughput data being generated, annotation
 30 | packages have become necessary for people who want to do functional
 31 | enrichment analysis, id conversion and other related types of analysis.
 32 | _bioAnno_ provides wrapper functions, including _fromKEGG_, _fromEnsembl_,
 33 | _fromNCBI_ and _fromAnnHub_, to build annotation packages. Making organism
 34 | packages is a straightforward process using the helper functions _fromKEGG_,
 35 | _fromNCBI_ and _fromAnnHub_. Moreover, users can also make their
 36 | own package based on their own annotation file by using _fromOwn_.
 37 | 
 38 | 
 39 | ## 2. Software Usage
 40 | ### 2.1 Installation
 41 | The package can be installed with following command 
 42 | ``` {r install, eval = FALSE}
 43 | if (!requireNamespace("BiocManager"))
 44 |     install.packages("BiocManager")
 45 | BiocManager::install("bioAnno")
 46 | ```
 47 | ### 2.2 Load package 
 48 | ```{r library, results = 'hide', message = FALSE}
 49 | library(bioAnno)
 50 | 
 51 | ```
 52 | 
 53 | ### 2.3 How to use it
 54 | ``` {r quick, message=FALSE}
 55 | library(bioAnno)
 56 | ## build E.coli annotation package by using fromKEGG function from
 57 | ## KEGG database.
 58 | fromKEGG(species="eco", install = FALSE)
 59 | ## which will build "org.eco.eg.db" package. The package contains
 60 | ## KEGG, GO annotation. You can use install = TRUE to direct 
 61 | ## install the package. 
 62 | ## build from arabidopsis thaliana annotation package by using fromAnnHub 
 63 | ## function
 64 | fromAnnHub(species="ath", install = FALSE)
 65 | ```
 66 | ### 2.4 Main Functions
 67 | --  _fromKEGG_ builds an annotation package by extracting annotation information
 68 |     from the Kyoto Encyclopedia of Genes and Genomes (KEGG) database.
 69 |     You can use the KEGG species code as the query name.
 70 | 
 71 | -- _fromNCBI_ builds an annotation package by extracting annotation information from
 72 |     the NCBI database.
 73 | 
 74 | -- _fromEnsembl_ builds an annotation package by extracting annotation information
 75 |     from the Ensembl database. It can also build an annotation package for
 76 |     plants with the parameter plant = TRUE.
 77 |     
 78 | -- _fromAnnHub_ builds an annotation package with the AnnotationHub package
 79 | 
 80 | -- _getTable_ gets an annotation table from a temporary package; the user needs to provide the temporary path
 81 | 
 82 | ## 3. Using the annotation package you created
 83 | An organism level package (an ‘org’ package) you created uses a central gene 
 84 | identifier and contains mappings between this identifier and other kinds of
 85 | identifiers. The most common interface for retrieving data is the _select_
 86 | method.
 87 | ```{r load, message = FALSE}
 88 | # First make your own annotation package and load it
 89 | data(ath)
 90 | fromOwn(geneinfo = ath, install = TRUE)
 91 | library(org.species.eg.db)
 92 | ```
 93 | There are 4 common methods that work together to allow a select interface. The
 94 | 1st one is _columns_, which helps you discover which sorts of annotations can be extracted
 95 | from it.
 96 | ```{r columns, message = FALSE }
 97 | columns(org.species.eg.db)
 98 | ```
 99 | The next method is _keytypes_ which tells you the kinds of things that can be used as
100 | keys.
101 | ```{r keytypes, message = FALSE }
102 | keytypes(org.species.eg.db)
103 | ```
104 | The third method is _keys_ which is used to retrieve all the viable keys of a particular
105 | type.
106 | ```{r keys, message = FALSE}
107 | key <- keys(org.species.eg.db,keytype="ENTREZID")
108 | ```
109 | And finally there is _select_, which extracts data by using values supplied by the other methods.
110 | ```{r select, message = FALSE}
111 | result <- select(org.species.eg.db, keys=key,
112 | columns=c("GID","GO","PATH"),keytype="ENTREZID")
113 | head(result)
114 | ```
115 | Users can also use _mapIds_ to extract the KEGG pathways
116 | for gene identifiers from the annotation package.
117 | ```{r mapIds, message = FALSE}
118 | KEGG<-mapIds(org.species.eg.db,keys=key,column="PATH",keytype="ENTREZID")
119 | head(KEGG)
120 | ```
121 | Or for id conversion
122 | ```{r id, message = FALSE}
123 | mapIds(org.species.eg.db,keys=key[1:10],column="GID",keytype="ENTREZID")
124 | ```
125 | The version number of R and packages loaded for generating the vignette were:
126 | ```{r version, message = FALSE}
127 | sessionInfo()
128 | ```
129 | 
130 | 
131 | 
132 | 


--------------------------------------------------------------------------------