├── .gitignore ├── .Rbuildignore ├── inst ├── diagram.png ├── KEGG.db │ ├── LICENSE │ ├── NAMESPACE │ ├── DESCRIPTION │ └── R │ │ └── zzz.R └── workflow.R ├── NEWS.md ├── NAMESPACE ├── man └── create_kegg_db.Rd ├── DESCRIPTION ├── README.md ├── Makefile ├── README.Rmd └── R └── create_kegg_db.R /.gitignore: -------------------------------------------------------------------------------- 1 | KEGG.db_* 2 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | README.md 2 | README.Rmd 3 | inst/workflow.R 4 | Makefile 5 | -------------------------------------------------------------------------------- /inst/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuLab-SMU/createKEGGdb/HEAD/inst/diagram.png -------------------------------------------------------------------------------- /inst/KEGG.db/LICENSE: -------------------------------------------------------------------------------- 1 | Free for academic use. Non-academic users are requested to obtain a license agreement with KEGG. 
2 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # createKEGGdb 0.0.3 4 | 5 | + fix typo in `get_path2name()` (2023-3-7, Tue) 6 | + update KEGG api (2023-3-5, Sun) 7 | 8 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(create_kegg_db) 4 | importFrom(RSQLite,dbConnect) 5 | importFrom(RSQLite,dbDisconnect) 6 | importFrom(RSQLite,dbDriver) 7 | importFrom(RSQLite,dbWriteTable) 8 | importFrom(clusterProfiler,download_KEGG) 9 | importFrom(magrittr,"%<>%") 10 | -------------------------------------------------------------------------------- /inst/KEGG.db/NAMESPACE: -------------------------------------------------------------------------------- 1 | import(methods) 2 | import(AnnotationDbi) 3 | 4 | ### Only put what is statically exported here. All the AnnObj instances 5 | ### created at load time are dynamically exported (refer to R/zzz.R for 6 | ### the details). 7 | export( 8 | KEGG, 9 | KEGG_dbconn, 10 | KEGG_dbfile, 11 | KEGG_dbschema, 12 | KEGG_dbInfo 13 | ) 14 | -------------------------------------------------------------------------------- /inst/KEGG.db/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: KEGG.db 2 | Title: KEGG.db for KEGG enrichment analysis. 3 | Description: KEGG.db for KEGG enrichment analysis. 
4 | Version: 1.0 5 | Author: xxx 6 | Maintainer: xxx 7 | Depends: R (>= 2.7.0), methods, AnnotationDbi (>= 1.44.0) 8 | Imports: methods, AnnotationDbi 9 | Suggests: DBI 10 | License: BSD 11 | License_restricts_use: yes 12 | biocViews: AnnotationData, FunctionalAnnotation 13 | 14 | -------------------------------------------------------------------------------- /man/create_kegg_db.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/create_kegg_db.R 3 | \name{create_kegg_db} 4 | \alias{create_kegg_db} 5 | \title{create_kegg_db} 6 | \usage{ 7 | create_kegg_db(species) 8 | } 9 | \arguments{ 10 | \item{species}{one of KEGG supported species, e.g. hsa for human} 11 | } 12 | \value{ 13 | KEGG.db package generated in working directory 14 | } 15 | \description{ 16 | create KEGG.db package 17 | } 18 | \author{ 19 | Guangchuang Yu and Ziru Chen 20 | } 21 | -------------------------------------------------------------------------------- /inst/workflow.R: -------------------------------------------------------------------------------- 1 | library("DiagrammeR") 2 | 3 | 4 | grViz("digraph createKEGGdb { 5 | rankdir = TD 6 | node [shape = box, style=filled] 7 | layout = dot 8 | compound =true 9 | #color = crimson 10 | 11 | download [label='Query KEGG pathways for selected organisms'] 12 | database [label='Pack KEGG data into a sqlite file'] 13 | pkg0 [label='KEGG.db package skeleton'] 14 | pkg [label='Build KEGG.db package'] 15 | 16 | download -> database -> pkg 17 | pkg0 -> pkg 18 | 19 | }") -> x 20 | 21 | yyplot::gv2file(x, file = 'diagram.png' ) 22 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: createKEGGdb 2 | Title: Create KEGG.db Package 3 | Version: 0.0.3 4 | Authors@R: c( 5 | person("Guangchuang", "Yu", email = 
"guangchuangyu@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-6485-8781")), 6 | person("Ziru", "Chen", email = "chenziru@picb.ac.cn", role = "aut") 7 | ) 8 | Description: Query online KEGG annotation to generate KEGG.db package that can be used by clusterProfiler and other packages. 9 | Imports: 10 | clusterProfiler, 11 | RSQLite, 12 | magrittr, 13 | pkgbuild 14 | License: Artistic-2.0 15 | Encoding: UTF-8 16 | LazyData: true 17 | RoxygenNote: 7.2.3 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Create KEGG.db Package 4 | 5 | Query online KEGG annotation to generate KEGG.db package that can be 6 | used by clusterProfiler and other packages. 7 | 8 | ## :writing\_hand: Authors 9 | 10 | Guangchuang YU and Ziru Chen 11 | 12 | ## :arrow\_double\_down: Installation 13 | 14 | ``` r 15 | ## install.packages("remotes") 16 | remotes::install_github("YuLab-SMU/createKEGGdb") 17 | ``` 18 | 19 | ## :gear: Workflow 20 | 21 | ![](inst/diagram.png) 22 | 23 | ## :book: Documents 24 | 25 | - [KEGG数据本地化,再也不用担心网络问题了](https://mp.weixin.qq.com/s/k24iLQ9mFFBtEDxQoQGE4g) 26 | - [多物种批量下载KEGG数据并做成KEGG.db包](https://mp.weixin.qq.com/s/PwrdQAkG3pTlwMB6Mj8wXQ) 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PKGNAME := $(shell sed -n "s/Package: *\([^ ]*\)/\1/p" DESCRIPTION) 2 | PKGVERS := $(shell sed -n "s/Version: *\([^ ]*\)/\1/p" DESCRIPTION) 3 | PKGSRC := $(shell basename `pwd`) 4 | 5 | all: rd check clean 6 | 7 | rd: 8 | Rscript -e 'roxygen2::roxygenise(".")' 9 | 10 | readme: 11 | Rscript -e 'rmarkdown::render("README.Rmd")' 12 | 13 | build: 14 | cd ..;\ 15 | R CMD build $(PKGSRC) 16 | 17 | build2: 18 | cd ..;\ 19 | R CMD build --no-build-vignettes $(PKGSRC) 20 | 21 | install: 22 | cd ..;\ 23 | R 
CMD INSTALL $(PKGNAME)_$(PKGVERS).tar.gz 24 | 25 | check: build 26 | cd ..;\ 27 | Rscript -e 'rcmdcheck::rcmdcheck("$(PKGNAME)_$(PKGVERS).tar.gz", args="--as-cran")' 28 | 29 | check2: build 30 | cd ..;\ 31 | R CMD check $(PKGNAME)_$(PKGVERS).tar.gz 32 | 33 | clean: 34 | cd ..;\ 35 | $(RM) -r $(PKGNAME).Rcheck/ 36 | 37 | 38 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: gfm 5 | html_preview: false 6 | --- 7 | 8 | 9 | 10 | 11 | # Create KEGG.db Package 12 | 13 | 14 | ```{r comment="", echo=FALSE, results='asis'} 15 | cat(packageDescription('createKEGGdb')$Description) 16 | ``` 17 | 18 | 19 | 20 | ## :writing_hand: Authors 21 | 22 | Guangchuang YU and Ziru Chen 23 | 24 | 25 | 26 | 27 | ## :arrow_double_down: Installation 28 | 29 | 30 | ```r 31 | ## install.packages("remotes") 32 | remotes::install_github("YuLab-SMU/createKEGGdb") 33 | ``` 34 | 35 | 36 | ## :gear: Workflow 37 | 38 | 39 | ![](inst/diagram.png) 40 | 41 | 42 | ## :book: Documents 43 | 44 | + [KEGG数据本地化,再也不用担心网络问题了](https://mp.weixin.qq.com/s/k24iLQ9mFFBtEDxQoQGE4g) 45 | + [多物种批量下载KEGG数据并做成KEGG.db包](https://mp.weixin.qq.com/s/PwrdQAkG3pTlwMB6Mj8wXQ) 46 | 47 | -------------------------------------------------------------------------------- /inst/KEGG.db/R/zzz.R: --------------------------------------------------------------------------------
## Load/unload hooks and accessors of the generated KEGG.db package.
## This file is copied verbatim into the package skeleton by createKEGGdb.

## Package-private cache: .onLoad() stores the sqlite file path ("dbfile")
## and the open connection ("dbconn") in it; the accessors below read it.
datacache <- new.env(hash=TRUE, parent=emptyenv())

## Thin accessors that hand the cache to the corresponding AnnotationDbi
## helpers (showQCData, dbconn, dbfile, dbschema, dbInfo).
KEGG <- function() showQCData("KEGG", datacache)
KEGG_dbconn <- function() dbconn(datacache)
KEGG_dbfile <- function() dbfile(datacache)
KEGG_dbschema <- function(file="", show.indices=FALSE)
    dbschema(datacache, file=file, show.indices=show.indices)
KEGG_dbInfo <- function() dbInfo(datacache)

## Runs when the package namespace is loaded: locate extdata/KEGG.sqlite in
## the installed package, open it, and export the annotation objects built
## on top of the connection.
.onLoad <- function(libname, pkgname)
{
    ## Connect to the SQLite DB
    dbfile <- system.file("extdata", "KEGG.sqlite", package=pkgname, lib.loc=libname)
    assign("dbfile", dbfile, envir=datacache)
    dbconn <- dbFileConnect(dbfile)
    assign("dbconn", dbconn, envir=datacache)
    ## Create the AnnObj instances
    ann_objs <- createAnnObjs.SchemaChoice("KEGG_DB", "KEGG", "KEGG", dbconn, datacache)
    mergeToNamespaceAndExport(ann_objs, pkgname)
    packageStartupMessage(AnnotationDbi:::annoStartupMessages("KEGG.db"))
}

## Runs when the namespace is unloaded: close the connection opened above.
.onUnload <- function(libpath)
{
    dbFileDisconnect(KEGG_dbconn())
}
-------------------------------------------------------------------------------- /R/create_kegg_db.R: --------------------------------------------------------------------------------
##' create KEGG.db package
##'
##'
##' @title create_kegg_db
##' @param species one of KEGG supported species, e.g. hsa for human
##' @return KEGG.db package generated in working directory
##' @export
##' @author Guangchuang Yu and Ziru Chen
create_kegg_db <- function(species) {
    ## tempfile() yields a path that does not exist yet, whereas tempdir()
    ## itself may already contain files
    packagedir <- tempfile()
    ## the staging tree is only an intermediate: remove it once the tarball
    ## is built, and also when a download or the build fails
    on.exit(unlink(packagedir, recursive = TRUE), add = TRUE)

    ## skeleton
    prepare_pkg_skeleton(packagedir)

    ## sqlite
    sqlite_path <- file.path(packagedir, "inst", "extdata")
    prepare_kegg_db(species, sqlite_path)

    ## build pkg
    pkgbuild::build(packagedir, dest_path = ".")
}


## Lay out the KEGG.db source tree under `packagedir`: create the package
## root, inst/extdata (destination of the sqlite file) and R/, then copy the
## template files shipped under KEGG.db in the installed createKEGGdb package.
## Returns the logical result of the last copy, as before.
prepare_pkg_skeleton <- function(packagedir) {
    ## Copy one template file into `todir`; `...` are the sub-directories
    ## below KEGG.db in which the template lives.
    .fcp <- function(..., todir, file) {
        from <- system.file("KEGG.db", ..., file, package = "createKEGGdb")
        ## system.file() returns "" when nothing matches
        if (!nzchar(from)) {
            stop("template file '", file,
                 "' not found in the installed createKEGGdb package",
                 call. = FALSE)
        }
        to <- file.path(todir, file)
        copied <- file.copy(from = from, to = to)
        ## file.copy() does not overwrite by default, so FALSE with the
        ## target present only means the file was already there
        if (!copied && !file.exists(to)) {
            stop("could not copy '", from, "' to '", to, "'", call. = FALSE)
        }
        copied
    }

    ## inst/extdata is where the sqlite file is stored
    sqlite_path <- file.path(packagedir, "inst", "extdata")
    R_src <- file.path(packagedir, "R")
    for (d in c(packagedir, sqlite_path, R_src)) {
        if (!dir.exists(d)) {
            dir.create(d, recursive = TRUE)
        }
    }

    .fcp("R", todir = R_src, file = "zzz.R")
    .fcp(todir = packagedir, file = "DESCRIPTION")
    .fcp(todir = packagedir, file = "LICENSE")
    .fcp(todir = packagedir, file = "NAMESPACE")
}


## Query the pathway list of every entry in `species` and return one table
## with columns path_id and path_name. The trailing " - <organism name>
## (<word>)" part is removed from the names; ids are kept as returned.
##' @importFrom magrittr %<>%
get_path2name <- function(species){
    ## one query per distinct species; NULL results are dropped so that a
    ## species without data does not break the row-binding below
    keggpathid2name.list <- lapply(unique(species), function(sp) {
        clusterProfiler:::kegg_list("pathway", sp)
    })
    keggpathid2name.list <- Filter(Negate(is.null), keggpathid2name.list)
    if (length(keggpathid2name.list) == 0) {
        stop("no pathway names could be retrieved for: ",
             paste(species, collapse = ", "), call. = FALSE)
    }

    keggpathid2name.df <- do.call(rbind, keggpathid2name.list)
    rownames(keggpathid2name.df) <- NULL
    keggpathid2name.df[,2] <- sub("\\s-\\s[a-zA-Z ]+\\(\\w+\\)$", "", keggpathid2name.df[,2])
    colnames(keggpathid2name.df) <- c("path_id","path_name")
    keggpathid2name.df
}


## Fetch the pathway links of one organism and return them as a table with
## columns pathway_id and gene_or_orf_id (the "<db>:" prefixes are removed
## from both). Returns NULL when the query yields nothing.
##' @importFrom magrittr %<>%
download.organisms.KEGG <- function(organism) {
    ## announce the request before it is sent, not after it has returned
    message(paste0(Sys.time()," Getting KEGG data of ",organism,"."))
    keggpathid2extid.df <- clusterProfiler:::kegg_link(organism, "pathway")
    if (is.null(keggpathid2extid.df)) {
        message(paste(Sys.time(),"Pathway data of",organism,"is null."))
        return(NULL)
    }

    keggpathid2extid.df[,1] %<>% gsub("[^:]+:", "", .)
    keggpathid2extid.df[,2] %<>% gsub("[^:]+:", "", .)
    colnames(keggpathid2extid.df) <- c("pathway_id","gene_or_orf_id")
    message(paste(Sys.time(),"KEGG data of",organism,"has been downloaded."))
    keggpathid2extid.df
}


## Return the second column of the KEGG listing `db` as a character vector;
## prepare_kegg_db() uses it with db = "organism" to expand "all".
get_organisms_list <- function(db){
    organisms <- clusterProfiler:::kegg_list(db)
    if (is.null(organisms)) {
        stop("could not retrieve the KEGG '", db, "' list", call. = FALSE)
    }
    as.character(organisms[,2])
}


## Write the metadata, map_counts and map_metadata tables. `n_path2name` and
## `n_path2gene` are the row counts of the two data tables.
write_kegg_metadata <- function(db, n_path2name, n_path2gene) {
    today <- format(Sys.Date(), "%Y%m%d")

    metadata <- rbind(c("PATHNAMESOURCENAME", "KEGG PATHWAY"),
                      c("PATHNAMESOURCEURL", "ftp://ftp.genome.jp/pub/kegg/pathway"),
                      c("PATHNAMESOURCEDATE", today),
                      c("KEGGSOURCENAME", "KEGG GENOME"),
                      c("KEGGSOURCEURL", "ftp://ftp.genome.jp/pub/kegg/genomes"),
                      c("KEGGSOURCEDATE", today),
                      c("GOEXTSOURCEDATE", "2015-Sepec2go27"),
                      c("GOEXTSOURCENAME", "Gene Ontology External Link"),
                      c("GOEXTSOURCEURL", "http://www.geneontology.org/external2go"),
                      c("Db type", "KEGGDB"),
                      c("DBSCHEMA", "KEGG_DB"),
                      c("DBSCHEMAVERSION", "2.1"))
    metadata <- as.data.frame(metadata, stringsAsFactors = FALSE)
    ## column names prescribed by makeAnnDbPkg (per the original note)
    colnames(metadata) <- c("name", "value")
    dbWriteTable(conn = db, "metadata", metadata, row.names=FALSE)

    ## rbind() of character vectors: the counts are stored as text, exactly
    ## as before
    map.counts <- rbind(c("pathway2name", n_path2name),
                        c("pathway2gene", n_path2gene))
    map.counts <- as.data.frame(map.counts, stringsAsFactors = FALSE)
    colnames(map.counts) <- c("map_name","count")
    dbWriteTable(conn = db, "map_counts", map.counts, row.names=FALSE)

    map.metadata <- rbind(c("ENZYMEID2GO","Gene Ontology External Link","http://www.geneontology.org/external2go","2015-Sepec2go27"),
                          c("GO2ENZYMEID","Gene Ontology External Link","http://www.geneontology.org/external2go","2015-Sepec2go27"),
                          c("EXTID2PATHID","KEGG GENOME","ftp://ftp.genome.jp/pub/kegg/genomes","2011-Mar15"),
                          c("PATHID2EXTID","KEGG GENOME","ftp://ftp.genome.jp/pub/kegg/genomes","2011-Mar15"),
                          c("PATHNAME2ID","KEGG PATHWAY","ftp://ftp.genome.jp/pub/kegg/pathway",today),
                          c("PATHID2NAME","KEGG PATHWAY","ftp://ftp.genome.jp/pub/kegg/pathway",today))
    map.metadata <- as.data.frame(map.metadata, stringsAsFactors = FALSE)
    colnames(map.metadata) <- c("map_name","source_name","source_url","source_date")
    dbWriteTable(conn = db, "map_metadata", map.metadata, row.names=FALSE)
}


## Build <sqlite_path>/KEGG.sqlite from scratch for `organisms` (a character
## vector of organism codes, or the single string "all" for every organism
## in the KEGG organism list). Writes the tables pathway2name, pathway2gene,
## metadata, map_counts and map_metadata; returns the file path invisibly.
##' @importFrom clusterProfiler download_KEGG
##' @importFrom RSQLite dbDriver
##' @importFrom RSQLite dbConnect
##' @importFrom RSQLite dbWriteTable
##' @importFrom RSQLite dbDisconnect
prepare_kegg_db <- function(organisms, sqlite_path) {
    if (length(organisms) == 0) {
        stop("'organisms' must name at least one KEGG organism",
             call. = FALSE)
    }

    dbfile <- file.path(sqlite_path, "KEGG.sqlite")
    unlink(dbfile)

    ## create database; the connection is released on every exit path
    drv <- dbDriver("SQLite")
    db <- dbConnect(drv, dbname=dbfile)
    on.exit(dbDisconnect(db), add = TRUE)

    ## NOTE(review): "all" is expanded before the pathway names are queried
    ## so that names are fetched per organism, like the gene links below;
    ## "all" itself is presumably not a valid organism code for the pathway
    ## listing -- confirm against the KEGG API.
    if (identical(as.character(organisms), "all")) {
        organisms <- get_organisms_list("organism")
    }
    ## rows are appended as-is and nothing de-duplicates them afterwards,
    ## so repeated codes are dropped up front
    organisms <- unique(organisms)

    ## put the pathway2name data into the tables
    KEGGPATHID2NAME <- get_path2name(organisms)
    dbWriteTable(conn = db, "pathway2name", KEGGPATHID2NAME, row.names=FALSE)

    ## put the pathway2gene data into the tables, one organism at a time,
    ## keeping a running total for map_counts
    n_path2gene <- 0L
    for (organism in organisms) {
        KEGGPATHID2EXTID <- download.organisms.KEGG(organism)
        if (is.null(KEGGPATHID2EXTID)) {
            next
        }
        dbWriteTable(conn = db, "pathway2gene", KEGGPATHID2EXTID, row.names=FALSE,append = TRUE)
        n_path2gene <- n_path2gene + nrow(KEGGPATHID2EXTID)
        message(paste(Sys.time(),"KEGG data of",organism,"has been added to the sqlite database."))
    }
    if (n_path2gene == 0L) {
        warning("no pathway-to-gene links were retrieved; ",
                "table 'pathway2gene' was not created", call. = FALSE)
    }

    ## append the metadata
    write_kegg_metadata(db, nrow(KEGGPATHID2NAME), n_path2gene)

    invisible(dbfile)
}


## `.` is the magrittr placeholder used with %<>% above
utils::globalVariables(".")

--------------------------------------------------------------------------------