├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R ├── KEGGpathway2Graph2.R ├── circles.R ├── col.key.R ├── colorpanel2.R ├── combineKEGGnodes.R ├── cpd2kegg.R ├── cpdidmap.R ├── cpdkegg2name.R ├── cpdname2kegg.R ├── download.kegg.R ├── eg2id.R ├── ellipses.R ├── geneannot.map.R ├── id2eg.R ├── kegg.legend.R ├── kegg.species.code.R ├── keggview.graph.R ├── keggview.native.R ├── max.abs.R ├── mol.sum.R ├── node.color.R ├── node.info.R ├── node.map.R ├── parseKGML2.R ├── parseKGML2Graph2.R ├── parseReaction2.R ├── pathview.R ├── pathview.stamp.R ├── random.R ├── reaction2edge.R ├── render.kegg.node.R ├── sim.mol.data.R ├── sliced.shapes.R ├── strfit.R ├── subtypeDisplay.kedge.R ├── wordwrap.R └── zzz.R ├── README.md ├── data ├── KEGGEdgeSubtype.rda ├── bods.rda ├── cpd.accs.rda ├── cpd.names.rda ├── cpd.simtypes.rda ├── demo.paths.rda ├── gene.idtype.bods.rda ├── gene.idtype.list.rda ├── gse16873.d.rda ├── kegg.met.rda ├── ko.ids.rda ├── korg.rda ├── paths.hsa.rda └── rn.list.rda ├── inst ├── CITATION ├── extdata │ ├── gse16873.demo │ ├── hsa04110.png │ ├── hsa04110.xml │ └── kegg.sigmet.rda └── unitTests │ └── test_mapper.R ├── man ├── combineKEGGnodes.Rd ├── cpd.accs.Rd ├── cpdidmap.Rd ├── demo.data.Rd ├── download.kegg.Rd ├── eg2id.Rd ├── kegg.species.code.Rd ├── korg.Rd ├── mol.sum.Rd ├── node.color.Rd ├── node.info.Rd ├── node.map.Rd ├── pathview-internal.Rd ├── pathview-package.Rd ├── pathview.Rd ├── sim.mol.data.Rd └── wordwrap.Rd ├── tests └── runTests.R └── vignettes ├── pathview.Rnw ├── pathview.bib └── pathview.pdf /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: pathview 2 | Type: Package 3 | Title: a tool set for pathway based data integration and visualization 4 | Version: 1.47.1 5 | Date: 2025-03-28 6 | Author: Weijun Luo 7 | Maintainer: Weijun Luo 8 | Description: Pathview is a tool set for pathway based data integration 9 | and visualization. It maps and renders a wide variety of 10 | biological data on relevant pathway graphs. All users need is 11 | to supply their data and specify the target pathway. Pathview 12 | automatically downloads the pathway graph data, parses the data 13 | file, maps user data to the pathway, and render pathway graph 14 | with the mapped data. In addition, Pathview also seamlessly 15 | integrates with pathway and gene set (enrichment) analysis tools for 16 | large-scale and fully automated analysis. 17 | biocViews: Pathways, GraphAndNetwork, Visualization, 18 | GeneSetEnrichment, DifferentialExpression, GeneExpression, 19 | Microarray, RNASeq, Genetics, Metabolomics, Proteomics, 20 | SystemsBiology, Sequencing 21 | Depends: R (>= 2.10) 22 | Imports: KEGGgraph, XML, Rgraphviz, graph, png, AnnotationDbi, org.Hs.eg.db, KEGGREST, methods, utils 23 | Suggests: gage, org.Mm.eg.db, RUnit, BiocGenerics 24 | License: GPL (>=3.0) 25 | URL: https://github.com/datapplab/pathview, https://pathview.uncc.edu/ 26 | LazyLoad: yes 27 | LazyData: yes -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | import( 2 | KEGGgraph, 3 | Rgraphviz, 4 | KEGGREST, 5 | methods, 6 | graph 7 | ) 8 | 9 | importFrom(AnnotationDbi, mget, select, columns, keys) 10 | importClassesFrom(AnnotationDbi,AnnotationDb) 11 | importFrom(org.Hs.eg.db, org.Hs.eg.db) 12 | 13 | importFrom(png, readPNG) 14 | importFrom(utils, data, installed.packages, read.delim, globalVariables) 15 | importFrom(XML, xmlAttrs, xmlChildren, xmlName, xmlRoot, xmlTreeParse) 16 | importFrom(grDevices, col2rgb, dev.off, pdf, png, rgb) 17 | importFrom(graphics, image, layout, par, polygon, 18 | rasterImage, rect, segments, text) 19 | importFrom(stats, IQR, rnorm) 20 | 21 | export( 22 | download.kegg, 23 | node.info, 24 | combineKEGGnodes, reaction2edge, 25 | 26 | node.map, 27 | eg2id, id2eg, geneannot.map, 28 | cpdkegg2name, cpdname2kegg, cpd2kegg, cpdidmap, 29 | kegg.species.code, 30 | mol.sum, 31 | sim.mol.data, 32 | 33 | node.color, col.key, 34 | 35 | wordwrap, strfit, 36 | 37 | pathview, 38 | keggview.native, 39 | keggview.graph 40 | ) 41 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | VERSION 1.47.1 2 | ------------- 3 | 4 | o fix "R CMD check" error caused by some problem in org.EcK12.eg.db 5 | 6 | o remove org.EcK12.eg.db from Suggests for BioC build check 7 | 8 | o korg now include 10718 KEGG species or 951 new species beyond March 2024. 9 | 10 | 11 | VERSION 1.43.1 12 | ------------- 13 | 14 | o add org.EcK12.eg.db to Suggests for BioC build check 15 | 16 | o korg now include 9767 KEGG species or 1485 new species beyond 2022. 17 | 18 | 19 | VERSION 1.37.1/1.36.1 20 | ------------- 21 | 22 | o fix bug in download.kegg caused by recent change in kegg rest API 23 | urls (http to https). 24 | 25 | o korg now include 8282 KEGG species or 1449 new species beyond 2020. 26 | 27 | 28 | VERSION 1.31.3/1.30.3 29 | ------------- 30 | 31 | o fix bug in mol.sum with single gene data reported by 32 | easygsea. Similarly add drop=F to a few other lines in mol.sum and 33 | node.map. 34 | 35 | VERSION 1.31.2/1.30.2 36 | ------------- 37 | 38 | o solve the check error due to the change related to KEGGEdgeSubtype 39 | in KEGGgraph package (version 1.51.1). 40 | 41 | VERSION 1.31.1/1.30.1 42 | ------------- 43 | 44 | o korg now include 6833 KEGG species or 1588 new species beyond 2017. 45 | 46 | VERSION 1.29.1/1.28.1 47 | ------------- 48 | 49 | o developmental version now available on GitHub at: 50 | https://github.com/datapplab/pathview 51 | 52 | o fixed warning on "requireNamespace(pkg.name)" in geneannot.map.R 53 | and sim.mol.data.R. Now "import" from org.Hs.eg.db, instead of 54 | "suggest" it. Also import more functions and class from 55 | AnnotationDbi. 56 | 57 | o prevented potential error caused by class(exprs) == "data.frame" 58 | or "class()==" if conditions in multiple functions. class(exprs) 59 | now returns a vector of length 2, which caused the error. 60 | 61 | VERSION 1.23.3/1.22.3 62 | ------------- 63 | 64 | o fix error and warning cause by geneannot.map() function. change 65 | if(in.type == out.type) to if(all(in.type == out.type)). change 66 | biocLite installation to BiocManager::install. make similar changes 67 | in sim.mol.data() function. 68 | 69 | o update NAMESPACE file on importFrom lines on AnnotationDbi and 70 | BiocManager. 71 | 72 | VERSION 1.18.2 73 | ------------- 74 | 75 | o korg now include 5245 KEGG species. To speed up pathview package 76 | loading, an updated version of korg is now checked out from 77 | Pathview Web server only when it is acutally used and needed. 78 | 79 | VERSION 1.17.7 80 | ------------- 81 | 82 | o updated combineKEGGnodes.R, i.e. changed the stop error to a 83 | warning message when a group node include different node types, 84 | i.e. both gene and compound. This problematic KEGG node definition 85 | does exist in pathway 04136 Autophagy - other, and caused error 86 | when calling pathview with kegg.native = F. 87 | 88 | o fixed bug in mol.sum introduced at 1.15.1, i.e. indexing using 89 | which(eff.idx). This problem only affect direct call of mol.sum 90 | with sum.method other than "sum" and "mean". 91 | 92 | 93 | VERSION 1.17.1 94 | ------------- 95 | 96 | o major expansion in korg, which now include both KEGG and NCBI 97 | taxonomy IDs, two more gene ID types, i.e. NCBI protein and uniprot 98 | IDs. In addition, Entrez or NCBI Gene IDs are discontinued for most 99 | prokaryotes. 100 | 101 | o korg now include 4800 KEGG species, in the meantime, an updated 102 | version of korg is now checked out from Pathview Web server each 103 | time pathview package is loaded. 104 | 105 | 106 | VERSION 1.15.1 107 | ------------- 108 | 109 | o fixed bug in node.map on single row/mapped data, and in mapped row 110 | numbers. 111 | 112 | VERSION 1.10.2 113 | ------------- 114 | 115 | o fixed problem that no node mapped when one gene/protein ID maps to 116 | multiple Entrez Gene ID (like Enzyme IDs): id2eg call with unique.map=F. 117 | 118 | VERSION 1.10.1 119 | ------------- 120 | 121 | o fix bug in reaction2edge function, which throw error for a few 122 | metabolic pathways with no multi-node reaction group (examples 123 | including 00072, 00100). 124 | 125 | VERSION 1.9.3 126 | ------------- 127 | 128 | o pathview can accept a vector of multiple pathway ids, and 129 | map/render the user data onto all these pathways in one call. 130 | 131 | o one extra column "all.mapped" was added to pathview output 132 | data.frames as to show all the gene/compound IDs mapped to each 133 | node. 134 | 135 | o add geneannot.map as a generic function for gene ID or annotation 136 | mapping. 137 | 138 | o sim.mol.data now generate data with all major gene ID types for 139 | all 19 species in bods, not just human. 140 | 141 | o download.kegg now let the user to choose from xml, png or both 142 | file types to download for each input pathway. In the meantime, it 143 | uses the KEGG REST API instead of the classical KEGG download 144 | links. All potential pathways including the general pathways can be 145 | downloaded this way. 146 | 147 | o solve the redundant import from graph package. 148 | 149 | o import specific instead of all functions from XML package. 150 | 151 | VERSION 1.9.1 152 | ------------- 153 | 154 | o solve the install error due to the recent change in KEGGgraph 155 | package. 156 | 157 | VERSION 1.5.4 158 | ------------- 159 | 160 | o adjust run-time messages into 3 consistent classes: Info (on 161 | progress), Note and Warning. 162 | 163 | o include paths.hsa data, the full list of human pathway ID/names 164 | from KEGG, as to help user specify target pathways when calling 165 | pathview. 166 | 167 | o updated korg to included over 80 newly added species, such as 168 | sheep, apple, mandarin orange etc. Pathview can work with 3050 169 | species now. 170 | 171 | o adjust the definitions of 7 arguments for pathview function: 172 | discrete, limit,bins, both.dirs, trans.fun, low, mid, high. These 173 | used to be a list of two logical elements with "gene" and "cpd" as 174 | the names. They can be vectors of two or one element(s) now. This 175 | makes pathview easier to use now. 176 | 177 | o Vigette has been reformatted: add a Citation section, and some 178 | example on reading user data into R, fix a few typos. 179 | 180 | 181 | VERSION 1.4.2 182 | ------------- 183 | 184 | o fixed bug in render.kegg.node function, which generates black 185 | lines in some nodes of multiple sample view. 186 | 187 | VERSION 1.3.6 188 | ------------- 189 | 190 | o ajusted node x coordinate by +0.5 to better fit the color blocks 191 | in 2 layer native kegg views. 192 | 193 | VERSION 1.3.4 194 | ------------- 195 | 196 | o updated bods to included an extra column of id.type, the default 197 | gene ID type. 198 | 199 | 200 | VERSION 1.2.4 201 | ------------- 202 | 203 | o updated korg to included over 600 newly added species. Pathview 204 | can work with 2970 species now. 205 | 206 | o Make returned values from pathview, keggview.native and 207 | keggview.graph functions invisible. 208 | 209 | VERSION 1.2.3 210 | ------------- 211 | 212 | o Fixed bug in node.map function, which produces 0 values when all 213 | multiple genes in a node are NA's. 214 | 215 | VERSION 1.2.2 216 | ------------- 217 | 218 | o Fixed bug in mol.sum function, which generates "incorrect number of 219 | dimension" or NA's when sum.method="median" etc. 220 | 221 | VERSION 1.2.1 222 | ------------- 223 | 224 | o Fixed bug in "missing red disease gene node labels" in diease 225 | pathways. To avoid interfering with node coloring, set all disease 226 | gene labels to black instead. 227 | 228 | VERSION 1.1.7 229 | ------------- 230 | 231 | o Graphviz view can automatic choose different types of legends, 232 | either on nodes, edges or both depending on the specific pathways. 233 | 234 | o Vigette has been reformatted: the "Quick start" section added 235 | 236 | VERSION 1.1.6 237 | ------------- 238 | o Pathview can plot/integrate/compare multiple states/samples in the 239 | same graph. Several functions and data objects are revised: 240 | including pathview, keggview.native, keggview.graph, 241 | render.kegg.node etc. New section on multiple state data with 242 | working exampleshas been added. 243 | 244 | o Vigette has been reformatted: Data integration section splitted 245 | into multiple sub-sections. 246 | 247 | VERSION 1.1.5 248 | ------------- 249 | o Pathview works with species where default KEGG gene ID is not 250 | Entrez Gene. Several functions and data objects are revised: 251 | including pathview, node.map, sim.mol.data, kegg.species.code, 252 | korg. New section on KEGG species and Gene ID usage with working 253 | exampleshas been added. 254 | 255 | VERSION 1.1.3 256 | ------------- 257 | o Pathview paper published in Bioinformatics 258 | 259 | VERSION 1.0.0 260 | ------------- 261 | o Initial release with Bioconductor 262 | 263 | o Main function: pathview 264 | 265 | o Four functional modules: 266 | -Downloader: download.kegg; 267 | -Parser: node.info, combineKEGGnodes, reaction2edge; 268 | -Mapper: node.map, eg2id, id2eg, cpdkegg2name, cpdname2kegg, 269 | cpd2kegg, cpdidmap, kegg.species.code, mol.sum, 270 | sim.mol.data; 271 | -Viewer: keggview.native, keggview.graph, node.color, 272 | col.key, wordwrap, strfit 273 | -------------------------------------------------------------------------------- /R/KEGGpathway2Graph2.R: -------------------------------------------------------------------------------- 1 | KEGGpathway2Graph2 <- 2 | function (pathway, genesOnly = TRUE, expandGenes = TRUE, split.group=FALSE, check.reaction=TRUE) 3 | { 4 | stopifnot(is(pathway, "KEGGPathway")) 5 | if(split.group) pathway <- splitKEGGgroup(pathway) 6 | rdata=(pathway@reactions) 7 | if (expandGenes){ 8 | if(check.reaction & length(rdata)>0) message("Note: ", "Gene nodes not expanded when reactions are converted to edges!") 9 | else pathway <- expandKEGGPathway(pathway) 10 | } 11 | knodes <- nodes(pathway) 12 | kedges <- edges(pathway) 13 | node.entryIDs <- getEntryID(knodes) 14 | edge.entryIDs <- getEntryID(kedges) 15 | V <- node.entryIDs 16 | edL <- vector("list", length = length(V)) 17 | names(edL) <- V 18 | if (is.null(nrow(edge.entryIDs))) { 19 | for (i in seq(along = edL)) { 20 | edL[[i]] <- list() 21 | } 22 | } 23 | else { 24 | for (i in 1:length(V)) { 25 | id <- node.entryIDs[i] 26 | hasRelation <- id == edge.entryIDs[, "Entry1ID"] 27 | if (!any(hasRelation)) { 28 | edL[[i]] <- list(edges = NULL) 29 | } 30 | else { 31 | entry2 <- unname(unique(edge.entryIDs[hasRelation, 32 | "Entry2ID"])) 33 | edL[[i]] <- list(edges = entry2) 34 | } 35 | } 36 | } 37 | gR <- new("graphNEL", nodes = V, edgeL = edL, edgemode = "directed") 38 | 39 | if(check.reaction & length(rdata)>0){ 40 | r2e.res=reaction2edge(pathway, gR) 41 | gR=r2e.res[[1]] 42 | kedges=r2e.res[[2]] 43 | knodes=r2e.res[[3]] 44 | } 45 | 46 | names(kedges) <- sapply(kedges, function(x) paste(getEntryID(x), 47 | collapse = "~")) 48 | env.node <- new.env() 49 | env.edge <- new.env() 50 | assign("nodes", knodes, envir = env.node) 51 | assign("edges", kedges, envir = env.edge) 52 | nodeDataDefaults(gR, "KEGGNode") <- env.node 53 | edgeDataDefaults(gR, "KEGGEdge") <- env.edge 54 | if (genesOnly) { 55 | gR <- subGraphByNodeType(gR, "gene") 56 | } 57 | return(gR) 58 | } 59 | -------------------------------------------------------------------------------- /R/circles.R: -------------------------------------------------------------------------------- 1 | circles <- 2 | function(x0,y0,r,n=100, cols, ...){ 3 | theta <- seq(0, 2 * pi, length=n) 4 | x=matrix(rep(x0, each=n), nrow=n) 5 | y=matrix(rep(y0, each=n), nrow=n) 6 | x <- x + r * cos(theta) 7 | y <- y + r * sin(theta) 8 | for(i in 1:length(x0)) polygon(x[,i], y[,i], col=cols[i], ...) 9 | return(list(x,y)) 10 | } 11 | 12 | -------------------------------------------------------------------------------- /R/col.key.R: -------------------------------------------------------------------------------- 1 | col.key <- 2 | function(discrete=FALSE, limit=1.0, bins=10, cols=NULL, both.dirs=TRUE, low="green", mid="gray", high="red", graph.size, node.size, size.by.graph=TRUE, key.pos="topright", off.sets=c(x=0,y=0), align="n", cex=1, lwd=1){ 3 | 4 | if(both.dirs & length(limit)==1){ 5 | limit=c(-abs(limit), abs(limit)) 6 | } else if(length(limit)==1){ 7 | limit=c(0,limit) 8 | } 9 | disc.cond1=all(as.integer(limit)==limit) 10 | disc.cond2=(limit[2]-limit[1]) %% bins==0 11 | discrete=discrete & disc.cond1 & disc.cond2 12 | if(discrete){ 13 | limit[2]=limit[2]+1 14 | bins=bins+1 15 | } 16 | 17 | width=graph.size[1] 18 | height=graph.size[2] 19 | if(size.by.graph==T){ 20 | xs=width/80 21 | ys=height/40 22 | } else if(!missing(node.sizes)){ 23 | xs=node.size[1]*3/bins 24 | ys=node.size[2] 25 | } else{ 26 | message("Note: ", "color key not plotted, node.size is needed\n when size.by.graph=FALSE!") 27 | return(off.sets) 28 | } 29 | 30 | if(align=="x") { 31 | off.sets['x']=2*xs 32 | off.sets['y']=off.sets['y']+3*ys 33 | } 34 | if(align=="y") off.sets=off.sets+c(x=3*xs, y=0) 35 | if(align=="n") off.sets=off.sets+c(x=2*xs, y=2*ys) 36 | if(length(grep('right',key.pos))==1) { 37 | off.sets['x']=off.sets['x']+bins*xs 38 | x=width-off.sets['x'] 39 | } else { 40 | x=off.sets['x'] 41 | off.sets['x']=off.sets['x']+bins*xs 42 | } 43 | if(length(grep('top',key.pos))==1) y=height-off.sets['y'] else y=off.sets['y'] 44 | 45 | ckx=seq(x,x+bins*xs,length=bins+1) 46 | cky=c(y,y+ys) 47 | 48 | if(is.null(cols)){ 49 | if(both.dirs){ 50 | cols = colorpanel2(bins, low=low, mid=mid, high=high) 51 | } else cols = colorpanel2(bins, low=mid, high=high) 52 | } 53 | 54 | data.cuts = seq(from = limit[1], to =limit[2], length=bins+1) 55 | image(x=ckx, y=cky, z=cbind(data.cuts[-1]),col = cols, axes = FALSE, add=T) 56 | if(!discrete){ 57 | label=format(data.cuts[c(1,bins/2+1,bins+1)], digits=2) 58 | text(x=seq(x,x+bins*xs,length=length(label)), y=rep(y-ys, length(label)),label=label, cex=cex) 59 | } else{ 60 | label=paste(as.integer(data.cuts[c(1,bins)])) 61 | text(x=seq(x,x+bins*xs,length=length(label))+c(xs,-xs)/2, y=rep(y-ys, length(label)),label=label, cex=cex) 62 | } 63 | cky=c(y-0.25*ys,y+ys) 64 | for(i in 1:(bins+1)) lines(rep(ckx[i],2), cky, lwd=lwd) 65 | 66 | return(off.sets) 67 | } 68 | 69 | -------------------------------------------------------------------------------- /R/colorpanel2.R: -------------------------------------------------------------------------------- 1 | colorpanel2<- 2 | function (n, low, mid, high) 3 | { 4 | if (missing(mid) || missing(high)) { 5 | low <- col2rgb(low) 6 | if (missing(high)) 7 | high <- col2rgb(mid) 8 | else high <- col2rgb(high) 9 | red <- seq(low[1, 1], high[1, 1], length = n)/255 10 | green <- seq(low[3, 1], high[3, 1], length = n)/255 11 | blue <- seq(low[2, 1], high[2, 1], length = n)/255 12 | } 13 | else { 14 | isodd <- n%%2 == 1 15 | if (isodd) { 16 | n <- n + 1 17 | } 18 | low <- col2rgb(low) 19 | mid <- col2rgb(mid) 20 | high <- col2rgb(high) 21 | lower <- floor(n/2) 22 | upper <- n - lower 23 | red <- c(seq(low[1, 1], mid[1, 1], length = lower), 24 | seq(mid[1, 1], high[1, 1], length = upper))/255 25 | green <- c(seq(low[3, 1], mid[3, 1], length = lower), 26 | seq(mid[3, 1], high[3, 1], length = upper))/255 27 | blue <- c(seq(low[2, 1], mid[2, 1], length = lower), 28 | seq(mid[2, 1], high[2, 1], length = upper))/255 29 | if (isodd) { 30 | red <- red[-(lower + 1)] 31 | green <- green[-(lower + 1)] 32 | blue <- blue[-(lower + 1)] 33 | } 34 | } 35 | rgb(red, blue, green) 36 | } 37 | -------------------------------------------------------------------------------- /R/combineKEGGnodes.R: -------------------------------------------------------------------------------- 1 | combineKEGGnodes <- 2 | function (nodes, graph, combo.node) 3 | { 4 | nodes=unique(nodes) 5 | nodes.all=nodes(graph) 6 | ii=nodes %in% nodes.all 7 | if(!all(ii)){ 8 | anodes=nodes[!ii] 9 | stop(paste("Nodes ", paste(anodes, collapse=", "), " are not part of the graph!", sep="")) 10 | } 11 | 12 | 13 | 14 | knode=getKEGGnodeData(graph) 15 | kedge=getKEGGedgeData(graph) 16 | nodeNames=sapply(knode, getName) 17 | nodeType=sapply(knode, getType) 18 | nodeComp=sapply(knode, getComponent) 19 | node.size=sapply(nodeComp, length) 20 | grp.nodes=nodes.all[node.size>1] 21 | 22 | nt=unique(nodeType[nodes]) 23 | len.nt=length(nt) 24 | if("ortholog" %in% nt) len.nt=len.nt-1 25 | if(len.nt>1) { 26 | warn.msg="Nodes are not the same type, hence unable to combine!" 27 | message("Warning: ", warn.msg) 28 | return(graph) 29 | } 30 | 31 | if(combo.node %in% grp.nodes){ 32 | 33 | combgraph <- combineNodes(c(nodes,combo.node), graph, combo.node) 34 | gnode=knode[[combo.node]] 35 | ng.component= unique(c(gnode@component, nodes)) 36 | ng.graphics= gnode@graphics 37 | ng.graphics@height=as.integer(length(ng.component)/length(gnode@component)*ng.graphics@height) 38 | } else{ 39 | if(combo.node %in% nodes.all){ 40 | combgraph <- combineNodes(c(nodes,combo.node), graph, combo.node) 41 | ng.component= unique(c(combo.node, nodes)) 42 | ng.graphics= knode[[combo.node]]@graphics 43 | ng.graphics@height=as.integer(length(ng.component)*ng.graphics@height) 44 | } else{ 45 | combgraph <- combineNodes(nodes, graph, combo.node) 46 | ng.component= unique(nodes) 47 | ng.graphics= knode[[nodes[1]]]@graphics 48 | ng.graphics@height=as.integer(length(ng.component)*ng.graphics@height) 49 | } 50 | } 51 | 52 | ng.entryID= combo.node 53 | ng.name= "undefined" 54 | ng.type= "group" 55 | ng.link= as.character(NA) 56 | ng.reaction= as.character(NA) 57 | ng.map= as.character(NA) 58 | new.gnode <- new("KEGGGroup", component = ng.component, entryID = ng.entryID, 59 | name = ng.name, type = ng.type, link = ng.link, reaction = ng.reaction, 60 | map = ng.map, graphics = ng.graphics) 61 | 62 | knode.new=knode[!nodes.all %in% nodes] 63 | knode.new[[combo.node]]=new.gnode 64 | 65 | kedge.new=kedge 66 | enames=names(kedge) 67 | eeids=sapply(strsplit(enames, "~"), function(x) x) 68 | idx1=eeids[1,] %in% nodes 69 | idx2=eeids[2,] %in% nodes 70 | idx=idx1 | idx2 71 | for(i in which(idx1)) kedge.new[[i]]@entry1ID=eeids[1,i]=combo.node 72 | for(i in which(idx2)) kedge.new[[i]]@entry2ID=eeids[2,i]=combo.node 73 | names(kedge.new)[idx]=paste(eeids[1,idx],eeids[2,idx], sep = "~") 74 | 75 | env.node <- new.env() 76 | env.edge <- new.env() 77 | assign("nodes", knode.new, envir = env.node) 78 | assign("edges", kedge.new, envir = env.edge) 79 | nodeDataDefaults(combgraph, "KEGGNode") <- env.node 80 | edgeDataDefaults(combgraph, "KEGGEdge") <- env.edge 81 | return(combgraph) 82 | } 83 | 84 | -------------------------------------------------------------------------------- /R/cpd2kegg.R: -------------------------------------------------------------------------------- 1 | cpd2kegg <- 2 | function(in.ids, in.type){ 3 | #cpd id or name to kegg accession, use cpdname2kegg or cpdidmap 4 | # cpd.type=c(names(rn.list),"name") 5 | data(rn.list) 6 | cpd.type=tolower(c(names(rn.list),"name")) 7 | if(!tolower(in.type) %in% cpd.type) stop("Incorrect type!") 8 | kg.idx=grep("kegg", cpd.type) 9 | if(in.type %in% cpd.type[kg.idx]) stop("A native KEGG compound ID type, no need to map!") 10 | if(in.type=="name") { 11 | kg.accs=cpdname2kegg(in.ids) 12 | } else kg.accs=cpdidmap(in.ids, in.type=in.type, out.type="KEGG") 13 | 14 | return(kg.accs) 15 | } 16 | 17 | -------------------------------------------------------------------------------- /R/cpdidmap.R: -------------------------------------------------------------------------------- 1 | cpdidmap <- 2 | function(in.ids, in.type, out.type){ 3 | #map between cpd ids, both kegg and others 4 | cnames=c(in.type, out.type) 5 | in.type=tolower(in.type) 6 | out.type=tolower(out.type) 7 | data(rn.list) 8 | cpd.type=names(rn.list)=tolower(names(rn.list)) 9 | 10 | if(!in.type %in% cpd.type) stop("Incorrect type!") 11 | if(out.type=="kegg") kg.idx=grep("kegg", cpd.type) 12 | else if(out.type %in% cpd.type) kg.idx=which(cpd.type==out.type) 13 | else stop("Incorrect out.type!") 14 | if(in.type %in% cpd.type[kg.idx]) { 15 | message("Note: ", "A native KEGG compound ID type, no need to map!") 16 | return(invisible(0)) 17 | } 18 | in.ids=in.ids[!is.na(in.ids)] 19 | # in.ids=unique(in.ids) 20 | len.id=length(in.ids) 21 | kg.accs=rep(NA, len.id) 22 | 23 | data(cpd.accs) 24 | accs.intype=cpd.accs[rn.list[[in.type]], ] 25 | in.idx=in.ids %in% accs.intype$ACCESSION_NUMBER 26 | if(sum(in.idx)<1) {#stop("None of the compound ids mapped to the specified type!") 27 | message("Note: ", "None of the compound ids mapped to the specified type!") 28 | } else{ 29 | rev.idx=accs.intype$ACCESSION_NUMBER %in% in.ids 30 | if(sum(rev.idx)>sum(in.idx)) message("Note: ", "multiple compounds may map to a input ID, only the first one kept!") 31 | in.cpd=accs.intype$COMPOUND_ID[match(in.ids[in.idx], accs.intype$ACCESSION_NUMBER)] 32 | 33 | 34 | accs.kg=cpd.accs[unlist(rn.list[kg.idx]), ] 35 | map.idx=in.cpd %in% accs.kg$COMPOUND_ID 36 | if(sum(map.idx)<1) message("Note: ", "None of the compound ids mapped to specified output ID type(s)!") 37 | in.cpd=in.cpd[map.idx] 38 | cpd.idx=match(in.cpd, accs.kg$COMPOUND_ID) 39 | kg.accs[in.idx][map.idx]=as.character(accs.kg$ACCESSION_NUMBER[cpd.idx]) 40 | } 41 | kg.accs=cbind(in.ids, kg.accs) 42 | colnames(kg.accs)=cnames 43 | return(kg.accs) 44 | } 45 | 46 | -------------------------------------------------------------------------------- /R/cpdkegg2name.R: -------------------------------------------------------------------------------- 1 | cpdkegg2name <- 2 | function(in.ids, in.type=c("KEGG", "KEGG COMPOUND accession")[1]){ 3 | #kegg accession to cpd name 4 | cnames=c(in.type, "NAME") 5 | in.type=tolower(in.type) 6 | # out.type=tolower(out.type) 7 | data(rn.list) 8 | cpd.type=names(rn.list)=tolower(names(rn.list)) 9 | cpd.type=c(cpd.type,"kegg") 10 | kg.type=cpd.type[grep("kegg", cpd.type)] 11 | if(!in.type %in% kg.type) stop("Incorrect type!") 12 | in.type=gsub(" accession", "", in.type) 13 | 14 | data(cpd.names) 15 | if(in.type=="kegg") sel.rn=1:nrow(cpd.names) 16 | else sel.rn=cpd.names$SOURCE==in.type 17 | sel.cn=c("ACCESSION_NUMBER", "NAME") 18 | cpd.names=as.matrix(cpd.names[sel.rn, sel.cn]) 19 | rownames(cpd.names)=NULL 20 | data(kegg.met) 21 | cpd.names=as.data.frame(rbind(kegg.met[,1:2],cpd.names)) 22 | colnames(cpd.names)=sel.cn #@ 23 | 24 | # in.ids=unique(in.ids) 25 | len.id=length(in.ids) 26 | out.names=in.ids 27 | # out.names=rep(NA, len.id) 28 | 29 | in.idx=in.ids %in% cpd.names$ACCESSION_NUMBER 30 | if(sum(in.idx)<1) { 31 | message("Note: ", "None of the compound ids mapped to the specified type!") 32 | } else{ 33 | out.names[in.idx]=as.character(cpd.names$NAME[match(in.ids[in.idx], cpd.names$ACCESSION_NUMBER)]) 34 | } 35 | out.names=cbind(in.ids, out.names) 36 | colnames(out.names)=cnames 37 | return(out.names) 38 | } 39 | 40 | -------------------------------------------------------------------------------- /R/cpdname2kegg.R: -------------------------------------------------------------------------------- 1 | cpdname2kegg <- 2 | function(in.ids){ 3 | 4 | in.type="name" 5 | # in.ids=unique(in.ids) 6 | len.id=length(in.ids) 7 | kg.accs=rep(NA, len.id) 8 | 9 | data(cpd.names) 10 | in.idx=in.ids %in% cpd.names$NAME 11 | if(sum(in.idx)<1){ 12 | message("Note: ", "None of the compound ids mapped to the specified type!") 13 | } else{ 14 | kg.accs[in.idx]=as.character(cpd.names$ACCESSION_NUMBER[match(in.ids[in.idx], cpd.names$NAME)]) 15 | } 16 | kg.accs=cbind(in.ids, kg.accs) 17 | colnames(kg.accs)=c(in.type, "KEGG accession") 18 | return(kg.accs) 19 | } 20 | 21 | -------------------------------------------------------------------------------- /R/download.kegg.R: -------------------------------------------------------------------------------- 1 | download.kegg <- 2 | function (pathway.id = "00010", species = "hsa", kegg.dir = ".", file.type=c("xml", "png")) 3 | { 4 | npath=length(pathway.id) 5 | if(species!="ko") species=kegg.species.code(species, na.rm=T) 6 | nspec=length(species) 7 | 8 | if(npath!=1 | nspec!=1) { 9 | species=rep(species, npath) 10 | pathway.id=rep(pathway.id, each=nspec) 11 | } 12 | pathway.id <- paste(species, pathway.id, sep = "") 13 | uidx=!duplicated(pathway.id) 14 | pathway.id=pathway.id[uidx] 15 | species=species[uidx] 16 | npath=length(pathway.id) 17 | 18 | xml.fnames=paste(pathway.id, ".xml", sep="") 19 | png.fnames=paste(pathway.id, ".png", sep="") 20 | ## xml.fmt="http://www.genome.jp/kegg-bin/download?entry=%s&format=kgml" 21 | ## png.fmt="http://www.genome.jp/kegg/pathway/%s/%s" 22 | xml.fmt="https://rest.kegg.jp/get/%s/kgml" 23 | png.fmt="https://rest.kegg.jp/get/%s/image" 24 | all.status=rep("succeed", npath) 25 | names(all.status)=pathway.id 26 | warn.fmt.xml="Download of %s xml file failed!\nThis pathway may not exist!" 27 | warn.fmt.png="Download of %s png file failed!\nThis pathway may not exist!" 28 | 29 | 30 | if("xml" %in% file.type){ 31 | for (i in 1:npath) { 32 | msg=sprintf("Downloading xml files for %s, %d/%d pathways..", pathway.id[i], i, length(pathway.id)) 33 | message("Info: ", msg) 34 | xml.url=sprintf(xml.fmt, pathway.id[i]) 35 | xml.target=sprintf("%s/%s", kegg.dir, xml.fnames[i]) 36 | xml.status=try(download.file(xml.url, xml.target, quiet=T), silent=T) 37 | 38 | if(xml.status!=0) all.status[i]="failed" 39 | if(class(xml.status)[1]=="try-error"){ 40 | warn.msg=sprintf(warn.fmt.xml, pathway.id[i]) 41 | message("Warning: ", warn.msg) 42 | unlink(xml.target) 43 | } 44 | } 45 | } 46 | 47 | if("png" %in% file.type){ 48 | for (i in 1:npath) { 49 | msg=sprintf("Downloading png files for %s, %d/%d pathways..", pathway.id[i], i, length(pathway.id)) 50 | message("Info: ", msg) 51 | png.url=sprintf(png.fmt, pathway.id[i])#species[i], png.fnames[i]) 52 | png.target=sprintf("%s/%s", kegg.dir, png.fnames[i]) 53 | png.status=suppressWarnings(try(download.file(png.url, png.target, quiet=T, mode="wb"), silent=T)) 54 | 55 | if(png.status!=0) all.status[i]="failed" 56 | if(class(png.status)[1]=="try-error"){ 57 | warn.msg=sprintf(warn.fmt.png, pathway.id[i]) 58 | message("Warning: ", warn.msg) 59 | unlink(png.target) 60 | } 61 | } 62 | } 63 | 64 | return(all.status) 65 | } 66 | 67 | -------------------------------------------------------------------------------- /R/eg2id.R: -------------------------------------------------------------------------------- 1 | eg2id <-function(eg, category=gene.idtype.list[1:2], org="Hs", pkg.name=NULL, ...){ 2 | category=tolower(category) 3 | ei=category %in% c("entrez", "eg", "entrezid") 4 | if(all(ei)) stop("output ID or category cannot all be Entrez Gene ID!") 5 | if(any(ei)) category=category[!ei] 6 | geneannot.map(in.ids=eg, in.type="entrez", out.type=category, org=org, pkg.name=pkg.name, ...)#, unique.map=TRUE, na.rm=TRUE) 7 | } 8 | -------------------------------------------------------------------------------- /R/ellipses.R: -------------------------------------------------------------------------------- 1 | ellipses <- 2 | function(x0,y0,w,h,n=100, cols, ...){ 3 | np=length(x0) 4 | x=seq(-w,w,length=n/2) 5 | y=(1-(x/w)^2)^.5*h 6 | x=c(x,rev(x)) 7 | y=c(y,-rev(y)) 8 | x0=matrix(rep(x0, each=n), nrow=n) 9 | y0=matrix(rep(y0, each=n), nrow=n) 10 | x <- x0 + x 11 | y <- y0 + y 12 | for(i in 1:np) polygon(x[,i], y[,i], col=cols[i], ...) 13 | return(list(x,y)) 14 | } 15 | 16 | -------------------------------------------------------------------------------- /R/geneannot.map.R: -------------------------------------------------------------------------------- 1 | geneannot.map <- function(in.ids, in.type, out.type, org="Hs", pkg.name=NULL, unique.map=TRUE, na.rm=TRUE, keep.order=TRUE){ 2 | if(is.null(pkg.name)) {#pkg.name=paste("org", org, "eg.db", sep=".") 3 | data(bods) 4 | ridx=grep(tolower(paste0(org, "[.]")), tolower(bods[,1])) 5 | if(length(ridx)==0) { 6 | ridx=grep(tolower(org), tolower(bods[,2:3])) %% nrow(bods) 7 | if(length(ridx)==0) stop("Wrong org value!") 8 | if(any(ridx==0)) ridx[ridx==0]=nrow(bods) 9 | } 10 | pkg.name=bods[ridx,1] 11 | } 12 | 13 | pkg.on=try(requireNamespace(pkg.name),silent = TRUE) 14 | if(!pkg.on) { 15 | # source("http://bioconductor.org/biocLite.R") 16 | # biocLite(pkg.name, suppressUpdates =TRUE) 17 | BiocManager::install(pkg.name, update=FALSE) 18 | pkg.on=try(requireNamespace(pkg.name),silent = TRUE) 19 | if(!pkg.on) stop(paste("Fail to install/load gene annotation package ", pkg.name, "!", sep="")) 20 | } 21 | 22 | 23 | db.obj <- eval(parse(text=paste0(pkg.name, "::", pkg.name))) 24 | id.types <- columns(db.obj) #columns(eval(as.name(pkg.name))) 25 | 26 | in.type=toupper(in.type) 27 | out.type=toupper(out.type) 28 | eii=in.type==toupper("entrez") | in.type==toupper("eg") 29 | if(any(eii)) in.type[eii]="ENTREZID" 30 | eio=out.type==toupper("entrez") | out.type==toupper("eg") 31 | if(any(eio)) out.type[eio]="ENTREZID" 32 | if(all(in.type==out.type)) stop("in.type and out.type are the same, no need to map!") 33 | 34 | nin=length(in.type) 35 | if(nin!=1) stop("in.type must be of length 1!") 36 | out.type=out.type[!out.type %in% in.type] 37 | nout=length(out.type) 38 | 39 | msg <- paste0("must from: ", paste(id.types, collapse=", "), "!") 40 | if (! in.type %in% id.types) stop("'in.type' ", msg) 41 | if (! all(out.type %in% id.types)) stop("'out.type' ", msg) 42 | 43 | in.ids0=in.ids 44 | in.ids <- unique(as.character(in.ids))#unique necessary for select()# if(unique.map) 45 | out.ids=character(length(in.ids)) 46 | res <- try(suppressWarnings(AnnotationDbi::select(db.obj, 47 | keys = in.ids, 48 | keytype = in.type, 49 | columns=c(in.type, out.type)))) 50 | if(class(res)[1]=="data.frame"){ 51 | 52 | res <- res[, c(in.type, out.type)] 53 | 54 | if(nout==1) na.idx <- is.na(res[,2]) 55 | else na.idx <- apply(res[,-1],1,function(x) all(is.na(x))) 56 | if (sum(na.idx)>0) { 57 | n.na <- length(unique(res[na.idx, 1])) 58 | if (n.na>0) { 59 | print(paste("Note:", n.na, "of", length(in.ids), "unique input IDs unmapped.")) 60 | } 61 | if (na.rm) res <- res[!na.idx, ] 62 | } 63 | 64 | cns=colnames(res) 65 | if(unique.map){ 66 | if(length(out.type)==1) umaps=tapply(res[,out.type], res[,in.type], paste, sep="", collapse="; ") 67 | else umaps=apply(res[,out.type], 2, function(x) tapply(x, res[,in.type], function(y) paste(unique(y), sep="", collapse="; "))) 68 | umaps=cbind(umaps) 69 | res.uniq=cbind(rownames(umaps), umaps) 70 | res=res.uniq 71 | colnames(res)=cns 72 | } 73 | 74 | res=as.matrix(res) 75 | if(!keep.order){ 76 | rownames(res)=NULL 77 | return(res) 78 | } else { 79 | res1=matrix(NA, ncol=length(cns), nrow=length(in.ids0)) 80 | res1[,1]=in.ids0 81 | rns=match(in.ids0, res[,1]) 82 | res1[,-1]=res[rns,-1] 83 | colnames(res1)=cns 84 | return(res1) 85 | } 86 | } else{ 87 | res=cbind(in.ids,out.ids) 88 | colnames(res)=c(in.type,out.type) 89 | return(res) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /R/id2eg.R: -------------------------------------------------------------------------------- 1 | id2eg <-function(ids, category=gene.idtype.list[1], org="Hs", pkg.name=NULL, ...){ 2 | category=tolower(category) 3 | if(category %in% c("entrez", "eg", "entrezid")) stop("input ID or category is already Entrez Gene ID!") 4 | geneannot.map(in.ids=ids, in.type=category, out.type="entrez", org=org, pkg.name=pkg.name, ...) 5 | } 6 | -------------------------------------------------------------------------------- /R/kegg.legend.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables("KEGGEdgeSubtype") 2 | 3 | kegg.legend=function (type=c("both", "edge", "node")[1]) { 4 | if(!type %in% c("both", "edge", "node")){ 5 | msg.fmt="type has to be one of: \"%s\", \"%s\" and \"%s\"!" 6 | msg=sprintf(msg.fmt, "both", "edge", "node") 7 | message("Note: ", msg) 8 | return(invisible(0)) 9 | } 10 | # if (!exists("KEGGEdgeSubtype")) { 11 | # data(KEGGEdgeSubtype) 12 | # } 13 | KEGGEdgeSubtype <- KEGGEdgeSubtype[-7,] 14 | levels(KEGGEdgeSubtype$name)[13]="others/unknown" 15 | subtypes <- KEGGEdgeSubtype$name 16 | cols <- as.character(KEGGEdgeSubtype$color) 17 | labels <- as.character(KEGGEdgeSubtype$label) 18 | fontcolors <- as.character(KEGGEdgeSubtype$fontcolor) 19 | arrowheads <- as.character(KEGGEdgeSubtype$arrowhead) 20 | styles <- as.character(KEGGEdgeSubtype$style) 21 | ltytrans <- c(solid = 1, dashed = 2, dotted = 3) 22 | ltys <- ltytrans[styles] 23 | nst=nrow(KEGGEdgeSubtype) 24 | 25 | if(type=="edge"){ 26 | top.margin=2 27 | xlim=c(-0.4,1.8) 28 | ylim=c(0, nst) 29 | main.txt="Edge types" 30 | } else if(type=="node"){ 31 | top.margin=2 32 | xlim=c(-0.4,1.8) 33 | ylim=c(0, nst) 34 | main.txt="Node types" 35 | ntext.x=0.7 36 | xs=1.3 37 | } else { 38 | top.margin=3 39 | xlim=c(-0.2,4.0) 40 | ylim=c(0, nst+1) 41 | main.txt="KEGG diagram legend" 42 | ntext.x=3.1 43 | xs=3.7 44 | } 45 | opar <- par(mar = c(0, 0, top.margin, 0), mgp = c(0, 0, 0), lwd=2) 46 | on.exit(par(opar)) 47 | 48 | plot(1, 1, type = "n", xlim = xlim, ylim = ylim, 49 | axes = FALSE, xlab = "", ylab = "", main = main.txt) 50 | if(type %in% c("both","edge")){ 51 | for (i in 1:nst) { 52 | j=nst-i 53 | text(0.8, j, subtypes[i], pos = 2, cex = 1.2) 54 | segments(1, j, 1.8, j, col = cols[i], lty = ltys[i]) 55 | text(1.4, j+.2, labels[i], col = fontcolors[i]) 56 | if (arrowheads[i] == "normal") { 57 | x=c(1.77,1.77,1.8) 58 | h=0.15/cos(pi/6) 59 | y=c(h,-h,0)+j 60 | polygon(x,y,col=cols[i], border=cols[i], lwd=1) 61 | } 62 | if (arrowheads[i] == "open") { 63 | x=c(1.78,1.78,1.8) 64 | h=0.1/cos(pi/6) 65 | y=c(h,-h,0)+j 66 | polygon(x,y,col=cols[i], border=cols[i], lwd=1) 67 | } 68 | if (arrowheads[i] == "tee") { 69 | lines(c(1.8,1.8), c(.3,-.3)+j, col = cols[i], lwd=3) 70 | } 71 | } 72 | } 73 | 74 | if(type %in% c("both","node")){ 75 | ntypes=c("gene", "group", "compound", "map") 76 | naliases=c("protein/enzyme", "complex", "metabolite/glycan", "pathway") 77 | nshapes=c("rectangle", "rectangle group", "ellipse", "plaintext") 78 | node.types=data.frame(types=ntypes, aliases=naliases, shapes=nshapes, width=rep(46,4), height=rep(17,4)) 79 | 80 | # if(!edges.only){ 81 | ntypes=as.character(node.types$types) 82 | naliases=as.character(node.types$aliases) 83 | nlabels=paste(ntypes, "\n(", naliases, ")", sep="") 84 | nshapes=as.character(node.types$shapes) 85 | nwidth=node.types$width/250 86 | nheight=node.types$height/250 87 | nnt=nrow(node.types) 88 | for (i in 1:nnt) { 89 | j=nst-i*3 90 | text(ntext.x, j, nlabels[i], pos = 2, cex = 1.2) 91 | } 92 | ys=nst-(1:4)*3 93 | rect.x1=rep(xs-nwidth,4) 94 | rect.x2=rep(xs+nwidth,4) 95 | rect.y1=rep(ys[1:2],c(1,3))-nheight[1:2]*c(1,2,0,2)*6 96 | rect.y2=rep(ys[1:2],c(1,3))+nheight[1:2]*c(1,2,2,0)*6 97 | rect(rect.x1,rect.y1,rect.x2,rect.y2) 98 | ellipses(xs,ys[3],nwidth[3],nheight[3]*6, cols=NULL) 99 | text(xs, ys[4], "Pathway name", cex = 1.2) 100 | if(type!="node"){ 101 | text(1.0, nst+1, "Edge Types", cex = 1.2, font=2) 102 | text(3.0, nst+1, "Node Types", cex = 1.2, font=2) 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /R/kegg.species.code.R: -------------------------------------------------------------------------------- 1 | kegg.species.code <- 2 | function(species="hsa", na.rm=FALSE, code.only=TRUE){ 3 | nspec=length(species) 4 | if(!exists("korg")) data(korg, package="pathview") 5 | 6 | ridx=match(species, korg[,1:5]) %% nrow(korg) 7 | nai=is.na(ridx) 8 | if(sum(nai)>0) { 9 | si=try(load(url("https://pathview.uncc.edu/data/korg.1.rda"))) 10 | if(class(si)[1]!="try-error"){ 11 | ridx.1=match(species, korg.1[,1:5]) %% nrow(korg.1) 12 | nai.1=is.na(ridx.1) 13 | if(sum(nai.1)0) { 21 | na.msg=sprintf("Unknown species '%s'!", paste(species[nai], sep="", collapse="', '")) 22 | message("Note: ", na.msg) 23 | } 24 | if(sum(nai)==nspec) { 25 | stop.msg="All species are invalid!" 26 | stop(stop.msg) 27 | } 28 | } 29 | if(any(ridx[!nai]==0)) ridx[!nai & ridx==0]=nrow(korg) 30 | if(na.rm) ridx=ridx[!nai] 31 | if(code.only) coln=3 else coln=c(3,6:10) 32 | species.info=korg[ridx,coln] 33 | return(species.info) 34 | } 35 | 36 | -------------------------------------------------------------------------------- /R/keggview.graph.R: -------------------------------------------------------------------------------- 1 | keggview.graph <-function( 2 | plot.data.gene=NULL, 3 | plot.data.cpd=NULL, 4 | cols.ts.gene=NULL, 5 | cols.ts.cpd=NULL, 6 | node.data, 7 | path.graph, 8 | pathway.name, 9 | out.suffix="pathview", 10 | pdf.size=c(7,7), 11 | 12 | multi.state=TRUE, 13 | same.layer=TRUE, 14 | match.data=TRUE, 15 | rankdir=c("LR","TB")[1], 16 | is.signal=TRUE, 17 | split.group=F, 18 | afactor=1, 19 | 20 | text.width=15, #k 21 | cex=0.5, 22 | map.cpdname=FALSE, #k 23 | cpd.lab.offset=1.0, 24 | 25 | 26 | discrete=list(gene=FALSE, cpd=FALSE), 27 | limit=list(gene=1, cpd=1), 28 | bins=list(gene=10, cpd=10), 29 | both.dirs=list(gene=T, cpd=T), 30 | low = list(gene = "green", cpd = "blue"), 31 | mid = list(gene = "gray", cpd = "gray"), 32 | high = list(gene = "red", cpd = "yellow"), 33 | na.col="transparent", 34 | 35 | new.signature=TRUE, 36 | plot.col.key=TRUE, 37 | key.align="x", 38 | key.pos="topright", 39 | sign.pos="bottomright",#g 40 | ...){ 41 | 42 | gR1=path.graph 43 | 44 | #group nodes mapping and merge 45 | grp.idx=node.data$size>1 46 | if(sum(grp.idx)>0 & !split.group){ 47 | sub2grp=cbind(unlist(node.data$component[grp.idx], use.names=F), rep(names(grp.idx)[grp.idx], node.data$size[grp.idx])) 48 | du.idx=duplicated(sub2grp[,1]) 49 | if(sum(du.idx)>0){ 50 | du.rn=sub2grp[,1] %in% sub2grp[du.idx,1] 51 | message("Warning: reconcile groups sharing member nodes!") 52 | print(sub2grp[du.rn,]) 53 | du.grps=sub2grp[du.idx,] 54 | rn=which(du.idx) 55 | for(r in rn){ 56 | comps=node.data$component[[sub2grp[r,2]]] 57 | comps=comps[comps!=sub2grp[r,1]] 58 | node.data$component[[sub2grp[r,2]]]=comps 59 | node.data$size[sub2grp[r,2]]=node.data$size[sub2grp[r,2]]-1 60 | } 61 | sub2grp=sub2grp[!du.idx,] 62 | } 63 | rownames(sub2grp)=sub2grp[,1] 64 | } else sub2grp=NULL 65 | 66 | if(sum(grp.idx)>0 & !split.group){ 67 | for(gn in names(grp.idx)[grp.idx]){ 68 | gR1=combineKEGGnodes(node.data$component[[gn]], gR1, gn) 69 | } 70 | } else if(split.group){ 71 | gR1=subGraph(nodes(gR1)[node.data$size==1], gR1) 72 | } 73 | nNames=nodes(gR1) 74 | nSizes=node.data$size[nNames] 75 | 76 | #unconnected nodes processing 77 | deg=degree(gR1) 78 | deg=deg$inDegree+deg$outDegree 79 | if(is.signal & sum(deg<1)>0){ 80 | gR2=subKEGGgraph(nNames[deg>0], gR1) 81 | nNames=nNames[deg>0] 82 | nSizes=nSizes[deg>0] 83 | if(!is.null(sub2grp)){ 84 | # sub.idx=!sub2grp[,1] %in% names(deg[deg<1]) 85 | sub.idx=sub2grp[,1] %in% nNames |sub2grp[,2] %in% nNames 86 | } else sub.idx=0 87 | } else { 88 | gR2=gR1 89 | if(!is.null(sub2grp)){ 90 | sub.idx=rep(T, nrow(sub2grp)) 91 | } else sub.idx=0 92 | } 93 | 94 | if(length(nNames)<2){ 95 | msg=sprintf("%s not rendered, 0 or 1 connected nodes!\nTry \"kegg.native=T\" instead!", pathway.name) 96 | message("Note: ", msg) 97 | return(list()) 98 | } 99 | 100 | #give up the KEGG positions, use graphviz layout 101 | 102 | 103 | #general attributes 104 | attrs=list() 105 | attrs$graph$rankdir="LR" 106 | attrs$node <- list(fixedsize=FALSE) 107 | 108 | #node attributes 109 | ntype=node.data$type[nNames] 110 | cpd.idx=ntype=="compound" 111 | map.idx=ntype=="map" 112 | rect.idx=!(cpd.idx|map.idx) 113 | nAttr=list() 114 | nAttr$label=rep('', length(nNames)) 115 | #nAttr$shape=rep('rectangle', length(nNames)) 116 | shapes=node.data$shape[nNames] 117 | if(any(cpd.idx)) shapes[cpd.idx]="ellipse" 118 | if(any(map.idx)) shapes[map.idx]="plaintext" 119 | #if(any(map.idx)) shapes[map.idx]="rectangle" 120 | nAttr$shape=shapes 121 | nAttr$height=.75*17/46*nSizes*afactor 122 | nAttr$width=rep(.75, length(nNames))*afactor 123 | if(any(cpd.idx)){ 124 | nAttr$height[cpd.idx]=nAttr$height[cpd.idx]*1.5 125 | nAttr$width[cpd.idx]=nAttr$width[cpd.idx]*1.5 126 | } 127 | if(any(map.idx)){ 128 | nAttr$height[map.idx]=nAttr$height[map.idx]*1.5 129 | nAttr$width[map.idx]=nAttr$width[map.idx]*2 130 | } 131 | nAttr<- lapply(nAttr, function(x) {names(x) <- nNames 132 | x}) 133 | 134 | na.col=colorpanel2(1, low=na.col, high=na.col) 135 | fillcol=rep(na.col, length(nNames)) 136 | names(fillcol)=nNames 137 | 138 | #edge attributes 139 | subdisplay <- subtypeDisplay(gR2) 140 | if(length(subdisplay)<1) eAttrs=list() else{ 141 | na.rn=apply(subdisplay, 2, function(x) sum(is.na(x))==7) 142 | if(sum(na.rn)>0) subdisplay[,na.rn]=KEGGEdgeSubtype[KEGGEdgeSubtype[,1]=="others",rownames(subdisplay)] 143 | eLabel <- subdisplay["label", ] 144 | eCol <- subdisplay["color", ] 145 | eTextCol <- subdisplay["fontcolor", ] 146 | eLty <- subdisplay["style", ] 147 | eArrowhead <- subdisplay["arrowhead", ] 148 | if (ncol(subdisplay) == 1) { 149 | tmp <- colnames(subdisplay)[1] 150 | names(eLabel) <- names(eCol) <- names(eTextCol) <- tmp 151 | names(eLty) <- names(eArrowhead) <- tmp 152 | } 153 | eAttrs <- list(lty = eLty, col = eCol, textCol = eTextCol, 154 | label = eLabel, arrowhead = eArrowhead) 155 | } 156 | 157 | gR2.layout=gR2 158 | edgeRenderInfo(gR2.layout)=eAttrs 159 | layoutType=ifelse(is.signal, "dot", "neato") 160 | gR2.layout <- layoutGraph(gR2.layout, attrs = attrs, nodeAttrs=nAttr, layoutType=layoutType) 161 | edgeRenderInfo(gR2.layout)=eAttrs 162 | # edgeRenderInfo(gR2.layout)$col=eAttrs$col 163 | nri=nodeRenderInfo(gR2.layout) 164 | loc=list(x=nri$nodeX, y=nri$nodeY) 165 | if(sum(rect.idx)>0){ 166 | w.unit=min(nri$lWidth[rect.idx]) 167 | h.unit=min(nri$height[rect.idx]) 168 | } 169 | cni=nSizes>1 170 | if(sum(cni)>0){ 171 | xloc=rep(loc[[1]][cni], nSizes[cni]) 172 | sn.y=unlist(sapply(nSizes[cni], function(x) seq(-(x-1)/2, (x-1)/2,1)),use.names =F) 173 | yloc=rep(loc[[2]][cni], nSizes[cni])+h.unit*sn.y 174 | } else xloc=yloc=NULL 175 | xloc.nd=c(xloc,loc[[1]][nSizes==1 & rect.idx]) 176 | yloc.nd=c(yloc,loc[[2]][nSizes==1 & rect.idx]) 177 | #labs=gsub("-", "- ", node.data$labels) 178 | #labs=sapply(labs,wordwrap,len=text.width) 179 | #labs=gsub("- ", "-", labs) 180 | labs=node.data$labels 181 | labs[nNames[map.idx]]=sapply(labs[nNames[map.idx]],wordwrap,width=text.width, break.word=F) 182 | labs[nNames[cpd.idx]]=sapply(labs[nNames[cpd.idx]],wordwrap,width=text.width, break.word=T) 183 | 184 | 185 | cols.ts.gene=cbind(cols.ts.gene) 186 | cols.ts.cpd=cbind(cols.ts.cpd) 187 | nc.gene=max(ncol(cols.ts.gene),0) 188 | nc.cpd=max(ncol(cols.ts.cpd),0)#@ 189 | nplots=max(nc.gene,nc.cpd) 190 | pn.suffix=colnames(cols.ts.gene) 191 | if(length(pn.suffix)nc.cpd) cols.ts.cpd= cols.ts.cpd[, rep(1:nc.cpd, nplots)[1:nplots]] 199 | # if(nc.genenc.cpd & !is.null(cols.ts.cpd)){ 202 | na.mat=matrix(na.col, ncol=nplots-nc.cpd, nrow=nrow(cols.ts.cpd)) 203 | cols.ts.cpd= cbind(cols.ts.cpd, na.mat) 204 | } 205 | if(nc.gene0){ 231 | cn.col=rep(NA, sum(sub.idx)) 232 | cn.col=fillcol[sci.node] 233 | names(cn.col)=sub2grp[sub.idx,1] 234 | rect.col=c(cn.col,fillcol[nSizes==1 & rect.idx]) 235 | rect.col[rect.col==na.col]=NA 236 | rect.col=matrix(rep(rect.col,nplots), ncol=nplots) 237 | } 238 | if(sum(cpd.idx)>0){ 239 | ell.col=fillcol[cpd.idx] 240 | ell.col[ell.col==na.col]=NA 241 | ell.col=matrix(rep(ell.col,nplots), ncol=nplots) 242 | w.e=min(nri$lWidth[cpd.idx]) 243 | h.e=min(nri$height[cpd.idx]) 244 | xloc.e=loc[[1]][cpd.idx] 245 | yloc.e=loc[[2]][cpd.idx] 246 | } 247 | 248 | 249 | # fillcol=matrix(rep(fillcol,nplots), ncol=nplots) 250 | fillcol=matrix(na.col, nrow=length(nNames), ncol=nplots) 251 | rownames(fillcol)=nNames 252 | 253 | if(!is.null(cols.ts.gene) & sum(rect.idx)>0){ 254 | fillcol[nidx.gene,1:nc.gene]=cols.ts.gene[cidx.gene,] #need to be added in pathview.graph 255 | cn.col=matrix(NA, nrow=sum(sub.idx), ncol=nc.gene) 256 | cn.col[]=cols.ts.gene[sci.gene,] 257 | rownames(cn.col)=sub2grp[sub.idx,1] 258 | if(nc.gene>1) rect.col=rbind(cn.col,fillcol[nSizes==1 & rect.idx,1:nc.gene]) 259 | else rect.col=c(cn.col,fillcol[nSizes==1 & rect.idx,1]) 260 | rect.col[rect.col==na.col]=NA 261 | } 262 | 263 | if(!is.null(cols.ts.cpd) & sum(cpd.idx)>0){ 264 | fillcol[nidx.cpd,1:nc.cpd]=cols.ts.cpd[cidx.cpd,] 265 | if(sum(cpd.idx)>0){ 266 | ell.col=fillcol[cpd.idx,1:nc.cpd] 267 | ell.col[ell.col==na.col]=NA 268 | } 269 | } 270 | 271 | multi.state=multi.state & nplots>1 272 | if(multi.state) { 273 | nplots=1 274 | pn.suffix=paste(out.suffix, "multi", sep=".") 275 | if(sum(rect.idx>0)) rect.col.plot=rect.col 276 | if(sum(cpd.idx)>0) ell.col.plot=ell.col 277 | } 278 | 279 | for(np in 1:nplots){ 280 | gfile=paste(pathway.name, pn.suffix[np],"pdf", sep=".") 281 | # gfile=paste(pathway.name, pn.suffix[1],"pdf", sep=".") 282 | out.msg=sprintf(out.fmt, gfile) 283 | message("Info: ", out.msg) 284 | 285 | #KEGG legend type 286 | ntypes=length(unique(node.data$type[nNames])) 287 | etypes=nrow(unique(t(subdisplay))) 288 | if(!same.layer) kl.type="both" 289 | 290 | else{ 291 | if(ntypes<3 & sum(cpd.idx)<3) kl.type="edge" 292 | else if(etypes<3) kl.type="node" 293 | else { 294 | kl.type="both" 295 | same.layer=FALSE 296 | } 297 | } 298 | 299 | pdf.width=ifelse(same.layer,1.5,1)* pdf.size[1] 300 | pdf(gfile, width=pdf.width,height=pdf.size[2]) 301 | op <- par(no.readonly = TRUE) # save default, for resetting... 302 | if(same.layer) nf <- layout(cbind(1,2), c(2,1))#, TRUE) 303 | rg=renderGraph(gR2.layout) 304 | gri=graphRenderInfo(rg) 305 | par(mai=gri$mai, usr=gri$usr) 306 | 307 | #text(loc, label = labs[nNames], cex = cex) 308 | if(sum(rect.idx)>0){ 309 | if(!multi.state) rect.col.plot=cbind(rect.col)[,np] 310 | # rect(xloc.nd-w.unit,yloc.nd-h.unit/2, xloc.nd+w.unit,yloc.nd+h.unit/2, col=rect.col) 311 | rect.out=sliced.shapes(xloc.nd,yloc.nd, w.unit, h.unit/2, cols=rect.col.plot, shape="rectangle") 312 | } 313 | if(sum(cpd.idx)>0) { 314 | if(!multi.state) ell.col.plot=cbind(ell.col)[,np] 315 | # ellipses(xloc.e, yloc.e, w.e, h.e/2, cols=ell.col) 316 | ell.out=sliced.shapes(xloc.e, yloc.e, w.e, h.e/2, cols=ell.col.plot, shape="ellipse") 317 | } 318 | if(sum(cni)>0){ 319 | if(sum(sub.idx)>0) text(xloc, yloc, label = labs[sub2grp[sub.idx,1]], cex = cex) 320 | } 321 | #text(loc, label = labs[nNames], cex = cex) 322 | if(sum(!cpd.idx)>0) text(loc[[1]][!cpd.idx], loc[[2]][!cpd.idx], label = labs[nNames[!cpd.idx]], cex = cex) 323 | if(sum(cpd.idx)>0) { 324 | if(map.cpdname & !is.null(cols.ts.cpd)) yloc.et=yloc.e+h.e*cpd.lab.offset else yloc.et=yloc.e 325 | text(xloc.e, yloc.et, label = labs[nNames[cpd.idx]], cex = cex) 326 | } 327 | 328 | 329 | pv.pars=list() 330 | pv.pars$gsizes=c(gri$bbox[2,1],gri$bbox[2,2]) 331 | if(sum(rect.idx)>0){ 332 | pv.pars$nsizes=c(w.unit,h.unit) 333 | } else pv.pars$nsizes=c(w.e,h.e) 334 | pv.pars$op=op 335 | pv.pars$key.cex=cex*1.5 336 | pv.pars$key.lwd=1 337 | pv.pars$sign.cex=1.2*cex 338 | 339 | off.sets=c(x=0,y=0) 340 | align="n" 341 | 342 | ucol.gene=unique(as.vector(cols.ts.gene)) 343 | na.col.gene=ucol.gene %in% c(na.col, NA) 344 | if(plot.col.key & !is.null(cols.ts.gene) & !all(na.col.gene)) { 345 | off.sets=col.key(limit=limit$gene, bins=bins$gene, both.dirs=both.dirs$gene, discrete=discrete$gene, graph.size=pv.pars$gsizes, 346 | node.size=pv.pars$nsizes, key.pos=key.pos, cex=pv.pars$key.cex, lwd=pv.pars$key.lwd, low=low$gene, mid=mid$gene, high=high$gene, align="n") 347 | align=key.align 348 | 349 | } 350 | 351 | ucol.cpd=unique(as.vector(cols.ts.cpd)) 352 | na.col.cpd=ucol.cpd %in% c(na.col, NA) 353 | if(plot.col.key & !is.null(cols.ts.cpd) & !all(na.col.cpd)) { 354 | off.sets=col.key(limit=limit$cpd, bins=bins$cpd, both.dirs=both.dirs$cpd, discrete=discrete$cpd, graph.size=pv.pars$gsizes, node.size=pv.pars$nsizes, key.pos=key.pos, off.sets=off.sets, cex=pv.pars$key.cex, lwd=pv.pars$key.lwd, low=low$cpd, mid=mid$cpd, high=high$cpd, align=align) 355 | } 356 | 357 | if(new.signature) pathview.stamp(position=sign.pos, graph.sizes=pv.pars$gsizes, on.kegg=F, cex = pv.pars$sign.cex) 358 | # kegg.legend(edges.only=same.layer) 359 | kegg.legend(type=kl.type) 360 | par(pv.pars$op) 361 | dev.off() 362 | } 363 | 364 | return(invisible(pv.pars)) 365 | } 366 | 367 | -------------------------------------------------------------------------------- /R/keggview.native.R: -------------------------------------------------------------------------------- 1 | keggview.native <- 2 | function( 3 | plot.data.gene=NULL, 4 | plot.data.cpd=NULL, 5 | cols.ts.gene=NULL, 6 | cols.ts.cpd=NULL, 7 | node.data, 8 | pathway.name, 9 | out.suffix="pathview", 10 | kegg.dir=".", 11 | 12 | multi.state=TRUE, 13 | match.data=TRUE, 14 | same.layer=TRUE, # 15 | res=300, # 16 | cex = 0.25,# 17 | 18 | discrete=list(gene=FALSE, cpd=FALSE), 19 | limit=list(gene=1, cpd=1), 20 | bins=list(gene=10, cpd=10), 21 | both.dirs=list(gene=T, cpd=T), 22 | low = list(gene = "green", cpd = "blue"), 23 | mid = list(gene = "gray", cpd = "gray"), 24 | high = list(gene = "red", cpd = "yellow"), 25 | na.col="transparent", 26 | # na.col="white", 27 | 28 | new.signature=TRUE, 29 | plot.col.key=TRUE, 30 | key.align="x", 31 | key.pos="topright", 32 | # sign.pos="bottomright",#g 33 | ...){ 34 | 35 | #read image 36 | img <- readPNG(paste(kegg.dir, "/", pathway.name, ".png", 37 | sep = "")) 38 | width <- ncol(img) 39 | height <- nrow(img) 40 | 41 | cols.ts.gene=cbind(cols.ts.gene) 42 | cols.ts.cpd=cbind(cols.ts.cpd) 43 | nc.gene=max(ncol(cols.ts.gene),0) 44 | nc.cpd=max(ncol(cols.ts.cpd),0)#@ 45 | nplots=max(nc.gene,nc.cpd) 46 | pn.suffix=colnames(cols.ts.gene) 47 | if(length(pn.suffix)nc.cpd) cols.ts.cpd= cols.ts.cpd[, rep(1:nc.cpd, nplots)[1:nplots]] 56 | # if(nc.genenc.cpd & !is.null(cols.ts.cpd)){ 59 | na.mat=matrix(na.col, ncol=nplots-nc.cpd, nrow=nrow(cols.ts.cpd)) 60 | cols.ts.cpd= cbind(cols.ts.cpd, na.mat) 61 | } 62 | if(nc.gene1 76 | if(multi.state) { 77 | nplots=1 78 | pn.suffix=paste(out.suffix, "multi", sep=".") 79 | if(nc.gene>0) cols.gene.plot=cols.ts.gene 80 | if(nc.cpd>0) cols.cpd.plot=cols.ts.cpd 81 | } 82 | 83 | for(np in 1:nplots){ 84 | #plot setup 85 | img.file =paste(pathway.name,pn.suffix[np],"png", sep=".") 86 | out.msg=sprintf(out.fmt, img.file) 87 | message("Info: ", out.msg) 88 | png(img.file, width = width, height = height, res=res) 89 | 90 | op=par(mar = c(0, 0, 0, 0)) 91 | plot(c(0, width), c(0, height), type = "n", xlab = "", ylab = "",xaxs = "i",yaxs = "i") 92 | if(new.signature) img[height-4:25, 17:137, 1:3]=1 93 | if(same.layer!=T) rasterImage(img, 0, 0, width, height, interpolate = F) 94 | 95 | if(!is.null(cols.ts.gene) & nc.gene>=np){ 96 | if(!multi.state) cols.gene.plot=cols.ts.gene[,np] 97 | if(same.layer!=T){ 98 | render.kegg.node(plot.data.gene, cols.gene.plot, img, same.layer=same.layer, type="gene", cex=cex) 99 | } else{ 100 | img=render.kegg.node(plot.data.gene, cols.gene.plot, img, same.layer=same.layer, type="gene") 101 | } 102 | } 103 | 104 | if(!is.null(cols.ts.cpd) & nc.cpd>=np){ 105 | if(!multi.state) cols.cpd.plot=cols.ts.cpd[,np] 106 | if(same.layer!=T){ 107 | render.kegg.node(plot.data.cpd, cols.cpd.plot, img, same.layer=same.layer, type="compound", cex=cex) 108 | } else{ 109 | img=render.kegg.node(plot.data.cpd, cols.cpd.plot, img, same.layer=same.layer, type="compound") 110 | } 111 | } 112 | 113 | if(same.layer==T) rasterImage(img, 0, 0, width, height, interpolate = F) 114 | 115 | pv.pars=list() 116 | pv.pars$gsizes=c(width=width, height=height) 117 | pv.pars$nsizes=c(46,17) 118 | pv.pars$op=op 119 | pv.pars$key.cex=2.*72/res 120 | pv.pars$key.lwd=1.2*72/res 121 | pv.pars$sign.cex=cex 122 | off.sets=c(x=0,y=0) 123 | align="n" 124 | 125 | # na.col=colorpanel2(1, low=na.col, high=na.col) 126 | ucol.gene=unique(as.vector(cols.ts.gene)) 127 | na.col.gene=ucol.gene %in% c(na.col, NA) 128 | 129 | if(plot.col.key & !is.null(cols.ts.gene) & !all(na.col.gene)) { 130 | off.sets=col.key(limit=limit$gene, bins=bins$gene, both.dirs=both.dirs$gene, discrete=discrete$gene, graph.size=pv.pars$gsizes, 131 | node.size=pv.pars$nsizes, key.pos=key.pos, cex=pv.pars$key.cex, lwd=pv.pars$key.lwd, low=low$gene, mid=mid$gene, high=high$gene, align="n") 132 | align=key.align 133 | 134 | } 135 | 136 | ucol.cpd=unique(as.vector(cols.ts.cpd)) 137 | na.col.cpd=ucol.cpd %in% c(na.col, NA) 138 | if(plot.col.key & !is.null(cols.ts.cpd) & !all(na.col.cpd)) { 139 | off.sets=col.key(limit=limit$cpd, bins=bins$cpd, both.dirs=both.dirs$cpd, discrete=discrete$cpd, graph.size=pv.pars$gsizes, node.size=pv.pars$nsizes, key.pos=key.pos, off.sets=off.sets, cex=pv.pars$key.cex, lwd=pv.pars$key.lwd, low=low$cpd, mid=mid$cpd, high=high$cpd, align=align) 140 | } 141 | 142 | if(new.signature) pathview.stamp(x=17, y=20, on.kegg=T, cex = pv.pars$sign.cex) 143 | par(pv.pars$op) 144 | dev.off() 145 | } 146 | 147 | return(invisible(pv.pars)) 148 | } 149 | 150 | -------------------------------------------------------------------------------- /R/max.abs.R: -------------------------------------------------------------------------------- 1 | max.abs <- 2 | function(x, na.rm=TRUE){ 3 | if(na.rm) x=x[!is.na(x)] 4 | midx=which.max(abs(x)) 5 | x[midx] 6 | } 7 | 8 | -------------------------------------------------------------------------------- /R/mol.sum.R: -------------------------------------------------------------------------------- 1 | mol.sum <- 2 | function(mol.data, id.map, gene.annotpkg="org.Hs.eg.db", sum.method=c("sum","mean", "median", "max", "max.abs", "random")[1]){ 3 | if(is.character(mol.data)){ 4 | gd.names=mol.data 5 | mol.data=rep(1, length(mol.data)) 6 | names(mol.data)=gd.names 7 | ng=length(mol.data) 8 | } else if(!is.null(mol.data)){ 9 | if(length(dim(mol.data))==2){ 10 | gd.names=rownames(mol.data) 11 | ng=nrow(mol.data) 12 | } else if(is.numeric(mol.data) & is.null(dim(mol.data))){ 13 | gd.names=names(mol.data) 14 | ng=length(mol.data) 15 | } else stop("wrong mol.data format!") 16 | } else stop("NULL mol.data!") 17 | 18 | if(is.character(id.map) & length(id.map)==1){ 19 | id.map=id2eg(gd.names, category=id.map, pkg.name=gene.annotpkg) 20 | } 21 | 22 | sel.idx=id.map[,2]>"" & !is.na(id.map[,2]) 23 | id.map=id.map[sel.idx,,drop=F] 24 | eff.idx=gd.names %in% id.map[,1] 25 | mapped.ids=id.map[match(gd.names[eff.idx], id.map[,1]),2] 26 | if(sum(eff.idx)<1) stop("no ID can be mapped!") 27 | else if(sum(eff.idx)==1){ 28 | mapped.data=cbind(mol.data)[eff.idx,,drop=F] #rbind(cbind(mol.data)[eff.idx,]) 29 | rownames(mapped.data)=mapped.ids[1] 30 | } 31 | else{ 32 | if(sum.method %in% c("sum","mean")){ 33 | sum.method=eval(as.name(sum.method)) 34 | # mapped.data=apply(cbind(cbind(mol.data)[eff.idx,]),2,function(x){ 35 | mapped.data=apply(cbind(mol.data)[eff.idx,,drop=F],2,function(x){ 36 | sum.res=tapply(x, mapped.ids, sum.method, na.rm=T) 37 | return(sum.res) 38 | }) 39 | if(length(unique(mapped.ids))==1){ 40 | if(length(mapped.data)>1){ 41 | mapped.data=rbind(mapped.data) 42 | rownames(mapped.data)=mapped.ids[1] 43 | } 44 | else names(mapped.data)=mapped.ids[1] 45 | } 46 | } else{ 47 | sum.method=eval(as.name(sum.method)) 48 | mol.data=cbind(mol.data)[eff.idx,,drop=F] #cbind(cbind(mol.data)[eff.idx,]) 49 | if(all(mol.data>=0) | all(mol.data<=0)){ 50 | vars=apply(cbind(mol.data), 1, IQR) 51 | } else vars=apply(cbind(mol.data), 1, sum, na.rm=T) 52 | 53 | sel.rn=tapply(1:sum(eff.idx), mapped.ids, function(x){ 54 | # sel.rn=tapply(which(eff.idx), mapped.ids, function(x){ 55 | if(length(x)==1) return(x) 56 | else return(x[which.min(abs(vars[x]-sum.method(vars[x], na.rm=T)))]) 57 | }) 58 | # if(length(sel.rn)>1) mapped.data=cbind(mol.data[sel.rn,]) 59 | # else mapped.data=mol.data[sel.rn,,drop=F] #rbind(mol.data[sel.rn,]) 60 | mapped.data=mol.data[sel.rn,,drop=F] 61 | rownames(mapped.data)=names(sel.rn) 62 | } 63 | } 64 | return(mapped.data) 65 | 66 | } 67 | 68 | -------------------------------------------------------------------------------- /R/node.color.R: -------------------------------------------------------------------------------- 1 | node.color <- 2 | function(plot.data=NULL, discrete=FALSE, limit=1, bins=10, both.dirs=TRUE, low="green", mid="gray", high="red", na.col="transparent", trans.fun=NULL){ 3 | if(is.null(plot.data)) return(NULL) 4 | node.summary=plot.data[,-c(1:8)]#c(1:7) 5 | if(length(dim(node.summary))==2) { 6 | node.summary=as.matrix(node.summary) 7 | } else names(node.summary)=rownames(plot.data) 8 | if(!is.null(trans.fun)) node.summary=trans.fun(node.summary) 9 | if(both.dirs & length(limit)==1){ 10 | limit=c(-abs(limit), abs(limit)) 11 | } else if(length(limit)==1){ 12 | limit=c(0,limit) 13 | } 14 | disc.cond1=all(as.integer(limit)==limit) 15 | disc.cond2=(limit[2]-limit[1]) %% bins==0 16 | if(discrete & disc.cond1 & disc.cond2){ 17 | node.summary[]=as.integer(node.summary) 18 | limit[2]=limit[2]+1 19 | bins=bins+1 20 | } else if(discrete){ 21 | message("Note: ", "limit or bins not proper, data not treated as discrete!") 22 | } 23 | 24 | 25 | node.summary[node.summary > limit[2]] = limit[2] 26 | node.summary[node.summary < limit[1]] = limit[1] 27 | if(both.dirs){ 28 | cols = colorpanel2(bins, low=low, mid=mid, high=high) 29 | } else cols = colorpanel2(bins, low=mid, high=high) 30 | na.col=colorpanel2(1, low=na.col, high=na.col) 31 | data.cuts = seq(from = limit[1], to =limit[2], length=bins+1) 32 | index.ts = cols.ts = node.summary 33 | index.ts[] = cut(node.summary, data.cuts, include.lowest = TRUE, right=F) 34 | cols.ts[]=cols[index.ts] 35 | cols.ts[is.na(cols.ts)]=na.col 36 | return(cols.ts) 37 | } 38 | 39 | -------------------------------------------------------------------------------- /R/node.info.R: -------------------------------------------------------------------------------- 1 | node.info <- 2 | function(object, short.name=TRUE){ 3 | cobj=class(object)[1] 4 | if(cobj=="character"){ 5 | object <-parseKGML2(object) 6 | ndata=nodes(object) 7 | } else if(cobj=="KEGGPathway"){ 8 | ndata=nodes(object) 9 | } else if(cobj=="graphNEL"){ 10 | ndata=getKEGGnodeData(object) 11 | } else { 12 | stop("object should be either a filename, KEGGPathway, or graphNEL!") 13 | } 14 | 15 | nodeNames=sapply(ndata, getName) 16 | nodeType=sapply(ndata, getType) 17 | nodeComp=sapply(ndata, getComponent) 18 | node.size=sapply(nodeComp, length) 19 | 20 | grs1=sapply(ndata, function(x){ 21 | grs=x@graphics 22 | c(labels=grs@name, shape=grs@type) 23 | }) 24 | grs2=sapply(ndata, function(x){ 25 | grs=x@graphics 26 | c(x=grs@x,y=grs@y,width=grs@width,height=grs@height) 27 | }) 28 | grs1=t(grs1) 29 | grs2=t(grs2) 30 | 31 | graphic.data=as.list(cbind(data.frame(grs1, stringsAsFactors=F), data.frame(grs2))) 32 | nd.list=list(kegg.names=nodeNames, type=nodeType, component=nodeComp, size=node.size) 33 | nd.list=c(nd.list, graphic.data) 34 | if(short.name){ 35 | gnames=sapply(strsplit(nd.list$labels, ", "), "[[", 1) 36 | map.idx=nd.list$type=="map" 37 | gnames[map.idx]=nd.list$labels[map.idx] 38 | gnames[is.na(gnames)]="" 39 | gnames=gsub("[.][.][.]", "", gnames) 40 | nd.list$labels=gnames 41 | nd.list$kegg.names=lapply(nd.list$kegg.names, function(x) gsub("^.*:", "", x)) 42 | } 43 | 44 | nn=names(nodeNames) 45 | nd.list= lapply(nd.list, function(x) { 46 | names(x) <- nn 47 | return(x) 48 | }) 49 | return(nd.list) 50 | } 51 | 52 | -------------------------------------------------------------------------------- /R/node.map.R: -------------------------------------------------------------------------------- 1 | node.map <- 2 | function(mol.data=NULL, node.data, node.types=c("gene", "ortholog", "compound")[1], node.sum =c("sum","mean", "median", "max", "max.abs", "random")[1], entrez.gnodes=TRUE){ 3 | type.sel=node.data$type %in% node.types 4 | if(sum(type.sel)<1){ 5 | message("Note: ", "No specified node types in the pathway!") 6 | plot.data=NULL 7 | return(plot.data) 8 | } 9 | node.data=lapply(node.data, "[", type.sel) 10 | n.nodes=length(node.data$kegg.names) 11 | spacials=as.matrix(as.data.frame(node.data[c("type", "x", "y", "width", "height")])) 12 | if(node.types[1]=="gene"){ 13 | kng=node.data$kegg.names[node.data$type=="gene"] 14 | kng.char=gsub("[0-9]", "", unlist(kng)) 15 | if(any(kng.char>"")) entrez.gnodes=FALSE 16 | } 17 | 18 | na.plot.data=function(){ 19 | sapply(1:n.nodes, function(i){ 20 | kns=node.data$kegg.names[[i]] 21 | if(node.types[1]=="gene" & entrez.gnodes) items=as.numeric(kns) 22 | else items=kns 23 | ord=order(items) 24 | items=items[ord] 25 | kns=kns[ord] 26 | return(c(kns[1],"", spacials[i,], NA)) 27 | }) 28 | } 29 | 30 | if(is.null(mol.data)){ 31 | plot.data=na.plot.data() 32 | } else{ 33 | 34 | #map gene data 35 | if(is.character(mol.data)){ 36 | gd.names=mol.data 37 | mol.data=rep(1, length(mol.data)) 38 | names(mol.data)=gd.names 39 | } 40 | mol.data=cbind(mol.data) 41 | 42 | if(is.null(colnames(mol.data))) colnames(mol.data)=paste("ge", 1:ncol(mol.data),sep="") 43 | mapped.mols <- intersect(unlist(node.data$kegg.names), row.names(mol.data)) 44 | if(length(mapped.mols)==0){ 45 | message("Warning: ", paste("None of the genes or compounds mapped to the pathway!", 46 | "Argument gene.idtype or cpd.idtype may be wrong.", sep="\n")) 47 | plot.data=na.plot.data() 48 | } else{ 49 | if(node.types[1]=="gene" & entrez.gnodes) mapped.mols =as.numeric(mapped.mols) 50 | 51 | 52 | plot.data=sapply(1:n.nodes, function(i){ 53 | kns=node.data$kegg.names[[i]] 54 | if(node.types[1]=="gene" & entrez.gnodes) items=as.numeric(kns) 55 | else items=kns 56 | ord=order(items) 57 | items=items[ord] 58 | kns=kns[ord] 59 | hit=items %in% mapped.mols 60 | if(sum(hit)==0) { 61 | return(c(kns[1], "", spacials[i,], rep(NA, ncol(mol.data)))) 62 | } else if(sum(hit)==1) { 63 | edata=mol.data[as.character(items[hit]),] 64 | return(c(kns[hit], kns[hit], spacials[i,], edata)) 65 | } else { 66 | node.sum=eval(as.name(node.sum)) 67 | # edata=apply(cbind(mol.data[as.character(items[hit]),]), 2, node.sum, na.rm=T) 68 | # edata=apply(cbind(mol.data[as.character(items[hit]),]), 2, function(x){ 69 | edata=apply(mol.data[as.character(items[hit]),,drop=F], 2, function(x){ 70 | x=x[!is.na(x)] 71 | if(length(x)<1) return(NA) 72 | else return(node.sum(x, na.rm=F)) 73 | }) 74 | return(c(kns[hit][1], paste(kns[hit],collapse=","), spacials[i,], edata)) 75 | } 76 | }) 77 | } 78 | } 79 | 80 | colnames(plot.data)=names(node.data$kegg.names) 81 | plot.data=as.data.frame(t(plot.data), stringsAsFactors = F) 82 | plot.data$labels=node.data$labels 83 | ncs=ncol(plot.data) 84 | plot.data=plot.data[,c(1,ncs,2:(ncs-1))] 85 | if(is.null(mol.data)) cns="mol.data" else cns=colnames(mol.data) 86 | colnames(plot.data)[c(1,3,9:ncs)]=c("kegg.names","all.mapped",cns)#c(1,8:ncs) 87 | for(ic in (1:ncol(plot.data))[-c(1:4)]) plot.data[,ic]=as.numeric(plot.data[,ic])#-c(1:3) 88 | 89 | return(plot.data) 90 | } 91 | 92 | -------------------------------------------------------------------------------- /R/parseKGML2.R: -------------------------------------------------------------------------------- 1 | parseKGML2<-function (file) 2 | { 3 | doc <- xmlTreeParse(file, getDTD = FALSE) 4 | r <- xmlRoot(doc) 5 | childnames <- sapply(xmlChildren(r), xmlName) 6 | isEntry <- childnames == "entry" 7 | isRelation <- childnames == "relation" 8 | isReaction <- childnames == "reaction" 9 | kegg.pathwayinfo <- parsePathwayInfo(r) 10 | kegg.nodes <- sapply(r[isEntry], parseEntry) 11 | kegg.edges <- sapply(r[isRelation], parseRelation) 12 | kegg.reactions <- sapply(r[isReaction], parseReaction2) 13 | names(kegg.nodes) <- sapply(kegg.nodes, getEntryID) 14 | pathway <- new("KEGGPathway", pathwayInfo = kegg.pathwayinfo, 15 | nodes = kegg.nodes, edges = kegg.edges, reactions = kegg.reactions) 16 | return(pathway) 17 | } 18 | -------------------------------------------------------------------------------- /R/parseKGML2Graph2.R: -------------------------------------------------------------------------------- 1 | parseKGML2Graph2 <-function (file, ...) 2 | { 3 | pathway <- parseKGML2(file) 4 | gR <- KEGGpathway2Graph2(pathway, ...) 5 | return(gR) 6 | } 7 | -------------------------------------------------------------------------------- /R/parseReaction2.R: -------------------------------------------------------------------------------- 1 | parseReaction2 <- 2 | function (reaction) 3 | { 4 | attrs <- xmlAttrs(reaction) 5 | name <- attrs[["name"]] 6 | type <- attrs[["type"]] 7 | children <- xmlChildren(reaction) 8 | childrenNames <- names(children) 9 | substrateIndices <- grep("^substrate$", childrenNames) 10 | productIndices <- grep("^product$", childrenNames) 11 | substrateName <- substrateAltName <- vector("character", 12 | length(substrateIndices)) 13 | productName <- productAltName <- vector("character", length(productIndices)) 14 | for (i in seq(along = substrateIndices)) { 15 | ind <- substrateIndices[i] 16 | substrate <- children[[ind]] 17 | substrateName[i] <- xmlAttrs(substrate)[["id"]] 18 | substrateChildren <- xmlChildren(substrate) 19 | if (length(substrateChildren) > 0) { 20 | substrateAlt <- substrateChildren$alt 21 | substrateAltName[i] <- xmlAttrs(substrateAlt)[["name"]] 22 | } 23 | else { 24 | substrateAlt <- as.character(NA) 25 | substrateAltName[i] <- as.character(NA) 26 | } 27 | } 28 | for (i in seq(along = productIndices)) { 29 | ind <- productIndices[i] 30 | product <- children[[ind]] 31 | productName[i] <- xmlAttrs(product)[["id"]] 32 | productChildren <- xmlChildren(product) 33 | if (length(productChildren) > 0) { 34 | productAlt <- productChildren$alt 35 | productAltName[i] <- xmlAttrs(productAlt)[["name"]] 36 | } 37 | else { 38 | productAlt <- as.character(NA) 39 | productAltName[i] <- as.character(NA) 40 | } 41 | } 42 | new("KEGGReaction", name = name, type = type, substrateName = substrateName, 43 | substrateAltName = substrateAltName, productName = productName, 44 | productAltName = productAltName) 45 | } 46 | 47 | -------------------------------------------------------------------------------- /R/pathview.R: -------------------------------------------------------------------------------- 1 | pathview <- 2 | function( 3 | gene.data=NULL, 4 | cpd.data=NULL, 5 | # xml.file=NULL, 6 | pathway.id, 7 | species = "hsa", 8 | kegg.dir=".", 9 | cpd.idtype="kegg", 10 | gene.idtype="entrez", 11 | gene.annotpkg=NULL, 12 | min.nnodes=3,# 13 | 14 | kegg.native=TRUE, 15 | map.null=TRUE, 16 | expand.node=FALSE, #g 17 | split.group=FALSE, #g 18 | 19 | map.symbol=TRUE, 20 | map.cpdname=TRUE, #g 21 | 22 | node.sum="sum", 23 | discrete=list(gene=FALSE, cpd=FALSE), 24 | limit=list(gene=1, cpd=1), 25 | bins=list(gene=10, cpd=10), 26 | both.dirs=list(gene=T, cpd=T), 27 | trans.fun = list(gene = NULL, cpd = NULL), 28 | low = list(gene = "green", cpd = "blue"), 29 | mid = list(gene = "gray", cpd = "gray"), 30 | high = list(gene = "red", cpd = "yellow"), 31 | na.col="transparent", 32 | 33 | # new.signature=TRUE, 34 | # plot.col.key=TRUE, 35 | # key.align="x", 36 | # key.pos="topright", 37 | # sign.pos="bottomright",#g 38 | ...){ 39 | 40 | #length-2 arguments check 41 | 42 | dtypes=!is.null(gene.data)+!is.null(cpd.data) 43 | cond0=dtypes==1 & is.numeric(limit) & length(limit)>1 44 | if(cond0){ 45 | if(limit[1]!=limit[2] & is.null(names(limit))) 46 | limit=list(gene=limit[1:2], cpd=limit[1:2]) 47 | } 48 | if(is.null(trans.fun)) trans.fun=list(gene = NULL, cpd = NULL) 49 | 50 | arg.len2=c("discrete", "limit","bins", "both.dirs", "trans.fun", "low", "mid", "high") 51 | for(arg in arg.len2){ 52 | obj1=eval(as.name(arg)) 53 | if(length(obj1)==1) obj1=rep(obj1,2) 54 | if(length(obj1)>2) obj1=obj1[1:2] 55 | obj1=as.list(obj1) 56 | ns=names(obj1) 57 | if(length(ns)==0 |!all(c("gene", "cpd") %in% ns)) names(obj1)=c("gene", "cpd") 58 | assign(arg, obj1) 59 | } 60 | 61 | #data.checck 62 | if(is.character(gene.data)){ 63 | gd.names=gene.data 64 | gene.data=rep(1, length(gene.data)) 65 | names(gene.data)=gd.names 66 | both.dirs$gene=FALSE 67 | ng=length(gene.data) 68 | nsamp.g=1 69 | } else if(!is.null(gene.data)){ 70 | if(length(dim(gene.data))==2){ 71 | gd.names=rownames(gene.data) 72 | ng=nrow(gene.data) 73 | nsamp.g=2 74 | } else if(is.numeric(gene.data) & is.null(dim(gene.data))){ 75 | gd.names=names(gene.data) 76 | ng=length(gene.data) 77 | nsamp.g=1 78 | } else stop("wrong gene.data format!") 79 | } else if(is.null(cpd.data)){ 80 | stop("gene.data and cpd.data are both NULL!") 81 | } 82 | gene.idtype=toupper(gene.idtype) 83 | data(bods) 84 | # data(gene.idtype.bods) 85 | if(species!="ko"){ 86 | species.data=kegg.species.code(species, na.rm=T, code.only=FALSE) 87 | } else { 88 | species.data=c(kegg.code="ko", entrez.gnodes="0", kegg.geneid="K01488", ncbi.geneid=NA, ncbi.proteinid=NA, uniprot=NA) 89 | gene.idtype="KEGG" 90 | msg.fmt="Only KEGG ortholog gene ID is supported, make sure it looks like \"%s\"!" 91 | msg=sprintf(msg.fmt, species.data["kegg.geneid"]) 92 | message("Note: ", msg) 93 | } 94 | if(length(dim(species.data))==2) { 95 | message("Note: ", "More than two valide species!") 96 | species.data=species.data[1,] 97 | } 98 | species=species.data["kegg.code"] 99 | entrez.gnodes=species.data["entrez.gnodes"]==1 100 | if(is.na(species.data["ncbi.geneid"])){ 101 | if(!is.na(species.data["kegg.geneid"])){ 102 | msg.fmt="Mapping via KEGG gene ID (not Entrez) is supported for this species,\nit looks like \"%s\"!" 103 | msg=sprintf(msg.fmt, species.data["kegg.geneid"]) 104 | message("Note: ", msg) 105 | } else{ 106 | stop("This species is not annotated in KEGG!") 107 | } 108 | } 109 | if(is.null(gene.annotpkg)) gene.annotpkg=bods[match(species, bods[,3]),1] 110 | if(length(grep("ENTREZ|KEGG|NCBIPROT|UNIPROT", gene.idtype))<1 & !is.null(gene.data)){ 111 | if(is.na(gene.annotpkg)) stop("No proper gene annotation package available!") 112 | if(!gene.idtype %in% gene.idtype.bods[[species]]) stop("Wrong input gene ID type!") 113 | gene.idmap=id2eg(gd.names, category=gene.idtype, pkg.name=gene.annotpkg, unique.map=F) 114 | gene.data=mol.sum(gene.data, gene.idmap) 115 | gene.idtype="ENTREZ" 116 | } 117 | 118 | if(gene.idtype!="KEGG" & !entrez.gnodes & !is.null(gene.data)){ 119 | id.type=gene.idtype 120 | if(id.type=="ENTREZ") id.type="ENTREZID" 121 | kid.map=names(species.data)[-c(1:2)] 122 | kid.types=names(kid.map)=c("KEGG", "ENTREZID", "NCBIPROT", "UNIPROT") 123 | kid.map2=gsub("[.]", "-", kid.map) 124 | kid.map2["UNIPROT"]="up" 125 | if(is.na(kid.map[id.type])) stop("Wrong input gene ID type for the species!") 126 | message("Info: Getting gene ID data from KEGG...") 127 | gene.idmap=keggConv(kid.map2[id.type],species) 128 | message("Info: Done with data retrieval!") 129 | kegg.ids=gsub(paste(species, ":", sep=""), "", names(gene.idmap)) 130 | in.ids=gsub(paste0(kid.map2[id.type],":"), "", gene.idmap) 131 | gene.idmap=cbind(in.ids, kegg.ids) 132 | gene.data=mol.sum(gene.data, gene.idmap) 133 | gene.idtype="KEGG" 134 | } 135 | 136 | 137 | if(is.character(cpd.data)){ 138 | cpdd.names=cpd.data 139 | cpd.data=rep(1, length(cpd.data)) 140 | names(cpd.data)=cpdd.names 141 | both.dirs$cpd=FALSE 142 | ncpd=length(cpd.data) 143 | } else if(!is.null(cpd.data)){ 144 | if(length(dim(cpd.data))==2){ 145 | cpdd.names=rownames(cpd.data) 146 | ncpd=nrow(cpd.data) 147 | } else if(is.numeric(cpd.data) & is.null(dim(cpd.data))){ 148 | cpdd.names=names(cpd.data) 149 | ncpd=length(cpd.data) 150 | } else stop("wrong cpd.data format!") 151 | } 152 | if(length(grep("kegg", cpd.idtype))<1 & !is.null(cpd.data)){ 153 | data(rn.list) 154 | cpd.types=c(names(rn.list),"name") 155 | cpd.types=tolower(cpd.types) 156 | cpd.types=cpd.types[-grep("kegg", cpd.types)] 157 | if(!tolower(cpd.idtype) %in% cpd.types) stop("Wrong input cpd ID type!") 158 | cpd.idmap=cpd2kegg(cpdd.names, in.type=cpd.idtype) 159 | cpd.data=mol.sum(cpd.data, cpd.idmap) 160 | } 161 | 162 | 163 | #parse 164 | warn.fmt="Parsing %s file failed, please check the file!" 165 | 166 | if(length(grep(species, pathway.id))>0) { 167 | pathway.name = pathway.id 168 | pathway.id = gsub(species, "", pathway.id) 169 | } else pathway.name = paste(species, pathway.id, sep = "") 170 | kfiles=list.files(path=kegg.dir, pattern="[.]xml|[.]png") 171 | npath=length(pathway.id) 172 | out.list=list()#vector(mode = "list", length = npath) 173 | 174 | #if(is.null(xml.file) | length(xml.file)!=npath)#custom xml and png file need to have the same names 175 | tfiles.xml=paste(pathway.name, "xml", sep=".") 176 | tfiles.png=paste(pathway.name, "png", sep=".") 177 | if(kegg.native) ttype=c("xml", "png") else ttype="xml" 178 | xml.file <- paste(kegg.dir, "/", tfiles.xml, sep = "") 179 | 180 | for(i in 1:npath){ 181 | ## out.list=lapply(1:npath, function(i){ 182 | if(kegg.native) tfiles=c(tfiles.xml[i],tfiles.png[i]) 183 | else tfiles=tfiles.xml[i] 184 | if(!all(tfiles %in% kfiles)){ 185 | dstatus=download.kegg(pathway.id = pathway.id[i], species = species, kegg.dir=kegg.dir, file.type=ttype) 186 | if(dstatus=="failed") { 187 | warn.fmt="Failed to download KEGG xml/png files, %s skipped!" 188 | warn.msg=sprintf(warn.fmt, pathway.name[i]) 189 | message("Warning: ", warn.msg) 190 | return(invisible(0))#out.list[[i]]=0 191 | } 192 | } 193 | 194 | if(kegg.native){ 195 | node.data=try(node.info(xml.file[i]), silent=T) 196 | if(class(node.data)[1]=="try-error"){ 197 | warn.msg=sprintf(warn.fmt, xml.file[i]) 198 | message("Warning: ", warn.msg) 199 | return(invisible(0)) 200 | } 201 | node.type=c("gene","enzyme", "compound", "ortholog") 202 | sel.idx=node.data$type %in% node.type 203 | nna.idx=!is.na(node.data$x+node.data$y+node.data$width+node.data$height) 204 | sel.idx=sel.idx & nna.idx 205 | if(sum(sel.idx)1){ 225 | plot.data.gene=node.map(gene.data, node.data, node.types=gene.node.type, node.sum=node.sum, entrez.gnodes=entrez.gnodes) 226 | kng=plot.data.gene$kegg.names 227 | kng.char=gsub("[0-9]", "", unlist(kng)) 228 | if(any(kng.char>"")) entrez.gnodes=FALSE 229 | if(map.symbol & species!="ko" & entrez.gnodes) { 230 | if(is.na(gene.annotpkg)) { 231 | warn.fmt="No annotation package for the species %s, gene symbols not mapped!" 232 | warn.msg=sprintf(warn.fmt, species) 233 | message("Warning: ", warn.msg) 234 | } else { 235 | # browser() 236 | plot.data.gene$labels=eg2id(as.character(plot.data.gene$kegg.names), category="SYMBOL", pkg.name=gene.annotpkg)[,2] 237 | mapped.gnodes=rownames(plot.data.gene) 238 | node.data$labels[mapped.gnodes]=plot.data.gene$labels 239 | } 240 | } 241 | cols.ts.gene=node.color(plot.data.gene, limit$gene, bins$gene, both.dirs=both.dirs$gene, trans.fun=trans.fun$gene, discrete=discrete$gene, low=low$gene, mid=mid$gene, high=high$gene, na.col=na.col) 242 | } else plot.data.gene=cols.ts.gene=NULL 243 | if((!is.null(cpd.data) | map.null) & sum(node.data$type=="compound")>1){ 244 | # if(sum(node.data$type=="compound")>1){ 245 | plot.data.cpd=node.map(cpd.data, node.data, node.types="compound", node.sum=node.sum) 246 | if(map.cpdname & !kegg.native) { #@ 247 | plot.data.cpd$labels=cpdkegg2name(plot.data.cpd$labels)[,2] 248 | mapped.cnodes=rownames(plot.data.cpd) 249 | node.data$labels[mapped.cnodes]=plot.data.cpd$labels 250 | } 251 | cols.ts.cpd=node.color(plot.data.cpd, limit$cpd, bins$cpd, both.dirs=both.dirs$cpd, trans.fun=trans.fun$cpd, discrete=discrete$cpd, low=low$cpd, mid=mid$cpd, high=high$cpd, na.col=na.col) 252 | } else plot.data.cpd=cols.ts.cpd=NULL 253 | 254 | if(kegg.native){ 255 | pv.pars= keggview.native(plot.data.gene=plot.data.gene, cols.ts.gene=cols.ts.gene, plot.data.cpd=plot.data.cpd, cols.ts.cpd=cols.ts.cpd, node.data=node.data, pathway.name=pathway.name[i], kegg.dir=kegg.dir, limit=limit, bins=bins, both.dirs=both.dirs,discrete=discrete, low=low, mid=mid, high=high, na.col=na.col, ...) 256 | } else{ 257 | pv.pars= keggview.graph(plot.data.gene=plot.data.gene, cols.ts.gene=cols.ts.gene, plot.data.cpd=plot.data.cpd, cols.ts.cpd=cols.ts.cpd, node.data=node.data, path.graph=gR1, pathway.name=pathway.name[i], map.cpdname=map.cpdname, split.group=split.group, limit=limit, bins=bins, both.dirs=both.dirs, discrete=discrete, low=low, mid=mid, high=high, na.col=na.col, ...) 258 | } 259 | 260 | plot.data.gene=cbind(plot.data.gene, cols.ts.gene) 261 | if(!is.null(plot.data.gene)){ 262 | cnames=colnames(plot.data.gene)[-(1:8)] 263 | nsamp=length(cnames)/2 264 | if(nsamp>1){ 265 | cnames[(nsamp+1):(2*nsamp)]=paste(cnames[(nsamp+1):(2*nsamp)], "col", sep=".") 266 | } else cnames[2]="mol.col" 267 | colnames(plot.data.gene)[-(1:8)]=cnames 268 | } 269 | plot.data.cpd=cbind(plot.data.cpd, cols.ts.cpd) 270 | if(!is.null(plot.data.cpd)){ 271 | cnames=colnames(plot.data.cpd)[-(1:8)] 272 | nsamp=length(cnames)/2 273 | if(nsamp>1){ 274 | cnames[(nsamp+1):(2*nsamp)]=paste(cnames[(nsamp+1):(2*nsamp)], "col", sep=".") 275 | } else cnames[2]="mol.col" 276 | colnames(plot.data.cpd)[-(1:8)]=cnames 277 | } 278 | # return(list(plot.data.gene=plot.data.gene, plot.data.cpd=plot.data.cpd))#out.list[[i]]= 279 | out.list[[i]]=list(plot.data.gene=plot.data.gene, plot.data.cpd=plot.data.cpd) 280 | } 281 | 282 | if(npath==1) out.list=out.list[[1]] else names(out.list)=pathway.name 283 | 284 | return(invisible(out.list)) 285 | } 286 | 287 | -------------------------------------------------------------------------------- /R/pathview.stamp.R: -------------------------------------------------------------------------------- 1 | pathview.stamp <- 2 | function(x=NULL, y=NULL, position="bottomright", graph.sizes, on.kegg=TRUE, cex=1){ 3 | if(on.kegg) labels ="Data on KEGG graph\nRendered by Pathview" 4 | else labels="-Data with KEGG pathway-\n-Rendered by Pathview-" 5 | if(is.null(x)| is.null(y)){ 6 | x=graph.sizes[1]*.80 7 | y=graph.sizes[2]/40 8 | if(length(grep('left',position))==1) x=graph.sizes[1]/40 9 | if(length(grep('top', position))==1) y=graph.sizes[2]-y 10 | } 11 | text(x=x, y=y, labels=labels, adj=0, cex = cex, font=2) 12 | } 13 | 14 | -------------------------------------------------------------------------------- /R/random.R: -------------------------------------------------------------------------------- 1 | random <- 2 | function(x, na.rm=TRUE){ 3 | if(na.rm) x=x[!is.na(x)] 4 | lenx=length(x) 5 | if(lenx==0) return(NA) 6 | ifelse(lenx==1, x, sample(x, 1)) 7 | } 8 | -------------------------------------------------------------------------------- /R/reaction2edge.R: -------------------------------------------------------------------------------- 1 | reaction2edge <- 2 | function(path, gR){ 3 | 4 | # gR=kegg2graph(path,genes=F,expand=F, split.group=F) 5 | ndata=nodes(path) 6 | rdata=(path@reactions) 7 | edata=edges(path) 8 | if(length(ndata)<2 | length(rdata)<1){ 9 | message("Note: ", "Pathway has no reaction, no conversion needed!") 10 | return(list(gR, edata, ndata)) 11 | } 12 | 13 | snames=slotNames(rdata[[1]]) 14 | rdata.tab=sapply(rdata, function(x){ 15 | sapply(snames, function(sn) slot(x, sn))#x@sn not work 16 | }) 17 | rdata.tab=t(rdata.tab) 18 | 19 | snames2=slotNames(ndata[[1]])[c(1,2,3,5)] 20 | ndata.tab=sapply(ndata, function(x){ 21 | sapply(snames2, function(sn) paste(slot(x, sn), collapse=","))#x@sn not work 22 | }) 23 | ndata.tab=t(ndata.tab) 24 | rn.tab=ndata.tab[,"reaction"]!="NA" 25 | rn.tab=ndata.tab[rn.tab,] 26 | 27 | len.ed=length(edata) 28 | if(len.ed>0){ 29 | snames3=slotNames(edata[[1]]) 30 | edata.tab=sapply(edata, function(x){ 31 | idtype=sapply(snames3[1:3], function(sn) slot(x, sn))#x@sn not work 32 | stnames=paste(sapply(x@subtype, function(y) y@name), collapse=",") 33 | stvals=paste(sapply(x@subtype, function(y) y@value), collapse=",") 34 | return(c(idtype, subtype.name=stnames,subtype.value=stvals)) 35 | }) 36 | edata.tab=t(edata.tab) 37 | } 38 | 39 | #new node.data 40 | rn.grps=split(rn.tab[,"entryID"], rn.tab[,"reaction"]) 41 | rn.len=sapply(rn.grps, length) 42 | grp.idx=which(rn.len>1) 43 | # grp nodes only for reactions with >1 genes 44 | eid=max(as.numeric(names(ndata)))+1:length(grp.idx) 45 | eid=as.character(eid) 46 | rnames=names(rn.grps) 47 | ndata.new=list() 48 | if(length(grp.idx)>0){ 49 | for(i in 1:length(grp.idx)){ 50 | j=grp.idx[i] 51 | ndata1=ndata[[rn.grps[[j]][1]]] 52 | gdata1=ndata1@graphics 53 | h1=gdata1@height*rn.len[j] 54 | gdata.new=new("KEGGGraphics",name=NA_character_, fgcolor="#000000", bgcolor="#FFFFFF", type="rectangle", 55 | x=gdata1@x, y=gdata1@y, width=gdata1@width, height=h1) 56 | ndata.new[[i]]<- new("KEGGGroup", component = rn.grps[[j]], entryID = eid[i], 57 | name="undefined", type="group", link = "NA", reaction = rnames[j], 58 | map = "NA", graphics = gdata.new) 59 | } 60 | names(ndata.new)=eid 61 | # gR=graph::addNode(eid, gR) 62 | gR=addNode(eid, gR) 63 | } 64 | ndata.new=c(ndata,ndata.new) 65 | 66 | #create edges for ECrel and maplink types of relationships 67 | slen=sapply(rdata.tab[,3], length) 68 | plen=sapply(rdata.tab[,5], length) 69 | glen=rn.len[match(rdata.tab[,"name"], rnames)] 70 | 71 | from1=to1=from2=to2=NULL 72 | for(i in 1:nrow(rdata.tab)){ 73 | subs=rdata.tab[[i,"substrateName"]] 74 | prods=rdata.tab[[i,"productName"]] 75 | enzs=rn.grps[[rdata.tab[[i,"name"]]]] 76 | rtype=rdata.tab[[i,"type"]] 77 | from1=c(from1, rep(subs, each=glen[i])) 78 | to1=c(to1, rep(enzs, slen[i])) 79 | to2=c(to2, rep(prods, each=glen[i])) 80 | from2=c(from2, rep(enzs, plen[i])) 81 | if(rtype=="reversible"){ 82 | to2=c(to2, rep(subs, each=glen[i])) 83 | from2=c(from2, rep(enzs, slen[i])) 84 | from1=c(from1, rep(prods, each=glen[i])) 85 | to1=c(to1, rep(enzs, plen[i])) 86 | } 87 | } 88 | names(from1)=names(from2)=names(to1)=names(to2)=NULL 89 | di1=duplicated(paste(from1, to1)) 90 | di2=duplicated(paste(from2, to2)) 91 | from1=from1[!di1] 92 | to1=to1[!di1] 93 | from2=from2[!di2] 94 | to2=to2[!di2] 95 | 96 | 97 | #revise edges on the graph 98 | if(len.ed>0){ 99 | eci1=edata.tab[,"type"]=="ECrel" 100 | eci2=edata.tab[,"type"]=="maplink" 101 | eci=eci1|eci2 102 | gene1=edata.tab[eci1,"entry1ID"] 103 | gene2=edata.tab[eci1,"entry2ID"] 104 | mln1=edata.tab[eci2,"entry1ID"] 105 | mln2=edata.tab[eci2,"entry2ID"] 106 | cpd=edata.tab[eci2,"subtype.value"] 107 | if(sum(eci)>0) gR=removeEdge(from=c(gene1,mln1), to=c(gene2,mln2), graph=gR) 108 | if(sum(eci2)>0) gR=addEdge(from=cpd, to=mln2, graph=gR) 109 | } 110 | gR=addEdge(from=from1, to=to1, graph=gR) 111 | #may overlap with maplink edges 112 | gR=addEdge(from=from2, to=to2, graph=gR) 113 | 114 | #revise edge.data 115 | st1=new("KEGGEdgeSubType", name="compound", value="NA") 116 | other.args=list(Class="KEGGEdge",type = "CErel", subtype = list(subtype=st1)) 117 | edata1=mapply(new, entry1ID = from1, entry2ID = to1, MoreArgs=other.args) 118 | other.args=list(Class="KEGGEdge",type = "ECrel", subtype = list(subtype=st1)) 119 | edata2=mapply(new, entry1ID = from2, entry2ID = to2, MoreArgs=other.args) 120 | other.args=list(Class="KEGGEdge",type = "ECrel", subtype = list(subtype=st1))#"maplink" 121 | edata3=NULL 122 | if(len.ed>0){ 123 | if(sum(eci2)>0){ 124 | other.args=list(Class="KEGGEdge",type = "maplink", subtype = list(subtype=st1)) 125 | edata3=mapply(new, entry1ID = cpd, entry2ID = mln2, MoreArgs=other.args) 126 | } 127 | } 128 | edata.new=c(edata1, edata2, edata3) 129 | if(len.ed>0) edata.new=c(edata[!eci],edata.new) 130 | return(list(gR, edata.new, ndata.new)) 131 | } 132 | 133 | -------------------------------------------------------------------------------- /R/render.kegg.node.R: -------------------------------------------------------------------------------- 1 | render.kegg.node <- 2 | function(plot.data, cols.ts, img, same.layer=TRUE, type=c("gene","compound")[1], text.col="black", cex=0.25){ 3 | width=ncol(img) 4 | height=nrow(img) 5 | nn=nrow(plot.data) 6 | pwids=plot.data$width 7 | if(!all(pwids==max(pwids))){ 8 | message("Info: ", "some node width is different from others, and hence adjusted!") 9 | wc=table(pwids) 10 | pwids=plot.data$width=as.numeric(names(wc)[which.max(wc)]) 11 | } 12 | 13 | if(type=="gene"){ 14 | if(same.layer!=T){ 15 | rect.out=sliced.shapes(plot.data$x+0.5, height-plot.data$y, plot.data$width/2-0.5, plot.data$height/2-0.25, cols=cols.ts, draw.border=F, shape="rectangle") 16 | text(plot.data$x+0.5, height-plot.data$y, labels = as.character(plot.data$labels), 17 | cex = cex, col = text.col) 18 | return(invisible(1)) 19 | } else{ 20 | img2=img 21 | pidx=cbind(ceiling(plot.data$x-plot.data$width/2)+1, 22 | floor(plot.data$x+plot.data$width/2)+1, 23 | ceiling(plot.data$y-plot.data$height/2)+1, 24 | floor(plot.data$y+plot.data$height/2)+1) 25 | cols.ts=cbind(cols.ts) 26 | ns=ncol(cols.ts) 27 | brk.x= sapply(plot.data$width/2, function(wi) seq(-wi, wi, length = ns+1)) 28 | for(k in 1:ns){ 29 | col.rgb=col2rgb(cols.ts[,k])/255 30 | pxr=t(apply(pidx[,1:2], 1, function(x) x[1]:x[2]))-plot.data$x-1 31 | sel=pxr>=ceiling(brk.x[k,]) & pxr<=floor(brk.x[k+1,]) 32 | for(i in 1:nn){ 33 | sel.px=(pidx[i,1]:pidx[i,2])[sel[i,]] 34 | node.rgb=img[pidx[i,3]:pidx[i,4],sel.px, 1:3] 35 | node.rgb.sum=apply(node.rgb,c(1,2), sum) 36 | blk.ind=which(node.rgb.sum==0|node.rgb.sum==1,arr.ind=T) 37 | node.rgb=array(col.rgb[,i],dim(node.rgb)[3:1]) 38 | node.rgb=aperm(node.rgb, 3:1) 39 | for(j in 1:3) node.rgb[cbind(blk.ind,j)]=0 40 | img2[pidx[i,3]:pidx[i,4],sel.px, 1:3]=node.rgb 41 | } 42 | } 43 | return(img2) 44 | } 45 | } else if(type=="compound"){ 46 | if(same.layer!=T){ 47 | nc.cols=ncol(cbind(cols.ts)) 48 | if(nc.cols>2){#block the background circle 49 | na.cols=rep("#FFFFFF", nrow(plot.data)) 50 | cir.out=sliced.shapes(plot.data$x, height-plot.data$y, plot.data$width[1], plot.data$width[1], cols=na.cols, draw.border=F, shape="ellipse", lwd=0.2) 51 | } 52 | cir.out=sliced.shapes(plot.data$x, height-plot.data$y, plot.data$width[1], plot.data$width[1], cols=cols.ts, shape="ellipse", blwd=0.2) 53 | return(invisible(1)) 54 | } else{ 55 | # col.rgb=col2rgb(cols.ts)/255 56 | blk=c(0,0,0) 57 | img2=img 58 | w=ncol(img) #repeat 59 | h=nrow(img) #repeat 60 | cidx=rep(1:w, each=h) 61 | ridx=rep(1:h, w) 62 | pidx=lapply(1:nn, function(i){ 63 | ii=which((cidx-plot.data$x[i])^2+(ridx-plot.data$y[i])^2<(plot.data$width[i])^2) 64 | imat=cbind(cbind(ridx, cidx)[rep(ii,each=3),],1:3) 65 | imat[,1:2]=imat[,1:2]+1 66 | ib=which(abs((cidx-plot.data$x[i])^2+(ridx-plot.data$y[i])^2-(plot.data$width[i])^2)<=8) 67 | ibmat=cbind(cbind(ridx, cidx)[rep(ib,each=3),],1:3) 68 | ibmat[,1:2]=ibmat[,1:2]+1 69 | return(list(fill=imat,border=ibmat)) 70 | }) 71 | 72 | cols.ts=cbind(cols.ts) 73 | ns=ncol(cols.ts) 74 | brk.x= sapply(plot.data$width, function(wi) seq(-wi, wi, length = ns+1)) 75 | for(i in 1:nn){ 76 | pxr=pidx[[i]]$fill[,2]-1-plot.data$x[i] 77 | col.rgb=col2rgb(cols.ts[i,])/255 78 | for(k in 1:ns){ 79 | sel=pxr>=brk.x[k,i] & pxr<=brk.x[k+1,i] 80 | img2[pidx[[i]]$fill[sel,]]=col.rgb[,k] 81 | } 82 | img2[pidx[[i]]$border]=blk 83 | } 84 | return(img2) 85 | } 86 | } else stop("unrecognized node type!") 87 | } 88 | 89 | -------------------------------------------------------------------------------- /R/sim.mol.data.R: -------------------------------------------------------------------------------- 1 | sim.mol.data=function(mol.type=c("gene","gene.ko","cpd")[1], id.type=NULL, species="hsa", discrete=FALSE, nmol=1000, nexp=1, rand.seed=100) 2 | { 3 | msg.fmt="\"%s\" is not a good \"%s\" \"%s\" ID type for simulation!" 4 | msg.fmt2="\"%s\" has only %i unique IDs!" 5 | set.seed(rand.seed) 6 | 7 | species=species[1] 8 | if(species!="ko"){ 9 | species.data=kegg.species.code(species, na.rm=T, code.only=FALSE) 10 | species=species.data["kegg.code"] 11 | } else if(mol.type=="gene") mol.type="gene.ko" 12 | 13 | 14 | if(mol.type=="gene"){ 15 | if(is.null(id.type)) id.type="KEGG" 16 | id.type=toupper(id.type) 17 | if(id.type=="ENTREZ") id.type="ENTREZID" 18 | kid.map=names(species.data)[-c(1:2)] 19 | kid.types=names(kid.map)=c("KEGG", "ENTREZID", "NCBIPROT", "UNIPROT") 20 | kid.map2=gsub("[.]", "-", kid.map) 21 | kid.map2["UNIPROT"]="up" 22 | 23 | data(bods) 24 | data(gene.idtype.bods) 25 | org19=bods[,"kegg code"] 26 | 27 | kegg.mapping=F 28 | if(species %in% org19){ 29 | if(!id.type %in% gene.idtype.bods[[species]]) kegg.mapping=T 30 | } else if (species != "ko") kegg.mapping=T 31 | # print(kegg.mapping) 32 | 33 | if(kegg.mapping){#!species %in% c(org19, "ko") | id.type %in% kid.types[c(1,3)]){ 34 | if(!id.type %in% kid.types){ 35 | msg=sprintf(msg.fmt, id.type, species, mol.type) 36 | stop(msg) 37 | } 38 | if(is.na(species.data[kid.map[id.type]])){ 39 | if(!is.na(species.data["kegg.geneid"])){ 40 | msg.fmt3="Only native KEGG gene ID is supported for species \"%s\"! Use KEGG ID instead." 41 | msg=sprintf(msg.fmt3, species) 42 | message("Note: ", msg) 43 | id.type="KEGG" 44 | } else{ 45 | msg.fmt3="Simulation is not supported for species \"%s\"!" 46 | msg=sprintf(msg.fmt3, species) 47 | stop(msg) 48 | } 49 | } 50 | 51 | if(id.type=="KEGG") { 52 | gid.map=keggList(species) 53 | all.mn=gsub(paste(species, ":", sep=""), "", names(gid.map)) 54 | } else { 55 | gid.map=keggConv(kid.map2[id.type],species) 56 | all.mn=gsub(paste0(kid.map2[id.type],":"), "", gid.map) 57 | } 58 | } else if(species %in% org19){ 59 | # if(id.type=="ENTREZ") id.type="ENTREZID" 60 | if(id.type=="KEGG") { 61 | gid.map=keggConv("ncbi-geneid",species) 62 | all.mn=gsub(paste(species, ":", sep=""), "", names(gid.map)) 63 | } else if(id.type %in% gene.idtype.bods[[species]]){ 64 | idx=which(bods[,3]==species) 65 | pkg.name=bods[idx,1] 66 | pkg.on=requireNamespace(pkg.name) 67 | if(!pkg.on) { 68 | # source("http://bioconductor.org/biocLite.R") 69 | # biocLite(pkg.name, suppressUpdates =TRUE) 70 | BiocManager::install(pkg.name, update=FALSE) 71 | pkg.on=requireNamespace(pkg.name) 72 | if(!pkg.on) stop(paste("Fail to install/load gene annotation package ", pkg.name, "!", sep="")) 73 | } 74 | 75 | db.obj <- eval(parse(text=paste0(pkg.name, "::", pkg.name))) 76 | all.mn <-keys(db.obj, keytype=id.type) 77 | } else stop("Wrong gene ID type!") 78 | } 79 | } else if(mol.type=="cpd"){ 80 | data(cpd.accs) 81 | data(cpd.simtypes) 82 | data(rn.list) 83 | accn=cpd.accs$ACCESSION_NUMBER 84 | if(is.null(id.type)) id.type="KEGG COMPOUND accession" 85 | if(!id.type %in% cpd.simtypes){ 86 | msg=sprintf(msg.fmt, id.type, mol.type) 87 | stop(msg) 88 | } 89 | all.mn=unique(as.character(accn[rn.list[[id.type]]])) 90 | } else if(mol.type=="gene.ko"){ 91 | data(ko.ids) 92 | all.mn=ko.ids 93 | } else stop("Invalid mol.type!") 94 | 95 | nuids=length(all.mn) 96 | if(nmol>nuids){ 97 | msg=sprintf(msg.fmt2, id.type, nuids) 98 | message("Note: ", msg) 99 | nmol=nuids 100 | } 101 | sel.mn=sample(all.mn, nmol) 102 | if(discrete) return(sel.mn) 103 | sel.mn.data=matrix(rnorm(nmol*nexp), ncol=nexp) 104 | rownames(sel.mn.data)=sel.mn 105 | colnames(sel.mn.data)=paste("exp", 1:nexp, sep="") 106 | return(sel.mn.data[, 1:nexp]) 107 | } 108 | 109 | -------------------------------------------------------------------------------- /R/sliced.shapes.R: -------------------------------------------------------------------------------- 1 | sliced.shapes=function (x0, y0, w, h, n = 100, cols, shape=c("ellipse", "rectangle")[1], draw.border=TRUE, blwd=1, ...) 2 | { 3 | cols=cbind(cols) 4 | dd=dim(cols) 5 | ns=dd[2] 6 | n=(round(n/ns)*ns+1)*2 7 | nw=length(w) 8 | # if(length(w)==1) x = seq(-w, w, length = n/2) 9 | x = sapply(w, function(wi) seq(-wi, wi, length = n/2)) 10 | 11 | if(is.character(shape)){ 12 | if(shape=="ellipse") { 13 | y = (1 - (x/w)^2)^0.5 * h 14 | } else if(shape=="rectangle"){ 15 | n=(ns+1)*2 16 | # x = seq(-w, w, length = n/2) 17 | # y = rep(h, length(x)) 18 | x = sapply(w, function(wi) seq(-wi, wi, length = n/2)) 19 | y=x 20 | y[]=h 21 | } else stop("Wrong shape specified!") 22 | } else if(is.function(shape)) y=shape(x) 23 | else stop("Wrong shape class specified!") 24 | 25 | # x = c(x, rev(x)) 26 | # y = c(y, -rev(y)) 27 | x=apply(x, 2, function(xj) c(xj, rev(xj))) 28 | y=apply(y, 2, function(yj) c(yj, -rev(yj))) 29 | 30 | np = length(x0) 31 | x0 = matrix(rep(x0, each = n), nrow = n) 32 | y0 = matrix(rep(y0, each = n), nrow = n) 33 | 34 | x1 <- x0 + x[,1:nw] 35 | y1 <- y0 + y[,1:nw] 36 | 37 | if(ns==1) { 38 | cols=cols[,1] 39 | bidx=if(draw.border) NULL else NA 40 | for (i in 1:np) polygon(x1[, i], y1[, i], col = cols[i], border=bidx, lwd=blwd, ...) 41 | } else{ 42 | # brk.x=seq(-w, w, length = ns+1) 43 | brk.x= sapply(w, function(wi) seq(-wi, wi, length = ns+1)) 44 | for (i in 1:np) { 45 | for(j in 1:ns){ 46 | if(nw==1) sel=x>=brk.x[j] & x<=brk.x[j+1] 47 | else sel=x[,i]>=brk.x[j,i] & x[,i]<=brk.x[j+1,i] 48 | polygon(x1[sel, i], y1[sel, i], col = cols[i, j], border=NA, ...) 49 | } 50 | if(draw.border) polygon(x1[, i], y1[, i], col = NA, lwd=blwd, ...) 51 | } 52 | } 53 | return(list(x, y)) 54 | } 55 | -------------------------------------------------------------------------------- /R/strfit.R: -------------------------------------------------------------------------------- 1 | strfit <- 2 | function(s, width=20){ 3 | spaces=c(" ", "\t", "\n") 4 | s=gsub("([ \t\n])+"," ", s) # s=gsub("([ \t\n])+","\\1", s) 5 | sv=strsplit(s, "")[[1]] 6 | len.s=length(sv) 7 | passed=NULL 8 | 9 | while(len.s>0){ 10 | if (length(sv) 1){ 13 | display.matrix <- sapply(subtypes, subtypeDisplay) 14 | display <- display.matrix[,1] 15 | display[c("name","value")] <- apply(display.matrix[c("name","value"), ], 1, paste, collapse = ",") 16 | dlabel=display.matrix["label", ] 17 | display["label"] <- paste(dlabel[dlabel>""], collapse = ",") 18 | } else{ 19 | etype=getType(object) 20 | if(etype=="PCrel"){ 21 | st.uk=new("KEGGEdgeSubType", name = "compound", value = "unknown") 22 | } else{ 23 | message("Info: ", paste("Edge type:", etype, "without subtype sepecified!")) 24 | st.uk=new("KEGGEdgeSubType", name = "unknown", value = "unknown") 25 | } 26 | display <- subtypeDisplay(st.uk) 27 | } 28 | return(display) 29 | } 30 | .local(object, ...) 31 | } 32 | 33 | setMethod("subtypeDisplay", signature(object = "KEGGEdge"), subtypeDisplay.kedge) 34 | -------------------------------------------------------------------------------- /R/wordwrap.R: -------------------------------------------------------------------------------- 1 | wordwrap <- 2 | function(s, width=20, break.word=FALSE){ 3 | if(!break.word) { 4 | wrapped=paste(strwrap(s,width),collapse="\n") 5 | } else wrapped=strfit(s,width) 6 | return(wrapped) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | if(FALSE){ 3 | pnames=rownames(installed.packages()) 4 | if("pathview" %in% pnames){ 5 | data(gene.idtype.list, package ="pathview") 6 | data(gene.idtype.bods, package ="pathview") 7 | data(cpd.simtypes, package ="pathview") 8 | data(korg, package ="pathview") 9 | # korg=try(read.delim(file="https://pathview.uncc.edu/data/korg.tsv", sep="\t")) 10 | # if(class(korg)[1]=="data.frame"){ 11 | # korg$ncbi.geneid=as.character(korg$ncbi.geneid) 12 | # korg=as.matrix(korg) 13 | # } else data(korg, package ="pathview") 14 | } 15 | } 16 | disclaimer="##############################################################################\nPathview is an open source software package distributed under GNU General Public License version 3 (GPLv3). Details of GPLv3 is available at http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to formally cite the original Pathview paper (not just mention it) in publications or products. For details, do citation(\"pathview\") within R. \n\nThe pathview downloads and uses KEGG data. Non-academic uses may require a KEGG license agreement (details at http://www.kegg.jp/kegg/legal.html).\n##############################################################################\n\n" 17 | packageStartupMessage(wordwrap(disclaimer, 80)) 18 | } 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # pathview R package 3 | 4 | [![](https://img.shields.io/badge/release%20version-1.46.0-blue.svg)](https://www.bioconductor.org/packages/pathview) 5 | [![](https://img.shields.io/badge/devel%20version-1.47.1-green.svg)](https://github.com/datapplab/pathview) 6 | [![](https://img.shields.io/badge/BioC%20since-2013-blue.svg)](https://www.bioconductor.org/packages/pathview) 7 | [![](https://img.shields.io/badge/GitHub%20since-2020-green.svg)](https://github.com/datapplab/pathview) 8 | 9 | ## Overview 10 | 11 | Pathview is a leading tool for pathway based data integration and visualization. It maps, integrates and renders a wide variety of biological data on relevant pathway graphs. Pathview has 3 important features: 12 | * Interpretable graphs with publication quality: KEGG view for easy interpretation and Graphviz view for better graphical control. 13 | * Strong data integration capacity. It works with: 1) all data mappable to pathways, 2) 30 of molecular ID types (genes/protein, compound/metabolite), 3) 10000+ species, 4) various data attributes and formats. 14 | * Simple and powerful: fully automated and error-resistant, seamlessly integrates with a wide range of pathway and gene set (enrichment) analysis tools. 15 | 16 | 17 | ## Citation 18 | 19 | Please cite the Pathview paper when using this open-source package. This will help the project and our team: 20 | 21 | Luo W, Brouwer C. Pathview: an R/Biocondutor package for pathway-based data integration and visualization. Bioinformatics, 2013, 29(14):1830-1831, doi: 10.1093/bioinformatics/btt285 22 | 23 | ## Installation (within R) 24 | 25 | ``` r 26 | # install from BioConductor 27 | if(!requireNamespace("BiocManager", quietly = TRUE)) 28 | install.packages("BiocManager") 29 | BiocManager::install("pathview") 30 | 31 | # Or the development version from GitHub: 32 | # install.packages("devtools") 33 | devtools::install_github("datapplab/pathview") 34 | ``` 35 | 36 | ## Quick start with demo data (R code) 37 | 38 | Note Pathview focuses on KEGG pathways, which is good for most regular analyses. If you are interested in working with other major pathway databases, including Reactome, MetaCyc, SMPDB, PANTHER, METACROP etc, you can use [SBGNview](https://github.com/datapplab/SBGNview). Please check [the quick start page](https://github.com/datapplab/SBGNview) and [the main tutorial](https://bioconductor.org/packages/devel/bioc/vignettes/SBGNview/inst/doc/SBGNview.Vignette.html) for details. 39 | 40 | ``` r 41 | library(pathview) 42 | data(gse16873.d) 43 | pv.out <- pathview(gene.data = gse16873.d[, 1], pathway.id = "04110", 44 | species = "hsa", out.suffix = "gse16873") 45 | ``` 46 | 47 | ## More information 48 | 49 | Please check the BioC page for tutorials and extra documentations. 50 | 51 | Also see the Pathview Web server for interactive GUI with example graphics. 52 | 53 | Thank you for your interest. 54 | 55 | -------------------------------------------------------------------------------- /data/KEGGEdgeSubtype.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/KEGGEdgeSubtype.rda -------------------------------------------------------------------------------- /data/bods.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/bods.rda -------------------------------------------------------------------------------- /data/cpd.accs.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/cpd.accs.rda -------------------------------------------------------------------------------- /data/cpd.names.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/cpd.names.rda -------------------------------------------------------------------------------- /data/cpd.simtypes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/cpd.simtypes.rda -------------------------------------------------------------------------------- /data/demo.paths.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/demo.paths.rda -------------------------------------------------------------------------------- /data/gene.idtype.bods.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/gene.idtype.bods.rda -------------------------------------------------------------------------------- /data/gene.idtype.list.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/gene.idtype.list.rda -------------------------------------------------------------------------------- /data/gse16873.d.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/gse16873.d.rda -------------------------------------------------------------------------------- /data/kegg.met.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/kegg.met.rda -------------------------------------------------------------------------------- /data/ko.ids.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/ko.ids.rda -------------------------------------------------------------------------------- /data/korg.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/korg.rda -------------------------------------------------------------------------------- /data/paths.hsa.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/paths.hsa.rda -------------------------------------------------------------------------------- /data/rn.list.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/data/rn.list.rda -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite pathview:") 2 | 3 | citEntry(entry="article", 4 | author = "Luo, Weijun and Brouwer, Cory", 5 | title = "Pathview: an R/Bioconductor package for pathway-based data integration and visualization", 6 | journal = "Bioinformatics", 7 | year = "2013", 8 | doi = "10.1093/bioinformatics/btt285", 9 | volume = "29", 10 | number = "14", 11 | pages = "1830-1831", 12 | textVersion = "Luo, W. and Brouwer C., Pathview: an R/Bioconductor package for pathway-based data integration and visualization. Bioinformatics, 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285" 13 | ) 14 | 15 | citFooter("This free open-source software implements academic research 16 | by the authors. Its development took a large amount of extra time and 17 | effort. If you use it, please support the project by citing the listed 18 | journal articles.") 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /inst/extdata/gse16873.demo: -------------------------------------------------------------------------------- 1 | Gene HN_1 DCIS_1 HN_2 DCIS_2 HN_3 DCIS_3 HN_4 DCIS_4 HN_5 DCIS_5 HN_6 DCIS_6 2 | "10000" 6.76598376536597 6.45833896235432 6.9217204319536 6.77449273786321 7.01056413685588 6.98677932888329 6.95876101804887 6.88819908678198 6.94991205518153 6.94858896828537 7.22042707595284 7.07015894544086 3 | "10001" 6.33947419514941 6.75534224082697 7.17736920123286 6.84259661377314 7.39261086181081 6.87947395461589 6.29657085324708 6.13003372917385 7.83122166884993 7.9423438915663 7.66524771376566 7.79925505073766 4 | "10002" 6.59175502863998 6.79030428090433 6.73535929388352 6.77325516902381 6.7000155602841 7.04188090107744 6.58628489196164 6.5010106921351 6.88493079258514 7.65249005676775 7.34250510449454 7.50079119370645 5 | "10003" 6.82209174980839 6.59053877610036 6.50845164761916 6.41185853276729 6.57564019111106 6.47091290813382 6.89688557322208 6.84887153421704 6.61514347646099 6.40708703355295 6.61817406122687 6.65161854362501 6 | "100048912" 7.35605110163376 7.31114385945287 7.3855126404074 7.33348118070917 7.39223285773688 7.42862323352024 7.31457929185033 7.36265752286998 7.37004366447065 7.3972494799858 7.3831956161515 7.43764300727089 7 | "10004" 6.94193530841294 6.85437293705211 6.88397257130438 6.83369532009087 6.85504330000182 6.85686443307118 7.02107198215727 7.05131032925118 6.76724202081043 6.77527641442359 6.87385451648887 6.80524702364719 8 | "10005" 7.902341077051 7.77607426213967 7.568751107454 8.04653854389745 7.12419819170204 7.0180757113115 8.15661044089212 8.39127450614394 6.74553507841298 6.84229936301615 6.82404422061718 6.88756426448977 9 | "10006" 7.4615456930118 8.11182774570887 8.60673569541685 8.80179570481096 8.53588746475447 8.53052881783658 7.26535075865324 7.03902530244459 8.7163593016669 8.79894982916584 8.65149091787097 8.58204741362192 10 | "10007" 8.54386991950627 8.7005611409899 8.2631231505887 8.31847481898154 8.86567125460136 8.96453360659684 8.69017548320143 8.66446788599138 8.73439291668783 8.94700169719338 8.55319710362125 8.71255631184282 11 | "10009" 6.68893982073829 6.66964338299304 6.65417441513673 6.65931578861604 6.68541464095139 6.66575431611678 6.70908351608842 6.71496634422926 6.67930372142497 6.65613014387589 6.68863587494742 6.68835504942527 12 | "1000" 7.17292051245523 7.16523085740733 7.2730515286033 7.44521361574812 7.26016592640365 7.06133449650419 7.60327602010073 7.40129093141229 7.1348083284852 7.05485356951437 7.29007582915043 7.11718471942569 13 | "10010" 7.80737602881725 7.45637608057569 7.15693855385245 6.79726681959596 8.11676538292223 8.06274538887818 6.96911758278159 7.16452818450463 7.83535651022017 7.58501413485647 7.06668509501744 6.94389472692944 14 | "100127886" 6.51069903832279 6.32530226681905 6.36367384570523 6.35354641468795 6.46072579190987 6.51645291045695 6.53591031441103 6.53874035324076 6.51411364501337 6.54007592497383 6.54905407345576 6.58786621594744 15 | "100127972" 6.85148069791494 6.84653682737295 6.85551929520544 6.85323855287747 6.8497219466604 6.85288536411587 6.85392495776727 6.85750467274487 6.8500585593404 6.85208039932076 6.85228021868682 6.85182962635792 16 | "100128008" 7.72574422665057 7.53655287358725 7.23784938121576 7.28698309527448 7.28502408755179 7.28529702382486 7.79387079056099 7.91582010926061 7.09168203666834 7.01396683685259 7.21362432450251 7.08859323768764 17 | "100128062" 8.6664158930277 9.7056312730616 9.94216120421217 9.97495756863587 9.6761329199679 9.33316649335485 8.32146946421007 8.04467051609803 9.93655189768057 10.1928588448917 9.90556646104814 10.3111603929774 18 | "100128414" 6.53678965303007 6.53658550827655 6.5366402564041 6.53665296723951 6.53670276118127 6.53667089378446 6.53679498006509 6.53671421096133 6.53664470055234 6.53664015332861 6.53679116570369 6.5366571247774 19 | "100128919" 7.78303616079193 7.60602113913067 7.85369482221654 7.78439683370338 7.61798639140685 7.61310710592395 7.67415114842118 7.83281319336442 7.59460493577793 7.62175595985548 7.58866646002092 7.52132203195 20 | "100129015" 6.96860893432489 6.95667817800457 6.96750796612186 6.96741732321978 6.96523225178018 6.96676499790712 6.98647205012908 6.99262128626134 6.95682907650651 6.94964478625559 6.96891851143154 6.96094866048636 21 | "100129128" 6.90853449300491 7.07620232824971 6.79225231094014 6.73909236139323 6.74789719939804 6.75440841059231 7.01694149890514 6.94101323976665 6.73366575290477 6.64901067480594 6.77301111102756 6.70389339415626 22 | "100129250" 7.7088583904501 7.51004035703944 7.40085411451198 7.46024869075222 7.50907997667524 7.46180755914568 7.79068588636315 7.71299291384275 7.25116513958543 7.25436450222056 7.32606269419825 7.27846540937309 23 | "100129271" 7.653694440354 7.45475575021612 7.42892191255311 7.4474416426499 7.47381495530849 7.47766206116212 7.70093201742188 7.52607941573344 7.42781710867686 7.32849241848139 7.38966143034958 7.32462199838776 24 | "100129361" 7.0475402428967 7.75300515342121 7.97549235199201 8.34366790080937 7.7864164132094 7.56782844261016 6.92701207706786 7.08684695748606 7.86925870053905 8.39378391299342 7.84568066223448 7.81303078582153 25 | "100129410" 6.888397497979 6.88835086481116 6.88818297371744 6.8881434295108 6.88812585838886 6.888469099786 6.88842955753197 6.88839121562979 6.8882368678159 6.8881377089142 6.88830523085207 6.88806402992437 26 | "100129482" 6.88954785124434 6.74250666722887 7.27692202940597 7.11924980103099 7.17277255698877 7.10328357610961 6.99329167754743 6.84201437931259 6.97327009762473 7.03573958463571 7.07838564720988 7.21878387257581 27 | "100129500" 7.26154328349332 7.12563126876401 7.01380240304445 6.9513653774799 7.11263744978147 7.2856854316728 7.41866063711631 7.26586392642217 6.87155113468084 6.87764436413769 6.95194742024766 6.97223073729938 28 | "100129503" 6.97078828027104 6.77707053361518 7.39284733581844 7.27681499433957 6.75267794797235 6.91279448743872 7.06657829934835 7.05795577741475 6.95728054447256 6.92115935833558 6.80063903270371 6.8814267289362 29 | "100129624" 7.05073677612728 7.00314803348421 6.98331326893578 6.99688956138333 7.03540047232961 7.07228314790078 7.15452819635657 7.0636616571933 7.01355828966537 7.00364865468448 7.03490183906907 7.06563635583003 30 | "100129656" 6.97003877340637 6.87876591131587 6.63409917804485 6.69628555850602 6.74099718037835 6.74553462479608 7.28453664710986 7.22135483074344 6.64306930100356 6.62658311011125 6.6205787587495 6.64387584661278 31 | "100129668" 8.01733379213804 7.97332955211915 7.26945527619135 7.35887460067645 7.34227612789625 7.41325703089936 8.17894533269511 7.71440611415106 8.0449530271842 7.82666238675585 7.72277214442734 7.72232911760411 32 | "100129686" 6.50758986224116 6.44131141930819 6.42063252395213 6.43896679806893 6.38520592820472 6.41567720536935 6.60832287569375 6.53165854189399 6.37713916899297 6.44875989554163 6.45194702166072 6.44869841374155 33 | "100129762" 7.93259657995109 6.47991780630229 7.73009507127065 7.64370809744003 7.18841487281778 7.33575337950471 6.89645177408705 6.30097566664394 7.88869276614677 6.90026700255146 7.38010454860292 7.0000087361095 34 | "100129973" 7.14634908835169 6.81503166989159 6.73427503169379 6.66190401882381 6.72763350892319 6.62387946633452 7.60734333317616 7.50013079811615 6.53210521564092 6.41564292332852 6.57868630751985 6.58604899817134 35 | "100130100" 7.50974910964405 7.34895262444607 7.32604701419926 7.29447719380561 7.26765672081937 7.29677268611676 7.47514675729476 7.44162624077723 7.33788873476256 7.1894860204485 7.3136017968616 7.30436348953909 36 | "100130130" 6.58554950789056 6.87306539523653 7.35698881582171 7.46373164123134 7.9530277192062 7.69911513438815 6.81075894195825 6.38702491905967 7.98655887405135 8.23155780733537 7.8241961847082 8.25480843575182 37 | "100130134" 6.36422638916751 6.35568204559537 6.36352138247955 6.37220955613347 6.36317529250446 6.36991239688019 6.37830835357977 6.37492422532375 6.37473396912051 6.37535826559338 6.39037889970292 6.38781994486607 38 | "100130291" 6.48829251902477 6.48843379363705 6.48837532307394 6.48858698649661 6.48837669167287 6.48854001338179 6.48828081479147 6.48828436606563 6.48845089540907 6.48864763288218 6.48827411494503 6.48839599638366 39 | "100130348" 7.50343325235688 7.34626294363167 6.84396693523621 6.92156703915089 7.15968192051316 7.21881509566083 7.93072231929736 7.71991294400217 6.8534505974417 6.78753329266844 6.99613505135066 7.08616530594978 40 | "100130449" 6.7271147463162 7.32086047141812 6.45760762112038 6.81800177027328 6.87064566249883 9.58147470214468 6.67758696066327 6.77299566137898 7.12701723297742 7.64035177850704 7.17475916308194 7.80937915103707 41 | "100130464" 7.35731862999371 7.25917274138218 7.26393769780089 7.22866368781733 7.29080875145602 7.31208459747911 7.32976417685101 7.37406334569195 7.29098698035929 7.31652448982157 7.34447895374271 7.3532767219585 42 | "100130544" 9.04282072805899 8.10152441251584 7.612968423723 8.51980044156006 8.33938490823959 8.38047310052174 8.9991054067365 9.21371074393126 7.89878685779248 7.71026647127393 8.03666343241638 8.43062952676982 43 | "100130624" 8.00951428904337 7.50854480055184 7.5823943863181 7.95858992884673 7.28938066931157 7.18311708462266 7.86468789401324 7.41544113910935 7.0286568377111 7.07624784460996 7.00766687668039 6.72364537187617 44 | "100130633" 7.76062239212325 7.95003956611231 7.43419550036782 7.4897959404714 7.983200037823 7.99205180466464 7.87591649858071 7.33276575334155 8.00612281031398 7.97727385306284 7.88342631896151 7.82835076386374 45 | "100130703" 6.27574252907505 6.26829589807983 6.25631787132883 6.26389455883389 6.29275328239862 6.29199415734066 6.28589789693674 6.29986404009631 6.27182353988717 6.278214391962 6.28128125712589 6.30396360761238 46 | "100130862" 7.0113150512485 6.99226677203283 7.21940785600776 7.34716216980266 7.66601214259027 7.95625115189608 6.97320329402953 6.93755305398827 7.82168001243515 8.14386687437445 7.8185428440279 7.77232737117814 47 | "100131005" 7.66336522954422 7.77186219514934 7.33315808586767 7.6340538962191 7.47241642399691 7.38567974308469 8.2418047241325 8.09933964879042 7.64445621593447 8.04188108154815 7.57822762582525 7.88432345868839 48 | "100131164" 7.47783227575802 7.41448914369589 7.69852885632395 7.61183547336247 7.65812562602168 7.69892862371658 7.58234598041009 7.45744850513155 7.60479854009237 7.63466249106063 7.6328194576097 7.65067588759455 49 | "100131298" 6.5472038688517 6.52291846166971 6.52824287357311 6.5239968589113 6.53802294731525 6.53499999879503 6.54530030122964 6.53588954056202 6.53947953963376 6.54926822680403 6.54345429894883 6.54083804451729 50 | "100131613" 7.26942948343194 6.76568280845204 6.65962778851633 6.7063374421102 6.91144513279742 6.94904822860264 7.20470005026926 7.38290034947929 6.8559586380635 6.94748528531118 7.08700323412246 6.97843087548027 51 | "100132129" 7.17996420351105 7.07667010700557 6.90276121412632 6.99941638311465 7.01221088838986 6.94701488105773 7.366497833211 7.50121438759142 6.90023280623434 6.85358086096495 6.82482907499854 6.92748596459944 52 | "100132302" 7.0981854470809 7.16463410345536 6.83192324615482 6.84817057955655 6.99943463281512 6.9344834543313 7.29293994844906 7.34826037484012 6.89522761720242 6.8283323883744 6.88682655168974 6.912768535199 53 | "100132323" 8.00754529124554 7.67094792911418 6.54727431422007 6.93303374749171 7.36496796266592 7.37334342366756 8.48659724908339 8.92400641863377 6.884312452033 6.72627008930305 6.91804498491114 6.928367391256 54 | "100132430" 8.61564075311854 8.26993294121283 7.66487692587944 8.59731280370359 9.18408490797944 10.405876201055 8.30676870888662 9.04529578400119 8.54390831047406 8.62189036370479 8.76919721548172 9.08451527184715 55 | "100132833" 6.99855286026902 6.62708483318596 6.49025562867378 6.56488024521588 6.37087913835668 6.4282694402563 7.27318076829694 6.86364491873323 6.52820036996748 6.85627144679766 6.48611193793182 6.70937824729083 56 | "100133166" 7.74105573158116 8.50466878856088 7.92234371185103 7.59458207887335 8.67038024519207 8.38721286801653 7.15751561198887 6.77396327466627 9.11566282080112 9.37326677473994 8.85134600593493 8.96194843894227 57 | "100133233" 6.47547345595469 6.24520190866498 7.08985490319493 6.87573405178833 6.87751453986892 7.09290432047829 6.36907108870788 6.77625690573586 7.12843164806655 6.9810234770265 7.12057690812343 7.2633298852216 58 | "100133276" 7.16238811636635 7.1476531587668 6.64081780173533 6.78956861089913 7.43804693893086 7.91450956897985 7.19073411041361 7.20905506412394 7.77608964209803 8.08601583946935 8.00160400571174 7.8626248722535 59 | "10013" 8.16144788657824 7.7827339288886 7.6574511833261 7.64080935482526 7.58410896572289 7.62254367553094 8.495311476235 8.38935871696473 7.52773011304308 7.5603256683363 7.6059234978731 7.66778625803853 60 | "10014" 8.33136670072334 7.8822607396955 7.4110739594086 7.42264471890645 7.36446375297582 7.24545610285069 8.64790643376593 7.88810248742183 7.286896879087 7.17869068682472 7.2255205223285 7.20427847362722 61 | "10015" 8.46477454337469 9.54126250356314 8.81848514336941 8.89170016661472 8.74122293098327 8.48763166626297 7.93482273625348 8.29943586282276 9.46642176978055 9.44217206041184 9.09328678200174 9.31357544161748 62 | "10016" 7.60825921917482 8.47552357354571 8.11013311866387 8.23898522602953 8.09897572873202 7.74917606839952 7.62856123434315 7.12235230298627 8.50636349353375 8.80658605143389 8.42282652153773 8.75924757111853 63 | "10017" 6.9928064424649 6.98657492948753 6.99783557459428 7.0023433146994 6.99394268744544 7.00342206081677 7.01618148346852 7.01202513190262 6.99169052964834 6.99038494919476 6.99860921725143 6.99508874089504 64 | "10018" 6.63502723363255 6.61525858471589 6.60724239667109 6.62477334707795 6.5794374310588 6.62887119556758 6.68724076440916 6.68077181269749 6.56839112421838 6.57621049107556 6.6173256044474 6.60677730673211 65 | "10019" 8.13069298366366 8.38511109910508 8.64015240657852 8.17396769551964 8.70062916272128 9.01916070305403 7.90285541706217 7.55855238649854 9.342354323992 9.35126476406816 9.52358784542429 8.8842451071271 66 | "1001" 8.95004665321129 8.27461258490732 9.61475994902559 7.86824681295989 9.03327157802184 8.34564900122246 9.51580636142602 9.5865820871096 8.59833653509284 7.31034768128832 8.79194348070755 7.8480378834158 67 | "10020" 8.13038360918743 8.0744155917971 7.84874034037227 7.39736196196083 8.84566046173095 7.85497777491097 7.46980135508369 8.18501760913687 7.98760842284376 7.68983705057488 7.80303917357512 7.64134397681064 68 | "10021" 7.16905627734712 7.08337279070154 7.00445337975173 7.00591873466378 7.10580817912265 7.08230768472543 7.15433809996883 7.1789781694798 7.05164689612559 7.04182407068448 7.08843042741943 7.08764816141836 69 | "10022" 7.11125858840688 7.02157829439842 7.03380902519319 7.0097286917614 7.0280445336556 7.04209018010691 7.1612564026209 7.14580527939338 7.01287609291583 7.03888486427196 7.08557058549417 7.09502050422823 70 | "10023" 7.9730260611853 8.20655911308719 7.0416033275529 7.12542949964577 7.73170595169067 7.40350420922596 8.28455071357716 7.8009794553312 7.63064703375477 7.73937100754296 7.4266406885425 7.80784223126564 71 | "10024" 7.59857333901546 7.50679543672521 7.38795587421104 7.3881824534848 7.30478028686528 7.57817756018511 7.74652487614768 7.86826211885111 7.17745565079673 7.17731440448024 7.2347449262628 7.3024320910583 72 | "10025" 8.27920677366796 7.82313249998422 7.24364676130379 7.36301242513049 7.2026950947177 7.04615920232382 8.88812500573416 8.83164421042596 6.71066546967178 6.60696113378163 6.71897681602098 6.75123506944202 73 | "10026" 7.53429249192604 8.1643203335715 7.96521742506522 7.67665257751426 8.62417567620077 8.57131313369432 7.5623849892922 7.4532861074099 8.59802365667989 8.82993855614132 8.2033105681625 8.57061296247421 74 | "1002" 7.19291899902761 7.0905590805963 7.09336498111414 7.03365586201655 7.10708289379885 7.15893391497499 7.46268513702198 7.26959518759163 7.04338150048656 6.91919816003953 6.99370547613598 7.01631815967253 75 | "10036" 7.5735076871956 7.54079034287258 7.20469457587254 7.2468030147944 7.23809796223218 7.24147436334775 7.8498521479531 7.93351816802025 7.10725207753276 7.07238815506473 7.13310529763948 7.14913446809322 76 | "10038" 8.1741527151226 7.80973257433881 7.9183591632659 7.73540794212955 7.90593273446564 7.42826078850866 7.61992559351261 7.32614334640811 8.10536298708116 8.022110501446 8.027427391781 8.11823905181417 77 | "10039" 7.9772862680375 7.8765464469037 7.18292886999426 7.1427725557375 7.24926907243116 7.36058941952526 8.56656517970884 8.22889243386636 6.94440055348982 6.94936285014047 7.08331074280551 7.10149079112303 78 | "1003" 7.38494209484367 8.37195291256441 7.86485805783088 6.7242135247869 7.40189319569772 6.35527386956607 7.54620638383971 6.87757846469001 7.04972079950674 6.68853455614205 7.35672823353264 7.01281473719223 79 | "10040" 7.48279509238881 7.75123847271833 7.76059835975516 8.11146050754202 7.85809467081269 7.46807205231142 6.95941251566534 7.43060706732304 8.25345476288736 8.39986992266175 7.7113314848815 8.06038534036032 80 | "10042" 8.9553023378471 8.82886469888152 7.68929491520751 7.91877751816493 7.85274157167665 7.72798771441814 9.04161965963804 9.21464857134236 7.95801787108225 7.98181578451337 7.98543010877742 7.91942395690244 81 | "10043" 7.74362536102008 7.65802146817838 7.33770580047693 7.61238942292750 7.39608256114658 7.44027226300567 7.9528613043337 7.73702927902727 7.25084343164572 7.23173804212467 7.15436192193162 7.2904813198142 82 | "10045" 7.01987334373326 6.71713681843228 6.94544325206436 7.1325216383909 6.90209118738598 7.10376075244813 7.5358236331921 6.85367988406145 6.89332910617373 6.68028019157537 6.78460002202237 6.76818727690206 83 | "10046" 7.50432243393645 7.1976683397036 7.50114709191406 7.97360358094268 7.41901046759383 7.76945255591037 7.16196775159334 7.53541888191397 7.54022476605233 7.62841142834246 7.61617401129424 7.38489000450579 84 | "10047" 7.88564690599913 7.60707193528116 7.19809624887333 7.2366234420553 7.47139710393964 7.31287306081498 7.76679587237019 8.26384351177413 7.08600857436309 7.08915454099576 7.2756123486552 7.31227434259121 85 | "10048" 7.42946543771255 7.85584080530638 8.18126666234535 8.2201299293501 7.88122536717491 7.8458258949549 7.50542179084694 7.4839212618316 7.99659498707774 8.30145701632484 8.14803632347738 8.21670419741246 86 | "10049" 7.52829852145846 7.83936603872292 8.16173179883128 8.0478716658337 7.97049683689396 8.01422165015402 7.57355330795455 7.3891037831502 8.12301218209275 8.1772547253146 7.7949898447989 7.77536296830122 87 | "1004" 6.64203085676364 6.62258569876232 6.57008893984319 6.53027831055039 6.63098417275318 6.63385128466037 6.56884761749308 6.68196072492815 6.67762410219436 6.63144694442686 6.70152395519862 6.72277354416746 88 | "10050" 7.01457411846374 6.92514950033194 7.13362161214875 7.11691408831023 7.11081059051858 7.02575384141236 7.03951174700645 7.02763808415662 7.18604921985823 7.14526842833134 7.19865145682744 7.15284554084904 89 | "10051" 7.1384626921355 7.48239206348516 7.45642430146023 7.86955671730084 7.94559256839882 8.10279672102522 7.12995645906576 7.45255610282691 8.17749877462922 8.50874693254364 8.11752530349045 8.22813396078515 90 | "10052" 6.96649487450417 6.98519504723348 7.22427797718582 7.2142243361814 7.17056325107947 7.25102840016921 6.78110757500534 6.99085346505115 7.12851155384791 7.07604242201562 7.19651325691749 7.1356991099356 91 | "10053" 8.71124988371178 8.27324481927306 7.57559579694504 8.07389422321033 7.6529116880718 7.90163725109562 8.77229441878591 9.03016831791525 7.7604967034153 7.88017081736863 8.25775301608048 8.22372873614197 92 | "10054" 10.4942903146742 10.6261345033758 10.1429250383237 10.0556068830766 10.4944066162311 9.88406366472148 9.6353641564544 9.97310210923315 10.2595309869268 10.1716615177174 10.1069921602148 9.8545368213797 93 | "10055" 9.02610529782937 9.19666755290076 8.71643106020271 8.94197741400755 8.81979112012197 9.12527047886394 9.28510584400762 9.58533465047127 8.5288940154976 8.60859745970832 8.64276411352253 8.5601173257555 94 | "10057" 8.44739909844397 9.45846434266294 8.6113838943431 9.38438609689677 9.00176271006236 9.78831971487636 8.85363777133719 9.69594092577111 9.21949475186613 9.6071000734881 8.96595599208737 9.29382707292894 95 | "10058" 8.08635450517026 7.92774094217994 7.82741964843478 8.02855243314923 7.81940277884466 7.64853696052288 8.31998048723328 8.71331296732598 7.69296945709279 7.58815835765082 7.63303376930125 7.60494191562106 96 | "10059" 6.91417167299075 7.3254175245798 7.52998123365251 7.7265462780736 7.27834214992821 7.39532105211144 6.7857372770489 6.91249749791706 7.6995708460708 7.88140255464909 7.52956950771197 7.63522901752163 97 | "1005" 6.68038888801687 6.67434672149919 7.01783991921612 7.05355228982381 6.96396204127822 7.06042842388268 6.65011603696249 6.60596284121414 7.0040735257899 7.0788389094873 6.948906697075 7.0992286011253 98 | "10060" 6.75912793313802 6.74962929431628 6.7429388827367 6.74109430917118 6.76162275760677 6.76533253583324 6.78456029776402 6.78893604741277 6.75532705634256 6.7592087298793 6.76874773112557 6.7715872056185 99 | "10061" 7.78559960855733 7.3428278945884 6.90000408470055 6.91291718482105 7.13876876058173 7.07607036083061 8.08755759901448 7.79779921704708 7.02171586596342 6.76549923246714 6.93291143704938 6.92318127704829 100 | "10062" 8.03397997091605 7.55586897773572 7.53457768125377 8.03114893571229 7.96441975491423 8.0262414067168 8.36834169992882 7.89492777060131 7.44478398523672 7.43911285749807 7.41201555939977 7.37718374400536 101 | "10066" 9.34030167986838 9.03077720932157 8.59128468914064 9.01593609728877 8.50369612391452 8.67270538379976 9.67572975152504 9.26043782376736 7.96043289155506 8.13510735067635 8.09241437202971 8.11864513349024 102 | -------------------------------------------------------------------------------- /inst/extdata/hsa04110.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/inst/extdata/hsa04110.png -------------------------------------------------------------------------------- /inst/extdata/kegg.sigmet.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/inst/extdata/kegg.sigmet.rda -------------------------------------------------------------------------------- /inst/unitTests/test_mapper.R: -------------------------------------------------------------------------------- 1 | test_id2eg <- function() { 2 | symbs.good=c("A1BG","SERPINA3","PLIN2") 3 | egs.good=c("1","12","123") 4 | 5 | egs.mapped1=id2eg(symbs.good, category="SYMBOL", org = "Hs")[,2] 6 | checkEquals(egs.mapped1, egs.good) 7 | 8 | egs.mapped2=id2eg(c(egs.good,symbs.good), category="SYMBOL", org = "Hs")[,2] 9 | checkEquals(egs.mapped2, c(rep(NA,3), egs.good)) 10 | 11 | checkException(id2eg(symbs.good, category="SYMBOL", org = "Hss")) 12 | checkException(id2eg(symbs.good, category="SYMBL", org = "Hs")) 13 | } 14 | 15 | test_eg2id <- function() { 16 | symbs.good=c("A1BG","SERPINA3","PLIN2") 17 | egs.good=c("1","12","123") 18 | 19 | symbs.mapped1=eg2id(egs.good, category="SYMBOL", org = "Hs")[,2] 20 | checkEquals(symbs.mapped1, symbs.good) 21 | 22 | symbs.mapped2=eg2id(c(egs.good,symbs.good), category="SYMBOL", org = "Hs")[,2] 23 | checkEquals(symbs.mapped2, c(symbs.good,rep(NA,3))) 24 | 25 | checkException(eg2id(egs.good, category="SYMBOL", org = "Hss")) 26 | checkException(eg2id(egs.good, category="SYMBL", org = "Hs")) 27 | } 28 | 29 | test_cpdidmap <- function() { 30 | cpd.cas=c("49620-06-6", "104206-65-7", "507-60-8", "7235-40-7", 31 | "17440-83-4") 32 | 33 | #map and reverse map being consistent 34 | id.map.cas <- cpdidmap(in.ids = cpd.cas, in.type = "CAS Registry Number", 35 | out.type = "KEGG COMPOUND accession") 36 | id.map.cas2 <- cpdidmap(in.ids = id.map.cas[,2], 37 | in.type = "KEGG COMPOUND accession", 38 | out.type = "CAS Registry Number") 39 | na.idx=is.na(id.map.cas[,2]) 40 | checkEquals(id.map.cas2[,2], cpd.cas[!na.idx]) 41 | 42 | #in.ids and in.type not matching 43 | id.map.cas3 <- cpdidmap(in.ids = cpd.cas, in.type = "KEGG COMPOUND accession", 44 | out.type = "CAS Registry Number") 45 | checkTrue(all(is.na(id.map.cas3[,2]))) 46 | 47 | #in.type and out.type are similar, no need to map 48 | id.map.cas4<-cpdidmap(in.ids = cpd.cas, in.type = "KEGG COMPOUND accession", 49 | out.type = "kegg") 50 | checkEquals(id.map.cas4, 0) 51 | 52 | #wrong in.type, error 53 | checkException(cpdidmap(in.ids = cpd.cas, in.type = "Unknown accession", 54 | out.type = "kegg")) 55 | } 56 | -------------------------------------------------------------------------------- /man/combineKEGGnodes.Rd: -------------------------------------------------------------------------------- 1 | \name{combineKEGGnodes} 2 | \alias{combineKEGGnodes} 3 | \alias{reaction2edge} 4 | %- Also NEED an '\alias' for EACH other topic documented here. 5 | \title{ 6 | Special treatment of nodes or edges for KEGG pathway rendering 7 | } 8 | \description{ 9 | \code{combineKEGGnodes} combines nodes into a group in a KEGG pathway 10 | graph. 11 | \code{reaction2edge} converts reactions into edges in KEGG pathway 12 | graph. 13 | } 14 | \usage{ 15 | combineKEGGnodes(nodes, graph, combo.node) 16 | reaction2edge(path, gR) 17 | } 18 | %- maybe also 'usage' for other objects documented here. 19 | \arguments{ 20 | \item{nodes}{ 21 | character, names of the names to be combined. 22 | } 23 | \item{graph, gR}{ 24 | a object of "graphNEL" class, the graph parsed and converted from KEGG 25 | pathway. 26 | } 27 | \item{path}{ 28 | a object of "KEGGPathway" class, the parsed KEGG pathway. 29 | } 30 | \item{combo.node}{ 31 | character, the name of result combined node. 32 | } 33 | } 34 | \details{ 35 | \code{combineKEGGnodes} not only combines nodes in the graph object, 36 | but also corresponding node data in the KEGG pathway object. This 37 | function is needed for KEGG-defined group nodes and parsed enzyme 38 | groups involved in the same reaction. 39 | \code{reaction2edge} converts a reaction into 2 consecutive edges 40 | between substrate and enzyme and enzyme and product. This function is 41 | needed as to faithfully show the compound-enzyme nodes and their 42 | interactions in Graphviz-style view of KEGG pathway. 43 | } 44 | \value{ 45 | The results returned by \code{combineKEGGnodes} is a combined graph 46 | of "graphNEL" class. 47 | The results returned by \code{reaction2edge} is a list of 3 48 | elements: \code{gR}, the converted graph ("graphNEL"); edata.new, the 49 | new edge data ("KEGGEdge"); ndata.new, the new node data ("KEGGNode"). 50 | } 51 | \references{ 52 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 53 | pathway based data integration and visualization. Bioinformatics, 54 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 55 | } 56 | \author{ 57 | Weijun Luo 58 | } 59 | 60 | \seealso{ 61 | \code{\link{node.info}} the main parser function 62 | } 63 | % Add one or more standard keywords, see file 'KEYWORDS' in the 64 | % R documentation directory. 65 | \keyword{ ~kwd1 } 66 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 67 | -------------------------------------------------------------------------------- /man/cpd.accs.Rd: -------------------------------------------------------------------------------- 1 | \name{cpd.accs} 2 | \alias{cpd.accs} 3 | \alias{cpd.names} 4 | \alias{kegg.met} 5 | \alias{ko.ids} 6 | \alias{rn.list} 7 | \alias{gene.idtype.list} 8 | \alias{gene.idtype.bods} 9 | \alias{cpd.simtypes} 10 | \docType{data} 11 | \title{ 12 | Mapping data between compound or gene IDs and KEGG accessions 13 | } 14 | \description{ 15 | Mapping data between compound or gene IDs and KEGG accessions 16 | } 17 | \usage{ 18 | data(cpd.accs) 19 | data(cpd.names) 20 | data(kegg.met) 21 | data(ko.ids) 22 | data(rn.list) 23 | data(gene.idtype.list) 24 | data(gene.idtype.bods) 25 | data(cpd.simtypes) 26 | } 27 | \format{ 28 | cpd.accs is a data frame with 30054 observations on the following 4 variables. 29 | cpd.names is a data frame with 12314 observations on the following 5 30 | variables. 31 | kegg.met is a character matrix of 694 rows and 3 columns. 32 | ko.ids is a character vector 8511 KEGG ortholog gene IDs, as used in 33 | KEGG ortholog pathways. 34 | rn.list is a namedlist of 21 vectors. Each vector records the row numbers 35 | for one of 21 dfferent compound ID types in cpd.accs data.frame. 36 | gene.idtype.list is a character vector of 13 common gene, transcript 37 | or protein ID types. Note some ID types are species specific, for 38 | example TAIR or ORF. 39 | gene.idtype.bods is a list of character vectors ofcommon gene, transcript 40 | or protein ID types for the 19 major research species in bods. Each 41 | element corresponds to a species. 42 | cpd.simtypes is a character vector of 7 common compound related ID 43 | types, each of them has over 1000 unique entries. Hence these ID types 44 | are good for generating simulation compound data. 45 | } 46 | \source{ 47 | ftp://ftp.ebi.ac.uk/pub/databases/chebi/Flat_file_tab_delimited/ 48 | 49 | http://www.genome.jp/kegg-bin/get_htext?br08001.keg 50 | } 51 | \examples{ 52 | data(cpd.accs) 53 | data(rn.list) 54 | names(rn.list) 55 | cpd.accs[rn.list[[1]][1:4],] 56 | lapply(rn.list[1:4], function(rn) cpd.accs[rn[1:4],]) 57 | 58 | data(kegg.met) 59 | head(kegg.met) 60 | } 61 | \keyword{datasets} 62 | -------------------------------------------------------------------------------- /man/cpdidmap.Rd: -------------------------------------------------------------------------------- 1 | \name{cpdidmap} 2 | \alias{cpdidmap} 3 | \alias{cpd2kegg} 4 | \alias{cpdkegg2name} 5 | \alias{cpdname2kegg} 6 | 7 | \title{ 8 | Mapping between compound IDs and KEGG accessions 9 | } 10 | \description{ 11 | These auxillary compound ID mappers connect KEGG compound/glycan/drug 12 | accessions to compound names/synonyms and other commonly used 13 | compound-related IDs. 14 | } 15 | \usage{ 16 | cpdidmap(in.ids, in.type, out.type) 17 | cpd2kegg(in.ids, in.type) 18 | cpdkegg2name(in.ids, in.type = c("KEGG", "KEGG COMPOUND accession")[1]) 19 | cpdname2kegg(in.ids) 20 | } 21 | 22 | \arguments{ 23 | \item{in.ids}{ 24 | character, input IDs to be mapped. 25 | } 26 | \item{in.type}{ 27 | character, the input ID type, needs to be either "KEGG" (including 28 | compound, glycan and durg) or one of the 29 | compound-related ID types used in CHEMBL database. For a full list of 30 | the CHEMBL IDs, do \code{data(rn.list); names(rn.list)}. For 31 | \code{cpdkegg2name)}, default in.type = "KEGG". 32 | } 33 | \item{out.type}{ 34 | character, the output ID type, needs to be either "KEGG" (including 35 | compound/glycan/durg) or one of the 36 | compound-related ID types used in CHEMBL database. For a full list of 37 | the CHEMBL IDs, do \code{data(rn.list); names(rn.list)}. 38 | } 39 | } 40 | \details{ 41 | character, the output ID type, needs to be either "KEGG" or one of the 42 | compound-related ID types used in CHEMBL database. For a full list of 43 | the CHEMBL IDs, do \code{data(rn.list); names(rn.list)}. 44 | 45 | KEGG has its own compound ID system, including compound (glycan/durg) 46 | accessions. Therefore, all compound 47 | data need to be mapped to KEGG accessions when working with KEGG 48 | pathways. Function \code{cpd2kegg} does this mapping by calling 49 | \code{cpdname2kegg} or \code{cpdidmap}. On the other hand, we 50 | frequently want to check or show compound full names or other commonly 51 | used IDs instead of the less informative KEGG accessions when working with KEGG compound nodes, 52 | Functions \code{cpdkegg2name} and \code{cpdidmap} do this reverse mapping. 53 | These functions are 54 | written as part of the Pathview mapper module, they are equally useful 55 | for other compound ID or data mapping tasks. 56 | The use of these functions depends on a few data objects: 57 | "cpd.accs", "cpd.names", "keg.met" and "rn.list", which are included in 58 | this package. To access them, use \code{data()} function. 59 | } 60 | \value{ 61 | a 2-column character matrix recording the mapping between input IDs to 62 | the target ID type. 63 | } 64 | \references{ 65 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 66 | pathway based data integration and visualization. Bioinformatics, 67 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 68 | } 69 | \author{ 70 | Weijun Luo 71 | } 72 | 73 | \seealso{ 74 | \code{\link{eg2id}} and \code{\link{id2eg}} the auxillary gene ID mappers, 75 | \code{\link{mol.sum}} the auxillary molecular data mapper, 76 | \code{\link{node.map}} the node data mapper function. 77 | } 78 | \examples{ 79 | data(cpd.simtypes) 80 | #generate simulated compound data named with non-KEGG ("CAS Registry Number")IDs 81 | cpd.cas <- sim.mol.data(mol.type = "cpd", id.type = cpd.simtypes[2], 82 | nmol = 10000) 83 | #construct map between non-KEGG ID and KEGG ID ("KEGG COMPOUND accession") 84 | id.map.cas <- cpdidmap(in.ids = names(cpd.cas), in.type = cpd.simtypes[2], 85 | out.type = "KEGG COMPOUND accession") 86 | #Map molecular data onto standard KEGG IDs 87 | cpd.kc <- mol.sum(mol.data = cpd.cas, id.map = id.map.cas) 88 | #check the results 89 | head(cpd.cas) 90 | head(id.map.cas) 91 | head(cpd.kc) 92 | 93 | #map KEGG ID to compound name 94 | cpd.names=cpdkegg2name(in.ids=id.map.cas[,2]) 95 | head(cpd.names) 96 | } 97 | % Add one or more standard keywords, see file 'KEYWORDS' in the 98 | % R documentation directory. 99 | \keyword{ ~kwd1 } 100 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 101 | -------------------------------------------------------------------------------- /man/demo.data.Rd: -------------------------------------------------------------------------------- 1 | \name{demo.data} 2 | \alias{demo.paths} 3 | \alias{gse16873.d} 4 | \alias{paths.hsa} 5 | \docType{data} 6 | \title{ 7 | Data for demo purpose 8 | } 9 | \description{ 10 | demo.paths includes pathway ids and optimal plotting parameters when 11 | calling pathview. 12 | 13 | GSE16873 is a breast cancer study (Emery et al, 2009) downloaded from 14 | Gene Expression Omnibus (GEO). Dataset gse16873 is pre-processed using FARMS 15 | method and includes 6 patient cases, 16 | each with HN (histologically normal) and DCIS (ductal carcinoma in situ) 17 | RMA samples. The same dataset is also used in \code{gage} 18 | package. Dataset gse16873.d includes the gene expression changes of two 19 | pairs of DCIS vs HN samples. 20 | 21 | paths.hsa includes the full list of human pathway ID/names from KEGG. 22 | } 23 | \usage{ 24 | data(demo.paths) 25 | data(gse16873.d) 26 | data(paths.hsa) 27 | } 28 | \format{ 29 | demo.paths is a named list with ids and plotting parameters for 3 30 | pathways. For details do: 31 | 32 | \code{data(demo.paths); demo.paths} 33 | 34 | gse16873.d is a numeric matrix with over 10000 rows (genes) and 2 35 | columns (samples). For details do: 36 | \code{data(gse16873.d); str(gse16873.d)}. 37 | 38 | paths.hsa is a named vector mapping KEGG pathway ID to human pathway names. 39 | } 40 | \source{ 41 | http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE16873 42 | } 43 | \keyword{datasets} 44 | -------------------------------------------------------------------------------- /man/download.kegg.Rd: -------------------------------------------------------------------------------- 1 | \name{download.kegg} 2 | \alias{download.kegg} 3 | 4 | \title{ 5 | Download KEGG pathway graphs and associated KGML data 6 | } 7 | \description{ 8 | This is the downloader function for KEGG pathways, automatically 9 | download graph images and associated KGML data. 10 | } 11 | \usage{ 12 | download.kegg(pathway.id = "00010", species = "hsa", kegg.dir = ".", 13 | file.type=c("xml", "png")) 14 | } 15 | 16 | \arguments{ 17 | \item{pathway.id}{ 18 | character, 5-digit KEGG pathway IDs. Default pathway.id="00010". 19 | } 20 | \item{species}{ 21 | character, either the KEGG code, scientific name or the common name of 22 | the target species. When KEGG ortholog pathway is considered, 23 | species="ko". Default species="hsa", it is equivalent to use either 24 | "Homo sapiens" (scientific name) or "human" (common name). 25 | } 26 | \item{kegg.dir}{ 27 | character, the directory of KEGG pathway data file (.xml) and image file 28 | (.png). Default kegg.dir="." (current working directory). 29 | } 30 | \item{file.type}{ 31 | character, the file type(s) to be downloaded, either KEGG pathway data 32 | file (xml) or image file (png). Default include both types. 33 | } 34 | } 35 | \details{ 36 | Species can be specified as either kegg code, scientific name or the 37 | common name. Scientific name and the common name are always mapped to 38 | kegg code first. 39 | Length of species should be either 1 or the same as pathway.id, if 40 | not, the same set of pathway.id will be applied to all species. 41 | } 42 | \value{ 43 | a named character vector, either "succeed" or "failed", indicating the 44 | download status of corresponding pathways. 45 | } 46 | \references{ 47 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 48 | pathway based data integration and visualization. Bioinformatics, 49 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 50 | } 51 | \author{ 52 | Weijun Luo 53 | } 54 | 55 | 56 | \seealso{ 57 | \code{\link{pathview}} the main function, 58 | \code{\link{node.info}} the parser, 59 | } 60 | \examples{ 61 | data(demo.paths) 62 | sel.2paths=demo.paths$sel.paths[1:2] 63 | download.kegg(pathway.id = sel.2paths, species = "hsa") 64 | #pathway files should be downloaded into current working directory 65 | } 66 | % Add one or more standard keywords, see file 'KEYWORDS' in the 67 | % R documentation directory. 68 | \keyword{ ~kwd1 } 69 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 70 | -------------------------------------------------------------------------------- /man/eg2id.Rd: -------------------------------------------------------------------------------- 1 | \name{eg2id} 2 | \alias{eg2id} 3 | \alias{id2eg} 4 | \alias{geneannot.map} 5 | %- Also NEED an '\alias' for EACH other topic documented here. 6 | \title{ 7 | Mapping between different gene ID and annotation types 8 | } 9 | \description{ 10 | These auxillary gene ID mappers connect different gene ID or annotation 11 | types, especially they are used to map Entrez Gene ID to external gene, 12 | transcript or protein IDs or vise versa. 13 | } 14 | \usage{ 15 | eg2id(eg, category = gene.idtype.list[1:2], org = "Hs", pkg.name = NULL, 16 | ...) 17 | id2eg(ids, category = gene.idtype.list[1], org = "Hs", pkg.name = NULL, ...) 18 | geneannot.map(in.ids, in.type, out.type, org="Hs", pkg.name=NULL, 19 | unique.map=TRUE, na.rm=TRUE, keep.order=TRUE) 20 | } 21 | \arguments{ 22 | \item{eg}{ 23 | character, input Entrez Gene IDs. 24 | } 25 | \item{ids}{ 26 | character, input gene/transcript/protein IDs to be converted to Entrez 27 | Gene IDs. 28 | } 29 | \item{in.ids}{ 30 | character, input gene/transcript/protein IDs to be converted or mapped 31 | to other Gene IDs or annotation types. 32 | } 33 | \item{category}{ 34 | character, for \code{eg2id} the output ID types to map from Entrez Gene, 35 | d to be c("SYMBOL", "GENENAME"); for \code{id2eg}, the input ID type to 36 | be mapped to Entrez Gene, default to be "SYMBOL". 37 | } 38 | \item{in.type}{ 39 | character, the input gene/transcript/protein ID type to be mapped or 40 | converted to other ID/annotation types. 41 | } 42 | \item{out.type}{ 43 | character, the output gene/transcript/protein ID type to be mapped or 44 | converted to other ID/annotation types. 45 | } 46 | \item{org}{ 47 | character, the two-letter abbreviation of organism name, or KEGG species 48 | code, or the common species name, used to determine the gene annotation 49 | package. For all potential values check: \code{data(bods); 50 | bods}. Default org="Hs", and can also be "hsa" or "human" (case 51 | insensitive). Only effective when pkg.name is not NULL. 52 | } 53 | \item{pkg.name}{ 54 | character, name of the gene annotation package. This package should be 55 | one of the standard annotation packages from Bioconductor, such as 56 | "org.Hs.eg.db". Check \code{data(bods); bods} for a full list of standard 57 | annotation packages. You may also use your custom annotation package 58 | built with AnnotationDbi, the Bioconductor Annotation Database 59 | Interface. Default pkg.name=NULL, hence argument \code{org} should be 60 | specified. 61 | } 62 | \item{unique.map}{ 63 | logical, whether to combine multiple entries mapped to the same input ID 64 | as a single entry (separted by "; "). Default unique.map=TRUE. 65 | } 66 | \item{na.rm}{ 67 | logical, whether to remove the lines where input ID is not mapped (NA 68 | for mapped entries). Default na.rm=TRUE. 69 | } 70 | \item{keep.order}{ 71 | logical, whether to keep the original input order even with all unmapped 72 | input IDs. Default keep.order=TRUE. 73 | } 74 | \item{\dots}{ 75 | other arguments to be passed to geneannot.map function. 76 | } 77 | } 78 | \details{ 79 | KEGG uses Entrez Gene ID as its standard gene ID. Therefore, all gene 80 | data need to be mapped to Entrez Genes when working with KEGG 81 | pathways. Function \code{id2eg} does this mapping. On the other hand, we 82 | frequently want to check or show gene symbols or full names instead of 83 | the less informative Entrez Gene ID when working with KEGG gene nodes, 84 | Function \code{eg2id} does this reverse mapping. Both \code{id2eg} and 85 | \code{eg2id} are wrapper functions of \code{geneannot.map} function. The 86 | latter can be used to map between a range of major 87 | gene/transcript/protein IDs or annotation types, not just Entrez Gene ID. 88 | These functions are written as part of the Pathview mapper module, they 89 | are equally useful for other gene ID or data mapping tasks. 90 | The use of these functions depends on gene annotation packages like 91 | "org.Hs.eg.db", which are Bioconductor standard. IFf no such packages not available for 92 | your interesting organisms, you may build one with Bioconductor 93 | AnnotationDbi package. 94 | } 95 | \value{ 96 | a 2- or multi-column character matrix recording the mapping between input IDs to 97 | the target ID type(s). 98 | } 99 | \references{ 100 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 101 | pathway based data integration and visualization. Bioinformatics, 102 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 103 | } 104 | \author{ 105 | Weijun Luo 106 | } 107 | \seealso{ 108 | \code{\link{cpd2kegg}} etc the auxillary compound ID mappers, 109 | \code{\link{mol.sum}} the auxillary molecular data mapper, 110 | \code{\link{node.map}} the node data mapper function. 111 | } 112 | \examples{ 113 | data(gene.idtype.list) 114 | #generate simulated gene data named with non-KEGG/Entrez gene IDs 115 | gene.ensprot <- sim.mol.data(mol.type = "gene", id.type = gene.idtype.list[4], 116 | nmol = 50000) 117 | #construct map between non-KEGG ID and KEGG ID (Entrez gene) 118 | id.map.ensprot <- id2eg(ids = names(gene.ensprot), 119 | category = gene.idtype.list[4], org = "Hs") 120 | #Map molecular data onto Entrez Gene IDs 121 | gene.entrez <- mol.sum(mol.data = gene.ensprot, id.map = id.map.ensprot) 122 | #check the results 123 | head(gene.ensprot) 124 | head(id.map.ensprot) 125 | head(gene.entrez) 126 | 127 | #map Entrez Gene to Gene Symbol and Name 128 | eg.symbname=eg2id(eg=id.map.ensprot[,2]) 129 | #entries with more than 1 Entrez Genes are not mapped 130 | head(eg.symbname) 131 | 132 | #not run: map between other ID types for other species 133 | #ath.tair=sim.mol.data(id.type="tair", species="ath", nmol=1000) 134 | #data(gene.idtype.bods) 135 | #gid.map <-geneannot.map(in.ids=names(ath.tair)[rep(1:100,each=2)], 136 | #in.type="tair", out.type=gene.idtype.bods$ath[-1], org="At") 137 | #gid.map1 <-geneannot.map(in.ids=names(ath.tair)[rep(1:100,each=2)], 138 | #in.type="tair", out.type=gene.idtype.bods$ath[-1], org="At", 139 | #unique.map=F, keep.order=F) 140 | #str(gid.map) 141 | #str(gid.map1) 142 | } 143 | % Add one or more standard keywords, see file 'KEYWORDS' in the 144 | % R documentation directory. 145 | \keyword{ ~kwd1 } 146 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 147 | -------------------------------------------------------------------------------- /man/kegg.species.code.Rd: -------------------------------------------------------------------------------- 1 | \name{kegg.species.code} 2 | \alias{kegg.species.code} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Mapping species name to KEGG code 6 | } 7 | \description{ 8 | This function maps species name to KEGG code. 9 | } 10 | \usage{ 11 | kegg.species.code(species = "hsa", na.rm = FALSE, code.only = TRUE) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{species}{ 16 | character, either the KEGG code, scientific name or the common name of 17 | the target species. Default species="hsa", it is equivalent to use either 18 | "Homo sapiens" (scientific name) or "human" (common name). 19 | } 20 | \item{na.rm}{ 21 | logical, should unmapped entris be removed. Default na.rm = FALSE. 22 | } 23 | \item{code.only}{ 24 | logical, whether to extract KEGG species code only or with gene ID usage 25 | info too. Default , code.only = TRUE. 26 | } 27 | } 28 | \value{ 29 | a character vector of mapped KEGG code of species. 30 | } 31 | \references{ 32 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 33 | pathway based data integration and visualization. Bioinformatics, 34 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 35 | } 36 | \author{ 37 | Weijun Luo 38 | } 39 | \seealso{ 40 | \code{\link{korg}} the species and KEGG code mapping data, 41 | \code{\link{cpd2kegg}} etc the auxillary compound ID mappers, 42 | \code{\link{download.kegg}} the downloader function. 43 | } 44 | \examples{ 45 | species=c("ptr", "Mus musculus", "dog", "happ") 46 | kcode=kegg.species.code(species = species, na.rm = FALSE) 47 | print(kcode) 48 | } 49 | \keyword{ ~kwd1 } 50 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 51 | -------------------------------------------------------------------------------- /man/korg.Rd: -------------------------------------------------------------------------------- 1 | \name{korg} 2 | \alias{korg} 3 | \alias{bods} 4 | \docType{data} 5 | \title{ 6 | Mapping data on KEGG species code and corresponding Bioconductor gene 7 | annotation package 8 | } 9 | \description{ 10 | 11 | Data on KEGG species, including taxonomy IDs, KEGG code, scientific name, 12 | common name, corresponding gene ID types, and gene annotation package 13 | names in Bioconductor 14 | } 15 | \usage{ 16 | data(korg) 17 | data(bods) 18 | } 19 | \format{ 20 | korg is a character matrix of ~4800 rows and 10 columns. First 5 21 | columns are KEGG and NCBI taxonomy IDs, KEGG species code, scientific 22 | name and common name, followed columns on gene ID types used for each 23 | species: entrez.gnodes ("1" or "0", whether EntrezGene is the default 24 | gene ID) and representative KEGG gene ID, NCBI or Entrez Gene ID, NCBI 25 | protein and Uniprot ID. Note korg includes 4800 KEGG species (as of 26 | 06/2017), in the meantime, an updated version of korg is now checked 27 | out from Pathview Web server each time pathview package is loaded. 28 | 29 | 30 | bods is a character matrix of 19 rows and 3 columns on the mapping 31 | between gene annotation package names in Bioconductor, common name and 32 | KEGG code of most common research species. 33 | } 34 | \source{ 35 | http://www.genome.jp/kegg-bin/get_htext?br08601.keg 36 | 37 | http://bioconductor.org/packages/release/BiocViews.html#___OrgDb 38 | } 39 | \examples{ 40 | data(korg) 41 | data(bods) 42 | head(korg) 43 | head(bods) 44 | } 45 | \keyword{datasets} 46 | -------------------------------------------------------------------------------- /man/mol.sum.Rd: -------------------------------------------------------------------------------- 1 | \name{mol.sum} 2 | \alias{mol.sum} 3 | 4 | \title{ 5 | Mapping and summation of molecular data onto standard IDs 6 | } 7 | \description{ 8 | Molecular data like gene or metabolite data are frequently annotated by 9 | various types of IDs. This function maps and summarize molecular data 10 | onto standard gene or compound IDs. It would be straightforward to 11 | integrate, analyze or visualize the "standardized" data with pathways or 12 | functional categories. 13 | } 14 | \usage{ 15 | mol.sum(mol.data, id.map, gene.annotpkg = "org.Hs.eg.db", sum.method = 16 | c("sum", "mean", "median", "max", "max.abs", "random")[1]) 17 | } 18 | %- maybe also 'usage' for other objects documented here. 19 | \arguments{ 20 | \item{mol.data}{ 21 | Either vector (single sample) or a matrix-like data (multiple 22 | sample). Vector should be numeric with molecule IDs as names or it 23 | may also be character of molecule IDs. Character vector is treated as 24 | discrete or count data. Matrix-like data structure has molecules as 25 | rows and samples as columns. Row names should be molecule IDs. Default 26 | mol.data=NULL. 27 | This argument is equivalent to gene.data or cpd.data in the pathview 28 | function. Check pahtview function for more information. 29 | } 30 | \item{id.map}{ 31 | a two-column character matrix, giving the mapping between molecular IDs 32 | used in mol.data and taget/standard molecular IDs. Then mol.data are 33 | gene data, \code{id.map} may also be a character specifying the type of 34 | IDs used in mol.data. The two-column mapping matrix will be generated 35 | automatically. 36 | } 37 | \item{gene.annotpkg}{ 38 | character, name of the gene annotation package. This package should be 39 | one of the standard annotation packages from Bioconductor, such as 40 | "org.Hs.eg.db" (default). Check \code{data(bods); bods} for a full list of standard 41 | annotation packages. You may also use your custom annotation package 42 | built with AnnotationDbi, the Bioconductor Annotation Database 43 | Interface. Only effective when mol.data are gene.data and id.map gives 44 | the ID type being used. 45 | } 46 | \item{sum.method}{ 47 | character, the method name to calculate node summary given that 48 | multiple genes or compounds are mapped to it. Poential options include 49 | "sum","mean", "median", "max", "max.abs" and "random". Default sum.method="sum". 50 | } 51 | } 52 | \details{ 53 | This function is called in pathview main function when gene.idtype or 54 | cpd.idtype is not the standard type, so that the molecular data can be 55 | mapped and summarized onto standard IDs. This is needed for further 56 | mapping to KEGG pathways. The same standard ID mapping is needed when 57 | carry out pathway or functional analysis on molecular data, which are 58 | labeled by non-standard (or alien) IDs or probe names, like in most of 59 | the microarray or metabolomics datasets. In other words, function 60 | \code{mol.sum} can be useful in all these situations. 61 | } 62 | \value{ 63 | a numeric vector or matrix. Its dimensionality is the same as the input 64 | mol.data except row names are standard molecular IDs. 65 | } 66 | \references{ 67 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 68 | pathway based data integration and visualization. Bioinformatics, 69 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 70 | } 71 | \author{ 72 | Weijun Luo 73 | } 74 | 75 | 76 | \seealso{ 77 | \code{\link{node.map}} the node data mapper function. 78 | \code{\link{id2eg}}, \code{\link{cpd2kegg}} etc the auxillary molecular ID mappers, 79 | \code{\link{pathview}} the main function, 80 | } 81 | \examples{ 82 | data(gene.idtype.list) 83 | #generate simulated gene data named with non-KEGG/Entrez gene IDs 84 | gene.ensprot <- sim.mol.data(mol.type = "gene", id.type = gene.idtype.list[4], 85 | nmol = 50000) 86 | #construct map between non-KEGG ID and KEGG ID (Entrez gene) 87 | id.map.ensprot <- id2eg(ids = names(gene.ensprot), 88 | category = gene.idtype.list[4], org = "Hs") 89 | #Map molecular data onto Entrez Gene IDs 90 | gene.entrez <- mol.sum(mol.data = gene.ensprot, id.map = id.map.ensprot) 91 | #check the results 92 | head(gene.ensprot) 93 | head(id.map.ensprot) 94 | head(gene.entrez) 95 | } 96 | % Add one or more standard keywords, see file 'KEYWORDS' in the 97 | % R documentation directory. 98 | \keyword{ ~kwd1 } 99 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 100 | -------------------------------------------------------------------------------- /man/node.color.Rd: -------------------------------------------------------------------------------- 1 | \name{node.color} 2 | \alias{node.color} 3 | \alias{col.key} 4 | 5 | \title{ 6 | Code molecular data as pseudo colors on the pathway graph 7 | } 8 | \description{ 9 | 10 | 11 | \code{node.color} converts the mapped molecular (gene, protein 12 | or metabolite etc) data as pseudo colors on pathway nodes. 13 | \code{col.key} draws color key(s) for mapped molecular data on the 14 | pathway graph. 15 | } 16 | \usage{ 17 | node.color(plot.data = NULL, discrete=FALSE, limit, bins, both.dirs = 18 | TRUE, low = "green", mid = "gray", high = "red", na.col = "transparent", 19 | trans.fun = NULL) 20 | col.key(discrete=FALSE, limit = 1, bins = 10, cols = NULL, both.dirs = 21 | TRUE, low = "green", mid = "gray", high = "red", graph.size, node.size, 22 | size.by.graph = TRUE, key.pos = "topright", off.sets = c(x = 0, y = 0), 23 | align = "n", cex = 1, lwd = 1) 24 | } 25 | %- maybe also 'usage' for other objects documented here. 26 | \arguments{ 27 | \item{plot.data}{ 28 | the result returned by \code{node.map} function. It is a data.frame composed 29 | of parsed KGML data and summary molecular data 30 | for each mapped node. Rows are mapped nodes, and columns are 31 | parsed or mapped node data. Check \code{node.map} for details. 32 | } 33 | \item{discrete}{ 34 | logical, whether to treat the molecular data or node summary data as discrete. d 35 | discrete=FALSE, otherwise, mol.data will be a charactor vector of 36 | molecular IDs. 37 | } 38 | \item{limit}{ 39 | a list of two numeric elements with "gene" and "cpd" as the names. This 40 | argument specifies the limit values for gene.data and cpd.data when converting 41 | them to pseudo colors. Each element of the list could be of length 1 or 42 | 2. Length 1 suggests discrete data or 1 directional (positive-valued) 43 | data, or the absolute limit for 2 directional data. Length 2 suggests 2 44 | directional data. Default limit=list(gene=0.5, cpd=1). 45 | } 46 | \item{bins}{ 47 | a list of two integer elements with "gene" and "cpd" as the names. This 48 | argument specifies the number of levels or bins for gene.data and cpd.data when converting 49 | them to pseudo colors. Default limit=list(gene=10, cpd=10). 50 | } 51 | \item{both.dirs}{ 52 | a list of two logical elements with "gene" and "cpd" as the names. This 53 | argument specifies whether gene.data and cpd.data are 1 directional or 2 54 | directional data when converting them to pseudo colors. Default limit=list(gene=TRUE, cpd=TRUE). 55 | } 56 | \item{trans.fun}{ 57 | a list of two function (not character) elements with "gene" and "cpd" as the names. This 58 | argument specifies whether and how gene.data and cpd.data are 59 | transformed. Examples are \code{log}, \code{abs} or users' own 60 | functions. Default limit=list( gene=NULL, cpd=NULL). 61 | } 62 | \item{low, mid, high}{ 63 | each is a list of two colors with "gene" and "cpd" as the names. This 64 | argument specifies the color spectra to code gene.data and 65 | cpd.data. When data are 1 directional (TRUE value in both.dirs), only 66 | mid and high are used to specify the color spectra. Default spectra (low-mid-high) 67 | "green"-"gray"-"red" and "blue"-"gray"-"yellow" are used for gene.data 68 | and cpd.data respectively. 69 | The values for 'low, mid, high' can be given as color names 70 | ('red'), plot color index (2=red), and HTML-style RGB, 71 | ("\#FF0000"=red). 72 | } 73 | \item{na.col}{ 74 | color used for NA's or missing values in gene.data and cpd.data. d 75 | na.col="transparent". 76 | } 77 | 78 | \item{cols}{ 79 | character, specifying a discrete spectrum of colors to be plotted as 80 | color key. Note this argument is usually NULL (default), otherwise, the 81 | number of discrete colors has to match \code{bins}. 82 | } 83 | \item{graph.size}{ 84 | numeric vector of length 2, i.e. the sizes (width, height) of the 85 | pathway graph panel. This is needed to determine the sizes and exact 86 | location of the color key. 87 | } 88 | \item{node.size}{ 89 | numeric vector of length 2, i.e. the sizes (width, height) of the 90 | standard gene nodes (rectangles). This is needed to determine the sizes 91 | and exact location of the color key when size.by.graph=FALSE. 92 | } 93 | \item{size.by.graph}{ 94 | logical, whether to determine the sizes and exact location of the color 95 | key with respect to the size of the whole graph panel or that of a 96 | single node. Default size.by.graph=TRUE. 97 | } 98 | \item{key.pos}{ 99 | character, controlling the position of color key(s). Potentail values 100 | are "bottomleft", "bottomright", "topleft" and "topright". d 101 | key.pos="topright". 102 | } 103 | \item{off.sets}{ 104 | numeric vector of length 2, with "x" and "y" as the names. This argument 105 | specifies the offset values in x and y axes when plotting a new color 106 | key, as to avoid overlap with existing color keys or boundaries. Note 107 | that the \code{off.sets} value is reset and returned each time 108 | \code{col.key} function is called, as for the reference of plotting the 109 | next color key. Default off.sets=c(0,0). 110 | } 111 | \item{align}{ 112 | character, controlling how the color keys are aligned when 113 | needed. Potential values are "x", aligned by x coordinates, and "y", 114 | aligned by y coordinates. Default align="x". 115 | } 116 | \item{cex}{ 117 | A numerical value giving the amount by which legend text 118 | and symbols should be scaled relative to the default 1. 119 | } 120 | \item{lwd}{ 121 | numeric, the line width, a _positive_ number, defaulting to '1'. 122 | } 123 | } 124 | \details{ 125 | \code{node.color} converts the mapped molecular data (gene.data or cpd.data) by 126 | node.map function into pseudo colors, which then can be plotted on the 127 | pathway graph. 128 | \code{col.key} is used in combination with node.color in pathview, although 129 | this function can be used independently for similar tasks. 130 | } 131 | \value{ 132 | \code{node.color} returns a vector or matrix of colors. Its 133 | dimensionality is the same as the corresponding gene.data or cpd.data. 134 | \code{col.key} plots a color key on existing pathway graph, then returns 135 | a updated version of off.sets for the reference of next color key. 136 | } 137 | \references{ 138 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 139 | pathway based data integration and visualization. Bioinformatics, 140 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 141 | } 142 | \author{ 143 | Weijun Luo 144 | } 145 | 146 | \seealso{ 147 | \code{\link{keggview.native}} and \code{\link{keggview.graph}} the 148 | viwer functions, 149 | \code{\link{node.map}} the node data mapper function. 150 | } 151 | \examples{ 152 | xml.file=system.file("extdata", "hsa04110.xml", package = "pathview") 153 | node.data=node.info(xml.file) 154 | names(node.data) 155 | data(gse16873.d) 156 | plot.data.gene=node.map(mol.data=gse16873.d[,1], node.data, 157 | node.types="gene") 158 | head(plot.data.gene) 159 | cols.ts.gene=node.color(plot.data.gene, limit=1, bins=10) 160 | head(cols.ts.gene) 161 | } 162 | % Add one or more standard keywords, see file 'KEYWORDS' in the 163 | % R documentation directory. 164 | \keyword{ ~kwd1 } 165 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 166 | -------------------------------------------------------------------------------- /man/node.info.Rd: -------------------------------------------------------------------------------- 1 | \name{node.info} 2 | \alias{node.info} 3 | 4 | \title{ 5 | Extract node information from KEGG pathway 6 | } 7 | \description{ 8 | The parser function, parser KGML file and/or extract node information 9 | from KEGG pathway. 10 | } 11 | \usage{ 12 | node.info(object, short.name = TRUE) 13 | } 14 | 15 | \arguments{ 16 | \item{object}{ 17 | either a character specifying the full KGML file name (with directory), 18 | or a object of "KEGGPathway" class, or a object of "graphNEL" 19 | class. The latter two are parsed results of KGML file. 20 | } 21 | \item{short.name}{ 22 | logical, if TRUE, the short labels, i.e. the first iterm separated by 23 | "," in the long labels are parsed out as node labels. Default short.name=TRUE. 24 | } 25 | } 26 | \details{ 27 | Parser function node.info extract node data from parsed KEGG 28 | pathways. KGML files are parsed using \code{parseKGML2} and 29 | \code{KEGGpathway2Graph2}. These functions from KEGGgraph package have 30 | been heavily modified for reaction parsing and conversion to 31 | edges. 32 | } 33 | \value{ 34 | a named list of 10 elements: "kegg.names", "type", "component", "size", 35 | "labels", "shape", "x", "y", "width" and "height". Each elements record 36 | the corresponding attribute for all nodes in the parsed KEGG pathway. 37 | } 38 | \references{ 39 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 40 | pathway based data integration and visualization. Bioinformatics, 41 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 42 | } 43 | \author{ 44 | Weijun Luo 45 | } 46 | \seealso{ 47 | \code{\link{pathview}} the main function, 48 | \code{\link{combineKEGGnodes}} and \code{\link{reaction2edge}} for 49 | special treatment of nodes or edges. 50 | } 51 | \examples{ 52 | xml.file=system.file("extdata", "hsa04110.xml", package = "pathview") 53 | node.data=node.info(xml.file) 54 | names(node.data) 55 | #or parse into a graph object, then extract node info 56 | gR1=pathview:::parseKGML2Graph2(xml.file, genesOnly=FALSE, expand=FALSE, split.group=FALSE) 57 | node.data=node.info(gR1) 58 | } 59 | % Add one or more standard keywords, see file 'KEYWORDS' in the 60 | % R documentation directory. 61 | \keyword{ ~kwd1 } 62 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 63 | -------------------------------------------------------------------------------- /man/node.map.Rd: -------------------------------------------------------------------------------- 1 | \name{node.map} 2 | \alias{node.map} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Map molecular data onto KEGG pathway nodes 6 | } 7 | \description{ 8 | The mapper function, mapping molecular data(gene expression, metabolite 9 | abundance etc)to nodes in KEGG pathway. 10 | } 11 | \usage{ 12 | node.map(mol.data = NULL, node.data, node.types = c("gene", "ortholog", 13 | "compound")[1], node.sum = c("sum", "mean", "median", "max", "max.abs", 14 | "random")[1], entrez.gnodes=TRUE) 15 | } 16 | \arguments{ 17 | \item{mol.data}{ 18 | Either vector (single sample) or a matrix-like data (multiple 19 | sample). Vector should be numeric with molecule IDs as names or it 20 | may also be character of molecule IDs. Character vector is treated as 21 | discrete or count data. Matrix-like data structure has molecules as 22 | rows and samples as columns. Row names should be molecule IDs. Default 23 | mol.data=NULL. 24 | This argument is equivalent to gene.data or cpd.data in the pathview 25 | function. Check pahtview function for more information. 26 | } 27 | \item{node.data}{ 28 | a named list of 10 elements, the results returned by \code{node.info}, 29 | check the function for details. 30 | } 31 | \item{node.types}{ 32 | character, sepcify the node type to map the mol.data to, either "gene", 33 | "compound", or "compound". Default node.types="gene". 34 | } 35 | \item{node.sum}{ 36 | character, the method name to calculate node summary given that 37 | multiple genes or compounds are mapped to it. Poential options include 38 | "sum","mean", "median", "max", "max.abs" and "random". Default node.sum="sum". 39 | } 40 | \item{entrez.gnodes}{ 41 | logical, whether EntrezGene (NCBI GeneID) is used as the default gene 42 | ID in the KEGG data files. This is needed because KEGG uses different 43 | types default gene ID for different species. Some most common model 44 | species use EntrezGene, but majority of others use Locus tag. Default 45 | entrez.gnodes=TRUE. 46 | } 47 | } 48 | \details{ 49 | Mapper function node.map maps user supplied molecular data to KEGG 50 | pathways. This function takes standard KEGG molecular IDs (Entrez Gene 51 | ID or KEGG Compound Accession) and map them to pathway nodes. None KEGG 52 | molecular gene IDs or Compound IDs are pre-mapped to standard KEGG IDs 53 | by calling another function \code{mol.sum}. When 54 | multiple molecules map to one node, the corresponding molecular data are 55 | summarized into a single node summary by calling function specified by 56 | \code{node.sum}. This mapped node summary data together with the parsed 57 | KGML data are then returned for further processing. 58 | Proper input data include: gene expression, protein 59 | expression, genetic association, metabolite abundance, genomic data, 60 | literature, and other data types mappable to pathways. 61 | The input mol.data may be NULL, then no molecular data are actually 62 | mapped, but all nodes of the specified node.type are considered 63 | "mappable" and their parsed KGML data returned. 64 | } 65 | \value{ 66 | A data.frame composed of parsed KGML data and summary molecular data 67 | for each mapped node. Each row is a mapped node, and columns are: 68 | \item{kegg.names}{standard KEGG IDs/Names for mapped nodes. It's 69 | Entrez Gene ID or KEGG Compound Accessions.} 70 | \item{labels}{Node labels to be used when needed 71 | } 72 | \item{type}{node type, currently 4 types are supported: 73 | "gene","enzyme", "compound" and "ortholog". 74 | } 75 | \item{x}{x coordinate in the original KEGG pathway graph. 76 | } 77 | \item{y}{y coordinate in the original KEGG pathway graph. 78 | } 79 | \item{width}{node width in the original KEGG pathway graph. 80 | } 81 | \item{height}{node height in the original KEGG pathway graph. 82 | } 83 | \item{other columns }{columns of the mapped gene/compound data 84 | } 85 | } 86 | \references{ 87 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 88 | pathway based data integration and visualization. Bioinformatics, 89 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 90 | } 91 | \author{ 92 | Weijun Luo 93 | } 94 | 95 | \seealso{ 96 | \code{\link{mol.sum}} the auxillary molecular data mapper, 97 | \code{\link{id2eg}}, \code{\link{cpd2kegg}} etc the auxillary molecular ID mappers, 98 | \code{\link{node.color}} the node color coder, 99 | \code{\link{pathview}} the main function, 100 | \code{\link{node.info}} the parser. 101 | } 102 | \examples{ 103 | xml.file=system.file("extdata", "hsa04110.xml", package = "pathview") 104 | node.data=node.info(xml.file) 105 | names(node.data) 106 | data(gse16873.d) 107 | plot.data.gene=node.map(mol.data=gse16873.d[,1], node.data, 108 | node.types="gene") 109 | head(plot.data.gene) 110 | } 111 | % Add one or more standard keywords, see file 'KEYWORDS' in the 112 | % R documentation directory. 113 | \keyword{ ~kwd1 } 114 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 115 | -------------------------------------------------------------------------------- /man/pathview-internal.Rd: -------------------------------------------------------------------------------- 1 | \name{pathview-internal} 2 | 3 | \alias{KEGGEdgeSubtype} 4 | \alias{parseReaction2} 5 | \alias{subtypeDisplay.kedge} 6 | \alias{KEGGpathway2Graph2} 7 | \alias{parseKGML2.R} 8 | \alias{parseKGML2Graph2} 9 | \alias{max.abs} 10 | \alias{random} 11 | \alias{kegg.legend} 12 | \alias{pathview.stamp} 13 | \alias{circles} 14 | \alias{ellipses} 15 | \alias{sliced.shapes} 16 | \alias{colorpanel2} 17 | 18 | \title{Internal functions} 19 | \description{ 20 | Not intended to be called by the users. 21 | } 22 | \details{ 23 | These functions are not to be called by the user directly. 24 | 25 | Functions parseReaction2, parseKGML2, KEGGpathway2Graph2 and 26 | parseKGML2Graph2 parse KEGG pathways 27 | from KGML files. Function subtypeDisplay.kedge and data 28 | KEGGEdgeSubtype extact and store edge subtypes and corresponding 29 | rendering information. All these functions/data were modified from the 30 | original copies in KEGGgraph package. 31 | 32 | Function kegg.legend generates legend for KEGG edge and node 33 | types. Function pathview.stamp generates pathview sisgnature on graphs. 34 | 35 | Function colorpanel2 comes from gplots package function colorpanel. 36 | 37 | Functions max.abs and random among others are method to summarize data 38 | at molecular level or node level when multiple items mapping to the 39 | same ID/node. 40 | 41 | Function circles, ellipses and sliced.shapes draw KEGG nodes in colored shapes 42 | (circles and ellipses). 43 | 44 | Functions deComp and rownorm were written by Weijun Luo, the author of 45 | gage package. 46 | } 47 | \keyword{ internal } 48 | -------------------------------------------------------------------------------- /man/pathview-package.Rd: -------------------------------------------------------------------------------- 1 | \name{pathview-package} 2 | \alias{pathview-package} 3 | \docType{package} 4 | \title{ 5 | Pathway based data integration and visualization 6 | } 7 | \description{ 8 | Pathway based data integration and visualization 9 | } 10 | \details{ 11 | \tabular{ll}{ 12 | Package: \tab pathview\cr 13 | Type: \tab Package\cr 14 | Version: \tab 1.0\cr 15 | Date: \tab 2012-12-26\cr 16 | License: \tab What license is it under?\cr 17 | LazyLoad: \tab yes\cr 18 | } 19 | ~~ An overview of how to use the package, including the most important ~~ 20 | ~~ functions ~~ 21 | } 22 | \author{ 23 | Weijun Luo 24 | 25 | Maintainer: Weijun Luo 26 | } 27 | \references{ 28 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 29 | pathway based data integration and visualization. Bioinformatics, 30 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 31 | } 32 | \keyword{package} 33 | -------------------------------------------------------------------------------- /man/pathview.Rd: -------------------------------------------------------------------------------- 1 | \name{pathview} 2 | \alias{pathview} 3 | \alias{keggview.native} 4 | \alias{keggview.graph} 5 | 6 | %- Also NEED an '\alias' for EACH other topic documented here. 7 | \title{ 8 | Pathway based data integration and visualization 9 | } 10 | \description{ 11 | Pathview is a tool set for pathway based data integration and 12 | visualization. It maps and renders user data on relevant pathway 13 | graphs. All users need is to supply their gene or compound data and 14 | specify the target pathway. Pathview automatically downloads the pathway 15 | graph data, parses the data file, maps user data to the pathway, and 16 | render pathway graph with the mapped data. 17 | Pathview generates both native KEGG view and Graphviz views for 18 | pathways. keggview.native and keggview.graph are the two viewer 19 | functions, and pathview is the main function providing a unified 20 | interface to downloader, parser, mapper and viewer functions. 21 | } 22 | \usage{ 23 | pathview(gene.data = NULL, cpd.data = NULL, pathway.id, 24 | species = "hsa", kegg.dir = ".", cpd.idtype = "kegg", gene.idtype = 25 | "entrez", gene.annotpkg = NULL, min.nnodes = 3, kegg.native = TRUE, 26 | map.null = TRUE, expand.node = FALSE, split.group = FALSE, map.symbol = 27 | TRUE, map.cpdname = TRUE, node.sum = "sum", discrete=list(gene=FALSE, 28 | cpd=FALSE), limit = list(gene = 1, cpd = 1), bins = list(gene = 10, cpd 29 | = 10), both.dirs = list(gene = T, cpd = T), trans.fun = list(gene = 30 | NULL, cpd = NULL), low = list(gene = "green", cpd = "blue"), mid = 31 | list(gene = "gray", cpd = "gray"), high = list(gene = "red", cpd = 32 | "yellow"), na.col = "transparent", ...) 33 | 34 | keggview.native(plot.data.gene = NULL, plot.data.cpd = NULL, 35 | cols.ts.gene = NULL, cols.ts.cpd = NULL, node.data, pathway.name, 36 | out.suffix = "pathview", kegg.dir = ".", multi.state=TRUE, match.data = 37 | TRUE, same.layer = TRUE, res = 300, cex = 0.25, discrete = 38 | list(gene=FALSE, cpd=FALSE), limit= list(gene = 1, cpd = 1), bins = 39 | list(gene = 10, cpd = 10), both.dirs =list(gene = T, cpd = T), low = 40 | list(gene = "green", cpd = "blue"), mid = list(gene = "gray", cpd = 41 | "gray"), high = list(gene = "red", cpd = "yellow"), na.col = 42 | "transparent", new.signature = TRUE, plot.col.key = TRUE, key.align = 43 | "x", key.pos = "topright", ...) 44 | 45 | keggview.graph(plot.data.gene = NULL, plot.data.cpd = NULL, cols.ts.gene 46 | = NULL, cols.ts.cpd = NULL, node.data, path.graph, pathway.name, 47 | out.suffix = "pathview", pdf.size = c(7, 7), multi.state=TRUE, 48 | same.layer = TRUE, match.data = TRUE, rankdir = c("LR", "TB")[1], 49 | is.signal = TRUE, split.group = F, afactor = 1, text.width = 15, cex = 50 | 0.5, map.cpdname = FALSE, cpd.lab.offset = 1.0, 51 | discrete=list(gene=FALSE, cpd=FALSE), limit = list(gene = 1, cpd = 1), 52 | bins = list(gene = 10, cpd = 10), both.dirs = list(gene = T, cpd = T), 53 | low = list(gene = "green", cpd = "blue"), mid = list(gene = "gray", cpd 54 | = "gray"), high = list(gene = "red", cpd = "yellow"), na.col = 55 | "transparent", new.signature = TRUE, plot.col.key = TRUE, key.align = 56 | "x", key.pos = "topright", sign.pos = "bottomright", ...) } 57 | 58 | \arguments{ 59 | \item{gene.data}{ 60 | either vector (single sample) or a matrix-like data (multiple 61 | sample). Vector should be numeric with gene IDs as names or it may 62 | also be character of gene IDs. Character vector is treated as discrete 63 | or count data. Matrix-like data structure has genes as rows and 64 | samples as columns. Row names should be gene IDs. Here gene ID is a 65 | generic concepts, including multiple types of gene, transcript and 66 | protein uniquely mappable to KEGG gene IDs. KEGG ortholog IDs are also 67 | treated as gene IDs as to handle metagenomic data. Check details for 68 | mappable ID types. Default gene.data=NULL. 69 | 70 | numeric, character, continuous 71 | } 72 | \item{cpd.data}{ 73 | the same as gene.data, excpet named with IDs mappable to KEGG 74 | compound IDs. Over 20 types of IDs included in CHEMBL database can be 75 | used here. Check details for mappable ID types. Default 76 | cpd.data=NULL. Note that gene.data and cpd.data can't be NULL 77 | simultaneously. 78 | } 79 | \item{pathway.id}{ 80 | character vector, the KEGG pathway ID(s), usually 5 digit, may also include the 3 81 | letter KEGG species code. 82 | } 83 | \item{species}{ 84 | character, either the kegg code, scientific name or the common name of 85 | the target species. This applies to both pathway and gene.data or 86 | cpd.data. When KEGG ortholog pathway is considered, 87 | species="ko". Default species="hsa", it is equivalent to use either 88 | "Homo sapiens" (scientific name) or "human" (common name). 89 | } 90 | \item{kegg.dir}{ 91 | character, the directory of KEGG pathway data file (.xml) and image file 92 | (.png). Users may supply their own data files in the same format and 93 | naming convention of KEGG's (species code + pathway id, 94 | e.g. hsa04110.xml, hsa04110.png etc) in this directory. Default 95 | kegg.dir="." (current working directory). 96 | } 97 | \item{cpd.idtype}{ 98 | character, ID type used for the cpd.data. Default cpd.idtype="kegg" (include 99 | compound, glycan and drug accessions). 100 | } 101 | \item{gene.idtype}{ 102 | character, ID type used for the gene.data, case insensitive. Default 103 | gene.idtype="entrez", i.e. Entrez Gene, which are the primary KEGG gene ID 104 | for many common model organisms. For other species, gene.idtype should 105 | be set to "KEGG" as KEGG use other types of gene IDs. For the common 106 | model organisms (to check the list, do: \code{data(bods); bods}), you 107 | may also specify other types of valid IDs. To check the ID list, do: 108 | \code{data(gene.idtype.list); gene.idtype.list}. 109 | } 110 | \item{gene.annotpkg}{ 111 | character, the name of the annotation package to use for mapping between 112 | other gene ID types including symbols and Entrez gene ID. Default gene.annotpkg=NULL. 113 | } 114 | \item{min.nnodes}{ 115 | integer, minimal number of nodes of type "gene","enzyme", "compound" or 116 | "ortholog" for a pathway to be considered. Default min.nnodes=3. 117 | } 118 | \item{kegg.native}{ 119 | logical, whether to render pathway graph as native KEGG graph (.png) or 120 | using graphviz layout engine (.pdf). Default kegg.native=TRUE. 121 | } 122 | \item{map.null}{ 123 | logical, whether to map the NULL gene.data or cpd.data to pathway. When 124 | NULL data are mapped, the gene or compound nodes in the pathway will be 125 | rendered as actually mapped nodes, except with NA-valued color. When 126 | NULL data are not mapped, the nodes are rendered as unmapped nodes. This 127 | argument mainly affects native KEGG graph view, i.e. when 128 | kegg.native=TRUE. Default map.null=TRUE. 129 | } 130 | \item{expand.node}{ 131 | logical, whether the multiple-gene nodes are expanded into single-gene 132 | nodes. Each expanded single-gene nodes inherits all edges from the 133 | original multiple-gene node. This option only affects graphviz graph view, i.e. when 134 | kegg.native=FALSE. This option is not effective for most metabolic 135 | pathways where it conflits with converting reactions to edges. Default expand.node=FLASE. 136 | } 137 | \item{split.group}{ 138 | logical, whether split node groups are split to individual nodes. Each 139 | split member nodes inherits all edges from the node group. This 140 | option only affects graphviz graph view, i.e. when 141 | kegg.native=FALSE. This option also effects most metabolic 142 | pathways even without group nodes defined orginally. For these pathways, 143 | genes involved in the same reaction are grouped automatically when 144 | converting reactions to edges unless split.group=TRUE. d 145 | split.group=FLASE. 146 | } 147 | \item{map.symbol}{ 148 | logical, whether map gene IDs to symbols for gene node labels or use the 149 | graphic name from the KGML file. This option is only effective for 150 | kegg.native=FALSE or same.layer=FALSE when kegg.native=TRUE. For 151 | same.layer=TRUE when kegg.native=TRUE, the native KEGG labels will be 152 | kept. Default map.symbol=TRUE. 153 | } 154 | \item{map.cpdname}{ 155 | logical, whether map compound IDs to formal names for compound node labels or use the 156 | graphic name from the KGML file (KEGG compound accessions). This option is only effective for 157 | kegg.native=FALSE. When kegg.native=TRUE, the native KEGG labels will be 158 | kept. Default map.cpdname=TRUE. 159 | } 160 | \item{node.sum}{ 161 | character, the method name to calculate node summary given that 162 | multiple genes or compounds are mapped to it. Poential options include 163 | "sum","mean", "median", "max", "max.abs" and "random". Default node.sum="sum". 164 | } 165 | \item{discrete}{ 166 | a list of two logical elements with "gene" and "cpd" as the names. This 167 | argument tells whether gene.data or cpd.data should be treated as 168 | discrete. Default dsicrete=list(gene=FALSE, cpd=FALSE), i.e. both data should 169 | be treated as continuous. 170 | } 171 | \item{limit}{ 172 | a list of two numeric elements with "gene" and "cpd" as the names. This 173 | argument specifies the limit values for gene.data and cpd.data when converting 174 | them to pseudo colors. Each element of the list could be of length 1 or 175 | 2. Length 1 suggests discrete data or 1 directional (positive-valued) 176 | data, or the absolute limit for 2 directional data. Length 2 suggests 2 177 | directional data. Default limit=list(gene=1, cpd=1). 178 | } 179 | \item{bins}{ 180 | a list of two integer elements with "gene" and "cpd" as the names. This 181 | argument specifies the number of levels or bins for gene.data and cpd.data when converting 182 | them to pseudo colors. Default limit=list(gene=10, cpd=10). 183 | } 184 | \item{both.dirs}{ 185 | a list of two logical elements with "gene" and "cpd" as the names. This 186 | argument specifies whether gene.data and cpd.data are 1 directional or 2 187 | directional data when converting them to pseudo colors. Default limit=list(gene=TRUE, cpd=TRUE). 188 | } 189 | \item{trans.fun}{ 190 | a list of two function (not character) elements with "gene" and "cpd" as the names. This 191 | argument specifies whether and how gene.data and cpd.data are 192 | transformed. Examples are \code{log}, \code{abs} or users' own 193 | functions. Default limit=list(gene=NULL, cpd=NULL). 194 | } 195 | \item{low, mid, high}{ 196 | each is a list of two colors with "gene" and "cpd" as the names. This 197 | argument specifies the color spectra to code gene.data and 198 | cpd.data. When data are 1 directional (TRUE value in both.dirs), only 199 | mid and high are used to specify the color spectra. Default spectra (low-mid-high) 200 | "green"-"gray"-"red" and "blue"-"gray"-"yellow" are used for gene.data 201 | and cpd.data respectively. 202 | The values for 'low, mid, high' can be given as color names 203 | ('red'), plot color index (2=red), and HTML-style RGB, 204 | ("\#FF0000"=red). 205 | } 206 | \item{na.col}{ 207 | color used for NA's or missing values in gene.data and cpd.data. d 208 | na.col="transparent". 209 | } 210 | \item{\dots}{ 211 | extra arguments passed to keggview.native or keggview.graph function. 212 | } 213 | 214 | special arguments for keggview.native or keggview.graph function. 215 | \item{plot.data.gene}{ 216 | data.frame returned by node.map function for rendering mapped gene 217 | nodes, including node name, type, positions (x, y), sizes (width, 218 | height), and mapped gene.data. This data is also used as input for 219 | pseduo-color coding through node.color function. Default plot.data.gene=NULL. 220 | } 221 | \item{plot.data.cpd}{ 222 | same as plot.data.gene function, except for mapped compound node data. d 223 | plot.data.cpd=NULL. Default plot.data.cpd=NULL. Note that plot.data.gene and 224 | plot.data.cpd can't be NULL simultaneously. 225 | } 226 | \item{cols.ts.gene}{ 227 | vector or matrix of colors returned by node.color function for rendering 228 | gene.data. Dimensionality is the same as the latter. Default cols.ts.gene=NULL. 229 | } 230 | \item{cols.ts.cpd}{ 231 | same as cols.ts.gene, except corresponding to cpd.data. d 232 | cols.ts.cpd=NULL. Note that cols.ts.gene and cols.ts.cpd 233 | plot.data.gene can't be NULL simultaneously. 234 | } 235 | \item{node.data}{ 236 | list returned by node.info function, which parse KGML file directly or 237 | indirectly, and extract the node data. 238 | } 239 | \item{pathway.name}{ 240 | character, the full KEGG pathway name in the format of 3-letter species 241 | code with 5-digit pathway id, eg "hsa04612". 242 | } 243 | \item{out.suffix}{ 244 | character, the suffix to be added after the pathway name as part of the 245 | output graph file. Sample names or column names of the gene.data or 246 | cpd.data are also added when there are multiple samples. Default out.suffix="pathview". 247 | } 248 | \item{multi.state}{ 249 | logical, whether multiple states (samples or columns) gene.data or 250 | cpd.data should be integrated and plotted in the same graph. Default 251 | match.data=TRUE. In other words, gene or compound nodes will be sliced 252 | into multiple pieces corresponding to the number of states in the 253 | data. 254 | } 255 | \item{match.data}{ 256 | logical, whether the samples of gene.data and cpd.data are 257 | paired. Default match.data=TRUE. When let sample sizes of gene.data 258 | and cpd.data be m and n, when m>n, extra columns of NA's (mapped to no 259 | color) will be added to cpd.data as to make the sample size the 260 | same. This will result in the smae number of slice in gene nodes and 261 | compound when multi.state=TRUE. 262 | } 263 | \item{same.layer}{ 264 | logical, control plotting layers: 265 | 1) if node colors be plotted in the same layer as the pathway 266 | graph when kegg.native=TRUE, 2) if edge/node type legend be plotted in 267 | the same page when kegg.native=FALSE. 268 | } 269 | \item{res}{ 270 | The nominal resolution in ppi which will be recorded in the 271 | bitmap file, if a positive integer. Also used for 'units' 272 | other than the default, and to convert points to pixels. This 273 | argument is only effective when kegg.native=TRUE. Default res=300. 274 | } 275 | \item{cex}{ 276 | A numerical value giving the amount by which plotting text 277 | and symbols should be scaled relative to the default 1. Default cex=0.25 278 | when kegg.native=TRUE, cex=0.5 when kegg.native=FALSE. 279 | } 280 | \item{new.signature}{ 281 | logical, whether pathview signature is added to the pathway 282 | graphs. Default new.signature=TRUE. 283 | } 284 | \item{plot.col.key}{ 285 | logical, whether color key is added to the pathway 286 | graphs. Default plot.col.key= TRUE. 287 | 288 | } 289 | \item{key.align}{ 290 | character, controlling how the color keys are aligned when both 291 | gene.data and cpd.data are not NULL. Potential values are "x", aligned 292 | by x coordinates, and "y", aligned by y coordinates. Default key.align="x". 293 | } 294 | \item{key.pos}{ 295 | character, controlling the position of color key(s). Potentail values 296 | are "bottomleft", "bottomright", "topleft" and "topright". d 297 | key.pos="topright". 298 | } 299 | \item{sign.pos}{ 300 | character, controlling the position of pathview signature. Only 301 | effective when kegg.native=FALSE, Signature position is fixed in place 302 | of the original KEGG signature when kegg.native=TRUE. Potentail values 303 | are "bottomleft", "bottomright", "topleft" and "topright". d 304 | sign.pos="bottomright". 305 | } 306 | 307 | \item{path.graph}{ 308 | a graph object parsed from KGML file, only effective when 309 | kegg.native=FALSE. 310 | } 311 | \item{pdf.size}{ 312 | a numeric vector of length 2, giving the width and height of the pathway 313 | graph pdf file. Note that pdf width increase by half when 314 | same.layer=TRUE to accommodate legends. Only effective when 315 | kegg.native=FALSE. Default pdf.size=c(7,7). 316 | } 317 | \item{rankdir}{ 318 | character, either "LR" (left to right) or "TB" (top to bottom), 319 | specifying the pathway graph layout direction. Only effective when 320 | kegg.native=FALSE. Default rank.dir="LR". 321 | } 322 | \item{is.signal}{ 323 | logical, if the pathway is treated as a signaling pathway, where all the 324 | unconnected nodes are dropped. This argument also affect the graph 325 | layout type, i.e. "dot" for signals or "neato" otherwise. Only effective when 326 | kegg.native=FALSE. Default is.signal=TRUE. 327 | } 328 | \item{afactor}{ 329 | numeric, node amplifying factor. This argument is for node size 330 | fine-tuning, its effect is subtler than expected. Only effective when 331 | kegg.native=FALSE. Default afctor=1. 332 | } 333 | \item{text.width}{ 334 | numeric, specifying the line width for text wrap. Only effective when 335 | kegg.native= FALSE. Default text.width=15 (characters). 336 | } 337 | \item{cpd.lab.offset}{ 338 | numeric, specifying how much compound labels should be put above the 339 | default position or node center. This argument is useful when 340 | map.cpdname=TRUE, i.e. compounds are labelled by full name, which 341 | affects the look of compound nodes and color. Only effective when 342 | kegg.native=FALSE. Default cpd.lab.offset=1.0. 343 | } 344 | } 345 | \details{ 346 | Pathview maps and renders user data on relevant pathway graphs. Pathview 347 | is a stand alone program for pathway based data integration and 348 | visualization. It also seamlessly integrates with pathway and functional 349 | analysis tools for large-scale and fully automated analysis. 350 | Pathview provides strong support for data Integration. It works with: 1) 351 | essentially all types of biological data mappable to pathways, 2) over 352 | 10 types of gene or protein IDs, and 20 types of compound or metabolite 353 | IDs, 3) pathways for over 2000 species as well as KEGG orthology, 4) 354 | varoius data attributes and formats, i.e. continuous/discrete data, 355 | matrices/vectors, single/multiple samples etc. 356 | To see mappable external gene/protein IDs do: 357 | \code{data(gene.idtype.list)}, to see mappable external compound related 358 | IDs do: \code{data(rn.list)}; names(rn.list). 359 | Pathview generates both native KEGG view and Graphviz views for 360 | pathways. Currently only KEGG pathways are implemented. Hopefully, pathways from 361 | Reactome, NCI and other databases will be supported in the future. 362 | } 363 | \value{ 364 | From viersion 1.9.3, pathview can accept either a single pathway or 365 | multiple pathway ids. The result returned by pathview function is a 366 | named list corresponding to the input pathway ids. Each element (for 367 | each pathway itself is a named list, with 2 368 | elements ("plot.data.gene", "plot.data.cpd"). Both elements 369 | are data.frame or NULL depends on the corresponding input 370 | data gene.data and cpd.data. These data.frames record the plot data 371 | for mapped gene or compound nodes: rows are mapped genes/compounds, 372 | columns are: 373 | \item{kegg.names}{standard KEGG IDs/Names for mapped nodes. It's 374 | Entrez Gene ID or KEGG Compound Accessions.} 375 | \item{labels}{Node labels to be used when needed. 376 | } 377 | \item{all.mapped}{All molecule (gene or compound) IDs mapped to this 378 | node. 379 | } 380 | \item{type}{node type, currently 4 types are supported: 381 | "gene","enzyme", "compound" and "ortholog". 382 | } 383 | \item{x}{x coordinate in the original KEGG pathway graph. 384 | } 385 | \item{y}{y coordinate in the original KEGG pathway graph. 386 | } 387 | \item{width}{node width in the original KEGG pathway graph. 388 | } 389 | \item{height}{node height in the original KEGG pathway graph. 390 | } 391 | \item{other columns }{columns of the mapped gene/compound data and 392 | corresponding pseudo-color codes for individual samples 393 | } 394 | 395 | The results returned by \code{keggview.native} and 396 | code{keggview.graph} are both a list of graph plotting 397 | parameters. These are not intended to be used externally. 398 | 399 | } 400 | \references{ 401 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 402 | pathway based data integration and visualization. Bioinformatics, 403 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 404 | } 405 | \author{ 406 | Weijun Luo 407 | } 408 | \seealso{ 409 | \code{\link{download.kegg}} the downloader, 410 | \code{\link{node.info}} the parser, 411 | \code{\link{node.map}} and \code{\link{node.color}} the mapper. 412 | } 413 | 414 | \examples{ 415 | #load data 416 | data(gse16873.d) 417 | data(demo.paths) 418 | 419 | #KEGG view: gene data only 420 | i <- 1 421 | pv.out <- pathview(gene.data = gse16873.d[, 1], pathway.id = 422 | demo.paths$sel.paths[i], species = "hsa", out.suffix = "gse16873", 423 | kegg.native = TRUE) 424 | str(pv.out) 425 | head(pv.out$plot.data.gene) 426 | #result PNG file in current directory 427 | 428 | #Graphviz view: gene data only 429 | pv.out <- pathview(gene.data = gse16873.d[, 1], pathway.id = 430 | demo.paths$sel.paths[i], species = "hsa", out.suffix = "gse16873", 431 | kegg.native = FALSE, sign.pos = demo.paths$spos[i]) 432 | #result PDF file in current directory 433 | 434 | #KEGG view: both gene and compound data 435 | sim.cpd.data=sim.mol.data(mol.type="cpd", nmol=3000) 436 | i <- 3 437 | print(demo.paths$sel.paths[i]) 438 | pv.out <- pathview(gene.data = gse16873.d[, 1], cpd.data = sim.cpd.data, 439 | pathway.id = demo.paths$sel.paths[i], species = "hsa", out.suffix = 440 | "gse16873.cpd", keys.align = "y", kegg.native = TRUE, key.pos = demo.paths$kpos1[i]) 441 | str(pv.out) 442 | head(pv.out$plot.data.cpd) 443 | 444 | #multiple states in one graph 445 | set.seed(10) 446 | sim.cpd.data2 = matrix(sample(sim.cpd.data, 18000, 447 | replace = TRUE), ncol = 6) 448 | pv.out <- pathview(gene.data = gse16873.d[, 1:3], 449 | cpd.data = sim.cpd.data2[, 1:2], pathway.id = demo.paths$sel.paths[i], 450 | species = "hsa", out.suffix = "gse16873.cpd.3-2s", keys.align = "y", 451 | kegg.native = TRUE, match.data = FALSE, multi.state = TRUE, same.layer = TRUE) 452 | str(pv.out) 453 | head(pv.out$plot.data.cpd) 454 | 455 | #result PNG file in current directory 456 | 457 | ##more examples of pathview usages are shown in the vignette. 458 | } 459 | % Add one or more standard keywords, see file 'KEYWORDS' in the 460 | % R documentation directory. 461 | \keyword{ ~kwd1 } 462 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 463 | -------------------------------------------------------------------------------- /man/sim.mol.data.Rd: -------------------------------------------------------------------------------- 1 | \name{sim.mol.data} 2 | \alias{sim.mol.data} 3 | 4 | \title{ 5 | Simulate molecular data for pathview experiment 6 | } 7 | \description{ 8 | The molecular data simulator generates either gene.data or cpd.data of 9 | different ID types, molecule numbers, sample sizes, either 10 | continuous or discrete. 11 | } 12 | \usage{ 13 | sim.mol.data(mol.type = c("gene", "gene.ko", "cpd")[1], id.type = NULL, 14 | species="hsa", discrete = FALSE, nmol = 1000, nexp = 1, rand.seed=100) 15 | } 16 | 17 | \arguments{ 18 | \item{mol.type}{ 19 | character of length 1, specifing the molecular type, either "gene" (including 20 | transcripts, proteins), or "gene.ko" (KEGG ortholog genes, as defined in 21 | KEGG ortholog pathways), or "cpd" (including metabolites, glycans, 22 | drugs). Note that KEGG ortholog gene are considered "gene" in function 23 | \code{pathview}. Default mol.type="gene". 24 | } 25 | \item{id.type}{ 26 | character of length 1, the molecular ID type. When mol.type="gene", 27 | proper ID types include "KEGG" and "ENTREZ" (Entrez Gene). Multiple other ID 28 | types are also valid When species is among 19 major species fully 29 | annotated in Bioconductor, e.g. "hsa" (human), "mmu" (mouse) etc, check: 30 | 31 | \code{data(gene.idtype.bods); gene.idtype.bods} for 32 | other valid ID types. When mol.type="cpd", check \code{data(cpd.simtypes); 33 | cpd.simtypes} for valid ID types. Default id.type=NULL, then "Entrez" and 34 | "KEGG COMPOUND accession" will be assumed for mol.type = "gene" or 35 | "cpd". 36 | } 37 | \item{species}{ 38 | character, either the kegg code, scientific name or the common name of 39 | the target species. This is only effective when mol.type = 40 | "gene". Setting species="ko" is equilvalent to 41 | mol.type="gene.ko". Default species="hsa", equivalent to either "Homo 42 | sapiens" (scientific name) or "human" (common name). Gene data id.type 43 | has multiple other choices for 19 major research species, for details 44 | do: \code{data(gene.idtype.bods); gene.idtype.bods}. When other 45 | species are specified, gene id.type is limited to "KEGG" and "ENTREZ". 46 | } 47 | \item{discrete}{ 48 | logical, whether to generate discrete or continuous data. d 49 | discrete=FALSE, otherwise, mol.data will be a charactor vector of 50 | molecular IDs. 51 | } 52 | \item{nmol}{ 53 | integer, the target number of different molecules. Note that the 54 | specified id.type may not have as many different IDs as nmol. In this 55 | case, all IDs of id.type are used. 56 | } 57 | \item{nexp}{ 58 | integer, the sample size or the number of columns in the result 59 | simulated data. 60 | } 61 | \item{rand.seed}{ 62 | numeric of length 1, the seed number to start the random sampling 63 | process. This argumemnt makes the simulation reproducible as long as 64 | its value keeps the same. Default rand.seed=100. 65 | } 66 | } 67 | \details{ 68 | This function is written mainly for simulation or experiment with 69 | pathview package. With the simulated molecular data, you may check 70 | whether and how pathview works for molecular data of different types, 71 | IDs, format or sample sizes etc. You may also generate both gene.data 72 | and cpd.data and check data pathway based integration with pathview. 73 | } 74 | \value{ 75 | either vector (single sample) or a matrix-like data (multiple 76 | sample), depends on the value of \code{nexp}. Vector should be numeric 77 | with molecular IDs as names or it may also be character of molecular 78 | IDs depending on the value of \code{discrete}. Matrix-like data structure has molecules as 79 | rows and samples as columns. Row names should be molecular IDs. 80 | 81 | This returned data can be used directly as gene.data or cpd.data 82 | input of \code{pathview} main function. 83 | } 84 | \references{ 85 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 86 | pathway based data integration and visualization. Bioinformatics, 87 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 88 | } 89 | \author{ 90 | Weijun Luo 91 | } 92 | 93 | \seealso{ 94 | \code{\link{node.map}} the node data mapper function. 95 | \code{\link{mol.sum}} the auxillary molecular data mapper, 96 | \code{\link{id2eg}}, \code{\link{cpd2kegg}} etc the auxillary molecular ID mappers, 97 | \code{\link{pathview}} the main function, 98 | } 99 | \examples{ 100 | #continuous compound data 101 | cpd.data.c=sim.mol.data(mol.type="cpd", nmol=3000) 102 | #discrete compound data 103 | cpd.data.d=sim.mol.data(mol.type="cpd", nmol=3000, discrete=TRUE) 104 | head(cpd.data.c) 105 | head(cpd.data.d) 106 | #continuous compound data named with "CAS Registry Number" 107 | cpd.cas <- sim.mol.data(mol.type = "cpd", id.type = "CAS Registry Number", nmol = 10000) 108 | 109 | #gene data with two samples 110 | gene.data.2=sim.mol.data(mol.type="gene", nmol=1000, nexp=2) 111 | head(gene.data.2) 112 | 113 | #KEGG ortholog gene data 114 | ko.data=sim.mol.data(mol.type="gene.ko", nmol=5000) 115 | } 116 | % Add one or more standard keywords, see file 'KEYWORDS' in the 117 | % R documentation directory. 118 | \keyword{ ~kwd1 } 119 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 120 | -------------------------------------------------------------------------------- /man/wordwrap.Rd: -------------------------------------------------------------------------------- 1 | \name{wordwrap} 2 | \alias{wordwrap} 3 | \alias{strfit} 4 | 5 | \title{ 6 | Wrap or break strings into lines of specified width 7 | } 8 | \description{ 9 | strfit does hard wrapping, i.e. break within long words, wordwrap is a 10 | wrapper of strfit but also provides soft wrapping option, i.e. break 11 | only between words, and keep long words intact. 12 | } 13 | \usage{ 14 | wordwrap(s, width = 20, break.word = FALSE) 15 | strfit(s, width = 20) 16 | } 17 | %- maybe also 'usage' for other objects documented here. 18 | \arguments{ 19 | \item{s}{ 20 | characcter, strings to be wrapped or broken down. 21 | } 22 | \item{width}{ 23 | integer, target line width in terms of number of characters. d 24 | width=20. 25 | } 26 | \item{break.word}{ 27 | logical, whether to break within words or only between words as to fit 28 | the line width. Default break.word=FALSE, i.e. keep words intact and only 29 | break between words. Therefore, some line may exceed the \code{width} 30 | limit. 31 | } 32 | } 33 | \details{ 34 | These functions are called as to wrap long node labels into shorter 35 | lines on pathway graphs in \code{keggview.graph} function (when 36 | keggview.native=FALSE). They are equally useful for wrapping long 37 | labels in other types of graphs or output formats. 38 | } 39 | \value{ 40 | character of the same length of \code{s} except that each element has 41 | been wrapped softly or hardly. 42 | } 43 | \references{ 44 | Luo, W. and Brouwer, C., Pathview: an R/Bioconductor package for 45 | pathway based data integration and visualization. Bioinformatics, 46 | 2013, 29(14): 1830-1831, doi: 10.1093/bioinformatics/btt285 47 | } 48 | \author{ 49 | Weijun Luo 50 | } 51 | 52 | \seealso{ 53 | \code{strwrap} in R base. 54 | } 55 | \examples{ 56 | long.str="(S)-Methylmalonate semialdehyde" 57 | wr1=wordwrap(long.str, width=15) 58 | #long word intact 59 | cat(wr1, sep="\n") 60 | wr2=strfit(long.str, width=15) 61 | #long word split 62 | cat(wr2, sep="\n") 63 | } 64 | % Add one or more standard keywords, see file 'KEYWORDS' in the 65 | % R documentation directory. 66 | \keyword{ ~kwd1 } 67 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 68 | -------------------------------------------------------------------------------- /tests/runTests.R: -------------------------------------------------------------------------------- 1 | BiocGenerics:::testPackage("pathview") 2 | -------------------------------------------------------------------------------- /vignettes/pathview.bib: -------------------------------------------------------------------------------- 1 | @Article{hochreiter:etal:2006, 2 | author = {Sepp Hochreiter and Djork-Arne Clevert and Klaus Obermayer}, 3 | title = {A new summarization method for affymetrix probe level data}, 4 | journal = {Bioinformatics}, 5 | year = {2006}, 6 | key = {}, 7 | volume = {22}, 8 | number = {}, 9 | pages = {943-949}, 10 | month = {}, 11 | note = {}, 12 | annote = {} 13 | } 14 | 15 | @Article{luo:etal:2009, 16 | author = {Luo, Weijun and Friedman, Michael and Shedden, Kerby and Hankenson, Kurt and Woolf, Peter}, 17 | title = {{GAGE}: generally applicable gene set enrichment for pathway analysis}, 18 | journal = {BMC Bioinformatics}, 19 | year = {2009}, 20 | volume = {10}, 21 | number = {1}, 22 | pages = {161}, 23 | month = {}, 24 | URL = {http://www.biomedcentral.com/1471-2105/10/161} 25 | } 26 | 27 | 28 | @Article{luo:etal:2013, 29 | author = {Luo, Weijun and Brouwer, Cory}, 30 | title = {{Pathview: an R/Bioconductor package for pathway-based data integration and visualization}}, 31 | journal = {Bioinformatics}, 32 | year = {2013}, 33 | doi = {10.1093/bioinformatics/btt285}, 34 | volume = {29}, 35 | number = {14}, 36 | pages = {1830-1831}, 37 | month = {}, 38 | URL = {http://bioinformatics.oxfordjournals.org/content/29/14/1830.full} 39 | } 40 | 41 | @Article{luo:etal:2017, 42 | author = {Luo, Weijun and Pant, Gaurav and Bhavnasi, Yeshvant K and Blanchard, Steven G and Brouwer, Cory}, 43 | title = {{Pathview Web: user friendly pathway visualization and data integration}}, 44 | journal = {Nucleic Acids Research}, 45 | year = {2017}, 46 | doi = {10.1093/nar/gkx372}, 47 | volume = {Web server issue}, 48 | number = {}, 49 | pages = {gkx372}, 50 | month = {}, 51 | URL = {https://academic.oup.com/nar/article-lookup/doi/10.1093/nar/gkx372} 52 | } 53 | 54 | @Article{kim:etal:2005, 55 | author = {Kim, SY and Volsky, DJ}, 56 | title = {{PAGE}: parametric analysis of gene set enrichment}, 57 | journal = {BMC Bioinformatics}, 58 | year = {2005}, 59 | key = {}, 60 | volume = {6}, 61 | number = {}, 62 | pages = {144}, 63 | month = {}, 64 | note = {}, 65 | annote = {} 66 | } 67 | 68 | @Article{subra:etal:2005, 69 | author = {Subramanian, Aravind and Tamayo, Pablo and Mootha, Vamsi K. and Mukherjee, Sayan and Ebert, Benjamin L. and Gillette, Michael A. and Paulovich, Amanda and Pomeroy, Scott L. and Golub, Todd R. and Lander, Eric S. and Mesirov, Jill P.}, 70 | title = {Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide expression profiles}, 71 | journal = {Proceedings of the National Academy of Sciences of the United States of America}, 72 | year = {2005}, 73 | key = {}, 74 | volume = {102}, 75 | number = {43}, 76 | pages = {15545-15550}, 77 | month = {}, 78 | note = {}, 79 | annote = {} 80 | } 81 | 82 | @Article{emery:etal:2009, 83 | author = {Lyndsey A. Emery and Anusri Tripathi and Chialin King and Maureen Kavanah and Jane Mendez and Michael D. Stone and Antonio de las Morenas and Paola Sebastiani and Carol L. Rosenberg}, 84 | title = {Early Dysregulation of Cell Adhesion and Extracellular Matrix Pathways in Breast Cancer Progression}, 85 | journal = {The American journal of pathology}, 86 | year = {2009}, 87 | key = {}, 88 | volume = {175}, 89 | number = {}, 90 | pages = {1292-1302}, 91 | month = {}, 92 | note = {}, 93 | annote = {} 94 | } 95 | 96 | @Article{zhang:etal:2009, 97 | author = {Zhang, Jitao David and Wiemann, Stefan}, 98 | title = {{KEGGgraph: a graph approach to KEGG PATHWAY in R and Bioconductor}}, 99 | volume = {25}, 100 | number = {11}, 101 | pages = {1470-1471}, 102 | year = {2009}, 103 | doi = {}, 104 | abstract ={Motivation: KEGG PATHWAY is a service of Kyoto Encyclopedia of Genes and Genomes (KEGG), constructing manually curated pathway maps that represent current knowledge on biological networks in graph models. While valuable graph tools have been implemented in R/Bioconductor, to our knowledge there is currently no software package to parse and analyze KEGG pathways with graph theory.Results: We introduce the software package KEGGgraph in R and Bioconductor, an interface between KEGG pathways and graph models as well as a collection of tools for these graphs. Superior to existing approaches, KEGGgraph captures the pathway topology and allows further analysis or dissection of pathway graphs. We demonstrate the use of the package by the case study of analyzing human pancreatic cancer pathway.Availability:KEGGgraph is freely available at the Bioconductor web site (http://www.bioconductor.org). KGML files can be downloaded from KEGG FTP site (ftp://ftp.genome.jp/pub/kegg/xml).Contact: j.zhang@dkfz-heidelberg.deSupplementary information: Supplementary data are available at Bioinformatics online.}, 105 | URL = {}, 106 | eprint = {http://bioinformatics.oxfordjournals.org/content/25/11/1470.full.pdf+html}, 107 | journal = {Bioinformatics} 108 | } -------------------------------------------------------------------------------- /vignettes/pathview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datapplab/pathview/1f17eb73930610ec9ca51545f536f79a728f6b19/vignettes/pathview.pdf --------------------------------------------------------------------------------