├── .BBSoptions ├── .Rbuildignore ├── .Rinstignore ├── .github └── workflows │ └── R-CMD-check.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R ├── addAttributeList.R ├── checkInputFile.R ├── checkInputFilePc.R ├── checkOutputFile.R ├── convertDataFrameListsToVectors.R ├── convertSifToGmt.R ├── convertSifToSpia.R ├── downloadFile.R ├── downloadPc.R ├── downloadPc2.R ├── downloadSignedPC.R ├── fetch.R ├── filterSif.R ├── fromPsimi.R ├── getCacheFiles.R ├── getErrorMessage.R ├── getNeighbors.R ├── getPc.R ├── getPcDatabaseNames.R ├── getPcRequest.R ├── getPcUrl.R ├── getShortestPathSif.R ├── getSifInteractionCategories.R ├── graphPc.R ├── integrateBiopax.R ├── loadSifInIgraph.R ├── mapValues.R ├── mergeBiopax.R ├── paxtoolsr.R ├── pcDirections.R ├── pcFormats.R ├── pcGraphQueries.R ├── processPcRequest.R ├── readBiopax.R ├── readGmt.R ├── readPcPathwaysInfo.R ├── readSbgn.R ├── readSif.R ├── readSifnx.R ├── searchListOfVectors.R ├── searchPc.R ├── splitSifnxByPathway.R ├── summarize.R ├── summarizeSif.R ├── toCytoscape.R ├── toGSEA.R ├── toLevel3.R ├── toSBGN.R ├── toSif.R ├── toSifnx.R ├── topPathways.R ├── traverse.R └── validate.R ├── README.md ├── inst ├── .gitignore ├── CITATION ├── extdata │ ├── 10523676-compact.xml │ ├── REACT_12034-3.owl │ ├── biopax3-short-metabolic-pathway.owl │ ├── biopax3_utf8_char.xml │ ├── dna_replication.owl │ ├── pathways.txt.gz │ ├── raf_map_kinase_cascade_reactome.owl │ ├── sif_color_code.txt │ ├── tca_cycle.sif │ ├── test_PathwayCommons12.kegg.hgnc.gmt │ ├── test_PathwayCommons12.netpath.hgnc.gmt │ ├── test_biopax.owl │ ├── test_edgelist.txt │ ├── test_gsea.gmt │ ├── test_gsea_1.gmt │ ├── test_sbgn.xml │ ├── test_sif.txt │ ├── test_sif_shortestPath.txt │ ├── test_sifnx.txt │ ├── test_sifnx2.txt │ ├── test_sifnx_250.txt │ └── test_sifnx_sm.txt ├── ignore │ ├── convertIds.R │ ├── convertSifnxIds.R │ ├── convertToDF.R │ ├── convertToDT.R │ ├── convertToDataFrameWithListOfVectors.R │ ├── convertToPathwayList.R │ ├── 
del.txt │ ├── extractIds.R │ └── readSifnxSmall.R ├── java │ └── paxtools-4.3.1.jar └── paxtoolsNotes.txt ├── java └── README.md ├── man ├── addAttributeList.Rd ├── convertDataFrameListsToVectors.Rd ├── convertSifToGmt.Rd ├── downloadFile.Rd ├── downloadPc2.Rd ├── downloadSignedPC.Rd ├── fetch.Rd ├── filterSif.Rd ├── getCacheFiles.Rd ├── getErrorMessage.Rd ├── getNeighbors.Rd ├── getPc.Rd ├── getPcDatabaseNames.Rd ├── getPcUrl.Rd ├── getShortestPathSif.Rd ├── getSifInteractionCategories.Rd ├── graphPc.Rd ├── integrateBiopax.Rd ├── loadSifInIgraph.Rd ├── mapAttributes.Rd ├── mapValues.Rd ├── mergeBiopax.Rd ├── pcDirections.Rd ├── pcFormats.Rd ├── pcGraphQueries.Rd ├── processPcRequest.Rd ├── readBiopax.Rd ├── readGmt.Rd ├── readPcPathwaysInfo.Rd ├── readSbgn.Rd ├── readSif.Rd ├── readSifnx.Rd ├── searchListOfVectors.Rd ├── searchPc.Rd ├── splitSifnxByPathway.Rd ├── summarize.Rd ├── summarizeSif.Rd ├── toCytoscape.Rd ├── toGSEA.Rd ├── toLevel3.Rd ├── toSBGN.Rd ├── toSif.Rd ├── toSifnx.Rd ├── topPathways.Rd ├── traverse.Rd └── validate.Rd ├── tests ├── runTests.old ├── testthat.R └── testthat │ ├── test_pathwayCommons.R │ └── test_paxtools.R └── vignettes ├── extract_pathways.R ├── getNeighbors.R └── using_paxtoolsr.Rmd /.BBSoptions: -------------------------------------------------------------------------------- 1 | # The reason this package is marked as unsupported on win32 is that 2 | # it requires the Java JDK. 
However it seems that Oracle no longer 3 | # provides the JDK for 32-bit windows: 4 | # https://www.oracle.com/java/technologies/javase-jdk14-downloads.html 5 | UnsupportedPlatforms: win32 6 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^installPackages.R 4 | ^README.md 5 | ^\.travis\.yml$ 6 | ^\.httr-oauth$ 7 | ^\.git$ 8 | ^\.github$ 9 | ^reconf.sh$ 10 | -------------------------------------------------------------------------------- /.Rinstignore: -------------------------------------------------------------------------------- 1 | framed.sty 2 | paxtoolsr-concordance.tex 3 | paxtoolsr.tex -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | schedule: 3 | - cron: '0 8 * * *' 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | name: R-CMD-check 12 | 13 | jobs: 14 | check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }}, R ${{ matrix.config.r }}, Java ${{ matrix.config.java }} 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: windows-latest, r: 'release', java: 14} 24 | - {os: windows-latest, r: 'devel', java: 14} 25 | - {os: macOS-latest, r: 'release', java: 14} 26 | - {os: ubuntu-20.04, r: 'release', java: 14} 27 | - {os: ubuntu-20.04, r: 'devel', java: 14} 28 | 29 | steps: 30 | - uses: actions/checkout@v2 31 | 32 | - uses: r-lib/actions/setup-r@v1 33 | with: 34 | r-version: ${{ matrix.config.r }} 35 | 36 | - name: Install System Dependencies (Linux) 37 | if: runner.os == 'Linux' 38 | run: | 39 | sudo apt-get update && sudo apt-get -yqq install libcurl4-openssl-dev 40 | 41 | - uses: r-lib/actions/setup-pandoc@v1 42 | 43 | - uses: 
actions/setup-java@v2 44 | with: 45 | distribution: 'adopt' 46 | java-version: ${{ matrix.config.java }} 47 | 48 | - name: Query Dependencies 49 | run: | 50 | install.packages('remotes') 51 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 52 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 53 | shell: Rscript {0} 54 | 55 | # - name: Restore R Package Cache 56 | # uses: actions/cache@v2 57 | # with: 58 | # path: ${{ env.R_LIBS_USER }} 59 | # key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 60 | # restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 61 | 62 | - name: Show Java Info (Linux) 63 | run: "bash -c 'java -version && which java && echo $PATH && echo $JAVA_HOME'" 64 | 65 | - name: Setup R Java Support 66 | if: runner.os != 'Windows' 67 | run: "echo export PATH=$PATH > reconf.sh; echo export JAVA_HOME=$JAVA_HOME >> reconf.sh; echo R CMD javareconf >> reconf.sh; sudo bash reconf.sh" 68 | 69 | - name: Install Dependencies 70 | run: | 71 | remotes::install_deps(dependencies = TRUE) 72 | remotes::install_cran("rcmdcheck") 73 | shell: Rscript {0} 74 | 75 | - name: Session Info 76 | run: | 77 | options(width = 100) 78 | pkgs <- installed.packages()[, "Package"] 79 | sessioninfo::session_info(pkgs, include_base = TRUE) 80 | shell: Rscript {0} 81 | 82 | - name: Run Check (Linux/macOS) 83 | env: 84 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 85 | if: runner.os != 'Windows' 86 | run: | 87 | options(crayon.enabled = TRUE) 88 | rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 89 | shell: Rscript {0} 90 | 91 | - name: Run Check (Windows) 92 | env: 93 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 94 | if: runner.os == 'Windows' 95 | run: | 96 | options(crayon.enabled = TRUE) 97 | rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran", "--no-multiarch"), error_on = 
"warning", check_dir = "check") 98 | shell: Rscript {0} 99 | 100 | # - name: R CMD check (Windows) 101 | # if: runner.os == 'Windows' 102 | # run: "bash -c 'R CMD check --no-multiarch --no-manual .'" 103 | # 104 | # - name: R CMD check (Linux) 105 | # if: runner.os == 'Linux' 106 | # run: "bash -c 'xvfb-run R CMD check --no-manual .'" 107 | # 108 | # - name: R CMD check (macOS) 109 | # if: runner.os == 'macOS' 110 | # run: "bash -c 'NOAWT=1 R CMD check --no-manual .'" 111 | 112 | - name: Show Install Log 113 | run: "bash -c 'if [ -e paxtoolsr.Rcheck/00install.out ]; then cat paxtoolsr.Rcheck/00install.out; fi'" 114 | 115 | - name: Upload Check Results 116 | if: failure() 117 | uses: actions/upload-artifact@master 118 | with: 119 | name: ${{ runner.config.os }}-r${{ matrix.config.r }}-j${{ matrix.config.java }}-results 120 | path: paxtoolsr.Rcheck 121 | 122 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .DS_Store 5 | .httr-oauth 6 | paxtoolsr.Rproj 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: paxtoolsr 2 | Type: Package 3 | Title: Access Pathways from Multiple Databases Through BioPAX and Pathway Commons 4 | Version: 1.27.4 5 | Date: 2023-01-10 6 | Authors@R: person("Augustin", "Luna", email = "lunaa@cbio.mskcc.org", 7 | role = c("aut", "cre")) 8 | Imports: 9 | utils, 10 | httr, 11 | igraph, 12 | plyr, 13 | rjson, 14 | R.utils, 15 | jsonlite, 16 | readr, 17 | rappdirs 18 | Depends: 19 | R (>= 3.2), 20 | rJava (>= 0.9-8), 21 | methods, 22 | XML 23 | Suggests: 24 | testthat, 25 | knitr, 26 | BiocStyle, 27 | formatR, 28 | rmarkdown, 29 | RColorBrewer, 30 | foreach, 31 | doSNOW, 32 | parallel, 33 | org.Hs.eg.db, 34 | clusterProfiler 35 | 
SystemRequirements: Java (>= 1.6) 36 | License: LGPL-3 37 | Description: The package provides a set of R functions for interacting with 38 | BioPAX OWL files using Paxtools and the querying Pathway Commons (PC) molecular 39 | interaction database. Pathway Commons is a project by the Memorial 40 | Sloan-Kettering Cancer Center (MSKCC), Dana-Farber Cancer Institute (DFCI), 41 | and the University of Toronto. Pathway Commons databases include: BIND, 42 | BioGRID, CORUM, CTD, DIP, DrugBank, HPRD, HumanCyc, IntAct, KEGG, 43 | MirTarBase, Panther, PhosphoSitePlus, Reactome, RECON, TRANSFAC. 44 | VignetteBuilder: knitr 45 | LazyData: false 46 | biocViews: GeneSetEnrichment, GraphAndNetwork, Pathways, Software, 47 | SystemsBiology, NetworkEnrichment, Network, Reactome, KEGG 48 | URL: https://github.com/BioPAX/paxtoolsr 49 | Encoding: UTF-8 50 | RoxygenNote: 7.1.1 51 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(addAttributeList) 4 | export(convertDataFrameListsToVectors) 5 | export(convertSifToGmt) 6 | export(downloadFile) 7 | export(downloadPc2) 8 | export(downloadSignedPC) 9 | export(fetch) 10 | export(filterSif) 11 | export(getCacheFiles) 12 | export(getErrorMessage) 13 | export(getNeighbors) 14 | export(getPc) 15 | export(getPcDatabaseNames) 16 | export(getPcUrl) 17 | export(getShortestPathSif) 18 | export(getSifInteractionCategories) 19 | export(graphPc) 20 | export(integrateBiopax) 21 | export(loadSifInIgraph) 22 | export(mapAttributes) 23 | export(mapValues) 24 | export(mergeBiopax) 25 | export(pcDirections) 26 | export(pcFormats) 27 | export(pcGraphQueries) 28 | export(processPcRequest) 29 | export(readBiopax) 30 | export(readGmt) 31 | export(readPcPathwaysInfo) 32 | export(readSbgn) 33 | export(readSif) 34 | export(readSifnx) 35 | export(searchListOfVectors) 36 | 
export(searchPc) 37 | export(splitSifnxByPathway) 38 | export(summarize) 39 | export(summarizeSif) 40 | export(toCytoscape) 41 | export(toGSEA) 42 | export(toLevel3) 43 | export(toSBGN) 44 | export(toSif) 45 | export(toSifnx) 46 | export(topPathways) 47 | export(traverse) 48 | export(validate) 49 | import(XML) 50 | import(rJava) 51 | importFrom(R.utils,gunzip) 52 | importFrom(XML,xmlTreeParse) 53 | importFrom(httr,GET) 54 | importFrom(httr,HEAD) 55 | importFrom(httr,accept) 56 | importFrom(httr,add_headers) 57 | importFrom(httr,build_url) 58 | importFrom(httr,content) 59 | importFrom(httr,http_date) 60 | importFrom(httr,http_status) 61 | importFrom(httr,parse_url) 62 | importFrom(httr,progress) 63 | importFrom(httr,url_success) 64 | importFrom(httr,write_disk) 65 | importFrom(igraph,"E<-") 66 | importFrom(igraph,"V<-") 67 | importFrom(igraph,E) 68 | importFrom(igraph,V) 69 | importFrom(igraph,all_shortest_paths) 70 | importFrom(igraph,are.connected) 71 | importFrom(igraph,ecount) 72 | importFrom(igraph,edge.attributes) 73 | importFrom(igraph,ends) 74 | importFrom(igraph,get.edgelist) 75 | importFrom(igraph,graph.attributes) 76 | importFrom(igraph,graph.edgelist) 77 | importFrom(igraph,list.edge.attributes) 78 | importFrom(igraph,list.vertex.attributes) 79 | importFrom(igraph,set_edge_attr) 80 | importFrom(igraph,set_vertex_attr) 81 | importFrom(igraph,vertex.attributes) 82 | importFrom(jsonlite,toJSON) 83 | importFrom(methods,is) 84 | importFrom(plyr,ldply) 85 | importFrom(rappdirs,user_cache_dir) 86 | importFrom(readr,cols) 87 | importFrom(readr,read_tsv) 88 | importFrom(rjson,fromJSON) 89 | importFrom(utils,URLencode) 90 | importFrom(utils,read.table) 91 | importFrom(utils,select.list) 92 | importFrom(utils,setTxtProgressBar) 93 | importFrom(utils,txtProgressBar) 94 | importFrom(utils,write.table) 95 | -------------------------------------------------------------------------------- /NEWS: 
-------------------------------------------------------------------------------- 1 | # Version 1.27.4 2 | Increase file limit size 3 | 4 | # Version 1.1.11 5 | Documentation update 6 | 7 | # Version 1.1.10 8 | Changed vignette to use quicker examples 9 | Changed DESCRIPTION 10 | 11 | # Version 1.1.9 12 | Documentation update 13 | 14 | # Version 1.1.8 15 | Removed PC dependent tests for paxtools functions 16 | Added skip_on_bioc() 17 | Updated vignette to use glycolysis pathway that can be converted to gene set 18 | Added fill=TRUE to read.table commands in paxtools.R 19 | 20 | # Version 1.1.6 21 | Added RStudio files and README.md 22 | 23 | # Version 1.1.5 24 | Updated to paxtools 4.3 25 | Added better Java error messages 26 | Added the ability to switch PC versions 27 | Updated to PC version 6 28 | 29 | # Version 1.1.4 30 | Fixed topPathways() bug 31 | Fixed fetch() example/test 32 | Changed title 33 | 34 | # Version 1.1.3 35 | Added CITATION file 36 | 37 | # Version 1.1.2 38 | Update vignette to use BiocStyle 39 | 40 | # Version 1.1.1 41 | Transitioned to Pathway Commons Version 5 (PC2 v5) 42 | Changed downloadPc to download from PC2 v5 43 | Changed code to validate Pathway Commons webservice requests 44 | Added table of contents to vignette 45 | Added description for traverse() in vignette 46 | Changed R dependency to 3.1.1 to match Bioconductor 3 47 | Added function to download a single file extended SIF, splitSifnx 48 | Added the ability to download GMT in downloadPc 49 | Added function to read GMT files 50 | Added tests for new functions 51 | 52 | # Version 1.0.0 53 | Initial release 54 | -------------------------------------------------------------------------------- /R/addAttributeList.R: -------------------------------------------------------------------------------- 1 | #' Add attributes using a list of vectors to an igraph object 2 | #' 3 | #' @param g an igraph object 4 | #' @param attr the name of the attribute 5 | #' @param l the list of vectors 6 | 
#'
#' @return the modified igraph object
#'
#' @examples
#' library(igraph)
#' g <- barabasi.game(20)
#' g <- set_vertex_attr(g, "name", value=LETTERS[1:20])
#' g <- addAttributeList(g, "isProt",
#'   list(A=TRUE, B=FALSE, C=TRUE, D=TRUE, E=FALSE))
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom igraph V V<- set_vertex_attr
addAttributeList <- function(g, attr, l) {
  for(i in seq_along(l)) {
    # Vertices whose name equals the name of the i-th list entry
    vertex <- V(g)$name[V(g)$name %in% names(l)[i]]

    # Entries without exactly one matching vertex are skipped
    if(length(vertex) == 1) {
      idx <- match(vertex, V(g)$name)

      # The value is looked up in the list by vertex name
      g <- set_vertex_attr(g, attr, idx, l[[vertex]])
    }
  }

  return(g)
}

# ---- /R/checkInputFile.R ----
#' Utility method; create temporary file if necessary
#'
#' @param file a string or XMLInternalDocument
#' @return location of file; the input is returned unchanged unless it is an
#'   external pointer, in which case the path of a new temporary file is returned
#'
#' @concept paxtoolsr
#' @noRd
checkInputFile <- function(file) {
  # An object of type "externalptr" is written out with saveXML() so that a
  # file path can be handed on instead of the in-memory document
  if(typeof(file) == "externalptr") {
    tmp <- tempfile()
    saveXML(file, tmp)

    file <- tmp
  }

  return(file)
}

# ---- /R/checkInputFilePc.R ----
#' Check input files from PC
#'
#' @param inputFile Path to file
#'
#' @details An error is raised if the file does not exist or if it is larger
#'   than 2e9 bytes
#'
#' @return No value is returned
#'
#' @concept paxtoolsr
#' @noRd
checkInputFilePc <- function(inputFile) {
  if(!file.exists(inputFile)) {
    stop("ERROR: inputFile not file.")
  }

  # The message states the limit that is actually enforced (2e9 bytes)
  if(file.size(inputFile) > 2e9) {
    stop("ERROR: A maximum file size limit of 2GB has been placed on files being read. Reading larger files with this function may be very slow. Please contact package author for workarounds.")
  }
}

# ---- /R/checkOutputFile.R ----
#' Utility method; create temporary file if necessary
#'
#' @param file a string, or NULL
#' @return location of file; a new temporary file path when \code{file} is NULL
#'
#' @concept paxtoolsr
#' @noRd
checkOutputFile <- function(file) {
  if(is.null(file)) {
    file <- tempfile()
  }

  return(file)
}

# ---- /R/convertDataFrameListsToVectors.R ----
#' Convert columns with list in data.frame to vector
#'
#' @param df a data.frame
#' @param delimiter a delimiter to concatenate (DEFAULT: ;)
#'
#' @return a data.frame without list columns; every list column is replaced by
#'   a character vector in which the entries of each cell are pasted together
#'   with \code{delimiter}
#'
#' @note Lists as columns are useful programmatically,
#'   but cause issue in writing output to text-based files
#'
#' @examples
#' df <- data.frame(id = 1:2, name = c("Jon", "Mark"),
#'   children = I(list(c("Mary", "James"), c("Greta", "Sally"))))
#' df <- convertDataFrameListsToVectors(df)
#'
#' @concept paxtoolsr
#' @export
convertDataFrameListsToVectors <- function(df, delimiter=";") {
  # seq_len() yields an empty sequence for a zero-column data.frame (1:0 would
  # index columns 1 and 0); [[ ]] inspects the column itself
  listCols <- which(vapply(seq_len(ncol(df)), function(j) {
    is.list(df[[j]])
  }, logical(1)))

  for(i in listCols) {
    # vapply() returns a character vector even when there are no rows
    df[[i]] <- vapply(df[[i]], function(x) {
      paste(x, collapse = delimiter)
    }, character(1))
  }

  return(df)
}

# ---- /R/convertSifToGmt.R ----
#' Convert SIF to GMT
#'
#' @param sif a data.frame representing a SIF (Simple Interaction Format);
#'   the columns PARTICIPANT_A and PARTICIPANT_B are used
#' @param name the name of the gene set
#' @param returnSmallMolecules a boolean whether to return genes
#'   or small molecules in the gene set; identifiers starting with "CHEBI"
#'   are treated as small molecules
#'
#' @return a list with one entry (named \code{name}) being a sorted vector of
#'   unique identifiers; the vector is empty when no identifier qualifies
#'
#' @examples
#' sif <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr"))
#' gmt <- convertSifToGmt(sif)
#'
#' @concept paxtoolsr
#' @export
convertSifToGmt <- function(sif, name="gmt", returnSmallMolecules=FALSE) {
  # Convert each column to character before combining: c() on factor columns
  # can yield integer codes instead of the identifiers
  ids <- unique(c(as.character(sif$PARTICIPANT_A), as.character(sif$PARTICIPANT_B)))

  isSmallMolecule <- grepl("^CHEBI", ids)

  if(returnSmallMolecules) {
    idx <- isSmallMolecule
  } else {
    idx <- !isSmallMolecule
  }

  results <- list()
  results[[name]] <- sort(ids[idx])

  return(results)
}

# ---- /R/convertSifToSpia.R ----
#' Convert SIF Interaction Types to SPIA types
#'
#' @param edges a data.frame of interactions; must have INTERACTION_TYPE column
#'
#' @return the edges data.frame with the converted interaction types;
#'   interaction types without a SPIA counterpart (including NA) are unchanged
#'
#' @concept paxtoolsr
#' @noRd
convertSifToSpia <- function(edges) {
  # sifType[k] is converted to spiaType[k]
  sifType <- c("controls-state-change-of",
               "controls-expression-of",
               "controls-degradation-of",
               "controls-transport-of",
               "catalysis-precedes",
               "in-complex-with")

  spiaType <- c("process(phosphorylation)",
                "process(expression)",
                "process(indirect effect)",
                "process(indirect effect)",
                "process(indirect effect)",
                "process(binding/association)")

  # match() gives NA for unknown or missing types, so the row index below never
  # contains NA (a logical index with NA cannot be used to assign into a
  # data.frame)
  hit <- match(edges[,"INTERACTION_TYPE"], sifType)
  found <- !is.na(hit)

  if(any(found)) {
    edges[found, "INTERACTION_TYPE"] <- spiaType[hit[found]]
  }

  return(edges)
}

# ---- /R/downloadFile.R ----
#' Check Cache and Download File
#'
#' @param baseUrl a string, entire download URL except filename
#' @param fileName a string, the filename of file to be downloaded
#' @param destDir a string, the path where a file should be saved; if NULL the
#'   file is saved in the cache directory named by \code{cacheEnv} and recorded
#'   in the "cacheMap.txt" file of that directory
#' @param cacheEnv a string, environment variable pointing to specific cache
#' @param verbose show debugging information
#'
#' @return a boolean TRUE if the file was downloaded or a cached copy exists
#'   and the server did not offer a download, FALSE otherwise
#'
#' @details
#' Description of file formats: http://www.pathwaycommons.org/pc2/formats
#'
#' A HEAD request is sent first; for a cached file it carries the stored
#' retrieval date in the If-Modified-Since header. The file is fetched only
#' when the HEAD request reports success.
#'
#' @examples
#' downloadFile("http://google.com/", fileName="index.html", destDir=tempdir())
#'
#' @concept paxtoolsr
#' @seealso \code{\link{readSif}, \link{readBiopax}, \link{readSbgn}, \link{readSifnx}, \link{readGmt}}
#' @export
#'
#' @importFrom httr HEAD GET http_status write_disk progress add_headers http_date
#' @importFrom utils read.table write.table URLencode
downloadFile <- function(baseUrl, fileName, destDir=NULL, cacheEnv="PAXTOOLSR_CACHE", verbose=FALSE) {
  url <- URLencode(paste0(baseUrl, fileName))
  fileIdx <- NULL

  if(!is.null(destDir)) {
    filePath <- file.path(destDir, fileName)
  } else {
    cacheDir <- Sys.getenv(cacheEnv)

    # An unset variable would otherwise resolve to "/cacheMap.txt"
    if(cacheDir == "") {
      stop("ERROR: Environment variable ", cacheEnv, " is not set; cannot locate the cache directory.")
    }

    cacheMapPath <- file.path(cacheDir, "cacheMap.txt")
    cacheMap <- read.table(cacheMapPath, sep="\t", header=TRUE, stringsAsFactors=FALSE)
    filePath <- file.path(cacheDir, fileName)

    fileIdx <- which(cacheMap[,"fileName"] == fileName)
  }

  if(verbose) {
    cat("filePath: ", filePath, "\n")
    cat("url: ", url, "\n")
  }

  # A cache entry only counts while the cached file is still on disk; without
  # the file the request is sent unconditionally so that it is fetched again
  isCached <- length(fileIdx) > 0 && file.exists(filePath)

  if(isCached) {
    headResp <- HEAD(url, add_headers("If-Modified-Since"=cacheMap[fileIdx[1],"retrievedDate"]))
  } else {
    headResp <- HEAD(url, add_headers("If-Modified-Since"=""))
  }

  httpStatus <- http_status(headResp)
  headOk <- grepl("success", httpStatus$category, ignore.case=TRUE)
  downloaded <- FALSE

  if(headOk) {
    getResp <- GET(url=url, write_disk(filePath, overwrite=TRUE), progress())

    # The download itself can fail even though the HEAD request succeeded
    downloaded <- grepl("success", http_status(getResp)$category, ignore.case=TRUE)

    if(downloaded && is.null(destDir)) {
      # Current date
      retrievedDate <- http_date(as.POSIXlt(Sys.time(), "GMT"))

      if(length(fileIdx) == 0) {
        cacheMap <- rbind(cacheMap, data.frame(fileName=fileName, retrievedDate=retrievedDate, url=url, stringsAsFactors=FALSE))
      } else {
        cacheMap[fileIdx,"fileName"] <- fileName
        cacheMap[fileIdx,"retrievedDate"] <- retrievedDate
        cacheMap[fileIdx,"url"] <- url
      }

      write.table(cacheMap, file=cacheMapPath, quote=FALSE, sep="\t", row.names=FALSE)
    }
  }

  # Usable file: either freshly downloaded, or cached and no download was offered
  if(downloaded || (!headOk && isCached)) {
    return(TRUE)
  } else {
    if(verbose) {
      cat("fileIdx: ", fileIdx, "\n")
      cat("httpStatus$category: ", httpStatus$category, "\n")
    }

    return(FALSE)
  }
}

# ---- /R/downloadPc.R ----
#' #' Download Pathway Commons data (DEPRECATED)
#' #'
#' #' Download Pathway Commons data in various formats
#' #'
#' #' @param format a string describing the format to be downloaded;
#' #' currently, only the Extended Simple Interaction Format (SIF) "SIFNX" and
#' #' Gene Set Enrichment Analysis "GMT" formats for the entire Pathway Commons
#' #' database are supported.
9 | #' #' @param verbose a boolean debugging information 10 | #' #' @return a named list with named pathways, each entry contains a vector of gene 11 | #' #' symbols (for GMT) or a list with two data.frames (for SIFNX): 12 | #' #' \itemize{ 13 | #' #' \item edges Network edges with the following columns: 14 | #' #' PARTICIPANT_A: Edge (interaction) participant, 15 | #' #' INTERACTION_TYPE: Interaction type (see details), 16 | #' #' PARTICIPANT_B: Edge (interaction) participant, 17 | #' #' INTERACTION_DATA_SOURCE: Semi-colon delimited list of database sources 18 | #' #' of the interaction, 19 | #' #' INTERACTION_PUBMED_ID: Semi-colon delimited list of NCBI Pubmed IDs 20 | #' #' that give evidence for the interaction 21 | #' #' \item nodes Node information: 22 | #' #' PARTICIPANT: Interaction participant, 23 | #' #' PARTICIPANT_TYPE: BioPAX class (see details), 24 | #' #' PARTICIPANT_NAME: Display name for the participant, 25 | #' #' UNIFICATION_XREF: A UnificationXref defines a reference to an entity 26 | #' #' in an external resource that has the same biological identity as 27 | #' #' the referring entity, 28 | #' #' RELATIONSHIP_XREF: An RelationshipXref defines a reference to an entity 29 | #' #' in an external resource that does not have the same biological 30 | #' #' identity as the referring entity. 
31 | #' #' } 32 | #' #' 33 | #' #' @details 34 | #' #' 35 | #' #' Use \code{\link{downloadPc2}} 36 | #' #' 37 | #' #' @examples 38 | #' #' \dontrun{ 39 | #' #' downloadPc(format="GMT") 40 | #' #' } 41 | #' #' 42 | #' #' @concept paxtoolsr 43 | #' #' @seealso \code{\link{downloadPc2}} 44 | #' #' @export 45 | #' downloadPc <- function(format=c("SIFNX", "GMT"), verbose=FALSE) { 46 | #' if(format == "SIFNX") { 47 | #' orgFile <- tempfile("sifnx", fileext=".gz") 48 | #' 49 | #' url <- paste0(getPcUrl(), "downloads/Pathway%20Commons.", getOption("pc.version"), 50 | #' ".All.EXTENDED_BINARY_SIF.hgnc.sif.gz ") 51 | #' 52 | #' if(verbose) { 53 | #' cat("URL: ", url, "\n") 54 | #' } 55 | #' 56 | #' download.file(url, orgFile) 57 | #' 58 | #' #con <- gzcon(file(orgFile, "r")) 59 | #' results <- readSifnx(orgFile) 60 | #' } 61 | #' 62 | #' if(format == "BIOPAX") { 63 | #' } 64 | #' 65 | #' if(format == "GMT") { 66 | #' file <- tempfile("gmt", fileext = ".gz") 67 | #' url <- paste0(getPcUrl(), "downloads/Pathway%20Commons.", getOption("pc.version"), 68 | #' ".All.GSEA.hgnc.gmt.gz") 69 | #' 70 | #' if(verbose) { 71 | #' cat("URL: ", url, "\n") 72 | #' } 73 | #' 74 | #' download.file(url, file) 75 | #' 76 | #' results <- readGmt(gzfile(file)) 77 | #' } 78 | #' 79 | #' return(results) 80 | #' } 81 | -------------------------------------------------------------------------------- /R/downloadPc2.R: -------------------------------------------------------------------------------- 1 | #' Download Pathway Commons files (uses menu and cache) 2 | #' 3 | #' @param selectedFileName a string, a name of a file to skip the the interactive selection 4 | #' @param destDir a string, the destination directory for the file to be 5 | #' downloaded (Default: NULL). 
#' If NULL, then file will be downloaded to cache
#' directory at Sys.getenv("PAXTOOLSR_CACHE")
#' @param returnNames return a vector of names matching the given regular expression
#'   (matched case-insensitively); when given, nothing is downloaded
#' @param version a version number for a previous version of Pathway Commons data;
#'   versions 3 and above. Parameter set as version="8". Available versions "http://www.pathwaycommons.org/archives/PC2/".
#'   The value "current" selects the directory starting with "current"; NULL
#'   uses the "downloads/" directory below Sys.getenv("PC_URL"). The argument
#'   has no default and is evaluated whenever the server has to be contacted.
#' @param verbose a flag to display debugging information (Default: FALSE)
#' @param ... additional parameters to send to corresponding read* methods
#'
#' @return an R object using one of the read* methods provided in this package
#'   corresponding to the file downloaded; a character vector of file names when
#'   \code{returnNames} is given; NULL (with a warning) when the file name
#'   matches none of the supported formats
#'
#' @details The reader is chosen from the file name, checked in this order:
#'   "GSEA" or "gmt" (readGmt); "TXT", "txt" or "EXTENDED_BINARY_SIF"
#'   (readSifnx); "SIF", "BINARY_SIF" or "sif" (readSif); "BIOPAX" (readBiopax).
#'
#' @examples
#' \dontrun{
#' downloadPc2(version="8")
#' downloadPc2(version="8", returnNames="ext.*sif")
#' downloadPc2("PathwayCommons.8.inoh.GSEA.hgnc.gmt.gz", version="8", verbose=TRUE)
#' }
#'
#' @aliases downloadPc
#' @concept paxtoolsr
#' @export
#'
#' @importFrom R.utils gunzip
#' @importFrom utils select.list
downloadPc2 <- function(selectedFileName=NULL, destDir=NULL, returnNames=NULL, version, verbose=FALSE, ...) {
  if(is.null(destDir)) {
    stopifnot(Sys.getenv("PAXTOOLSR_CACHE") != "")
    destDir <- Sys.getenv("PAXTOOLSR_CACHE")
  }

  # file.path() returns character(0) when selectedFileName is NULL
  selectedFilePath <- file.path(destDir, selectedFileName)

  # Contact the server when no file was named, the named file is not available
  # locally, or only the list of file names was requested
  if(length(selectedFilePath) == 0 || !file.exists(selectedFilePath) || !is.null(returnNames)) {
    if(!is.null(version)) {
      baseUrl <- Sys.getenv("PC_ARCHIVE_URL")

      doc <- htmlParse(baseUrl)
      links <- as.character(unlist(xpathSApply(doc, "//a/@href")))

      if(version == "current") {
        versionDir <- "current"
      } else {
        versionDir <- paste0("v", version)
      }

      downloadsSubDir <- unique(links[grepl(paste0("^", versionDir), links)])

      # A prefix match is ambiguous ("v1" also matches "v10/"); prefer the
      # directory whose name is exactly the requested one
      if(length(downloadsSubDir) > 1) {
        exact <- downloadsSubDir[sub("/+$", "", downloadsSubDir) == versionDir]

        if(length(exact) > 0) {
          downloadsSubDir <- exact
        }
      }

      if(length(downloadsSubDir) != 1) {
        stop("ERROR: Could not determine a unique download directory for version ", version, ".")
      }
    } else {
      baseUrl <- Sys.getenv("PC_URL")
      #baseUrl <- "http://www.pathwaycommons.org/pc2/"
      downloadsSubDir <- "downloads/"
    }

    url <- paste0(baseUrl, downloadsSubDir)

    if(verbose) {
      cat("URL: ", url, "\n")
    }

    # Parse webpage
    doc <- htmlParse(url)

    # Extract links
    links <- as.character(unlist(xpathSApply(doc, "//a/@href")))

    # Process links; get only gzipped files (fixed=TRUE: the dot is literal)
    gzLinks <- links[grepl(".gz", links, fixed=TRUE)]

    # Keep the last path component of every link
    filenames <- vapply(strsplit(gzLinks, "/"), function(x) {
      x[length(x)]
    }, character(1))

    if(!is.null(returnNames)) {
      idx <- grepl(returnNames, filenames, ignore.case=TRUE)
      return(filenames[idx])
    }

    # Show menu if user does not specify a file
    if(is.null(selectedFileName)) {
      selectedFileName <- select.list(filenames, graphics=FALSE)

      # An empty selection must not be passed on as a file name
      if(length(selectedFileName) != 1 || !nzchar(selectedFileName)) {
        stop("ERROR: No file was selected.")
      }
    }

    # NOTE: File not found if not first URL encoded
    tmpUrl <- paste0(baseUrl, downloadsSubDir)

    if(verbose) {
      cat("baseUrl: ", tmpUrl, "\n")
      cat("fileName: ", selectedFileName, "\n")
      cat("destDir: ", destDir, "\n")
    }

    downloadResult <- paxtoolsr::downloadFile(baseUrl=tmpUrl, fileName=selectedFileName, destDir=destDir, verbose=verbose)

    if(!downloadResult) {
      stop("ERROR: File was not found.")
    }

    selectedFilePath <- file.path(destDir, selectedFileName)
  }

  if(verbose) {
    cat("selectedFilePath: ", selectedFilePath, "\n")
  }

  tmpFile <- R.utils::gunzip(selectedFilePath, remove=FALSE, temporary=TRUE, skip=TRUE)

  # READ FUNCTIONS ----
  ## Parse GMT
  if(grepl("GSEA", selectedFileName) || grepl("gmt", selectedFileName)) {
    results <- readGmt(tmpFile, ...)
    return(results)
  }

  ## Parse EXTENDED_BINARY_SIF (TXT)
  if(grepl("TXT", selectedFileName) || grepl("txt", selectedFileName) || grepl("EXTENDED_BINARY_SIF", selectedFileName)) {
    results <- readSifnx(tmpFile, ...)
    return(results)
  }

  ## Parse BINARY_SIF (SIF)
  if(grepl("SIF", selectedFileName) || grepl("BINARY_SIF", selectedFileName) || grepl("sif", selectedFileName)) {
    results <- readSif(tmpFile, ...)
    return(results)
  }

  ## Parse BIOPAX
  if(grepl("BIOPAX", selectedFileName)) {
    results <- readBiopax(tmpFile, ...)
    return(results)
  }

  # No reader matched the file name; say so instead of returning NULL silently
  warning("No read* method is available for file: ", selectedFileName, "; returning NULL.")
  invisible(NULL)
}

# ---- /R/downloadSignedPC.R ----
#' Download a SIF file containing only signed interactions
#'
#' @param destDir a string, the destination directory for the file to be
#' downloaded (Default: NULL).
#' If NULL, then file will be downloaded to cache
#' directory at Sys.getenv("PAXTOOLSR_CACHE")
#' @param forceCache a boolean to force the use of a cached version (DEFAULT: FALSE);
#'   the current host of the file (GitHub) does not support the LAST-MODIFIED header
#'
#' @return a data.frame with the character columns PARTICIPANT_A,
#'   INTERACTION_TYPE, PARTICIPANT_B, IDS and SITES; a SIF containing
#'   interactions that are considered signed (i.e. interactions causing an
#'   increase or decrease in a molecular species)
#'
#' @details The base URL and the file name are read from the environment
#'   variables SIGNED_PC_URL and SIGNED_PC_FILE. The function stops if the
#'   download reports failure, or if destDir is NULL and PAXTOOLSR_CACHE is
#'   not set.
#'
#' @examples
#' # downloadSignedPC()
#'
#' @export
#' @importFrom utils read.table
downloadSignedPC <- function(destDir=NULL, forceCache=FALSE) {
  baseUrl <- Sys.getenv("SIGNED_PC_URL")
  selectedFileName <- Sys.getenv("SIGNED_PC_FILE")

  # Resolve the destination once so that the download and the subsequent read
  # always refer to the same directory
  if(is.null(destDir)) {
    stopifnot(Sys.getenv("PAXTOOLSR_CACHE") != "")
    destDir <- Sys.getenv("PAXTOOLSR_CACHE")
  }

  selectedFilePath <- file.path(destDir, selectedFileName)

  if(!forceCache) {
    # destDir is passed explicitly; previously a user-supplied destDir was
    # ignored by the download but used for reading
    downloadResult <- paxtoolsr::downloadFile(baseUrl=baseUrl,
                                              fileName=selectedFileName,
                                              destDir=destDir)

    if(!downloadResult) {
      stop("ERROR: File was not found.")
    }
  }

  # Decompress to a temporary file; the cached .gz is kept (remove=FALSE)
  tmpFile <- R.utils::gunzip(selectedFilePath, remove=FALSE, temporary=TRUE, skip=TRUE)

  results <- read.table(tmpFile,
                        header=FALSE,
                        sep="\t",
                        fill=TRUE,
                        col.names=c("PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B", "IDS", "SITES"),
                        stringsAsFactors=FALSE,
                        colClasses=rep("character", 5))

  return(results)
}
# --------------------------------------------------------------------------------
# /R/fetch.R:
# --------------------------------------------------------------------------------
#' Fetch a set of IDs from a BioPAX OWL file
#'
#' This function will create a subsetted object with specified URIs.
#'
#' @param inputFile a string of the name of the input BioPAX OWL file
#' @param outputFile a string with the name of the output BioPAX OWL file
#' @param idList a vector of IDs from the BioPAX OWL file
#' @return an XMLInternalDocument representing a BioPAX OWL file
#'
#' @details Only entities in the input BioPAX file will be used in the fetch.
#'   IDs used must be URIs for the entities of interest. Additional properties
#'   such as cross-references for fetched entities will be included in the output.
#'
#' @examples
#' outFile <- tempfile()
#' ids <- c("http://identifiers.org/uniprot/P36894",
#'          "http://identifiers.org/uniprot/Q13873")
#' results <- fetch(system.file("extdata", "REACT_12034-3.owl", package="paxtoolsr"),
#'                  outFile, ids)
#'
#' @concept paxtoolsr
#' @export
fetch <- function(inputFile, outputFile=NULL, idList) {
  inputFile <- checkInputFile(inputFile)
  outputFile <- checkOutputFile(outputFile)

  command <- "fetch"
  idString <- paste(idList, collapse=",")

  # Arguments are handed to PaxtoolsMain in this order:
  # command, input file, comma-separated IDs, output file
  argsList <- lapply(list(command, inputFile, idString, outputFile), function(arg) {
    .jnew("java/lang/String", arg)
  })

  .jcall("org/biopax/paxtools/PaxtoolsMain", "V", command, .jarray(argsList, "java/lang/String"))
  .jcheck()

  results <- xmlTreeParse(outputFile, useInternalNodes=TRUE)
  return(results)
}
# --------------------------------------------------------------------------------
# /R/filterSif.R:
# --------------------------------------------------------------------------------
#' Keep interactions in SIF network based on certain criteria
#'
#' @param sif a binary SIF as a data.frame with three columns:
#'   "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B"
#' @param ids a vector of IDs to be kept
#' @param interactionTypes a vector of interaction types to be kept
#'   (List of interaction types: http://www.pathwaycommons.org/pc2/formats)
#' @param dataSources a vector of data sources to be kept. For Extended SIF.
#' @param interactionPubmedIds a vector of Pubmed IDs to be kept. For Extended SIF.
#' @param pathwayNames a vector of pathway names to be kept. For Extended SIF.
#' @param mediatorIds a vector of mediator IDs to be kept. For Extended SIF.
#'   Mediator IDs are the full BioPAX objects that were simplified to interaction
#'   given in the SIF.
#' @param edgelist a two-column data.frame where each row is an interaction to be kept.
#'   Directionality is ignored (e.g. Edge A B will return interactions A B and B A from SIF)
#' @param idsBothParticipants a boolean whether both interaction participants should be in
#'   a given interaction when using the ids parameter; TRUE if both (DEFAULT: FALSE)
#' @param edgelistCheckReverse a boolean whether to check for edges in the reverse order (DEFAULT: TRUE)
#' @param verbose Show debugging information (DEFAULT: FALSE); currently unused
#'
#' @return the rows of sif (all columns) that satisfy every supplied filter, i.e.
#'   the intersection of multiple filters is returned. If no filter is supplied,
#'   zero rows are returned.
#'
#' @examples
#' results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr"))
#' intTypes <- c("controls-state-change-of", "controls-expression-of", "catalysis-precedes")
#' filteredNetwork <- filterSif(results, intTypes)
#'
#' tmp <- readSifnx(system.file("extdata", "test_sifnx_250.txt", package = "paxtoolsr"))
#' results <- filterSif(tmp$edges, ids=c("CHEBI:17640", "MCM3"))
#' results <- filterSif(tmp$edges, dataSources=c("INOH", "KEGG"))
#' results <- filterSif(tmp$edges, dataSources=c("IntAct"), ids=c("CHEBI:17640", "MCM3"))
#' results <- filterSif(tmp$edges, pathwayNames=c("Metabolic pathways"))
#' results <- filterSif(tmp$edges,
#'    mediatorIds=c("http://purl.org/pc2/8/MolecularInteraction_1452626895158"))
#' results <- filterSif(tmp$edges, interactionPubmedIds="17654400")
#'
#' tmp <- readSifnx(system.file("extdata", "test_sifnx_250.txt", package = "paxtoolsr"))
#' edgelist <- read.table(system.file("extdata", "test_edgelist.txt", package = "paxtoolsr"),
#'    sep="\t", header=FALSE, stringsAsFactors=FALSE)
#' results <- filterSif(tmp$edges, edgelist=edgelist)
#'
#' @concept paxtoolsr
#' @export
filterSif <- function(sif, ids=NULL, interactionTypes=NULL, dataSources=NULL, interactionPubmedIds=NULL, pathwayNames=NULL, mediatorIds=NULL, edgelist=NULL, idsBothParticipants=FALSE, edgelistCheckReverse=TRUE, verbose=FALSE) {
  # One vector of row indices per active filter. This must be a real list:
  # `[[<-` on NULL can yield an atomic vector for a length-1 value, which
  # breaks as soon as a second filter stores more than one index.
  idxList <- list()

  if(!is.null(ids)) {
    aIdx <- which(sif$PARTICIPANT_A %in% ids)
    bIdx <- which(sif$PARTICIPANT_B %in% ids)

    if(idsBothParticipants) {
      idxIds <- intersect(aIdx, bIdx)
    } else {
      idxIds <- unique(c(aIdx, bIdx))
    }

    idxList[["idxIds"]] <- idxIds
  }

  if(!is.null(interactionTypes)) {
    idxList[["idxInteractionTypes"]] <- which(sif$INTERACTION_TYPE %in% interactionTypes)
  }

  if(!is.null(dataSources)) {
    results <- searchListOfVectors(dataSources, sif$INTERACTION_DATA_SOURCE)
    idxList[["idxDataSources"]] <- unique(unlist(results))
  }

  # NOTE(review): the Pubmed ID and pathway name filters use %in% directly while
  # data sources and mediator IDs go through searchListOfVectors(); confirm
  # whether these columns can hold several values per row.
  if(!is.null(interactionPubmedIds)) {
    idxList[["idxInteractionPubmedId"]] <- which(sif$INTERACTION_PUBMED_ID %in% interactionPubmedIds)
  }

  if(!is.null(pathwayNames)) {
    idxList[["idxPathwayNames"]] <- which(sif$PATHWAY_NAMES %in% pathwayNames)
  }

  if(!is.null(mediatorIds)) {
    results <- searchListOfVectors(mediatorIds, sif$MEDIATOR_IDS)
    idxList[["idxMediatorIds"]] <- unique(unlist(results))
  }

  if(!is.null(edgelist)) {
    # Match whole (A, B) pairs row by row. Matching the two columns
    # independently would also keep A-D for edgelist rows (A, B) and (C, D).
    # IDs are assumed not to contain a tab character.
    sifKeys <- paste(sif$PARTICIPANT_A, sif$PARTICIPANT_B, sep="\t")
    forwardKeys <- paste(edgelist[,1], edgelist[,2], sep="\t")
    idxEdgelist <- which(sifKeys %in% forwardKeys)

    if(edgelistCheckReverse) {
      reverseKeys <- paste(edgelist[,2], edgelist[,1], sep="\t")
      # unique() avoids returning a row twice (e.g. self-loops)
      idxEdgelist <- unique(c(idxEdgelist, which(sifKeys %in% reverseKeys)))
    }

    idxList[["idxEdgelist"]] <- idxEdgelist
  }

  # Rows must pass every active filter; with no filters Reduce() returns NULL
  # and the subset below has zero rows
  idx <- Reduce(intersect, idxList)

  filteredNetwork <- sif[idx, ]

  return(filteredNetwork)
}
-------------------------------------------------------------------------------- /R/fromPsimi.R: -------------------------------------------------------------------------------- 1 | # #' Read PSIMI file 2 | # #' 3 | # #' This function reads in a PSIMI file. 4 | # #' 5 | # #' @param inputFile a string of the name of the input PSIMI file 6 | # #' @param outputFile a string of the name of the output BioPAX OWL file 7 | # #' @param bpLevelArg a string representing the BioPAX level for the output file 8 | # #' (default: NULL) 9 | # #' @return an XMLInternalDocument representing a BioPAX OWL file 10 | # #' 11 | # #' @details The Proteomics Standard Initiative (PSIMI) format is described at 12 | # #' https://code.google.com/p/psimi/wiki/PsimiTabFormat 13 | # #' 14 | # #' This function has been removed from Paxtools as of 4.3.1 15 | # #' 16 | # #' @examples 17 | # #' outFile <- tempfile() 18 | # #' results <- fromPsimi(system.file("extdata", "10523676-compact.xml", package="paxtoolsr"), 19 | # #' outFile, 20 | # #' "3") 21 | # #' 22 | # #' @concept paxtoolsr 23 | # fromPsimi <- function(inputFile, outputFile=NULL, bpLevelArg=3) { 24 | # command <- "fromPsimi" 25 | # 26 | # outputFile <- checkOutputFile(outputFile) 27 | # bpLevelArg <- as.character(bpLevelArg) 28 | # 29 | # commandJStr <- .jnew("java/lang/String", command) 30 | # inputJStr <- .jnew("java/lang/String", inputFile) 31 | # outputJStr <- .jnew("java/lang/String", outputFile) 32 | # bpLevelArgJStr <- .jnew("java/lang/String", bpLevelArg) 33 | # 34 | # argsList <- list(commandJStr, bpLevelArgJStr, inputJStr, outputJStr) 35 | # 36 | # .jcall("org/biopax/paxtools/PaxtoolsMain","V",command,.jarray(argsList, "java/lang/String")) 37 | # .jcheck() 38 | # 39 | # results <- xmlTreeParse(outputFile, useInternalNodes=TRUE) 40 | # return(results) 41 | # 42 | # } 43 | -------------------------------------------------------------------------------- /R/getCacheFiles.R: 
# --------------------------------------------------------------------------------
#' List files in cache directory
#'
#' @return a vector of the files in the cache directory
#'   (the directory named by Sys.getenv("PAXTOOLSR_CACHE"))
#'
#' @examples
#' getCacheFiles()
#'
#' @concept paxtoolsr
#' @export
getCacheFiles <- function() {
  cacheDir <- Sys.getenv("PAXTOOLSR_CACHE")
  return(dir(cacheDir))
}
# --------------------------------------------------------------------------------
# /R/getErrorMessage.R:
# --------------------------------------------------------------------------------
#' Get Error Message for a Pathway Commons Error
#'
#' @param code a three digit numerical error code
#' @return an error message for the code; "Unknown Error" when the code does
#'   not match exactly one known code
#'
#' @examples
#' results <- getErrorMessage("452")
#'
#' @concept paxtoolsr
#' @export
getErrorMessage <- function(code) {
  knownCodes <- c("452", "460", "500", "503")
  knownMessages <- c("Bad Request (illegal or no arguments)",
                     "No Results Found",
                     "Internal Server Error",
                     "Server is temporarily unavailable due to regular maintenance")

  # Parallel vectors: position i of knownMessages describes knownCodes[i]
  matched <- knownMessages[knownCodes == code]

  if(length(matched) != 1) {
    return("Unknown Error")
  }

  return(matched)
}
# --------------------------------------------------------------------------------
# /R/getNeighbors.R:
# --------------------------------------------------------------------------------
#' Get the neighbors of a set of IDs in a BioPAX file
#'
#' This function retrieves a set of neighbors for a set of IDs in a BioPAX file.
#'
#' @param inputFile a string with the name of the input BioPAX OWL file
#' @param outputFile a string with the name of the output BioPAX OWL file
#' @param idList a vector of IDs from the BioPAX OWL file
#' @return an XMLInternalDocument representing a BioPAX OWL file
#'
#' @details Only entities in the input BioPAX file will be searched for neighbors.
#'   IDs used must be URIs for the entities of interest.
#'
#' @examples
#' outFile <- tempfile()
#' results <- getNeighbors(system.file("extdata",
#'   "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"),
#'   outFile,
#'   c("HTTP://WWW.REACTOME.ORG/BIOPAX/48887#PROTEIN2360_1_9606",
#'     "HTTP://WWW.REACTOME.ORG/BIOPAX/48887#PROTEIN1631_1_9606"))
#'
#' @concept paxtoolsr
#' @export
getNeighbors <- function(inputFile, outputFile=NULL, idList) {
  inputFile <- checkInputFile(inputFile)
  outputFile <- checkOutputFile(outputFile)

  command <- "getNeighbors"
  idString <- paste(idList, collapse=",")

  # Arguments are handed to PaxtoolsMain in this order:
  # command, input file, comma-separated IDs, output file
  argsList <- lapply(list(command, inputFile, idString, outputFile), function(arg) {
    .jnew("java/lang/String", arg)
  })

  .jcall("org/biopax/paxtools/PaxtoolsMain", "V", command, .jarray(argsList, "java/lang/String"))
  .jcheck()

  return(xmlTreeParse(outputFile, useInternalNodes=TRUE))
}
# --------------------------------------------------------------------------------
# /R/getPc.R:
# --------------------------------------------------------------------------------
#' Get Pathway Commons BioPAX elements
#'
#' This command retrieves full pathway information for a set of elements such as
#' pathway, interaction or physical
#' entity given the RDF IDs.
#'
#' @param uri a vector that includes valid/existing BioPAX element's URI (RDF
#'   ID; for utility classes that were "normalized", such as entity references
#'   and controlled vocabularies, it is usually an Identifiers.org URL).
#'   Multiple IDs are allowed per query, for example,
#'   c("http://identifiers.org/uniprot/Q06609",
#'   "http://identifiers.org/uniprot/Q549Z0"). See also about MIRIAM and
#'   Identifiers.org in details.
#' @param format output format (Default: BIOPAX). Valid options can be found using
#'   \code{\link{pcFormats}}
#' @param verbose a boolean, display the command used to query Pathway Commons
#' @param ... additional arguments to read* methods that handle data from Pathway Commons
#'
#' @return the result of processPcRequest() for the requested format
#'   (an XMLInternalDocument object for BIOPAX)
#'
#' @details Get commands only retrieve the BioPAX elements that are directly
#'   mapped to the ID. Use the "traverse" query to traverse BioPAX graph and
#'   obtain child/owner elements.
#'
#'   Information on MIRIAM and Identifiers.org
#'   \url{http://www.pathwaycommons.org/pc2/#miriam}
#'
#' @seealso \code{\link{pcFormats}}
#'
#' @examples
#' uri <- "http://identifiers.org/uniprot/O14503"
#' #results <- getPc(uri)
#'
#' uri <- c("http://identifiers.org/uniprot/O14503", "http://identifiers.org/uniprot/Q9P2X7")
#' #results <- getPc(uri, verbose=TRUE)
#'
#' @concept paxtoolsr
#' @export
getPc <- function(uri, format="BIOPAX", verbose=FALSE, ...) {
  baseUrl <- paste0(getPcUrl(), "get")

  # One "uri" query parameter per requested URI
  uriList <- as.list(uri)
  names(uriList) <- rep("uri", length(uriList))

  # toupper() never returns NULL, so the format entry is always added
  format <- toupper(format)
  stopifnot(format %in% names(pcFormats()))
  formatList <- list(format=format)

  queryList <- c(uriList, formatList)

  tmpUrl <- parse_url(baseUrl)
  tmpUrl$query <- queryList
  url <- build_url(tmpUrl)

  tmp <- getPcRequest(url, verbose)
  results <- processPcRequest(tmp, format, ...)
  return(results)
}
# --------------------------------------------------------------------------------
# /R/getPcDatabaseNames.R:
# --------------------------------------------------------------------------------
#' Get Pathway Commons database names
#'
#' @param version PC2 version
#'
#' @return a sorted character vector with the names of databases that can be
#'   used as part of queries
#'
#' @details Names are derived from the BioPAX file names returned by
#'   downloadPc2(returnNames="BioPAX"): the file names are split on "." and
#'   release prefixes ("PathwayCommons", optionally followed by a version
#'   number), purely numeric tokens, format markers and file extensions are
#'   dropped. The abbreviations "psp", "reconx" and "wp" are expanded.
#'
#' @examples
#' getPcDatabaseNames(version=10)
#'
#' @concept paxtoolsr
#' @export
getPcDatabaseNames <- function(version) {
  dbMapping <- c("psp"="phosphositeplus", "reconx"="recon x", "wp"="wikipathways")

  biopaxFiles <- downloadPc2(returnNames="BioPAX", version=version)
  tokens <- as.character(unlist(strsplit(biopaxFiles, "\\.")))

  # Tokens that are not database names. The release prefix and version number
  # are matched by pattern so that any release is handled, not only those that
  # were listed by hand.
  isBoilerplate <- grepl("^PathwayCommons[0-9]*$", tokens) |
    grepl("^[0-9]+$", tokens) |
    tokens %in% c("BIOPAX", "gz", "owl", "All", "Warehouse", "Detailed")

  dbNames <- unique(tokens[!isBoilerplate])

  # Expand abbreviated names to the names used in queries
  isAbbreviated <- dbNames %in% names(dbMapping)
  dbNames[isAbbreviated] <- unname(dbMapping[dbNames[isAbbreviated]])

  return(sort(dbNames))
}
# --------------------------------------------------------------------------------
# /R/getPcRequest.R:
# --------------------------------------------------------------------------------
#' Get a Pathway Commons Webservice Request
#'
#' @param url Pathway Commons webservice request URL
#' @param verbose a boolean whether to display debugging information
#'
#' @return request results as text; an error is raised if the status code is
#'   not 200 after all attempts or if the returned text starts with "ERROR"
#'
#' @concept paxtoolsr
#' @keywords internal
#' @noRd
#' @importFrom httr HEAD GET content accept add_headers
getPcRequest <- function(url, verbose) {
  url <- URLencode(url)

  if(verbose) {
    cat("URL: ", url, "\n")
  }

  # Up to three HEAD attempts, pausing only between attempts (not after a
  # success or after the final failure)
  maxTries <- 3
  statusCode <- ""

  for(attempt in seq_len(maxTries)) {
    statusCode <- HEAD(url)$status_code

    if(statusCode == "200") {
      break
    }

    if(attempt < maxTries) {
      Sys.sleep(3)
    }
  }

  # Check HTTP status code; 200 is success
  if(statusCode == "200") {
    # Set preference order of accept types; "no-cache" is the standard
    # Cache-Control directive for bypassing cached responses
    tmp <- content(GET(url,
                       accept("text/xml,text/plain,application/json"),
                       add_headers("Cache-Control"="no-cache")),
                   as="text")
  } else {
    # Make sure the statusCode is numeric
    if(grepl("^\\d+$", statusCode)) {
      errorMessage <- getErrorMessage(statusCode)
    } else {
      statusCode <- NA
      errorMessage <- NA
    }

    tmp <- paste("ERROR: Code:", statusCode, "Message:", errorMessage)
  }

  # Also catches response bodies that themselves start with "ERROR"
  if(grepl("^ERROR", tmp)) {
    stop(paste(tmp, "(PC Webservice Error)"))
  }

  return(tmp)
}
# --------------------------------------------------------------------------------
# /R/getPcUrl.R:
# --------------------------------------------------------------------------------
#' Get base Pathway Commons URL
#'
#' @return a string with base Pathway Commons URL, read from the PC_URL
#'   environment variable
#'
#' @details paxtoolsr will support versions Pathway Commons 5 and later. Old
#'   versions of the webservice will not be operational. Users can parse
#'   older BioPAX outputs as an alternative.
#'
#'   An error is raised if PC_URL is not set.
#'
#' @examples
#' url <- getPcUrl()
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom httr url_success
getPcUrl <- function() {
  # TODO: Need to get old files from archives
  url <- Sys.getenv("PC_URL")

  # Sys.getenv() returns "" (never NULL) for an unset variable, so test for
  # the empty string
  if(!nzchar(url)) {
    stop("ERROR: Pathway Commons webservice URL is not set; define the PC_URL environment variable")
  }

  return(url)
}
# --------------------------------------------------------------------------------
# /R/getShortestPathSif.R:
# --------------------------------------------------------------------------------
#' Get the shortest path between two IDs (HGNC or CHEBI)
#'
#' @param sif a SIF network
#' @param idA HGNC or CHEBI (CHEBI:XXXXX) ID
#' @param idB HGNC or CHEBI (CHEBI:XXXXX) ID
#' @param mode see shortest_paths() in igraph
#' @param weights see shortest_paths() in igraph
#' @param verbose a boolean whether to show debugging information
#' @param filterFun a function to filter multiple paths of the same length; it
#'   receives the list of vertex paths and must return one of them, or NULL
#' @param ... additional arguments passed on to filterFun
#'
#' @return a data.frame representing a SIF network with the columns
#'   PARTICIPANT_A, INTERACTION_TYPE, PARTICIPANT_B, INTERACTION_DATA_SOURCE,
#'   INTERACTION_PUBMED_ID, PATHWAY_NAMES and MEDIATOR_IDS; zero rows if a node
#'   is missing, no path exists, filterFun returns NULL or the path has no edges
#'
#' @examples
#' idA <- "DAP3"
#' idB <- "RPS16"
#' sif <- readSif(system.file("extdata", "test_sif_shortestPath.txt", package="paxtoolsr"))
#' filterFun <- function(vpaths) { idx <- sample(1:length(vpaths), 1); return(vpaths[[idx]]) }
#' m1 <- getShortestPathSif(sif, idA, idB, mode="all", weights=NULL, filterFun=filterFun, verbose=TRUE)
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom igraph V V<- all_shortest_paths are.connected ends list.edge.attributes
getShortestPathSif <- function(sif, idA, idB, mode=c("all", "out", "in"), weights=NULL, verbose=FALSE, filterFun, ...) {
  g <- loadSifInIgraph(sif)

  aIdx <- match(idA, V(g)$name)
  bIdx <- match(idB, V(g)$name)

  # Empty result with the same columns as a populated result
  results <- data.frame(PARTICIPANT_A=character(),
                        INTERACTION_TYPE=character(),
                        PARTICIPANT_B=character(),
                        INTERACTION_DATA_SOURCE=character(),
                        INTERACTION_PUBMED_ID=character(),
                        PATHWAY_NAMES=character(),
                        MEDIATOR_IDS=character(),
                        stringsAsFactors=FALSE)

  if(is.na(aIdx) || is.na(bIdx)) {
    if(verbose) {
      warning("ERROR: Node not found: aIdx: ", aIdx, " bIdx: ", bIdx, "\n")
    }
    return(results)
  }

  s2 <- all_shortest_paths(g, aIdx, bIdx, mode=mode, weights=weights)

  if(verbose) {
    cat("ALL PATHS: ", length(s2$res), "\n")
  }

  if(length(s2$res) == 0) {
    if(verbose) {
      warning("ERROR: No path found for aIdx: ", aIdx, " bIdx: ", bIdx, "\n")
    }
    return(results)
  }

  # Let the caller pick one of the equally short vertex paths
  s1 <- filterFun(s2$res, ...)

  if(is.null(s1)) {
    if(verbose) {
      warning("ERROR: No paths from filterFun returned\n")
    }
    return(results)
  }

  v1 <- s1$name
  nSteps <- length(v1) - 1

  # A path with a single vertex (e.g. idA equals idB) has no edges
  if(nSteps < 1) {
    return(results)
  }

  # Get edge path given nodes; try the reverse orientation when the forward
  # one is not connected
  e1 <- unlist(lapply(seq_len(nSteps), function(i) {
    if(are.connected(g, v1[i], v1[i+1])) {
      r1 <- E(g, P=c(v1[i], v1[i+1]))
    } else {
      r1 <- E(g, P=c(v1[i+1], v1[i]))
    }

    as.integer(r1)
  }))

  epath <- E(g)[e1]

  # Edge attributes are the same for every edge; look them up once
  edgeAttrs <- list.edge.attributes(g)
  hasPubmedId <- "interactionPubmedId" %in% edgeAttrs
  hasPathwayNames <- "pathwayNames" %in% edgeAttrs
  hasDataSource <- "interactionDataSource" %in% edgeAttrs
  hasMediatorIds <- "mediatorIds" %in% edgeAttrs

  # Multi-valued attributes are collapsed into one ";"-separated string
  collapseValues <- function(values) {
    lapply(values, paste, collapse=";")[[1]]
  }

  rows <- lapply(seq_along(epath), function(i) {
    idx <- epath[i]
    tmpV <- ends(g, idx)
    edge <- E(g)[idx]

    data.frame(PARTICIPANT_A=tmpV[1, 1],
               INTERACTION_TYPE=edge$interactionType,
               PARTICIPANT_B=tmpV[1, 2],
               INTERACTION_DATA_SOURCE=if(hasDataSource) collapseValues(edge$interactionDataSource) else "",
               INTERACTION_PUBMED_ID=if(hasPubmedId) collapseValues(edge$interactionPubmedId) else "",
               PATHWAY_NAMES=if(hasPathwayNames) collapseValues(edge$pathwayNames) else "",
               MEDIATOR_IDS=if(hasMediatorIds) collapseValues(edge$mediatorIds) else "",
               stringsAsFactors=FALSE)
  })

  # Bind once instead of growing the data.frame inside the loop
  results <- do.call(rbind, rows)

  return(results)
}
# --------------------------------------------------------------------------------
# /R/getSifInteractionCategories.R:
# --------------------------------------------------------------------------------
#' Get a list of categories of SIF interactions
#'
#' @return a named list; each element is a character vector with the
#'   interaction types that belong to that category
#'
#' @details
#' Description of interaction types: http://www.pathwaycommons.org/pc2/formats
#' Categories provided:
#'   BetweenProteins,
#'   BetweenProteinsOther (often from high-throughput experiments),
#'   BetweenProteinSmallMolecule,
#'   BetweenSmallMolecules,
#'   SignedInteractions
#'
#' @examples
#' sifCat <- getSifInteractionCategories()
#' sifCat[["BetweenProteins"]]
#'
#' @concept paxtoolsr
#' @export
getSifInteractionCategories <- function() {
  categories <- list(
    "BetweenProteins"=c("controls-state-change-of", "controls-expression-of",
                        "controls-degradation-of", "controls-transport-of",
                        "catalysis-precedes", "in-complex-with"),
    "BetweenProteinsOther"=c("interacts-with", "neighbor-of"),
    "BetweenProteinSmallMolecule"=c("consumption-controlled-by", "controls-production-of",
                                    "controls-transport-of-chemical", "chemical-affects"),
    "BetweenSmallMolecules"=c("reacts-with", "used-to-produce"),
    "SignedInteractions"=c("dephosphorylates", "phosphorylates",
                           "downregulates-expression", "upregulates-expression")
  )

  return(categories)
}
# --------------------------------------------------------------------------------
# /R/graphPc.R:
# --------------------------------------------------------------------------------
#' Get Pathway Commons BioPAX elements
#'
#' This function will retrieve a set of BioPAX elements given a graph query match.
#'
#' @param kind graph query.
#' Valid options can be found using \code{\link{pcGraphQueries}}
#'   See Details for information on graph queries.
#' @param source source object's URI/ID. Multiple source URIs/IDs are allowed
#'   per query, for example c("http://identifiers.org/uniprot/Q06609",
#'   "http://identifiers.org/uniprot/Q549Z0")
#'   See a note about MIRIAM and Identifiers.org in details
#' @param target [Required for PATHSFROMTO graph query] target URI/ID. Multiple
#'   target URIs are allowed per query; for example c("http://identifiers.org/uniprot/Q06609",
#'   "http://identifiers.org/uniprot/Q549Z0")
#'   See a note about MIRIAM and Identifiers.org in details
#' @param direction [Optional, for NEIGHBORHOOD and COMMONSTREAM algorithms] -
#'   graph search direction. Valid options: \code{\link{pcDirections}}.
#' @param limit graph query search distance limit (default: 1).
#' @param format output format. Valid options: \code{\link{pcFormats}}
#' @param datasource datasource filter (same as for 'search').
#' @param organism organism filter (same as for 'search').
#' @param verbose a boolean, display the command used to query Pathway Commons
#' @return depending on the output format a different object may be returned.
#'   \code{\link{pcFormats}}
#'
#' @details target is only sent with the query when kind is PATHSFROMTO
#'   (compared case-insensitively); the function stops if target is missing in
#'   that case. Parameters left as NULL are omitted from the query.
#'
#' @seealso \code{\link{pcFormats}, \link{pcDirections}}
#'
#' @examples
#' source <- "http://identifiers.org/uniprot/O14503"
#' #results <- graphPc(source=source, kind="neighborhood", format="TXT")
#'
#' @concept paxtoolsr
#' @export
graphPc <- function(kind, source, target=NULL, direction=NULL, limit=NULL,
                    format=NULL, datasource=NULL, organism=NULL,
                    verbose=FALSE) {

  baseUrl <- paste0(getPcUrl(), "graph")
  kindList <- list(kind=kind)

  # Repeated query parameters: one "source" entry per URI
  sourceList <- NULL
  if(!is.null(source)) {
    sourceList <- as.list(source)
    names(sourceList) <- rep("source", length(sourceList))
  }

  targetList <- NULL
  if(toupper(kind) == "PATHSFROMTO") {
    if(is.null(target)) {
      stop("target must be set if kind is PATHSFROMTO")
    }

    # Target URIs must be sent as "target"; naming them "source" turned them
    # into additional sources
    targetList <- as.list(target)
    names(targetList) <- rep("target", length(targetList))
  }

  directionList <- NULL
  if(!is.null(direction)) {
    direction <- toupper(direction)
    stopifnot(direction %in% pcDirections())
    directionList <- list(direction=direction)
  }

  limitList <- NULL
  if(!is.null(limit)) {
    limitList <- list(limit=limit)
  }

  # Convert to uppercase to avoid issues in if statements. The NULL check has
  # to come first: toupper(NULL) is character(0), which is not NULL and would
  # otherwise be added to the query as an empty format.
  formatList <- NULL
  if(!is.null(format)) {
    format <- toupper(format)
    stopifnot(format %in% names(pcFormats()))
    formatList <- list(format=format)
  }

  datasourceList <- NULL
  if(!is.null(datasource)) {
    datasourceList <- as.list(datasource)
    names(datasourceList) <- rep("datasource", length(datasourceList))
  }

  organismList <- NULL
  if(!is.null(organism)) {
    organismList <- list(organism=organism)
  }

  queryList <- c(kindList, sourceList, targetList, directionList, limitList,
                 formatList, datasourceList, organismList)

  tmpUrl <- parse_url(baseUrl)
  tmpUrl$query <- queryList
  url <- build_url(tmpUrl)

  tmp <- getPcRequest(url, verbose)
  if(tmp == "") {
    stop("ERROR: Result was empty")
  }

  # Ensure UTF-8 encoding
  tmp <- iconv(tmp, to="UTF-8", sub="?")

  results <- processPcRequest(tmp, format)
  return(results)
}
# --------------------------------------------------------------------------------
# /R/integrateBiopax.R:
# --------------------------------------------------------------------------------
#' Integrate two BioPAX OWL files (DEPRECATED)
#'
#' This function merges two BioPAX OWL files
#'
#' @param inputFile1 a string of the name of the input BioPAX OWL file
#' @param inputFile2 a string of the name of the input BioPAX OWL file
#' @param outputFile a string of the name of the output integrated BioPAX
#'   OWL file
#' @return an XMLInternalDocument representing a BioPAX OWL file
#'
#' @details This method is deprecated. Use mergeBiopax instead.
#'
#' @examples
#' outFile <- tempfile()
#' results <- integrateBiopax(system.file("extdata", "raf_map_kinase_cascade_reactome.owl",
#'   package="paxtoolsr"),
#'   system.file("extdata", "dna_replication.owl", package="paxtoolsr"),
#'   outFile)
#'
#' @concept paxtoolsr
#' @export
#' @seealso \code{\link{mergeBiopax}}
integrateBiopax <- function(inputFile1, inputFile2, outputFile=NULL) {
    # Pass every path through the package's file checks before use
    inputFile1 <- checkInputFile(inputFile1)
    inputFile2 <- checkInputFile(inputFile2)
    outputFile <- checkOutputFile(outputFile)

    command <- "integrate"

    # PaxtoolsMain receives its arguments as a Java String array:
    # the command name followed by the two inputs and the output file
    toJavaString <- function(value) {
        .jnew("java/lang/String", value)
    }
    javaArgs <- list(toJavaString(command),
                     toJavaString(inputFile1),
                     toJavaString(inputFile2),
                     toJavaString(outputFile))

    .jcall("org/biopax/paxtools/PaxtoolsMain", "V", command,
           .jarray(javaArgs, "java/lang/String"))
    .jcheck()

    # Read the file written by the Java call back in as an XML document
    integrated <- xmlTreeParse(outputFile, useInternalNodes=TRUE)
    return(integrated)
}

# --------------------------------------------------------------------------------
# /R/loadSifInIgraph.R:
# --------------------------------------------------------------------------------
#' Load SIF as igraph Network
#'
#' @param sif a binary SIF as a data.frame with three columns:
#'   "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B"
#' @param directed a boolean whether the returned graph should be directed (DEFAULT: TRUE)
#'
#' @return a directed igraph network with interaction types
#'
#' @examples
#' results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr"))
#' g <- loadSifInIgraph(results)
#'
#' @details Users are likely to run into issues if the input SIF has factor levels
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom igraph
#'   graph.edgelist E E<- set_edge_attr
loadSifInIgraph <- function(sif, directed=TRUE) {
    # "in-complex-with" rows are undirected: append a copy of each such row
    # with the two participants swapped
    complexRows <- sif[which(sif$INTERACTION_TYPE %in% "in-complex-with"),]
    swappedRows <- complexRows
    swappedRows$PARTICIPANT_A <- complexRows$PARTICIPANT_B
    swappedRows$PARTICIPANT_B <- complexRows$PARTICIPANT_A
    sif <- rbind(sif, swappedRows)

    # Build the graph from the two participant columns
    edgeMatrix <- as.matrix(sif[, c("PARTICIPANT_A", "PARTICIPANT_B")])
    g <- graph.edgelist(edgeMatrix, directed=directed)
    g <- set_edge_attr(g, "interactionType", index=E(g), sif[, "INTERACTION_TYPE"])

    # Optional SIF columns, keyed by the edge attribute they are stored as.
    # A column is copied only when it occurs exactly once in the input.
    optionalAttributes <- c(interactionDataSource="INTERACTION_DATA_SOURCE",
                            interactionPubmedId="INTERACTION_PUBMED_ID",
                            pathwayNames="PATHWAY_NAMES",
                            mediatorIds="MEDIATOR_IDS")

    for(attributeName in names(optionalAttributes)) {
        columnIdx <- which(optionalAttributes[[attributeName]] == colnames(sif))

        if(length(columnIdx) == 1) {
            g <- set_edge_attr(g, attributeName, index=E(g), sif[, columnIdx])
        }
    }

    return(g)
}

# --------------------------------------------------------------------------------
# /R/mapValues.R:
# --------------------------------------------------------------------------------
#' Map values from One Vector to Another
#'
#' @param data a vector of strings where values will be replaced
#' @param oldValue a vector that matches values in the data vector
#' @param newValue a vector of new values that will replace the old values
#'
#' @return return the vector with the mapped values. Entries of data that have
#'   no corresponding entry in oldValue (including NA) are left unchanged. If a
#'   value occurs more than once in oldValue, its first occurrence is used.
#'
#' @examples
#' data <- c("A", "B", "C", "X", "Y", "Z")
#' oldValue <- LETTERS[1:20]
#' newValue <- letters[1:20]
#' results <- mapValues(data, oldValue, newValue)
#'
#' @concept paxtoolsr
#' @export
mapValues <- function(data, oldValue, newValue) {
    # Create the return vector
    newVec <- data

    # Position of each data entry in oldValue (NA when there is none).
    # A single match() replaces one pass over data per distinct old value
    # and always picks exactly one replacement per entry.
    idx <- match(data, oldValue)

    # NA entries of data are never replaced, even if oldValue contains NA
    hasMatch <- !is.na(idx) & !is.na(data)

    # Put replaced values into the correct position in the return vector
    newVec[hasMatch] <- newValue[idx[hasMatch]]

    return(newVec)
}

# --------------------------------------------------------------------------------
# /R/mergeBiopax.R:
# --------------------------------------------------------------------------------
#' Merges two BioPAX OWL files
#'
#' This function merges two BioPAX OWL files
#'
#' @param inputFile1 a string of the name of the input BioPAX OWL file
#' @param inputFile2 a string of the name of the input BioPAX OWL file
#' @param outputFile a string of the name of the output merged BioPAX
#'   OWL file (Optional)
#' @return an XMLInternalDocument representing a BioPAX OWL file
#'
#' @details Only entities that share IDs will be merged. No additional merging
#'   occurs on cross-references. Merging may result in warning messages caused
#'   as a result of redundant actions being checked against by the Java library;
#'   these messages may be ignored.
#'
#' @examples
#' outFile <- tempfile()
#' results <- mergeBiopax(system.file("extdata", "raf_map_kinase_cascade_reactome.owl",
#'   package="paxtoolsr"),
#'   system.file("extdata", "dna_replication.owl",
#'   package="paxtoolsr"),
#'   outFile)
#'
#' @concept paxtoolsr
#' @export
mergeBiopax <- function(inputFile1, inputFile2, outputFile=NULL) {
    inputFile1 <- checkInputFile(inputFile1)
    inputFile2 <- checkInputFile(inputFile2)
    outputFile <- checkOutputFile(outputFile)

    # PaxtoolsMain receives the command name followed by its file arguments
    command <- "merge"
    commandJStr <- .jnew("java/lang/String", command)
    file1JStr <- .jnew("java/lang/String", inputFile1)
    file2JStr <- .jnew("java/lang/String", inputFile2)

    outputJStr <- .jnew("java/lang/String", outputFile)

    argsList <- list(commandJStr, file1JStr, file2JStr, outputJStr)

    .jcall("org/biopax/paxtools/PaxtoolsMain","V",command,.jarray(argsList, "java/lang/String"))
    .jcheck()

    results <- xmlTreeParse(outputFile, useInternalNodes=TRUE)
    return(results)
}

# --------------------------------------------------------------------------------
# /R/paxtoolsr.R:
# --------------------------------------------------------------------------------
.packageName <- "paxtoolsr"

#' @import rJava
#' @import XML
#' @importFrom rappdirs user_cache_dir
.onLoad <- function(lib, pkg){
    # Set Pathway Commons version
    options(pc.version="12")
    Sys.setenv("PC_URL" = "http://www.pathwaycommons.org/pc2/")
    Sys.setenv("PC_ARCHIVE_URL" = "http://www.pathwaycommons.org/archives/PC2/")
    Sys.setenv("SIGNED_PC_URL"="http://tmp.lunean.com/")
    Sys.setenv("SIGNED_PC_FILE"="SignedPC_20160511.sif.gz")

    # Create cache directory in user home directory
    # NOTE: This is no longer backwards compatible with < 4.0.0
    cacheDir <- rappdirs::user_cache_dir("paxtoolsr")
    Sys.setenv("PAXTOOLSR_CACHE" = cacheDir)

    cacheMap <- file.path(cacheDir, "cacheMap.txt")

    if(!dir.exists(Sys.getenv("PAXTOOLSR_CACHE"))) {
        dir.create(file.path(cacheDir), recursive=TRUE)
        stopifnot(dir.exists(cacheDir))
    }

    if(!dir.exists(cacheDir) || Sys.getenv("PAXTOOLSR_CACHE") != cacheDir) {
        stop("cacheDir does not exist: ", cacheDir)
    }

    # Add cacheMap.txt: an empty, header-only index of cached files
    if(!file.exists(cacheMap)) {
        tmp <- data.frame(fileName=character(),
                          retrievedDate=character(),
                          url=character(),
                          stringsAsFactors=FALSE)

        write.table(tmp, file=cacheMap,
                    quote=FALSE, sep="\t", col.names=TRUE, row.names=FALSE)
    }

    dlp <- Sys.getenv("DYLD_LIBRARY_PATH")
    if (dlp != "") { # for Mac OS X we need to remove X11 from lib-path
        Sys.setenv("DYLD_LIBRARY_PATH"=sub("/usr/X11R6/lib","",dlp))
    }

    #jar.paxtools <- paste(lib, pkg, "java", "paxtools-jar-with-dependencies.jar",
    #                      sep=.Platform$file.sep)
    #.jinit(classpath=c(jar.paxtools))
    #.jpackage(pkg, jars=c("paxtools-jar-with-dependencies.jar"))

    #.jaddClassPath(jars)
    #.jpackage(pkg, jars=jars)
    .jpackage(pkg, jars=c("paxtools-4.3.1.jar"))
    #.jpackage(pkg, lib)
    #print(.jclassPath())

    #DEBUG
    #packageStartupMessage(paste("paxtoolsr loaded. The classpath is: ",
    #paste(.jclassPath(), collapse=" " )))

    # Taken from xlsxjars packages
    # What's your java version? Need >= 1.5.0.
    jversion <- .jcall('java.lang.System','S','getProperty','java.version')

    # Compare versions numerically. A plain string comparison depends on the
    # collation rules of the current locale and is not a version ordering.
    # Only the leading dotted-number part is compared, so suffixes such as
    # "_292" or "-ea" are ignored; an unparseable version skips the check.
    jversionNumeric <- regmatches(jversion,
                                  regexpr("^[0-9]+(\\.[0-9]+)*", jversion))
    if (length(jversionNumeric) == 1 &&
        utils::compareVersion(jversionNumeric, "1.5.0") < 0) {
        stop(paste0("Your java version is ", jversion, ". Need 1.5.0 or higher."))
    }
}

.onAttach <- function(libname, pkgname){
    # JAVA ----
    ## Check if Java exists
    # check_java <- system('which java', intern=TRUE)
    #
    # if(identical(check_java, character(0))) {
    #     startupMsg <- "ERROR: Java not found"
    # } else {
    #     checkJavaVersion <- system('java -version 2>&1 >/dev/null', intern=TRUE)
    #     startupMsg <- paste0("MSG: Java found: ", checkJavaVersion[1], "\n")
    # }

    startupMsg <- 'Consider citing this package: Luna A, et al. PaxtoolsR: pathway analysis in R using Pathway Commons. PMID: 26685306; citation("paxtoolsr")'

    packageStartupMessage(startupMsg)
}

#jar.paxtools <- "lib/paxtools-4.2.1.jar"
#jar.paxtools <- "lib/paxtools-jar-with-dependencies.jar"
#.jinit(classpath=c(jar.paxtools))

# --------------------------------------------------------------------------------
# /R/pcDirections.R:
# --------------------------------------------------------------------------------
#' Acceptable Pathway Commons Directions
#'
#' A simple function to see valid options
#'
#' @return acceptable Pathway Commons directions
#'
#' @details
#'
#' \itemize{
#'   \item BOTHSTREAM where the current entity can either be the source or
#'     target of an interaction
#'   \item DOWNSTREAM where the current entity can only be the source
#'   \item UPSTREAM where the current entity can only be the target
#' }
#'
#' @examples
#' pcDirections()
#'
#' @concept paxtoolsr
#' @export
pcDirections <- function() {
    pcDirections <- c("BOTHSTREAM", "DOWNSTREAM", "UPSTREAM")

    return(pcDirections)
}

# --------------------------------------------------------------------------------
# /R/pcFormats.R:
# --------------------------------------------------------------------------------
#' Acceptable Pathway Commons Formats
#'
#' A
#' simple function to see valid options
#'
#' @return acceptable Pathway Commons formats
#'
#' @details See references.
#'
#' @references Output Formats Description: \url{http://www.pathwaycommons.org/pc2/help/formats.html}
#'
#' @examples
#' pcFormats()
#'
#' @concept paxtoolsr
#' @export
pcFormats <- function() {
    # Earlier format names: "BINARY_SIF", "BIOPAX", "EXTENDED_BINARY_SIF", "GSEA", "SBGN"
    # Names are the format identifiers; values are their descriptions
    formatDescriptions <- list(
        "BIOPAX"="BioPAX Level 3 RDF/XML Format",
        "GSEA"="Gene Set Expression Analysis Format",
        "JSONLD"="JSON-LD format",
        "SBGN"="Systems Biology Graphical Notation Format",
        "SIF"="Simple Binary Interaction Format",
        "TXT"="Extended SIF")

    return(formatDescriptions)
}

# --------------------------------------------------------------------------------
# /R/pcGraphQueries.R:
# --------------------------------------------------------------------------------
#' Acceptable Pathway Commons Graph Queries
#'
#' A simple function to see valid options
#'
#' @return acceptable Pathway Commons graph queries
#'
#' @details
#'
#' \itemize{
#'   \item COMMONSTREAM searches common downstream or common upstream of a
#'     specified set of entities based on the given directions within the
#'     boundaries of a specified length limit
#'   \item NEIGHBORHOOD searches the neighborhood of given source set of nodes
#'   \item PATHSBETWEEN finds the paths between specific source set of states or
#'     entities within the boundaries of a specified length limit
#'   \item PATHSFROMTO finds the paths from a specific source set of states or
#'     entities to a specific target set of states or entities within the
#'     boundaries of a specified length limit
#' }
#'
#' @examples
#' pcGraphQueries()
#'
#' @concept paxtoolsr
#' @export
pcGraphQueries <- function() {
    queryKinds <- c("COMMONSTREAM", "NEIGHBORHOOD", "PATHSBETWEEN", "PATHSFROMTO")

    return(queryKinds)
}

# --------------------------------------------------------------------------------
# /R/processPcRequest.R:
# --------------------------------------------------------------------------------
#' Process Pathway Commons request in various formats
#'
#' @param content a string, content to be processed
#' @param format a string, the type of format
#' @param ... other arguments passed to read* methods for reading different formats
#'
#' @return an R object using one of the read* methods provided in this package
#'   corresponding to the format
#'
#' @examples
#' fileName <- system.file("extdata", "test_biopax.owl", package="paxtoolsr")
#' content <- readChar(fileName, file.info(fileName)$size)
#' results <- processPcRequest(content, "BIOPAX")
#'
#' @seealso \code{\link{pcFormats}}
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom rjson fromJSON
#' @importFrom XML xmlTreeParse
processPcRequest <- function(content, format, ...) {
    # JSON and XML are parsed directly from the string
    if(format == "JSON") {
        return(fromJSON(content))
    }

    if(format == "XML") {
        return(xmlTreeParse(content, useInternalNodes=TRUE))
    }

    # Every other format goes through a temporary file, because the read*
    # functions take a file name
    tmpFile <- tempfile()
    write(content, file=tmpFile)
    stopifnot(file.info(tmpFile)$size > 0)

    # Dispatch on the format name; an unrecognised format returns the
    # content unchanged
    parsed <- switch(as.character(format),
        TXT=,
        EXTENDED_BINARY_SIF=readSifnx(tmpFile, ...),
        SIF=,
        BINARY_SIF=readSif(tmpFile, ...),
        BIOPAX=readBiopax(tmpFile, ...),
        SBGN=readSbgn(tmpFile, ...),
        GSEA=readGmt(tmpFile, ...),
        content)

    return(parsed)
}

# --------------------------------------------------------------------------------
# /R/readBiopax.R:
# --------------------------------------------------------------------------------
#' Read BioPAX files as XML documents
#'
#' @param inputFile an inputFile
#' @return an XMLInternalDocument
#'
#' @examples
#' results <- readBiopax(system.file("extdata", "biopax3-short-metabolic-pathway.owl",
#'   package="paxtoolsr"))
#'
#' @concept paxtoolsr
#' @export
readBiopax <- function(inputFile) {
    checkInputFilePc(inputFile)

    biopaxDoc <- xmlTreeParse(inputFile, useInternalNodes=TRUE)
    return(biopaxDoc)
}

# --------------------------------------------------------------------------------
# /R/readGmt.R:
# --------------------------------------------------------------------------------
#' Read in gene sets from GMT files
#'
#' This function will read in gene sets in the GMT format into a named list.
#'
#' @param inputFile an inputFile
#' @param removePrefix Pathway Commons genesets are prefixed with a NCBI organism taxonomy number (e.g.
#'   9606 for humans); this is a boolean whether to remove the prefix (default: FALSE)
#' @param returnInfo a boolean whether to return information on genesets; these results are returned a list of two items: 1) basic GMT results and 2) datasource, organism, and id type information for each gene set (default: FALSE)
#'
#' @return a named list where each entry corresponds to a gene set or a list described in the returnInfo parameter
#'
#' @examples
#' f1 <- system.file("extdata", "test_PathwayCommons12.kegg.hgnc.gmt",
#'   package="paxtoolsr")
#' f2 <- system.file("extdata", "test_PathwayCommons12.netpath.hgnc.gmt",
#'   package="paxtoolsr")
#'
#' results <- readGmt(f1)
#' results <- readGmt(f2)
#' results <- readGmt(f1, removePrefix=TRUE)
#' results <- readGmt(f2, returnInfo=TRUE)
#'
#' @concept paxtoolsr
#' @importFrom methods is
#' @export
readGmt <- function(inputFile, removePrefix=FALSE, returnInfo=FALSE) {
    checkInputFilePc(inputFile)

    fileContents <- readLines(inputFile)

    # NOTE: Removing empty strings is necessary
    fileContents <- fileContents[fileContents != ""]

    # One character vector per line: name, description, then the genes.
    # strsplit() always returns a list; sapply() simplified lines with equal
    # field counts into a matrix, which merged several gene sets into one.
    tmpResults <- strsplit(fileContents, "\t", fixed = TRUE)

    # Extract the URI as the name for the geneset
    names(tmpResults) <- vapply(tmpResults, function(x) {
        if(removePrefix) {
            parts <- strsplit(x[1], ": ", fixed = TRUE)[[1]]

            # Drop the leading prefix only when there is one; a name without
            # a prefix is kept as is instead of becoming "NA: <name>"
            if(length(parts) > 1) {
                parts <- parts[-1]
            }

            trimws(paste(parts, collapse = ": "))
        } else {
            x[1]
        }
    }, character(1))

    # This splits the description into various elements
    if(returnInfo) {
        results <- lapply(tmpResults, function(x) {
            # The description is a ";"-separated list of "key: value" pairs,
            # read by position: name, datasource, organism, id type
            pairs <- strsplit(trimws(strsplit(x[2], ";")[[1]]), ": ")

            # Value of the i-th pair, or NA when the description is shorter
            pairValue <- function(i) {
                if(length(pairs) >= i) pairs[[i]][2] else NA_character_
            }

            list(geneSet = x[-(1:2)],
                 name = pairValue(1),
                 dataSource = pairValue(2),
                 organism = pairValue(3),
                 idType = pairValue(4))
        })
    } else {
        # Remove the first two entries, i.e. the name and description
        results <- lapply(tmpResults, function(x) x[-(1:2)])
    }

    return(results)
}

# --------------------------------------------------------------------------------
# /R/readPcPathwaysInfo.R:
# --------------------------------------------------------------------------------
#' Read in Pathway Commons Pathways Information
#'
#' @param inputFile an inputFile; if NULL then retrieve the current pathways.txt; see details (default: NULL)
#' @param version a version number for a previous version of Pathway Commons data;
#'   versions 3 and above. Parameter set as version="8". Available versions "http://www.pathwaycommons.org/archives/PC2/"
#'
#' @return a data.frame
#'
#' @details This file is generally found as pathways.txt.gz (e.g.
#' http://www.pathwaycommons.org/archives/PC2/current/pathways.txt.gz)
#'
#' @examples
#' inputFile <- system.file("extdata", "pathways.txt.gz", package="paxtoolsr")
#' results <- readPcPathwaysInfo(inputFile, version="8")
#'
#' @concept paxtoolsr
#' @export
#' @importFrom utils read.table
readPcPathwaysInfo <- function(inputFile=NULL, version=NULL) {
    if(is.null(inputFile) && is.null(version)) {
        stop("ERROR: Either inputFile or version must be specified")
    }

    # No file given: download the requested version into the package cache
    if(is.null(inputFile)) {
        url <- paste0("http://www.pathwaycommons.org/archives/PC2/v", version, "/")
        fileName <- "pathways.txt.gz"

        downloadFile(url, fileName)

        inputFile <- file.path(Sys.getenv("PAXTOOLSR_CACHE"), fileName)
    }

    if(!file.exists(inputFile)) {
        stop("ERROR: inputFile was not found")
    }

    if(grepl("\\.gz$", inputFile)) {
        con <- gzfile(inputFile)
    } else {
        con <- file(inputFile)
    }
    # Release the connection even if reading fails
    on.exit(close(con), add=TRUE)

    lineTmp <- readLines(con, warn=FALSE)

    # The file holds two tables separated by an empty line: everything before
    # the first empty line is the pathway/sub-pathway table, every non-empty
    # line after it belongs to the pathway information table
    isBlank <- !nzchar(lineTmp)
    afterFirstBlank <- cumsum(isBlank) > 0

    pathwayChildrenFile <- tempfile("pathwayChildren", fileext=".txt")
    pathwayInfoFile <- tempfile("pathwayInfo", fileext=".txt")
    on.exit(unlink(c(pathwayChildrenFile, pathwayInfoFile)), add=TRUE)

    writeLines(lineTmp[!afterFirstBlank], pathwayChildrenFile)
    writeLines(lineTmp[afterFirstBlank & !isBlank], pathwayInfoFile)

    pathwayChildren <- read.table(pathwayChildrenFile, header=TRUE, sep="\t", quote="",
                                  stringsAsFactors=FALSE, fill=TRUE)
    pathwayInfo <- read.table(pathwayInfoFile, header=TRUE, sep="\t", quote="",
                              stringsAsFactors=FALSE, fill=TRUE)

    tmpResults <- merge(pathwayChildren, pathwayInfo, by=c("PATHWAY_URI", "DISPLAY_NAME"))

    # ";"-delimited columns become list columns. as.character() guards against
    # a column that is entirely empty and was therefore not read as character.
    tmpResults$DIRECT_SUB_PATHWAY_URIS <- I(strsplit(as.character(tmpResults$DIRECT_SUB_PATHWAY_URIS), ";"))
    tmpResults$ALL_SUB_PATHWAY_URIS <- I(strsplit(as.character(tmpResults$ALL_SUB_PATHWAY_URIS), ";"))
    tmpResults$ALL_NAMES <- I(strsplit(as.character(tmpResults$ALL_NAMES), ";"))

    tmpResults$DATASOURCE <- tolower(tmpResults$DATASOURCE)

    # Add a column that has all the sub-pathway names.
    # lapply() keeps exactly one entry per row (NULL when a pathway has no
    # sub-pathways). Assigning NULL into a list inside a loop dropped such
    # entries at the end and made the row counts disagree.
    pathwayNames <- lapply(seq_len(nrow(tmpResults)), function(i) {
        subPathwayUris <- tmpResults$ALL_SUB_PATHWAY_URIS[[i]]

        unlist(lapply(subPathwayUris, function(uri) {
            tmpResults$DISPLAY_NAME[which(tmpResults$PATHWAY_URI == uri)]
        }), use.names=FALSE)
    })

    results <- data.frame(tmpResults, ALL_SUB_PATHWAY_NAMES=I(pathwayNames), stringsAsFactors = FALSE)

    return(results)
}

# --------------------------------------------------------------------------------
# /R/readSbgn.R:
# --------------------------------------------------------------------------------
#' Read SBGN files as XML documents
#'
#' @param inputFile an inputFile
#' @return an XMLInternalDocument
#'
#' @examples
#' results <- readSbgn(system.file("extdata", "test_sbgn.xml", package="paxtoolsr"))
#'
#' @concept paxtoolsr
#' @export
readSbgn <- function(inputFile) {
    checkInputFilePc(inputFile)

    results <- xmlTreeParse(inputFile, useInternalNodes=TRUE)
    return(results)
}

# --------------------------------------------------------------------------------
# /R/readSif.R:
# --------------------------------------------------------------------------------
#' Read in a binary SIF file
#'
#' @param inputFile an inputFile
#' @return a
#'   data.frame with the interactions in the binary SIF format
#'
#' @examples
#' results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr"))
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom readr read_tsv cols
readSif <- function(inputFile) {
    checkInputFilePc(inputFile)

    # A binary SIF file has no header; its three columns are named here and
    # all of them are read as character
    sifColumns <- c("PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B")

    sifTable <- read_tsv(inputFile,
                         col_names = sifColumns,
                         col_types = cols(.default = "c"),
                         progress = TRUE)

    # Return a plain data.frame
    sifFrame <- as.data.frame(sifTable)

    return(sifFrame)
}

# --------------------------------------------------------------------------------
# /R/readSifnx.R:
# --------------------------------------------------------------------------------
#' Read in a Extended SIF file
#'
#' @param inputFile an inputFile
#'
#' @return a list with nodes and edges entries
#'
#' @details SIFNX files from Pathway Commons commonly come as a single file that
#'   includes a tab-delimited section for nodes and another for edges. The
#'   sections are separated by an empty line. These sections must be split before
#'   they are read.
#'
#' @examples
#' results <- readSifnx(system.file("extdata", "test_sifnx.txt", package="paxtoolsr"))
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom readr read_tsv cols
readSifnx <- function(inputFile) {
    checkInputFilePc(inputFile)

    # Read the whole file into a single string
    fileSize <- file.info(inputFile)$size
    rawText <- readChar(inputFile, nchars=fileSize, useBytes=TRUE)

    edgesFile <- tempfile()
    nodesFile <- tempfile()

    # The node section starts at its header line; everything before it is
    # the edge section.
    # NOTE(review): if the header is absent the position is -1, so the edge
    # part is empty and the whole file is parsed as nodes; confirm whether
    # such input can occur.
    nodeHeaderPos <- regexpr("PARTICIPANT\tPARTICIPANT_TYPE", rawText, fixed = TRUE)[1]

    # NOTE(review): writeChar() with its default eos appends a terminator
    # after the text; the reads below run under suppressWarnings()
    edgeText <- substr(rawText, 1, nodeHeaderPos-1)
    writeChar(edgeText, edgesFile)

    nodeText <- substr(rawText, nodeHeaderPos, nchar(rawText))
    writeChar(nodeText, nodesFile)

    # Both sections carry their own header row; every column is read as
    # character
    nodes <- suppressWarnings(read_tsv(nodesFile,
                                       col_names = TRUE,
                                       col_types = cols(.default = "c"),
                                       progress = TRUE))

    edges <- suppressWarnings(read_tsv(edgesFile,
                                       col_names = TRUE,
                                       col_types = cols(.default = "c"),
                                       progress = TRUE))

    return(list(nodes=nodes, edges=edges))

    # # EDGES
    # edgesInteractionDataSource <- strsplit(as.character(edges$INTERACTION_DATA_SOURCE), ";")
    # edgesPubmedId <- strsplit(as.character(edges$INTERACTION_PUBMED_ID), ";")
    # edgesPathwayNames <- strsplit(as.character(edges$PATHWAY_NAMES), ";")
    #
    # # NODES
    # nodesUniXref <- strsplit(as.character(nodes$UNIFICATION_XREF), ";")
    # names(nodesUniXref) <- nodes$PARTICIPANT
    #
    # nodesRelXref <- strsplit(as.character(nodes$RELATIONSHIP_XREF), ";")
    # names(nodesRelXref) <- nodes$PARTICIPANT
    #
    # nodesType <- nodes$PARTICIPANT_TYPE
    # names(nodesType) <- nodes$PARTICIPANT
    #
    # nodesName <- strsplit(as.character(nodes$PARTICIPANT_NAME), ";")
    # names(nodesName) <- nodes$PARTICIPANT
}

# --------------------------------------------------------------------------------
# /R/searchListOfVectors.R:
# --------------------------------------------------------------------------------
#' Search List of Vectors
#'
#' @param q query vector
#' @param lst list of vectors to search
#'
#' @return a list with the same length as the query vector; each entry holds
#'   the indices of the elements of lst that contain the corresponding query
#'   value. An entry is an empty integer vector if there were no matches. The
#'   list is named by the query values when q is a character vector.
#'
#' @details
#' Taken from: http://stackoverflow.com/questions/11002391/fast-way-of-getting-index-of-match-in-list
#'
#' @examples
#' lst <- list(1:3, 3:5, 3:7)
#' q <- c(3, 5)
#' results <- searchListOfVectors(q, lst)
#' names(results) <- q
#'
#' lst <- list(LETTERS[1:3], LETTERS[3:5], LETTERS[3:7])
#' q <- c("C", "E")
#' searchListOfVectors(q, lst)
#'
#' lst <- list(LETTERS[3], LETTERS[4:6])
#' q <- "C"
#' searchListOfVectors(q, lst)
#'
#' lst <- list(LETTERS[3], LETTERS[4:6])
#' q <- c("C")
#' searchListOfVectors(q, lst)
#'
#' lst <- list(LETTERS[3], LETTERS[4:6])
#' q <- c("C", "E")
#' searchListOfVectors(q, lst)
#'
#' lst <- list(LETTERS[3], LETTERS[4:6])
#' q <- "Z"
#' searchListOfVectors(q, lst)
#'
#' @concept paxtoolsr
#' @export
searchListOfVectors <- function(q, lst) {
    # For every value in the flattened list, the index of the element of lst
    # it came from. lengths() always returns an integer vector, including
    # for an empty lst.
    ownerIdx <- rep(seq_along(lst), lengths(lst))

    # Flatten once instead of once per query value
    flatValues <- unlist(lst)

    # simplify=FALSE always yields a list with one entry per query value
    resultsSe <- sapply(q, function(x) ownerIdx[which(flatValues %in% x)],
                        simplify=FALSE)

    return(resultsSe)
}

# --------------------------------------------------------------------------------
# /R/searchPc.R:
# --------------------------------------------------------------------------------
#' Search Pathway Commons
#'
#' This command provides a text search using the Lucene query syntax.
#'
#' @param q a keyword, name, external identifier, or a Lucene query string.
#' @param page an integer giving the search result page number (N>=0, default:
#'   0)
#' @param datasource a vector that is a filter by data source (use names or URIs
#'   of pathway data sources or of any existing Provenance object). If multiple
#'   data source values are specified, a union of hits from specified sources is
#'   returned. For example, datasource as c("reactome", "pid") returns hits
#'   associated with Reactome or PID.
#' @param organism a vector that is an organism filter. The organism can be
#'   specified either by official name, e.g. "homo sapiens" or by NCBI taxonomy
#'   id, e.g. "9606". Similar to data sources, if multiple organisms are
#'   declared a union of all hits from specified organisms is returned. For
#'   example organism as c("9606", "10016") returns results for both human and
#'   mice. Only humans, "9606" is officially supported.
#' @param type BioPAX class filter. See Details.
#' @param verbose a boolean, display the command used to query Pathway Commons
#' @return an XMLInternalDocument with results
#'
#' @details Indexed fields were selected based on most common searches. Some of
#'   these fields are direct BioPAX properties, others are composite
#'   relationships. All index fields are (case-sensitive):comment, ecnumber,
#'   keyword, name, pathway, term, xrefdb, xrefid, dataSource, and organism. The
#'   pathway field maps to all participants of pathways that contain the
#'   keyword(s) in any of its text fields. This field is transitive in the sense
#'   that participants of all sub-pathways are also returned. Finally, keyword is
#'   a transitive aggregate field that includes all searchable keywords of that
#'   element and its child elements - e.g. a complex would be returned by a
#'   keyword search if one of its members has a match. Keyword is the default
#'   field type. All searches can also be filtered by data source and organism. It
#'   is also possible to restrict the domain class using the 'type' parameter.
#'   This query can be used standalone or to retrieve starting points for graph
#'   searches. Search strings are case insensitive unless put inside quotes.
#'
#'   BioPAX classes can be found at \url{http://www.pathwaycommons.org/pc2/#biopax_types}
#'
#' @examples
#' query <- "Q06609"
#' #results <- searchPc(query)
#'
#' query <- "glycolysis"
#' #results <- searchPc(query, type="Pathway")
#'
#' @concept paxtoolsr
#' @export
searchPc <- function(q, page=0, datasource=NULL, organism=NULL, type=NULL,
                     verbose=FALSE) {
    # Multi-valued filters are sent as one "name=value" pair per value, so
    # each value becomes its own list entry carrying the parameter name
    asRepeatedParam <- function(values, paramName) {
        if(is.null(values)) {
            return(NULL)
        }

        entries <- as.list(values)
        names(entries) <- rep(paramName, length(entries))
        entries
    }

    searchUrl <- paste0(getPcUrl(), "search.xml")

    # Optional filters stay NULL when unset; c() drops NULL pieces
    typeParam <- if(is.null(type)) NULL else list(type=type)

    queryList <- c(list(q=q),
                   list(page=page),
                   asRepeatedParam(datasource, "datasource"),
                   asRepeatedParam(organism, "organism"),
                   typeParam)

    # Attach the query to the base URL
    urlParts <- parse_url(searchUrl)
    urlParts$query <- queryList
    url <- build_url(urlParts)

    response <- getPcRequest(url, verbose)
    results <- processPcRequest(response, "XML")

    return(results)
}

--------------------------------------------------------------------------------
/R/splitSifnxByPathway.R:
--------------------------------------------------------------------------------
#' Splits SIFNX entries into individual pathways
#'
#' @param edges a data.frame with SIF content with the additional column "PATHWAY_NAMES".
#'   "PATHWAY_NAMES" should include pathway names delimited with a semi-colon: ";".
#' @param parallel a boolean that will parallelize the process; requires foreach/doSNOW/parallel packages
#' @return a list where each entry is a vector of row indices for a given pathway;
#'   an empty list if no pathway names are present
#'
#' @details
#' This method can be slow; ~1.5 minutes for 150K+ rows.
#' Has a parallelized method to speed things up.
#'
#' @concept paxtoolsr
#' @export
#' @importFrom utils txtProgressBar setTxtProgressBar
splitSifnxByPathway <- function(edges, parallel=FALSE) {
    stopifnot("PATHWAY_NAMES" %in% colnames(edges))

    # NOTE(review): the ";" split below is disabled, so the column is used as
    # stored -- presumably already a list of name vectors; confirm against the
    # reader that produces "edges".
    #tmp <- strsplit(edges$PATHWAY_NAMES, ";", fixed=TRUE)
    tmp <- edges$PATHWAY_NAMES
    pathwayNames <- unique(unlist(unique(tmp)))

    iterations <- length(pathwayNames)
    cat("NUMBER OF PATHWAYS: ", iterations, "\n")

    # Nothing to split; 1:iterations would otherwise iterate over c(1, 0)
    if(iterations == 0) {
        return(list())
    }

    # min=0 keeps the bar valid for a single pathway (max must exceed min)
    pb <- txtProgressBar(min = 0, max = iterations, style = 3)
    on.exit(close(pb), add=TRUE)

    if(parallel) {
        # Make sure the necessary (optional) packages are available
        for(pkg in c("foreach", "doSNOW", "parallel")) {
            if(!requireNamespace(pkg, quietly=TRUE)) {
                stop("Package '", pkg, "' is required when parallel=TRUE",
                     call.=FALSE)
            }
        }

        # foreach is only referenced with "::" here, so bind its operator locally
        `%dopar%` <- foreach::`%dopar%`

        numCores <- parallel::detectCores()
        if(is.na(numCores)) {
            numCores <- 1
        }

        cl <- parallel::makeCluster(numCores, outfile="") # number of cores. Notice 'outfile'
        # Release the workers even if a task fails
        on.exit(parallel::stopCluster(cl), add=TRUE)
        doSNOW::registerDoSNOW(cl)

        results <- foreach::foreach(i=seq_len(iterations), .packages=c("paxtoolsr")) %dopar% {
            setTxtProgressBar(pb, i)

            # Same flattening as the sequential branch so both return plain vectors
            unname(unlist(searchListOfVectors(pathwayNames[i], tmp)))
        }

        names(results) <- pathwayNames
    } else {
        results <- list()

        for(i in seq_len(iterations)) {
            setTxtProgressBar(pb, i)

            pathwayName <- pathwayNames[i]

            tmpResults <- searchListOfVectors(pathwayName, tmp)
            results[[pathwayName]] <- unname(unlist(tmpResults))
        }
    }

    return(results)
}

--------------------------------------------------------------------------------
/R/summarize.R:
--------------------------------------------------------------------------------
#' Summarize a BioPAX file
#'
#' This function provides a summary of BioPAX classes.
#'
#' @param inputFile a string of the name of the input BioPAX OWL file
#' @return list with BioPAX class counts, named by BioPAX class; each count is
#'   stored as a character string
#'
#' @details BioPAX classes are defined by the BioPAX specification:
#'   \url{http://www.biopax.org/}
#'
#' @examples
#' summary <- summarize(system.file("extdata", "raf_map_kinase_cascade_reactome.owl",
#'   package="paxtoolsr"))
#'
#' @concept paxtoolsr
#' @export
summarize <- function(inputFile) {
    inputFile <- checkInputFile(inputFile)

    # The report file is only needed until it has been read back; remove it on
    # exit (including on error) instead of leaving it in the session temp dir
    outputFile <- tempfile()
    on.exit(unlink(outputFile), add=TRUE)

    command <- "summarize"
    argsList <- lapply(list(command, inputFile, outputFile), function(arg) {
        .jnew("java/lang/String", arg)
    })

    .jcall("org/biopax/paxtools/PaxtoolsMain","V",command,.jarray(argsList, "java/lang/String"))
    .jcheck()

    # Make a vector with each line text as a vector entry
    lines <- readLines(outputFile)

    # Keep only lines of the form "ClassName = count"
    countLines <- lines[grepl("^[A-Za-z]+\\s=\\s\\d+", lines)]

    results <- list()

    for(line in countLines) {
        # Removes any characters at the end of the line that probably have parentheses
        tmp <- gsub("^([A-Za-z]+)\\s=\\s(\\d+).*", "\\1=\\2", line)

        # Produces a vector with two entries: class name and count (as text)
        tmp2 <- strsplit(tmp, "=")[[1]]
        results[[tmp2[1]]] <- tmp2[2]
    }

    return(results)
}

--------------------------------------------------------------------------------
/R/summarizeSif.R:
--------------------------------------------------------------------------------
#' Summarize a SIF Network
#'
#' @param sif a binary SIF as a data.frame with three columns:
#'   "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B"
#' @return a list containing a count of the unique genes in the SIF and counts for the interaction types in the network
#'
#' @examples
#' results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr"))
#' summarizeSif(results)
#'
#' @concept paxtoolsr
#' @export
summarizeSif <- function(sif) {
    # Normalise to a plain data.frame so that [, j] yields a column vector
    sif <- as.data.frame(sif, stringsAsFactors=FALSE)

    # as.character() so factor columns are combined by label; c() on factors
    # combined the underlying integer codes before R 4.1
    participants <- c(as.character(sif[,1]), as.character(sif[,3]))
    uniqueNodes <- length(unique(participants))
    interactionTypeFreq <- table(sif[,2])

    results <- list(uniqueNodes=uniqueNodes, totalInteractions=nrow(sif), interactionTypeFreq=interactionTypeFreq)

    return(results)
}

--------------------------------------------------------------------------------
/R/toCytoscape.R:
--------------------------------------------------------------------------------
#' Convert igraph to Cytoscape JSON
#'
#' @param igraphobj an igraph object
#'
#' @note From https://github.com/idekerlab/cy-rest-R/blob/17f748426bb5e48ba4075b9d97318ad582b250da/utility/cytoscape_util.R
#'
#' @return a JSON object
#'
#' @examples
#' library(igraph)
#' g <- barabasi.game(20)
#' json <- toCytoscape(g)
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom jsonlite toJSON
#' @importFrom igraph graph.attributes list.vertex.attributes vertex.attributes V "V<-" get.edgelist ecount edge.attributes list.edge.attributes
toCytoscape <- function (igraphobj) {
    # Extract graph attributes
    graph_attr <- graph.attributes(igraphobj)

    # Extract nodes; every node gets an "id" (its name when available, otherwise
    # its position as a string)
    node_count <- length(V(igraphobj))
    if('name' %in% list.vertex.attributes(igraphobj)) {
        V(igraphobj)$id <- V(igraphobj)$name
    } else {
        V(igraphobj)$id <- as.character(seq_len(node_count))
    }

    v_attr <- vertex.attributes(igraphobj)
    v_names <- list.vertex.attributes(igraphobj)

    # NOTE(review): the 1-d array container is kept from the original so the
    # object handed to toJSON() is unchanged
    nds <- array(0, dim=c(node_count))
    for(i in seq_len(node_count)) {
        # Progress output for large graphs
        if(i %% 1000 == 0) {
            print(i)
        }
        nds[[i]] <- list(data = mapAttributes(v_names, v_attr, i))
    }

    # Extract edges
    edges <- get.edgelist(igraphobj)
    edge_count <- ecount(igraphobj)
    e_attr <- edge.attributes(igraphobj)
    e_names <- list.edge.attributes(igraphobj)

    attr_exists <- length(e_names) > 0

    eds <- array(0, dim=c(edge_count))
    # seq_len() so that a graph without edges skips the loop entirely
    for(i in seq_len(edge_count)) {
        st <- list(source=toString(edges[i,1]), target=toString(edges[i,2]))

        # Extract attributes
        if(attr_exists) {
            eds[[i]] <- list(data=c(st, mapAttributes(e_names, e_attr, i)))
        } else {
            eds[[i]] <- list(data=st)
        }

        if(i %% 1000 == 0) {
            print(i)
        }
    }

    el <- list(nodes=nds, edges=eds)

    x <- list(data = graph_attr, elements = el)
    return (toJSON(x, auto_unbox=TRUE, pretty=TRUE))
}

#' Map Attributes from igraph to Cytoscape JSON
#'
#' @param attr.names names of attributes
#' @param all.attr all attributes; the j-th entry holds the values of the j-th
#'   name in attr.names
#' @param i index
#'
#' @note From https://github.com/idekerlab/cy-rest-R/blob/17f748426bb5e48ba4075b9d97318ad582b250da/utility/cytoscape_util.R
#'
#' @return attributes; a named list with the non-NA attribute values of
#'   element i
#'
#' @concept paxtoolsr
#' @export
mapAttributes <- function(attr.names, all.attr, i) {
    attr <- list()

    # Tracks by position which attributes are kept, so names stay aligned with
    # values even if two attributes share a name
    kept <- logical(length(attr.names))

    # seq_along() so an empty set of attribute names yields an empty list
    for(j in seq_along(attr.names)) {
        value <- all.attr[[j]][i]

        # NA values are left out of the result altogether
        if(!is.na(value)) {
            attr <- c(attr, value)
            kept[j] <- TRUE
        }
    }

    names(attr) <- attr.names[kept]
    return (attr)
}

--------------------------------------------------------------------------------
/R/toGSEA.R:
--------------------------------------------------------------------------------
#' Converts a BioPAX OWL file to a GSEA GMT gene set
#'
#' This function converts pathway
#' information stored as BioPAX files into the
#' GSEA .gmt format.
#'
#' @param inputFile a string of the name of the input OWL file
#' @param outputFile a string of the name of the output file
#' @param database a string of the name of the identifier type to be included
#'   (e.g. "HGNC Symbol")
#' @param crossSpeciesCheckFlag a boolean that ensures participant protein is
#'   from same species
#' @return see readGmt()
#'
#' @details The GSEA GMT format is a tab-delimited format where each row
#'   represents a gene set. The first column is the gene set name. The second
#'   column is a brief description. Other columns for each row contain genes in
#'   the gene set; these rows may be of unequal lengths.
#'
#' @examples
#' outFile <- tempfile()
#' results <- toGSEA(system.file("extdata", "biopax3-short-metabolic-pathway.owl",
#'                               package="paxtoolsr"),
#'                   outFile,
#'                   "uniprot",
#'                   crossSpeciesCheckFlag=TRUE)
#'
#' @concept paxtoolsr
#' @export
toGSEA <- function(inputFile, outputFile=NULL, database="uniprot", crossSpeciesCheckFlag=TRUE) {
    inputFile <- checkInputFile(inputFile)
    outputFile <- checkOutputFile(outputFile)

    # The flag is handed to Paxtools as the literal option name, or as an empty
    # string when the check is switched off
    if(crossSpeciesCheckFlag) {
        crossSpeciesArg <- "crossSpeciesCheck"
    } else {
        crossSpeciesArg <- ""
    }

    command <- "toGSEA"
    argsList <- lapply(list(command, inputFile, outputFile, database, crossSpeciesArg),
                       function(arg) {
                           .jnew("java/lang/String", arg)
                       })

    .jcall("org/biopax/paxtools/PaxtoolsMain","V",command,.jarray(argsList, "java/lang/String"))
    .jcheck()

    # Read the gene sets back from the file Paxtools just wrote
    results <- readGmt(outputFile)

    return(results)
}

--------------------------------------------------------------------------------
/R/toLevel3.R:
--------------------------------------------------------------------------------
#' Convert a PSIMI or older BioPAX OWL file to BioPAX Level 3
#'
#' This file will convert PSIMI or older BioPAX objects to BioPAX Level 3
#'
#' @param inputFile a string of the name of the input file
#' @param outputFile a string of the name of the output BioPAX OWL file
#' @return an XMLInternalDocument representing a BioPAX OWL file
#'
#' @examples
#' inputFile <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl",
#'   package="paxtoolsr")
#' outFile <- tempfile()
#' results <- toLevel3(inputFile, outFile)
#'
#' @concept paxtoolsr
#' @export
toLevel3 <- function(inputFile, outputFile=NULL) {
    inputFile <- checkInputFile(inputFile)
    outputFile <- checkOutputFile(outputFile)

    command <- "toLevel3"
    argsList <- lapply(list(command, inputFile, outputFile), function(arg) {
        .jnew("java/lang/String", arg)
    })

    .jcall("org/biopax/paxtools/PaxtoolsMain","V",command,.jarray(argsList, "java/lang/String"))
    .jcheck()

    # Return the converted document, i.e. the file written by Paxtools; reading
    # inputFile here would hand back the unconverted input
    results <- readBiopax(outputFile)

    return(results)
}

--------------------------------------------------------------------------------
/R/toSBGN.R:
--------------------------------------------------------------------------------
#' Convert a BioPAX OWL file to SBGNML
#'
#' This function will convert a BioPAX OWL file into the Systems Biology Graphical
#' Notation (SBGN) Markup Language (SBGNML) XML representation
#'
#' @param inputFile a string of the name of the input BioPAX OWL file
#' @param outputFile a string of the name of the output SBGNML file
#' @return see readSbgn()
#'
#' @details Objects in the SBGNML format are laid out using a Compound Spring
#'   Embedder (CoSE) layout
#'
#' @references \url{http://www.cs.bilkent.edu.tr/~ivis/layout/cose-animated-demo/cose.html}
#'
#' @examples
#' outFile <- tempfile()
#' results <- toSBGN(system.file("extdata", "biopax3-short-metabolic-pathway.owl",
#'                               package="paxtoolsr"),
#'                   outFile)
#'
#' @concept paxtoolsr
#' @export
toSBGN <- function(inputFile, outputFile=NULL) {
    # Resolve both paths through the package's file checks
    sourcePath <- checkInputFile(inputFile)
    targetPath <- checkOutputFile(outputFile)

    # Paxtools receives the sub-command followed by its arguments, all as Java
    # strings
    sbgnCommand <- "toSBGN"
    javaArgs <- lapply(list(sbgnCommand, sourcePath, targetPath), function(arg) {
        .jnew("java/lang/String", arg)
    })

    .jcall("org/biopax/paxtools/PaxtoolsMain", "V", sbgnCommand,
           .jarray(javaArgs, "java/lang/String"))
    .jcheck()

    # Read the SBGNML document that was just written
    readSbgn(targetPath)
}

--------------------------------------------------------------------------------
/R/toSif.R:
--------------------------------------------------------------------------------
#' Convert a BioPAX OWL file to SIF
#'
#' Convert a BioPAX OWL file to a binary SIF file
#'
#' @param inputFile a string of the name of the input BioPAX OWL file
#' @param outputFile a string of the name of the output SIF file (Optional)
#' @return see readSif()
#'
#' @details Information on SIF conversion is provided on
#'   the Pathway Commons
#'   site: \url{http://www.pathwaycommons.org/pc2/}
#'
#' @examples
#' outFile <- tempfile()
#' results <- toSif(system.file("extdata", "raf_map_kinase_cascade_reactome.owl",
#'                              package="paxtoolsr"),
#'                  outFile)
#'
#' @concept paxtoolsr
#' @export
toSif <- function(inputFile, outputFile=NULL) {
    # Resolve both paths through the package's file checks
    sourcePath <- checkInputFile(inputFile)
    targetPath <- checkOutputFile(outputFile)

    # Sub-command plus its arguments, each wrapped as a Java string
    sifCommand <- "toSif"
    javaArgs <- lapply(list(sifCommand, sourcePath, targetPath), function(arg) {
        .jnew("java/lang/String", arg)
    })

    .jcall("org/biopax/paxtools/PaxtoolsMain", "V", sifCommand,
           .jarray(javaArgs, "java/lang/String"))
    .jcheck()

    # Load the SIF file that was just written
    readSif(targetPath)
}

--------------------------------------------------------------------------------
/R/toSifnx.R:
--------------------------------------------------------------------------------
# toSifnx("
# \n" +
# converter.writeInteractionsInSIFNX(m, out, out,
# "EntityReference/name,EntityReference/xref" "Interaction/dataSource/displayName"

#' Converts BioPAX OWL file to extended binary SIF representation
#'
#' @param inputFile a string with the name of the input BioPAX OWL file
#' @param outputFile a string with the name of the output file for SIFNX
#'   information
#' @param idType a string either "hgnc" or "uniprot" (DEFAULT: uniprot, more common)
#'
#' @return see readSifnx()
#'
#' @details Information on SIF conversion is provided on the Pathway Commons
#'   site:
#'   \url{http://www.pathwaycommons.org/pc2/}. Also, since this is a Java-based
#'   method, it is best to use full paths.
#'
#' @examples
#' inputFile <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr")
#' results <- toSifnx(inputFile=inputFile)
#'
#' @concept paxtoolsr
#' @export
toSifnx <- function(inputFile, outputFile=tempfile(), idType="uniprot") {
    # Resolve both paths through the package's file checks
    sourcePath <- checkInputFile(inputFile)
    targetPath <- checkOutputFile(outputFile)

    # Sub-command, input, output and identifier type, each wrapped as a Java
    # string in the order Paxtools receives them
    sifnxCommand <- "toSifnx"
    javaArgs <- lapply(list(sifnxCommand, sourcePath, targetPath, idType), function(arg) {
        .jnew("java/lang/String", arg)
    })

    .jcall("org/biopax/paxtools/PaxtoolsMain", "V", sifnxCommand,
           .jarray(javaArgs, "java/lang/String"))
    .jcheck()

    # Load the extended SIF file that was just written
    readSifnx(targetPath)
}

--------------------------------------------------------------------------------
/R/topPathways.R:
--------------------------------------------------------------------------------
#' Retrieve top pathways
#'
#' This command returns all "top" pathways.
#'
#' @param q a keyword, name, external identifier, or a Lucene query string, like in 'search' (e.g. '*' to match all). Although the signature defaults to NULL, a value must be supplied: the function stops when q is NULL.
#' @param datasource filter by data source (same as for 'search').
#' @param organism organism filter (same as for 'search').
#' @param verbose a boolean, display the command used to query Pathway Commons
#' @return a data.frame with the following columns:
#' \itemize{
#'   \item uri URI ID for the pathway
#'   \item biopaxClass the type of BioPAX object
#'   \item name a human readable name
#'   \item dataSource the dataSource for the pathway
#'   \item organism an organism identifier (only when present in the results)
#' }
#'
#' @details Pathways that are neither 'controlled' nor 'pathwayComponent' of
#'   another process.
#'
#' @examples
#' #results <- topPathways(q="TP53", datasource="panther")
#'
#' @concept paxtoolsr
#' @export
#'
#' @importFrom plyr ldply
#' @importFrom httr build_url parse_url
topPathways <- function(q=NULL, datasource=NULL, organism=NULL, verbose=FALSE) {
    baseUrl <- paste0(getPcUrl(), "top_pathways")

    # q is mandatory even though the signature defaults it to NULL
    if(is.null(q)) {
        stop("'q' must be provided (a keyword, name, external identifier or Lucene query string)")
    }

    # Expand a filter into one list entry per value, every entry carrying the
    # same name, so several data sources or organisms become repeated query
    # parameters (as searchPc does) instead of one vector-valued entry
    repeatParam <- function(values, key) {
        if(is.null(values)) {
            return(NULL)
        }

        params <- as.list(values)
        names(params) <- rep(key, length(params))
        params
    }

    queryList <- c(list(q=q),
                   repeatParam(datasource, "datasource"),
                   repeatParam(organism, "organism"))

    tmpUrl <- parse_url(baseUrl)
    tmpUrl$query <- queryList
    url <- build_url(tmpUrl)

    tmp <- getPcRequest(url, verbose)
    results <- processPcRequest(tmp, "XML")

    # One row per element of the parsed XML
    resultsDf <- ldply(xmlToList(results), data.frame, stringsAsFactors=FALSE)

    # "organism" is only kept when the response contained it
    wantedCols <- c("uri", "biopaxClass", "name", "dataSource")
    if("organism" %in% colnames(resultsDf)) {
        wantedCols <- c(wantedCols, "organism")
    }
    resultsDf <- resultsDf[,wantedCols]

    # Remove NAs (rows without a URI)
    resultsDf <- resultsDf[which(!is.na(resultsDf[,"uri"])),]

    return(resultsDf)
}

--------------------------------------------------------------------------------
/R/traverse.R:
--------------------------------------------------------------------------------
#' Access Pathway Commons using XPath-type expressions
#'
#' This command provides XPath-like access to the Pathway Commons.
#'
#' @param uri a BioPAX element URI - specified similarly to the 'GET' command
#'   above). Multiple IDs are allowed (uri=...&uri=...&uri=...).
7 | #' @param path a BioPAX property path in the form of 8 | #' property1[:type1]/property2[:type2]; see properties, inverse properties, 9 | #' Paxtools, org.biopax.paxtools.controller.PathAccessor. 10 | #' @param verbose a boolean, display the command used to query Pathway Commons 11 | #' @return an XMLInternalDocument with results 12 | #' 13 | #' @details With traverse users can explicitly state the paths they would like 14 | #' to access. The format of the path query is in the form: [Initial 15 | #' Class]/[property1]:[classRestriction(optional)]/[property2]... A "*" sign 16 | #' after the property instructs path accessor to transitively traverse that 17 | #' property. For example, the following path accessor will traverse through 18 | #' all physical entity components within a complex: 19 | #' "Complex/component*/entityReference/xref:UnificationXref" The following 20 | #' will list display names of all participants of interactions, which are 21 | #' components (pathwayComponent) of a pathway (note: pathwayOrder property, 22 | #' where same or other interactions can be reached, is not considered here): 23 | #' "Pathway/pathwayComponent:Interaction/participant*/displayName" The 24 | #' optional parameter classRestriction allows restricting/filtering the returned 25 | #' property values to a certain subclass of the range of that property. In the 26 | #' first example above, this is used to get only the Unification Xrefs. Path 27 | #' accessors can use all the official BioPAX properties as well as additional 28 | #' derived classes and parameters in paxtools such as inverse parameters and 29 | #' interfaces that represent anonymous union classes in OWL. (See Paxtools 30 | #' documentation for more details). 
#'
#' @examples
#' uri <- "http://identifiers.org/uniprot/P38398"
#' #results <- traverse(uri=uri, path="ProteinReference/organism/displayName")
#'
#' @references Paxtools Documentation: \url{http://www.biopax.org/m2site/}
#'
#' @concept paxtoolsr
#' @export
#' @importFrom utils URLencode
traverse <- function(uri, path, verbose=FALSE) {
    endpoint <- paste0(getPcUrl(), "traverse")

    # One "uri" entry per requested element, so the built URL repeats the key
    # (uri=...&uri=...)
    uriParams <- NULL
    if(!is.null(uri)) {
        uriParams <- as.list(uri)
        names(uriParams) <- rep("uri", length(uriParams))
    }

    # URIs first, then the property path
    requestUrl <- parse_url(endpoint)
    requestUrl$query <- c(uriParams, list(path=path))

    response <- getPcRequest(build_url(requestUrl), verbose)
    processPcRequest(response, "XML")
}

--------------------------------------------------------------------------------
/R/validate.R:
--------------------------------------------------------------------------------
#' Validate BioPAX files
#'
#' This function validates BioPAX files for errors.
#'
#' @param inputFile a string of the name of the input BioPAX OWL file
#' @param outputFile a string of the name of the output file containing
#'   validation results
#' @param type a string denoting the type of output: xml (default), html, biopax
#' @param autoFix a boolean that determines if the input file should be
#'   fixed automatically.
#'   Errors that can be automatically fixed include
#'   generating displayName properties from names, inferring organism, and
#'   inferring dataSource
#' @param onlyErrors a boolean of whether to only display errors
#' @param maxErrors an integer denoting the number of errors to return
#' @param notStrict a boolean; if TRUE the "notstrict" flag is passed to the
#'   validator, i.e. validation is not strict (default: FALSE)
#'
#' @return an XMLInternalDocument is returned if type is set to "xml" otherwise
#'   the location of the outputfile is returned.
#'
#' @details See the publication by Rodchenkov, et al. for information on the
#'   BioPAX validator. See \url{http://biopax.baderlab.org/validator} for
#'   additional information on validator.
#'   See \url{http://biopax.baderlab.org/validator/errorTypes.html} for
#'   information on error types.
#'
#' @references Rodchenkov I, Demir E, Sander C, Bader GD. The BioPAX Validator,
#'   \url{http://www.ncbi.nlm.nih.gov/pubmed/23918249}
#'
#' @examples
#' outFile <- tempfile()
#' rawDoc <- validate(system.file("extdata", "raf_map_kinase_cascade_reactome.owl",
#'   package="paxtoolsr"), onlyErrors=TRUE)
#'
#' @concept paxtoolsr
#' @export
validate <- function(inputFile, outputFile=NULL,
                     type=c("xml", "html", "biopax"),
                     autoFix=FALSE, onlyErrors=FALSE, maxErrors=NULL,
                     notStrict=FALSE) {
    inputFile <- checkInputFile(inputFile)

    # match.arg() always yields exactly one of the allowed values ("xml" when
    # the argument is left at its default), so no NULL fallback is needed
    type <- match.arg(type)

    outputFile <- checkOutputFile(outputFile)

    command <- "validate"

    # Positional arguments first, then one entry per requested option
    cliArgs <- list(command, inputFile, outputFile, type)

    if(autoFix) {
        cliArgs <- c(cliArgs, "auto-fix")
    }

    if(onlyErrors) {
        cliArgs <- c(cliArgs, "only-errors")
    }

    if(!is.null(maxErrors)) {
        cliArgs <- c(cliArgs, paste0("maxerrors=", maxErrors))
    }

    if(notStrict) {
        cliArgs <- c(cliArgs, "notstrict")
    }

    # Paxtools takes its arguments as an array of Java strings
    argsList <- lapply(cliArgs, function(arg) {
        .jnew("java/lang/String", arg)
    })

    .jcall("org/biopax/paxtools/PaxtoolsMain","V",command,.jarray(argsList, "java/lang/String"))
    .jcheck()

    # Only the XML report is parsed; other formats are returned as a file path
    if(type == "xml") {
        results <- xmlTreeParse(outputFile, useInternalNodes=TRUE)
        return(results)
    } else {
        return(outputFile)
    }
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
![R-CMD-check](https://github.com/BioPAX/paxtoolsr/actions/workflows/R-CMD-check.yaml/badge.svg)

**NOTE:** Users interested in the source code should download the code directly from Bioconductor repositories:

* [Release Version](http://bioconductor.org/packages/release/bioc/html/paxtoolsr.html)
* [Development Version](http://bioconductor.org/packages/devel/bioc/html/paxtoolsr.html)

# PaxtoolsR

An R package providing [Paxtools](http://www.biopax.org/paxtools.php) and [Pathway Commons](http://www.pathwaycommons.org/) functionality.
This project provides users with the ability to read BioPAX files and access Pathway Commons web service functions to: 11 | 12 | * Merge multiple BioPAX files 13 | * Extract sub-networks from BioPAX files 14 | * Do a number of format conversions 15 | * Validate BioPAX files 16 | * Search and retrieve Pathway Commons data 17 | 18 | This package is primarily directed towards R users who wish to work with binary interaction networks in the form of Simple Interaction Format (SIF) networks. 19 | 20 | ## Install PaxtoolsR from Bioconductor (Recommended) 21 | 22 | ### Dependencies 23 | #### Windows (tested on Windows 10) 24 | 25 | [Java](http://www.oracle.com/technetwork/java/javase/downloads/index.html) needs to be installed. NOTE: If using a 64-bit system, make sure to install (or re-install) the 64-bit version. Otherwise, you may encounter an [rJava issue with JAVA_HOME](http://www.r-statistics.com/2012/08/how-to-load-the-rjava-package-after-the-error-java_home-cannot-be-determined-from-the-registry/). 26 | 27 | * NOTE: Installation on Windows 10 from GitHub using devtools::install_github may require args="--no-multiarch" 28 | 29 | #### OS X (tested on Mavericks OSX 10.9+) 30 | 31 | Java needs to be installed. If it is not installed, you will be prompted to install Java the first time you load the paxtoolsr package (NOTE: This prompt may crash RStudio, but installation of Java should not be affected). 
32 | 33 | * NOTE: Further instructions on rJava installation are found here: http://stackoverflow.com/questions/30738974/rjava-load-error-in-rstudio-r-after-upgrading-to-osx-yosemite 34 | 35 | #### Ubuntu (tested on Ubuntu 14.04) 36 | 37 | Run these commands in the Terminal: 38 | 39 | # For latest R version 40 | sudo apt-add-repository -y ppa:marutter/rrutter 41 | sudo apt-get -y update 42 | sudo apt-get -y upgrade 43 | sudo apt-get -y install r-base 44 | # For plyr 45 | sudo apt-get -y install g++ 46 | # For RCurl 47 | sudo apt-get -y install libcurl4-openssl-dev 48 | # For rJava 49 | sudo apt-get -y install liblzma-dev 50 | sudo apt-get -y install libbz2-dev 51 | sudo apt-get -y install libpcre++-dev 52 | sudo apt-get -y install openjdk-7-jdk 53 | # For XML 54 | sudo apt-get -y install libxml2-dev 55 | # To let R find Java 56 | sudo R CMD javareconf 57 | 58 | ### Install Bioconductor and PaxtoolsR 59 | 60 | Run these commands within R: 61 | 62 | if (!requireNamespace("BiocManager", quietly=TRUE)) 63 | install.packages("BiocManager") 64 | BiocManager::install("paxtoolsr") 65 | 66 | # Install PaxtoolsR Development Version from GitHub 67 | 68 | setRepositories(ind=1:6) 69 | options(repos="http://cran.rstudio.com/") 70 | if(!require(remotes)) { install.packages("remotes") } 71 | library(remotes) 72 | 73 | remotes::install_github("BioPAX/paxtoolsr") 74 | remotes::install_github("BioPAX/paxtoolsr", args="--no-multiarch") # On Windows, 64-bit 75 | 76 | # Using PaxtoolsR: R Vignette (Tutorial) 77 | 78 | The tutorial describes a number of possible use cases, including network visualization 79 | and gene set enrichment analysis using this R package. 
Once installed, view tutorials for 80 | PaxtoolsR using the following command: 81 | 82 | ``` 83 | library(paxtoolsr) 84 | browseVignettes("paxtoolsr") 85 | ``` 86 | 87 | A copy of the vignette [Using PaxtoolsR](http://bioconductor.org/packages/release/bioc/vignettes/paxtoolsr/inst/doc/using_paxtoolsr.html) is viewable from the Bioconductor website. 88 | -------------------------------------------------------------------------------- /inst/.gitignore: -------------------------------------------------------------------------------- 1 | README_BIOC.md 2 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite paxtoolsr:") 2 | 3 | citEntry(entry="article", 4 | author = "Luna, A. and Babur, O. and Aksoy, B. A. and Demir, E. and Sander, C.", 5 | title = "{{P}axtools{R}: {P}athway {A}nalysis in {R} {U}sing {P}athway {C}ommons}", 6 | journal = "Bioinformatics", 7 | year = "2015", 8 | month = "Dec", 9 | textVersion = paste("Luna, A et al. PaxtoolsR: Pathway Analysis in R Using Pathway Commons. 2015 Dec. http://www.ncbi.nlm.nih.gov/pubmed/26685306")) 10 | 11 | citFooter("This free open-source software implements academic research 12 | by the authors. Its development took a large amount of extra time and 13 | effort. 
If you use it, please support the project by citing the listed 14 | journal articles.") 15 | 16 | -------------------------------------------------------------------------------- /inst/extdata/biopax3_utf8_char.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioPAX/paxtoolsr/49ae807e1957d14d5fd9f5bd065de1c27423a859/inst/extdata/biopax3_utf8_char.xml -------------------------------------------------------------------------------- /inst/extdata/pathways.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioPAX/paxtoolsr/49ae807e1957d14d5fd9f5bd065de1c27423a859/inst/extdata/pathways.txt.gz -------------------------------------------------------------------------------- /inst/extdata/sif_color_code.txt: -------------------------------------------------------------------------------- 1 | INTERACTION_TYPE COLOR 2 | consumption-controlled-by #FF7F00 3 | used-to-produce #F781BF 4 | reacts-with #F781BF 5 | chemical-affects #984EA3 6 | neighbor-of #999999 7 | in-complex-with #FFFF33 8 | controls-state-change-of #377EB8 9 | catalysis-precedes #E41A1C 10 | controls-production-of #FF7F00 11 | controls-transport-of-chemical #A65628 12 | controls-transport-of #A65628 13 | controls-phosphorylation-of #377EB8 14 | controls-expression-of #4DAF4A 15 | interacts-with #999999 16 | -------------------------------------------------------------------------------- /inst/extdata/tca_cycle.sif: -------------------------------------------------------------------------------- 1 | IDH3A in-same-complex IDH3G 2 | IDH3B in-same-complex IDH3G 3 | DLD in-same-complex PDHB 4 | DLAT in-same-complex PDHA1 5 | DLD in-same-complex PDHA1 6 | DLAT in-same-complex PDHB 7 | PDHB in-same-complex PDHX 8 | SDHA in-same-complex SDHC 9 | DLST in-same-complex OGDH 10 | DLAT in-same-complex PDHX 11 | SDHA in-same-complex SDHD 12 | DLD in-same-complex PDHX 13 | SDHB 
in-same-complex SDHC 14 | PDHA1 in-same-complex PDHX 15 | PDHA1 in-same-complex PDHB 16 | SUCLA2 in-same-complex SUCLG1 17 | SUCLG1 in-same-complex SUCLG2 18 | SDHB in-same-complex SDHD 19 | DLD in-same-complex OGDH 20 | SDHA in-same-complex SDHB 21 | SDHC in-same-complex SDHD 22 | IDH3A in-same-complex IDH3B 23 | DLD in-same-complex DLST 24 | DLAT in-same-complex DLD 25 | -------------------------------------------------------------------------------- /inst/extdata/test_edgelist.txt: -------------------------------------------------------------------------------- 1 | BCAS2 EFTUD2 2 | MYH14 SLC25A5 3 | ADGRL2 PDZD8 4 | ADAM15 ITGB1 5 | CYB5R3 TIMM23B 6 | ECH1 RBFOX2 7 | CCT3 STRAP 8 | DHRS7 EPN1 9 | CLP1 TJP1 10 | IQCB1 MAP4 11 | -------------------------------------------------------------------------------- /inst/extdata/test_gsea_1.gmt: -------------------------------------------------------------------------------- 1 | 9606: 5HT1 type receptor mediated signaling pathway datasource: panther; organism: 9606; id type: uniprot O00161 O14649 O95721 P08908 P13861 P17612 P22612 P22694 P23763 P28221 P28222 P28566 P30939 P31323 P31645 P51817 P51828 P60880 P63027 Q08462 Q13277 Q15836 Q9BV40 Q9NPC2 2 | -------------------------------------------------------------------------------- /inst/extdata/test_sif.txt: -------------------------------------------------------------------------------- 1 | IDH3A in-same-complex IDH3G 2 | IDH3B in-same-complex IDH3G 3 | DLD in-same-complex PDHB 4 | DLAT in-same-complex PDHA1 5 | DLD in-same-complex PDHA1 6 | DLAT in-same-complex PDHB 7 | PDHB in-same-complex PDHX 8 | SDHA in-same-complex SDHC 9 | DLST in-same-complex OGDH 10 | DLAT in-same-complex PDHX 11 | SDHA in-same-complex SDHD 12 | DLD in-same-complex PDHX 13 | SDHB in-same-complex SDHC 14 | PDHA1 in-same-complex PDHX 15 | PDHA1 in-same-complex PDHB 16 | SUCLA2 in-same-complex SUCLG1 17 | SUCLG1 in-same-complex SUCLG2 18 | SDHB in-same-complex SDHD 19 | DLD in-same-complex OGDH 20 
| SDHA in-same-complex SDHB 21 | SDHC in-same-complex SDHD 22 | IDH3A in-same-complex IDH3B 23 | DLD in-same-complex DLST 24 | DLAT in-same-complex DLD 25 | -------------------------------------------------------------------------------- /inst/extdata/test_sif_shortestPath.txt: -------------------------------------------------------------------------------- 1 | PARTICIPANT_A INTERACTION_TYPE PARTICIPANT_B DAP3 interacts-with SH3GL2 SH3GL2 interacts-with TIAM2 TIAM2 interacts-with FOXD3 SH3GL2 interacts-with SLC10A7 SLC10A7 interacts-with FOXD3 FOXD3 interacts-with RPS16 FOXD3 interacts-with RNPS1 RBL2 interacts-with RHOC -------------------------------------------------------------------------------- /inst/ignore/convertIds.R: -------------------------------------------------------------------------------- 1 | #' Convert IDs Using org.Hs.eg.db 2 | #' 3 | #' @param ids a vector of ids 4 | #' @param from the original ID type (used as both a returned column and the keytype) 5 | #' @param to the ID type to convert to 6 | #' 7 | #' @return a vector of converted IDs (the second column of the lookup result) 8 | #' 9 | #' @examples 10 | #' convertIds("TP53", "SYMBOL", "ENTREZID") 11 | #' 12 | #' @concept paxtoolsr 13 | #' @export 14 | convertIds <- function(ids, from="UNIPROT", to="SYMBOL") { 15 | if(!require("org.Hs.eg.db")) { 16 | stop("This function requires org.Hs.eg.db.") 17 | } 18 | # NOTE(review): select() is presumably AnnotationDbi::select, attached with org.Hs.eg.db; confirm 19 | df <- select(org.Hs.eg.db, keys=ids, columns=c(from, to), keytype=from) 20 | 21 | return(df[,2]) 22 | } 23 | -------------------------------------------------------------------------------- /inst/ignore/convertSifnxIds.R: -------------------------------------------------------------------------------- 1 | #' Convert IDs in a SIFNX 2 | #' 3 | #' @param sifnx a SIFNX object (e.g. from the downloadPc2 function) 4 | #' @param participantType the type of participant on which the conversion will occur. 5 | #' Important because not all ID types apply to all entities and otherwise those entities 6 | #' would be labeled as missing an ID. 
7 | #' @param mapping a two column data.frame with columns mapping$PARTICIPANT (old 8 | #' IDs to convert from) and mapping$ID (new IDs to convert to) 9 | #' @param idType an ID type for conversion (not used if mapping parameter is used) 10 | #' @param naRm remove edges where NA's were introduced due to failed conversions 11 | #' 12 | #' @return a SIFNX list with nodes and edges. Only edges will have converted IDs 13 | #' 14 | #' @examples 15 | #' lst <- readSifnx(system.file("extdata", "test_sifnx.txt", package="paxtoolsr")) 16 | #' newSifnx <- convertToDataFrameWithListOfVectors(lst) 17 | #' #t1 <- extractIds(sifnx$nodes, participantType="ProteinReference", idType="UniProt Knowledgebase") 18 | #' tmp <- convertSifnxIds(lst, idType="UniProt Knowledgebase") 19 | #' FIXME 20 | #' 21 | #' @concept paxtoolsr 22 | #' @export 23 | convertSifnxIds <- function(sifnx, participantType="ProteinReference", idType="NCBI Gene", 24 | mapping=NULL, naRm=TRUE) { 25 | 26 | if(is.null(mapping)) { 27 | t1 <- extractIds(sifnx$nodes, participantType=participantType, idType=idType) 28 | t2 <- data.frame(PARTICIPANT=names(t1), ID=t1, stringsAsFactors = FALSE) 29 | 30 | newA <- mapValues(sifnx$edges$PARTICIPANT_A, t2$PARTICIPANT, t2$ID) 31 | newB <- mapValues(sifnx$edges$PARTICIPANT_B, t2$PARTICIPANT, t2$ID) 32 | } else { 33 | newA <- mapValues(sifnx$edges$PARTICIPANT_A, mapping$PARTICIPANT, mapping$ID) 34 | newB <- mapValues(sifnx$edges$PARTICIPANT_B, mapping$PARTICIPANT, mapping$ID) 35 | } 36 | 37 | #newA <- mapvalues(sifnx$edges$PARTICIPANT_A, t2$PARTICIPANT, t2$ID, warn_missing=FALSE) 38 | #newB <- mapvalues(sifnx$edges$PARTICIPANT_B, t2$PARTICIPANT, t2$ID, warn_missing=FALSE) 39 | 40 | # Replace entries not found with NA 41 | notFound <- setdiff(newA, sifnx$edges$PARTICIPANT_A) 42 | for(i in notFound) { 43 | idx <- which(sifnx$nodes$PARTICIPANT == i) 44 | 45 | if(sifnx$nodes[idx,"PARTICIPANT_TYPE"] == participantType) { 46 | newA[newA == idx] <- NA 47 | } 48 | } 49 | 50 | # Replace 
entries not found with NA 51 | notFound <- intersect(newB, sifnx$edges$PARTICIPANT_B) 52 | for(i in notFound) { 53 | idx <- which(sifnx$nodes$PARTICIPANT == i) 54 | 55 | if(any(sifnx$nodes$PARTICIPANT_TYPE[idx] %in% participantType)) { 56 | newB[newB %in% i] <- NA 57 | } 58 | } 59 | 60 | sifnx$edges$PARTICIPANT_A <- newA 61 | sifnx$edges$PARTICIPANT_B <- newB 62 | 63 | if(naRm) { 64 | a <- which(is.na(sifnx$edges$PARTICIPANT_A)) 65 | b <- which(is.na(sifnx$edges$PARTICIPANT_B)) 66 | 67 | tmp <- unique(c(a, b)) 68 | idx <- setdiff(seq_len(nrow(sifnx$edges)), tmp) 69 | 70 | sifnx$edges <- sifnx$edges[idx, ] 71 | } 72 | 73 | return(sifnx) 74 | } 75 | -------------------------------------------------------------------------------- /inst/ignore/convertToDF.R: -------------------------------------------------------------------------------- 1 | #' Convert Results from readSifnx to data.frame 2 | #' 3 | #' @param lst a list returned from readSifnx 4 | #' @return a list with entries converted to data.frame 5 | #' 6 | #' @concept paxtoolsr 7 | #' @export 8 | #' 9 | #' @examples 10 | #' sifnx <- readSifnx(system.file("extdata", "test_sifnx.txt", package="paxtoolsr")) 11 | #' class(sifnx$edges) 12 | #' 13 | #' dfSifnx <- convertToDF(sifnx) 14 | #' class(dfSifnx$edges) 15 | #' 16 | #' dtSifnx <- convertToDT(dfSifnx) 17 | #' class(dtSifnx$edges) 18 | #' 19 | #' @importFrom data.table setDF 20 | convertToDF <- function(lst) { 21 | nodes <- lst$nodes 22 | nodes <- setDF(nodes) 23 | nodes$PARTICIPANT_NAME <- vapply(nodes$PARTICIPANT_NAME, paste, collapse = ";", character(1L)) 24 | nodes$UNIFICATION_XREF <- vapply(nodes$UNIFICATION_XREF, paste, collapse = ";", character(1L)) 25 | nodes$RELATIONSHIP_XREF <- vapply(nodes$RELATIONSHIP_XREF, paste, collapse = ";", character(1L)) 26 | 27 | edges <- lst$edges 28 | edges <- setDF(edges) 29 | edges$INTERACTION_DATA_SOURCE <- vapply(edges$INTERACTION_DATA_SOURCE, paste, collapse = ";", character(1L)) 30 | edges$INTERACTION_PUBMED_ID <- 
vapply(edges$INTERACTION_PUBMED_ID, paste, collapse = ";", character(1L)) 31 | edges$PATHWAY_NAMES <- vapply(edges$PATHWAY_NAMES, paste, collapse = ";", character(1L)) 32 | edges$MEDIATOR_IDS <- vapply(edges$MEDIATOR_IDS, paste, collapse = ";", character(1L)) 33 | 34 | lst$edges <- edges 35 | lst$nodes <- nodes 36 | 37 | return(lst) 38 | } 39 | -------------------------------------------------------------------------------- /inst/ignore/convertToDT.R: -------------------------------------------------------------------------------- 1 | #' Convert Results from readSifnx to data.table 2 | #' 3 | #' @param lst a list returned from readSifnx 4 | #' @return a list with the nodes and edges entries converted to data.table 5 | #' 6 | #' @details The SIFNX format is an evolving format. Older datasets may not have 7 | #' all the columns this function expects. In these cases, the columns will be 8 | #' added with all NULL entries. 9 | #' 10 | #' @concept paxtoolsr 11 | #' @export 12 | #' 13 | #' @importFrom data.table setDT 14 | convertToDT <- function(lst) { 15 | nodes <- lst$nodes 16 | nodes <- setDT(nodes) 17 | nodes$PARTICIPANT_NAME <- strsplit(nodes$PARTICIPANT_NAME, ";") # split ";"-delimited strings into list columns 18 | nodes$UNIFICATION_XREF <- strsplit(nodes$UNIFICATION_XREF, ";") 19 | nodes$RELATIONSHIP_XREF <- strsplit(nodes$RELATIONSHIP_XREF, ";") 20 | # NOTE(review): three edge columns below are coerced with as.character() first; the node columns and INTERACTION_DATA_SOURCE are not 21 | edges <- lst$edges 22 | edges <- setDT(edges) 23 | edges$INTERACTION_DATA_SOURCE <- strsplit(edges$INTERACTION_DATA_SOURCE, ";") 24 | edges$INTERACTION_PUBMED_ID <- strsplit(as.character(edges$INTERACTION_PUBMED_ID), ";") 25 | edges$PATHWAY_NAMES <- strsplit(as.character(edges$PATHWAY_NAMES), ";") 26 | edges$MEDIATOR_IDS <- strsplit(as.character(edges$MEDIATOR_IDS), ";") 27 | 28 | lst$edges <- edges 29 | lst$nodes <- nodes 30 | 31 | return(lst) 32 | } 33 | -------------------------------------------------------------------------------- /inst/ignore/convertToDataFrameWithListOfVectors.R: -------------------------------------------------------------------------------- 1 | #' Convert 
Results from readSifnx to data.frame 2 | #' 3 | #' @param lst a list returned from readSifnx 4 | #' @return a list with nodes and edges as data.frames whose columns are lists of character vectors 5 | #' 6 | #' @details The SIFNX format is an evolving format. Older datasets may not have 7 | #' all the columns this function expects. Every column present is split on ";"; 8 | #' no columns are added. 9 | #' 10 | #' @concept paxtoolsr 11 | #' @export 12 | #' 13 | #' @examples 14 | #' lst <- readSifnx(system.file("extdata", "test_sifnx_250.txt", package="paxtoolsr")) 15 | #' newSifnx <- convertToDataFrameWithListOfVectors(lst) 16 | convertToDataFrameWithListOfVectors <- function(lst) { 17 | nodes <- lst$nodes 18 | nodes <- as.data.frame(nodes) 19 | for(col in colnames(nodes)) { 20 | nodes[[col]] <- I(strsplit(as.character(nodes[[col]]), ";")) 21 | } 22 | 23 | # UNIFICATION_XREF is already split by the loop above; splitting it again would fail on a list 24 | 25 | edges <- as.data.frame(lst$edges) 26 | for(col in colnames(edges)) { 27 | edges[[col]] <- I(strsplit(as.character(edges[[col]]), ";")) 28 | } 29 | 30 | lst$edges <- edges 31 | lst$nodes <- nodes 32 | 33 | return(lst) 34 | } 35 | -------------------------------------------------------------------------------- /inst/ignore/convertToPathwayList.R: -------------------------------------------------------------------------------- 1 | # # Convert to graphite Pathway Object 2 | # # 3 | # # @param lst list returned from 4 | # # 5 | # # @concept paxtoolsr 6 | # # @export 7 | # convertToPathwayList <- function(id="kegg", title="kegg", ident="DISPLAYNAME", 8 | # species="hsapiens", lst) { 9 | # database <- unique(dbResults$edges$INTERACTION_DATA_SOURCE) 10 | # 11 | # edges <- data.frame(src=lst$edges$PARTICIPANT_A, 12 | # dest=lst$edges$PARTICIPANT_B, 13 | # direction=nrow(lst$edges), 14 | # type=lst$edges$INTERACTION_TYPE) 15 | # timestamp <- Sys.Date() 16 | # 17 | # pathway <- new("Pathway", 18 | # id=id, 19 | # title=title, 20 | # edges=edges, 21 | # database=database, 22 | # species=species, 23 | # identifier=ident, 24 | # 
timestamp=timestamp) 25 | # 26 | # return(pathway) 27 | # } 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /inst/ignore/del.txt: -------------------------------------------------------------------------------- 1 | var,val 2 | t0,x2;x4 3 | t1,x1;x2;x3 4 | t2,x2;x3 5 | t3,x4;x5 6 | t4,x1;x7 7 | t5,x2;x8 8 | t6,x3;x9;x10 9 | t7,x4;x11 10 | t8,x8;x10 11 | t9,x10;x1 12 | -------------------------------------------------------------------------------- /inst/ignore/extractIds.R: -------------------------------------------------------------------------------- 1 | #' Extract IDs from an Extended SIF 2 | #' 3 | #' @param nodes extended SIF nodes entries as a data.frame with RELATIONSHIP_XREF as a vector of IDs 4 | #' @param participantType a vector of types of participants to search; 5 | #' useful to only search protein (ProteinReference) or small molecule (SmallMoleculeReference) related entries. 6 | #' @param idType the type of ID to search for; case-insensitive 7 | #' 8 | #' @return a named vector of the first matches for the given ID type 9 | #' 10 | #' @details IMPORTANT: Only the first matching ID will be returned. In some cases, multiple IDs will exist. 
11 | #' 12 | #' @examples 13 | #' t1 <- readSifnx(system.file("extdata", "test_sifnx.txt", package="paxtoolsr")) 14 | #' t2 <- convertToDataFrameWithListOfVectors(t1) 15 | #' 16 | #' results <- extractIds(t2$nodes) 17 | #' 18 | #' @concept paxtoolsr 19 | #' @export 20 | extractIds <- function(nodes, participantType="ProteinReference", idType="hgnc symbol") { 21 | idx <- which(nodes$PARTICIPANT_TYPE %in% participantType) 22 | 23 | t1 <- sapply(nodes$RELATIONSHIP_XREF[idx], function(x) { 24 | tmp <- grep(idType, x, ignore.case=TRUE, value=TRUE)[1] 25 | 26 | # Account for colon at the end of ID names 27 | substr(tmp, (nchar(idType)+2), nchar(tmp)) 28 | }) 29 | 30 | t3 <- unlist(t1) 31 | 32 | # Only get the first entry 33 | #nodeNames <- unlist(lapply(nodes$PARTICIPANT[idx], function(x){ x[[1]][1] })) 34 | nodeNames <- nodes$PARTICIPANT[idx] 35 | names(t3) <- nodeNames 36 | 37 | return(t3) 38 | } 39 | -------------------------------------------------------------------------------- /inst/ignore/readSifnxSmall.R: -------------------------------------------------------------------------------- 1 | #' Read small extended SIFs (SIFNX) 2 | #' 3 | #' @param inputFile Name of the imput file 4 | #' 5 | #' @return a list of nodes and edges 6 | #' 7 | #' @note This function is a dependency free version of readSifnx; this is not suitable for very large files 8 | #' 9 | #' @examples 10 | #' sifnx <- readSifnxSmall(system.file("extdata", "test_sifnx.txt", package="paxtoolsr")) 11 | #' 12 | #' @concept paxtoolsr 13 | #' @importFrom utils read.table 14 | readSifnxSmall <- function(inputFile) { 15 | checkInputFilePc(inputFile) 16 | 17 | edgesFile <- tempfile("edges", fileext=".txt") 18 | nodesFile <- tempfile("nodes", fileext=".txt") 19 | 20 | # Open file connections 21 | edgesCon <- file(edgesFile, "w") 22 | nodesCon <- file(nodesFile, "w") 23 | 24 | con <- file(inputFile) 25 | 26 | newLineFlag <- FALSE 27 | 28 | # Read single lines 29 | lineTmp <- readLines(con, warn=FALSE) 30 | 31 | 
for (i in seq_along(lineTmp)) { 32 | line <- lineTmp[i] 33 | 34 | if(grepl("^$", line)) { 35 | newLineFlag <- TRUE 36 | next 37 | } 38 | 39 | if(!newLineFlag) { 40 | writeLines(line, edgesCon) 41 | } else { 42 | writeLines(line, nodesCon) 43 | } 44 | } 45 | 46 | close(edgesCon) 47 | close(nodesCon) 48 | close(con) 49 | 50 | edges <- read.table(edgesFile, header=TRUE, sep="\t", quote="", 51 | stringsAsFactors=FALSE, fill=TRUE, row.names=NULL) 52 | nodes <- read.table(nodesFile, header=TRUE, sep="\t", quote="", 53 | stringsAsFactors=FALSE, fill=TRUE, row.names=NULL) 54 | 55 | results <- list(nodes=nodes, 56 | edges=edges) 57 | 58 | return(results) 59 | } 60 | 61 | # data <- c("A", "B", "C", "X", "Y", "Z") 62 | # oldValue <- LETTERS[1:20] 63 | # newValue <- letters[1:20] 64 | # results <- mapValues(data, oldValue, newValue) 65 | -------------------------------------------------------------------------------- /inst/java/paxtools-4.3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioPAX/paxtoolsr/49ae807e1957d14d5fd9f5bd065de1c27423a859/inst/java/paxtools-4.3.1.jar -------------------------------------------------------------------------------- /inst/paxtoolsNotes.txt: -------------------------------------------------------------------------------- 1 | java -jar ./inst/java/paxtools-4.3.1.jar merge ./inst/extdata/raf_map_kinase_cascade_reactome.owl ./inst/extdata/biopax3-short-metabolic-pathway.owl DEL.TXT 2 | java -jar ./inst/java/paxtools-4.3.1.jar toSif ./inst/extdata/raf_map_kinase_cascade_reactome.owl DEL.TXT 3 | java -jar ./inst/java/paxtools-4.3.1.jar toSifnx ./inst/extdata/raf_map_kinase_cascade_reactome.owl DEL.TXT 4 | -------------------------------------------------------------------------------- /java/README.md: -------------------------------------------------------------------------------- 1 | Source for Paxtools can be downloaded from: 2 | 3 | https://github.com/BioPAX/paxtools 4 | 
-------------------------------------------------------------------------------- /man/addAttributeList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/addAttributeList.R 3 | \name{addAttributeList} 4 | \alias{addAttributeList} 5 | \title{Add attributes using a list of vectors to an igraph object} 6 | \usage{ 7 | addAttributeList(g, attr, l) 8 | } 9 | \arguments{ 10 | \item{g}{an igraph object} 11 | 12 | \item{attr}{the name of the attribute} 13 | 14 | \item{l}{the list of vectors} 15 | } 16 | \value{ 17 | the modified igraph object 18 | } 19 | \description{ 20 | Add attributes using a list of vectors to an igraph object 21 | } 22 | \examples{ 23 | library(igraph) 24 | g <- barabasi.game(20) 25 | g <- set_vertex_attr(g, "name", value=LETTERS[1:20]) 26 | g <- addAttributeList(g, "isProt", 27 | list(A=TRUE, B=FALSE, C=TRUE, D=TRUE, E=FALSE)) 28 | 29 | } 30 | \concept{paxtoolsr} 31 | -------------------------------------------------------------------------------- /man/convertDataFrameListsToVectors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convertDataFrameListsToVectors.R 3 | \name{convertDataFrameListsToVectors} 4 | \alias{convertDataFrameListsToVectors} 5 | \title{Convert columns with list in data.frame to vector} 6 | \usage{ 7 | convertDataFrameListsToVectors(df, delimiter = ";") 8 | } 9 | \arguments{ 10 | \item{df}{a data.frame} 11 | 12 | \item{delimiter}{a delimiter to concatenate (DEFAULT: ;)} 13 | } 14 | \value{ 15 | a data.frame without list columns 16 | } 17 | \description{ 18 | Convert columns with list in data.frame to vector 19 | } 20 | \note{ 21 | Lists as columns are useful programmatically, 22 | but cause issue in writing output to text-based files 23 | } 24 | \examples{ 25 | 26 | df <- data.frame(id = 1:2, 
name = c("Jon", "Mark"), 27 | children = I(list(c("Mary", "James"), c("Greta", "Sally")))) 28 | df <- convertDataFrameListsToVectors(df) 29 | 30 | } 31 | \concept{paxtoolsr} 32 | -------------------------------------------------------------------------------- /man/convertSifToGmt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convertSifToGmt.R 3 | \name{convertSifToGmt} 4 | \alias{convertSifToGmt} 5 | \title{Convert SIF to GMT} 6 | \usage{ 7 | convertSifToGmt(sif, name = "gmt", returnSmallMolecules = FALSE) 8 | } 9 | \arguments{ 10 | \item{sif}{a data.frame representing a SIF (Simple Interaction Format)} 11 | 12 | \item{name}{the name of the gene set} 13 | 14 | \item{returnSmallMolecules}{a boolean whether to return genes 15 | or small molecules in the gene set} 16 | } 17 | \value{ 18 | a list with one entry being a vector 19 | } 20 | \description{ 21 | Convert SIF to GMT 22 | } 23 | \examples{ 24 | sif <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr")) 25 | gmt <- convertSifToGmt(sif) 26 | 27 | } 28 | \concept{paxtoolsr} 29 | -------------------------------------------------------------------------------- /man/downloadFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/downloadFile.R 3 | \name{downloadFile} 4 | \alias{downloadFile} 5 | \title{Check Cache and Download File} 6 | \usage{ 7 | downloadFile( 8 | baseUrl, 9 | fileName, 10 | destDir = NULL, 11 | cacheEnv = "PAXTOOLSR_CACHE", 12 | verbose = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{baseUrl}{a string, entire download URL except filename} 17 | 18 | \item{fileName}{a string, the filename of file to be downloaded} 19 | 20 | \item{destDir}{a string, the path where a file should be saved} 21 | 22 | \item{cacheEnv}{a string, environment 
variable pointing to specific cache} 23 | 24 | \item{verbose}{show debugging information} 25 | } 26 | \value{ 27 | a boolean TRUE if the file was downloaded or already exists, FALSE otherwise 28 | } 29 | \description{ 30 | Check Cache and Download File 31 | } 32 | \details{ 33 | Description of file formats: http://www.pathwaycommons.org/pc2/formats 34 | } 35 | \examples{ 36 | downloadFile("http://google.com/", fileName="index.html", destDir=tempdir()) 37 | 38 | } 39 | \seealso{ 40 | \code{\link{readSif}, \link{readBiopax}, \link{readSbgn}, \link{readSifnx}, \link{readGmt}} 41 | } 42 | \concept{paxtoolsr} 43 | -------------------------------------------------------------------------------- /man/downloadPc2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/downloadPc2.R 3 | \name{downloadPc2} 4 | \alias{downloadPc2} 5 | \alias{downloadPc} 6 | \title{Download Pathway Commons files (uses menu and cache)} 7 | \usage{ 8 | downloadPc2( 9 | selectedFileName = NULL, 10 | destDir = NULL, 11 | returnNames = NULL, 12 | version, 13 | verbose = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{selectedFileName}{a string, the name of a file to download, which skips the interactive selection} 19 | 20 | \item{destDir}{a string, the destination directory for the file to be 21 | downloaded (Default: NULL). If NULL, then file will be downloaded to cache 22 | directory at Sys.getenv("PAXTOOLSR_CACHE")} 23 | 24 | \item{returnNames}{return a vector of names matching the given regular expression} 25 | 26 | \item{version}{a version number for a previous version of Pathway Commons data; 27 | versions 3 and above. Parameter set as version="8". 
Available versions "http://www.pathwaycommons.org/archives/PC2/"} 28 | 29 | \item{verbose}{a flag to display debugging information (Default: FALSE)} 30 | 31 | \item{...}{additional parameters to send to corresponding read* methods} 32 | } 33 | \value{ 34 | an R object using one of the read* methods provided in this package 35 | corresponding to the file downloaded 36 | } 37 | \description{ 38 | Download Pathway Commons files (uses menu and cache) 39 | } 40 | \examples{ 41 | \dontrun{ 42 | downloadPc2(version="8") 43 | downloadPc2(version="8", returnNames="ext.*sif") 44 | downloadPc2("PathwayCommons.8.inoh.GSEA.hgnc.gmt.gz", version="8", verbose=TRUE) 45 | } 46 | 47 | } 48 | \concept{paxtoolsr} 49 | -------------------------------------------------------------------------------- /man/downloadSignedPC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/downloadSignedPC.R 3 | \name{downloadSignedPC} 4 | \alias{downloadSignedPC} 5 | \title{Download a SIF file containing only signed interactions} 6 | \usage{ 7 | downloadSignedPC(destDir = NULL, forceCache = FALSE) 8 | } 9 | \arguments{ 10 | \item{destDir}{a string, the destination directory for the file to be 11 | downloaded (Default: NULL). If NULL, then file will be downloaded to cache 12 | directory at Sys.getenv("PAXTOOLSR_CACHE")} 13 | 14 | \item{forceCache}{a boolean to force the use of a cached version (DEFAULT: FALSE); 15 | the current host of the file (GitHub) does not support the LAST-MODIFIED header} 16 | } 17 | \value{ 18 | a SIF containing interactions that are considered signed (i.e. 
 19 | interactions causing an increase or decrease in a molecular species) 20 | } 21 | \description{ 22 | Download a SIF file containing only signed interactions 23 | } 24 | \examples{ 25 | # downloadSignedPC() 26 | 27 | } 28 | -------------------------------------------------------------------------------- /man/fetch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fetch.R 3 | \name{fetch} 4 | \alias{fetch} 5 | \title{Fetch a set of IDs from a BioPAX OWL file} 6 | \usage{ 7 | fetch(inputFile, outputFile = NULL, idList) 8 | } 9 | \arguments{ 10 | \item{inputFile}{a string with the name of the input BioPAX OWL file} 11 | 12 | \item{outputFile}{a string with the name of the output BioPAX OWL file} 13 | 14 | \item{idList}{a vector of IDs from the BioPAX OWL file} 15 | } 16 | \value{ 17 | an XMLInternalDocument representing a BioPAX OWL file 18 | } 19 | \description{ 20 | This function will create a subsetted object with specified URIs. 21 | } 22 | \details{ 23 | Only entities in the input BioPAX file will be used in the fetch. 24 | IDs used must be URIs for the entities of interest. Additional properties 25 | such as cross-references for fetched entities will be included in the output. 
26 | } 27 | \examples{ 28 | outFile <- tempfile() 29 | ids <- c("http://identifiers.org/uniprot/P36894", 30 | "http://identifiers.org/uniprot/Q13873") 31 | results <- fetch(system.file("extdata", "REACT_12034-3.owl", package="paxtoolsr"), 32 | outFile, ids) 33 | 34 | } 35 | \concept{paxtoolsr} 36 | -------------------------------------------------------------------------------- /man/filterSif.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filterSif.R 3 | \name{filterSif} 4 | \alias{filterSif} 5 | \title{Keep interactions in SIF network based on certain criteria} 6 | \usage{ 7 | filterSif( 8 | sif, 9 | ids = NULL, 10 | interactionTypes = NULL, 11 | dataSources = NULL, 12 | interactionPubmedIds = NULL, 13 | pathwayNames = NULL, 14 | mediatorIds = NULL, 15 | edgelist = NULL, 16 | idsBothParticipants = FALSE, 17 | edgelistCheckReverse = TRUE, 18 | verbose = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{sif}{a binary SIF as a data.frame with three columns: 23 | "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B"} 24 | 25 | \item{ids}{a vector of IDs to be kept} 26 | 27 | \item{interactionTypes}{a vector of interaction types to be kept 28 | (List of interaction types: http://www.pathwaycommons.org/pc2/formats)} 29 | 30 | \item{dataSources}{a vector of data sources to be kept. For Extended SIF.} 31 | 32 | \item{interactionPubmedIds}{a vector of Pubmed IDs to be kept. For Extended SIF.} 33 | 34 | \item{pathwayNames}{a vector of pathway names to be kept. For Extended SIF.} 35 | 36 | \item{mediatorIds}{a vector of mediator IDs to be kept. For Extended SIF. 37 | Mediator IDs are the full BioPAX objects that were simplified to interaction 38 | given in the SIF. For Extended SIF.} 39 | 40 | \item{edgelist}{a two-column data.frame where each row is an interaction to be kept. 41 | Directionality is ignored (e.g. 
Edge A B will return interactions A B and B A from SIF)} 42 | 43 | \item{idsBothParticipants}{a boolean whether both interaction participants should be in 44 | a given interaction when using the ids parameter; TRUE if both (DEFAULT: FALSE)} 45 | 46 | \item{edgelistCheckReverse}{a boolean whether to check for edges in the reverse order (DEFAULT: TRUE)} 47 | 48 | \item{verbose}{Show debugging information (DEFAULT: FALSE)} 49 | } 50 | \value{ 51 | filtered interactions with three columns: "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B". 52 | The intersection of multiple filters is returned. 53 | } 54 | \description{ 55 | Keep interactions in SIF network based on certain criteria 56 | } 57 | \examples{ 58 | results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr")) 59 | intTypes <- c("controls-state-change-of", "controls-expression-of", "catalysis-precedes") 60 | filteredNetwork <- filterSif(results, interactionTypes=intTypes) 61 | 62 | tmp <- readSifnx(system.file("extdata", "test_sifnx_250.txt", package = "paxtoolsr")) 63 | results <- filterSif(tmp$edges, ids=c("CHEBI:17640", "MCM3")) 64 | results <- filterSif(tmp$edges, dataSources=c("INOH", "KEGG")) 65 | results <- filterSif(tmp$edges, dataSources=c("IntAct"), ids=c("CHEBI:17640", "MCM3")) 66 | results <- filterSif(tmp$edges, pathwayNames=c("Metabolic pathways")) 67 | results <- filterSif(tmp$edges, 68 | mediatorIds=c("http://purl.org/pc2/8/MolecularInteraction_1452626895158")) 69 | results <- filterSif(tmp$edges, interactionPubmedIds="17654400") 70 | 71 | tmp <- readSifnx(system.file("extdata", "test_sifnx_250.txt", package = "paxtoolsr")) 72 | edgelist <- read.table(system.file("extdata", "test_edgelist.txt", package = "paxtoolsr"), 73 | sep="\t", header=FALSE, stringsAsFactors=FALSE) 74 | results <- filterSif(tmp$edges, edgelist=edgelist) 75 | 76 | } 77 | \concept{paxtoolsr} 78 | -------------------------------------------------------------------------------- /man/getCacheFiles.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getCacheFiles.R 3 | \name{getCacheFiles} 4 | \alias{getCacheFiles} 5 | \title{List files in cache directory} 6 | \usage{ 7 | getCacheFiles() 8 | } 9 | \value{ 10 | a vector of the files in the cache directory 11 | } 12 | \description{ 13 | List files in cache directory 14 | } 15 | \examples{ 16 | getCacheFiles() 17 | 18 | } 19 | \concept{paxtoolsr} 20 | -------------------------------------------------------------------------------- /man/getErrorMessage.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getErrorMessage.R 3 | \name{getErrorMessage} 4 | \alias{getErrorMessage} 5 | \title{Get Error Message for a Pathway Commons Error} 6 | \usage{ 7 | getErrorMessage(code) 8 | } 9 | \arguments{ 10 | \item{code}{a three digit numerical error code} 11 | } 12 | \value{ 13 | an error message for the code 14 | } 15 | \description{ 16 | Get Error Message for a Pathway Commons Error 17 | } 18 | \examples{ 19 | results <- getErrorMessage("452") 20 | 21 | } 22 | \concept{paxtoolsr} 23 | -------------------------------------------------------------------------------- /man/getNeighbors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getNeighbors.R 3 | \name{getNeighbors} 4 | \alias{getNeighbors} 5 | \title{Get the neighbors of a set of IDs in a BioPAX file} 6 | \usage{ 7 | getNeighbors(inputFile, outputFile = NULL, idList) 8 | } 9 | \arguments{ 10 | \item{inputFile}{a string with the name of the input BioPAX OWL file} 11 | 12 | \item{outputFile}{a string with the name of the output BioPAX OWL file} 13 | 14 | \item{idList}{a vector of IDs from the BioPAX OWL file} 15 | } 16 | 
\value{ 17 | an XMLInternalDocument representing a BioPAX OWL file 18 | } 19 | \description{ 20 | This function retrieves a set of neighbors for a set of IDs in a BioPAX file. 21 | } 22 | \details{ 23 | Only entities in the input BioPAX file will be searched for neighbors. 24 | IDs used must be URIs for the entities of interest. 25 | } 26 | \examples{ 27 | outFile <- tempfile() 28 | results <- getNeighbors(system.file("extdata", 29 | "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), 30 | outFile, 31 | c("HTTP://WWW.REACTOME.ORG/BIOPAX/48887#PROTEIN2360_1_9606", 32 | "HTTP://WWW.REACTOME.ORG/BIOPAX/48887#PROTEIN1631_1_9606")) 33 | 34 | } 35 | \concept{paxtoolsr} 36 | -------------------------------------------------------------------------------- /man/getPc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getPc.R 3 | \name{getPc} 4 | \alias{getPc} 5 | \title{Get Pathway Commons BioPAX elements} 6 | \usage{ 7 | getPc(uri, format = "BIOPAX", verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{uri}{a vector that includes valid/existing BioPAX element's URI (RDF 11 | ID; for utility classes that were "normalized", such as entity references 12 | and controlled vocabularies, it is usually an Identifiers.org URL). Multiple 13 | IDs are allowed per query, for example, 14 | c("http://identifiers.org/uniprot/Q06609", 15 | "http://identifiers.org/uniprot/Q549Z0"). See also the note about MIRIAM and 16 | Identifiers.org in details.} 17 | 18 | \item{format}{output format (Default: BIOPAX). 
Valid options can be found using 19 | \code{\link{pcFormats}}} 20 | 21 | \item{verbose}{a boolean, display the command used to query Pathway Commons} 22 | 23 | \item{...}{additional arguments to read* methods that handle data from Pathway Commons} 24 | } 25 | \value{ 26 | an XMLInternalDocument object 27 | } 28 | \description{ 29 | This command retrieves full pathway information for a set of elements such as 30 | pathway, interaction or physical entity given the RDF IDs. 31 | } 32 | \details{ 33 | Get commands only retrieve the BioPAX elements that are directly 34 | mapped to the ID. Use the "traverse" query to traverse the BioPAX graph and 35 | obtain child/owner elements. 36 | 37 | Information on MIRIAM and Identifiers.org 38 | \url{http://www.pathwaycommons.org/pc2/#miriam} 39 | } 40 | \examples{ 41 | uri <- "http://identifiers.org/uniprot/O14503" 42 | #results <- getPc(uri) 43 | 44 | uri <- c("http://identifiers.org/uniprot/O14503", "http://identifiers.org/uniprot/Q9P2X7") 45 | #results <- getPc(uri, verbose=TRUE) 46 | 47 | } 48 | \seealso{ 49 | \code{\link{pcFormats}} 50 | } 51 | \concept{paxtoolsr} 52 | -------------------------------------------------------------------------------- /man/getPcDatabaseNames.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getPcDatabaseNames.R 3 | \name{getPcDatabaseNames} 4 | \alias{getPcDatabaseNames} 5 | \title{Get Pathway Commons Databases} 6 | \usage{ 7 | getPcDatabaseNames(version) 8 | } 9 | \arguments{ 10 | \item{version}{PC2 version} 11 | } 12 | \value{ 13 | the names of databases that can be used as part of queries 14 | } 15 | \description{ 16 | Get Pathway Commons Databases 17 | } 18 | \examples{ 19 | getPcDatabaseNames(version=10) 20 | 21 | } 22 | \concept{paxtoolsr} 23 | -------------------------------------------------------------------------------- /man/getPcUrl.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getPcUrl.R 3 | \name{getPcUrl} 4 | \alias{getPcUrl} 5 | \title{Get base Pathway Commons URL} 6 | \usage{ 7 | getPcUrl() 8 | } 9 | \value{ 10 | a string with base Pathway Commons URL 11 | } 12 | \description{ 13 | Get base Pathway Commons URL 14 | } 15 | \details{ 16 | paxtoolsr will support Pathway Commons versions 5 and later. Old 17 | versions of the webservice will not be operational. Users can parse 18 | older BioPAX outputs as an alternative. 19 | } 20 | \examples{ 21 | url <- getPcUrl() 22 | 23 | } 24 | \concept{paxtoolsr} 25 | -------------------------------------------------------------------------------- /man/getShortestPathSif.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getShortestPathSif.R 3 | \name{getShortestPathSif} 4 | \alias{getShortestPathSif} 5 | \title{Get the shortest path between two IDs (HGNC or CHEBI)} 6 | \usage{ 7 | getShortestPathSif( 8 | sif, 9 | idA, 10 | idB, 11 | mode = c("all", "out", "in"), 12 | weights = NULL, 13 | verbose = FALSE, 14 | filterFun, 15 | ... 
16 | ) 17 | } 18 | \arguments{ 19 | \item{sif}{a SIF network} 20 | 21 | \item{idA}{HGNC or CHEBI (CHEBI:XXXXX) ID} 22 | 23 | \item{idB}{HGNC or CHEBI (CHEBI:XXXXX) ID} 24 | 25 | \item{mode}{see shortest_paths() in igraph} 26 | 27 | \item{weights}{see shortest_paths() in igraph} 28 | 29 | \item{verbose}{a boolean whether to show debugging information} 30 | 31 | \item{filterFun}{a function to filter multiple paths of the same length} 32 | 33 | \item{...}{additional arguments passed on to filterFun} 34 | } 35 | \value{ 36 | a data.frame representing a SIF network 37 | } 38 | \description{ 39 | Get the shortest path between two IDs (HGNC or CHEBI) 40 | } 41 | \examples{ 42 | idA <- "DAP3" 43 | idB <- "RPS16" 44 | sif <- readSif(system.file("extdata", "test_sif_shortestPath.txt", package="paxtoolsr")) 45 | filterFun <- function(vpaths) { idx <- sample(1:length(vpaths), 1); return(vpaths[[idx]]) } 46 | m1 <- getShortestPathSif(sif, idA, idB, mode="all", weights=NULL, filterFun=filterFun, verbose=TRUE) 47 | 48 | } 49 | \concept{paxtoolsr} 50 | -------------------------------------------------------------------------------- /man/getSifInteractionCategories.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getSifInteractionCategories.R 3 | \name{getSifInteractionCategories} 4 | \alias{getSifInteractionCategories} 5 | \title{Get a list of categories of SIF interactions} 6 | \usage{ 7 | getSifInteractionCategories() 8 | } 9 | \value{ 10 | a list of interactions in categories 11 | } 12 | \description{ 13 | Get a list of categories of SIF interactions 14 | } 15 | \details{ 16 | Description of interaction types: http://www.pathwaycommons.org/pc2/formats 17 | Categories provided: 18 | BetweenProteins, 19 | BetweenProteinsOther (often from high-throughput experiments), 20 | BetweenProteinSmallMolecule, 21 | BetweenSmallMolecules, 22 | SignedInteractions 23 | } 24 | 
\examples{ 25 | sifCat <- getSifInteractionCategories() 26 | sifCat[["BetweenProteins"]] 27 | 28 | } 29 | \concept{paxtoolsr} 30 | -------------------------------------------------------------------------------- /man/graphPc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graphPc.R 3 | \name{graphPc} 4 | \alias{graphPc} 5 | \title{Get Pathway Commons BioPAX elements} 6 | \usage{ 7 | graphPc( 8 | kind, 9 | source, 10 | target = NULL, 11 | direction = NULL, 12 | limit = NULL, 13 | format = NULL, 14 | datasource = NULL, 15 | organism = NULL, 16 | verbose = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{kind}{graph query. Valid options can be found using \code{\link{pcGraphQueries}} 21 | See Details for information on graph queries.} 22 | 23 | \item{source}{source object's URI/ID. Multiple source URIs/IDs are allowed 24 | per query, for example c("http://identifiers.org/uniprot/Q06609", 25 | "http://identifiers.org/uniprot/Q549Z0") 26 | See a note about MIRIAM and Identifiers.org in details} 27 | 28 | \item{target}{[Required for PATHSFROMTO graph query] target URI/ID. Multiple 29 | target URIs are allowed per query; for example c("http://identifiers.org/uniprot/Q06609", 30 | "http://identifiers.org/uniprot/Q549Z0") 31 | See a note about MIRIAM and Identifiers.org in details} 32 | 33 | \item{direction}{[Optional, for NEIGHBORHOOD and COMMONSTREAM algorithms] - 34 | graph search direction. Valid options: \code{\link{pcDirections}}.} 35 | 36 | \item{limit}{graph query search distance limit (default: 1).} 37 | 38 | \item{format}{output format. 
Valid options: \code{\link{pcFormats}}} 39 | 40 | \item{datasource}{datasource filter (same as for 'search').} 41 | 42 | \item{organism}{organism filter (same as for 'search').} 43 | 44 | \item{verbose}{a boolean, display the command used to query Pathway Commons} 45 | } 46 | \value{ 47 | depending on the output format, a different object may be returned. 48 | See \code{\link{pcFormats}} 49 | } 50 | \description{ 51 | This function will retrieve a set of BioPAX elements given a graph query match. 52 | } 53 | \examples{ 54 | source <- "http://identifiers.org/uniprot/O14503" 55 | #results <- graphPc(source=source, kind="neighborhood", format="TXT") 56 | 57 | } 58 | \seealso{ 59 | \code{\link{pcFormats}, \link{pcDirections}} 60 | } 61 | \concept{paxtoolsr} 62 | -------------------------------------------------------------------------------- /man/integrateBiopax.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/integrateBiopax.R 3 | \name{integrateBiopax} 4 | \alias{integrateBiopax} 5 | \title{Integrate two BioPAX OWL files (DEPRECATED)} 6 | \usage{ 7 | integrateBiopax(inputFile1, inputFile2, outputFile = NULL) 8 | } 9 | \arguments{ 10 | \item{inputFile1}{a string of the name of the input BioPAX OWL file} 11 | 12 | \item{inputFile2}{a string of the name of the input BioPAX OWL file} 13 | 14 | \item{outputFile}{a string of the name of the output integrated BioPAX 15 | OWL file} 16 | } 17 | \value{ 18 | an XMLInternalDocument representing a BioPAX OWL file 19 | } 20 | \description{ 21 | This function merges two BioPAX OWL files 22 | } 23 | \details{ 24 | This method is deprecated. Use mergeBiopax instead. 
25 | } 26 | \examples{ 27 | outFile <- tempfile() 28 | results <- integrateBiopax(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", 29 | package="paxtoolsr"), 30 | system.file("extdata", "dna_replication.owl", package="paxtoolsr"), 31 | outFile) 32 | 33 | } 34 | \seealso{ 35 | \code{\link{mergeBiopax}} 36 | } 37 | \concept{paxtoolsr} 38 | -------------------------------------------------------------------------------- /man/loadSifInIgraph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/loadSifInIgraph.R 3 | \name{loadSifInIgraph} 4 | \alias{loadSifInIgraph} 5 | \title{Load SIF as igraph Network} 6 | \usage{ 7 | loadSifInIgraph(sif, directed = TRUE) 8 | } 9 | \arguments{ 10 | \item{sif}{a binary SIF as a data.frame with three columns: 11 | "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B"} 12 | 13 | \item{directed}{a boolean whether the returned graph should be directed (DEFAULT: TRUE)} 14 | } 15 | \value{ 16 | a directed igraph network with interaction types 17 | } 18 | \description{ 19 | Load SIF as igraph Network 20 | } 21 | \details{ 22 | Users are likely to run into issues if the input SIF has factor levels 23 | } 24 | \examples{ 25 | results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr")) 26 | g <- loadSifInIgraph(results) 27 | 28 | } 29 | \concept{paxtoolsr} 30 | -------------------------------------------------------------------------------- /man/mapAttributes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/toCytoscape.R 3 | \name{mapAttributes} 4 | \alias{mapAttributes} 5 | \title{Map Attributes from igraph to Cytoscape JSON} 6 | \usage{ 7 | mapAttributes(attr.names, all.attr, i) 8 | } 9 | \arguments{ 10 | \item{attr.names}{names of attributes} 11 | 12 | \item{all.attr}{all 
attributes} 13 | 14 | \item{i}{index} 15 | } 16 | \value{ 17 | attributes 18 | } 19 | \description{ 20 | Map Attributes from igraph to Cytoscape JSON 21 | } 22 | \note{ 23 | From https://github.com/idekerlab/cy-rest-R/blob/17f748426bb5e48ba4075b9d97318ad582b250da/utility/cytoscape_util.R 24 | } 25 | \concept{paxtoolsr} 26 | -------------------------------------------------------------------------------- /man/mapValues.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mapValues.R 3 | \name{mapValues} 4 | \alias{mapValues} 5 | \title{Map values from One Vector to Another} 6 | \usage{ 7 | mapValues(data, oldValue, newValue) 8 | } 9 | \arguments{ 10 | \item{data}{a vector of strings where values will be replaced} 11 | 12 | \item{oldValue}{a vector that matches values in the data vector} 13 | 14 | \item{newValue}{a vector of new values that will replace the old values} 15 | } 16 | \value{ 17 | return the vector with the mapped values. If there was no 18 | corresponding entry then replace it with an NA. 
19 | } 20 | \description{ 21 | Map values from One Vector to Another 22 | } 23 | \examples{ 24 | data <- c("A", "B", "C", "X", "Y", "Z") 25 | oldValue <- LETTERS[1:20] 26 | newValue <- letters[1:20] 27 | results <- mapValues(data, oldValue, newValue) 28 | 29 | } 30 | \concept{paxtoolsr} 31 | -------------------------------------------------------------------------------- /man/mergeBiopax.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mergeBiopax.R 3 | \name{mergeBiopax} 4 | \alias{mergeBiopax} 5 | \title{Merges two BioPAX OWL files} 6 | \usage{ 7 | mergeBiopax(inputFile1, inputFile2, outputFile = NULL) 8 | } 9 | \arguments{ 10 | \item{inputFile1}{a string of the name of the input BioPAX OWL file} 11 | 12 | \item{inputFile2}{a string of the name of the input BioPAX OWL file} 13 | 14 | \item{outputFile}{a string of the name of the output merged BioPAX 15 | OWL file (Optional)} 16 | } 17 | \value{ 18 | an XMLInternalDocument representing a BioPAX OWL file 19 | } 20 | \description{ 21 | This function merges two BioPAX OWL files 22 | } 23 | \details{ 24 | Only entities that share IDs will be merged. No additional merging 25 | occurs on cross-references. Merging may result in warning messages caused 26 | as a result of redundant actions being checked against by the Java library; 27 | these messages may be ignored. 
28 | } 29 | \examples{ 30 | 31 | outFile <- tempfile() 32 | results <- mergeBiopax(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", 33 | package="paxtoolsr"), 34 | system.file("extdata", "dna_replication.owl", 35 | package="paxtoolsr"), 36 | outFile) 37 | 38 | } 39 | \concept{paxtoolsr} 40 | -------------------------------------------------------------------------------- /man/pcDirections.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pcDirections.R 3 | \name{pcDirections} 4 | \alias{pcDirections} 5 | \title{Acceptable Pathway Commons Directions} 6 | \usage{ 7 | pcDirections() 8 | } 9 | \value{ 10 | acceptable Pathway Commons directions 11 | } 12 | \description{ 13 | A simple function to see valid options 14 | } 15 | \details{ 16 | \itemize{ 17 | \item BOTHSTREAM where the current entity can either be the source or 18 | target of an interaction 19 | \item DOWNSTREAM where the current entity can only be the source 20 | \item UPSTREAM where the current entity can only be the target 21 | } 22 | } 23 | \examples{ 24 | pcDirections() 25 | 26 | } 27 | \concept{paxtoolsr} 28 | -------------------------------------------------------------------------------- /man/pcFormats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pcFormats.R 3 | \name{pcFormats} 4 | \alias{pcFormats} 5 | \title{Acceptable Pathway Commons Formats} 6 | \usage{ 7 | pcFormats() 8 | } 9 | \value{ 10 | acceptable Pathway Commons formats 11 | } 12 | \description{ 13 | A simple function to see valid options 14 | } 15 | \details{ 16 | See references. 
17 | } 18 | \examples{ 19 | pcFormats() 20 | 21 | } 22 | \references{ 23 | Output Formats Description: \url{http://www.pathwaycommons.org/pc2/help/formats.html} 24 | } 25 | \concept{paxtoolsr} 26 | -------------------------------------------------------------------------------- /man/pcGraphQueries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pcGraphQueries.R 3 | \name{pcGraphQueries} 4 | \alias{pcGraphQueries} 5 | \title{Acceptable Pathway Commons Graph Queries} 6 | \usage{ 7 | pcGraphQueries() 8 | } 9 | \value{ 10 | acceptable Pathway Commons graph queries 11 | } 12 | \description{ 13 | A simple function to see valid options 14 | } 15 | \details{ 16 | \itemize{ 17 | \item COMMONSTREAM searches common downstream or common upstream of a 18 | specified set of entities based on the given directions within the 19 | boundaries of a specified length limit 20 | \item NEIGHBORHOOD searches the neighborhood of given source set of nodes 21 | \item PATHSBETWEEN finds the paths between specific source set of states or 22 | entities within the boundaries of a specified length limit 23 | \item PATHSFROMTO finds the paths from a specific source set of states or 24 | entities to a specific target set of states or entities within the 25 | boundaries of a specified length limit 26 | } 27 | } 28 | \examples{ 29 | pcGraphQueries() 30 | 31 | } 32 | \concept{paxtoolsr} 33 | -------------------------------------------------------------------------------- /man/processPcRequest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/processPcRequest.R 3 | \name{processPcRequest} 4 | \alias{processPcRequest} 5 | \title{Process Pathway Commons request in various formats} 6 | \usage{ 7 | processPcRequest(content, format, ...) 
8 | } 9 | \arguments{ 10 | \item{content}{a string, content to be processed} 11 | 12 | \item{format}{a string, the type of format} 13 | 14 | \item{...}{other arguments passed to read* methods for reading different formats} 15 | } 16 | \value{ 17 | an R object using one of the read* methods provided in this package 18 | corresponding to the format 19 | } 20 | \description{ 21 | Process Pathway Commons request in various formats 22 | } 23 | \examples{ 24 | fileName <- system.file("extdata", "test_biopax.owl", package="paxtoolsr") 25 | content <- readChar(fileName, file.info(fileName)$size) 26 | results <- processPcRequest(content, "BIOPAX") 27 | 28 | } 29 | \seealso{ 30 | \code{\link{pcFormats}} 31 | } 32 | \concept{paxtoolsr} 33 | -------------------------------------------------------------------------------- /man/readBiopax.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readBiopax.R 3 | \name{readBiopax} 4 | \alias{readBiopax} 5 | \title{Read BioPAX files as XML documents} 6 | \usage{ 7 | readBiopax(inputFile) 8 | } 9 | \arguments{ 10 | \item{inputFile}{an inputFile} 11 | } 12 | \value{ 13 | an XMLInternalDocument 14 | } 15 | \description{ 16 | Read BioPAX files as XML documents 17 | } 18 | \examples{ 19 | results <- readBiopax(system.file("extdata", "biopax3-short-metabolic-pathway.owl", 20 | package="paxtoolsr")) 21 | 22 | } 23 | \concept{paxtoolsr} 24 | -------------------------------------------------------------------------------- /man/readGmt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readGmt.R 3 | \name{readGmt} 4 | \alias{readGmt} 5 | \title{Read in gene sets from GMT files} 6 | \usage{ 7 | readGmt(inputFile, removePrefix = FALSE, returnInfo = FALSE) 8 | } 9 | \arguments{ 10 | \item{inputFile}{an inputFile} 
11 | 12 | \item{removePrefix}{Pathway Commons genesets are prefixed with a NCBI organism taxonomy number (e.g. 9606 for humans); this is a boolean whether to remove the prefix (default: FALSE)} 13 | 14 | \item{returnInfo}{a boolean whether to return information on genesets; these results are returned a list of two items: 1) basic GMT results and 2) datasource, organism, and id type information for each gene set (default: FALSE)} 15 | } 16 | \value{ 17 | a named list where each entry corresponds to a gene set or a list described in the returnInfo parameter 18 | } 19 | \description{ 20 | This function will read in gene sets in the GMT format into a named list. 21 | } 22 | \examples{ 23 | f1 <- system.file("extdata", "test_PathwayCommons12.kegg.hgnc.gmt", 24 | package="paxtoolsr") 25 | f2 <- system.file("extdata", "test_PathwayCommons12.netpath.hgnc.gmt", 26 | package="paxtoolsr") 27 | 28 | results <- readGmt(f1) 29 | results <- readGmt(f2) 30 | results <- readGmt(f1, removePrefix=TRUE) 31 | results <- readGmt(f2, returnInfo=TRUE) 32 | 33 | } 34 | \concept{paxtoolsr} 35 | -------------------------------------------------------------------------------- /man/readPcPathwaysInfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readPcPathwaysInfo.R 3 | \name{readPcPathwaysInfo} 4 | \alias{readPcPathwaysInfo} 5 | \title{Read in Pathway Commons Pathways Information} 6 | \usage{ 7 | readPcPathwaysInfo(inputFile = NULL, version = NULL) 8 | } 9 | \arguments{ 10 | \item{inputFile}{an inputFile; if NULL then retrieve the current pathways.txt; see details (default: NULL)} 11 | 12 | \item{version}{a version number for a previous version of Pathway Commons data; 13 | versions 3 and above. Parameter set as version="8". 
Available versions "http://www.pathwaycommons.org/archives/PC2/"} 14 | } 15 | \value{ 16 | a data.frame 17 | } 18 | \description{ 19 | Read in Pathway Commons Pathways Information 20 | } 21 | \details{ 22 | This file is generally found as pathways.txt.gz (e.g. 23 | http://www.pathwaycommons.org/archives/PC2/current/pathways.txt.gz) 24 | } 25 | \examples{ 26 | inputFile <- system.file("extdata", "pathways.txt.gz", package="paxtoolsr") 27 | results <- readPcPathwaysInfo(inputFile, version="8") 28 | 29 | } 30 | \concept{paxtoolsr} 31 | -------------------------------------------------------------------------------- /man/readSbgn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readSbgn.R 3 | \name{readSbgn} 4 | \alias{readSbgn} 5 | \title{Read SBGN files as XML documents} 6 | \usage{ 7 | readSbgn(inputFile) 8 | } 9 | \arguments{ 10 | \item{inputFile}{an inputFile} 11 | } 12 | \value{ 13 | an XMLInternalDocument 14 | } 15 | \description{ 16 | Read SBGN files as XML documents 17 | } 18 | \examples{ 19 | results <- readSbgn(system.file("extdata", "test_sbgn.xml", package="paxtoolsr")) 20 | 21 | } 22 | \concept{paxtoolsr} 23 | -------------------------------------------------------------------------------- /man/readSif.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readSif.R 3 | \name{readSif} 4 | \alias{readSif} 5 | \title{Read in a binary SIF file} 6 | \usage{ 7 | readSif(inputFile) 8 | } 9 | \arguments{ 10 | \item{inputFile}{an inputFile} 11 | } 12 | \value{ 13 | a data.frame with the interactions in the binary SIF format 14 | } 15 | \description{ 16 | Read in a binary SIF file 17 | } 18 | \examples{ 19 | results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr")) 20 | 21 | } 22 | \concept{paxtoolsr} 23 | 
-------------------------------------------------------------------------------- /man/readSifnx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readSifnx.R 3 | \name{readSifnx} 4 | \alias{readSifnx} 5 | \title{Read in an Extended SIF file} 6 | \usage{ 7 | readSifnx(inputFile) 8 | } 9 | \arguments{ 10 | \item{inputFile}{an inputFile} 11 | } 12 | \value{ 13 | a list with nodes and edges entries 14 | } 15 | \description{ 16 | Read in an Extended SIF file 17 | } 18 | \details{ 19 | SIFNX files from Pathway Commons commonly come as a single file that 20 | includes a tab-delimited section for nodes and another for edges. The 21 | sections are separated by an empty line. These sections must be split before 22 | they are read. 23 | } 24 | \examples{ 25 | results <- readSifnx(system.file("extdata", "test_sifnx.txt", package="paxtoolsr")) 26 | 27 | } 28 | \concept{paxtoolsr} 29 | -------------------------------------------------------------------------------- /man/searchListOfVectors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/searchListOfVectors.R 3 | \name{searchListOfVectors} 4 | \alias{searchListOfVectors} 5 | \title{Search List of Vectors} 6 | \usage{ 7 | searchListOfVectors(q, lst) 8 | } 9 | \arguments{ 10 | \item{q}{query vector} 11 | 12 | \item{lst}{list of vectors to search} 13 | } 14 | \value{ 15 | a list of vectors with the same length as the query vector, each list 16 | entry will have indices for lst where there was a match with the query 17 | vector. Returns NA if there were no matches. 
18 | } 19 | \description{ 20 | Search List of Vectors 21 | } 22 | \details{ 23 | Taken from: http://stackoverflow.com/questions/11002391/fast-way-of-getting-index-of-match-in-list 24 | } 25 | \examples{ 26 | lst <- list(1:3, 3:5, 3:7) 27 | q <- c(3, 5) 28 | results <- searchListOfVectors(q, lst) 29 | names(results) <- q 30 | 31 | lst <- list(LETTERS[1:3], LETTERS[3:5], LETTERS[3:7]) 32 | q <- c("C", "E") 33 | searchListOfVectors(q, lst) 34 | 35 | lst <- list(LETTERS[3], LETTERS[4:6]) 36 | q <- "C" 37 | searchListOfVectors(q, lst) 38 | 39 | lst <- list(LETTERS[3], LETTERS[4:6]) 40 | q <- c("C") 41 | searchListOfVectors(q, lst) 42 | 43 | lst <- list(LETTERS[3], LETTERS[4:6]) 44 | q <- c("C", "E") 45 | searchListOfVectors(q, lst) 46 | 47 | lst <- list(LETTERS[3], LETTERS[4:6]) 48 | q <- "Z" 49 | searchListOfVectors(q, lst) 50 | 51 | } 52 | \concept{paxtoolsr} 53 | -------------------------------------------------------------------------------- /man/searchPc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/searchPc.R 3 | \name{searchPc} 4 | \alias{searchPc} 5 | \title{Search Pathway Commons} 6 | \usage{ 7 | searchPc( 8 | q, 9 | page = 0, 10 | datasource = NULL, 11 | organism = NULL, 12 | type = NULL, 13 | verbose = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{q}{a keyword, name, external identifier, or a Lucene query string.} 18 | 19 | \item{page}{an integer giving the search result page number (N>=0, default: 20 | 0)} 21 | 22 | \item{datasource}{a vector that is a filter by data source (use names or URIs 23 | of pathway data sources or of any existing Provenance object). If multiple 24 | data source values are specified, a union of hits from specified sources is 25 | returned. For example, datasource as c("reactome", "pid") returns hits 26 | associated with Reactome or PID.} 27 | 28 | \item{organism}{a vector that is an organism filter. 
The organism can be 29 | specified either by official name, e.g. "homo sapiens" or by NCBI taxonomy 30 | id, e.g. "9606". Similar to data sources, if multiple organisms are 31 | declared a union of all hits from specified organisms is returned. For 32 | example organism as c("9606", "10016") returns results for both human and 33 | mice. Only humans, "9606" is officially supported.} 34 | 35 | \item{type}{BioPAX class filter. See Details.} 36 | 37 | \item{verbose}{a boolean, display the command used to query Pathway Commons} 38 | } 39 | \value{ 40 | an XMLInternalDocument with results 41 | } 42 | \description{ 43 | This command provides a text search using the Lucene query syntax. 44 | } 45 | \details{ 46 | Indexed fields were selected based on most common searches. Some of 47 | these fields are direct BioPAX properties, others are composite 48 | relationships. All index fields are (case-sensitive):comment, ecnumber, 49 | keyword, name, pathway, term, xrefdb, xrefid, dataSource, and organism. The 50 | pathway field maps to all participants of pathways that contain the 51 | keyword(s) in any of its text fields. This field is transitive in the sense 52 | that participants of all sub-pathways are also returned. Finally, keyword is 53 | a transitive aggregate field that includes all searchable keywords of that 54 | element and its child elements - e.g. a complex would be returned by a 55 | keyword search if one of its members has a match. Keyword is the default 56 | field type. All searches can also be filtered by data source and organism. It 57 | is also possible to restrict the domain class using the 'type' parameter. 58 | This query can be used standalone or to retrieve starting points for graph 59 | searches. Search strings are case insensitive unless put inside quotes. 
60 | 61 | BioPAX classes can be found at \url{http://www.pathwaycommons.org/pc2/#biopax_types} 62 | } 63 | \examples{ 64 | query <- "Q06609" 65 | #results <- searchPc(query) 66 | 67 | query <- "glycolysis" 68 | #results <- searchPc(query, type="Pathway") 69 | 70 | } 71 | \concept{paxtoolsr} 72 | -------------------------------------------------------------------------------- /man/splitSifnxByPathway.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/splitSifnxByPathway.R 3 | \name{splitSifnxByPathway} 4 | \alias{splitSifnxByPathway} 5 | \title{Splits SIFNX entries into individual pathways} 6 | \usage{ 7 | splitSifnxByPathway(edges, parallel = FALSE) 8 | } 9 | \arguments{ 10 | \item{edges}{a data.frame with SIF content with the additional column "PATHWAY_NAMES". 11 | "PATHWAY_NAMES" should include pathway names delimited with a semi-colon: ";".} 12 | 13 | \item{parallel}{a boolean that will parallelize the process; requires foreach/doSNOW/parallel packages} 14 | } 15 | \value{ 16 | a list where each entry is a vector of row indices for a given pathway 17 | } 18 | \description{ 19 | Splits SIFNX entries into individual pathways 20 | } 21 | \details{ 22 | This method can be slow; ~1.5 minutes for 150K+ rows. 23 | Has a parallelized method to speed things up. 
24 | } 25 | \concept{paxtoolsr} 26 | -------------------------------------------------------------------------------- /man/summarize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize.R 3 | \name{summarize} 4 | \alias{summarize} 5 | \title{Summarize a BioPAX file} 6 | \usage{ 7 | summarize(inputFile) 8 | } 9 | \arguments{ 10 | \item{inputFile}{a string of the name of the input BioPAX OWL file} 11 | } 12 | \value{ 13 | list with BioPAX class counts 14 | } 15 | \description{ 16 | This function provides a summary of BioPAX classes. 17 | } 18 | \details{ 19 | BioPAX classes are defined by the BioPAX specification: 20 | \url{http://www.biopax.org/} 21 | } 22 | \examples{ 23 | summary <- summarize(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", 24 | package="paxtoolsr")) 25 | 26 | } 27 | \concept{paxtoolsr} 28 | -------------------------------------------------------------------------------- /man/summarizeSif.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarizeSif.R 3 | \name{summarizeSif} 4 | \alias{summarizeSif} 5 | \title{Summarize a SIF Network} 6 | \usage{ 7 | summarizeSif(sif) 8 | } 9 | \arguments{ 10 | \item{sif}{a binary SIF as a data.frame with three columns: 11 | "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B"} 12 | } 13 | \value{ 14 | a list containing a count of the unique genes in the SIF and counts for the interaction types in the network 15 | } 16 | \description{ 17 | Summarize a SIF Network 18 | } 19 | \examples{ 20 | results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr")) 21 | summarizeSif(results) 22 | 23 | } 24 | \concept{paxtoolsr} 25 | -------------------------------------------------------------------------------- /man/toCytoscape.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/toCytoscape.R 3 | \name{toCytoscape} 4 | \alias{toCytoscape} 5 | \title{Convert igraph to Cytoscape JSON} 6 | \usage{ 7 | toCytoscape(igraphobj) 8 | } 9 | \arguments{ 10 | \item{igraphobj}{an igraph object} 11 | } 12 | \value{ 13 | a JSON object 14 | } 15 | \description{ 16 | Convert igraph to Cytoscape JSON 17 | } 18 | \note{ 19 | From https://github.com/idekerlab/cy-rest-R/blob/17f748426bb5e48ba4075b9d97318ad582b250da/utility/cytoscape_util.R 20 | } 21 | \examples{ 22 | library(igraph) 23 | g <- barabasi.game(20) 24 | json <- toCytoscape(g) 25 | 26 | } 27 | \concept{paxtoolsr} 28 | -------------------------------------------------------------------------------- /man/toGSEA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/toGSEA.R 3 | \name{toGSEA} 4 | \alias{toGSEA} 5 | \title{Converts a BioPAX OWL file to a GSEA GMT gene set} 6 | \usage{ 7 | toGSEA( 8 | inputFile, 9 | outputFile = NULL, 10 | database = "uniprot", 11 | crossSpeciesCheckFlag = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{inputFile}{a string of the name of the input OWL file} 16 | 17 | \item{outputFile}{a string of the name of the output file} 18 | 19 | \item{database}{a string of the name of the identifier type to be included 20 | (e.g. "HGNC Symbol")} 21 | 22 | \item{crossSpeciesCheckFlag}{a boolean that ensures participant protein is 23 | from same species} 24 | } 25 | \value{ 26 | see readGmt() 27 | } 28 | \description{ 29 | This function converts pathway information stored as BioPAX files into the 30 | GSEA .gmt format. 31 | } 32 | \details{ 33 | The GSEA GMT format is a tab-delimited format where each row 34 | represents a gene set. The first column is the gene set name. 
The second 35 | column is a brief description. Other columns for each row contain genes in 36 | the gene set; these rows may be of unequal lengths. 37 | } 38 | \examples{ 39 | outFile <- tempfile() 40 | results <- toGSEA(system.file("extdata", "biopax3-short-metabolic-pathway.owl", 41 | package="paxtoolsr"), 42 | outFile, 43 | "uniprot", 44 | crossSpeciesCheckFlag=TRUE) 45 | 46 | } 47 | \concept{paxtoolsr} 48 | -------------------------------------------------------------------------------- /man/toLevel3.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/toLevel3.R 3 | \name{toLevel3} 4 | \alias{toLevel3} 5 | \title{Convert a PSIMI or older BioPAX OWL file to BioPAX Level 3} 6 | \usage{ 7 | toLevel3(inputFile, outputFile = NULL) 8 | } 9 | \arguments{ 10 | \item{inputFile}{a string of the name of the input file} 11 | 12 | \item{outputFile}{a string of the name of the output BioPAX OWL file} 13 | } 14 | \value{ 15 | an XMLInternalDocument representing a BioPAX OWL file 16 | } 17 | \description{ 18 | This file will convert PSIMI or older BioPAX objects to BioPAX Level 3 19 | } 20 | \examples{ 21 | inputFile <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", 22 | package="paxtoolsr") 23 | outFile <- tempfile() 24 | results <- toLevel3(inputFile, outFile) 25 | 26 | } 27 | \concept{paxtoolsr} 28 | -------------------------------------------------------------------------------- /man/toSBGN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/toSBGN.R 3 | \name{toSBGN} 4 | \alias{toSBGN} 5 | \title{Convert a BioPAX OWL file to SBGNML} 6 | \usage{ 7 | toSBGN(inputFile, outputFile = NULL) 8 | } 9 | \arguments{ 10 | \item{inputFile}{a string of the name of the input BioPAX OWL file} 11 | 12 | \item{outputFile}{a string of the 
name of the output SBGNML file} 13 | } 14 | \value{ 15 | see readSbgn() 16 | } 17 | \description{ 18 | This function will convert a BioPAX OWL file into the Systems Biology Graphical 19 | Notation (SBGN) Markup Language (SBGNML) XML representation 20 | } 21 | \details{ 22 | Objects in the SBGNML format are laid out using a Compound Spring 23 | Embedder (CoSE) layout 24 | } 25 | \examples{ 26 | outFile <- tempfile() 27 | results <- toSBGN(system.file("extdata", "biopax3-short-metabolic-pathway.owl", 28 | package="paxtoolsr"), 29 | outFile) 30 | 31 | } 32 | \references{ 33 | \url{http://www.cs.bilkent.edu.tr/~ivis/layout/cose-animated-demo/cose.html} 34 | } 35 | \concept{paxtoolsr} 36 | -------------------------------------------------------------------------------- /man/toSif.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/toSif.R 3 | \name{toSif} 4 | \alias{toSif} 5 | \title{Convert a BioPAX OWL file to SIF} 6 | \usage{ 7 | toSif(inputFile, outputFile = NULL) 8 | } 9 | \arguments{ 10 | \item{inputFile}{a string of the name of the input BioPAX OWL file} 11 | 12 | \item{outputFile}{a string of the name of the output SIF file (Optional)} 13 | } 14 | \value{ 15 | see readSif() 16 | } 17 | \description{ 18 | Convert a BioPAX OWL file to a binary SIF file 19 | } 20 | \details{ 21 | Information on SIF conversion is provided on the Pathway Commons 22 | site: \url{http://www.pathwaycommons.org/pc2/} 23 | } 24 | \examples{ 25 | 26 | outFile <- tempfile() 27 | results <- toSif(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", 28 | package="paxtoolsr"), 29 | outFile) 30 | 31 | } 32 | \concept{paxtoolsr} 33 | -------------------------------------------------------------------------------- /man/toSifnx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please 
edit documentation in R/toSifnx.R 3 | \name{toSifnx} 4 | \alias{toSifnx} 5 | \title{Converts BioPAX OWL file to extended binary SIF representation} 6 | \usage{ 7 | toSifnx(inputFile, outputFile = tempfile(), idType = "uniprot") 8 | } 9 | \arguments{ 10 | \item{inputFile}{a string with the name of the input BioPAX OWL file} 11 | 12 | \item{outputFile}{a string with the name of the output file for SIFNX 13 | information} 14 | 15 | \item{idType}{a string either "hgnc" or "uniprot" (DEFAULT: uniprot, more common)} 16 | } 17 | \value{ 18 | see readSifnx() 19 | } 20 | \description{ 21 | Converts BioPAX OWL file to extended binary SIF representation 22 | } 23 | \details{ 24 | Information on SIF conversion is provided on the Pathway Commons 25 | site: \url{http://www.pathwaycommons.org/pc2/}. Also, this is a Java-based 26 | method; it is best to use full paths. 27 | } 28 | \examples{ 29 | inputFile <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr") 30 | results <- toSifnx(inputFile=inputFile) 31 | 32 | } 33 | \concept{paxtoolsr} 34 | -------------------------------------------------------------------------------- /man/topPathways.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/topPathways.R 3 | \name{topPathways} 4 | \alias{topPathways} 5 | \title{Retrieve top pathways} 6 | \usage{ 7 | topPathways(q = NULL, datasource = NULL, organism = NULL, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{q}{[Optional] a keyword, name, external identifier, or a Lucene query string, like in 'search', but the default is '*' (match all).} 11 | 12 | \item{datasource}{filter by data source (same as for 'search').} 13 | 14 | \item{organism}{organism filter (same as for 'search').} 15 | 16 | \item{verbose}{a boolean, display the command used to query Pathway Commons} 17 | } 18 | \value{ 19 | a data.frame with the following columns: 20 | 
\itemize{ 21 | \item uri URI ID for the pathway 22 | \item biopaxClass the type of BioPAX object 23 | \item name a human readable name 24 | \item dataSource the dataSource for the pathway 25 | \item organism an organism identifier 26 | \item pathway URI ID for the pathway 27 | } 28 | } 29 | \description{ 30 | This command returns all "top" pathways. 31 | } 32 | \details{ 33 | Pathways that are neither 'controlled' nor 'pathwayComponent' of 34 | another process. 35 | } 36 | \examples{ 37 | #results <- topPathways(q="TP53", datasource="panther") 38 | 39 | } 40 | \concept{paxtoolsr} 41 | -------------------------------------------------------------------------------- /man/traverse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/traverse.R 3 | \name{traverse} 4 | \alias{traverse} 5 | \title{Access Pathway Commons using XPath-type expressions} 6 | \usage{ 7 | traverse(uri, path, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{uri}{a BioPAX element URI (specified similarly to the 'GET' command 11 | above). Multiple IDs are allowed (uri=...&uri=...&uri=...).} 12 | 13 | \item{path}{a BioPAX property path in the form of 14 | property1[:type1]/property2[:type2]; see properties, inverse properties, 15 | Paxtools, org.biopax.paxtools.controller.PathAccessor.} 16 | 17 | \item{verbose}{a boolean, display the command used to query Pathway Commons} 18 | } 19 | \value{ 20 | an XMLInternalDocument with results 21 | } 22 | \description{ 23 | This command provides XPath-like access to the Pathway Commons. 24 | } 25 | \details{ 26 | With traverse users can explicitly state the paths they would like 27 | to access. The format of the path query is in the form: [Initial 28 | Class]/[property1]:[classRestriction(optional)]/[property2]... A "*" sign 29 | after the property instructs path accessor to transitively traverse that 30 | property. 
For example, the following path accessor will traverse through 31 | all physical entity components within a complex: 32 | "Complex/component*/entityReference/xref:UnificationXref" The following 33 | will list display names of all participants of interactions, which are 34 | components (pathwayComponent) of a pathway (note: pathwayOrder property, 35 | where same or other interactions can be reached, is not considered here): 36 | "Pathway/pathwayComponent:Interaction/participant*/displayName" The 37 | optional parameter classRestriction allows to restrict/filter the returned 38 | property values to a certain subclass of the range of that property. In the 39 | first example above, this is used to get only the Unification Xrefs. Path 40 | accessors can use all the official BioPAX properties as well as additional 41 | derived classes and parameters in paxtools such as inverse parameters and 42 | interfaces that represent anonymous union classes in OWL. (See Paxtools 43 | documentation for more details). 
44 | } 45 | \examples{ 46 | uri <- "http://identifiers.org/uniprot/P38398" 47 | #results <- traverse(uri=uri, path="ProteinReference/organism/displayName") 48 | 49 | } 50 | \references{ 51 | Paxtools Documentation: \url{http://www.biopax.org/m2site/} 52 | } 53 | \concept{paxtoolsr} 54 | -------------------------------------------------------------------------------- /man/validate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validate.R 3 | \name{validate} 4 | \alias{validate} 5 | \title{Validate BioPAX files} 6 | \usage{ 7 | validate( 8 | inputFile, 9 | outputFile = NULL, 10 | type = c("xml", "html", "biopax"), 11 | autoFix = FALSE, 12 | onlyErrors = FALSE, 13 | maxErrors = NULL, 14 | notStrict = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{inputFile}{a string of the name of the input BioPAX OWL file} 19 | 20 | \item{outputFile}{a string of the name of the output file containing 21 | validation results} 22 | 23 | \item{type}{a string denoting the type of output: xml (default), html, biopax} 24 | 25 | \item{autoFix}{a boolean that determines if the input file should be 26 | fixed automatically. Errors that can be automatically fixed include 27 | generating displayName properties from names, inferring organism, and 28 | inferring dataSource} 29 | 30 | \item{onlyErrors}{a boolean of whether to only display errors} 31 | 32 | \item{maxErrors}{an integer denoting the number of errors to return} 33 | 34 | \item{notStrict}{a boolean of whether to be strict in validation (default: FALSE)} 35 | } 36 | \value{ 37 | an XMLInternalDocument is returned if type is set to "xml"; otherwise 38 | the location of the outputfile is returned. 39 | } 40 | \description{ 41 | This function validates BioPAX files for errors. 42 | } 43 | \details{ 44 | See the publication by Rodchenkov, et al. for information on the 45 | BioPAX validator. 
See \url{http://biopax.baderlab.org/validator} for 46 | additional information on validator. 47 | See \url{http://biopax.baderlab.org/validator/errorTypes.html} for 48 | information on error types. 49 | } 50 | \examples{ 51 | outFile <- tempfile() 52 | rawDoc <- validate(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", 53 | package="paxtoolsr"), onlyErrors=TRUE) 54 | 55 | } 56 | \references{ 57 | Rodchenkov I, Demir E, Sander C, Bader GD. The BioPAX Validator, 58 | \url{http://www.ncbi.nlm.nih.gov/pubmed/23918249} 59 | } 60 | \concept{paxtoolsr} 61 | -------------------------------------------------------------------------------- /tests/runTests.old: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(paxtoolsr) 3 | test_package("paxtoolsr") -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(paxtoolsr) 3 | 4 | test_check("paxtoolsr") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_paxtools.R: -------------------------------------------------------------------------------- 1 | # All tests are done on files in package using system.file() 2 | 3 | context("Paxtools Functionality") 4 | 5 | test_that("fetch", { 6 | outFile <- tempfile() 7 | ids <- c("http://identifiers.org/uniprot/P36894", 8 | "http://identifiers.org/uniprot/Q13873") 9 | results <- fetch(system.file("extdata", "REACT_12034-3.owl", package="paxtoolsr"), 10 | outFile, ids) 11 | 12 | expect_is(results, "XMLInternalDocument") 13 | }) 14 | 15 | # test_that("fromPsimi", { 16 | # skip("fromPsimi removed from Paxtools 4.3.1") 17 | # 18 | # inputFile <- system.file("extdata", "10523676-compact.xml", package="paxtoolsr") 19 | # outFile <- tempfile() 20 | # 21 | # results <- fromPsimi(inputFile, outFile, 3) 22 | # 
expect_is(results, "XMLInternalDocument") 23 | # }) 24 | 25 | test_that("toGSEA", { 26 | outFile <- tempfile() 27 | 28 | results <- toGSEA(system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr"), 29 | outFile, 30 | "uniprot", 31 | crossSpeciesCheckFlag=TRUE) 32 | 33 | # The file should be tab delimited and have at least 3 columns 34 | colNum <- ncol(read.delim(outFile, sep="\t", header=FALSE)) 35 | expect_true(colNum > 3) 36 | 37 | expect_is(results, "list") 38 | }) 39 | 40 | test_that("validate", { 41 | rawDoc <- validate(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), onlyErrors=TRUE) 42 | expect_is(rawDoc, "XMLInternalDocument") 43 | 44 | outFile <- tempfile() 45 | rawDoc <- validate(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), 46 | type="html", outputFile=outFile, onlyErrors=TRUE) 47 | expect_is(rawDoc, "character") 48 | }) 49 | 50 | test_that("getNeighbors", { 51 | outFile <- tempfile() 52 | 53 | results <- getNeighbors(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), 54 | outFile, 55 | c("HTTP://WWW.REACTOME.ORG/BIOPAX/48887#PROTEIN2360_1_9606", 56 | "HTTP://WWW.REACTOME.ORG/BIOPAX/48887#PROTEIN1631_1_9606")) 57 | 58 | expect_is(results, "XMLInternalDocument") 59 | }) 60 | 61 | test_that("toLevel3", { 62 | outFile <- tempfile() 63 | 64 | results <- toLevel3(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), 65 | outFile) 66 | 67 | expect_is(results, "XMLInternalDocument") 68 | }) 69 | 70 | test_that("toSBGN", { 71 | outFile <- tempfile() 72 | 73 | results <- toSBGN(system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr"), 74 | outFile) 75 | 76 | expect_is(results, "XMLInternalDocument") 77 | }) 78 | 79 | test_that("toSifnx", { 80 | inputFile <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr") 81 | outputFile <- tempfile() 82 | 83 | # 
New SIF converter does not support extra properties 84 | #nodeProps <- c("EntityReference/name", "EntityReference/xref") 85 | #nodeProps <- c("EntityReference/name", "EntityReference/xref") 86 | #edgeProps <- "Interaction/dataSource/displayName" 87 | 88 | results <- toSifnx(inputFile, outputFile) 89 | # nodeProps, 90 | # edgeProps) 91 | 92 | expect_equal(names(results), c("nodes", "edges")) 93 | 94 | #expect_equal(length(colnames(results$edges)), (3 + length(edgeProps))) 95 | #expect_equal(length(colnames(results$nodes)), (1 + length(nodeProps))) 96 | }) 97 | 98 | test_that("toSif", { 99 | outFile <- tempfile() 100 | 101 | results <- toSif(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), 102 | outFile) 103 | 104 | expect_equal(colnames(results), c("PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B")) 105 | }) 106 | 107 | test_that("integrateBioPax", { 108 | outFile <- tempfile() 109 | 110 | results <- integrateBiopax(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), 111 | system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr"), 112 | outFile) 113 | 114 | expect_is(results, "XMLInternalDocument") 115 | }) 116 | 117 | test_that("mergeBiopax", { 118 | outFile <- tempfile() 119 | 120 | results <- mergeBiopax(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), 121 | system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr"), 122 | outFile) 123 | 124 | expect_is(results, "XMLInternalDocument") 125 | }) 126 | 127 | test_that("summarize", { 128 | summary <- summarize(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr")) 129 | 130 | expect_true(is.list(summary)) 131 | }) 132 | 133 | test_that("xmlInternalDocumentInput", { 134 | results <- toSif(system.file("extdata", "REACT_12034-3.owl", package="paxtoolsr")) 135 | 136 | expect_is(results, "data.frame") 137 | }) 138 | 139 | # TODO 140 | 
#test_that("checkInputFile") {}) 141 | #test_that("checkOutputFile") {}) 142 | 143 | #DEBUG 144 | #test_that("FAIL", { 145 | # expect_that(FALSE, is_true()) 146 | #}) 147 | -------------------------------------------------------------------------------- /vignettes/extract_pathways.R: -------------------------------------------------------------------------------- 1 | Try this. First, update to the paxtoolsr development version (I just updated a few things): 2 | 3 | setRepositories(ind=1:6) 4 | options(repos="http://cran.rstudio.com/") 5 | if(!require(devtools)) { install.packages("devtools") } 6 | library(devtools) 7 | install_github("BioPAX/paxtoolsr") 8 | 9 | # Example 10 | 11 | library(paxtoolsr) 12 | 13 | exampleFileInPaxtoolsr <- system.file("extdata", "REACT_12034-3.owl", package="paxtoolsr") 14 | sifnx <- toSifnx(exampleFileInPaxtoolsr, "/Users/cannin/Downloads/output.txt", "uniprot") 15 | 16 | # Not all rows represented, but that's because not every row has a pathway listed 17 | rowIndiciesForPathways <- splitSifnxByPathway(sifnx$edges) 18 | 19 | bmp <- sifnx$edges[rowIndiciesForPathways$`Signaling by BMP`, ] 20 | 21 | # If you prefer a data.frame over a data.table (data.table is used for file reading speed), then do this: 22 | library(data.table) 23 | class(bmp) # Should be "data.table" "data.frame" 24 | setDF(bmp) 25 | class(bmp) # Should be "data.frame" 26 | 27 | # Plot in R 28 | 29 | # For simple plotting in R 30 | g <- loadSifInIgraph(bmp) 31 | plot(g) 32 | 33 | 34 | -------------------------------------------------------------------------------- /vignettes/getNeighbors.R: -------------------------------------------------------------------------------- 1 | With paxtoolsr the heap size can also be increased; see below 2 | 3 | http://bioconductor.org/packages/release/bioc/vignettes/paxtoolsr/inst/doc/using_paxtoolsr.html#memory-limits-specify-jvm-maximum-heap-size 4 | 5 | # Change heap size when using paxtoolsr 6 | options(java.parameters = "-Xmx4g") 7 | 
library(paxtoolsr) 8 | 9 | gzFile <- "PathwayCommons.8.reactome.BIOPAX.owl.gz" 10 | owlFile <- "PathwayCommons.8.reactome.BIOPAX.owl" 11 | untar(gzFile, exdir=".") 12 | 13 | out1 <- "out1.out" 14 | out2 <- "out2.out" 15 | t1 <- getNeighbors(owlFile, out1, "http://pathwaycommons.org/pc2/Protein_c1c45c11f5ebaffd5a137865b2f4a6a3") 16 | 17 | But what I anticipate you want to do is not completely straightforward with getNeighbors(). You cannot simply pass gene IDs to getNeighbors in either paxtools or paxtoolsr (which calls paxtools getNeighbors). You need to pass in URIs. 18 | 19 | NOTE: GNG2: http://pathwaycommons.org/pc2/Protein_c1c45c11f5ebaffd5a137865b2f4a6a3 20 | NOTE: Equivalent paxtools command: java -Xmx4g -jar paxtools-4.3.1.jar getNeighbors PathwayCommons.8.reactome.BIOPAX.owl http://pathwaycommons.org/pc2/Protein_c1c45c11f5ebaffd5a137865b2f4a6a3 cmd.out 21 | 22 | You'll end up with a BioPAX file that can be converted to a SIF 23 | 24 | t2 <- toSif(t1, out2) 25 | 26 | In my understanding of getNeighbors this will return all the genes and small molecules that interact with GNG2 **PLUS** all the genes those entities interact with 27 | 28 | I wrote the filterSif function, which does not use URIs and which I use pretty often for research projects: 29 | 30 | sif <- downloadPc2("PathwayCommons.8.reactome.BINARY_SIF.hgnc.txt.sif.gz", version="8") 31 | filteredSif <- filterSif(sif, ids="SDC1", interactionTypes="controls-state-change-of") 32 | 33 | This allows you to use gene symbols and ignore interaction types you might not want. 
But you need the development version from GitHub (https://github.com/BioPAX/paxtoolsr) if you want this functionality: 34 | 35 | # Install 36 | setRepositories(ind=1:6) 37 | options(repos="http://cran.rstudio.com/") 38 | if(!require(devtools)) { install.packages("devtools") } 39 | library(devtools) 40 | install_github("BioPAX/paxtoolsr") 41 | 42 | gp <- graphPc(kind="NEIGHBORHOOD", datasource="reactome", format="BINARY_SIF", source="SDC1") 43 | 44 | --------------------------------------------------------------------------------