├── .Rbuildignore ├── .gitignore ├── data ├── STRING.rda └── Tgfb_phospho.rda ├── vignettes ├── PCSF-ref-manual.pdf ├── ref.bib └── PCSF-manual.Rmd ├── HISTORY.rst ├── PCSF.Rproj ├── NAMESPACE ├── R ├── data_interactome.R ├── data_phosphoproteomics.R ├── RcppExports.R ├── construct_interactome.R ├── plot.PCSFe.R ├── call_enr.R ├── enrichment_analysis.R ├── PCSF_rand.R ├── plot.PCSF.R └── PCSF.R ├── man ├── STRING.Rd ├── Tgfb_phospho.Rd ├── call_enr.Rd ├── call_sr.Rd ├── construct_interactome.Rd ├── PCSF_rand.Rd ├── plot.PCSF.Rd ├── plot.PCSFe.Rd ├── enrichment_analysis.Rd └── PCSF.Rd ├── DESCRIPTION ├── LICENSE ├── src ├── RcppExports.cpp └── PCSF.cpp └── README.md /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | src/*.o 5 | src/*.so 6 | src/*.dll 7 | -------------------------------------------------------------------------------- /data/STRING.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IOR-Bioinformatics/PCSF/HEAD/data/STRING.rda -------------------------------------------------------------------------------- /data/Tgfb_phospho.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IOR-Bioinformatics/PCSF/HEAD/data/Tgfb_phospho.rda -------------------------------------------------------------------------------- /vignettes/PCSF-ref-manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IOR-Bioinformatics/PCSF/HEAD/vignettes/PCSF-ref-manual.pdf -------------------------------------------------------------------------------- /HISTORY.rst: 
-------------------------------------------------------------------------------- 1 | .. :changelog: 2 | 3 | History 4 | ------- 5 | 6 | 2017-08-22 7 | --------------------- 8 | * version 0.99.1 9 | * Parameters to set the legends for the nodes are added into function. 10 | 11 | 2017-07-11 12 | --------------------- 13 | * Enrichment analysis with topGO package is added 14 | 15 | 2017-02-21 16 | --------------------- 17 | * First release - version 0.99.0 18 | -------------------------------------------------------------------------------- /PCSF.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageCheckArgs: --as-cran 19 | PackageRoxygenize: rd,collate 20 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(PCSF) 2 | exportPattern("^[[:alpha:]]+") 3 | importFrom(Rcpp, evalCpp) 4 | import(BH) 5 | import(httr) 6 | import(org.Hs.eg.db) 7 | import(topGO, except = c(graph, algorithm)) 8 | import(igraph) 9 | importFrom(visNetwork, visIgraph) 10 | importFrom(visNetwork, visIgraphLayout) 11 | importFrom(visNetwork, visGroups) 12 | importFrom(visNetwork, visLegend) 13 | importFrom(visNetwork, visOptions) 14 | importFrom(visNetwork, visNodes) 15 | importFrom(visNetwork, visNetwork) 16 | importFrom(methods, new) -------------------------------------------------------------------------------- /R/data_interactome.R: -------------------------------------------------------------------------------- 1 | #' Protein-protein interaction 
network data 2 | #' 3 | #' An interactome data set in which the nodes are named with gene symbols 4 | #' 5 | #' @name STRING 6 | #' 7 | #' @format A data frame with three variables, where each row corresponds to 8 | #' an edge in which the first element is a \code{head}, the second 9 | #' element is a \code{tail}, and the last element represents the \code{cost} of the edge. 10 | #' 11 | #' @docType data 12 | #' @usage STRING 13 | #' 14 | #' @source iref_mitab_miscore_2013_08_12_interactome.txt \url{https://github.com/fraenkel-lab/OmicsIntegrator/tree/master/data} 15 | #' 16 | #' @keywords data 17 | #' 18 | NULL -------------------------------------------------------------------------------- /R/data_phosphoproteomics.R: -------------------------------------------------------------------------------- 1 | #' Phosphoproteomic data 2 | #' 3 | #' This dataset contains differential phosphoproteomic data derived from H358 cells, 4 | #' a model of lung cancer, that were stimulated with TGF-b. 5 | #' 6 | #' @name Tgfb_phospho 7 | #' 8 | #' @format A named \code{numeric} vector, where terminal genes are named same as 9 | #' in the interaction network and numeric values correspond to the importance of 10 | #' the gene within the study. 
11 | #' 12 | #' @docType data 13 | #' @usage Tgfb_phospho 14 | #' 15 | #' @source Tgfb_phos.txt \url{https://github.com/fraenkel-lab/OmicsIntegrator/tree/master/example/a549} 16 | #' 17 | #' @keywords data 18 | #' 19 | #' 20 | NULL -------------------------------------------------------------------------------- /man/STRING.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_interactome.R 3 | \docType{data} 4 | \name{STRING} 5 | \alias{STRING} 6 | \title{Protein-protein interaction network data} 7 | \format{A data frame with three variables, where each row corresponds to 8 | an edge in which the first element is a \code{head}, the second 9 | element is a \code{tail}, and the last element represents the \code{cost} of the edge.} 10 | \source{ 11 | iref_mitab_miscore_2013_08_12_interactome.txt \url{https://github.com/fraenkel-lab/OmicsIntegrator/tree/master/data} 12 | } 13 | \usage{ 14 | STRING 15 | } 16 | \description{ 17 | An interactome data set in which the nodes are named with gene symbols 18 | } 19 | \keyword{data} 20 | -------------------------------------------------------------------------------- /man/Tgfb_phospho.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_phosphoproteomics.R 3 | \docType{data} 4 | \name{Tgfb_phospho} 5 | \alias{Tgfb_phospho} 6 | \title{Phosphoproteomic data} 7 | \format{A named \code{numeric} vector, where terminal genes are named same as 8 | in the interaction network and numeric values correspond to the importance of 9 | the gene within the study.} 10 | \source{ 11 | Tgfb_phos.txt \url{https://github.com/fraenkel-lab/OmicsIntegrator/tree/master/example/a549} 12 | } 13 | \usage{ 14 | Tgfb_phospho 15 | } 16 | \description{ 17 | This dataset contains differential phosphoproteomic data 
derived from H358 cells, 18 | a model of lung cancer, that were stimulated with TGF-b. 19 | } 20 | \keyword{data} 21 | -------------------------------------------------------------------------------- /man/call_enr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/call_enr.R 3 | \name{call_enr} 4 | \alias{call_enr} 5 | \title{Internal function \code{call_enr}} 6 | \usage{ 7 | call_enr(clusters, mode = 0, gene_universe) 8 | } 9 | \arguments{ 10 | \item{clusters}{A subnetwork clustered using edge betweenness algorithm of \pkg{igraph} package.} 11 | 12 | \item{mode}{A binary variable to choose the method for enrichment analysis, where 0 is for EnrichR API and 1 is for \pkg{topGO} package.} 13 | 14 | \item{gene_universe}{A complete list of genes (vector of gene symbols) used as background in enrichment analysis by \pkg{topGO} package.} 15 | } 16 | \description{ 17 | This function is internally used to perform enrichment analysis employing ENRICHR API. 
18 | } 19 | \author{ 20 | Murodzhon Akhmedov 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/call_sr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{call_sr} 4 | \alias{call_sr} 5 | \title{Internal function \code{call_sr}} 6 | \usage{ 7 | call_sr(from, to, cost, node_names, node_prizes) 8 | } 9 | \arguments{ 10 | \item{from}{A \code{CharacterVector} that corresponds to the \code{head} nodes of the edges.} 11 | 12 | \item{to}{A \code{CharacterVector} that corresponds to the \code{tail} nodes of the edges.} 13 | 14 | \item{cost}{A \code{NumericVector} which represents the edge weights.} 15 | 16 | \item{node_names}{A \code{CharacterVector} that holds the names of the nodes.} 17 | 18 | \item{node_prizes}{A \code{NumericVector} which corresponds to the node prizes.} 19 | } 20 | \description{ 21 | This function is internally used to solve the PCST. 22 | } 23 | \author{ 24 | Murodzhon Akhmedov 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #' Internal function \code{call_sr} 5 | #' 6 | #' This function is internally used to solve the PCST. 7 | #' 8 | #' @keywords internal 9 | #' 10 | #' @param from A \code{CharacterVector} that corresponds to the \code{head} nodes of the edges. 11 | #' @param to A \code{CharacterVector} that corresponds to the \code{tail} nodes of the edges. 12 | #' @param cost A \code{NumericVector} which represents the edge weights. 13 | #' @param node_names A \code{CharacterVector} that holds the names of the nodes.
14 | #' @param node_prizes A \code{NumericVector} which corresponds to the node prizes. 15 | #' @author Murodzhon Akhmedov 16 | #' @return The value produced by the compiled routine \code{_PCSF_call_sr}, returned unchanged. 17 | call_sr <- function(from, to, cost, node_names, node_prizes) { 18 | .Call('_PCSF_call_sr', PACKAGE = 'PCSF', from, to, cost, node_names, node_prizes) 19 | } 20 | 21 | -------------------------------------------------------------------------------- /man/construct_interactome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/construct_interactome.R 3 | \name{construct_interactome} 4 | \alias{construct_interactome} 5 | \title{Construct an interaction network} 6 | \usage{ 7 | construct_interactome(ppi) 8 | } 9 | \arguments{ 10 | \item{ppi}{A list of edges. A \code{data.frame} composed of three columns, where each 11 | row corresponds to an edge in which the first element is a \code{head}, the second 12 | element is a \code{tail}, and the last element represents the \code{cost} of the edge.} 13 | } 14 | \value{ 15 | An interaction network as \pkg{igraph} object. 16 | } 17 | \description{ 18 | Given a list of edges, \code{construct_interactome} generates 19 | an interaction network which is used as a template network to interpret the highthrougput data.
20 | } 21 | \examples{ 22 | \dontrun{ 23 | library("PCSF") 24 | data("STRING") 25 | ppi <- construct_interactome(STRING)} 26 | 27 | } 28 | \author{ 29 | Murodzhon Akhmedov 30 | } 31 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: PCSF 2 | Type: Package 3 | Title: Network-based interpretation of high-throughput data 4 | Version: 0.99.1 5 | Date: 2017-02-01 6 | Author: Murodzhon Akhmedov, Amanda Kedaigle, Renan Escalante, Roberto Montemanni, Francesco Bertoni, Ernest Fraenkel, Ivo Kwee 7 | Maintainer: Murodzhon Akhmedov 8 | Description: The PCSF package performs an integrated analysis of high-throughput data using the interaction networks as a template, and interprets the biological landscape of interaction networks with respect to the data, which potentially leads to predictions of functional units. It also interactively visualizes the resulting subnetwork with functional enrichment analysis.
9 | License: MIT + file LICENSE 10 | Suggests: 11 | knitr, 12 | rmarkdown 13 | Depends: 14 | R (>= 3.1.0), 15 | igraph, 16 | visNetwork, 17 | Imports: 18 | BH, 19 | httr, 20 | methods, 21 | org.Hs.eg.db, 22 | Rcpp, 23 | topGO 24 | LinkingTo: Rcpp, BH 25 | RoxygenNote: 6.0.1 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) <2017>, 3 | 4 | ************************************************************************ 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 21 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
24 | 25 | ************************************************************************ -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include <Rcpp.h> 5 | 6 | using namespace Rcpp; 7 | 8 | // call_sr 9 | List call_sr(CharacterVector from, CharacterVector to, NumericVector cost, CharacterVector node_names, NumericVector node_prizes); 10 | RcppExport SEXP _PCSF_call_sr(SEXP fromSEXP, SEXP toSEXP, SEXP costSEXP, SEXP node_namesSEXP, SEXP node_prizesSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< CharacterVector >::type from(fromSEXP); 15 | Rcpp::traits::input_parameter< CharacterVector >::type to(toSEXP); 16 | Rcpp::traits::input_parameter< NumericVector >::type cost(costSEXP); 17 | Rcpp::traits::input_parameter< CharacterVector >::type node_names(node_namesSEXP); 18 | Rcpp::traits::input_parameter< NumericVector >::type node_prizes(node_prizesSEXP); 19 | rcpp_result_gen = Rcpp::wrap(call_sr(from, to, cost, node_names, node_prizes)); 20 | return rcpp_result_gen; 21 | END_RCPP 22 | } 23 | // .Call entry table: {name, function pointer, argument count}, terminated by a NULL row. 24 | static const R_CallMethodDef CallEntries[] = { 25 | {"_PCSF_call_sr", (DL_FUNC) &_PCSF_call_sr, 5}, 26 | {NULL, NULL, 0} 27 | }; 28 | // Library initialisation hook: registers the .Call table and turns off dynamic symbol lookup. 29 | RcppExport void R_init_PCSF(DllInfo *dll) { 30 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 31 | R_useDynamicSymbols(dll, FALSE); 32 | } 33 | -------------------------------------------------------------------------------- /R/construct_interactome.R: -------------------------------------------------------------------------------- 1 | #' Construct an interaction network 2 | #' 3 | #' Given a list of edges, \code{construct_interactome} generates 4 | #' an interaction network which is
used as a template network to interpret the high-throughput data. 5 | #' The graph is built as undirected and then passed through \code{simplify}. 6 | #' @param ppi A list of edges. A \code{data.frame} composed of three columns, where each 7 | #' row corresponds to an edge in which the first element is a \code{head}, the second 8 | #' element is a \code{tail}, and the last element represents the \code{cost} of the edge. 9 | #' Objects that inherit from \code{data.frame} are accepted and converted with \code{as.data.frame}. 10 | #' @return An interaction network as \pkg{igraph} object. 11 | #' @import igraph 12 | #' @export 13 | #' 14 | #' @examples 15 | #' \dontrun{ 16 | #' library("PCSF") 17 | #' data("STRING") 18 | #' ppi <- construct_interactome(STRING)} 19 | #' 20 | #' @author Murodzhon Akhmedov 21 | #' 22 | 23 | construct_interactome <- 24 | function(ppi){ 25 | 26 | # Checking function arguments 27 | if (missing(ppi)) 28 | stop(" Need to specify a list of edges to construct an interaction network. 29 | Provide a data.frame composed of three columns, where each row corresponds 30 | to an edge in which the first element is a head node, the second element 31 | is a tail node, and the last element represents the cost of the edge.") 32 | if (!is.data.frame(ppi) || nrow(ppi)<1 || ncol(ppi) != 3) # type test first: it is always a single TRUE/FALSE and short-circuits nrow()/ncol() 33 | stop(" Need to provide a data.frame composed of three columns, where each row corresponds 34 | to an edge in which the first element is a head node, the second element 35 | is a tail node, and the last element represents the cost of the edge.") 36 | ppi = as.data.frame(ppi) # strip subclasses (e.g. tibble) so that ppi[,i] returns a plain column vector 37 | # Collect the unique node names from both endpoint columns 38 | node_names = unique(c(as.character(ppi[,1]),as.character(ppi[,2]))) 39 | 40 | # Construct an interaction network as igraph object 41 | ppi.graph = graph.data.frame(ppi[,1:2],vertices=node_names,directed=FALSE) 42 | E(ppi.graph)$weight=as.numeric(ppi[,3]) 43 | ppi.graph = simplify(ppi.graph) 44 | 45 | return (ppi.graph) 46 | 47 | } 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | PCSF: an R-Package for Network-Based
Interpretation of High-throughput Data 4 | =============================== 5 | 6 | The PCSF package performs fast and user-friendly network analysis of high-throughput data. Using interaction networks as a template, it determines high-confidence subnetworks relevant to the data, which potentially leads to predictions of functional units. It also interactively visualizes the resulting subnetwork with functional enrichment analysis. 7 | 8 | Contact: Murodzhon Akhmedov [murodzhon.akhmedov@irb.usi.ch] 9 | 10 | 11 | 12 | Reference: 13 | -------------------- 14 | [A divide and conquer matheuristic algorithm for the Prize-collecting Steiner Tree Problem.](http://www.sciencedirect.com/science/article/pii/S0305054815003019) 15 | Akhmedov M, Kwee I, and Montemanni R (2016). *Computers and Operations Research*, 70, 18-25. 16 | 17 | A fast Prize-collecting Steiner Forest algorithm for Functional Analyses in Biological Networks. 18 | Akhmedov M, LeNail A, Bertoni F, Kwee I, Fraenkel E and Montemanni R (2017). *Lecture Notes in Computer Science*, to appear. 19 | 20 | 21 | System Requirements: 22 | -------------------- 23 | 1. R (>= 3.1.0) 24 | 25 | 2. Boost C++ library: http://www.boost.org 26 | 27 | 28 | 29 | Installation: 30 | -------------------- 31 | 32 | 1. The PCSF package depends on the following R-packages: 33 | 34 | - `BH` and `igraph` - for efficient graph handling and calculations, 35 | - `httr`, `methods`, `org.Hs.eg.db`, and `topGO` - to perform enrichment analysis, 36 | - `Rcpp` - to employ C++ source code within R, 37 | - `visNetwork` - for visualization. 38 | 39 | 40 | 2. In order to compile the source, Windows users should install the `Rtools` package by the following [link](https://cran.r-project.org/bin/windows/Rtools/) that installs GCC and CMake. 41 | 42 | 43 | 3. The PCSF package and its dependencies can be installed on Mac OS, Linux and Windows by running the following commands in the R console. 
44 | 45 | ``` 46 | source("http://bioconductor.org/biocLite.R") 47 | biocLite("topGO") 48 | install.packages("devtools", dependencies=TRUE) 49 | devtools::install_github("IOR-Bioinformatics/PCSF", repos=BiocInstaller::biocinstallRepos(), 50 | dependencies=TRUE, type="source", force=TRUE) 51 | ``` 52 | 53 | 54 | Comments: 55 | -------------------- 56 | 57 | #### Test environments 58 | 59 | * Mac OS X (10.12.4), R 3.4.0 60 | * Ubuntu (16.04), R 3.2.3 61 | * Windows 7, R 3.4.1 62 | 63 | #### R CMD check results 64 | 65 | There were no ERRORs, WARNINGs or NOTEs. 66 | -------------------------------------------------------------------------------- /man/PCSF_rand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PCSF_rand.R 3 | \name{PCSF_rand} 4 | \alias{PCSF_rand} 5 | \title{Prize-collecting Steiner Forest (PCSF) with randomized edge costs} 6 | \usage{ 7 | PCSF_rand(ppi, terminals, n = 10, r = 0.1, w = 2, b = 1, mu = 5e-04) 8 | } 9 | \arguments{ 10 | \item{ppi}{An interaction network as an \pkg{igraph} object.} 11 | 12 | \item{terminals}{A list of terminal genes with prizes to be analyzed in the PCSF context. 13 | A named \code{numeric} vector, where terminal genes are named same as in the interaction network 14 | and numeric values correspond to the importance of the gene within the study.} 15 | 16 | \item{n}{An \code{integer} value to determine the number of runs with random noise added edge costs. 17 | A default value is 10.} 18 | 19 | \item{r}{A \code{numeric} value to determine additional random noise to edge costs. 20 | A random noise upto r percent of the edge cost is added to each edge. A default value is 0.1} 21 | 22 | \item{w}{A \code{numeric} value for tuning the number of trees in the output. A default value is 2.} 23 | 24 | \item{b}{A \code{numeric} value for tuning the node prizes. 
A default value is 1.} 25 | 26 | \item{mu}{A \code{numeric} value for a hub penalization. A default value is 0.0005.} 27 | } 28 | \value{ 29 | The final subnetwork obtained by taking the union of the PCSF outputs generated by 30 | adding a random noise to edge costs each time. It returns an \pkg{igraph} object with the node prize 31 | and edge cost attributes representing the total number of show ups throughout all runs. 32 | } 33 | \description{ 34 | \code{PCSF_rand} returns a union of subnetworks obtained by solving the PCSF on the 35 | given interaction network by adding a random noise to edge costs each time. 36 | } 37 | \details{ 38 | In order to increase the robustness of the resulting structure, 39 | it is recommended to solve the PCSF several times on the same network 40 | while adding some noise to the edge costs each time, and combine all results 41 | in a final subnetwork. The union of all outputs may explain 42 | the underlying biology better. 43 | } 44 | \examples{ 45 | \dontrun{ 46 | library("PCSF") 47 | data("STRING") 48 | data("Tgfb_phospho") 49 | terminals <- Tgfb_phospho 50 | ppi <- construct_interactome(STRING) 51 | subnet <- PCSF_rand(ppi, terminals, n = 10, r =0.1, w = 2, b = 2, mu = 0.0005)} 52 | 53 | } 54 | \references{ 55 | Akhmedov M., LeNail A., Bertoni F., Kwee I., Fraenkel E., and Montemanni R. (2017) 56 | A Fast Prize-Collecting Steiner Forest Algorithm for Functional Analyses in Biological Networks. 57 | \emph{Lecture Notes in Computer Science}, to appear. 
58 | } 59 | \seealso{ 60 | \code{\link{PCSF}}, \code{\link{plot.PCSFe}} 61 | } 62 | \author{ 63 | Murodzhon Akhmedov 64 | } 65 | -------------------------------------------------------------------------------- /man/plot.PCSF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.PCSF.R 3 | \name{plot.PCSF} 4 | \alias{plot.PCSF} 5 | \title{Plot an interactive subnetwork} 6 | \usage{ 7 | \method{plot}{PCSF}(x, style = 0, edge_width = 5, node_size = 40, 8 | node_label_cex = 30, Steiner_node_color = "lightblue", 9 | Terminal_node_color = "lightgreen", Terminal_node_legend = "Terminal", 10 | Steiner_node_legend = "Steiner", ...) 11 | } 12 | \arguments{ 13 | \item{x}{A subnetwork obtained by the PCSF method. It is a "PCSF" object derived 14 | from \pkg{igraph} class and it has the edge cost and vertex prize attributes.} 15 | 16 | \item{style}{A \code{boolean} value to determine the visualization style of the network, 17 | where \code{0} plots the \code{static} network and \code{1} plots the \code{dynamic} 18 | network. The default value is 0.} 19 | 20 | \item{edge_width}{A \code{numeric} value to emphasize a maximum edge width. A default value is 5. 21 | This value must be greater than 1.} 22 | 23 | \item{node_size}{A \code{numeric} value to emphasize a maximum node size. A default value is 40. 24 | This value must be greater than 10.} 25 | 26 | \item{node_label_cex}{A \code{numeric} value to set a node label size. A default value is 30.} 27 | 28 | \item{Steiner_node_color}{A \code{string} to set a color for \code{Steiner} nodes. 29 | A default value is "lightblue".} 30 | 31 | \item{Terminal_node_color}{A \code{string} to set a color for \code{terminal} nodes. A default value is "lightgreen".} 32 | 33 | \item{Terminal_node_legend}{A \code{string} to set a legend for \code{terminal} nodes.
34 | A default legend is "Terminal".} 35 | 36 | \item{Steiner_node_legend}{A \code{string} to set a legend for \code{Steiner} nodes. 37 | A default legend is "Steiner".} 38 | 39 | \item{...}{Ignored.} 40 | } 41 | \description{ 42 | \code{plot.PCSF} plots an interactive figure of the subnetwork obtained by 43 | the PCSF method. 44 | } 45 | \details{ 46 | This function plots an interactive subnetwork obtained by the \code{\link{PCSF}} and \code{\link{PCSF_rand}}. 47 | The node sizes and edge widths are respectively proportional to the node prizes and edge costs 48 | while plotting the subnetwork from \code{\link{PCSF}}. In contrast, the node sizes and edge widths are 49 | proportional to the number of appearances across the randomized runs while plotting the subnetwork 50 | from \code{\link{PCSF_rand}}. The node names are displayed during the hover-over. 51 | } 52 | \examples{ 53 | \dontrun{ 54 | library("PCSF") 55 | data("STRING") 56 | data("Tgfb_phospho") 57 | terminals <- Tgfb_phospho 58 | ppi <- construct_interactome(STRING) 59 | subnet <- PCSF(ppi, terminals, w = 2, b = 1, mu = 0.0005) 60 | plot(subnet)} 61 | 62 | } 63 | \seealso{ 64 | \code{\link{PCSF}}, \code{\link{plot.PCSFe}} 65 | } 66 | \author{ 67 | Murodzhon Akhmedov 68 | } 69 | -------------------------------------------------------------------------------- /man/plot.PCSFe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.PCSFe.R 3 | \name{plot.PCSFe} 4 | \alias{plot.PCSFe} 5 | \title{Plot an interactive subnetwork with functional enrichment analysis} 6 | \usage{ 7 | \method{plot}{PCSFe}(x, edge_width = 5, node_size = 30, 8 | node_label_cex = 1, Terminal_node_legend = "Terminal", 9 | Steiner_node_legend = "Steiner", ...) 10 | } 11 | \arguments{ 12 | \item{x}{An output subnetwork provided by the \code{enrichment_analysis}.
13 | It is a "PCSFe" object derived from an \pkg{igraph} class, and it has the edge 14 | cost and vertex prize attributes.} 15 | 16 | \item{edge_width}{A \code{numeric} value to emphasize a maximum edge width. 17 | A default value is 5. This value must be greater than 1.} 18 | 19 | \item{node_size}{A \code{numeric} value to emphasize a maximum node size. 20 | A default value is 30. This value must be greater than 10.} 21 | 22 | \item{node_label_cex}{A \code{numeric} value to set a node label size. 23 | A default value is 1.} 24 | 25 | \item{Terminal_node_legend}{A \code{string} to set a legend for \code{terminal} nodes. 26 | A default legend is "Terminal".} 27 | 28 | \item{Steiner_node_legend}{A \code{string} to set a legend for \code{Steiner} nodes. 29 | A default legend is "Steiner".} 30 | 31 | \item{...}{Ignored.} 32 | } 33 | \description{ 34 | \code{plot.PCSFe} plots an interactive figure of the subnetwork 35 | to display the functional enrichment analysis, which is obtained by employing 36 | \code{enrichment_analysis} on the subnetwork. 37 | } 38 | \details{ 39 | An enrichment analysis of the final subnetwork obtained by multiple runs of the PCSF 40 | (with random noise added edge costs) is performed by using \code{\link{enrichment_analysis}}. 41 | The subnetwork is clustered using an edge betweenness clustering algorithm from the 42 | \pkg{igraph} package, and for each cluster functional enrichment is done by employing the 43 | ENRICHR API (Chen \emph{et al.}, 2013). An interactive visualization of the final subnetwork 44 | is plotted, where the node sizes and edge widths are proportional to the frequency of show 45 | ups in total randomised runs. Nodes are colored according to the cluster membership, and 46 | the top 15 functional enrichment terms are displayed in tabular format during the hover-over 47 | of the node in that cluster.
A specific cluster can be displayed separately in the figure 48 | by selecting from the icon list at the top left side of the figure. 49 | } 50 | \examples{ 51 | \dontrun{ 52 | library("PCSF") 53 | data("STRING") 54 | data("Tgfb_phospho") 55 | terminals <- Tgfb_phospho 56 | ppi <- construct_interactome(STRING) 57 | subnet <- PCSF_rand(ppi, terminals, n = 10, r = 0.1, w = 2, b = 1, mu = 0.0005) 58 | res <- enrichment_analysis(subnet) 59 | plot(res$subnet)} 60 | 61 | } 62 | \references{ 63 | Chen E.Y., Christopher M.T., Yan K., Qiaonan D., Zichen W., Gabriela V.M., Neil R.C., and Avi M. (2013) 64 | Enrichr: Interactive and Collaborative Html5 Gene List Enrichment Analysis Tool. \emph{BMC Bioinformatics} 14 (1). 65 | BioMed Central: 1. 66 | } 67 | \seealso{ 68 | \code{\link{enrichment_analysis}}, \code{\link{PCSF_rand}}, \code{\link{plot.PCSF}} 69 | } 70 | \author{ 71 | Murodzhon Akhmedov 72 | } 73 | -------------------------------------------------------------------------------- /man/enrichment_analysis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/enrichment_analysis.R 3 | \name{enrichment_analysis} 4 | \alias{enrichment_analysis} 5 | \title{Perform enrichment analysis on the subnetwork} 6 | \usage{ 7 | enrichment_analysis(subnet, mode = NULL, gene_universe) 8 | } 9 | \arguments{ 10 | \item{subnet}{A subnetwork provided by \code{\link{PCSF_rand}}, which is obtained by merging 11 | a multiple outputs of the PCSF with random noise added edge costs. 
An \pkg{igraph} object 12 | with edge cost and vertex prize attributes representing the total number of 13 | show ups throughout all runs.} 14 | 15 | \item{mode}{A binary variable to choose the method for enrichment analysis, where 0 is for EnrichR API and 1 is for \pkg{topGO} package.} 16 | 17 | \item{gene_universe}{A complete list of genes (vector of gene symbols) used as background in enrichment analysis by \pkg{topGO} package.} 18 | } 19 | \value{ 20 | A list composed of an interactive subnetwork and a table with enrichment 21 | analysis results. An interactive subnetwork annotated with enrichment analysis 22 | can be reached by $subnet. A full list of enrichment analysis for each cluster 23 | can be reached by $enrichment. 24 | } 25 | \description{ 26 | \code{enrichment_analysis} performs functional enrichment analysis on the subnetwork 27 | obtained by the \code{\link{PCSF_rand}}, and returns an annotated subnetwork with top 15 28 | functional enrichments and a list of tables with a complete enrichment analysis for 29 | each cluster. 30 | } 31 | \details{ 32 | An enrichment analysis of the final subnetwork obtained by multiple runs of the PCSF 33 | (with random noise added edge costs) is performed for functional interpretation. 34 | The subnetwork is clustered using an edge betweenness clustering algorithm from 35 | the \pkg{igraph} package, and for each cluster functional enrichment is done by 36 | employing either EnrichR API (Chen \emph{et al.}, 2013) or 37 | \pkg{topGO} (Alexa and Rahnenfuhrer, 2009) 38 | package that is specified by the user. It is important to note that the EnrichR API requires 39 | a working Internet connection to perform the enrichment. If the user does not 40 | specify which tool to use for enrichment analysis, the package employs EnrichR 41 | as a default if there is an Internet connection, otherwise it uses \pkg{topGO}.
42 | 43 | An interactive visualization of 44 | the final subnetwork is plotted, where the node sizes and edge widths are proportional 45 | to the frequency of show ups throughout total runs. Nodes are colored according to the 46 | cluster membership, and the top 15 functional enrichment terms are displayed in tabular 47 | format during the hover-over of the node in that cluster. 48 | } 49 | \examples{ 50 | \dontrun{ 51 | library("PCSF") 52 | data("STRING") 53 | data("Tgfb_phospho") 54 | terminals <- Tgfb_phospho 55 | ppi <- construct_interactome(STRING) 56 | subnet <- PCSF_rand(ppi, terminals, n = 10, r = 0.1, w = 2, b = 1, mu = 0.0005) 57 | res <- enrichment_analysis(subnet) 58 | res <- enrichment_analysis(subnet, mode=0)} 59 | \dontrun{ 60 | library(topGO) 61 | gene_universe <- V(ppi)$name 62 | res <- enrichment_analysis(subnet, mode=1, gene_universe)} 63 | \dontrun{ 64 | plot(res$subnet) 65 | write.table(res$enrichment[[1]],file="cluster1_complete_enrichment.txt", 66 | append = FALSE, quote = FALSE, sep ="\\t", row.names=FALSE)} 67 | 68 | } 69 | \references{ 70 | Chen E.Y., Christopher M.T., Yan K., Qiaonan D., Zichen W., Gabriela V.M., Neil R.C., 71 | and Avi M. (2013) Enrichr: Interactive and Collaborative Html5 Gene List Enrichment 72 | Analysis Tool. \emph{BMC Bioinformatics} 14 (1). BioMed Central: 1. 73 | 74 | Alexa A. and Rahnenfuhrer J. (2009). topGO: Enrichment Analysis for Gene Ontology. 75 | R package version 2.28.0. 
76 | } 77 | \seealso{ 78 | \code{\link{PCSF_rand}}, \code{\link{plot.PCSFe}} 79 | } 80 | \author{ 81 | Murodzhon Akhmedov 82 | } 83 | -------------------------------------------------------------------------------- /vignettes/ref.bib: -------------------------------------------------------------------------------- 1 | @article{Tuncbag, 2 | title={Network-Based Interpretation of Diverse High-Throughput Datasets through the Omics Integrator Software Package}, 3 | author={Tuncbag, Nurcan and Gosline, Sara JC and Kedaigle, Amanda and Soltis, Anthony R and Gitter, Anthony and Fraenkel, Ernest}, 4 | journal={PLoS Comput Biol}, 5 | volume={12}, 6 | number={4}, 7 | pages={e1004879}, 8 | year={2016}, 9 | publisher={Public Library of Science} 10 | } 11 | 12 | @article{Szklarczyk, 13 | title={The STRING database in 2011: functional interaction networks of proteins, globally integrated and scored}, 14 | author={Szklarczyk, Damian and Franceschini, Andrea and Kuhn, Michael and Simonovic, Milan and Roth, Alexander and Minguez, Pablo and Doerks, Tobias and Stark, Manuel and Muller, Jean and Bork, Peer and others}, 15 | journal={Nucleic acids research}, 16 | volume={39}, 17 | number={suppl 1}, 18 | pages={D561--D568}, 19 | year={2011}, 20 | publisher={Oxford Univ Press} 21 | } 22 | 23 | @article{Dittrich, 24 | title={Identifying functional modules in protein--protein interaction networks: an integrated exact approach}, 25 | author={Dittrich, Marcus T and Klau, Gunnar W and Rosenwald, Andreas and Dandekar, Thomas and M{\"u}ller, Tobias}, 26 | journal={Bioinformatics}, 27 | volume={24}, 28 | number={13}, 29 | pages={i223--i231}, 30 | year={2008}, 31 | publisher={Oxford Univ Press} 32 | } 33 | 34 | @article{Bechet, 35 | title={Finding undetected protein associations in cell signaling by belief propagation}, 36 | author={Bailly-Bechet, Marc and Borgs, Christian and Braunstein, Alfredo and Chayes, J and Dagkessamanskaia, A and Fran{\c{c}}ois, J-M and Zecchina, Riccardo}, 37 | 
journal={Proceedings of the National Academy of Sciences}, 38 | volume={108}, 39 | number={2}, 40 | pages={882--887}, 41 | year={2011}, 42 | publisher={National Acad Sciences} 43 | } 44 | 45 | @article{Chen, 46 | title={Enrichr: interactive and collaborative HTML5 gene list enrichment analysis tool}, 47 | author={Chen, Edward Y and Tan, Christopher M and Kou, Yan and Duan, Qiaonan and Wang, Zichen and Meirelles, Gabriela Vaz and Clark, Neil R and Ma’ayan, Avi}, 48 | journal={BMC bioinformatics}, 49 | volume={14}, 50 | number={1}, 51 | pages={1}, 52 | year={2013}, 53 | publisher={BioMed Central} 54 | } 55 | 56 | 57 | @article{Akhmedov2016, 58 | title={A divide and conquer matheuristic algorithm for the Prize-collecting Steiner Tree Problem}, 59 | author={Akhmedov, Murodzhon and Kwee, Ivo and Montemanni, Roberto}, 60 | journal={Computers and Operations Research}, 61 | volume={70}, 62 | pages={18--25}, 63 | year={2016}, 64 | } 65 | 66 | @article{Akhmedov, 67 | title={A fast Prize-collecting Steiner Forest algorithm for Functional Analyses in Biological Networks}, 68 | author={Akhmedov, Murodzhon and LeNail, Alex and Bertoni, Francesco and Kwee, Ivo and Fraenkel, Ernest and Montemanni, Roberto}, 69 | journal={Lecture Notes in Computer Science}, 70 | pages={263--276}, 71 | year={2017}, 72 | } 73 | 74 | @article{Thomson, 75 | title={A systems view of epithelial--mesenchymal transition signaling states}, 76 | author={Thomson, Stuart and Petti, Filippo and Sujka-Kwok, Izabela and Mercado, Peter and Bean, James and Monaghan, Melissa and Seymour, Sean L and Argast, Gretchen M and Epstein, David M and Haley, John D}, 77 | journal={Clinical \& experimental metastasis}, 78 | volume={28}, 79 | number={2}, 80 | pages={137--155}, 81 | year={2011}, 82 | publisher={Springer} 83 | } 84 | 85 | @article{Tan, 86 | title={CBL is frequently altered in lung cancers: its relationship to mutations in MET and EGFR tyrosine kinases}, 87 | author={Tan, Yi-Hung Carol and Krishnaswamy, Soundararajan
and Nandi, Suvobroto and Kanteti, Rajani and Vora, Sapana and Onel, Kenan and Hasina, Rifat and Lo, Fang-Yi and El-Hashani, Essam and Cervantes, Gustavo and others}, 88 | journal={PloS one}, 89 | volume={5}, 90 | number={1}, 91 | pages={e8972}, 92 | year={2010}, 93 | publisher={Public Library of Science} 94 | } 95 | 96 | @article{Alexa, 97 | title={topGO: Enrichment Analysis for Gene Ontology}, 98 | author={Alexa, Adrian and Rahnenfuhrer, Jorg}, 99 | journal={R package version 2.28.0}, 100 | year={2009} 101 | } 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /man/PCSF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PCSF.R 3 | \name{PCSF} 4 | \alias{PCSF} 5 | \title{Prize-collecting Steiner Forest (PCSF)} 6 | \usage{ 7 | PCSF(ppi, terminals, w = 2, b = 1, mu = 5e-04) 8 | } 9 | \arguments{ 10 | \item{ppi}{An interaction network, an \pkg{igraph} object.} 11 | 12 | \item{terminals}{A list of terminal genes with prizes to be analyzed in the PCSF context. 13 | A named \code{numeric} vector, where terminal genes are named same as in the interaction network 14 | and numeric values correspond to the importance of the gene within the study.} 15 | 16 | \item{w}{A \code{numeric} value for tuning the number of trees in the output. A default value is 2.} 17 | 18 | \item{b}{A \code{numeric} value for tuning the node prizes. A default value is 1.} 19 | 20 | \item{mu}{A \code{numeric} value for a hub penalization. A default value is 0.0005.} 21 | } 22 | \value{ 23 | The final subnetwork obtained by the PCSF. 24 | It returns an \pkg{igraph} object with the node prize and edge cost attributes. 25 | } 26 | \description{ 27 | \code{PCSF} returns a subnetwork obtained by solving the PCSF on the given interaction network. 28 | } 29 | \details{ 30 | The PCSF is a well-known problem in graph theory.
31 | Given an undirected graph \emph{G = (V, E)}, where the vertices are labeled with prizes 32 | \eqn{p_{v}} and the edges are labeled with costs \eqn{c_{e} > 0}, the goal is to identify 33 | a subnetwork \emph{G' = (V', E')} with a forest structure. The target is to minimize 34 | the total edge costs in \emph{E'}, the total node prizes left out of \emph{V'}, and the 35 | number of trees in \emph{G'}. This is equivalent to minimization of the following 36 | objective function: 37 | 38 | \deqn{F(G')= Minimize \sum_{ e \in E'} c_{e} + \beta*\sum_{v \not\in V'} p_v + \omega*k} 39 | where \emph{k} is the number of trees in the forest, and it is regulated by parameter \eqn{\omega}. 40 | The parameter \eqn{\beta} is used to tune the prizes of nodes. 41 | 42 | This optimization problem nicely maps onto the problem of finding differentially 43 | enriched subnetworks in the cell protein-protein interaction (PPI) network. 44 | The vertices of interaction network correspond to genes or proteins, and edges 45 | represent the interactions among them. We can assign prizes 46 | to vertices based on measurements of differential expression, copy number, or 47 | mutation, and costs to edges based on confidence scores for those intra-cellular 48 | interactions from experimental observation, yielding a proper input to the PCSF 49 | problem. Vertices that are assigned a prize are referred to as \emph{terminal} nodes, 50 | whereas the vertices which are not observed in patient data are not assigned a 51 | prize and are called \emph{Steiner} nodes. After scoring the interactome, the 52 | PCSF is used to detect a relevant subnetwork (forest), which corresponds to a 53 | portion of the interactome, where many genes are highly correlated in terms of 54 | their functions and may regulate the differentially active biological process 55 | of interest. The PCSF aims to identify neighborhoods in interaction networks 56 | potentially belonging to the key dysregulated pathways of a disease.
57 | In order to avoid a bias towards the hub nodes of PPI networks to appear in solution 58 | of PCSF, we penalize the prizes of \emph{Steiner} nodes according to their degree 59 | distribution in PPI, and it is regulated by parameter \eqn{\mu}: 60 | 61 | \deqn{p'_{v} = p_{v} - \mu*degree(v)} 62 | 63 | The parameter \eqn{\mu} also affects the total number of \emph{Steiner} nodes in the solution. 64 | The higher the value of \eqn{\mu}, the smaller the number of \emph{Steiners} in the subnetwork, 65 | and vice-versa. Based on our previous analysis the recommended range of \eqn{\mu} 66 | for biological networks is between 1e-4 and 5e-2, and users can choose the values 67 | resulting in subnetworks with vertex sets that have desirable \emph{Steiner/terminal} 68 | node ratio and average \emph{Steiner/terminal} in-degree ratio 69 | in the template interaction network. 70 | } 71 | \examples{ 72 | \dontrun{ 73 | library("PCSF") 74 | data("STRING") 75 | data("Tgfb_phospho") 76 | terminals <- Tgfb_phospho 77 | ppi <- construct_interactome(STRING) 78 | subnet <- PCSF(ppi, terminals, w = 2, b = 1, mu = 0.0005)} 79 | 80 | } 81 | \references{ 82 | Akhmedov M., LeNail A., Bertoni F., Kwee I., Fraenkel E., and Montemanni R. (2017) 83 | A Fast Prize-Collecting Steiner Forest Algorithm for Functional Analyses in Biological Networks. 84 | \emph{Lecture Notes in Computer Science}, to appear. 85 | } 86 | \seealso{ 87 | \code{\link{PCSF_rand}}, \code{\link{plot.PCSF}} 88 | } 89 | \author{ 90 | Murodzhon Akhmedov 91 | } 92 | -------------------------------------------------------------------------------- /R/plot.PCSFe.R: -------------------------------------------------------------------------------- 1 | #' Plot an interactive subnetwork with functional enrichment analysis 2 | #' 3 | #' \code{plot.PCSFe} plots an interactive figure of the subnetwork 4 | #' to display the functional enrichment analysis, which is obtained by employing 5 | #' \code{enrichment_analysis} on the subnetwork.
6 | #' 7 | #' @param x An output subnetwork provided by the \code{enrichment_analysis}. 8 | #' It is a "PCSFe" object derived from an \pkg{igraph} class, and it has the edge 9 | #' cost and vertex prize attributes. 10 | #' @param edge_width A \code{numeric} value to emphasize a maximum edge width. 11 | #' A default value is 5. This value must be at least 2. 12 | #' @param node_size A \code{numeric} value to emphasize a maximum node size. 13 | #' A default value is 30. This value must be at least 10. 14 | #' @param node_label_cex A \code{numeric} value to set a node label size. 15 | #' A default value is 1. 16 | #' @param Steiner_node_legend A \code{string} to set a legend for \code{Steiner} nodes. 17 | #' A default legend is "Steiner". 18 | #' @param Terminal_node_legend A \code{string} to set a legend for \code{terminal} nodes. 19 | #' A default legend is "Terminal". 20 | #' @param extra_node_colors A \code{list} with colors of extra types of nodes added to the PCSF result, with the names of the list being the node type 21 | #' @param ... Ignored. 22 | #' @import igraph visNetwork 23 | #' @method plot PCSFe 24 | #' @export 25 | #' 26 | #' @details 27 | #' 28 | #' An enrichment analysis of the final subnetwork obtained by multiple runs of the PCSF 29 | #' (with random noise added edge costs) is performed by using \code{\link{enrichment_analysis}}. 30 | #' The subnetwork is clustered using an edge betweenness clustering algorithm from the 31 | #' \pkg{igraph} package, and for each cluster functional enrichment is done by employing the 32 | #' ENRICHR API (Chen \emph{et al.}, 2013). An interactive visualization of the final subnetwork 33 | #' is plotted, where the node sizes and edge widths are proportional to the frequency of show 34 | #' ups in total randomised runs.
Nodes are colored according to the cluster membership, and 35 | #' the top 15 functional enrichment terms are displayed in tabular format during the hover-over 36 | #' of the node in that cluster. A specific cluster can be displayed separately in the figure 37 | #' by selecting from the icon list at the top left side of the figure. 38 | #' 39 | #' 40 | #' @examples 41 | #' \dontrun{ 42 | #' library("PCSF") 43 | #' data("STRING") 44 | #' data("Tgfb_phospho") 45 | #' terminals <- Tgfb_phospho 46 | #' ppi <- construct_interactome(STRING) 47 | #' subnet <- PCSF_rand(ppi, terminals, n = 10, r = 0.1, w = 2, b = 1, mu = 0.0005) 48 | #' res <- enrichment_analysis(subnet) 49 | #' plot(res$subnet)} 50 | #' 51 | #' @author Murodzhon Akhmedov 52 | #' 53 | #' @references 54 | #' Chen E.Y., Christopher M.T., Yan K., Qiaonan D., Zichen W., Gabriela V.M., Neil R.C., and Avi M. (2013) 55 | #' Enrichr: Interactive and Collaborative Html5 Gene List Enrichment Analysis Tool. \emph{BMC Bioinformatics} 14 (1). 56 | #' BioMed Central: 1. 
57 | #' 58 | #' @seealso \code{\link{enrichment_analysis}}, \code{\link{PCSF_rand}}, \code{\link{plot.PCSF}} 59 | 60 | 61 | 62 | plot.PCSFe <-function(x, edge_width = 5, node_size = 30, node_label_cex = 1, 63 | Terminal_node_legend = "Terminal", 64 | Steiner_node_legend = "Steiner", 65 | extra_node_colors = list(), 66 | ...){ 67 | 68 | 69 | subnet = x 70 | # Checking function arguments 71 | if (missing(subnet)) 72 | stop("Need to specify the subnetwork obtained from the PCSF algorithm.") 73 | if (class(subnet)[1] != "PCSFe" || class(subnet)[2] != "igraph") 74 | stop("The subnetwork must be a \"PCSFe\" object derived from an \"igraph\" class.") 75 | if (edge_width < 2) 76 | stop("The edge_width must be greater than 2.") 77 | if (node_size < 10) 78 | stop("The node_size must be greater than 10.") 79 | 80 | # Add 'label' and 'size' attributes 81 | V(subnet)$label.cex = node_label_cex 82 | prize = abs(V(subnet)$prize) 83 | min1 = 10 84 | max1 = node_size 85 | r1 = max1 - min1 86 | min2 = min(prize) 87 | max2 = max(prize) 88 | r2 = max2 - min2 89 | adjusted_prize = r1*(prize - min2)/r2 + min1 90 | V(subnet)$size = adjusted_prize 91 | 92 | # Add the edge 'width' attribute 93 | weight = E(subnet)$weight 94 | min1 = 2 95 | max1 = edge_width 96 | r1 = max1 - min1 97 | min2 = min(weight) 98 | max2 = max(weight) 99 | r2 = max2 - min2 100 | adjusted_weight = r1*(weight - min2)/r2 + min1 101 | E(subnet)$width = adjusted_weight 102 | 103 | 104 | # Associate the type of nodes to shape 105 | shape = V(subnet)$type 106 | shape[which(shape=="Steiner")] = "triangle" 107 | shape[which(shape=="Terminal")] = "circle" 108 | V(subnet)$shape = shape 109 | 110 | # Visualize the subnet 111 | visIgraph(subnet) %>% 112 | visIgraphLayout(layout = "layout_with_fr") %>% 113 | visOptions(highlightNearest = list(enabled = T), selectedBy = "group")%>% 114 | visLegend(addNodes = list( 115 | list(label = Terminal_node_legend, shape = "dot", size = 15, label.cex = 0.3), 116 | list(label = 
Steiner_node_legend, shape = "triangle",size = 9, label.cex = 0.3)), width = 0.2, 117 | useGroups = FALSE) 118 | } -------------------------------------------------------------------------------- /R/call_enr.R: -------------------------------------------------------------------------------- 1 | #' Internal function \code{call_enr} 2 | #' 3 | #' This function is internally used to perform enrichment analysis employing the ENRICHR API (mode 0) or the topGO package (any other mode). It returns a list of two per-cluster lists: the top 15 enrichments rendered as a string, and the full EnrichR table or the top 1000 topGO terms. 4 | #' 5 | #' @keywords internal 6 | #' 7 | #' @author Murodzhon Akhmedov 8 | #' 9 | #' @param clusters A subnetwork clustered using edge betweenness algorithm of \pkg{igraph} package. 10 | #' @param mode A binary variable to choose the method for enrichment analysis, where 0 is for EnrichR API and 1 is for \pkg{topGO} package. 11 | #' @param gene_universe A complete list of genes (vector of gene symbols) used as background in enrichment analysis by \pkg{topGO} package. 12 | 13 | call_enr <- function(clusters, mode=0, gene_universe){ 14 | 15 | if( mode == 0){ 16 | 17 | # Enrichment analysis is performed with ENRICHR 18 | ENRICHR_ADDLIST = 'http://amp.pharm.mssm.edu/Enrichr/addList' 19 | ENRICHR_EXPORT = 'http://amp.pharm.mssm.edu/Enrichr/export' 20 | 21 | # The list of databases to be checked in Enrichment Analysis 22 | database = c("GO_Biological_Process_2015","KEGG_2016", "Reactome_2016", "BioCarta_2016") 23 | 24 | # Enrichment results 25 | enrichment_result = as.list(1:length(clusters)) 26 | enrichment_result_complete = as.list(1:length(clusters)) 27 | 28 | # Perform Enrichment Analysis for each cluster in the forest 29 | for( a in 1:length(clusters)){ 30 | 31 | # List of genes to be requested for enrichment via ENRICHR API 32 | genes = clusters[[a]] 33 | request = list(list = paste(genes, collapse = "\n")) 34 | complete_request = POST(ENRICHR_ADDLIST, body = request) 35 | output =content(complete_request, "text", encoding = "ISO-8859-1") 36 | userListID = strsplit(strsplit(output, "\n")[[1]][3], ": ")[[1]][2] 37 | 38 | 
response_collection=NULL 39 | 40 | # Request enrichment for each database and combine them all 41 | for( b in 1:length(database)){ 42 | 43 | # Gather an EXPORT URL and the Response 44 | url = paste0(ENRICHR_EXPORT, "?userListId=",userListID, "&backgroundType=", database[b]) 45 | response = GET(url) 46 | response = content(response, "text", encoding = "ISO-8859-1") 47 | response = strsplit(response, "\n")[[1]] 48 | # if(length(unlist(response ))==1) 49 | # next 50 | response = lapply(response, function(x){sp = strsplit(x, "\t")[[1]]; return (sp)}) 51 | 52 | # If the response contains some elements then combine it 53 | if(length(response)>1){ 54 | x = length(response)-1 55 | m_resp = as.data.frame(matrix(0, nrow = x, ncol = length(response[[1]]))) 56 | colnames(m_resp) = response[[1]] 57 | for(i in 1:x){ 58 | m_resp[i,] = response[[i+1]] 59 | } 60 | response_collection = rbind(response_collection,m_resp) 61 | } 62 | } 63 | 64 | 65 | if(is.null(response_collection)) 66 | next 67 | # Reorder the enrichment according to the "Adjusted P-value" and select the top 15 enrichments 68 | ordered_resp = data.frame(response_collection$`Term`, response_collection$`Adjusted P-value`, response_collection$`Combined Score`) 69 | ordered_resp = ordered_resp[order(ordered_resp[,2]),][1:15,] 70 | ordered_resp[,2] = signif(as.numeric(as.character(ordered_resp[,2])), 3) 71 | ordered_resp[,3] = signif(as.numeric(as.character(ordered_resp[,3])), 3) 72 | 73 | # Convert the enrichment table into HTML format in order to display it 74 | enrich = " 79 | "; 80 | for(i in 1:nrow(ordered_resp)){ 81 | enrich = paste0(enrich, " ") 82 | for(j in 1:ncol(ordered_resp)){ 83 | enrich = paste0(enrich, "") 84 | } 85 | enrich = paste0(enrich, " ") 86 | } 87 | enrich = paste0(enrich, "
Term Adjusted P-value Combined Score
",ordered_resp[i,j], "
") 88 | 89 | # Attach the Enrichment Analysis for the current cluster 90 | enrichment_result[[a]] = enrich 91 | enrichment_result_complete[[a]] = response_collection 92 | } 93 | 94 | } else{ 95 | 96 | # Enrichment analysis is performed by topGO 97 | 98 | # Enrichment results 99 | enrichment_result = as.list(1:length(clusters)) 100 | enrichment_result_complete = as.list(1:length(clusters)) 101 | 102 | # Perform Enrichment Analysis for each cluster in the forest 103 | for( a in 1:length(clusters)){ 104 | 105 | # List of genes to be requested for enrichment by topGO 106 | genes = clusters[[a]] 107 | fg <- factor(as.integer(gene_universe %in% genes)) 108 | names(fg) <- gene_universe 109 | tgData <- new("topGOdata", description = "simple_session", ontology = "BP", 110 | allGenes=fg, nodeSize=15, annot=annFUN.org, mapping = "org.Hs.eg.db", ID = "symbol") 111 | 112 | resultFisher <- runTest(tgData, algorithm = "classic", statistic = "fisher") 113 | resultKS <- runTest(tgData, algorithm = "classic", statistic = "ks") 114 | 115 | res_table_top15 <- GenTable(tgData, classicFisher = resultFisher, 116 | classicKS = resultKS, 117 | orderBy = "classicFisher", ranksOf = "classicFisher", topNodes = 15) 118 | res_table_top1000 <- GenTable(tgData, classicFisher = resultFisher, 119 | classicKS = resultKS, 120 | orderBy = "classicFisher", ranksOf = "classicFisher", topNodes = 1000) 121 | 122 | 123 | # Reorder the enrichment according to the "Adjusted P-value" and select the top 15 enrichments 124 | #ordered_resp[,2] = signif(as.numeric(as.character(ordered_resp[,2])), 3) 125 | #ordered_resp[,3] = signif(as.numeric(as.character(ordered_resp[,3])), 3) 126 | 127 | # Convert the enrichment table into HTML format in order to display it 128 | enrich = " 133 | "; 134 | for(i in 1:nrow(res_table_top15)){ 135 | enrich = paste0(enrich, " ") 136 | for(j in 1:ncol(res_table_top15)){ 137 | enrich = paste0(enrich, "") 138 | } 139 | enrich = paste0(enrich, " ") 140 | } 141 | enrich = paste0(enrich, "
GO.ID Term Annotated Significant Expected classicFisher classicKS
",res_table_top15[i,j], "
") 142 | 143 | # Attach the Enrichment Analysis for the current cluster 144 | enrichment_result[[a]] = enrich 145 | enrichment_result_complete[[a]] = res_table_top1000 146 | } 147 | 148 | } 149 | 150 | 151 | return (list(enrichment_result, enrichment_result_complete)) 152 | 153 | } 154 | -------------------------------------------------------------------------------- /R/enrichment_analysis.R: -------------------------------------------------------------------------------- 1 | #' Perform enrichment analysis on the subnetwork 2 | #' 3 | #' \code{enrichment_analysis} performs functional enrichment analysis on the subnetwork 4 | #' obtained by the \code{\link{PCSF_rand}}, and returns an annotated subnetwork with top 15 5 | #' functional enrichments and a list of tables with a complete enrichment analysis for 6 | #' each cluster. 7 | #' 8 | #' @param subnet A subnetwork provided by \code{\link{PCSF_rand}}, which is obtained by merging 9 | #' multiple outputs of the PCSF with random noise added edge costs. An \pkg{igraph} object 10 | #' with edge cost and vertex prize attributes representing the total number of 11 | #' show ups throughout all runs. 12 | #' @param mode A binary variable to choose the method for enrichment analysis, where 0 is for EnrichR API and 1 is for \pkg{topGO} package. 13 | #' @param gene_universe A complete list of genes (vector of gene symbols) used as background in enrichment analysis by \pkg{topGO} package. 14 | #' 15 | #' @return A list composed of an interactive subnetwork and a table with enrichment 16 | #' analysis results. An interactive subnetwork annotated with enrichment analysis 17 | #' can be reached by $subnet. A full list of enrichment analysis for each cluster 18 | #' can be reached by $enrichment. 19 | #' 20 | #' @export 21 | #' 22 | #' @details 23 | #' An enrichment analysis of the final subnetwork obtained by multiple runs of the PCSF 24 | #' (with random noise added edge costs) is performed for functional interpretation.
25 | #' The subnetwork is clustered using an edge betweenness clustering algorithm from 26 | #' the \pkg{igraph} package, and for each cluster functional enrichment is done by 27 | #' employing either EnrichR API (Chen \emph{et al.}, 2013) or 28 | #' \pkg{topGO} (Alexa and Rahnenfuhrer, 2009) 29 | #' package that is specified by the user. Important to note that EnrichR API requires 30 | #' a working Internet connection to perform the enrichment. If the user does not 31 | #' specify which tool to use for enrichment analysis, the package employs EnrichR 32 | #' as a default if there is Internet connection, otherwise it uses \pkg{topGO}. 33 | #' 34 | #' An interactive visualization of 35 | #' the final subnetwork is plotted, where the node sizes and edge widths are proportional 36 | #' to the frequency of show ups throughout total runs. Nodes are colored according to the 37 | #' cluster membership, and the top 15 functional enrichment terms are displayed in tabular 38 | #' format during the hover-over of the node in that cluster. 
39 | #' 40 | #' 41 | #' @examples 42 | #' \dontrun{ 43 | #' library("PCSF") 44 | #' data("STRING") 45 | #' data("Tgfb_phospho") 46 | #' terminals <- Tgfb_phospho 47 | #' ppi <- construct_interactome(STRING) 48 | #' subnet <- PCSF_rand(ppi, terminals, n = 10, r = 0.1, w = 2, b = 1, mu = 0.0005) 49 | #' res <- enrichment_analysis(subnet) 50 | #' res <- enrichment_analysis(subnet, mode=0)} 51 | #' \dontrun{ 52 | #' library(topGO) 53 | #' gene_universe <- V(ppi)$name 54 | #' res <- enrichment_analysis(subnet, mode=1, gene_universe)} 55 | #' \dontrun{ 56 | #' plot(res$subnet) 57 | #' write.table(res$enrichment[[1]],file="cluster1_complete_enrichment.txt", 58 | #' append = FALSE, quote = FALSE, sep ="\t", row.names=FALSE)} 59 | #' 60 | #' @author Murodzhon Akhmedov 61 | #' 62 | #' @seealso \code{\link{PCSF_rand}}, \code{\link{plot.PCSFe}} 63 | #' 64 | #' 65 | #' 66 | #' @references 67 | #' Chen E.Y., Christopher M.T., Yan K., Qiaonan D., Zichen W., Gabriela V.M., Neil R.C., 68 | #' and Avi M. (2013) Enrichr: Interactive and Collaborative Html5 Gene List Enrichment 69 | #' Analysis Tool. \emph{BMC Bioinformatics} 14 (1). BioMed Central: 1. 70 | #' 71 | #' Alexa A. and Rahnenfuhrer J. (2009). topGO: Enrichment Analysis for Gene Ontology. 72 | #' R package version 2.28.0. 
73 | 74 | enrichment_analysis <-function(subnet, mode=NULL, gene_universe){ 75 | 76 | # Checking function arguments 77 | if (missing(subnet)) 78 | stop("Need to specify the subnetwork obtained from the PCSF algorithm.") 79 | if (class(subnet)[1] != "PCSF" || class(subnet)[2] != "igraph") 80 | stop("The subnetwork must be a \"PCSF\" object derived from an \"igraph\" class.") 81 | if (!is.null(mode)){ 82 | if(mode==1 && missing(gene_universe)) 83 | stop("Need to specify a list of genes (vector of gene symbols) used as background in enrichment analysis by topGO package") 84 | } 85 | 86 | 87 | cat(" Performing enrichment analysis...\n\n") 88 | 89 | # Obtain clusters in the subnet using edge betweenness clustering algorithm from igraph package. 90 | clusters = cluster_edge_betweenness(subnet) 91 | 92 | # Perform enrichment analysis for each cluster using EnrichR through its API or topGO. 93 | 94 | havingInternet <- function() { 95 | if (.Platform$OS.type == "windows") { 96 | ipmessage <- system("ipconfig", intern = TRUE) 97 | } else { 98 | ipmessage <- system("ifconfig", intern = TRUE) 99 | } 100 | validIP <- "((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)[.]){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" 101 | any(grep(validIP, ipmessage)) 102 | } 103 | 104 | internet_connection <- havingInternet() 105 | 106 | if(!is.null(mode)){ 107 | if(mode==0){ 108 | if(internet_connection){ 109 | cat(" Enrichment is being performed by EnrichR (http://amp.pharm.mssm.edu/Enrichr) API ...\n") 110 | enrich = call_enr(clusters, mode = 0, gene_universe) 111 | } 112 | else{ 113 | stop("There is no working Internet connection, perform your enrichment with topGO package with mode=1 by providing background gene list ...\n") 114 | } 115 | } 116 | else{ 117 | cat(" Enrichment is being performed by topGO package ...\n") 118 | enrich = call_enr(clusters, mode = mode, gene_universe) 119 | } 120 | } 121 | else 122 | { 123 | if(internet_connection){ 124 | cat(" Enrichment is being performed by EnrichR 
(http://amp.pharm.mssm.edu/Enrichr) API ...\n") 125 | enrich = call_enr(clusters, mode = 0, gene_universe) 126 | } 127 | else{ 128 | stop("There is no working Internet connection, perform your enrichment with topGO package with mode=1 by providing background gene list ...\n") 129 | } 130 | } 131 | 132 | if('Compound'%in% V(subnet)$type){##then we have drugs! 133 | require(dplyr) 134 | comps=data.frame(Drug=V(subnet)$name[which(V(subnet)$type=='Compound')], 135 | Cluster=clusters$membership[which(V(subnet)$type=='Compound')])%>% 136 | dplyr::group_by(Cluster)%>% 137 | dplyr::summarise(DrugsByBetweenness=paste(Drug,collapse=';')) 138 | 139 | } 140 | else{ 141 | comps <-NULL 142 | } 143 | enrichment = enrich[[1]] 144 | enrichment_complete = enrich[[2]] 145 | 146 | novals<-which(unlist(sapply(enrich[[2]],function(x) is.null(dim(x))))) 147 | if(length(novals)>0) 148 | enrichment_complete <- enrichment_complete[-novals] 149 | enrichment_tab = do.call(rbind,lapply(c(1:length(enrichment_complete)),function(x) data.frame(Cluster=x,enrichment_complete[[x]]))) 150 | more.than.two=which(sapply(enrichment_tab$Genes,function(x) length(unlist(strsplit(x,split=';')))>2)) 151 | if(length(more.than.two)>0) 152 | enrichment_tab=enrichment_tab[more.than.two,] 153 | if(!is.null(comps)) 154 | enrichment_tab = enrichment_tab%>%dplyr::left_join(comps,by='Cluster') 155 | 156 | # Add 'group' and 'title' attributes to subnet 157 | V(subnet)$group = clusters$membership 158 | V(subnet)$title = paste0("Cluster ",clusters$membership,": Enrichment analysis") 159 | for( i in 1:length(V(subnet))){ 160 | V(subnet)$title[i] = paste0( V(subnet)$title[i], enrichment[[V(subnet)$group[i]]]) 161 | } 162 | 163 | # Derive a "PCSFe" object from an "igraph" class. 164 | class(subnet) <- c("PCSFe", "igraph") 165 | # Combine the subnetwork and complete enrichment analysis tables. 
166 | output = list(subnet, enrichment_tab) 167 | names(output) = c("subnet", "enrichment") 168 | 169 | return (output) 170 | } 171 | -------------------------------------------------------------------------------- /R/PCSF_rand.R: -------------------------------------------------------------------------------- 1 | #' Prize-collecting Steiner Forest (PCSF) with randomized edge costs 2 | #' 3 | #' \code{PCSF_rand} returns a union of subnetworks obtained by solving the PCSF on the 4 | #' given interaction network by adding a random noise to edge costs each time. 5 | #' @param ppi An interaction network as an \pkg{igraph} object. 6 | #' @param terminals A list of terminal genes with prizes to be analyzed in the PCSF context. 7 | #' A named \code{numeric} vector, where terminal genes are named same as in the interaction network 8 | #' and numeric values correspond to the importance of the gene within the study. 9 | #' @param n An \code{integer} value to determine the number of runs with random noise added edge costs. 10 | #' A default value is 10. 11 | #' @param r A \code{numeric} value to determine additional random noise to edge costs. 12 | #' A random noise up to r percent of the edge cost is added to each edge. A default value is 0.1 13 | #' @param w A \code{numeric} value for tuning the number of trees in the output. A default value is 2. 14 | #' @param b A \code{numeric} value for tuning the node prizes. A default value is 1. 15 | #' @param mu A \code{numeric} value for a hub penalization. A default value is 0.0005. 16 | #' @param dummies A list of nodes that are to be connected to the root of the tree. If missing the root will be connected to all terminals. 17 | #' @return The final subnetwork obtained by taking the union of the PCSF outputs generated by 18 | #' adding a random noise to edge costs each time. It returns an \pkg{igraph} object with the node prize 19 | #' and edge cost attributes representing the total number of show ups throughout all runs.
20 | #' @import igraph 21 | #' @export 22 | #' 23 | #' 24 | #' @details 25 | #' 26 | #' In order to increase the robustness of the resulting structure, 27 | #' it is recommended to solve the PCSF several times on the same network 28 | #' while adding some noise to the edge costs each time, and combine all results 29 | #' in a final subnetwork. The union of all outputs may explain 30 | #' the underlying biology better. 31 | #' 32 | #' @examples 33 | #' \dontrun{ 34 | #' library("PCSF") 35 | #' data("STRING") 36 | #' data("Tgfb_phospho") 37 | #' terminals <- Tgfb_phospho 38 | #' ppi <- construct_interactome(STRING) 39 | #' subnet <- PCSF_rand(ppi, terminals, n = 10, r =0.1, w = 2, b = 2, mu = 0.0005)} 40 | #' 41 | #' @author Murodzhon Akhmedov 42 | #' 43 | #' @seealso \code{\link{PCSF}}, \code{\link{plot.PCSFe}} 44 | #' 45 | #' @references 46 | #' Akhmedov M., LeNail A., Bertoni F., Kwee I., Fraenkel E., and Montemanni R. (2017) 47 | #' A Fast Prize-Collecting Steiner Forest Algorithm for Functional Analyses in Biological Networks. 48 | #' \emph{Lecture Notes in Computer Science}, to appear. 
49 | 50 | 51 | PCSF_rand <- 52 | function(ppi, terminals, n = 10, r = 0.1, w = 2, b = 1, mu = 0.0005,dummies){ 53 | 54 | # Checking function arguments 55 | if (missing(ppi)) 56 | stop("Need to specify an interaction network \"ppi\".") 57 | if (class(ppi) != "igraph") 58 | stop("The interaction network \"ppi\" must be an igraph object.") 59 | if (missing(terminals)) 60 | stop(" Need to provide terminal nodes as a named numeric vector, 61 | where node names must be same as in the interaction network.") 62 | if(is.null(names(terminals))) 63 | stop(" The terminal nodes must be provided as a named numeric vector, 64 | where node names must be same as in the interaction network.") 65 | 66 | 67 | # Gather the terminal genes to be analyzed, and their scores 68 | terminal_names = names(terminals) 69 | terminal_values = as.numeric(terminals) 70 | 71 | # Incorporate the node prizes 72 | node_names = V(ppi)$name 73 | node_prz = vector(mode = "numeric", length = length(node_names)) 74 | index = match(terminal_names, node_names) 75 | percent = signif((length(index) - sum(is.na(index)))/length(index)*100, 4) 76 | if (percent < 5) 77 | stop(" Less than 1% of your terminal nodes are matched in the interactome, check your terminals!") 78 | cat(paste0(" ", percent, "% of your terminal nodes are included in the interactome\n")) 79 | terminal_names = terminal_names[!is.na(index)] 80 | terminal_values = terminal_values[!is.na(index)] 81 | index = index[!is.na(index)] 82 | node_prz[index] = terminal_values 83 | 84 | ## Prepare input file for MST-PCSF implementation in C++ 85 | if(missing(dummies)||is.null(dummies)||is.na(dummies)) 86 | dummies = terminal_names #re-assign this to allow for input 87 | 88 | cat(" Solving the PCSF by adding random noise to the edge costs...\n") 89 | 90 | # Calculate the hub penalization scores 91 | node_degrees = igraph::degree(ppi) 92 | hub_penalization = - mu*node_degrees 93 | 94 | 95 | # Update the node prizes 96 | node_prizes = b*node_prz 97 | index = 
which(node_prizes==0) 98 | node_prizes[index] = hub_penalization[index] 99 | 100 | # Construct the list of edges 101 | edges = ends(ppi,es = E(ppi)) 102 | from = c(rep("DUMMY", length(dummies)), edges[,1]) 103 | to = c(dummies, edges[,2]) 104 | 105 | all_nodes=NULL 106 | 107 | # Run the MST-PCSF algorithm for n times with random noise added to edge costs at each time 108 | for(i in 1:n){ 109 | 110 | # Randomize the edge costs 111 | cost = c(rep(w, length(dummies)), E(ppi)$weight + E(ppi)$weight*stats::runif(length(E(ppi)), 0, r)) 112 | 113 | # Feed in the input files into MSt-PCSF algorithm 114 | output = call_sr(from,to,cost,node_names,node_prizes) 115 | 116 | # Construct an igraph object for current parameter set, and save it 117 | edge = data.frame(as.character(output[[1]]),as.character(output[[2]])) 118 | colnames(edge)= c("source", "target") 119 | edge = edge[which(edge[,1]!="DUMMY"), ] 120 | edge = edge[which(edge[,2]!="DUMMY"), ] 121 | graph = graph.data.frame(edge,directed=F) 122 | assign(paste0("graph_",i), get("graph")) 123 | all_nodes = c(all_nodes, V(graph)$name) 124 | } 125 | 126 | # Calculate graph statistics 127 | node_frequency = table(all_nodes) 128 | node_names = names(node_frequency) 129 | node_prizes = as.numeric(node_frequency) 130 | 131 | # Combine the graphs in order to get unionized graph 132 | adj_matrix = matrix(0, length(node_names), length(node_names)) 133 | colnames(adj_matrix) = node_names 134 | rownames(adj_matrix) = node_names 135 | for(i in 1:n){ 136 | assign("graph", get(paste0("graph_",i))) 137 | edges = ends(graph,es = E(graph)) 138 | x = match(edges[,1],node_names) 139 | y = match(edges[,2],node_names) 140 | if(length(x)>0 & length(y)>0){ 141 | for( j in 1:length(x)){ 142 | if(x[j]>=y[j]){ 143 | k = x[j] 144 | l = y[j] 145 | }else{ 146 | k = y[j] 147 | l = x[j] 148 | } 149 | adj_matrix[k,l] = adj_matrix[k,l]+1 150 | } 151 | } 152 | } 153 | 154 | # Check the size of output subnetwork and print a warning if it is 0 155 | 
if(sum(adj_matrix) != 0){ 156 | 157 | # Construct the igraph object from the union graph 158 | subnet = graph_from_adjacency_matrix(adj_matrix, weighted=TRUE, mode="undirected") 159 | index = match(V(subnet)$name, node_names) 160 | V(subnet)$prize = node_prizes[index] 161 | 162 | # # Associate the type of nodes to shape 163 | # V(subnet)$shape = "triangle" 164 | # index = match(terminal_names, V(subnet)$name) 165 | # index = index[!is.na(index)] 166 | # V(subnet)$shape[index] = "circle" 167 | 168 | # Associate the type of nodes 169 | V(subnet)$type = "Steiner" 170 | index = match(terminal_names, V(subnet)$name) 171 | index = index[!is.na(index)] 172 | V(subnet)$type[index] = "Terminal" 173 | 174 | 175 | class(subnet) <- c("PCSF", "igraph") 176 | 177 | return (subnet) 178 | 179 | } else { 180 | 181 | stop(" Subnetwork can not be identified for a given parameter set. 182 | Provide a compatible b or mu value with your terminal prize list...\n\n") 183 | } 184 | 185 | 186 | } 187 | -------------------------------------------------------------------------------- /R/plot.PCSF.R: -------------------------------------------------------------------------------- 1 | #' Plot an interactive subnetwork 2 | #' 3 | #' \code{plot.PCSF} plots an interactive figure of the subnetwork obrained by 4 | #' the PCSF method. 5 | #' 6 | #' @param x A subnetwork obtained by the PCSF method. It is a "PCSF" object derived 7 | #' from \pkg{igraph} class and it has the edge cost and vertex prize attributes. 8 | #' @param style A \code{boolean} value to determine the visualization style of the network, 9 | #' where \code{0} plots the \code{static} network and \code{1} plots the \code{dynamic} 10 | #' network. The default valu is 0. 11 | #' @param edge_width A \code{numeric} value to emphasize a maximum edge width. A default value is 5. 12 | #' This value must be greater than 1. 13 | #' @param node_size A \code{numeric} value to emphasize a maximum node size. A default value is 40. 
14 | #' This value must be greater than 10. 15 | #' @param node_label_cex A \code{numeric} value to set a node label size. A default value is 30. 16 | #' @param Steiner_node_color A \code{string} to set a color for \code{Steiner} nodes. 17 | #' A default value is "lightblue". 18 | #' @param Terminal_node_color A \code{string} to set a color for \code{terminal} nodes. 19 | #' @param Steiner_node_legend A \code{string} to set a legend for \code{Steiner} nodes. 20 | #' A default legend is "Steiner". 21 | #' @param Terminal_node_legend A \code{string} to set a legend for \code{terminal} nodes. 22 | #' A default legend is "Terminal". 23 | #' @param extra_node_colors A \code{list} with colors of extra types of nodes added to the PCSF result, with the names of the list being the node type 24 | #' @param ... Ignored. 25 | #' @import igraph visNetwork 26 | #' @method plot PCSF 27 | #' @export 28 | #' 29 | #' 30 | #' @details 31 | #' This function plots an interactive subnetwork obtained by the \code{\link{PCSF}} and \code{\link{PCSF_rand}}. 32 | #' The node sizes and edge widths are respectively proportional to the node prizes and edge costs 33 | #' while plotting the subnetwork from \code{\link{PCSF}}. In contrast, the node sizes and edge widths are 34 | #' proportional to the total number of abondance in randomized runs while plotting the subnetwork 35 | #' from \code{\link{PCSF_rand}}. The node names are displayed during the hover-over. 
36 | #' 37 | #' @examples 38 | #' \dontrun{ 39 | #' library("PCSF") 40 | #' data("STRING") 41 | #' data("Tgfb_phospho") 42 | #' terminals <- Tgfb_phospho 43 | #' ppi <- construct_interactome(STRING) 44 | #' subnet <- PCSF(ppi, terminals, w = 2, b = 1, mu = 0.0005) 45 | #' plot(subnet)} 46 | #' 47 | #' @author Murodzhon Akhmedov 48 | #' 49 | #' @seealso \code{\link{PCSF}}, \code{\link{plot.PCSFe}} 50 | 51 | plot.PCSF <- 52 | function(x, style = 0, edge_width=5, node_size=40, node_label_cex = 30, Steiner_node_color = "lightblue", 53 | Terminal_node_color = "lightgreen", Terminal_node_legend = "Terminal", 54 | Steiner_node_legend = "Steiner",extra_node_colors = list(),...){ 55 | 56 | subnet = x 57 | # Checking function arguments 58 | if (missing(subnet)) 59 | stop("Need to specify the subnetwork obtained from the PCSF algorithm.") 60 | if (class(subnet)[1] != "PCSF" || class(subnet)[2] != "igraph") 61 | stop("The subnetwork must be a \"PCSF\" object derived from an \"igraph\" class.") 62 | if (edge_width < 1) 63 | stop("The edge_width must be greater than 1.") 64 | if (node_size < 10) 65 | stop("The node_size must be greater than 10.") 66 | 67 | # Calculate the adjusted node prizes 68 | prize = abs(V(subnet)$prize) 69 | min1 = 10 70 | max1 = node_size 71 | r1 = max1 - min1 72 | min2 = min(prize) 73 | max2 = max(prize) 74 | r2 = max2 - min2 75 | adjusted_prize = r1*(prize - min2)/r2 + min1 76 | 77 | # Calculate the adjusted edge weights 78 | weight = E(subnet)$weight 79 | min1 = 1 80 | max1 = edge_width 81 | r1 = max1 - min1 82 | min2 = min(weight) 83 | max2 = max(weight) 84 | r2 = max2 - min2 85 | adjusted_weight = r1*(weight - min2)/r2 + min1 86 | 87 | 88 | if(style){ 89 | 90 | # List of nodes in the subnet 91 | nodes = data.frame(1:length(V(subnet)), V(subnet)$name) 92 | names(nodes) = c("id", "name") 93 | 94 | # Differentiate the type of nodes 95 | nodes$group = V(subnet)$type 96 | 97 | # Attach the node attributes 98 | nodes$size = adjusted_prize 99 | nodes$title 
= nodes$name 100 | nodes$label = nodes$name 101 | nodes$label.cex = node_label_cex 102 | nodes$font.size = node_label_cex 103 | 104 | # List of edges in the subnet 105 | edges = data.frame(ends(subnet,es = E(subnet)), adjusted_weight) 106 | names(edges) = c("from", "to", "width") 107 | edges$from = match(edges$from, nodes$name) 108 | edges$to = match(edges$to, nodes$name) 109 | 110 | # Visualize the subnet 111 | visNet <-visNetwork(nodes,edges) %>% 112 | visNodes( shadow = list(enabled = TRUE, size = 12)) %>% 113 | visGroups(groupname = "Steiner", color = list(background = Steiner_node_color, border = "blue"), shape = "triangle") %>% 114 | visGroups(groupname = "Terminal", color = list(background = Terminal_node_color, border = "green"), shape = "dot") 115 | 116 | #broke this out to accomodate extra node names 117 | leg.groups<- list( 118 | list(label = Terminal_node_legend, shape = "dot", size = 15, color = list(background = Terminal_node_color, border = "green"), label.cex = 0.8), 119 | list(label = Steiner_node_legend, shape = "triangle",size = 10, color = list(background = Steiner_node_color, border = "blue"), label.cex = 0.8 )) 120 | 121 | if(length(extra_node_colors)>0){ 122 | for(i in 1:length(extra_node_colors)){ 123 | en=names(extra_node_colors)[i] 124 | visNet <- visNet %>% visGroups(groupname=en,color=list(background = extra_node_colors[[en]],border='grey'),shape='triangle') 125 | leg.groups[[i+2]]<-list(label=en,shape='triangle',size=13,color=list(background=extra_node_colors[[en]],border='grey'),label.cex=0.8) 126 | } 127 | 128 | } 129 | 130 | visNet<-visNet %>% visOptions(highlightNearest = list(enabled = T)) %>% 131 | visLegend(addNodes =leg.groups, width = 0.15, 132 | useGroups = FALSE) 133 | 134 | 135 | } else{ 136 | 137 | # Attach the node type attribute 138 | V(subnet)$group = V(subnet)$type 139 | 140 | # Attach the node attributes: size, title, label, label.cex, font size 141 | V(subnet)$size = adjusted_prize 142 | V(subnet)$title = 
V(subnet)$name 143 | V(subnet)$label = V(subnet)$name 144 | V(subnet)$label.cex = node_label_cex/30 145 | V(subnet)$font.size = node_label_cex/30 146 | 147 | # Attach the edge width attribute 148 | E(subnet)$width = adjusted_weight 149 | 150 | 151 | # Visualize the subnet 152 | visNet <- visIgraph(subnet) %>% 153 | visIgraphLayout(layout = "layout_with_fr") %>% 154 | visNodes( shadow = list(enabled = TRUE, size = 12)) %>% 155 | visGroups(groupname = "Steiner", color = list(background = Steiner_node_color, border = "blue"), shape = "triangle") %>% 156 | visGroups(groupname = "Terminal", color = list(background = Terminal_node_color, border = "green"), shape = "dot") 157 | 158 | leg.groups<- list( 159 | list(label = Terminal_node_legend, shape = "dot", size = 15, color = list(background = Terminal_node_color, border = "green"), label.cex = 0.8), 160 | list(label = Steiner_node_legend, shape = "triangle",size = 10, color = list(background = Steiner_node_color, border = "blue"), label.cex = 0.8 )) 161 | 162 | if(length(extra_node_colors)>0){ 163 | for(i in 1:length(extra_node_colors)){ 164 | en=names(extra_node_colors)[i] 165 | visNet <- visNet %>% visGroups(groupname=en,color=list(background = extra_node_colors[[en]],border='grey'),shape='triangle') 166 | leg.groups[[i+2]]<-list(label=en,shape='triangle',size=13,color=list(background=extra_node_colors[[en]],border='grey'),label.cex=0.8) 167 | } 168 | } 169 | 170 | visNet <- visNet %>% visOptions(highlightNearest = list(enabled = T)) %>% 171 | visLegend(addNodes =leg.groups, width = 0.10, 172 | useGroups = FALSE) 173 | 174 | } 175 | visNet 176 | 177 | } 178 | -------------------------------------------------------------------------------- /R/PCSF.R: -------------------------------------------------------------------------------- 1 | #' Prize-collecting Steiner Forest (PCSF) 2 | #' 3 | #' \code{PCSF} returns a subnetwork obtained by solving the PCSF on the given interaction network. 
4 | #' 5 | #' @param ppi An interaction network, an \pkg{igraph} object. 6 | #' @param terminals A list of terminal genes with prizes to be analyzed in the PCSF context. 7 | #' A named \code{numeric} vector, where terminal genes are named same as in the interaction network 8 | #' and numeric values correspond to the importance of the gene within the study. 9 | #' @param w A \code{numeric} value for tuning the number of trees in the output. A default value is 2. 10 | #' @param b A \code{numeric} value for tuning the node prizes. A default value is 1. 11 | #' @param mu A \code{numeric} value for a hub penalization. A default value is 0.0005. 12 | #' @param dummies A list of nodes that are to connected to the root of the tree. If missing the root will be connected to all terminals. 13 | #' @return The final subnetwork obtained by the PCSF. 14 | #' It return an \pkg{igraph} object with the node prize and edge cost attributes. 15 | #' @import igraph 16 | #' @export 17 | #' 18 | #' @details 19 | #' 20 | #' The PCSF is a well-know problem in graph theory. 21 | #' Given an undirected graph \emph{G = (V, E)}, where the vertices are labeled with prizes 22 | #' \eqn{p_{v}} and the edges are labeled with costs \eqn{c_{e} > 0}, the goal is to identify 23 | #' a subnetwork \emph{G' = (V', E')} with a forest structure. The target is to minimize 24 | #' the total edge costs in \emph{E'}, the total node prizes left out of \emph{V'}, and the 25 | #' number of trees in \emph{G'}. This is equivalent to minimization of the following 26 | #' objective function: 27 | #' 28 | #' \deqn{F(G')= Minimize \sum_{ e \in E'} c_{e} + \beta*\sum_{v \not\in V'} p_v + \omega*k} 29 | #' where, \emph{k} is the number of trees in the forest, and it is regulated by parameter \eqn{\omega}. 30 | #' The parameter \eqn{\beta} is used to tune the prizes of nodes. 
31 | #' 32 | #' This optimization problem nicely maps onto the problem of finding differentially 33 | #' enriched subnetworks in the cell protein-protein interaction (PPI) network. 34 | #' The vertices of interaction network correspond to genes or proteins, and edges 35 | #' represent the interactions among them. We can assign prizes 36 | #' to vertices based on measurements of differential expression, copy number, or 37 | #' mutation, and costs to edges based on confidence scores for those intra-cellular 38 | #' interactions from experimental observation, yielding a proper input to the PCSF 39 | #' problem. Vertices that are assigned a prize are referred to \emph{terminal} nodes, 40 | #' whereas the vertices which are not observed in patient data are not assigned a 41 | #' prize and are called \emph{Steiner} nodes. After scoring the interactome, the 42 | #' PCSF is used to detect a relevant subnetwork (forest), which corresponds to a 43 | #' portion of the interactome, where many genes are highly correlated in terms of 44 | #' their functions and may regulate the differentially active biological process 45 | #' of interest. The PCSF aims to identify neighborhoods in interaction networks 46 | #' potentially belonging to the key dysregulated pathways of a disease. 47 | 48 | #' In order to avoid a bias towards the hub nodes of PPI networks to appear in solution 49 | #' of PCSF, we penalize the prizes of \emph{Steiner} nodes according to their degree 50 | #' distribution in PPI, and it is regulated by parameter \eqn{\mu}: 51 | #' 52 | #' \deqn{p'_{v} = p_{v} - \mu*degree(v)} 53 | #' 54 | #' The parameter \eqn{\mu} also affects the total number of \emph{Steiner} nodes in the solution. 55 | #' Higher the value of \eqn{\mu} smaller the number of \emph{Steiners} in the subnetwork, 56 | #' and vice-versa. 
Based on our previous analysis the recommended range of \eqn{\mu} 57 | #' for biological networks is between 1e-4 and 5e-2, and users can choose the values 58 | #' resulting subnetworks with vertex sets that have desirable \emph{Steiner/terminal} 59 | #' node ratio and average \emph{Steiner/terminal} in-degree ratio 60 | #' in the template interaction network. 61 | #' 62 | #' @examples 63 | #' \dontrun{ 64 | #' library("PCSF") 65 | #' data("STRING") 66 | #' data("Tgfb_phospho") 67 | #' terminals <- Tgfb_phospho 68 | #' ppi <- construct_interactome(STRING) 69 | #' subnet <- PCSF(ppi, terminals, w = 2, b = 1, mu = 0.0005)} 70 | #' 71 | #' @author Murodzhon Akhmedov 72 | #' 73 | #' @seealso \code{\link{PCSF_rand}}, \code{\link{plot.PCSF}} 74 | #' 75 | #' @references 76 | #' Akhmedov M., LeNail A., Bertoni F., Kwee I., Fraenkel E., and Montemanni R. (2017) 77 | #' A Fast Prize-Collecting Steiner Forest Algorithm for Functional Analyses in Biological Networks. 78 | #' \emph{Lecture Notes in Computer Science}, to appear. 
79 | 80 | 81 | PCSF <- 82 | function(ppi, terminals, w = 2, b = 1, mu = 0.0005, dummies){ 83 | 84 | # Checking function arguments 85 | if (missing(ppi)) 86 | stop("Need to specify an interaction network \"ppi\".") 87 | if (class(ppi) != "igraph") 88 | stop("The interaction network \"ppi\" must be an igraph object.") 89 | if (missing(terminals)) 90 | stop(" Need to provide terminal nodes as a named numeric vector, 91 | where node names must be same as in the interaction network.") 92 | if(is.null(names(terminals))) 93 | stop(" The terminal nodes must be provided as a named numeric vector, 94 | where node names must be same as in the interaction network.") 95 | 96 | 97 | 98 | 99 | # Gather the terminal genes to be analyzed, and their scores 100 | terminal_names = names(terminals) 101 | terminal_values = as.numeric(terminals) 102 | 103 | # Incorporate the node prizes 104 | node_names = V(ppi)$name 105 | node_prz = vector(mode = "numeric", length = length(node_names)) 106 | index = match(terminal_names, node_names) 107 | percent = signif((length(index) - sum(is.na(index)))/length(index)*100, 4) 108 | if (percent < 5) 109 | stop(" Less than 1% of your terminal nodes are matched in the interactome, check your terminals!") 110 | cat(paste0(" ", percent, "% of your terminal nodes are included in the interactome\n")) 111 | terminal_names = terminal_names[!is.na(index)] 112 | terminal_values = terminal_values[!is.na(index)] 113 | index = index[!is.na(index)] 114 | node_prz[index] = terminal_values 115 | 116 | if(missing(dummies)||is.null(dummies)||is.na(dummies)) 117 | dummies = terminal_names #re-assign this to allow for input 118 | 119 | ## Prepare input file for MST-PCSF implementation in C++ 120 | 121 | cat(" Solving the PCSF...\n") 122 | 123 | # Calculate the hub penalization scores 124 | node_degrees = igraph::degree(ppi) 125 | hub_penalization = - mu*node_degrees 126 | 127 | # Update the node prizes 128 | node_prizes = b*node_prz 129 | index = which(node_prizes==0) 
130 | node_prizes[index] = hub_penalization[index] 131 | 132 | 133 | # Construct the list of edges 134 | edges = ends(ppi,es = E(ppi)) 135 | from = c(rep("DUMMY", length(dummies)), edges[,1]) 136 | to = c(dummies, edges[,2]) 137 | 138 | cost = c(rep(w, length(dummies)), E(ppi)$weight) 139 | 140 | #PCSF will faill if there are NAs in weights, this will check and fail gracefully 141 | if(any(is.na(E(ppi)$weight))){ 142 | 143 | } 144 | 145 | ## Feed the input into the PCSF algorithm 146 | output = call_sr(from,to,cost,node_names,node_prizes) 147 | 148 | # Check the size of output subnetwork and print a warning if it is 0 149 | if(length(output[[1]]) != 0){ 150 | 151 | #names(output) = c("from", "to", "cost", "terminal_names", "terminal_prizes") 152 | 153 | # Contruct an igraph object from the MST-PCSF output 154 | e = data.frame(output[[1]], output[[2]], output[[3]]) 155 | #e = e[which(e[,1]!="DUMMY"), ] 156 | Ee = e[which(e[,2]!="DUMMY"), ] 157 | names(e) = c("from", "to", "weight") 158 | 159 | 160 | # Differentiate the type of nodes 161 | type = rep("Steiner", length(output[[4]])) 162 | index = match(terminal_names, output[[4]]) 163 | index = index[!is.na(index)] 164 | type[index] = "Terminal" 165 | 166 | v = data.frame(output[[4]], output[[5]], type) 167 | names(v) = c("terminals", "prize", "type") 168 | subnet = graph.data.frame(e,vertices=v,directed=F) 169 | E(subnet)$weight=as.numeric(output[[3]]) 170 | subnet = delete_vertices(subnet, "DUMMY") 171 | subnet = delete_vertices(subnet, names(which(degree(subnet)==0))) 172 | 173 | 174 | class(subnet) <- c("PCSF", "igraph") 175 | 176 | return (subnet) 177 | 178 | } else{ 179 | 180 | stop(" Subnetwork can not be identified for a given parameter set. 
181 | Provide a compatible b or mu value with your terminal prize list...\n\n") 182 | 183 | } 184 | 185 | } 186 | -------------------------------------------------------------------------------- /vignettes/PCSF-manual.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title:
PCSF Package
3 | author:
Murodzhon Akhmedov, Amanda Kedaigle, Renan Escalante, Roberto Montemanni, Francesco Bertoni, Ernest Fraenkel, Ivo Kwee
4 | date:
February 21, 2017
5 | output: 6 | html_document: 7 | theme: cerulean 8 | bibliography: ref.bib 9 | --- 10 | 11 |
12 |
13 | 14 | ####Introduction 15 | 16 | A tremendous amount of high-throughput data at multiple layers of cellular systems has been profiled to study biological responses to perturbations and disease. The current challenge is to develop methods for effective analysis of these data to better interpret underlying biology and identify functional units. Network modeling approaches are some of the techniques lately used for analysis of biological networks [@Dittrich]. Recently, the Prize-collecting Steiner Forest (PCSF) algorithm has been applied to interaction networks to find a subnetwork enriched for input data, which corresponds to relevant subunits in the interactome of a cell [@Bechet, @Tuncbag]. 17 | 18 | The PCSF problem has NP-hard characteristics, and requires a great deal of effort to find exact solutions on larger biological networks. Recently, we have developed a fast heuristic for the PCSF [@Akhmedov2016], and conducted a detailed performance comparison with existing methods [@Akhmedov]. As an extension to this, we present an R package that performs fast and user-friendly analysis of high-throughput data using the interaction networks as a template, and interprets the biological landscape of interactome with respect to the data. More specifically, the PCSF package allows the user to i) upload the interactome and patient data ii) compute the PCSF subnetwork solution iii) perform functional analysis on resulting subnetwork iv) and interactively visualize the final subnetwork with functional enrichment information. 19 | 20 | 21 |
22 |
23 | 24 | #### The PCSF: Optimization Approach on Biological Networks 25 | 26 | The PCSF is a well-known problem in graph theory. Given an undirected graph $G = (V, E)$, where the vertices are labeled with prizes $p_{v}$ and the edges are labeled with costs $c_{e} > 0$, the goal is to identify a subnetwork $G' = (V', E')$ with a forest structure. The target is to minimize the total edge costs in $E'$, the total node prizes left out of $V'$, and the number of trees in $G'$. This is equivalent to minimization of the following objective function: 27 | 28 | $$F(G')= \text{Minimize} \sum_{e{\in}E'} c_{e} + \beta*\sum_{v{\not\in}V'} p_{v} + w*k \label{GW}$$ 29 | 30 | where $k$ is the number of trees in the forest, and it is regulated by parameter $w$. The parameter $\beta$ is used to tune the prizes of nodes relative to edge costs. 31 | 32 | 33 | Recently, we have applied PCSF to biological networks in the Forest module of the Omics Integrator software [@Tuncbag]. In biological networks such as Protein-Protein Interaction (PPI) networks, every vertex represents a biomolecule, and every edge corresponds to the cellular interaction between two biomolecules. Edges of the network are given costs, which correspond to confidence or frequency of that interaction. The vertices of the network are given prizes according to the measurements of differential expression, copy number, or number of mutation for that gene/protein. The set of vertices that are assigned a prize are referred to as `terminal` nodes. Non-terminal nodes, which were not observed in the experimental data, may appear in the solution and are called `Steiner` nodes. After scoring the interactome, the PCSF is used to detect a relevant subnetwork (forest). The PCSF aims to identify neighborhoods in interaction networks potentially belonging to the key dysregulated pathways of a disease or experiment. 
34 | 35 | In order to avoid a bias for the hub nodes of PPI networks to appear in solution of PCSF, we use the method introduced in Forest [@Tuncbag], which penalizes the prizes of nodes according to their degree in the PPI. The parameter $\mu$ controls the size of these penalties: 36 | $$p'_{v} = p_{v} - \mu*degree(v)$$ 37 | The parameter $\mu$ also affects the total number of `Steiner` nodes in the solution. The higher the value of $\mu$, the smaller the number of `Steiner` nodes in the subnetwork, and vice-versa. The recommended range of $\mu$ for biological networks is between `1e-4` and `5e-2` to fine-tune the `Steiner`/`terminal` node ratio in the subnetwork and average `Steiner`/`terminal` in-degree ratio of the corresponding nodes in the original template network. 38 | 39 |
40 |
41 | 42 | 43 | #### Package Dependencies and Installation 44 | 45 | The software was implemented in the R environment and can easily be installed from the R console. As input, the package requires a template network such as protein-protein interaction, protein-metabolite interaction or any other correlation-based interaction network, and it maps differentially expressed genes/proteins/metabolites from the high-throughput data as vertex prizes into the template network. Then, it computes and returns high-scoring neighborhoods to identify functional modules in the interactome. Required parameters are: $\beta$ - for tuning the vertex prizes, $\omega$ - for regulating the number of distinct components in the subnetwork, and $\mu$ - for hub penalization. 46 | 47 | The package has the following R-package dependencies that are automatically installed along with the package: 48 | 49 | * `BH` and `igraph` - for efficient graph handling and calculations, 50 | * `httr`, `methods`, `org.Hs.eg.db`, and `topGO` - to perform enrichment analysis, 51 | * `Rcpp` - to employ C++ source code within R, 52 | * `visNetwork` - for visualization. 53 | 54 | In order to compile the source, Windows users should install the `Rtools` package from the following [link](https://cran.r-project.org/bin/windows/Rtools/) that installs GCC and CMake. 55 | 56 | 57 | The PCSF package and its dependencies can be installed on Mac OS, Linux and Windows by running the following commands in the R console. 58 | 59 | ```{r, eval=FALSE} 60 | source("http://bioconductor.org/biocLite.R") 61 | biocLite("topGO") 62 | install.packages("devtools", dependencies=TRUE) 63 | devtools::install_github("IOR-Bioinformatics/PCSF", repos=BiocInstaller::biocinstallRepos(), 64 | dependencies=TRUE, type="source", force=TRUE) 65 | ``` 66 | 67 |
68 |
69 | 70 | 71 | #### Application 72 | 73 | In this simple example application, we demonstrate the functions of the PCSF package and its usage. In this study, we analyze differential phosphoproteomic data derived from H358 cells, a model of non-small cell lung cancer, which were stimulated with TGF-β. These data were previously published [@Thomson], and are the same ones used to demonstrate Omics Integrator [@Tuncbag, Figure 4]. We used the PPI network from the STRING database (version 13) [@Szklarczyk] as a template network, in which the network edges have a confidence score `s(e)` computed from experimental channels. The low confidence edges with `s(e) < 0.5` are removed from the interactome to increase the reliability of computational findings. We convert edge confidence into edge cost: `c(e) = max(0.01, 1-s(e))`. The phosphoproteomic data and the PPI network are available in the package. Let's load the library and data into an R session. 74 | 75 | ```{r, results='hide', message=FALSE, warning=FALSE} 76 | library(PCSF) 77 | ``` 78 | 79 | ```{r} 80 | data("STRING") 81 | data("Tgfb_phospho") 82 | ``` 83 |
84 | Next, we construct a PPI network using the STRING data employing the `construct_interactome()` function. The resulting PPI network consists of 17581 edges and 15405 nodes. 85 | 86 | Users can construct a PPI network by providing their own interactome. An interactome data for `construct_interactome()` must be a `data.frame` composed of three columns, where each row corresponds to an edge, in which the first element is a `head`, the second element is a `tail`, and the last element represents the `cost` of the edge. 87 | 88 | ```{r} 89 | ppi <- construct_interactome(STRING) 90 | ppi 91 | ``` 92 |
93 | 94 | We then generate a vector named `terminals` that is composed of genes with prizes to be analyzed in the PCSF context. Here, our prizes come from the phosphoproteomic data derived from H358 cells. There are 58 proteins which were determined to be significantly modulated in phosphorylation after TGF-β stimulation, and the prize for each protein is the absolute value of the log fold change in phosphotyrosine abundance. 95 | 96 | Users can generate this vector by providing their own data. It must be a named numeric vector, where the genes names are the same as in the interaction network and numeric values correspond to the importance of the gene within the study. 97 | 98 | ```{r} 99 | terminals <- Tgfb_phospho 100 | str(terminals) 101 | ``` 102 |
103 | 104 | After properly obtaining the PPI network and scoring it, we employ the PCSF approach to identify the subnetwork with the following function, in which the arguments are: `ppi` - a weighted PPI network, `terminals` - a named numeric vector which corresponds to the list of genes with prizes, `w` - a parameter for tuning the number of trees in the subnetwork, `b` - a numeric value for tuning the node prizes, `mu` - a parameter for hub penalization. This function outputs the subnetwork as an `igraph` object. 105 | 106 | ```{r, message=FALSE, warning=FALSE, results='hide'} 107 | subnet <- PCSF(ppi, terminals, w = 2, b = 1, mu = 0.0005) 108 | ``` 109 |
110 | 111 | The dynamic and interactive version of the final subnetwork can be plotted with the `plot.PCSF()` function, which takes the arguments: `x` - the output obtained by the PCSF approach, `style` - a boolean value to determine the visualization style of the network, `edge_width` - a variable to adjust edge widths, `node_size` - a numeric value to adjust the size of nodes, `node_label_cex` - a numeric value to set node label size, `Steiner_node_color` - a variable to color the `Steiner` nodes, `Terminal_node_color` - a variable to color `terminal` nodes. The node sizes and edge widths in the figure are proportional to the prize of nodes and probability of existence of the edges, respectively, and they are adjusted according to `node_size` and `edge_width` parameters. 112 | 113 | ```{r, eval=FALSE} 114 | plot(subnet, style = 1, edge_width=5, node_size=40, node_label_cex = 30, Steiner_node_color = "lightblue", 115 | Terminal_node_color = "lightgreen") 116 | ``` 117 | 118 |
119 | ```{r, echo=FALSE, fig.width=10, fig.height=6} 120 | plot(subnet, style = 1, edge_width=5, node_size=40, node_label_cex = 30, Steiner_node_color = "lightblue", 121 | Terminal_node_color = "lightgreen") 122 | ``` 123 |
124 |
125 | 126 | Now, let's observe how the hub penalization parameter is affecting the final subnetwork. We solve the PCSF with a higher `mu` value than the previous one (`mu=0.05`), and plot the subnetwork. It is clear that when the `mu` value is increased, we get a very limited number of `Steiner` nodes and a smaller subnetwork overall. 127 | 128 | ```{r, results='hide', message=FALSE, warning=FALSE} 129 | subnet <- PCSF(ppi, terminals, w = 2, b = 100, mu = 0.05) 130 | plot(subnet, style = 1) 131 | ``` 132 |
133 | ```{r, echo=FALSE, fig.width=10, fig.height=6} 134 | plot(subnet, style = 1) 135 | ``` 136 |
137 |
138 | 139 | However, if we set `mu` to zero, we see that the forest relies on the hub node UBC, or the regulatory protein ubiquitin-C. These hub nodes are often not specific to the system in the experiment. 140 | 141 | ```{r, results='hide', message=FALSE, warning=FALSE} 142 | subnet <- PCSF(ppi, terminals, w = 2, b = 100, mu = 0) 143 | plot(subnet, style = 1) 144 | ``` 145 |
146 | ```{r, echo=FALSE, fig.width=10, fig.height=6} 147 | plot(subnet, style = 1) 148 | ``` 149 |
150 | 151 |
152 |
153 | 154 | 155 | #### Adding random noise to the edge costs 156 | 157 | We must be cautious making a biological interpretation of data based on a single run of PCSF. Since the PPI network is huge and data is noisy, it is possible to have some random nodes in the output forest of each run. In order to increase the robustness of the resulting structure, it is a reasonable approach to solve PCSF several times by adding some noise to edge costs each time, and combine all results in the final forest. The union of all outputs may explain the underlying biology better. Running the PCSF with noisy edge costs can be performed with the function below, where argument `n` determines the number of runs and argument `r` determines the level of noise. 158 | 159 | 160 | ```{r, results='hide', message=FALSE, warning=FALSE} 161 | subnet <- PCSF_rand(ppi, terminals, n=10, r = 0.1, w = 2, b = 1, mu = 0.0005) 162 | ``` 163 |
164 |
165 | 166 | 167 | 168 | #### Functional Enrichment Analysis 169 | 170 | The enrichment analysis of the final subnetwork is performed with the `enrichment_analysis()` function. The subnetwork is clustered by the edge betweenness algorithm from the `igraph` package, and for each cluster the functional enrichment is done by employing either the EnrichR [@Chen] API or the topGO [@Alexa] R-package, as specified by the user. Note that the EnrichR API requires a working internet connection to perform the enrichment. If not specified, the package defaults to EnrichR; it falls back to topGO if there is no internet connection. 171 | 172 | The `enrichment_analysis()` function requires the following arguments: `subnet` - the final subnetwork obtained by the PCSF method, `mode` - a binary variable to choose the method for enrichment analysis, where 0 is for the EnrichR API and 1 is for the topGO package, and `gene_universe` - a complete list of genes (vector of gene symbols) used as background in enrichment analysis by the topGO package. 173 | 174 | It is required to define a `gene_universe` to use topGO, and the gene_universe in this example is all the genes in the PPI template network. It performs enrichment analysis for `Biological_Process` terms in the Gene Ontology database within our implementation. 175 | 176 | ```{r, results='hide', message=FALSE, warning=FALSE, eval=FALSE} 177 | library(topGO) 178 | gene_universe <- V(ppi)$name 179 | res <- enrichment_analysis(subnet, mode=1, gene_universe) 180 | ``` 181 | 182 | In contrast, the EnrichR API does not require a `gene_universe`, and it gathers and combines the enrichment analyses for `GO_Biological_Process_2015`,`KEGG_2016`, `Reactome_2016`, and `BioCarta_2016` databases. 183 | 184 | ```{r, results='hide', message=FALSE, warning=FALSE} 185 | res <- enrichment_analysis(subnet) 186 | ``` 187 | 188 | The `enrichment_analysis()` returns an annotated subnetwork (`igraph` object) and a list of tables that contain full enrichments for each cluster. 
The subnetwork can be saved in a desirable format available in the `igraph` package. 189 | 190 |
191 | 192 | An interactive version of the annotated subnetwork is visualized by the `plot.PCSFe()` function, where each cluster is colored differently. The plotting function requires the following arguments: `x` - an annotated subnetwork obtained by the `enrichment_analysis()` function, `edge_width` - a variable to adjust the edge widths, `node_size` - a variable to adjust the size of nodes, `node_label_cex` - a numeric value to adjust the node label size. 193 | 194 | The node sizes and edge widths are proportional to the number of times that the node or edge appeared in the noisy runs. Nodes are colored according to cluster membership. As in the EnrichR API, the p-value is calculated using the Fisher test and adjusted for multiple hypotheses. The combined score corresponds to multiplication of the log p-value and the z-score of the deviation from the expected rank of that term in random lists of genes. The top 15 functional enrichment terms for each cluster are ranked according to the adjusted p-value and displayed in a tabular format when the mouse hovers over a node in that cluster. Each cluster can be visualized separately using the “Select by group” icon located at the top of the figure. 195 | 196 | ```{r, eval=FALSE} 197 | plot(res$subnet, edge_width = 8, node_size = 30, node_label_cex = 1) 198 | ``` 199 | 200 |
201 | ```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=9, fig.height=6} 202 | plot(res$subnet, edge_width = 8, node_size = 30, node_label_cex = 1) 203 | ``` 204 |
205 | 206 | In this case, the presence of subnetworks enriched for Gene Ontology terms like 'mesenchymal-epithelial cell signaling' and 'EGFR downregulation' confirms the importance of these pathways in this lung cancer model. We also see Steiner nodes such as CBL, which has been shown to be involved in several models of non-small cell lung cancers [@Tan], like the H358 cell line. Therefore, we see that the PCSF algorithm points out proteins and pathways that are highly relevant to the system under study. 207 | 208 |
209 |
210 | 211 | 212 | #### REFERENCES 213 | 214 | 215 | -------------------------------------------------------------------------------- /src/PCSF.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ************************ Prize-collecting Steiner Forest (PCSF) *********************// 3 | // This is a C++ implementation of our heuristic algorithm for the PCSF (Akhmedov et al 2017). 4 | // The heuristic is developed for functional analyses of large biological networks in a reasonable time. 5 | 6 | 7 | // Loading the required libraries 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | using namespace boost; 37 | using namespace Rcpp; 38 | using namespace std; 39 | 40 | 41 | // Node class 42 | class Node 43 | { public: 44 | vector children; 45 | int father; 46 | int size; 47 | double price; 48 | Node(){}; 49 | Node(const Node & other){ 50 | father=other.father; 51 | children= other.children;} 52 | 53 | Node & operator= (const Node & other){ 54 | father=other.father; 55 | children= other.children; 56 | return *this; 57 | } 58 | 59 | }; 60 | 61 | // Properties of Vertices in the network 62 | struct VertexProperties { 63 | VertexProperties() :c(0) {} 64 | VertexProperties(string const & name) : name(name),c(0){} 65 | string name; 66 | double c; 67 | }; 68 | 69 | // Properties of the graph that is used within the BOOST 70 | typedef adjacency_list > GraphBase; 71 | typedef graph_traits::vertex_iterator vertex_iterator; 72 | typedef graph_traits::out_edge_iterator edge_iterator; 73 | typedef graph_traits::edge_iterator graph_edge_iterator; 74 | typedef graph_traits::edge_descriptor Edge; 75 | typedef 
graph_traits::vertex_descriptor Vertex; 76 | 77 | 78 | GraphBase g, g_adjusted; GraphBase G; GraphBase G_pruned; 79 | property_map::type weight_g; 80 | property_map::type weight_g_adjusted; 81 | property_map::type weight_G; 82 | property_map::type weight_G_pruned; 83 | 84 | int Root = -1; 85 | static map g_map; 86 | static map G_map; 87 | static map G_pruned_map; 88 | 89 | void clear_variables(){ 90 | g.clear(); 91 | g_adjusted.clear(); 92 | G.clear(); 93 | G_pruned.clear(); 94 | g_map.clear(); 95 | G_map.clear(); 96 | G_pruned_map.clear(); 97 | } 98 | 99 | // Map 100 | int idx_g(String const & id) 101 | { 102 | map::iterator mit = g_map.find(id); 103 | if (mit == g_map.end()) 104 | return g_map[id] = add_vertex(VertexProperties(id), g); 105 | return mit->second; 106 | } 107 | int idx_G(string const & id) 108 | { 109 | map::iterator mit = G_map.find(id); 110 | if (mit == G_map.end()) 111 | return G_map[id] = add_vertex(VertexProperties(id), G); 112 | return mit->second; 113 | } 114 | int idx_G_pruned(string const & id) 115 | { 116 | map::iterator mit = G_pruned_map.find(id); 117 | if (mit == G_pruned_map.end()) 118 | return G_pruned_map[id] = add_vertex(VertexProperties(id), G_pruned); 119 | return mit->second; 120 | } 121 | 122 | 123 | // Reading the input network 124 | void read_input_graph(CharacterVector from, CharacterVector to, NumericVector cost, CharacterVector prize, NumericVector prize_v) 125 | { 126 | for(int i=0; i < from.size(); i++){ 127 | add_edge(vertex(idx_g(from[i]), g), vertex(idx_g(to[i]), g), cost[i], g); 128 | } 129 | for(int i=0; i & predecessor, int & current_node){ 155 | int node=current_node; 156 | for(unsigned int j=0; j & predecessor, int & current_node, Edge &e, bool &found){ 164 | for(unsigned int i=0; i & predecessor, int & current_node, Edge &e, bool &found){ 179 | for(unsigned int i=0; i 0) 186 | predecessor[predecessor[node].father].price = predecessor[predecessor[node].father].price + predecessor[node].price - weight_G_pruned[e]; 
187 | } 188 | } 189 | 190 | 191 | 192 | 193 | // After reading the input network information from the input file, the algorithm constructs a 194 | vector< Vertex > constructG(vector & terminals, int &Root){ 195 | 196 | // Distance: all-pairs-shortest-path distance matrix 197 | // perPath: List of arcs in all-pairs-shortest-path distance matrix 198 | vector > > perPath; 199 | vector > Distance; 200 | perPath.resize (terminals.size()); 201 | Distance.resize (terminals.size()); 202 | for (unsigned int i = 0; i < terminals.size(); ++i) { 203 | perPath [i].resize(terminals.size()); 204 | Distance [i].resize(terminals.size()); 205 | } 206 | 207 | // Computing all-pairs-shortest-path distance matrix 208 | Vertex from; int current, pred, outer = 0, inner; 209 | std::vector p = vector (num_vertices(g)); 210 | std::vector d = vector (num_vertices(g)); 211 | 212 | for (std::vector::iterator first=terminals.begin(); first!=terminals.end(); ++first){ 213 | 214 | from = vertex(*first, g_adjusted); 215 | dijkstra_shortest_paths(g_adjusted, from, 216 | predecessor_map(boost::make_iterator_property_map(p.begin(), get(boost::vertex_index, g_adjusted))). 
217 | distance_map(boost::make_iterator_property_map(d.begin(), get(boost::vertex_index, g_adjusted)))); 218 | 219 | inner = outer + 1; 220 | for (std::vector::iterator second = first+1; second!=terminals.end(); ++second) { 221 | current=*second; pred=p[current]; 222 | Distance[outer][inner] = d[current]; 223 | Distance[inner][outer] = d[current]; 224 | while(pred!=current){perPath[outer][inner].push_back(current); current=pred; pred=p[current]; } perPath[outer][inner].push_back(current); 225 | inner++; 226 | } 227 | outer++; 228 | } 229 | 230 | 231 | 232 | 233 | // Heuristic Clustering, given large input network, the algorithm clusters input network into 234 | // smaller clusters, and solves the MST afterwards 235 | set V; 236 | set D; 237 | unsigned int root_index = -1; 238 | vector node_labels; 239 | node_labels.resize(terminals.size()); 240 | for(unsigned int i=0; i< node_labels.size(); i++){ 241 | node_labels[i] = 0;} 242 | for(unsigned int i=0; i= Distance[current][targ] && g[terminals[targ]].c >= Distance[current][targ]){ 261 | if(g[terminals[targ]].c > 0){ 262 | D.insert(targ); V.erase(targ); 263 | } 264 | node_labels[targ]=clusterID; 265 | } 266 | } 267 | } 268 | 269 | while(!D.empty()){ 270 | current = *D.begin(); D.erase(current); 271 | for (unsigned int i=0; i < terminals.size(); i++) { 272 | targ = i; 273 | if(node_labels[targ] == 0 && current != targ && i != root_index){ 274 | if(g[terminals[current]].c >= Distance[current][targ] && g[terminals[targ]].c >= Distance[current][targ]){ 275 | if(g[terminals[targ]].c > 0){ D.insert(targ); V.erase(targ);} 276 | node_labels[targ]=clusterID; 277 | } 278 | } 279 | } 280 | } 281 | 282 | } 283 | 284 | // Identfying the vertex membership with respect to clusters 285 | vector > clusters(clusterID+1); 286 | for(unsigned int i=0; i< node_labels.size(); i++){ 287 | for(int j=0; j<=clusterID; j++){ 288 | if(node_labels[i] == j){ 289 | clusters[j].push_back(i); 290 | } 291 | } 292 | } 293 | 294 | std::vector::iterator 
it, itt; int num_clusters=0; 295 | for(unsigned int i=1; i 1) num_clusters++; 297 | } 298 | 299 | if(num_clusters == 0){ 300 | //cout<<"There is no tree in construct G ()"< threshold_num){ 313 | for (itt=clusters[j].begin(); itt!=clusters[j].end(); ++itt){ 314 | if(min_distance > Distance[*it][*itt] -g[terminals[*it]].c - g[terminals[*itt]].c ){ 315 | min_distance = Distance[*it][*itt] -g[terminals[*it]].c - g[terminals[*itt]].c; min_index = j; 316 | } 317 | } 318 | } 319 | } 320 | 321 | 322 | if (min_index != -1){ 323 | clusters[min_index].push_back(*it); 324 | *it = -1; 325 | } 326 | } 327 | } 328 | 329 | } 330 | 331 | 332 | 333 | // Construct an artificial graph G, which is composed of all clusters determined 334 | // from Heuristic Clustering phase 335 | 336 | string str; unsigned int index1=-1, index2=-1; 337 | 338 | for (unsigned int l = 0; l spanning_tree_G(num_vertices(G)); 372 | prim_minimum_spanning_tree(G, & spanning_tree_G[0]); 373 | 374 | 375 | Edge beg; Vertex sour, tar; double cost; 376 | Edge e; bool found; 377 | 378 | weight_g = get(edge_weight, g); 379 | 380 | vector path; index1=0; index2=0; 381 | edge_iterator out_i, out_end; int add=0; 382 | 383 | // Solving the Minimum Spanning Tree on G 384 | for(unsigned int i = 0; i < spanning_tree_G.size(); ++i ){ 385 | 386 | if(spanning_tree_G[i]!=i ){ 387 | 388 | if(i> spanning_tree_G[i]) path=perPath[spanning_tree_G[i]][i]; 389 | else path=perPath[i][spanning_tree_G[i]]; 390 | 391 | for(unsigned int j=0; jspanning_tree_G_pruned(num_vertices(G_pruned)); 410 | prim_minimum_spanning_tree(G_pruned, &spanning_tree_G_pruned[0]); 411 | 412 | 413 | double total1=0; 414 | for(unsigned int i = 0; i < spanning_tree_G_pruned.size(); ++i ){ 415 | if(spanning_tree_G_pruned[i] != i){ 416 | sour= vertex(i,G_pruned); tar=vertex(spanning_tree_G_pruned[i], G_pruned); 417 | boost::tuples::tie(beg, found) = edge(sour, tar,G_pruned); 418 | total1+=get(weight_G_pruned, beg); 419 | } 420 | } 421 | 422 | 423 | return 
spanning_tree_G_pruned; 424 | 425 | } 426 | 427 | 428 | // After obtaining MST tree, the algorithm prunes the leaf nodes 429 | // which have prizes smaller than connection cost 430 | double cut(int &Root, vector< Vertex > &spanning_tree_G_pruned, vector< string > &tree_from, vector< string > &tree_to, vector< double > &tree_cost, map < string, double > &tree_terminals){ 431 | 432 | weight_G_pruned = get(edge_weight, G_pruned); 433 | 434 | Edge e; bool found; 435 | 436 | Edge beg; Vertex sour, tar; int ancestor=-1; bool select=false; 437 | 438 | 439 | int root= -1; 440 | if(Root == -1){ 441 | double max=0; 442 | vertex_iterator ei, ef; 443 | for(tie(ei, ef)= vertices(G_pruned); ei!=ef; ei++){ 444 | if(g[boost::lexical_cast(G_pruned[*ei].name)].c > max){ 445 | root = *ei; 446 | max = g[boost::lexical_cast(G_pruned[*ei].name)].c; 447 | } 448 | } 449 | } else {root = idx_G_pruned(to_string(Root));} 450 | 451 | 452 | 453 | select = true; 454 | 455 | bool ancestor_changed=true; unsigned int father, temp; ancestor = root; 456 | if (select){ 457 | father=spanning_tree_G_pruned[ancestor]; 458 | spanning_tree_G_pruned[ancestor]=ancestor; 459 | while(ancestor_changed){ 460 | if(spanning_tree_G_pruned[father]==father){ 461 | ancestor_changed=false; 462 | spanning_tree_G_pruned[father]=ancestor; 463 | }else{ 464 | temp=spanning_tree_G_pruned[father]; 465 | spanning_tree_G_pruned[father]=ancestor; 466 | ancestor=father; father=temp; 467 | } 468 | 469 | } 470 | 471 | }else{ 472 | //cout <<"There is no tree"< predecessor(num_vertices(G_pruned)); 479 | if(select){ 480 | for(unsigned int i = 0; i < spanning_tree_G_pruned.size(); ++i ){ 481 | if(spanning_tree_G_pruned[i]!=i){ 482 | predecessor[i].father=spanning_tree_G_pruned[i]; 483 | predecessor[spanning_tree_G_pruned[i]].children.push_back(i); 484 | }else{predecessor[i].father=i;} 485 | } 486 | } 487 | 488 | 489 | for(unsigned int i = 0; i < predecessor.size(); ++i ){ 490 | predecessor[i].size=predecessor[i].children.size(); 491 | 
predecessor[i].price=g[boost::lexical_cast(G_pruned[i].name)].c; 492 | 493 | } 494 | 495 | 496 | price_collect(predecessor, root, e, found); 497 | 498 | process_leafs(predecessor, root, e, found); 499 | 500 | weight_g = get(edge_weight, g); 501 | 502 | // Tree 503 | for(unsigned int i = 0; i < predecessor.size(); ++i ){ 504 | if(predecessor[i].father != -1 && predecessor[i].father != (int) i ){ 505 | sour= vertex(boost::lexical_cast(G_pruned[i].name),g); tar = vertex(boost::lexical_cast(G_pruned[predecessor[i].father].name),g); 506 | boost::tuples::tie(beg, found) = edge(sour, tar,g); 507 | tree_from.push_back(g[boost::lexical_cast(G_pruned[i].name)].name); 508 | tree_to.push_back(g[boost::lexical_cast(G_pruned[predecessor[i].father].name)].name); 509 | tree_cost.push_back(weight_g[beg]); 510 | tree_terminals[g[boost::lexical_cast(G_pruned[i].name)].name] = g[boost::lexical_cast(G_pruned[i].name)].c; 511 | tree_terminals[g[boost::lexical_cast(G_pruned[predecessor[i].father].name)].name] = g[boost::lexical_cast(G_pruned[predecessor[i].father].name)].c; 512 | } 513 | } 514 | 515 | 516 | 517 | double total = 0, lostPrice =0; 518 | int uncovered_nodes = 0; 519 | for(unsigned int i = 0; i < predecessor.size(); ++i ){ 520 | if(predecessor[i].father != -1){ 521 | if(predecessor[i].father != (int) i){ 522 | sour= vertex(i,G_pruned); tar = vertex(predecessor[i].father,G_pruned); 523 | boost::tuples::tie(beg, found) = edge(sour, tar,G_pruned); 524 | total+=get(weight_G_pruned, beg); 525 | } 526 | } 527 | } 528 | 529 | 530 | // Lsit of nodes that are outside of final tree 531 | vector calculatecost(num_vertices(g)); 532 | for(unsigned int i = 0; i < predecessor.size(); ++i ){ 533 | if(predecessor[i].father != -1 && predecessor[i].father != (int) i){ 534 | sour= vertex(boost::lexical_cast(G_pruned[i].name),g); tar = vertex(boost::lexical_cast(G_pruned[predecessor[i].father].name),g); 535 | calculatecost[sour]=1; calculatecost[tar]=1; 536 | } 537 | } 538 | 539 | // Uncovered 
nodes 540 | for(unsigned int i = 0; i < num_vertices(g); ++i ){ 541 | if(calculatecost[i] == 0 && (int) i != root ){ 542 | lostPrice += g[i].c; 543 | uncovered_nodes++; 544 | } 545 | } 546 | 547 | // The list of Nodes in the final Tree 548 | for(unsigned int i = 0; i < num_vertices(g); ++i ){ 549 | if(calculatecost[i] == 1){ 550 | } 551 | } 552 | 553 | // Objective value 554 | return total + lostPrice; 555 | 556 | } 557 | 558 | 559 | //' Internal function \code{call_sr} 560 | //' 561 | //' This function is internally used to solve the PCST. 562 | //' 563 | //' @keywords internal 564 | //' 565 | //' @param from A \code{CharacterVector} that corresponds to \code{head} nodes of the edges. 566 | //' @param to A \code{CharacterVector} that corresponds the \code{tail} nodes of the edges. 567 | //' @param cost A \code{NumericVector} which represents the edge weights. 568 | //' @param node_names A \code{CharacterVector} demonstrates the names of the nodes. 569 | //' @param node_prizes A \code{NumericVector} which corresponds to the node prizes. 
570 | //' @author Murodzhon Akhmedov 571 | //' 572 | // [[Rcpp::export]] 573 | List call_sr(CharacterVector from, CharacterVector to, NumericVector cost, CharacterVector node_names, NumericVector node_prizes) 574 | { 575 | clear_variables(); 576 | 577 | vector terminals; 578 | 579 | read_input_graph(from, to, cost, node_names, node_prizes); 580 | 581 | Root = idx_g("DUMMY"); 582 | 583 | double max_price=0; int max_price_index = -1; 584 | for(unsigned int i=0; i max_price){ 586 | max_price = g[i].c; 587 | max_price_index = i; 588 | } 589 | } 590 | 591 | if(Root != -1){ 592 | for(unsigned int i=0; i0){ 594 | terminals.push_back(i); 595 | } 596 | } 597 | }else{ 598 | Root = max_price_index; 599 | for(unsigned int i=0; i0){ 601 | terminals.push_back(i); 602 | } 603 | } 604 | } 605 | 606 | terminals.push_back(Root); 607 | 608 | 609 | if(terminals.size() <=1){ 610 | // There is no tree 611 | //return 0; 612 | } 613 | 614 | 615 | vector< Vertex > spanning_tree; 616 | spanning_tree = constructG(terminals, Root); 617 | 618 | vector< string > tree_from; 619 | vector< string > tree_to; 620 | vector< double > tree_cost; 621 | map < string, double > tree_terminals; 622 | double obj = cut(Root, spanning_tree, tree_from, tree_to, tree_cost, tree_terminals); if (obj == 0) return 0; 623 | 624 | CharacterVector tree_f(tree_from.size()); 625 | CharacterVector tree_t(tree_to.size()); 626 | NumericVector tree_c(tree_cost.size()); 627 | CharacterVector tree_ter(tree_terminals.size()); 628 | NumericVector tree_ter_p(tree_terminals.size()); 629 | 630 | for(unsigned int i=0; i::iterator it=tree_terminals.begin(); it!=tree_terminals.end(); ++it){ 638 | tree_ter[counter] = it->first; 639 | tree_ter_p[counter] = it->second; 640 | counter++; 641 | } 642 | 643 | List tree = List::create(tree_from, tree_to, tree_cost, tree_ter, tree_ter_p); 644 | 645 | 646 | return tree; 647 | } 648 | 649 | 650 | 651 | 652 | 653 | 654 | 
--------------------------------------------------------------------------------