├── .BBSoptions ├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── check-bioc.yml ├── .gitignore ├── DESCRIPTION ├── Dockerfile ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS ├── R ├── Nets_writeSIF.R ├── RR_featureTally.R ├── buildPredictor.R ├── buildPredictor_sparseGenetic.R ├── callFeatSel.R ├── cleanPathwayName.R ├── compareShortestPath.R ├── compileFeatureScores.R ├── compileFeatures.R ├── countPatientsInNet.R ├── createPSN_MultiData.R ├── dataList2list.R ├── enrichLabelNets.R ├── fileCache.R ├── getCorrType.R ├── getEmapInput.R ├── getEmapInput_many.R ├── getFeatureScores.R ├── getNetConsensus.R ├── getPatientPredictions.R ├── getPatientRankings.R ├── getRegionOL.R ├── getSimilarity.R ├── globals.R ├── helper.R ├── makePSN_NamedMatrix.R ├── makePSN_RangeSets.R ├── makeQueries.R ├── makeSymmetric.R ├── mapNamedRangesToSets.R ├── matrix_getIJ.R ├── moveInteractionNets.R ├── perfCalc.R ├── plotEmap.R ├── plotIntegratedPatientNetwork.R ├── plotPerf.R ├── plotPerf_multi.R ├── plotPerf_simple.R ├── predict.R ├── predictPatientLabels.R ├── pruneNetByStrongest.R ├── pruneNet_pctX.R ├── pruneNets.R ├── readPathways.R ├── runFeatureSelection.R ├── runProfileToNetworks.R ├── runQuery.R ├── setupFeatureDB.R ├── similarities.R ├── simpleCap.R ├── smooMutationPropagation.R ├── sparsenet_enrichment_functions.R ├── sparsify2.R ├── sparsify3.R ├── splitTestTrain_partition.R ├── splitTestTrain_resampling.R ├── updateNets.R ├── utils.R ├── writeQueryBatchFile.R ├── writeQueryFile.R ├── writeWeightedNets.R └── zzz.R ├── README.md ├── data ├── MB.pheno.rda ├── cnv_GR.rda ├── cnv_TTstatus.rda ├── cnv_netPass.rda ├── cnv_netScores.rda ├── cnv_patientNetCount.rda ├── cnv_pheno.rda ├── confmat.rda ├── featScores.rda ├── genes.rda ├── modelres.rda ├── npheno.rda ├── pathwayList.rda ├── pathway_GR.rda ├── pheno.rda ├── pheno_full.rda ├── predRes.rda ├── silh.rda ├── toymodel.rda └── xpr.rda ├── inst ├── CITATION └── extdata │ ├── AGP1_CNV.txt │ ├── GM_NRANK │ ├── 
CV_1.query-results.report.txt.NRANK │ └── CV_2.query-results.report.txt.NRANK │ ├── GM_PRANK │ ├── CV_1.query-results.report.txt.PRANK │ └── CV_2.query-results.report.txt.PRANK │ ├── GM_query.txt │ ├── INSTALL │ ├── Dockerfile │ ├── INSTALL_OSX.sh │ └── INSTALL_Unix.sh │ ├── TGCT_mutSmooth_geno.txt │ ├── TGCT_mutSmooth_pheno.txt │ ├── dbPath │ ├── 1 │ │ ├── _0.cfs │ │ ├── metadata.xml │ │ ├── segments.gen │ │ └── segments_2 │ ├── base │ │ ├── _0.cfs │ │ ├── segments.gen │ │ └── segments_2 │ ├── cache │ │ └── CORE │ │ │ └── 1 │ │ │ ├── 1.ser │ │ │ ├── 2.ser │ │ │ ├── 3.ser │ │ │ ├── 4.ser │ │ │ ├── DatasetInfo.ser │ │ │ ├── attributeGroups.ser │ │ │ ├── networkIds.ser │ │ │ └── nodeIds.ser │ ├── genemania.xml │ └── user │ │ ├── segments.gen │ │ └── segments_1 │ ├── example_nets │ ├── BIG_CASE.txt │ ├── BIG_CONTROL.txt │ ├── BOTH_EQUAL.txt │ ├── MOSTLY_CASE.txt │ ├── SMALL_CASE.txt │ └── SMALL_CONTROL.txt │ ├── example_output │ ├── inputNets.txt │ ├── rng1 │ │ ├── LumA │ │ │ └── GM_results │ │ │ │ └── LumA_pathway_CV_score.txt │ │ ├── notLumA │ │ │ └── GM_results │ │ │ │ └── notLumA_pathway_CV_score.txt │ │ └── predictionResults.txt │ ├── rng2 │ │ ├── LumA │ │ │ └── GM_results │ │ │ │ └── LumA_pathway_CV_score.txt │ │ ├── notLumA │ │ │ └── GM_results │ │ │ │ └── notLumA_pathway_CV_score.txt │ │ └── predictionResults.txt │ └── rng3 │ │ ├── LumA │ │ └── GM_results │ │ │ └── LumA_pathway_CV_score.txt │ │ ├── notLumA │ │ └── GM_results │ │ │ └── notLumA_pathway_CV_score.txt │ │ └── predictionResults.txt │ ├── genemania.xml │ ├── pathway_ex3.gmt │ ├── pathways.gmt │ └── plots │ ├── SURVIVENO.gmt │ ├── SURVIVENO_nodeAttrs.txt │ ├── SURVIVEYES.gmt │ └── SURVIVEYES_nodeAttrs.txt ├── man ├── MB.pheno.Rd ├── RR_featureTally.Rd ├── avgNormDiff.Rd ├── buildPredictor.Rd ├── buildPredictor_sparseGenetic.Rd ├── callFeatSel.Rd ├── callOverallSelectedFeatures.Rd ├── cleanPathwayName.Rd ├── cnv_GR.Rd ├── cnv_TTstatus.Rd ├── cnv_netPass.Rd ├── cnv_netScores.Rd ├── 
cnv_patientNetCount.Rd ├── cnv_pheno.Rd ├── compareShortestPath.Rd ├── compileFeatureScores.Rd ├── compileFeatures.Rd ├── confmat.Rd ├── confusionMatrix.Rd ├── convertProfileToNetworks.Rd ├── countIntType.Rd ├── countIntType_batch.Rd ├── countPatientsInNet.Rd ├── createPSN_MultiData.Rd ├── dataList2List.Rd ├── dot-get_cache.Rd ├── enrichLabelNets.Rd ├── featScores.Rd ├── fetchPathwayDefinitions.Rd ├── genes.Rd ├── getCorrType.Rd ├── getEMapInput.Rd ├── getEMapInput_many.Rd ├── getEnr.Rd ├── getFeatureScores.Rd ├── getFileSep.Rd ├── getGMjar_path.Rd ├── getNetConsensus.Rd ├── getOR.Rd ├── getPSN.Rd ├── getPatientPredictions.Rd ├── getPatientRankings.Rd ├── getPerformance.Rd ├── getRegionOL.Rd ├── getResults.Rd ├── getSimilarity.Rd ├── makeInputForEnrichmentMap.Rd ├── makePSN_NamedMatrix.Rd ├── makePSN_RangeSets.Rd ├── makeQueries.Rd ├── makeSymmetric.Rd ├── mapNamedRangesToSets.Rd ├── matrix_getIJ.Rd ├── modelres.Rd ├── moveInteractionNets.Rd ├── normDiff.Rd ├── npheno.Rd ├── pathwayList.Rd ├── pathway_GR.Rd ├── perfCalc.Rd ├── pheno.Rd ├── pheno_full.Rd ├── plotEmap.Rd ├── plotIntegratedPatientNetwork.Rd ├── plotPerf.Rd ├── plotPerf_multi.Rd ├── predRes.Rd ├── predict.Rd ├── predictPatientLabels.Rd ├── pruneNet.Rd ├── pruneNet_pctX.Rd ├── pruneNets.Rd ├── randAlphanumString.Rd ├── readPathways.Rd ├── replacePattern.Rd ├── runFeatureSelection.Rd ├── runQuery.Rd ├── setupFeatureDB.Rd ├── silh.Rd ├── sim.eucscale.Rd ├── sim.pearscale.Rd ├── simpleCap.Rd ├── smoothMutations_LabelProp.Rd ├── sparsify2.Rd ├── sparsify3.Rd ├── splitTestTrain.Rd ├── splitTestTrain_resampling.Rd ├── tSNEPlotter.Rd ├── thresholdSmoothedMutations.Rd ├── toymodel.Rd ├── updateNets.Rd ├── writeNetsSIF.Rd ├── writeQueryBatchFile.Rd ├── writeQueryFile.Rd ├── writeWeightedNets.Rd └── xpr.Rd ├── tests ├── testthat.R └── testthat │ ├── test_buildpredictor.R │ └── test_suite.R └── vignettes ├── BuildPredictor.Rmd ├── EMap_realworld.png ├── Emap_example_screenshot.png ├── 
Predict_CaseControl_from_CNV.Rmd.old ├── ThreeWayClassifier.Rmd ├── ValidateNew.Rmd ├── images └── vignette1_design.jpg ├── integratedPSN_MEAN_top0.10.png └── prepare_data.R /.BBSoptions: -------------------------------------------------------------------------------- 1 | # The reason this package is marked as unsupported on win32 is that 2 | # it requires the Java JDK. However it seems that Oracle no longer 3 | # provides the JDK for 32-bit windows: 4 | # https://www.oracle.com/java/technologies/javase-jdk14-downloads.html 5 | UnsupportedPlatforms: win32 6 | # Unsupported on Windows per maintainer request until package gets 7 | # switched to rJava. 8 | UnsupportedPlatforms: win 9 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^LICENSE\.md$ 2 | .git 3 | ^doc$ 4 | ^Meta$ 5 | ^\.github$ 6 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | doc 6 | Meta 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: netDx 2 | Title: Network-based patient classifier 3 | Version: 1.5.3 4 | Authors@R: c(person("Shraddha", "Pai", 5 | email = "shraddha.pai@utoronto.ca", 6 | role = c("aut", "cre"), 7 | comment = c(ORCID = "0000-0002-1048-581X")), 8 | person("Philipp","Weber",role="aut"), 9 | person("Ahmad","Shah", role="aut"), 10 | person("Luca","Giudice",role="aut"), 11 | person("Shirley","Hui",role="aut"), 12 | 
person("Ruth","Isserlin",role="aut"), 13 | person("Hussam","Kaka", role="aut"), 14 | person("Gary","Bader",role="aut")) 15 | Description: netDx is a general-purpose algorithm to build a patient classifier from heterogenous patient data. The method converts data into patient similarity networks at the level of features. Feature selection identifies features of predictive value to each class. Methods are provided for versatile predictor design and performance evaluation using standard measures. netDx natively groups molecular data into pathway-level features and connects with Cytoscape for network visualization of pathway themes. For method details see: Pai et al. (2019). netDx: interpretable patient classification using integrated patient similarity networks. Molecular Systems Biology. 15, e8497 16 | Depends: 17 | R (>= 3.6) 18 | Suggests: 19 | curatedTCGAData, 20 | TCGAutils, 21 | rmarkdown, 22 | testthat, 23 | knitr, 24 | BiocStyle, 25 | RCy3, 26 | clusterExperiment, 27 | netSmooth, 28 | scater 29 | Imports: ROCR,pracma,ggplot2,glmnet,igraph,reshape2, 30 | parallel,stats,utils,MultiAssayExperiment,graphics,grDevices, 31 | methods,BiocFileCache,GenomicRanges, 32 | bigmemory,doParallel,foreach, 33 | combinat,rappdirs,GenomeInfoDb,S4Vectors, 34 | IRanges,RColorBrewer,Rtsne,httr,plotrix 35 | VignetteBuilder: knitr 36 | Encoding: UTF-8 37 | License: MIT + file LICENSE 38 | LazyData: false 39 | URL: http://netdx.org 40 | biocViews: Classification, BiomedicalInformatics, Network, SystemsBiology 41 | RoxygenNote: 7.1.2 42 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM bioconductor/bioconductor_docker:devel 3 | 4 | WORKDIR /home/rstudio 5 | 6 | COPY --chown=rstudio:rstudio . 
/home/rstudio/ 7 | 8 | RUN Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); BiocManager::install(ask=FALSE)" 9 | 10 | RUN Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); devtools::install('.', dependencies=TRUE, build_vignettes=TRUE, repos = BiocManager::repositories())" 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: netDx 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2019 netDx 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | netDx 1.5.3
2 | ==================
3 | * Moved RCy3, scater, clusterExperiment and netSmooth to "Suggests" to reduce dependency burden
4 | * Sped up vignettes by limiting all to binary classification and limiting number of layers
5 | * Removed TL;DR from vignettes, as its usefulness is in question but its maintenance burden is high.
6 |
7 | Developers notes:
8 | -------------------
9 | * Added Dockerfile and Github Actions for automated testing
10 | * GHA auto-generates a Docker image with netDx which gets pushed to shraddhapai/netdx_devenv
11 |
12 | netDx 1.5.2
13 | ==================
14 | * Added wrapper functions for ease-of-use. Includes:
15 | * getResults() to plot results of running the predictor
16 | * getPSN() for creating and visualizing integrated PSN
17 | * confusionMatrix() to visualize confusion matrix
18 | * tSNEPlotter() to visualize tSNE of integrated PSN (doesn't require Cytoscape)
19 | * Added CITATION file with citations to netDx methods and software paper
20 |
21 | netDx 1.5.1
22 | ==================
23 | * Adding support for Java 16.
24 | * Disabling CNV-based vignette to allow other three vignettes to run without causing build timeout on devel system
25 |
26 | netDx 1.1.4
27 | ==================
28 | Changes:
29 | * New functionality to smooth mutations over interaction, starting from sparse
30 | somatic mutations
31 | * BiocFileCache usage update
32 |
33 | Changes in version 0.99.0 (2019-11-11)
34 | + Submitted to Bioconductor
35 |
--------------------------------------------------------------------------------
/R/Nets_writeSIF.R:
--------------------------------------------------------------------------------
1 | #' write patient networks in Cytoscape's .sif format
2 | #'
3 | #' @details Converts a set of binary interaction networks into Cytoscape's
4 | #' sif format.
5 | #' (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats)
6 | #' This utility permits visualization of feature selected networks.
7 | #'
8 | #' @param netPath (char): vector of path to network files; file suffix
9 | #' should be '_cont.txt'
10 | #' networks should be in format: A B 1
11 | #' where A and B are nodes, and 1 indicates an edge between them
12 | #' @param outFile (char) path to .sif file. An existing file at this path
13 | #' is replaced, not appended to.
14 | #' @param netSfx (char) suffix for network file name. Passed to
15 | #' \code{sub()} as the pattern, so it is interpreted as a regular expression.
16 | #' @return No value. Side effect of writing all networks to \code{outFile}
17 | #' @examples
18 | #' netDir <- system.file("extdata","example_nets",package="netDx")
19 | #' netFiles <- paste(netDir,dir(netDir,pattern='txt$'),
20 | #' sep=getFileSep())
21 | #' writeNetsSIF(netFiles,'merged.sif',netSfx='.txt')
22 | #' @export
23 | writeNetsSIF <- function(netPath,
24 |     outFile=paste(tempdir(),"out.sif",sep=getFileSep()),
25 |     netSfx = "_cont.txt") {
26 |
27 |     # Always start from an empty output file. The networks below are written
28 |     # with append=TRUE, so a file left over from an earlier run would
29 |     # otherwise be extended. unlink()/file.create() work on every platform.
30 |     if (file.exists(outFile)) unlink(outFile)
31 |     file.create(outFile)
32 |
33 |     for (n in netPath) {
34 |         netName <- sub(netSfx, "", basename(n))
35 |         message(sprintf("%s\n", netName))
36 |
37 |         dat <- read.delim(n, sep = "\t", header = FALSE, as.is = TRUE)
38 |         # one SIF row per edge: source node, network name, target node
39 |         dat2 <- cbind(dat[, 1], netName, dat[, 2])
40 |
41 |         write.table(dat2, file = outFile, append = TRUE, sep = "\t",
42 |             col.names = FALSE,
43 |             row.names = FALSE, quote = FALSE)
44 |     }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/R/callFeatSel.R:
--------------------------------------------------------------------------------
1 | #' Return feature selected nets based on given criteria
2 | #'
3 | #' @details given the output of genNetScores.R and criteria for defining
4 | #' feature-selected (FS) nets, returns subset of nets that pass criteria.
5 | #' Net must score for at least % of splits, to be
6 | #' considered feature-selected.
7 | #' @param netScores (matrix) matrix of net scores, one row per net. The value
8 | #' in column 1 of a passing row is what gets returned for that net.
9 | #' @param fsCutoff (integer) net must score at least this much in a split to
10 | #' 'pass' the threshold
11 | #' @param fsPctPass (numeric 0 to 1) net must pass at least this percent of
12 | #' splits to be considered feature-selected
13 | #' @return (char) names of nets that pass feature-selection (column 1 of
14 | #' \code{netScores} for the passing rows); NULL if no net passes
15 | #' @examples
16 | #' data(featScores)
17 | #' passed <- lapply(featScores, function(x) {
18 | #' callFeatSel(x,10,0.7) # score 10/10 in >=70% of trials
19 | #' })
20 | #' print(passed)
21 | #' @export
22 | callFeatSel <- function(netScores, fsCutoff, fsPctPass) {
23 |     selected <- c()
24 |     for (i in seq_len(nrow(netScores))) {
25 |         rowVals <- netScores[i, ]
26 |         # NOTE(review): every entry of the row, column 1 included, takes part
27 |         # in both the comparison and the denominator - confirm this is
28 |         # intended when column 1 holds net names rather than scores.
29 |         # NA comparisons are not counted as passes.
30 |         fracPass <- sum(rowVals >= fsCutoff, na.rm = TRUE)/length(rowVals)
31 |         if (fracPass < fsPctPass) next
32 |         selected <- c(selected, netScores[, 1][i])
33 |     }
34 |     selected
35 | }
36 |
--------------------------------------------------------------------------------
/R/cleanPathwayName.R:
--------------------------------------------------------------------------------
1 | #' Clean pathway name so it can be a filename.
2 | #'
3 | #' @details Applies a fixed series of \code{gsub()} substitutions, in order.
4 | #' Spaces, angle brackets, parentheses, '&', ';', ':' and '/' become '_';
5 | #' '+' becomes 'plus'; ',' becomes '.'; a few specific non-ASCII bytes are
6 | #' replaced by '_' or 'X'.
7 | #' @param curP (char) pathway name
8 | #' @export
9 | #' @examples
10 | #' cleanPathwayName('7-(3-AMINO-3-CARBOXYPROPYL)-WYOSINE BIOSYNTHESIS%HUMANC')
11 | #' @return (char) Cleaned pathway name
12 | cleanPathwayName <- function(curP) {
13 |     # (pattern, replacement) pairs; order matters and is the same as the
14 |     # order in which the substitutions have always been applied
15 |     subs <- list(
16 |         c(" ", "_"),
17 |         c("<", "_"),
18 |         c(">", "_"),
19 |         c("\\(", "_"),
20 |         c("\\)", "_"),
21 |         c("&", "_"),
22 |         c(";", "_"),
23 |         c(":", "_"),
24 |         c("\\/", "_"),
25 |         c("\\\xec", "X"),
26 |         c("\\\xc2\\\xa0", "_"),
27 |         c("\\\xa0", "X"),
28 |         c("\\\xca", "_"),
29 |         c("\\+", "plus"),
30 |         c(",", ".")
31 |     )
32 |
33 |     cleaned <- curP
34 |     for (s in subs) {
35 |         cleaned <- gsub(s[1], s[2], cleaned)
36 |     }
37 |     cleaned
38 | }
39 |
--------------------------------------------------------------------------------
/R/compileFeatureScores.R:
--------------------------------------------------------------------------------
1 | #' Tally the score of networks through cross-validation
2 | #'
3 | #' @param fList (char) Vector of paths to GeneMANIA NRANK files
4 | #' @param filter_WtSum (numeric between 5-100) Limit to top-ranked
5 | #' networks such that cumulative weight is less than this parameter.
6 | #' e.g. If filter_WtSum=20, first order networks by decreasing weight;
7 | #' then keep those whose cumulative weight <= 20.
8 | #' @param verbose (logical) print messages
9 | #' @return (data.frame) Feature name and score; includes features that occur
10 | #' at least once in \code{fList}.
11 | #' @examples
12 | #' netDir <- system.file("extdata","GM_NRANK",package="netDx")
13 | #' netFiles <- sprintf('%s/%s', netDir,dir(netDir,pattern='NRANK$'))
14 | #' pTally <- compileFeatureScores(netFiles,verbose=TRUE)
15 | #' print(head(pTally))
16 | #' @export
17 | compileFeatureScores <- function(fList, filter_WtSum = 100, verbose = FALSE) {
18 |
19 |     if (filter_WtSum < 5) {
20 |         message("filter_WtSum cannot be < 5 ; setting to 5")
21 |         filter_WtSum <- 5
22 |     }
23 |
24 |     pathwayTally <- list()
25 |     for (fName in fList) {
26 |         # Keep the value of try() itself. Assigning inside the try() call
27 |         # leaves `dat` untouched when the read fails, so the previous
28 |         # file's table would be tallied a second time.
29 |         dat <- try(
30 |             read.delim(fName, sep = "\t", header = TRUE,
31 |                 as.is = TRUE, skip = 1), silent = TRUE)
32 |
33 |         # unreadable file, or one with no network rows: nothing to tally
34 |         if (inherits(dat, "try-error") || nrow(dat) < 1) next
35 |
36 |         if (verbose) {
37 |             message("Net weight distribution:")
38 |             print(summary(dat$Weight))
39 |         }
40 |
41 |         # rank networks by decreasing weight
42 |         dat <- dat[order(dat$Weight, decreasing = TRUE), ]
43 |
44 |         # keep the top-ranked networks up to the rank whose cumulative
45 |         # weight is closest to filter_WtSum
46 |         cs <- cumsum(dat$Weight)
47 |         keep_max <- which.min(abs(cs - filter_WtSum))
48 |
49 |         dat <- dat[seq_len(keep_max), ]
50 |         if (verbose) {
51 |             message(sprintf(paste("filter_WtSum = %1.1f; ",
52 |                 "%i of %i networks left",sep=""),
53 |                 filter_WtSum, nrow(dat), length(cs)))
54 |         }
55 |
56 |         # every network that survives the filter gets +1 for this file
57 |         for (k in dat$Network) {
58 |             if (!k %in% names(pathwayTally))
59 |                 pathwayTally[[k]] <- 0
60 |             pathwayTally[[k]] <- pathwayTally[[k]] + 1
61 |         }
62 |     }
63 |
64 |     # no file contributed any network: return an empty, well-formed table
65 |     if (length(pathwayTally) < 1) {
66 |         return(data.frame(name = character(0), score = integer(0),
67 |             stringsAsFactors = FALSE))
68 |     }
69 |
70 |     out <- sort(unlist(pathwayTally), decreasing = TRUE)
71 |     data.frame(name = names(out), score = as.integer(out),
72 |         stringsAsFactors = FALSE)
73 | }
74 |
--------------------------------------------------------------------------------
/R/countPatientsInNet.R:
--------------------------------------------------------------------------------
1 | #' Count number of patients in a network
2 | #'
3 | #' @details This functionality is needed to count patient overlap when
4 | #' input data is in a form that results in highly missing data, rather than
5 | #' when the same measures are available for almost all patients. An example
6 | #' application is when patient networks are based on unique genomic events
7 | #' in each patients (e.g. CNVs or indels), rather than 'full-matrix' data
8 | #' (e.g. questionnaires or gene expression matrices). The former scenario
9 | #' requires an update in the list of eligible networks each time some type
10 | #' of patient subsetting is applied (e.g. label enrichment, or train/test
11 | #' split).
A matrix with patient/network membership serves as a lookup
12 | #' table to prune networks as feature selection proceeds
13 | #' @param netDir (char) dir with network set
14 | #' @param fList (char) filenames of interaction networks to count in
15 | #' @param ids (char) patient IDs to look for
16 | #' @return (matrix) Size P by N, where P is num patients and N is
17 | #' number of networks; a[i,j] =1 if patient i in network j, else 0.
18 | #' Rows are named by \code{ids} and columns by \code{fList}.
19 | #' @examples
20 | #' d <- tempdir()
21 | #' pids <- paste("P",1:5,sep="")
22 | #' m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),
23 | #' byrow=FALSE,ncol=3)
24 | #' write.table(m1,
25 | #' file=paste(d,"net1.txt",sep=getFileSep()),sep="\t",
26 | #' col.names=FALSE,row.names=FALSE,quote=FALSE)
27 | #' m2 <- matrix(c("P3","P4",1),nrow=1)
28 | #' write.table(m2,
29 | #' file=paste(d,"net2.txt",sep=getFileSep()),sep="\t",
30 | #' col.names=FALSE,row.names=FALSE,quote=FALSE)
31 | #' x <- countPatientsInNet(d,c("net1.txt","net2.txt"), pids)
32 | #' @export
33 | countPatientsInNet <- function(netDir, fList, ids) {
34 |
35 |     membership <- matrix(0, nrow = length(ids), ncol = length(fList))
36 |     colnames(membership) <- fList
37 |     rownames(membership) <- ids
38 |
39 |     for (j in seq_along(fList)) {
40 |         netFile <- paste(netDir, fList[j], sep = getFileSep())
41 |         edges <- read.delim(netFile, sep = "\t", header = FALSE,
42 |             as.is = TRUE)
43 |         # a patient is in the network if it appears at either end of an edge
44 |         inNet <- ids %in% c(edges[, 1], edges[, 2])
45 |         membership[inNet, j] <- 1
46 |     }
47 |
48 |     return(membership)
49 | }
50 |
--------------------------------------------------------------------------------
/R/dataList2list.R:
--------------------------------------------------------------------------------
1 | #' Convert MultiAssayExperiment object to list and data.frame
2 | #'
3 | #' @details Used by internal routines in netDx
4 | #' @param dat (MultiAssayExperiment) Patient data and metadata
5 | #' @param groupList (list) variable groupings used for feature construction.
See groupList arg in buildPredictor().
6 | #' Only its "clinical" entry is read here: if present, the listed
7 | #' \code{colData} variables are added as a "clinical" layer.
8 | #' @return (list) Keys are:
9 | #' 1) assays: list of matrices, each corresponding to data from a particular
10 | #' layer
11 | #' 2) pheno: (data.frame) sample metadata
12 | #' @examples
13 | #' data(xpr,pheno)
14 | #' require(MultiAssayExperiment)
15 | #' objlist <- list("RNA"=SummarizedExperiment(xpr))
16 | #' mae <- MultiAssayExperiment(objlist,pheno)
17 | #' groupList <- list(RNA=rownames(xpr))
18 | #' dl <- dataList2List(mae,groupList)
19 | #' summary(dl)
20 | #' @export
21 | dataList2List <- function(dat,groupList) {
22 |
23 |     # convert assays to list of matrices, replacing assay-specific sample
24 |     # name with patient ID
25 |     exprs <- experiments(dat)
26 |     smap <- sampleMap(dat)
27 |     datList2 <- list()
28 |     for (k in seq_len(length(exprs))) {
29 |         tmp <- exprs[[k]]
30 |         df <- smap[which(smap$assay==names(exprs)[k]),]
31 |
32 |         # For each assay column, find its row in the sample map and take
33 |         # that row's patient ID. The lookup must run from the assay's
34 |         # columns into the map; the reverse direction only gives the right
35 |         # answer when both happen to be in the same order.
36 |         colnames(tmp) <- df$primary[match(colnames(tmp), df$colname)]
37 |
38 |         # container objects are reduced to their first assay, as a matrix
39 |         if (any(c("SimpleList", "SummarizedExperiment") %in% class(tmp))) {
40 |             tmp <- as.matrix(assays(tmp)[[1]])
41 |         }
42 |         datList2[[names(exprs)[k]]]<- tmp
43 |     }
44 |
45 |     # clinical variables live in colData; expose them as one more layer
46 |     # with variables in rows and patients in columns
47 |     if ("clinical" %in% names(groupList)) {
48 |         tmp <- colData(dat)
49 |         vars <- unique(unlist(groupList[["clinical"]]))
50 |         datList2[["clinical"]] <- t(as.matrix(tmp[,vars,drop=FALSE]))
51 |     }
52 |
53 |     pheno_all <- colData(dat)
54 |     pheno_all <- as.data.frame(pheno_all)
55 |
56 |     out <- list(
57 |         assays=datList2,
58 |         pheno=pheno_all)
59 |     return(out)
60 | }
61 |
--------------------------------------------------------------------------------
/R/getCorrType.R:
--------------------------------------------------------------------------------
1 | #' Counts the relative correlation of (+,+) and (+,-)(-,-) interactions
2 | #'
3 | #' @param inFile (character): path to interaction networks
4 | #' @param plusID (character) vector of + nodes
5 | #' @param minusID (character) vector of - nodes
6 | #'
@return (numeric) mean edge weight for (+,+) and other edges
7 | #' Returned as a vector of length 2: first the mean over (+,+) edges, then
8 | #' the mean over all remaining edges.
9 | getCorrType <- function(inFile, plusID, minusID) {
10 |     # NOTE: minusID is not used in the computation; "other" edges are all
11 |     # those that are not (+,+).
12 |     dat <- read.delim(inFile, sep = "\t", header = FALSE, as.is = TRUE)
13 |
14 |     # TRUE for edges whose two endpoints are both '+' nodes
15 |     pp_idx <- dat[, 1] %in% plusID & dat[, 2] %in% plusID
16 |     pp_corr <- mean(dat[pp_idx, 3])
17 |
18 |     # pp_idx is a logical mask, so the remaining edges are its negation
19 |     # (set-differencing it against row numbers only ever removes row 1)
20 |     pm_corr <- mean(dat[!pp_idx, 3])
21 |
22 |     return(c(pp_corr, pm_corr))
23 | }
24 |
--------------------------------------------------------------------------------
/R/getEmapInput_many.R:
--------------------------------------------------------------------------------
1 | #' Wrapper to generate multiple EnrichmentMaps (perhaps one per class)
2 | #'
3 | #' @param featScores (list) keys are classes, and values are data.frames of
4 | #' network scores across cross-validation (output of getFeatScores()).
5 | #' @param namedSets_valid (list) Grouped unit variables limited to the
6 | #' units contained in the dataset. e.g. keys are pathways and values are
7 | #' the genes measured in this dataset.
8 | #' e.g.:
9 | #' $`MISSPLICED_GSK3BETA_MUTANTS_STABILIZE_BETA-CATENIN`
10 | #' [1] 'PPP2R5E' 'PPP2CB' 'APC' 'AXIN1' 'PPP2R1B' 'PPP2R1A' 'CSNK1A1'
11 | #' [8] 'PPP2R5D' 'PPP2R5C' 'PPP2R5B' 'PPP2R5A' 'PPP2CA' 'GSK3B'
12 | #' @param netTypes (data.frame) 'inputNets.txt' file
13 | #' generated by NetDx. Dataframe has two columns, network type and
14 | #' network name. I.E:
15 | #' clinical clinical
16 | #' rna GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS
17 | #' rna RETINOL_BIOSYNTHESIS
18 | #' @param outDir (char) path to output directory
19 | #' @param ...
parameters for getEMapInput()
20 | #' @examples
21 | #' data(featScores)
22 | #'
23 | #' pathwayList <- readPathways(fetchPathwayDefinitions("October",2020))
24 | #' pathwayList <- pathwayList[seq_len(5)]
25 | #'
26 | #' netInfoFile <- system.file("extdata","example_output/inputNets.txt",package="netDx")
27 | #' netTypes <- read.delim(netInfoFile,sep='\t',h=FALSE,as.is=TRUE)
28 | #' outDir <- paste(tempdir(),'plots',sep='/')
29 | #' if (!file.exists(outDir)) dir.create(outDir)
30 | #' EMap_input <- getEMapInput_many(featScores,pathwayList,
31 | #' netTypes,outDir=outDir)
32 | #' @return (list) of length g, where g is the number of groups in featScores.
33 | #' Values are lists, corresponding to the output of getEmapInput.R
34 | #' @export
35 | getEMapInput_many <- function(featScores, namedSets_valid, netTypes,
36 |     outDir, ...) {
37 |
38 |     # NOTE(review): outDir is accepted but never referenced below - confirm
39 |     # whether it is meant to be forwarded to getEMapInput().
40 |     results <- list()
41 |     for (grp in names(featScores)) {
42 |         # one getEMapInput() call per group, stored under the group's name
43 |         results[[grp]] <- getEMapInput(featScores[[grp]], namedSets_valid,
44 |             netTypes, ...)
45 |     }
46 |     results
47 | }
48 |
--------------------------------------------------------------------------------
/R/getNetConsensus.R:
--------------------------------------------------------------------------------
1 | #' compile net score across a set of predictor results
2 | #'
3 | #' @details used to compare how individual nets score for different
4 | #' predictor configurations
5 | #' @param scorelist (list) key is dataset name, value is a data.frame
6 | #' containing PATHWAY_NAME and SCORE. This is the output of
7 | #' compileFeatureScores()
8 | #' @return (data.frame) Rownames are union of all nets in the input list.
9 | #' Columns show net scores for each key of the input list.
Where a
10 | #' net is not found in a given list, it is assigned the value of NA
11 | #' @examples
12 | #' pathways <- paste("PATHWAY_",1:100,sep="")
13 | #' highrisk <- list()
14 | #' for (k in 1:10) {
15 | #' highrisk[[k]] <- data.frame(PATHWAY_NAME=pathways,
16 | #' SCORE=runif(length(pathways),min=0,max=10),
17 | #' stringsAsFactors=FALSE);
18 | #' }
19 | #' names(highrisk) <- sprintf("Split%i",1:length(highrisk))
20 | #' x <- getNetConsensus(highrisk)
21 | #' @export
22 | getNetConsensus <- function(scorelist) {
23 |     if (length(scorelist) < 1) {
24 |         stop("scorelist must contain at least one set of scores")
25 |     }
26 |
27 |     # start from the first set; its score column takes that set's name
28 |     out <- scorelist[[1]]
29 |     colnames(out)[2] <- names(scorelist)[1]
30 |
31 |     # Outer-join each remaining set on PATHWAY_NAME. seq_len() gives an
32 |     # empty sequence for a single-element list, whereas 2:length(scorelist)
33 |     # would count down (2, 1) and index past the end of the list.
34 |     for (k in seq_len(length(scorelist) - 1) + 1) {
35 |         x <- merge(x = out, y = scorelist[[k]], by = "PATHWAY_NAME",
36 |             all.x = TRUE, all.y = TRUE)
37 |         # the k-th set's scores land in column k+1 (column 1 is the name)
38 |         colnames(x)[k + 1] <- names(scorelist)[k]
39 |         out <- x
40 |     }
41 |
42 |     out
43 | }
44 |
45 |
--------------------------------------------------------------------------------
/R/getRegionOL.R:
--------------------------------------------------------------------------------
1 | #' Returns overlapping named ranges for input ranges
2 | #'
3 | #' @details Given a set of query GRanges, and a subject list-of-GRanges,
4 | #' updates the query with a column 'LOCUS_NAMES' containing the names of
5 | #' ranges overlapped by the query. One application is to map structural
6 | #' variants, such as CNVs, to genes in pathways of interest. In this
7 | #' scenario \code{gr} would contain the patient CNVs, and \code{rngList}
8 | #' would be a list of GenomicRanges objects, one per cellular pathway.
9 | #' @param gr (GRanges) query ranges
10 | #' @param rngList (list) keys are names, and values are GRanges, each range
11 | #' of which has a name (in 'name' column). Note: It is faster to provide
12 | #' a list of length 1 ; if the list is long, combining into a single GRanges
13 | #' object could prove slow.
14 | #' @return (GRanges) query ranges with the added column 'LOCUS_NAMES'.
15 | #' Where a range overlaps with multiple loci, the names are reported as a
16 | #' comma-separated vector. Query ranges with no overlap get an empty string.
17 | #' @examples
18 | #' data(cnv_GR,pathway_GR)
19 | #' x <- getRegionOL(cnv_GR,pathway_GR)
20 | #' @export
21 | #' @importFrom GenomeInfoDb seqlevels seqlevels<-
22 | #' @importFrom GenomicRanges GRanges
23 | #' @importFrom S4Vectors queryHits subjectHits
24 | getRegionOL <- function(gr, rngList) {
25 |     # pool every subject set into one GRanges, widening the seqlevels
26 |     # before each concatenation so the sets can be combined
27 |     pooled <- GRanges()
28 |     for (i in seq_len(length(rngList))) {
29 |         nextSet <- rngList[[i]]
30 |         seqlevels(pooled) <- unique(c(seqlevels(pooled), seqlevels(nextSet)))
31 |         pooled <- c(pooled, nextSet)
32 |     }
33 |
34 |     # keep only subject ranges on sequences the query knows about, then
35 |     # give the subject the query's seqlevels
36 |     queryLevels <- as.character(seqlevels(gr))
37 |     onQueryLevel <- as.character(seqnames(pooled)) %in% queryLevels
38 |     pooled <- pooled[which(onQueryLevel)]
39 |     seqlevels(pooled) <- seqlevels(gr)
40 |
41 |     hits <- findOverlaps(gr, pooled)
42 |     qIdx <- queryHits(hits)
43 |     hitNames <- pooled$name[subjectHits(hits)]
44 |
45 |     # could be made more efficient.
46 |     LOCUS_NAMES <- rep("", length(gr))
47 |     t0 <- Sys.time()
48 |     for (q in unique(qIdx)) {
49 |         # distinct names of all subject ranges hit by query range q
50 |         namesForQ <- unique(hitNames[which(qIdx == q)])
51 |         LOCUS_NAMES[q] <- paste(namesForQ, collapse = ",")
52 |     }
53 |     print(Sys.time() - t0)
54 |     gr$LOCUS_NAMES = LOCUS_NAMES
55 |
56 |     gr
57 | }
58 |
--------------------------------------------------------------------------------
/R/getSimilarity.R:
--------------------------------------------------------------------------------
1 | #' Measures of patient similarity
2 | #'
3 | #' @param x (matrix) matrix for which pairwise patient similarity is to be
4 | #' computed. Expects one column per patient, and one measurement per row.
5 | #' @param type (character) name of similarity measure. Currently supports
6 | #' Pearson correlation ('pearson') or a custom measure ('custom')
7 | #' @param customFunc (function) custom similarity function. Only used when
8 | #' \code{type='custom'}. The function takes \code{x} as first argument and
9 | #' can take additional argument. It should return a symmetric matrix of
10 | #' pairwise patient similarities.
#' @param ... parameter for customFunc
#' @return symmetric matrix of size N, where N is number of samples
#' @examples
#' data(xpr)
#' x <- getSimilarity(xpr) # similarity by Pearson corr
#' mySim <- function(x) cor(x,method='kendall')
#' x <- getSimilarity(xpr,type='custom',customFunc=mySim) # custom similarity
#' @importFrom stats cor na.omit
#' @export
getSimilarity <- function(x, type = "pearson", customFunc, ...) {
    switch(type,
        # rows with any NA are dropped before correlating patient columns
        pearson = round(cor(na.omit(x), method = "pearson"), digits = 3),
        custom = customFunc(x, ...),
        # without a default branch switch() silently returns NULL
        stop(sprintf("getSimilarity: unsupported type '%s'", type)))
}

# --------------------------------------------------------------------------------
# /R/globals.R:
# --------------------------------------------------------------------------------

# curSet - makePSN_NamedMatrix.R
# spos - makePSN_RangeSets.R
# groupList - dataList2list.R
globalVariables(c("curSet","spos","groupList"))

# --------------------------------------------------------------------------------
# /R/makeQueries.R:
# --------------------------------------------------------------------------------
#' Randomly select patients for queries for feature selection
#'
#' @details Patients are shuffled and cut into \code{featScoreMax}
#' consecutive folds of \code{round(length(incPat)/featScoreMax)} patients;
#' the last fold also takes any remainder. Query k contains every patient
#' except those in fold k. If the folds run out of patients before
#' \code{featScoreMax} is reached, the remaining queries hold nobody out.
#' @param incPat (char) vector of patient IDs to be included in query
#' @param featScoreMax (integer) Number of times to run query, usually equal
#' to the max score for features in the design (e.g. if featScoreMax=10, then
#' this value is 10).
#' @param verbose (logical) print messages
#' @return (list) of length \code{featScoreMax}, containing names of patients
#' in query file for each fold
#' @examples
#' data(pheno)
#' x <- makeQueries(pheno$ID)
#' @export
makeQueries <- function(incPat, featScoreMax = 10L, verbose = TRUE) {

    # randomly reorder for N-fold partitioning. Index with sample.int()
    # because sample(x) on a length-one numeric x samples from 1:x.
    incPat <- incPat[sample.int(length(incPat))]
    numPat <- length(incPat)
    # num in query file
    num2samp <- floor(((featScoreMax - 1)/featScoreMax) * numPat)
    # num to retrieve from GM database in each iteration
    csize <- round((1/featScoreMax) * numPat)

    if (verbose) {
        message(sprintf("\t\t%i IDs; %i queries (%i sampled, %i test)",
            numPat,
            featScoreMax, num2samp, csize))
    }

    out <- list()
    for (k in seq_len(featScoreMax)) {
        sidx <- ((k - 1) * csize) + 1
        # last fold absorbs the remainder; earlier folds are clamped so they
        # never index past the end of the vector
        eidx <- if (k == featScoreMax) numPat else min(k * csize, numPat)
        # when rounding makes the folds run out early, sidx > eidx and
        # sidx:eidx would count DOWN; hold nobody out instead
        heldOut <- if (sidx <= eidx) incPat[sidx:eidx] else incPat[0]
        p1 <- sprintf("\t\tQ%i: %i test; ", k, length(heldOut))

        out[[k]] <- setdiff(incPat, heldOut)
        if (verbose)
            message(sprintf("%s %i query", p1, length(out[[k]])))
    }

    out
}

# --------------------------------------------------------------------------------
# /R/makeSymmetric.R:
# --------------------------------------------------------------------------------
#' Convert a network in source-target-weight format to symmetric matrix
#'
#' @details A common format for network representation is to use a three
#' column table listing source node, target node, and weight.
#' This is the format netDx uses for network integration and visualization
#' in Cytoscape. However, some functionality requires a square symmetric
#' adjacency matrix. This function takes as input the three-column format
#' and converts to the adjacency matrix.
#' NOTE: Symmetric attribute is assumed, and the function automatically sets
#' a[i,j] = a[j,i]. Diagonal is assumed to have value of 1.0. Finally
#' missing edges will be assigned NA values.
#' @param x (data.frame) three columns, with source node, target node, and
#' edge weight.
#' Entries must include universe of nodes; those with missing
#' edges must be included as having edge weight NA
#' @param verbose (logical) print messages
#' @return (matrix) symmetric adjacency matrix
#' @examples
#' src <- c("A","B"); tgt <- c("C","C")
#' cur <- data.frame(source=src,target=tgt,weight=c(0.3,0.8))
#' makeSymmetric(cur)
#' @export
makeSymmetric <- function(x,verbose=FALSE) {
    # Work on character node names: combining two factor columns with c()
    # yields integer level codes on R < 4.1, which scrambles node identity.
    src <- as.character(x[,1])
    tgt <- as.character(x[,2])

    samps <- unique(c(src,tgt))
    newmat <- matrix(NA, nrow=length(samps),ncol=length(samps))
    rownames(newmat) <- samps
    colnames(newmat) <- samps

    for (i in seq_along(samps)) {
        if (verbose) message(samps[i])
        # every edge whose source is the i-th node
        for (curr in which(src == samps[i])) {
            j <- which(samps == tgt[curr])
            # mirror the weight so the matrix stays symmetric
            newmat[i,j] <- x[curr,3]
            newmat[j,i] <- x[curr,3]
        }
    }

    diag(newmat) <- 1
    return(newmat)
}

# --------------------------------------------------------------------------------
# /R/mapNamedRangesToSets.R:
# --------------------------------------------------------------------------------
#' Map named ranges to corresponding set of named ranges
#'
#' @details Example application is when we have named ranges each
#' corresponding to genes or regulatory elements, and we wish to group
#' these ranges based on metabolic pathway.
#' @param gr (GRanges) named ranged to be grouped
#' @param rangeList (list) sets of range names
#' @param verbose (logical) print detailed messages
#'
#' @return RangeList.
#' keys are names of \code{rangeList}, values are GRanges
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges IRanges
#' @examples
#' data(genes,pathwayList);
#' gene_GR<-GenomicRanges::GRanges(genes$chrom,
#'     IRanges::IRanges(genes$txStart,genes$txEnd),
#'     name=genes$name2)
#' path_GRList <- mapNamedRangesToSets(gene_GR,pathwayList)
#' @export
mapNamedRangesToSets <- function(gr, rangeList, verbose = FALSE) {
    sets <- list()
    rangeNames <- gr$name
    for (setName in names(rangeList)) {
        # ranges whose name is listed as a member of this set
        members <- gr[rangeNames %in% rangeList[[setName]]]
        if (verbose)
            message(sprintf("%s: %i ranges\n", setName, length(members)))
        sets[[setName]] <- members
    }
    sets
}

# --------------------------------------------------------------------------------
# /R/matrix_getIJ.R:
# --------------------------------------------------------------------------------
#' Converts matrix index (1 to m*n) to row (m) and column (n) number
#'
#' @param dimMat (integer vector of length 2) output of \code{dim()} for
#' matrix in question
#' @param idx (integer vector of length n) matrix indices
#' @return (matrix) n-by-2, first column has row indices ; second column
#' has col indices
matrix_getIJ <- function(dimMat, idx) {
    nRow <- dimMat[1]

    # R matrices are column-major: the row is the index modulo the row
    # count, with a remainder of 0 meaning the last row of a column.
    rowIdx <- idx%%nRow
    rowIdx[rowIdx %in% 0] <- nRow

    ij <- matrix(NA, nrow = length(idx), ncol = 2)
    ij[, 1] <- rowIdx
    ij[, 2] <- ceiling(idx/nRow)

    ij
}

# --------------------------------------------------------------------------------
# /R/moveInteractionNets.R:
# --------------------------------------------------------------------------------
#' moves interaction networks when compiling database for sparse genetic
#' workflow
#'
#' @param netDir (char) source directory
#' @param outDir (char) target directory
#' @param pheno (data.frame) contains patient ID and STATUS
#'
#' @param fileSfx (char) pattern, passed to \code{dir()}, that identifies
#' the network files in \code{netDir}
#' @return No value. Side effect of writing renumbered interaction nets to
#' \code{<netDir>/INTERACTIONS} and creating network-related metadata files
#' (NETWORKS.txt, NETWORK_GROUPS.txt, NETWORK_METADATA.txt) in \code{netDir},
#' used to compile the feature database
#' @importFrom utils write.table
moveInteractionNets <- function(netDir,outDir,pheno,fileSfx="_cont.txt") {
    # NOTE(review): outDir is accepted but never used below; all output goes
    # under netDir. Confirm against callers before changing.
    fsep <- getFileSep()
    netList <- dir(path=netDir,pattern=fileSfx)
    netID <- data.frame(ID = seq_along(netList),
        name = netList, ID = seq_along(netList),
        name2 = netList, 0, 1, stringsAsFactors = TRUE)
    dir.create(paste(netDir,"INTERACTIONS",sep=fsep))

    for (i in seq_along(netList)) {
        dat <- read.delim(paste(netDir,netList[i],sep=fsep),
            sep="\t",
            header=FALSE,as.is=TRUE)
        # replace patient IDs in the first two columns by their internal IDs
        dat2 <- dat
        dat2[,1] <- pheno$INTERNAL_ID[match(dat[,1],pheno$ID)]
        dat2[,2] <- pheno$INTERNAL_ID[match(dat[,2],pheno$ID)]
        write.table(dat2,
            file=paste(netDir,"INTERACTIONS",
                sprintf("1.%i.txt",netID$ID[i]),sep=fsep),
            sep="\t",col.names=FALSE,row.names=FALSE,quote=FALSE)
    }

    # write NETWORKS.txt
    write.table(netID, file = paste(netDir,"NETWORKS.txt",sep=fsep),
        sep = "\t", col.names = FALSE,
        row.names = FALSE, quote = FALSE)

    # write NETWORK_GROUPS.txt
    # Writing straight to the path lets write() open and close the file
    # itself, so no connection is left open if the write fails.
    write(paste(1, "dummy_group", "geneset_1", "dummy_group", 1, sep = "\t"),
        file = paste(netDir,"NETWORK_GROUPS.txt", sep=fsep))

    # write NETWORK_METADATA.txt: one row per network, ID plus placeholders
    tmp <- paste(netID$ID, "", "", "", "", "", "", "",
        "", "", 0, "", "", 0, "",
        "", "", "", "", sep = "\t")
    write.table(tmp,
        file = paste(netDir,"NETWORK_METADATA.txt",sep=fsep),
        sep = "\t", col.names = FALSE,
        row.names = FALSE,
        quote = FALSE)
}

# --------------------------------------------------------------------------------
# /R/perfCalc.R:
# --------------------------------------------------------------------------------
#' Computes variety of predictor evaluation measures based on the confusion
#' matrix
#'
#' @param dat (data.frame): 5 columns: score, tp, fp, tn, fn.
#' One row per cutoff
#' score for feature selection
#' @return (list)
#' stats (data.frame): score, ppv (precision), f1 and rec (recall). One row
#' per cutoff for feature selection
#' auc (numeric between 0 and 1): AUC of overall ROC curve
#' prauc (numeric between 0 and 1): AUC of overall precision-recall curve
#' @importFrom pracma trapz
#' @importFrom stats na.omit
#' @examples
#' data(confmat)
#' x <- perfCalc(confmat)
#' @export
perfCalc <- function(dat) {
    dat <- na.omit(dat)
    # F1 - harmonic mean of precision recall resolves to the formula below
    tp2 <- 2 * dat$tp
    f1 <- tp2/(tp2 + dat$fp + dat$fn)

    # precision recall curve

    # precision = positive predictive value (pr = ppv)
    ppv <- dat$tp/(dat$tp + dat$fp)
    rec <- dat$tp/(dat$tp + dat$fn)

    # Precision is 0/0 (NaN) at cutoffs where nothing is called positive.
    # Drop those points from the curve, as getPerformance() does, so one
    # such cutoff does not turn the whole area into NaN.
    defined <- !is.nan(ppv)
    # The vectors are reversed before integrating; without rev() the area
    # comes out negative.
    prauc <- pracma::trapz(rev(rec[defined]), rev(ppv[defined]))

    # roc auc
    x <- dat$fp/(dat$fp + dat$tn)
    y <- dat$tp/(dat$tp + dat$fn)

    # anchor the ROC curve at (0,0) and (1,1)
    x <- c(0, rev(x), 1)
    y <- c(0, rev(y), 1)
    auc <- pracma::trapz(x, y)
    out <- data.frame(score = dat$score, ppv = ppv, f1 = f1, rec = rec)

    return(list(stats = out, auc = auc, prauc = prauc))
}

# --------------------------------------------------------------------------------
# /R/plotPerf_simple.R:
# --------------------------------------------------------------------------------
#' performance metrics for model
#' @param res (data.frame) result from predicting labels on held-out test set. output of predict() function.
#' columns include ID, STATUS (ground truth) and PRED_CLASS (predicted label),
#' plus one \code{<class>_SCORE} column for each of the first two entries of
#' \code{predClasses}
#' @param predClasses (character) patient labels used by classifier
#' @return (list)
#' 1) rocCurve: ROCR performance object for ROC curve
#' 2) prCurve: ROCR performance object for PR curve
#' 3) auroc: Area under ROC curve
#' 4) aupr: Area under PR curve
#' 5) accuracy: Accuracy
#' @import ROCR
#' @export
getPerformance <- function(res, predClasses) {

    # given output of performance('prec','rec') compute AUC-PR
    prauc <- function(perf) {
        x <- perf@x.values[[1]]  # recall
        y <- perf@y.values[[1]]  # precision

        # precision is NaN where nothing is predicted positive; skip those
        keep <- !is.nan(y)
        pracma::trapz(x[keep], y[keep])
    }

    scoreCols <- sprintf("%s_SCORE", predClasses[c(1, 2)])
    absent <- setdiff(scoreCols, colnames(res))
    if (length(absent) > 0) {
        stop(sprintf("getPerformance: res is missing column(s): %s",
            paste(absent, collapse = ", ")))
    }

    # rank patients by the margin between the two class scores; the first
    # class in predClasses is treated as the positive label
    pred <- ROCR::prediction(res[, scoreCols[1]] - res[, scoreCols[2]],
        res$STATUS == predClasses[1])

    # entire curves
    curRoc <- ROCR::performance(pred, "tpr", "fpr")
    curPr <- ROCR::performance(pred, "prec", "rec")

    # statistic
    auroc <- ROCR::performance(pred, "auc")@y.values[[1]]
    aupr <- prauc(curPr)
    corr <- sum(res$STATUS == res$PRED_CLASS)
    acc <- (corr/nrow(res))*100

    return(list(rocCurve=curRoc,prCurve=curPr,auroc=auroc,aupr=aupr,accuracy=acc))
}
# --------------------------------------------------------------------------------
# /R/predictPatientLabels.R:
# --------------------------------------------------------------------------------
#' assign patient class when ranked by multiple GM predictors
#'
#' @param resSet (list) output of getPatientRankings, each key for a different
#' predictor. names(resSet) contain predictor label
#' @param verbose (logical) print detailed messages
#' @return data.frame: ID, one \code{<label>_SCORE} column per predictor,
#' and PRED_CLASS. Patients with an NA score for any predictor are dropped.
#' @importFrom stats na.omit
#' @examples
#' data(predRes); predClass <- predictPatientLabels(predRes)
#' @export
predictPatientLabels <- function(resSet, verbose = TRUE) {
    type_rank <- NULL
    for (k in seq_along(resSet)) {
        x <- resSet[[k]]$fullmat
        idx <- which(colnames(x) == "GM_score")
        if (length(idx) > 0)
            colnames(x)[idx] <- "similarityScore"
        if (is.null(type_rank)) {
            type_rank <- x[, c("ID", "similarityScore")]
        } else {
            # all.equal() returns TRUE or a character vector of differences,
            # so it has to be wrapped in isTRUE() to be a safe condition
            if (!isTRUE(all.equal(x$ID, type_rank$ID))) {
                stop("predictPatientLabels: ids don't match")
            }
            type_rank <- cbind(type_rank, x[, "similarityScore"])
        }
        rnkCol <- paste(names(resSet)[k], "SCORE", sep = "_")
        colnames(type_rank)[ncol(type_rank)] <- rnkCol
    }

    # drop = FALSE keeps the table shape when there is a single predictor
    na_sum <- rowSums(is.na(type_rank[, -1, drop = FALSE]))
    if (verbose) {
        if (any(na_sum > 0))
            message(sprintf(paste("*** %i rows have an NA prediction ",
                "(probably query samples that were not ranked)\n",
                sep = ""), sum(na_sum > 0)))
    }
    type_rank <- na.omit(type_rank)

    # finally, select the class with the highest rank as the subject label.
    scoreMat <- as.matrix(type_rank[, -1, drop = FALSE])
    best <- vapply(seq_len(nrow(scoreMat)),
        function(k) which.max(scoreMat[k, ]), integer(1))
    # strip only the trailing suffix so labels containing "_SCORE" survive
    patClass <- sub("_SCORE$", "", colnames(scoreMat)[best])
    type_rank$PRED_CLASS <- as.character(patClass)

    type_rank
}

# --------------------------------------------------------------------------------
# /R/pruneNetByStrongest.R:
# --------------------------------------------------------------------------------
#' Prune network by retaining strongest edges
#'
#' @details Thin wrapper around \code{pruneNet_pctX()}, which holds the
#' single implementation of this pruning logic.
#' @param net (data.frame) Network to prune. Columns are: source,target,weight
#' @param vertices (char) node names. Should match those in net[,1:2]
#' @param pctX (numeric 0 to 1) Fraction of top/bottom edges to retain
#' @param useTop (logical) if TRUE prunes to top pctX edges; else
#' prunes to bottom pctX edges
#' @return (data.frame) pruned network. Three columns: AliasA, AliasB, and
#' weight
#' @importFrom igraph graph_from_data_frame
#' @importFrom igraph delete.edges
#' @importFrom igraph get.edgelist
#' @importFrom igraph edge_attr
#' @importFrom igraph E
#' @export
pruneNet <- function(net,vertices, pctX=0.1, useTop=TRUE) {
    # this function used to be a line-for-line copy of pruneNet_pctX()
    pruneNet_pctX(net, vertices, pctX = pctX, useTop = useTop)
}

# --------------------------------------------------------------------------------
# /R/pruneNet_pctX.R:
# --------------------------------------------------------------------------------
#' Prune network by retaining strongest edges
#'
#' @details The number of edges retained is \code{floor(pctX * number of
#' edges)}, but never fewer than one. Edges tied with the weight at the
#' cutoff are all retained, so the result can hold slightly more edges.
#' @param net (data.frame) Network to prune. Columns are: source,target,weight
#' @param vertices (char) node names. Should match those in net[,1:2]
#' @param pctX (numeric 0 to 1) Fraction of top/bottom edges to retain
#' @param useTop (logical) if TRUE prunes to top pctX edges; else
#' prunes to bottom pctX edges
#' @return (data.frame) pruned network. Three columns: AliasA, AliasB, and
#' weight
#' @importFrom igraph graph_from_data_frame
#' @importFrom igraph delete.edges
#' @importFrom igraph get.edgelist
#' @importFrom igraph edge_attr
#' @importFrom igraph E
#' @export
pruneNet_pctX <- function(net,vertices, pctX=0.1, useTop=TRUE) {
    g <- igraph::graph_from_data_frame(net,vertices=vertices)
    wt <- sort(E(g)$weight, decreasing=TRUE)
    numEdge <- length(wt)

    # Whole number of edges to keep. The small tolerance absorbs
    # floating-point error (e.g. 0.29 * 100 evaluating just under 29), and
    # the lower bound of 1 avoids wt[0], which is empty and prunes nothing.
    numKeep <- min(numEdge, max(1, floor(numEdge * pctX + 1e-8)))

    if (useTop) { # keep topmost edges
        thresh <- wt[numKeep]
        g2 <- delete.edges(g,which(E(g)$weight < thresh))

    } else { # keep bottom-most edges
        # wt is sorted in decreasing order, so the numKeep weakest edges
        # start at position numEdge - numKeep + 1
        thresh <- wt[numEdge - numKeep + 1]
        g2 <- delete.edges(g,which(E(g)$weight > thresh))
    }

    df <- as.data.frame(get.edgelist(g2))
    df[,1] <- as.character(df[,1])
    df[,2] <- as.character(df[,2])
    df$weight <- edge_attr(g2,name="weight")
    colnames(df) <- c("AliasA","AliasB","weight")

    return(df)
}

# --------------------------------------------------------------------------------
# /R/runFeatureSelection.R:
# --------------------------------------------------------------------------------
#' Run GeneMANIA cross-validation with a provided subset of networks
#'
#' @details Creates query files, runs GM for 10-fold cross validation.
#' @param trainID_pred (char) vector with universe of predictor class
#' patients (ie all that can possibly be included in the query file)
#' @param outDir (char) directory to store query file and GM results
#' @param dbPath (char) path to GeneMANIA generic database with
#' training population
#' @param numTrainSamps (integer) number of training samples in total
#' leave blank to use 5 training samples in order to save memory
#' @param incNets (char) vector of networks to include in this analysis
#' (features/pathway names). Useful for subset-based feature selection
#' @param orgName (char) organism name for GeneMANIA generic database.
#' The default value will likely never need to be changed.
#' @param fileSfx (char) file suffix
#' @param verbose (logical) print messages
#' @param numCores (integer) num parallel threads for cross-validation
#' @param JavaMemory (integer) memory for GeneMANIA run, in Gb.
#' @param verbose_runQuery (logical) print messages for runQuery()
#' @param debugMode (logical) when TRUE runs jobs in serial instead of parallel and
#' prints verbose messages. Also prints system Java calls and prints all standard out
#' and error output associated with these calls.
#' @param ... args for \code{makeQueries()}
#' @return The value of the final \code{runQuery()} call. Main purpose is
#' the side effect of generating feature scores.
#' @examples
#' data(MB.pheno)
#' dbPath <- system.file("extdata","dbPath",package="netDx")
#' runFeatureSelection(MB.pheno$ID[which(MB.pheno$STATUS%in% 'WNT')],
#'     tempdir(),dbPath,103L)
#' @export
runFeatureSelection <- function(trainID_pred, outDir, dbPath,
    numTrainSamps = NULL, incNets = "all", orgName = "predictor",
    fileSfx = "CV", verbose = FALSE, numCores = 2L,
    JavaMemory = 6L, verbose_runQuery = FALSE, debugMode=FALSE, ...) {

    if (!file.exists(outDir))
        dir.create(outDir)

    # get query names
    if (verbose)
        message("\tWriting queries:\n")
    qSamps <- makeQueries(trainID_pred, verbose = verbose, ...)

    # write one query file per fold and remember where each one went
    qFiles <- list()
    for (fold in seq_along(qSamps)) {
        curFile <- paste(outDir,sprintf("%s_%i.query", fileSfx, fold),
            sep=getFileSep())
        if (is.null(numTrainSamps)) {
            numTrainSamps = 5
            message("Memory saver option: using 5 training samples for CV")
        }

        writeQueryFile(qSamps[[fold]], incNets, numTrainSamps, curFile,
            orgName)
        qFiles <- append(qFiles, curFile)
    }

    runQuery(dbPath, qFiles, outDir, JavaMemory = JavaMemory,
        verbose = verbose_runQuery,
        numCores = numCores,debugMode=debugMode)

}

# --------------------------------------------------------------------------------
# /R/runProfileToNetworks.R:
# --------------------------------------------------------------------------------
#' Convert profiles to interaction networks before integration
#'
#' @details In preparation for network integration. When using GeneMANIA's
#' built-in functionality to create PSN using ProfileToNetworkDriver, this
#' step needs to run to process profiles to networks. These are currently used
#' for Pearson correlation-based networks and those using mutual information.
#' @param netDir (char) directory with .profile files
#' @param outDir (char) path to directory where interaction networks are to be printed
#' @param simMetric (char) similarity measure to use in converting
#' profiles to interaction networks.
#' @param numCores (integer) number of cores for parallel processing
#' @param P2N_threshType (char) Most users shouldn't have to change this.
#' ProfileToNetworkDriver's threshold option.
#' One of 'off|auto'.
#' @param P2N_maxMissing (integer 5-100) value passed to
#' ProfileToNetworkDriver's \code{-maxmissing} option
#' @param JavaMemory (integer) Memory for GeneMANIA (in Gb)
#' @param GM_jar (char) path to GeneMANIA jar file
#' @param netSfx (char) pattern for finding network files in \code{netDir}.
#' NOTE(review): currently not used; profiles are always found with the
#' pattern "profile$".
#' @param debugMode (logical) if TRUE runs profile generation in serial
#' rather than parallel, allowing debugging
#' @return No value. Side effect of creating interaction networks in outDir.
#' @export
convertProfileToNetworks <- function(netDir,outDir=tempdir(),
    simMetric="pearson",numCores=1L,
    JavaMemory=4L,GM_jar=NULL,P2N_threshType="off",P2N_maxMissing=100,
    netSfx="txt$",debugMode=FALSE) {

    # Validate before starting any workers; an unknown metric used to
    # surface later as "object 'corType' not found".
    corType <- switch(simMetric,
        pearson = "PEARSON",
        MI = "MUTUAL_INFORMATION",
        stop(sprintf(paste("convertProfileToNetworks: unsupported ",
            "simMetric '%s' (expected 'pearson' or 'MI')", sep = ""),
            simMetric)))

    if (is.null(GM_jar)) GM_jar <- getGMjar_path()
    fsep <- getFileSep()

    cl <- makeCluster(numCores,
        outfile = paste(netDir, "P2N_log.txt",sep=fsep))
    # make sure the workers are released even if a later step fails
    on.exit(stopCluster(cl), add = TRUE)
    registerDoParallel(cl)

    args <- c(sprintf("-Xmx%iG", JavaMemory), "-cp", GM_jar)
    args <- c(args,
        paste("org.genemania.engine.core.",
            "evaluation.ProfileToNetworkDriver",sep=""))
    args <- c(args, c("-proftype", "continuous", "-cor", corType))
    args <- c(args, c("-threshold", P2N_threshType,
        "-maxmissing",
        sprintf("%1.1f", P2N_maxMissing)))

    curProf <- ""

    # serial loop in debug mode, parallel otherwise
    `%myinfix%` <- if (debugMode) `%do%` else `%dopar%`
    foreach(curProf = dir(path = netDir, pattern = "profile$")) %myinfix% {
        if (debugMode) print(curProf)
        args2 <- c("-in", paste(netDir, curProf,sep=fsep))
        # anchor the pattern so only the trailing extension is replaced
        args2 <- c(args2, "-out", paste(outDir,
            sub(".profile$", ".txt", curProf),sep=fsep))
        args2 <- c(args2, "-syn",
            paste(netDir,"..","1.synonyms",sep=fsep),
            "-keepAllTies", "-limitTies")

        # "" sends Java's stdout to the R console, NULL discards it
        javaOut <- if (debugMode) "" else NULL
        system2("java", args = c(args, args2), wait = TRUE,
            stdout = javaOut)
    }

    netFiles <- dir(path=outDir,pattern="txt$")
    if (length(netFiles) < 1) {
        stop(paste("convertProfileToNetworks: no network files found in ",
            "outDir after conversion", sep = ""))
    }
    firstLine <- readLines(sprintf("%s/%s",outDir,netFiles[1]),n=1)
    if (any(grepl(pattern=",",firstLine))) { # detect comma
        replacePattern(path=outDir,fileType="txt$")
    }
}

# --------------------------------------------------------------------------------
# /R/runQuery.R:
# --------------------------------------------------------------------------------
#' Run a query
#'
#' @param dbPath (char) path to directory with GeneMANIA generic database
#' @param queryFiles (list(char)) paths to query files
#' @param resDir (char) path to output directory
#' @param verbose (logical) print messages
#' @param JavaMemory (integer) Memory for GeneMANIA (in Gb) - a total of
#' numCores*GMmemory will be used and distributed for all GM threads
#' @param numCores (integer) number of CPU cores for parallel processing
#' @param debugMode (logical) when TRUE runs jobs in serial instead of parallel and
#' prints verbose messages. Also prints system Java calls.
#' @return (char) path to GeneMANIA query result files with patient similarity
#' rankings (*PRANK) and feature weights (*NRANK). The returned path is a
#' common prefix: that file itself is not created, GeneMANIA writes the
#' PRANK and NRANK segments separately.
#' @examples
#' dbPath <- system.file("extdata","dbPath",package="netDx")
#' queryFile <- system.file("extdata","GM_query.txt",package="netDx")
#' runQuery(dbPath, queryFile,tempdir())
#' @export
runQuery <- function(dbPath, queryFiles, resDir, verbose = TRUE,
    JavaMemory = 6L, numCores = 1L,debugMode=FALSE) {

    GM_jar <- getGMjar_path()
    qBase <- basename(queryFiles[[1]][1])

    # "--version" is understood by Java 9 and later only; older launchers
    # print nothing on stdout, which leaves the major version as NA.
    java_ver <- suppressWarnings(system2("java",
        args="--version",stdout=TRUE,stderr=NULL))
    javaMajor <- .parseJavaMajor(java_ver)

    args <- c()
    if (!is.na(javaMajor) && javaMajor >= 9L) {
        if (verbose) message(sprintf("Java %i detected", javaMajor))
    } else {
        if (verbose) message("Java 8 detected")
        # -d64 is only accepted by old JVMs; newer ones refuse to start
        args <- c(args,"-d64")
    }

    args <- c(args, sprintf("-Xmx%iG", JavaMemory * numCores), "-cp", GM_jar)
    args <- c(args, "org.genemania.plugin.apps.QueryRunner")
    args <- c(args, "--data", dbPath, "--in", "flat", "--out", "flat")
    args <- c(args, "--threads", numCores, "--results", resDir,
        unlist(queryFiles))
    args <- c(args, "--netdx-flag", "true")

    # file is not actually created - is already split in PRANK and
    # NRANK segments on
    # GeneMANIA side
    resFile <- paste(resDir,sprintf("%s-results.report.txt",qBase),
        sep=getFileSep())
    t0 <- Sys.time()
    if (debugMode) {
        message(sprintf("java %s",paste(args,collapse=" ")))
        status <- system2("java", args, wait = TRUE)
    } else {
        status <- system2("java", args, wait = TRUE, stdout = NULL,
            stderr = NULL)
    }
    if (!identical(as.integer(status), 0L)) {
        warning(sprintf("runQuery: java exited with status %s",
            as.character(status)))
    }
    if (verbose) {
        # force seconds: a bare difftime switches to minutes after 60 s
        elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
        message(sprintf("QueryRunner time taken: %1.1f s", elapsed))
    }
    Sys.sleep(3)
    return(resFile)
}

# Extract the Java major version from the output of `java --version`.
# Returns NA when no version number can be found (e.g. empty output).
.parseJavaMajor <- function(verLines) {
    hits <- regmatches(verLines, regexpr("[0-9]+(\\.[0-9]+)*", verLines))
    if (length(hits) < 1) return(NA_integer_)
    as.integer(sub("\\..*$", "", hits[1]))
}

# --------------------------------------------------------------------------------
# /R/simpleCap.R:
# --------------------------------------------------------------------------------
#' simple capitalization
#' @details used to format feature names so they are not in all-caps
#' @param x (char) name
#' @return (char) Changes case so start of each word is in upper-case, and
#' the rest is in lowercase
#' @examples simpleCap('this IS a TEST sEnTenCe')
#' @export
simpleCap <- function(x) {
    x <- tolower(x)
    # only the first element of x is processed
    s <- strsplit(x, " ")[[1]]
    x <- paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "",
        collapse = " ")
    x
}

# --------------------------------------------------------------------------------
# /R/splitTestTrain_resampling.R:
# --------------------------------------------------------------------------------
#' Split samples into train/test
#'
#' @param pheno_DF (data.frame) patient information
#' Must contain the following columns:
#' 1. ID: (char) patient IDs
#' 2. STATUS: (char) patient classes. Values not equal to \code{predClass}
#' will be considered as 'other'
#' Expects rows with unique IDs
#' @param pctT (numeric between 0 and 1) Fraction of patients to randomly
#' assign to the training set. The remainder will be used for blind test
#' set
#' @param verbose (logical) print messages
#' @return (char) vector of length \code{nrow(pheno_DF)}, with values of
#' 'TRAIN' or 'TEST'. The order corresponds to pheno_DF; a patient labelled
#' 'TRAIN' has been assigned to the training set, and one labelled 'TEST'
#' as been assigned to the test set.
#' @examples
#' data(pheno)
#' x <- splitTestTrain(pheno)
#' @export
splitTestTrain <- function(pheno_DF, pctT = 0.7, verbose = FALSE) {

    lvls <- unique(pheno_DF$STATUS)
    IS_TRAIN <- rep("TEST", nrow(pheno_DF))
    # sample within each class so both sets keep the class proportions
    for (lv in lvls) {
        idx <- which(pheno_DF$STATUS %in% lv)
        numTrain <- floor(pctT * length(idx))
        # Draw positions with sample.int(): sample(idx, n) on a class with
        # one patient would sample from 1:idx and could mark a row of
        # another class as TRAIN.
        IS_TRAIN[idx[sample.int(length(idx), numTrain)]] <- "TRAIN"
    }

    IS_TRAIN <- factor(IS_TRAIN, levels = c("TRAIN", "TEST"))

    pheno_DF <- cbind(pheno_DF, IS_TRAIN = IS_TRAIN)
    if (verbose)
        print(table(pheno_DF[, c("STATUS", "IS_TRAIN")]))

    return(IS_TRAIN)
}

# --------------------------------------------------------------------------------
# /R/updateNets.R:
# --------------------------------------------------------------------------------
#' Synchronize patient set in sample table and network table.
#'
#' @details This function is useful in applications with highly missing
#' data or where each patient contributes data points not present in the
#' others; e.g. networks based on individual
#' patient CNVs, which are highly sparse. In such a scenario, any kind of
#' patient subsetting - for example, limiting to training samples - changes
#' the population of eligible networks for analysis. Networks that no longer
#' have samples, or that have one patient with the neighbour removed, have
#' to be excluded. This function updates networks and patients so that
#' each network contains at least two patients and only patients in
#' networks are retained. In other words, it keeps pheno_DF and p_net in
#' sync.
#' @param p_net (matrix) rows are patients, columns are networks.
#' a[i,j] = 1 if patient i occurs in network j, else 0.
#' @param pheno_DF (data.frame) patient ID and STATUS.
#' @param writeNewNets (logical) if TRUE writes new networks to
#' \code{newNetDir}.
#' @param oldNetDir (char) path to directory with networks to be updated
#' @param newNetDir (char) path to directory where updated networks are
#' to be written
#' @param verbose (logical) print messages
#' @param ... passed to pruneNets()
#' @return list with updated p_net and pheno_DF. pheno_DF will contain IDs
#' in the updated p_net. p_net will contain only those networks with
#' 2+ patients and those patients present in 1+ network.
#' @export
#' @examples
#' data(npheno)
#' netDir <- system.file("extdata","example_nets",package="netDx")
#' netmat <- countPatientsInNet(netDir,dir(netDir,pattern='txt$'), npheno[,1])
#' x <- updateNets(netmat, npheno,writeNewNets=FALSE)
updateNets <- function(p_net, pheno_DF, writeNewNets = TRUE, oldNetDir,
    newNetDir, verbose = TRUE, ...) {
    # drop = FALSE keeps p_net a matrix when only one network or one
    # patient survives; otherwise the next rowSums()/dim() call breaks
    keepNets <- which(colSums(p_net) >= 2)
    p_net <- p_net[, keepNets, drop = FALSE]
    keepPats <- which(rowSums(p_net) >= 1)
    p_net <- p_net[keepPats, , drop = FALSE]
    if (verbose) {
        message("Update: (num patients) x (num networks)")
        print(dim(p_net))
    }

    # training samples are only those that occur in label-enriched networks
    pheno_DF <- pheno_DF[which(pheno_DF$ID %in% rownames(p_net)), ]

    if (writeNewNets) {
        pruneNets(oldNetDir, newNetDir, filterNets = colnames(p_net),
            filterIDs = rownames(p_net), ...)
    }

    return(list(p_net = p_net, pheno_DF = pheno_DF))
}

# --------------------------------------------------------------------------------
# /R/utils.R:
# --------------------------------------------------------------------------------
#' platform-specific file separator
#'
#' @description Returns OS-specific file separator
#' @return (char) "\\" if Windows, else "/"
#' @examples
#' getFileSep()
#' @export
getFileSep <- function(){
    if (.Platform$OS.type=="windows") return("\\")
    else return(.Platform$file.sep)
}

#' Generate random alphanumerical string of length 10
#'
#' @details Used to create multiple temporary directories during an R session.
#' Each string is five capital letters, a zero-padded four-digit number and
#' one more capital letter.
#' @param numStrings (integer) number of strings to generate
#' @return vector of length \code{numStrings}, each with 10-char
#' alphanumerical strings
#' @examples
#' randAlphanumString()
#' @export
randAlphanumString <- function(numStrings = 1L) {
    a <- do.call(paste0, replicate(5, sample(LETTERS, numStrings, TRUE), FALSE))
    paste0(a, sprintf("%04d", sample(9999, numStrings, TRUE)),
        sample(LETTERS, numStrings, TRUE))
}

# --------------------------------------------------------------------------------
# /R/writeQueryBatchFile.R:
# --------------------------------------------------------------------------------
#' Write batch.txt file required to create GeneMANIA database
#'
#' @details This file is used to compile features into a single database
#' for feature selection.
#' @param netDir (char) path to dir with networks
#' @param netList (char) vector of network names
#' @param outDir (char) directory to write batch file
#' @param idFile (char) path to file with patient IDs
#' @param orgName (char) organism name. Don't change the default unless
#' you know what you are doing.
#' @param orgDesc (char) organism description.
#' Similar to \code{orgName},
#' don't change the default
#' @param orgAlias (char) organism alias. Similar to \code{orgName}, don't
#' change the default.
#' @param taxID (integer) taxonomyID required for GeneMANIA. Similar to
#' \code{orgName}, don't change the default.
#' @return No value. Side effect of writing batch file to
#' \code{outDir/batch.txt}.
#' @export
#' @examples
#' data(npheno)
#' netDir <- system.file("extdata","example_nets",package="netDx")
#' netList <- dir(netDir,pattern='txt$')
#' writeQueryBatchFile(netDir,netList, tempdir(), npheno$ID)
writeQueryBatchFile <- function(netDir, netList, outDir = tempdir(), idFile,
    orgName = "predictor",
    orgDesc = "my_predictor", orgAlias = "my_predictor", taxID = 1339) {

    # netDir is part of the signature but is not read by this function.
    outF <- file.path(outDir, "batch.txt")

    groupName <- "dummy_group"
    groupCode <- "geneset_1"
    groupDesc <- "dummy_group"

    tabJoin <- function(fields) paste(fields, collapse = "\t")

    # Header section: organism record, group record, then the network
    # column header. A blank line follows each of the first two records.
    headerLines <- c(
        tabJoin(c("#organism", "id", "file", "name", "description", "alias",
            "taxonomyid")),
        tabJoin(c("organism", basename(idFile), orgName, orgDesc, orgAlias,
            as.character(taxID))),
        "",
        tabJoin(c("#group", "name", "code", "description", "RRGGBB colour",
            "organism")),
        tabJoin(c("group", groupName, groupCode, groupDesc, "ff00ff",
            orgName)),
        "",
        tabJoin(c("#network", "filename", "name", "description",
            "group code"))
    )
    # Passing the path lets writeLines() open and close the file itself, so
    # no connection is left open if an error occurs.
    writeLines(headerLines, con = outF)

    # One row per network. The name is the file name with a trailing
    # literal ".txt" removed; the pattern is anchored and the dot escaped so
    # that "txt" elsewhere in the name is left alone. rep() keeps the
    # constant columns the same length as netList, including when it is
    # empty.
    numNets <- length(netList)
    net_DF <- data.frame(type = rep("network", numNets), filename = netList,
        name = sub("\\.txt$", "", netList), description = netList,
        groupCode = rep(groupCode, numNets))
    write.table(net_DF, file = outF, sep = "\t", col.names = FALSE,
        row.names = FALSE,
        quote = FALSE, append = TRUE)
}

--------------------------------------------------------------------------------
/R/writeQueryFile.R:
--------------------------------------------------------------------------------
#' Wrapper to write GeneMANIA query file
#'
#' @param qSamps (char) vector of patient IDs in query
#' @param incNets (char) vector of networks to include in this analysis
#' (features/pathway names). Useful for subset-based feature selection
#' @param numReturn (integer) number of patients to return in ranking file
#' @param outFile (char) path to output file
#' @param orgName (char) organism name
#' @return No value. Side effect of writing the query file to
#' \code{outFile}
#' @examples
#' data(pheno)
#' writeQueryFile(pheno$ID[seq_len(5)], 'all',nrow(pheno), 'myquery.txt')
#' @export
writeQueryFile <- function(qSamps, incNets = "all", numReturn = 1L, outFile,
    orgName = "predictor") {
    # All lines are formatted before the file is touched, so a formatting
    # error (e.g. a non-integer numReturn) cannot leave an open connection
    # or a partially written file behind.
    queryLines <- c(
        sprintf("%s", orgName),                 # org name
        paste(qSamps, collapse = "\t"),         # query patients
        paste(incNets, collapse = "\t"),        # networks
        sprintf("%i", numReturn),               # num2return
        "automatic"                             # combining
    )
    writeLines(queryLines, con = outFile)
}

--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
.onLoad <- function(libname, pkgname) {
    options(java.parameters = c("-Xmx10G"))
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Update: Sep 2021: netDx is now maintained at [https://github.com/realpailab/netdx](https://github.com/realpailab/netdx). 3 | ## Development in this repo now frozen. 4 | 5 | netDx is a general-purpose algorithm for building patient classifiers by using patient similarity networks as features. It excels at interpretability and handling missing data. It also allows custom grouping rules for features, notably grouping genes into pathways. It integrates with RCy3 for network visualization of predictive pathways. 6 | 7 | As of February 2020, netDx is available via the BioConductor repository. 8 | Visit http://bioconductor.org/packages/release/bioc/html/netDx.html to install the package and see worked examples. 9 | 10 | Contact Shraddha Pai at shraddha.pai@utoronto.ca in case of questions. 11 | 12 | References: 13 | 14 | 1. Pai S, Hui S, Isserlin R, Shah MA, Kaka H and GD Bader (2019). netDx: Interpretable patient classification using patient similarity networks. *Mol Sys Biol*. 15: e8497. [Read the paper here](https://www.embopress.org/doi/full/10.15252/msb.20188497). 15 | 2. Pai S, Weber P, Isserlin R, Kaka H, Hui S, Shah MA, Giudice L, Giugno R, Nøhr AK, Baumbach J, GD Bader (2021). netDx: Software for building interpretable patient classifiers by multi-'omic data integration using patient similarity networks. *F1000 Research*. 9:1239. 
16 | -------------------------------------------------------------------------------- /data/MB.pheno.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/MB.pheno.rda -------------------------------------------------------------------------------- /data/cnv_GR.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_GR.rda -------------------------------------------------------------------------------- /data/cnv_TTstatus.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_TTstatus.rda -------------------------------------------------------------------------------- /data/cnv_netPass.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_netPass.rda -------------------------------------------------------------------------------- /data/cnv_netScores.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_netScores.rda -------------------------------------------------------------------------------- /data/cnv_patientNetCount.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_patientNetCount.rda -------------------------------------------------------------------------------- /data/cnv_pheno.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_pheno.rda -------------------------------------------------------------------------------- /data/confmat.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/confmat.rda -------------------------------------------------------------------------------- /data/featScores.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/featScores.rda -------------------------------------------------------------------------------- /data/genes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/genes.rda -------------------------------------------------------------------------------- /data/modelres.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/modelres.rda -------------------------------------------------------------------------------- /data/npheno.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/npheno.rda -------------------------------------------------------------------------------- /data/pathwayList.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/pathwayList.rda -------------------------------------------------------------------------------- /data/pathway_GR.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/pathway_GR.rda -------------------------------------------------------------------------------- /data/pheno.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/pheno.rda -------------------------------------------------------------------------------- /data/pheno_full.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/pheno_full.rda -------------------------------------------------------------------------------- /data/predRes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/predRes.rda -------------------------------------------------------------------------------- /data/silh.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/silh.rda -------------------------------------------------------------------------------- /data/toymodel.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/toymodel.rda -------------------------------------------------------------------------------- /data/xpr.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/xpr.rda -------------------------------------------------------------------------------- 
/inst/CITATION: -------------------------------------------------------------------------------- 1 | c(bibentry(bibtype = "Article", 2 | key = "netDx-methods", 3 | title = "{netDx: interpretable patient classification using integrated patient similarity networks}", 4 | author = c( 5 | person("Shraddha", "Pai"), 6 | person("Shirley", "Hui"), 7 | person("Ruth", "Isserlin"), 8 | person(c("Muhammad","A"), "Shah"), 9 | person("Hussam","Kaka"), 10 | person(c("Gary", "D."), "Bader") 11 | ), 12 | year = 2019, 13 | journal = "Molecular Systems Biology", 14 | volume = "15", 15 | pages = "e8497", 16 | doi = "10.15252/msb.20188497", 17 | pubmed = "30872331", 18 | header = "The netDx algorithm is described in:"), 19 | bibentry(bibtype = "Article", 20 | key = "netDx-software", 21 | title = "{netDx: Software for building interpretable patient classifiers by multi-'omic data integration using patient similarity networks}", 22 | author = c( 23 | person("Shraddha", "Pai"), 24 | person("Philipp", "Weber"), 25 | person("Ruth", "Isserlin"), 26 | person("Hussam","Kaka"), 27 | person("Shirley", "Hui"), 28 | person(c("Muhammad","A"), "Shah"), 29 | person("Luca", "Giudice"), 30 | person("Rosalba", "Giugno"), 31 | person(c("Anne","Krogh"), "Nøhr"), 32 | person("Jan", "Baumbach"), 33 | person(c("Gary", "D."), "Bader") 34 | ), 35 | year = 2021, 36 | journal = "F1000Research", 37 | volume = "9", 38 | pages = "1239", 39 | doi = "10.12688/f1000research.26429.2", 40 | pubmed = "33628435", 41 | header = "The netDx package is described in:") 42 | 43 | ) -------------------------------------------------------------------------------- /inst/extdata/GM_NRANK/CV_1.query-results.report.txt.NRANK: -------------------------------------------------------------------------------- 1 | #This Report has been generated with a netDx-specific version of GeneMania v3.5. 
2 | Network Weight 3 | GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS.profile 56.37 4 | MUCIN_CORE_1_AND_CORE_2__I_O__I_-GLYCOSYLATION.profile 31.50 5 | RETINOL_BIOSYNTHESIS.profile 12.13 6 | 7 | -------------------------------------------------------------------------------- /inst/extdata/GM_NRANK/CV_2.query-results.report.txt.NRANK: -------------------------------------------------------------------------------- 1 | #This Report has been generated with a netDx-specific version of GeneMania v3.5. 2 | Network Weight 3 | GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS.profile 58.25 4 | MUCIN_CORE_1_AND_CORE_2__I_O__I_-GLYCOSYLATION.profile 31.57 5 | RETINOL_BIOSYNTHESIS.profile 10.19 6 | 7 | -------------------------------------------------------------------------------- /inst/extdata/GM_query.txt: -------------------------------------------------------------------------------- 1 | predictor 2 | MB.128 MB.145 MB.147 MB.15 MB.178 3 | all 4 | 103 5 | automatic 6 | -------------------------------------------------------------------------------- /inst/extdata/INSTALL/Dockerfile: -------------------------------------------------------------------------------- 1 | # use Dockerized R ("Rocker") as parent image 2 | FROM ubuntu 3 | 4 | USER root 5 | 6 | 7 | ENV DEBIAN_FRONTEND=noninteractive 8 | ENV TZ 'America/New York' 9 | 10 | # R pre-requisites 11 | RUN echo $TZ > /etc/timezone && \ 12 | apt-get update && \ 13 | apt-get install -y tzdata && \ 14 | apt-get install -y --no-install-recommends \ 15 | gfortran \ 16 | r-base \ 17 | openjdk-8-jre \ 18 | gcc make g++ \ 19 | zlib1g-dev libssl-dev libssh2-1-dev libcurl4-openssl-dev \ 20 | liblapack-dev liblapack3 libopenblas-base libopenblas-dev \ 21 | libxml2-dev 22 | # && apt-get clean && \ 23 | # rm -rf /var/lib/apt/lists/* 24 | 25 | 26 | RUN echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile 27 | RUN Rscript -e 
"install.packages(c('devtools','curl','bigmemory','foreach','combinat','doParallel','ROCR','pracma','RColorBrewer','reshape2','ggplot2', 'caroline', 'rmarkdown'))"
RUN Rscript -e "source('http://bioconductor.org/biocLite.R');biocLite(c('Biobase','GenomicRanges', 'RCy3'))";
RUN Rscript -e "install.packages(c('pheatmap','RColorBrewer','gProfileR','ggplot2','glmnet','igraph'))"
RUN Rscript -e "devtools::install_github('cytoscape/r2cytoscape')"

# python required for using genemania, pandoc for compiling the html vignettes
RUN apt-get install -y python2.7 python-pip vim git pandoc

# move netDx package and code
WORKDIR /examples
ADD . /examples

# clone the most recent netDx version and install the R package
RUN git clone https://github.com/BaderLab/netDx.git
RUN cd netDx && R CMD INSTALL netDx
RUN cd netDx && R CMD INSTALL netDx.examples

--------------------------------------------------------------------------------
/inst/extdata/INSTALL/INSTALL_OSX.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Script to automate dependency install for netDx.
# Every failed prerequisite check exits with a non-zero status so that
# callers can detect the failure.

echo "* Checking if Java installed ..."
# Match the version banner of any vendor ('java version "..."',
# 'openjdk version "..."') rather than only the Oracle wording.
if java -version 2>&1 | grep -q ' version "' ; then
    echo -e "\tdone."
else {
    echo -e "*** ERROR: Java not found; install (https://www.java.com/en/download/) or add to path"
    exit 1;
}
fi

echo "* Checking if Python installed ..."
# Test for the executable itself: capturing the output of 'python --version'
# is non-empty even when the shell only reports "command not found".
if command -v python > /dev/null 2>&1
then
    echo -e "\tdone"
else {
    echo -e "*** ERROR: Python not found; install (https://www.python.org/downloads/) or add to path"
    exit 1;
}
fi

echo "* Checking if R installed ..."
if R --version | grep -q "R version" ;
then
    ver=`R --version | grep "R version" | cut -f 3 -d " "`
    echo -e "\tversion found: $ver"
    ver1=`echo $ver | cut -f1 -d"."`
    ver2=`echo $ver | cut -f2 -d"."`
    # Accept any major version above 3, or 3.x with minor >= 6. Testing
    # major and minor independently would reject releases such as 4.0.
    if [ "$ver1" -gt 3 ] || { [ "$ver1" -eq 3 ] && [ "$ver2" -ge 6 ]; }; then
        echo -e "\tdone"
    else {
        echo ""
        echo -e "\t*** ERROR: Version 3.6+ of R required. Install from https://cran.r-project.org/, or add to path"
        exit 1
    }
    fi
else {
    echo -e "\t*** ERROR: R not found. Install R 3.6+ from https://cran.r-project.org/, or add to path"
    exit 1;
}
fi

# install R packages
echo "* Installing R dependencies"
# NOTE(review): '>' replaces any existing ~/.Rprofile with this single line.
echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile

declare -a PKGS=( devtools curl bigmemory foreach combinat doParallel ROCR pracma RColorBrewer reshape2 ggplot2 caroline rmarkdown igraph glmnet );
for p in "${PKGS[@]}";do
    echo -e "\t* Checking for $p"
    Rscript -e "if(!requireNamespace(\"$p\",quietly=TRUE)){ install.packages(\"$p\")}"
done

echo "* Installing BioConductor if required"
Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)){install.packages("BiocManager")}'

echo "* Installing BioConductor dependencies if required"
declare -a PKGS=( GenomicRanges RCy3 );
for p in "${PKGS[@]}";do
    echo -e "\t* Checking for $p"
    Rscript -e "if(!requireNamespace(\"$p\",quietly=TRUE)){ BiocManager::install(\"$p\")}"
done

echo "* Checking if pandoc installed (needed to run tutorials) ..."
if pandoc -v | grep -q "^pandoc " ;
then
    ver=`pandoc -v | grep "^pandoc " | cut -f 2 -d " "`
    echo -e "\tversion found: $ver"
    ver1=`echo $ver | cut -f1 -d"."`
    ver2=`echo $ver | cut -f2 -d"."`
    if [ $ver1 -ge 2 ] ; then
        echo -e "\tdone"
    else {
        echo ""
        echo -e "\t*** Version 1.12.3+ of pandoc not found! Installing..."
        curl -L https://github.com/jgm/pandoc/releases/download/2.7.2/pandoc-2.7.2-macOS.pkg -o pandoc.pkg
        sudo installer -pkg pandoc.pkg -target /
    }
    fi
else {
    echo -e "\t*** Version 1.12.3+ of pandoc not found! Installing..."
    curl -L https://github.com/jgm/pandoc/releases/download/2.7.2/pandoc-2.7.2-macOS.pkg -o pandoc.pkg
    sudo installer -pkg pandoc.pkg -target /
}
fi

cd ..
echo "* Installing netDx"
R CMD INSTALL netDx

--------------------------------------------------------------------------------
/inst/extdata/INSTALL/INSTALL_Unix.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Script to automate dependency install for netDx.
# Every failed prerequisite check exits with a non-zero status so that
# callers can detect the failure.

echo "* Installing Unix dependencies"
# 'ENV' is a Dockerfile directive, not a shell command; export the variable
# instead. The zone name uses an underscore (America/New_York).
export TZ='America/New_York'
echo $TZ > /etc/timezone && \
    apt-get update && \
    apt-get install -y tzdata && \
    apt-get install -y --no-install-recommends \
    gfortran \
    r-base \
    openjdk-8-jre \
    gcc make g++ \
    zlib1g-dev libssl-dev libssh2-1-dev libcurl4-openssl-dev \
    liblapack-dev liblapack3 libopenblas-base libopenblas-dev \
    libxml2-dev

echo "* Checking if Java installed ..."
# Match the version banner of any vendor; the package installed above is
# OpenJDK, whose banner reads 'openjdk version "..."'.
if java -version 2>&1 | grep -q ' version "' ; then
    echo -e "\tdone."
else {
    echo -e "*** ERROR: Java not found; install (https://www.java.com/en/download/) or add to path"
    exit 1;
}
fi

echo "* Checking if R installed ..."
if R --version | grep -q "R version" ;
then
    ver=`R --version | grep "R version" | cut -f 3 -d " "`
    echo -e "\tversion found: $ver"
    ver1=`echo $ver | cut -f1 -d"."`
    ver2=`echo $ver | cut -f2 -d"."`
    # Accept any major version above 3, or 3.x with minor >= 6. Testing
    # major and minor independently would reject releases such as 4.0.
    if [ "$ver1" -gt 3 ] || { [ "$ver1" -eq 3 ] && [ "$ver2" -ge 6 ]; }; then
        echo -e "\tdone"
    else {
        echo ""
        echo -e "\t*** ERROR: Version 3.6+ of R required. Install from https://cran.r-project.org/, or add to path"
        echo -e "\t*** If upgrading, install r-base and r-base-dev"
        echo -e "\t*** Visit https://cran.r-project.org/bin/linux/ubuntu/README.html for details"
        exit 1
    }
    fi
else {
    echo -e "\t*** ERROR: R not found. Install R 3.6+ from https://cran.r-project.org/, or add to path"
    exit 1;
}
fi

# install R packages
echo "* Installing R dependencies"
# NOTE(review): '>' replaces any existing ~/.Rprofile with this single line.
echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile

declare -a PKGS=( devtools curl bigmemory foreach combinat doParallel ROCR pracma RColorBrewer reshape2 ggplot2 caroline rmarkdown igraph glmnet );
for p in "${PKGS[@]}";do
    echo -e "\t* Checking for $p"
    Rscript -e "if(!requireNamespace(\"$p\",quietly=TRUE)){ install.packages(\"$p\")}"
done

echo "* Installing BioConductor if required"
Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)){install.packages("BiocManager")}'

echo "* Installing BioConductor dependencies if required"
declare -a PKGS=( GenomicRanges RCy3 );
for p in "${PKGS[@]}";do
    echo -e "\t* Checking for $p"
    Rscript -e "if(!requireNamespace(\"$p\",quietly=TRUE)){ BiocManager::install(\"$p\")}"
done

cd ..
72 | echo "* Installing netDx" 73 | R CMD INSTALL netDx 74 | -------------------------------------------------------------------------------- /inst/extdata/TGCT_mutSmooth_pheno.txt: -------------------------------------------------------------------------------- 1 | ID STATUS 2 | TCGA.2G.AAF4 EARLY 3 | TCGA.2G.AAF8 EARLY 4 | TCGA.2G.AAFH EARLY 5 | TCGA.2G.AAFI EARLY 6 | TCGA.2G.AAFL EARLY 7 | TCGA.2G.AAFM EARLY 8 | TCGA.2G.AAFO EARLY 9 | TCGA.2G.AAFY EARLY 10 | TCGA.2G.AAG8 EARLY 11 | TCGA.2G.AAGA EARLY 12 | TCGA.2G.AAGJ EARLY 13 | TCGA.2G.AAGP EARLY 14 | TCGA.2G.AAGX EARLY 15 | TCGA.2G.AAH3 EARLY 16 | TCGA.2G.AAH4 EARLY 17 | TCGA.2G.AAHA EARLY 18 | TCGA.2G.AAHG EARLY 19 | TCGA.2G.AAHL EARLY 20 | TCGA.2G.AAHN EARLY 21 | TCGA.2G.AAKG EARLY 22 | TCGA.2G.AAKH EARLY 23 | TCGA.2G.AAKL EARLY 24 | TCGA.2G.AAKM EARLY 25 | TCGA.2G.AAL5 EARLY 26 | TCGA.2X.A9D5 EARLY 27 | TCGA.2X.A9D6 EARLY 28 | TCGA.4K.AA1H EARLY 29 | TCGA.4K.AA1I EARLY 30 | TCGA.S6.A8JX EARLY 31 | TCGA.SB.A6J6 EARLY 32 | TCGA.SO.A8JP EARLY 33 | TCGA.VF.A8A8 EARLY 34 | TCGA.VF.A8A9 EARLY 35 | TCGA.VF.A8AA EARLY 36 | TCGA.VF.A8AB EARLY 37 | TCGA.VF.A8AC EARLY 38 | TCGA.VF.A8AD EARLY 39 | TCGA.VF.A8AE EARLY 40 | TCGA.WZ.A7V3 EARLY 41 | TCGA.WZ.A7V4 EARLY 42 | TCGA.WZ.A7V5 EARLY 43 | TCGA.XE.A8H4 EARLY 44 | TCGA.XE.A9SE EARLY 45 | TCGA.XE.AANR EARLY 46 | TCGA.XE.AANV EARLY 47 | TCGA.XE.AAO3 EARLY 48 | TCGA.XE.AAO4 EARLY 49 | TCGA.XE.AAO6 EARLY 50 | TCGA.XE.AAOC EARLY 51 | TCGA.XE.AAOD EARLY 52 | TCGA.XE.AAOF EARLY 53 | TCGA.XE.AAOL EARLY 54 | TCGA.XY.A89B EARLY 55 | TCGA.XY.A8S2 EARLY 56 | TCGA.YU.A90P EARLY 57 | TCGA.YU.A90Q EARLY 58 | TCGA.YU.A90S EARLY 59 | TCGA.YU.A90W EARLY 60 | TCGA.YU.A94I EARLY 61 | TCGA.ZM.AA05 EARLY 62 | TCGA.ZM.AA06 EARLY 63 | TCGA.ZM.AA0B EARLY 64 | TCGA.ZM.AA0D EARLY 65 | TCGA.ZM.AA0E EARLY 66 | TCGA.ZM.AA0F EARLY 67 | TCGA.ZM.AA0N EARLY 68 | TCGA.2G.AAFN LATE 69 | TCGA.2G.AAFZ LATE 70 | TCGA.2G.AAG9 LATE 71 | TCGA.2G.AAGG LATE 72 | TCGA.2G.AAGN LATE 73 | TCGA.2G.AAGS LATE 74 | 
TCGA.2G.AAGZ LATE 75 | TCGA.2G.AAH8 LATE 76 | TCGA.2G.AAKD LATE 77 | TCGA.2G.AALP LATE 78 | TCGA.YU.A90Y LATE 79 | TCGA.YU.A912 LATE 80 | TCGA.YU.A94D LATE 81 | TCGA.YU.AA4L LATE 82 | -------------------------------------------------------------------------------- /inst/extdata/dbPath/1/_0.cfs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/1/_0.cfs -------------------------------------------------------------------------------- /inst/extdata/dbPath/1/metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | predictor 5 | 1 6 | my_predictor 7 | 8 | -------------------------------------------------------------------------------- /inst/extdata/dbPath/1/segments.gen: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/1/segments.gen -------------------------------------------------------------------------------- /inst/extdata/dbPath/1/segments_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/1/segments_2 -------------------------------------------------------------------------------- /inst/extdata/dbPath/base/_0.cfs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/base/_0.cfs -------------------------------------------------------------------------------- /inst/extdata/dbPath/base/segments.gen: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/base/segments.gen -------------------------------------------------------------------------------- /inst/extdata/dbPath/base/segments_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/base/segments_2 -------------------------------------------------------------------------------- /inst/extdata/dbPath/cache/CORE/1/1.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/1.ser -------------------------------------------------------------------------------- /inst/extdata/dbPath/cache/CORE/1/2.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/2.ser -------------------------------------------------------------------------------- /inst/extdata/dbPath/cache/CORE/1/3.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/3.ser -------------------------------------------------------------------------------- /inst/extdata/dbPath/cache/CORE/1/4.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/4.ser -------------------------------------------------------------------------------- /inst/extdata/dbPath/cache/CORE/1/DatasetInfo.ser: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/DatasetInfo.ser -------------------------------------------------------------------------------- /inst/extdata/dbPath/cache/CORE/1/attributeGroups.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/attributeGroups.ser -------------------------------------------------------------------------------- /inst/extdata/dbPath/cache/CORE/1/networkIds.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/networkIds.ser -------------------------------------------------------------------------------- /inst/extdata/dbPath/cache/CORE/1/nodeIds.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/nodeIds.ser -------------------------------------------------------------------------------- /inst/extdata/dbPath/genemania.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | org.genemania.data.lucene.LuceneDataSet 4 | custom 5 | compact 6 | 7 | -------------------------------------------------------------------------------- /inst/extdata/dbPath/user/segments.gen: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/user/segments.gen -------------------------------------------------------------------------------- /inst/extdata/dbPath/user/segments_1: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/user/segments_1 -------------------------------------------------------------------------------- /inst/extdata/example_nets/BIG_CASE.txt: -------------------------------------------------------------------------------- 1 | P1 P2 1 2 | P1 P3 1 3 | P1 P4 1 4 | P1 P5 1 5 | P1 P6 1 6 | P1 P7 1 7 | P1 P8 1 8 | P1 P9 1 9 | P1 P10 1 10 | P2 P3 1 11 | P2 P4 1 12 | P2 P5 1 13 | P2 P6 1 14 | P2 P7 1 15 | P2 P8 1 16 | P2 P9 1 17 | P2 P10 1 18 | P3 P4 1 19 | P3 P5 1 20 | P3 P6 1 21 | P3 P7 1 22 | P3 P8 1 23 | P3 P9 1 24 | P3 P10 1 25 | P4 P5 1 26 | P4 P6 1 27 | P4 P7 1 28 | P4 P8 1 29 | P4 P9 1 30 | P4 P10 1 31 | P5 P6 1 32 | P5 P7 1 33 | P5 P8 1 34 | P5 P9 1 35 | P5 P10 1 36 | P6 P7 1 37 | P6 P8 1 38 | P6 P9 1 39 | P6 P10 1 40 | P7 P8 1 41 | P7 P9 1 42 | P7 P10 1 43 | P8 P9 1 44 | P8 P10 1 45 | P9 P10 1 46 | -------------------------------------------------------------------------------- /inst/extdata/example_nets/BIG_CONTROL.txt: -------------------------------------------------------------------------------- 1 | P101 P102 1 2 | P101 P103 1 3 | P101 P104 1 4 | P101 P105 1 5 | P101 P106 1 6 | P101 P107 1 7 | P101 P108 1 8 | P101 P109 1 9 | P101 P110 1 10 | P102 P103 1 11 | P102 P104 1 12 | P102 P105 1 13 | P102 P106 1 14 | P102 P107 1 15 | P102 P108 1 16 | P102 P109 1 17 | P102 P110 1 18 | P103 P104 1 19 | P103 P105 1 20 | P103 P106 1 21 | P103 P107 1 22 | P103 P108 1 23 | P103 P109 1 24 | P103 P110 1 25 | P104 P105 1 26 | P104 P106 1 27 | P104 P107 1 28 | P104 P108 1 29 | P104 P109 1 30 | P104 P110 1 31 | P105 P106 1 32 | P105 P107 1 33 | P105 P108 1 34 | P105 P109 1 35 | P105 P110 1 36 | P106 P107 1 37 | P106 P108 1 38 | P106 P109 1 39 | P106 P110 1 40 | P107 P108 1 41 | P107 P109 1 42 | P107 P110 1 43 | P108 P109 1 44 | P108 P110 1 45 | P109 P110 1 46 | 
-------------------------------------------------------------------------------- /inst/extdata/example_nets/BOTH_EQUAL.txt: -------------------------------------------------------------------------------- 1 | P1 P2 1 2 | P1 P3 1 3 | P1 P4 1 4 | P1 P5 1 5 | P1 P101 1 6 | P1 P102 1 7 | P1 P103 1 8 | P1 P104 1 9 | P1 P105 1 10 | P2 P3 1 11 | P2 P4 1 12 | P2 P5 1 13 | P2 P101 1 14 | P2 P102 1 15 | P2 P103 1 16 | P2 P104 1 17 | P2 P105 1 18 | P3 P4 1 19 | P3 P5 1 20 | P3 P101 1 21 | P3 P102 1 22 | P3 P103 1 23 | P3 P104 1 24 | P3 P105 1 25 | P4 P5 1 26 | P4 P101 1 27 | P4 P102 1 28 | P4 P103 1 29 | P4 P104 1 30 | P4 P105 1 31 | P5 P101 1 32 | P5 P102 1 33 | P5 P103 1 34 | P5 P104 1 35 | P5 P105 1 36 | P101 P102 1 37 | P101 P103 1 38 | P101 P104 1 39 | P101 P105 1 40 | P102 P103 1 41 | P102 P104 1 42 | P102 P105 1 43 | P103 P104 1 44 | P103 P105 1 45 | P104 P105 1 46 | -------------------------------------------------------------------------------- /inst/extdata/example_nets/MOSTLY_CASE.txt: -------------------------------------------------------------------------------- 1 | P1 P2 1 2 | P1 P3 1 3 | P1 P4 1 4 | P1 P5 1 5 | P1 P6 1 6 | P1 P7 1 7 | P1 P101 1 8 | P1 P102 1 9 | P1 P103 1 10 | P2 P3 1 11 | P2 P4 1 12 | P2 P5 1 13 | P2 P6 1 14 | P2 P7 1 15 | P2 P101 1 16 | P2 P102 1 17 | P2 P103 1 18 | P3 P4 1 19 | P3 P5 1 20 | P3 P6 1 21 | P3 P7 1 22 | P3 P101 1 23 | P3 P102 1 24 | P3 P103 1 25 | P4 P5 1 26 | P4 P6 1 27 | P4 P7 1 28 | P4 P101 1 29 | P4 P102 1 30 | P4 P103 1 31 | P5 P6 1 32 | P5 P7 1 33 | P5 P101 1 34 | P5 P102 1 35 | P5 P103 1 36 | P6 P7 1 37 | P6 P101 1 38 | P6 P102 1 39 | P6 P103 1 40 | P7 P101 1 41 | P7 P102 1 42 | P7 P103 1 43 | P101 P102 1 44 | P101 P103 1 45 | P102 P103 1 46 | -------------------------------------------------------------------------------- /inst/extdata/example_nets/SMALL_CASE.txt: -------------------------------------------------------------------------------- 1 | P1 P2 1 2 | P1 P3 1 3 | P2 P3 1 4 | 
-------------------------------------------------------------------------------- /inst/extdata/example_nets/SMALL_CONTROL.txt: -------------------------------------------------------------------------------- 1 | P101 P102 1 2 | P101 P103 1 3 | P102 P103 1 4 | -------------------------------------------------------------------------------- /inst/extdata/genemania.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | org.genemania.data.lucene.LuceneDataSet 4 | custom 5 | compact 6 | 7 | -------------------------------------------------------------------------------- /inst/extdata/pathway_ex3.gmt: -------------------------------------------------------------------------------- 1 | STEARATE BIOSYNTHESIS I (ANIMALS)%HUMANCYC%PWY-5972 stearate biosynthesis I (animals) ELOVL1 ACOT7 ACSL1 ACSL5 ELOVL6 ACSL4 ACSL3 ACOT2 ACOT1 ACSBG1 ACSBG2 SLC27A2 ACOT4 2 | PUTRESCINE DEGRADATION III%HUMANCYC%PWY-0 putrescine degradation III ALDH3A2 ALDH3B2 ALDH3A1 ALDH1B1 MAOB ALDH2 MAOA ALDH3B1 SAT2 SAT1 3 | TRYPTOPHAN DEGRADATION III (EUKARYOTIC)%HUMANCYC%TRYPTOPHAN-DEGRADATION-1 tryptophan degradation III (eukaryotic) ACAT1 HADHB GCDH TDO2 KYNU HAAO AFMID KMO ACAA1 ACAT2 ACMSD 4 | -------------------------------------------------------------------------------- /inst/extdata/plots/SURVIVENO.gmt: -------------------------------------------------------------------------------- 1 | Abacavir_transport_and_metabolism Abacavir_transport_and_metabolism Abacavir_transport_and_metabolism 2 | Androgen_biosynthesis Androgen_biosynthesis Androgen_biosynthesis 3 | Aquaporin-mediated_transport Aquaporin-mediated_transport Aquaporin-mediated_transport 4 | Bile_salt_and_organic_anion_slc_transporters Bile_salt_and_organic_anion_slc_transporters Bile_salt_and_organic_anion_slc_transporters 5 | Calnexin_calreticulin_cycle Calnexin_calreticulin_cycle Calnexin_calreticulin_cycle 6 | Class_c_3__metabotropic_glutamate_pheromone_receptors_ 
Class_c_3__metabotropic_glutamate_pheromone_receptors_ Class_c_3__metabotropic_glutamate_pheromone_receptors_ 7 | Hormone_ligand-binding_receptors Hormone_ligand-binding_receptors Hormone_ligand-binding_receptors 8 | Metabolism_of_folate_and_pterines Metabolism_of_folate_and_pterines Metabolism_of_folate_and_pterines 9 | Metabolism_of_water-soluble_vitamins_and_cofactors Metabolism_of_water-soluble_vitamins_and_cofactors Metabolism_of_water-soluble_vitamins_and_cofactors 10 | Platelet_adhesion_to_exposed_collagen Platelet_adhesion_to_exposed_collagen Platelet_adhesion_to_exposed_collagen 11 | Pou5f1__oct4_,_sox2,_nanog_activate_genes_related_to_proliferation Pou5f1__oct4_,_sox2,_nanog_activate_genes_related_to_proliferation Pou5f1__oct4_,_sox2,_nanog_activate_genes_related_to_proliferation 12 | Regulation_of_gene_expression_by_hypoxia-inducible_factor Regulation_of_gene_expression_by_hypoxia-inducible_factor Regulation_of_gene_expression_by_hypoxia-inducible_factor 13 | Regulation_of_ifna_signaling Regulation_of_ifna_signaling Regulation_of_ifna_signaling 14 | Thyroxine_biosynthesis Thyroxine_biosynthesis Thyroxine_biosynthesis 15 | Vasopressin_regulates_renal_water_homeostasis_via_aquaporins Vasopressin_regulates_renal_water_homeostasis_via_aquaporins Vasopressin_regulates_renal_water_homeostasis_via_aquaporins 16 | Vitamin_b5__pantothenate__metabolism Vitamin_b5__pantothenate__metabolism Vitamin_b5__pantothenate__metabolism 17 | -------------------------------------------------------------------------------- /inst/extdata/plots/SURVIVENO_nodeAttrs.txt: -------------------------------------------------------------------------------- 1 | netName maxScore netType 2 | ABACAVIR_TRANSPORT_AND_METABOLISM 10 rna 3 | ANDROGEN_BIOSYNTHESIS 8 rna 4 | AQUAPORIN-MEDIATED_TRANSPORT 9 rna 5 | BILE_SALT_AND_ORGANIC_ANION_SLC_TRANSPORTERS 10 rna 6 | CALNEXIN_CALRETICULIN_CYCLE 9 rna 7 | CLASS_C_3__METABOTROPIC_GLUTAMATE_PHEROMONE_RECEPTORS_ 9 rna 8 | 
HORMONE_LIGAND-BINDING_RECEPTORS 4 rna 9 | METABOLISM_OF_FOLATE_AND_PTERINES 9 rna 10 | METABOLISM_OF_WATER-SOLUBLE_VITAMINS_AND_COFACTORS 10 rna 11 | PLATELET_ADHESION_TO_EXPOSED_COLLAGEN 10 rna 12 | POU5F1__OCT4_,_SOX2,_NANOG_ACTIVATE_GENES_RELATED_TO_PROLIFERATION 9 rna 13 | REGULATION_OF_GENE_EXPRESSION_BY_HYPOXIA-INDUCIBLE_FACTOR 8 rna 14 | REGULATION_OF_IFNA_SIGNALING 10 rna 15 | THYROXINE_BIOSYNTHESIS 10 rna 16 | VASOPRESSIN_REGULATES_RENAL_WATER_HOMEOSTASIS_VIA_AQUAPORINS 9 rna 17 | VITAMIN_B5__PANTOTHENATE__METABOLISM 9 rna 18 | -------------------------------------------------------------------------------- /inst/extdata/plots/SURVIVEYES.gmt: -------------------------------------------------------------------------------- 1 | Activation_of_the_pre-replicative_complex Activation_of_the_pre-replicative_complex Activation_of_the_pre-replicative_complex 2 | Androgen_biosynthesis Androgen_biosynthesis Androgen_biosynthesis 3 | Biocarta_stem_pathway Biocarta_stem_pathway Biocarta_stem_pathway 4 | Calnexin_calreticulin_cycle Calnexin_calreticulin_cycle Calnexin_calreticulin_cycle 5 | Defects_in_cobalamin__b12__metabolism Defects_in_cobalamin__b12__metabolism Defects_in_cobalamin__b12__metabolism 6 | Defects_in_vitamin_and_cofactor_metabolism Defects_in_vitamin_and_cofactor_metabolism Defects_in_vitamin_and_cofactor_metabolism 7 | Fgfr2_ligand_binding_and_activation Fgfr2_ligand_binding_and_activation Fgfr2_ligand_binding_and_activation 8 | Gamma-carboxylation,_transport,_and_amino-terminal_cleavage_of_proteins Gamma-carboxylation,_transport,_and_amino-terminal_cleavage_of_proteins Gamma-carboxylation,_transport,_and_amino-terminal_cleavage_of_proteins 9 | Glypican_1_network Glypican_1_network Glypican_1_network 10 | Hedgehog_ligand_biogenesis Hedgehog_ligand_biogenesis Hedgehog_ligand_biogenesis 11 | Hedgehog_off_state Hedgehog_off_state Hedgehog_off_state 12 | Metabolism_of_folate_and_pterines Metabolism_of_folate_and_pterines Metabolism_of_folate_and_pterines 
13 | Metabolism_of_water-soluble_vitamins_and_cofactors Metabolism_of_water-soluble_vitamins_and_cofactors Metabolism_of_water-soluble_vitamins_and_cofactors 14 | Platelet_adhesion_to_exposed_collagen Platelet_adhesion_to_exposed_collagen Platelet_adhesion_to_exposed_collagen 15 | Reactions_specific_to_the_complex_n-glycan_synthesis_pathway Reactions_specific_to_the_complex_n-glycan_synthesis_pathway Reactions_specific_to_the_complex_n-glycan_synthesis_pathway 16 | Regulation_of_cholesterol_biosynthesis_by_srebp__srebf_ Regulation_of_cholesterol_biosynthesis_by_srebp__srebf_ Regulation_of_cholesterol_biosynthesis_by_srebp__srebf_ 17 | Regulation_of_pyruvate_dehydrogenase__pdh__complex Regulation_of_pyruvate_dehydrogenase__pdh__complex Regulation_of_pyruvate_dehydrogenase__pdh__complex 18 | Removal_of_aminoterminal_propeptides_from_gamma-carboxylated_proteins Removal_of_aminoterminal_propeptides_from_gamma-carboxylated_proteins Removal_of_aminoterminal_propeptides_from_gamma-carboxylated_proteins 19 | Retinol_biosynthesis Retinol_biosynthesis RDH10 DHRS4 LRAT LIPC CES5A DHRS9 RDH11 DHRS3 CES1 RBP1 CES4A RBP2 PNLIP RBP5 RBP4 CES2 20 | Rora_activates_gene_expression Rora_activates_gene_expression Rora_activates_gene_expression 21 | Synthesis_of_pc Synthesis_of_pc Synthesis_of_pc 22 | Tak1_activates_nfkb_by_phosphorylation_and_activation_of_ikks_complex Tak1_activates_nfkb_by_phosphorylation_and_activation_of_ikks_complex Tak1_activates_nfkb_by_phosphorylation_and_activation_of_ikks_complex 23 | The_nlrp3_inflammasome The_nlrp3_inflammasome The_nlrp3_inflammasome 24 | Thyroxine_biosynthesis Thyroxine_biosynthesis Thyroxine_biosynthesis 25 | Vegf_and_vegfr_signaling_network Vegf_and_vegfr_signaling_network Vegf_and_vegfr_signaling_network 26 | -------------------------------------------------------------------------------- /inst/extdata/plots/SURVIVEYES_nodeAttrs.txt: -------------------------------------------------------------------------------- 1 | netName maxScore 
netType 2 | ACTIVATION_OF_THE_PRE-REPLICATIVE_COMPLEX 8 rna 3 | ANDROGEN_BIOSYNTHESIS 5 rna 4 | BIOCARTA_STEM_PATHWAY 7 rna 5 | CALNEXIN_CALRETICULIN_CYCLE 7 rna 6 | DEFECTS_IN_COBALAMIN__B12__METABOLISM 9 rna 7 | DEFECTS_IN_VITAMIN_AND_COFACTOR_METABOLISM 9 rna 8 | FGFR2_LIGAND_BINDING_AND_ACTIVATION 8 rna 9 | GAMMA-CARBOXYLATION,_TRANSPORT,_AND_AMINO-TERMINAL_CLEAVAGE_OF_PROTEINS 3 rna 10 | GLYPICAN_1_NETWORK 7 rna 11 | HEDGEHOG_LIGAND_BIOGENESIS 6 rna 12 | HEDGEHOG_OFF_STATE 7 rna 13 | METABOLISM_OF_FOLATE_AND_PTERINES 9 rna 14 | METABOLISM_OF_WATER-SOLUBLE_VITAMINS_AND_COFACTORS 5 rna 15 | PLATELET_ADHESION_TO_EXPOSED_COLLAGEN 9 rna 16 | REACTIONS_SPECIFIC_TO_THE_COMPLEX_N-GLYCAN_SYNTHESIS_PATHWAY 10 rna 17 | REGULATION_OF_CHOLESTEROL_BIOSYNTHESIS_BY_SREBP__SREBF_ 6 rna 18 | REGULATION_OF_PYRUVATE_DEHYDROGENASE__PDH__COMPLEX 8 rna 19 | REMOVAL_OF_AMINOTERMINAL_PROPEPTIDES_FROM_GAMMA-CARBOXYLATED_PROTEINS 4 rna 20 | RETINOL_BIOSYNTHESIS 7 rna 21 | RORA_ACTIVATES_GENE_EXPRESSION 8 rna 22 | SYNTHESIS_OF_PC 6 rna 23 | TAK1_ACTIVATES_NFKB_BY_PHOSPHORYLATION_AND_ACTIVATION_OF_IKKS_COMPLEX 8 rna 24 | THE_NLRP3_INFLAMMASOME 5 rna 25 | THYROXINE_BIOSYNTHESIS 10 rna 26 | VEGF_AND_VEGFR_SIGNALING_NETWORK 7 rna 27 | -------------------------------------------------------------------------------- /man/MB.pheno.Rd: -------------------------------------------------------------------------------- 1 | \name{MB.pheno} 2 | \alias{MB.pheno} 3 | \docType{data} 4 | \title{ 5 | Sample metadata table for medulloblastoma dataset. 6 | 7 | } 8 | \description{ 9 | data.frame with patient ID and tumour subtype (STATUS) 10 | } 11 | \usage{data(MB.pheno)} 12 | \source{ 13 | Northcott et al. (2011). J Clin Oncol. 29 (11):1408. 14 | } 15 | \references{ 16 | Northcott et al. (2011). J Clin Oncol. 29 (11):1408. 
17 | } 18 | \examples{ 19 | data(MB.pheno) 20 | head(MB.pheno) 21 | } 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /man/avgNormDiff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/similarities.R 3 | \name{avgNormDiff} 4 | \alias{avgNormDiff} 5 | \title{takes average of normdiff of each row in x} 6 | \usage{ 7 | avgNormDiff(x) 8 | } 9 | \arguments{ 10 | \item{x}{(numeric) matrix of values, one column per patient (e.g. ages)} 11 | } 12 | \value{ 13 | symmetric matrix of size ncol(dat) (number of patients) containing 14 | pairwise patient similarities 15 | } 16 | \description{ 17 | takes average of normdiff of each row in x 18 | } 19 | \examples{ 20 | data(xpr) 21 | sim <- avgNormDiff(xpr[,seq_len(2)]) 22 | } 23 | -------------------------------------------------------------------------------- /man/callFeatSel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callFeatSel.R 3 | \name{callFeatSel} 4 | \alias{callFeatSel} 5 | \title{Return feature selected nets based on given criteria} 6 | \usage{ 7 | callFeatSel(netScores, fsCutoff, fsPctPass) 8 | } 9 | \arguments{ 10 | \item{netScores}{(matrix) matrix of net scores} 11 | 12 | \item{fsCutoff}{(integer) net must score at least this much in a split to 13 | 'pass' the threshold} 14 | 15 | \item{fsPctPass}{(numeric 0 to 1) net must pass at least this percent of 16 | splits to be considered feature-selected} 17 | } 18 | \value{ 19 | (char) names of nets that pass feature-selection 20 | } 21 | \description{ 22 | Return feature selected nets based on given criteria 23 | } 24 | \details{ 25 | given the output of genNetScores.R and criteria for defining 26 | feature-selected (FS) nets, returns subset of nets that pass 
criteria. 27 | Net must score at least fsCutoff in at least fsPctPass of splits, to be 28 | considered feature-selected. 29 | } 30 | \examples{ 31 | data(featScores) 32 | passed <- lapply(featScores, function(x) { 33 | callFeatSel(x,10,0.7) # score 10/10 in >=70\% of trials 34 | }) 35 | print(passed) 36 | } 37 | -------------------------------------------------------------------------------- /man/callOverallSelectedFeatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper.R 3 | \name{callOverallSelectedFeatures} 4 | \alias{callOverallSelectedFeatures} 5 | \title{Wrapper to call selected features} 6 | \usage{ 7 | callOverallSelectedFeatures( 8 | featScores, 9 | featureSelCutoff, 10 | featureSelPct, 11 | cleanNames = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{featScores}{(list of lists): matrix of feature scores across all splits, separated 16 | by patient label. First level: patient labels. Second level: matrix of scores for 17 | corresponding label.} 18 | 19 | \item{featureSelCutoff}{(integer) cutoff score for feature selection. 20 | A feature must have minimum of this score for specified fraction of splits 21 | (see featureSelPct) to pass.} 22 | 23 | \item{featureSelPct}{(numeric between 0 and 1) cutoff percent for feature selection. 24 | A feature must have minimum score of featureSelCutoff for featureSelPct of 25 | train/test splits to pass.} 26 | 27 | \item{cleanNames}{(logical) remove internal suffixes for human readability} 28 | } 29 | \value{ 30 | (list) Feature scores for all splits, plus those passing selection for overall predictor 31 | featScores: (matrix) feature scores for each split 32 | selectedFeatures: (list) features passing selection for each class; one key per class 33 | } 34 | \description{ 35 | Wrapper to call selected features 36 | } 37 | \details{ 38 | Calls features that are consistently high-scoring for predicting 39 | each class.
The context for this is as follows: 40 | The original model runs feature selection over multiple splits of data 41 | into train/test samples, and each such split generates scores for all features. 42 | This function identifies features with scores that exceed a threshold for a fraction 43 | of train/test splits; the threshold and fraction are both user-specified. This 44 | function is called by the wrapper getResults(), which returns both the matrix of 45 | feature scores across splits and list of features that pass the user-specified cutoffs. 46 | } 47 | \examples{ 48 | pathways <- paste("PATHWAY_",1:100,sep="") 49 | highrisk <- list() 50 | lowrisk <- list() 51 | for (k in 1:10) { 52 | highrisk[[k]] <- data.frame(PATHWAY_NAME=pathways, 53 | SCORE=floor(runif(length(pathways),min=0,max=10)), 54 | stringsAsFactors=FALSE); 55 | lowrisk[[k]] <- data.frame(PATHWAY_NAME=pathways, 56 | SCORE=floor(runif(length(pathways),min=0,max=10)), 57 | stringsAsFactors=FALSE); 58 | } 59 | names(highrisk) <- sprintf("Split\%i",1:length(highrisk)) 60 | names(lowrisk) <- sprintf("Split\%i",1:length(lowrisk)) 61 | callOverallSelectedFeatures(list(highrisk=highrisk,lowrisk=lowrisk), 5,0.5) 62 | } 63 | -------------------------------------------------------------------------------- /man/cleanPathwayName.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cleanPathwayName.R 3 | \name{cleanPathwayName} 4 | \alias{cleanPathwayName} 5 | \title{Clean pathway name so it can be a filename.} 6 | \usage{ 7 | cleanPathwayName(curP) 8 | } 9 | \arguments{ 10 | \item{curP}{(char) pathway name} 11 | } 12 | \value{ 13 | (char) Cleaned pathway name 14 | } 15 | \description{ 16 | Clean pathway name so it can be a filename. 
17 | } 18 | \examples{ 19 | cleanPathwayName('7-(3-AMINO-3-CARBOXYPROPYL)-WYOSINE BIOSYNTHESIS\%HUMANC') 20 | } 21 | -------------------------------------------------------------------------------- /man/cnv_GR.Rd: -------------------------------------------------------------------------------- 1 | \name{cnv_GR} 2 | \alias{cnv_GR} 3 | \docType{data} 4 | \title{ 5 | CNV locations for breast cancer (subset) 6 | } 7 | \description{ 8 | Subset of CNV locations for TCGA breast tumour. Each range is 9 | associated with a patient (ID) 10 | } 11 | \usage{data(cnv_GR)} 12 | \source{ 13 | The Cancer Genome Atlas. (2012). Nature 490:61-70. 14 | } 15 | \references{ 16 | The Cancer Genome Atlas. (2012). Nature 490:61-70. 17 | } 18 | \examples{ 19 | data(cnv_GR) 20 | head(cnv_GR) 21 | } 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /man/cnv_TTstatus.Rd: -------------------------------------------------------------------------------- 1 | \name{cnv_TTstatus} 2 | \alias{cnv_TTstatus} 3 | \docType{data} 4 | \title{ 5 | list of train/test statuses for CNV example 6 | } 7 | \description{ 8 | list of train/test statuses for CNV example 9 | } 10 | \usage{data(cnv_TTstatus)} 11 | \examples{ 12 | data(cnv_TTstatus) 13 | head(cnv_TTstatus) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/cnv_netPass.Rd: -------------------------------------------------------------------------------- 1 | \name{cnv_netScores} 2 | \alias{cnv_netScores} 3 | \docType{data} 4 | \title{ 5 | List of pathway-level feature selection scores 6 | } 7 | \description{ 8 | List of pathway-level feature selection scores 9 | } 10 | \usage{data(cnv_netScores)} 11 | \examples{ 12 | data(cnv_netScores) 13 | summary(cnv_netScores) 14 | head(cnv_netScores[[1]]) 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- 
/man/cnv_netScores.Rd: -------------------------------------------------------------------------------- 1 | \name{cnv_netPass} 2 | \alias{cnv_netPass} 3 | \docType{data} 4 | \title{ 5 | Vector of pathways that pass class enrichment 6 | } 7 | \description{ 8 | Vector of pathways that pass class enrichment 9 | } 10 | \usage{data(cnv_netPass)} 11 | \examples{ 12 | data(cnv_netPass) 13 | head(cnv_netPass) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/cnv_patientNetCount.Rd: -------------------------------------------------------------------------------- 1 | \name{cnv_patientNetCount} 2 | \alias{cnv_patientNetCount} 3 | \docType{data} 4 | \title{ 5 | Binary matrix of patient occurrence in networks 6 | } 7 | \description{ 8 | Binary matrix of patient occurrence in networks 9 | } 10 | \usage{data(cnv_patientNetCount)} 11 | \examples{ 12 | data(cnv_patientNetCount) 13 | head(cnv_patientNetCount) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/cnv_pheno.Rd: -------------------------------------------------------------------------------- 1 | \name{cnv_pheno} 2 | \alias{cnv_pheno} 3 | \docType{data} 4 | \title{ 5 | data.frame of patient labels and status for CNV example 6 | } 7 | \description{ 8 | data.frame of patient labels and status for CNV example 9 | } 10 | \usage{data(cnv_pheno)} 11 | \examples{ 12 | data(cnv_pheno) 13 | head(cnv_pheno) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/compareShortestPath.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compareShortestPath.R 3 | \name{compareShortestPath} 4 | \alias{compareShortestPath} 5 | \title{compare intra-cluster shortest distance to overall shortest distance of the 6 | 
network} 7 | \usage{ 8 | compareShortestPath(net, pheno, plotDist = FALSE, verbose = TRUE) 9 | } 10 | \arguments{ 11 | \item{net}{(data.frame) network on which to compute shortest path. 12 | SOURCE, TARGET, WEIGHTS. 13 | Column names are ignored but expects a header row. Distances will be 14 | computed based on the third column} 15 | 16 | \item{pheno}{(data.frame) Node information. ID (node name) and GROUP 17 | (cluster name)} 18 | 19 | \item{plotDist}{(logical) if TRUE, creates a violin plot showing the 20 | shortest path distributions for each group.} 21 | 22 | \item{verbose}{(logical) print messages} 23 | } 24 | \value{ 25 | (list) Two lists, 'avg' and 'all'. keys are cluster names. 26 | values for 'avg' are mean shortest path ; for 'all', are all pairwise 27 | shortest paths 28 | for subnetworks that contain only the edges where source and target both 29 | belong to the corresponding cluster. In addition, there is an 'overall' 30 | entry for the mean shortest distance for the entire network. 31 | } 32 | \description{ 33 | compare intra-cluster shortest distance to overall shortest distance of the 34 | network 35 | } 36 | \details{ 37 | Uses Dijkstra's algorithm for weighted edges. Pairwise nodes with 38 | infinite distances are excluded before computing average shortest path 39 | for a network. This function requires the igraph package to be installed. 
40 | } 41 | \examples{ 42 | data(silh); 43 | colnames(silh$net)[3] <- 'weight' 44 | compareShortestPath(silh$net, silh$groups) 45 | } 46 | -------------------------------------------------------------------------------- /man/compileFeatureScores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compileFeatureScores.R 3 | \name{compileFeatureScores} 4 | \alias{compileFeatureScores} 5 | \title{Tally the score of networks through cross-validation} 6 | \usage{ 7 | compileFeatureScores(fList, filter_WtSum = 100, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{fList}{(char) Vector of paths to GeneMANIA NRANK files} 11 | 12 | \item{filter_WtSum}{(numeric between 5-100) Limit to top-ranked 13 | networks such that cumulative weight is less than this parameter. 14 | e.g. If filter_WtSum=20, first order networks by decreasing weight; 15 | then keep those whose cumulative weight <= 20.} 16 | 17 | \item{verbose}{(logical) print messages} 18 | } 19 | \value{ 20 | (data.frame) Feature name and score; includes features that occur 21 | at least once in \code{fList}. 
22 | } 23 | \description{ 24 | Tally the score of networks through cross-validation 25 | } 26 | \examples{ 27 | netDir <- system.file("extdata","GM_NRANK",package="netDx") 28 | netFiles <- sprintf('\%s/\%s', netDir,dir(netDir,pattern='NRANK$')) 29 | pTally <- compileFeatureScores(netFiles,verbose=TRUE) 30 | print(head(pTally)) 31 | } 32 | -------------------------------------------------------------------------------- /man/confmat.Rd: -------------------------------------------------------------------------------- 1 | \name{confmat} 2 | \alias{confmat} 3 | \docType{data} 4 | \title{ 5 | Confusion matrix example 6 | } 7 | \description{ 8 | Sample table of True/False Positives and Negatives for various feature 9 | selection cutoffs 10 | tp: true positive rate, 11 | fp: false positive rate, 12 | tn: true negative rate, 13 | fn: false negative rate 14 | } 15 | \usage{data(confmat)} 16 | \examples{ 17 | data(confmat) 18 | head(confmat) 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/confusionMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper.R 3 | \name{confusionMatrix} 4 | \alias{confusionMatrix} 5 | \title{Make confusion matrix} 6 | \usage{ 7 | confusionMatrix(model) 8 | } 9 | \arguments{ 10 | \item{model}{(list) output of buildPredictor()} 11 | } 12 | \value{ 13 | (list) confusion matrix for all train/test splits and final averaged matrix 14 | Side effect of plotting the averaged matrix. 15 | } 16 | \description{ 17 | Make confusion matrix 18 | } 19 | \details{ 20 | Creates a confusion matrix, a square matrix which indicates the fraction of times 21 | patients in a class are correctly classified, versus misclassified as each of the other classes. 22 | Here, the confusion matrix is computed once per train-test split and the average is displayed. 
23 | For this reason, the fractions may not cleanly add up to 100\%. 24 | } 25 | \examples{ 26 | data(toymodel) 27 | confusionMatrix(toymodel) 28 | } 29 | -------------------------------------------------------------------------------- /man/convertProfileToNetworks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runProfileToNetworks.R 3 | \name{convertProfileToNetworks} 4 | \alias{convertProfileToNetworks} 5 | \title{Convert profiles to interaction networks before integration} 6 | \usage{ 7 | convertProfileToNetworks( 8 | netDir, 9 | outDir = tempdir(), 10 | simMetric = "pearson", 11 | numCores = 1L, 12 | JavaMemory = 4L, 13 | GM_jar = NULL, 14 | P2N_threshType = "off", 15 | P2N_maxMissing = 100, 16 | netSfx = "txt$", 17 | debugMode = FALSE 18 | ) 19 | } 20 | \arguments{ 21 | \item{netDir}{(char) directory with .profile files} 22 | 23 | \item{outDir}{(char) path to directory where interaction networks are to be printed} 24 | 25 | \item{simMetric}{(char) similarity measure to use in converting 26 | profiles to interaction networks.} 27 | 28 | \item{numCores}{(integer) number of cores for parallel processing} 29 | 30 | \item{JavaMemory}{(integer) Memory for GeneMANIA (in Gb)} 31 | 32 | \item{GM_jar}{(char) path to GeneMANIA jar file} 33 | 34 | \item{P2N_threshType}{(char) Most users shouldn't have to change this. 35 | ProfileToNetworkDriver's threshold option. One of 'off|auto'. 36 | unit testing} 37 | 38 | \item{P2N_maxMissing}{(integer 5-100)} 39 | 40 | \item{netSfx}{(char) pattern for finding network files in \code{netDir}.} 41 | 42 | \item{debugMode}{(logical) if TRUE runs profile generation in serial 43 | rather than parallel, allowing debugging} 44 | } 45 | \value{ 46 | No value. Side effect of creating interaction networks in outDir.
47 | } 48 | \description{ 49 | Convert profiles to interaction networks before integration 50 | } 51 | \details{ 52 | In preparation for network integration. When using GeneMANIA's 53 | built-in functionality to create PSN using ProfileToNetworkDriver, this 54 | step needs to run to process profiles to networks. These are currently used 55 | for Pearson correlation-based networks and those using mutual information. 56 | } 57 | -------------------------------------------------------------------------------- /man/countIntType.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sparsenet_enrichment_functions.R 3 | \name{countIntType} 4 | \alias{countIntType} 5 | \title{Counts the number of (+,+) and (+,-) interactions in a single network} 6 | \usage{ 7 | countIntType(inFile, plusID, minusID) 8 | } 9 | \arguments{ 10 | \item{inFile}{(char) path to interaction networks} 11 | 12 | \item{plusID}{(char) vector of + nodes} 13 | 14 | \item{minusID}{(char) vector of - nodes} 15 | } 16 | \value{ 17 | (numeric of length 2) Number of (+,+) interactions, and 18 | non-(+,+) interactions 19 | (i.e. 
(+,-) and (-,-) interactions) 20 | } 21 | \description{ 22 | Counts the number of (+,+) and (+,-) interactions in a single network 23 | } 24 | \examples{ 25 | d <- tempdir() 26 | # write PSN 27 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),byrow=FALSE,ncol=3) 28 | write.table(m1,file=paste(d,"net1.txt",sep=getFileSep()), 29 | sep="\t", 30 | col.names=FALSE,row.names=FALSE,quote=FALSE) 31 | 32 | countIntType(paste(d,"net1.txt",sep=getFileSep()),c("P1","P2","P3"), 33 | c("P4","P5")) 34 | } 35 | -------------------------------------------------------------------------------- /man/countIntType_batch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sparsenet_enrichment_functions.R 3 | \name{countIntType_batch} 4 | \alias{countIntType_batch} 5 | \title{Counts number of (+,+) and (+,-) interactions in a set of networks} 6 | \usage{ 7 | countIntType_batch( 8 | inFiles, 9 | plusID, 10 | minusID, 11 | tmpDir = tempdir(), 12 | enrType = "binary", 13 | numCores = 1L 14 | ) 15 | } 16 | \arguments{ 17 | \item{inFiles}{(char) path to interaction networks to process} 18 | 19 | \item{plusID}{(char) IDs of + nodes} 20 | 21 | \item{minusID}{(char) IDs of - nodes} 22 | 23 | \item{tmpDir}{(char) path to dir where temporary files can be stored} 24 | 25 | \item{enrType}{(char) see getEnr.R} 26 | 27 | \item{numCores}{(integer) number of cores for parallel processing} 28 | } 29 | \value{ 30 | (matrix) two columns, one row per network 31 | If \code{enrType="binary"}, number of (+,+) and other interactions 32 | Otherwise if \code{enrType="corr"} mean edge weight of (+,+) edges and 33 | of other edges 34 | } 35 | \description{ 36 | Counts number of (+,+) and (+,-) interactions in a set of networks 37 | } 38 | \examples{ 39 | d <- tempdir() 40 | # write PSN 41 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),byrow=FALSE,ncol=3) 42 | 
write.table(m1,file=paste(d,"net1.txt",sep=getFileSep()),sep="\t", 43 | col.names=FALSE,row.names=FALSE,quote=FALSE) 44 | m2 <- matrix(c("P3","P4",1),nrow=1) 45 | write.table(m2,file=paste(d,"net2.txt",sep=getFileSep()),sep="\t", 46 | col.names=FALSE,row.names=FALSE,quote=FALSE) 47 | 48 | countIntType_batch(paste(d,c("net1.txt","net2.txt"),sep=getFileSep()), 49 | c("P1","P2","P3"),c("P4","P5")) 50 | } 51 | -------------------------------------------------------------------------------- /man/countPatientsInNet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/countPatientsInNet.R 3 | \name{countPatientsInNet} 4 | \alias{countPatientsInNet} 5 | \title{Count number of patients in a network} 6 | \usage{ 7 | countPatientsInNet(netDir, fList, ids) 8 | } 9 | \arguments{ 10 | \item{netDir}{(char) dir with network set} 11 | 12 | \item{fList}{(char) filenames of interaction networks to count in} 13 | 14 | \item{ids}{(char) patient IDs to look for} 15 | } 16 | \value{ 17 | (matrix) Size P by N, where P is num patients and N is 18 | number of networks; a[i,j] = 1 if patient i in network j, else 0 19 | } 20 | \description{ 21 | Count number of patients in a network 22 | } 23 | \details{ 24 | This functionality is needed to count patient overlap when 25 | input data is in a form that results in highly missing data, rather than 26 | when the same measures are available for almost all patients. An example 27 | application is when patient networks are based on unique genomic events 28 | in each patient (e.g. CNVs or indels), rather than 'full-matrix' data 29 | (e.g. questionnaires or gene expression matrices). The former scenario 30 | requires an update in the list of eligible networks each time some type 31 | of patient subsetting is applied (e.g. label enrichment, or train/test 32 | split). 
A matrix with patient/network membership serves as a lookup 33 | table to prune networks as feature selection proceeds 34 | } 35 | \examples{ 36 | d <- tempdir() 37 | pids <- paste("P",1:5,sep="") 38 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1), 39 | byrow=FALSE,ncol=3) 40 | write.table(m1, 41 | file=paste(d,"net1.txt",sep=getFileSep()),sep="\t", 42 | col.names=FALSE,row.names=FALSE,quote=FALSE) 43 | m2 <- matrix(c("P3","P4",1),nrow=1) 44 | write.table(m2, 45 | file=paste(d,"net2.txt",sep=getFileSep()),sep="\t", 46 | col.names=FALSE,row.names=FALSE,quote=FALSE) 47 | x <- countPatientsInNet(d,c("net1.txt","net2.txt"), pids) 48 | } 49 | -------------------------------------------------------------------------------- /man/dataList2List.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataList2list.R 3 | \name{dataList2List} 4 | \alias{dataList2List} 5 | \title{Convert MultiAssayExperiment object to list and data.frame} 6 | \usage{ 7 | dataList2List(dat, groupList) 8 | } 9 | \arguments{ 10 | \item{dat}{(MultiAssayExperiment) Patient data and metadata} 11 | 12 | \item{groupList}{(list) variable groupings used for feature construction. 
See groupList arg in buildPredictor().} 13 | } 14 | \value{ 15 | (list) Keys are: 16 | 1) assays: list of matrices, each corresponding to data from a particular 17 | layer 18 | 2) pheno: (data.frame) sample metadata 19 | } 20 | \description{ 21 | Convert MultiAssayExperiment object to list and data.frame 22 | } 23 | \details{ 24 | Used by internal routines in netDx 25 | } 26 | \examples{ 27 | data(xpr,pheno) 28 | require(MultiAssayExperiment) 29 | objlist <- list("RNA"=SummarizedExperiment(xpr)) 30 | mae <- MultiAssayExperiment(objlist,pheno) 31 | groupList <- list(RNA=rownames(xpr)) 32 | dl <- dataList2List(mae,groupList) 33 | summary(dl) 34 | } 35 | -------------------------------------------------------------------------------- /man/dot-get_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fileCache.R 3 | \name{.get_cache} 4 | \alias{.get_cache} 5 | \title{wrapper function for getting BiocFileCache associated with netDx package} 6 | \usage{ 7 | .get_cache() 8 | } 9 | \value{ 10 | BiocFileCache object associated with netDx 11 | } 12 | \description{ 13 | wrapper function for getting BiocFileCache associated with netDx package 14 | } 15 | -------------------------------------------------------------------------------- /man/enrichLabelNets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/enrichLabelNets.R 3 | \name{enrichLabelNets} 4 | \alias{enrichLabelNets} 5 | \title{Score networks based on their edge bias towards (+,+) interactions} 6 | \usage{ 7 | enrichLabelNets( 8 | netDir, 9 | pheno_DF, 10 | outDir, 11 | numReps = 50L, 12 | minEnr = -1, 13 | outPref = "enrichLabelNets", 14 | verbose = TRUE, 15 | setSeed = 42L, 16 | enrType = "binary", 17 | numCores = 1L, 18 | predClass, 19 | tmpDir = tempdir(), 20 | netGrep = 
"_cont.txt$", 21 | getShufResults = FALSE, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{netDir}{(char) path to dir containing all networks} 27 | 28 | \item{pheno_DF}{(data.frame) for details see \code{getEnr()}} 29 | 30 | \item{outDir}{(char) path to dir where output/log files are written} 31 | 32 | \item{numReps}{(integer) Max num reps for shuffling class status. 33 | Adaptive permutation is 34 | used so in practice, few networks would be evaluated to this extent} 35 | 36 | \item{minEnr}{(numeric from -1 to 1) Only include networks with ENR 37 | value greater than this threshold.} 38 | 39 | \item{outPref}{(char) prefix for log file (not counting the dir name)} 40 | 41 | \item{verbose}{(logical) print messages} 42 | 43 | \item{setSeed}{(integer) if not NULL, integer is set as seed 44 | to ensure reproducibility in random number generation} 45 | 46 | \item{enrType}{(char) see getEnr()} 47 | 48 | \item{numCores}{(integer) num cores for parallel ENR computation of 49 | all networks} 50 | 51 | \item{predClass}{(char) see \code{getEnr()}} 52 | 53 | \item{tmpDir}{(char) path to dir where temporary work can be stored} 54 | 55 | \item{netGrep}{(char) pattern to grep for network files in netDir} 56 | 57 | \item{getShufResults}{(logical) if TRUE, returns the ENR for each 58 | permutation, for all networks. Warning: this is likely to be huge. Use 59 | this flag for debugging purposes only.} 60 | 61 | \item{...}{parameters for \code{countIntType_batch()}.} 62 | } 63 | \value{ 64 | (data.frame) networks stats from clique-filtering, one record 65 | per network 66 | } 67 | \description{ 68 | Score networks based on their edge bias towards (+,+) interactions 69 | } 70 | \details{ 71 | Determines which networks are statistically enriched for 72 | interactions between the class of interest. The resulting \code{ENR} 73 | score and corresponding p-value serve as a filter to exclude random-like 74 | interaction networks before using feature selection. 
This filter is 75 | known to be important when patient networks are sparse and binary; e.g. 76 | networks based on shared overlap of CNV locations. If the filter is 77 | not applied, GeneMANIA WILL promote networks with slight bias towards 78 | (+,+) edges, even if these are small and random-like. 79 | 80 | The measure of (+,+)-enrichment is defined as: 81 | ENR(network N) = ((num (+,+) edges) - (num other edges))/(num edges). 82 | A p-value for per-network ENR is obtained non-parametrically by 83 | measuring a null distribution for ENR following multiple permutations 84 | of case-control labels. 85 | } 86 | \examples{ 87 | data(npheno) 88 | netDir <- system.file("extdata","example_nets",package="netDx") 89 | x <- enrichLabelNets(netDir,npheno,".",predClass="case",netGrep="txt$", 90 | numReps=5) 91 | print(x) 92 | 93 | } 94 | -------------------------------------------------------------------------------- /man/featScores.Rd: -------------------------------------------------------------------------------- 1 | \name{featScores} 2 | \alias{featScores} 3 | \docType{data} 4 | \title{ 5 | Demo feature-level scores from running feature selection on two-class problem 6 | } 7 | \description{ 8 | List with one entry per patient label ("SURVIVEYES" and "SURVIVENO"). Each entry contains scores obtained through feature-selection across 100 train/test splits. Scores range from 0 to 10. Scores are in data.frame format, with rows corresponding to features and columns to a particular train/test split. 
9 | } 10 | \usage{data(featScores)} 11 | \examples{ 12 | data(featScores) 13 | head(featScores) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/fetchPathwayDefinitions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fileCache.R 3 | \name{fetchPathwayDefinitions} 4 | \alias{fetchPathwayDefinitions} 5 | \title{fetch pathway definitions from download.baderlab.org} 6 | \usage{ 7 | fetchPathwayDefinitions(month = NULL, year = NULL, day = 1, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{month}{(numeric or char) month of pathway definition file. Can be 11 | numeric or text (e.g. "January","April"). If NULL, fails.} 12 | 13 | \item{year}{(numeric) year of pathway definition file. Must be in 14 | yyyy format (e.g. 2018). If NULL, fails.} 15 | 16 | \item{day}{(integer) day of the month of pathway definition file} 17 | 18 | \item{verbose}{(logical) print messages} 19 | } 20 | \value{ 21 | (char) Path to local cached copy of GMT file; 22 | an initial download is required if no cached copy exists 23 | } 24 | \description{ 25 | fetch pathway definitions from download.baderlab.org 26 | } 27 | \details{ 28 | Fetches genesets compiled from multiple curated pathway 29 | databases. Downloaded from: https://download.baderlab.org/EM_Genesets/ 30 | The file contains pathways from HumanCyc, NetPath, Reactome, NCI 31 | Curated Pathways and MSigDB. 32 | For details see Merico D, Isserlin R, Stueker O, Emili A and GD Bader. 33 | (2010). PLoS One. 5(11):e13984. 
34 | } 35 | \examples{ 36 | fetchPathwayDefinitions("October",2020) 37 | fetchPathwayDefinitions("January",2018) 38 | fetchPathwayDefinitions(month=10,year=2020) 39 | } 40 | -------------------------------------------------------------------------------- /man/genes.Rd: -------------------------------------------------------------------------------- 1 | \name{genes} 2 | \alias{genes} 3 | \docType{data} 4 | \title{ 5 | Table of gene definitions (small subsample of human genes) 6 | } 7 | \description{ 8 | data.frame object with columns of (gene) RefSeq ID (name), chromosome (chrom), strand, transcription start site (txStart), transcription end site (txEnd), and gene symbol (name2) 9 | } 10 | \usage{data(genes)} 11 | \examples{ 12 | data(genes) 13 | head(genes) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/getCorrType.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getCorrType.R 3 | \name{getCorrType} 4 | \alias{getCorrType} 5 | \title{Counts the relative correlation of (+,+) and (+,-)(-,-) interactions} 6 | \usage{ 7 | getCorrType(inFile, plusID, minusID) 8 | } 9 | \arguments{ 10 | \item{inFile}{(character): path to interaction networks} 11 | 12 | \item{plusID}{(character) vector of + nodes} 13 | 14 | \item{minusID}{(character) vector of - nodes} 15 | } 16 | \value{ 17 | (numeric) mean edge weight for (+,+) and other edges 18 | } 19 | \description{ 20 | Counts the relative correlation of (+,+) and (+,-)(-,-) interactions 21 | } 22 | -------------------------------------------------------------------------------- /man/getEMapInput.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getEmapInput.R 3 | \name{getEMapInput} 4 | \alias{getEMapInput} 5 | 
\title{write enrichment map for consensus nets} 6 | \usage{ 7 | getEMapInput( 8 | featScores, 9 | namedSets, 10 | netInfo, 11 | pctPass = 0.7, 12 | minScore = 1, 13 | maxScore = 10, 14 | trimFromName = c(".profile", "_cont"), 15 | verbose = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{featScores}{(data.frame) network scores across rounds of cross 20 | validation. Rows are networks and columns are network name followed by 21 | scores for cross-validation rounds. Output of getFeatureScores()} 22 | 23 | \item{namedSets}{(list) list of nets and units (e.g. pathway names and 24 | genes). Should only contain units profiled in this dataset} 25 | 26 | \item{netInfo}{(data.frame) Table of network name (netName) and type 27 | (netType). Type is used to assign shapes to nodes: 28 | clinical clinical 29 | rna GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS 30 | rna RETINOL_BIOSYNTHESIS} 31 | 32 | \item{pctPass}{(numeric between 0 and 1) fraction of splits for which 33 | the highest score for the network is required, for that to be the network's 34 | maxScore} 35 | 36 | \item{minScore}{(integer) features with score below this cutoff are 37 | excluded from downstream analyses} 38 | 39 | \item{maxScore}{(integer) maximum possible score in one round of cross- 40 | validation. e.g. for 10-fold cross-validation, maxScore=10.} 41 | 42 | \item{trimFromName}{(char) strings to trim from name with sub()} 43 | 44 | \item{verbose}{(logical) print messages} 45 | } 46 | \value{ 47 | (list) Length two. 1) nodeAttrs: data.frame of node attributes 48 | 2) featureSets: key-value pairs of selected feature sets (e.g. if pathway 49 | features are used, keys are pathway names, and values are member genes). 
50 | } 51 | \description{ 52 | write enrichment map for consensus nets 53 | } 54 | \examples{ 55 | inDir <- system.file("extdata","example_output",package="netDx") 56 | outDir <- paste(tempdir(),'plots',sep='/') 57 | if (!file.exists(outDir)) dir.create(outDir) 58 | featScores <- getFeatureScores(inDir,predClasses=c('LumA','notLumA')) 59 | gp <- names(featScores)[1] 60 | pathwayList <- readPathways(fetchPathwayDefinitions("October",2020)) 61 | pathwayList <- pathwayList[seq_len(5)] 62 | netInfoFile <- system.file("extdata","example_output/inputNets.txt",package="netDx") 63 | netInfo <- read.delim(netInfoFile,sep='\t',h=FALSE,as.is=TRUE) 64 | emap_input <- getEMapInput(featScores[[gp]],pathwayList,netInfo) 65 | summary(emap_input) 66 | } 67 | -------------------------------------------------------------------------------- /man/getEMapInput_many.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getEmapInput_many.R 3 | \name{getEMapInput_many} 4 | \alias{getEMapInput_many} 5 | \title{Wrapper to generate multiple EnrichmentMaps (perhaps one per class)} 6 | \usage{ 7 | getEMapInput_many(featScores, namedSets_valid, netTypes, outDir, ...) 8 | } 9 | \arguments{ 10 | \item{featScores}{(list) keys are classes, and values are data.frames of 11 | network scores across cross-validation (output of getFeatureScores()).} 12 | 13 | \item{namedSets_valid}{(list) Grouped unit variables limited to the 14 | units contained in the dataset. e.g. keys are pathways and values are 15 | the genes measured in this dataset. 16 | e.g.: 17 | $`MISSPLICED_GSK3BETA_MUTANTS_STABILIZE_BETA-CATENIN` 18 | [1] 'PPP2R5E' 'PPP2CB' 'APC' 'AXIN1' 'PPP2R1B' 'PPP2R1A' 'CSNK1A1' 19 | [8] 'PPP2R5D' 'PPP2R5C' 'PPP2R5B' 'PPP2R5A' 'PPP2CA' 'GSK3B'} 20 | 21 | \item{netTypes}{(data.frame) 'inputNets.txt' file 22 | generated by netDx. Dataframe has two columns, network type and 23 | network name. 
I.E: 24 | clinical clinical 25 | rna GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS 26 | rna RETINOL_BIOSYNTHESIS} 27 | 28 | \item{outDir}{(char) path to output directory} 29 | 30 | \item{...}{parameters for getEMapInput()} 31 | } 32 | \value{ 33 | (list) of length g, where g is the number of groups in featScores. 34 | Values are lists, corresponding to the output of getEmapInput.R 35 | } 36 | \description{ 37 | Wrapper to generate multiple EnrichmentMaps (perhaps one per class) 38 | } 39 | \examples{ 40 | data(featScores) 41 | 42 | pathwayList <- readPathways(fetchPathwayDefinitions("October",2020)) 43 | pathwayList <- pathwayList[seq_len(5)] 44 | 45 | netInfoFile <- system.file("extdata","example_output/inputNets.txt",package="netDx") 46 | netTypes <- read.delim(netInfoFile,sep='\t',h=FALSE,as.is=TRUE) 47 | outDir <- paste(tempdir(),'plots',sep='/') 48 | if (!file.exists(outDir)) dir.create(outDir) 49 | EMap_input <- getEMapInput_many(featScores,pathwayList, 50 | netTypes,outDir=outDir) 51 | } 52 | -------------------------------------------------------------------------------- /man/getEnr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sparsenet_enrichment_functions.R 3 | \name{getEnr} 4 | \alias{getEnr} 5 | \title{Get ENR for all networks in a specified directory} 6 | \usage{ 7 | getEnr( 8 | netDir, 9 | pheno_DF, 10 | predClass, 11 | netGrep = "_cont.txt$", 12 | enrType = "binary", 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{netDir}{(char) directory containing interaction networks} 18 | 19 | \item{pheno_DF}{(data.frame) table with patient ID and status. 20 | Must contain columns for Patient ID (named "ID") and class 21 | (named "STATUS"). 
Status should be a char; value of predictor class 22 | should be specified in \code{predClass} param; 23 | all other values are considered non-predictor class 24 | Rows with duplicate IDs will be excluded.} 25 | 26 | \item{predClass}{(char) value for patients in predictor class} 27 | 28 | \item{netGrep}{(char) pattern for grep-ing network text files, used in 29 | dir(pattern=..) argument} 30 | 31 | \item{enrType}{(char) how enrichment should be computed. Options are: 32 | 1) binary: Skew of number of (+,+) interactions relative to other 33 | interactions. Used when all edges in network are set to 1 (e.g. 34 | shared CNV overlap) 35 | 2) corr: 0.5*((mean weight of (+,+) edges)-(mean weight of other edges))} 36 | 37 | \item{...}{arguments for \code{countIntType_batch}} 38 | } 39 | \value{ 40 | (list): 41 | 1) plusID (char) vector of + nodes 42 | 2) minusID (char) vector of - nodes 43 | 3) orig_rat (numeric) \code{ENR} for data networks 44 | 4) fList (char) set of networks processed 45 | 5) orig (data.frame) output of \code{countIntType_batch} for input 46 | networks 47 | } 48 | \description{ 49 | Get ENR for all networks in a specified directory 50 | } 51 | \details{ 52 | For each network, compute the number of (+,+) and other 53 | {(+,-),(-,+),(-,-)} interactions. 54 | From this compute network ENR. 55 | The measure of (+,+)-enrichment is defined as: 56 | ENR(network N) = ((num (+,+) edges) - (num other edges))/(num edges). 57 | A network with only (+,+) interactions has an ENR=1 ; a network with 58 | no (+,+) interactions has an ENR=-1; a network with a balance of the two 59 | has ENR=0. 
60 | } 61 | \examples{ 62 | d <- tempdir() 63 | options(stringsAsFactors=FALSE) 64 | pids <- paste("P",seq_len(5),sep="") 65 | pheno <- data.frame(ID=pids,STATUS=c(rep("case",3),rep("control",2))) 66 | 67 | # write PSN 68 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),byrow=FALSE,ncol=3) 69 | write.table(m1,file=paste(d,"net1.nettxt",sep=getFileSep()),sep="\t", 70 | col.names=FALSE,row.names=FALSE,quote=FALSE) 71 | m2 <- matrix(c("P3","P4",1),nrow=1) 72 | write.table(m2,file=paste(d,"net2.nettxt",sep=getFileSep()),sep="\t", 73 | col.names=FALSE,row.names=FALSE,quote=FALSE) 74 | 75 | # compute enrichment 76 | x <- countPatientsInNet(d,dir(d,pattern=c("net1.nettxt","net2.nettxt")), pids) 77 | getEnr(d,pheno,"case","nettxt$") 78 | } 79 | -------------------------------------------------------------------------------- /man/getFeatureScores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getFeatureScores.R 3 | \name{getFeatureScores} 4 | \alias{getFeatureScores} 5 | \title{Compile network scores into a matrix} 6 | \usage{ 7 | getFeatureScores(inDir, predClasses, getFullCons = TRUE) 8 | } 9 | \arguments{ 10 | \item{inDir}{(char/list) directory containing directories with all split 11 | info or list of all CV score files. 12 | if inDir is a single directory then the expected format for CV score files 13 | is /rngX/predClassX/GM_results/predClassX_pathway_CV_score.txt' 14 | if inDir is a list, it should have one key per class. The value should be 15 | the corresponding set of filenames for pathway_CV_score.txt} 16 | 17 | \item{predClasses}{(char) possible STATUS for patients} 18 | 19 | \item{getFullCons}{(logical) if TRUE, does not remove rows with NA. 20 | Recommended only when the number of input features is extensively 21 | pruned by first-pass feature selection.} 22 | } 23 | \value{ 24 | (list) one key per patient class. 
Value is matrix of network 25 | scores across all train/test splits. Each score is the output of 26 | the inner fold of CV. 27 | } 28 | \description{ 29 | Compile network scores into a matrix 30 | } 31 | \details{ 32 | Given network scores over a set of train/test splits, compiles 33 | these into a matrix for downstream analysis. See the section on 34 | 'Output Files' 35 | } 36 | \examples{ 37 | inDir <- system.file("extdata","example_output",package="netDx") 38 | netScores <- getFeatureScores(inDir, predClasses = c('LumA','notLumA')) 39 | } 40 | -------------------------------------------------------------------------------- /man/getFileSep.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{getFileSep} 4 | \alias{getFileSep} 5 | \title{platform-specific file separator} 6 | \usage{ 7 | getFileSep() 8 | } 9 | \value{ 10 | (char) "\\" if Windows, else "/" 11 | } 12 | \description{ 13 | Returns OS-specific file separator 14 | } 15 | \examples{ 16 | getFileSep() 17 | } 18 | -------------------------------------------------------------------------------- /man/getGMjar_path.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fileCache.R 3 | \name{getGMjar_path} 4 | \alias{getGMjar_path} 5 | \title{download and update GeneMANIA jar file} 6 | \usage{ 7 | getGMjar_path(verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{verbose}{(logical) print messages} 11 | } 12 | \value{ 13 | (char) Path to local cached copy of GeneMANIA jar file; 
14 | an initial download is required if no cached copy exists 15 | } 16 | \description{ 17 | download and update GeneMANIA jar file 18 | } 19 | \examples{ 20 | getGMjar_path() 21 | } 22 | -------------------------------------------------------------------------------- /man/getNetConsensus.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getNetConsensus.R 3 | \name{getNetConsensus} 4 | \alias{getNetConsensus} 5 | \title{compile net score across a set of predictor results} 6 | \usage{ 7 | getNetConsensus(scorelist) 8 | } 9 | \arguments{ 10 | \item{scorelist}{(list) key is dataset name, value is a data.frame 11 | containing PATHWAY_NAME and SCORE. This is the output of 12 | compileFeatureScores()} 13 | } 14 | \value{ 15 | (data.frame) Rownames are union of all nets in the input list. 16 | Columns show net scores for each key of the input list. Where a 17 | net is not found in a given list, it is assigned the value of NA 18 | } 19 | \description{ 20 | compile net score across a set of predictor results 21 | } 22 | \details{ 23 | used to compare how individual nets score for different 24 | predictor configurations 25 | } 26 | \examples{ 27 | pathways <- paste("PATHWAY_",1:100,sep="") 28 | highrisk <- list() 29 | for (k in 1:10) { 30 | highrisk[[k]] <- data.frame(PATHWAY_NAME=pathways, 31 | SCORE=runif(length(pathways),min=0,max=10), 32 | stringsAsFactors=FALSE); 33 | } 34 | names(highrisk) <- sprintf("Split\%i",1:length(highrisk)) 35 | x <- getNetConsensus(highrisk) 36 | } 37 | -------------------------------------------------------------------------------- /man/getOR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sparsenet_enrichment_functions.R 3 | \name{getOR} 4 | \alias{getOR} 5 | \title{Get relative proportion of patient classes that contribute to 
a set of 6 | networks} 7 | \usage{ 8 | getOR(pNetworks, pheno_DF, predClass, netFile, verbose = TRUE) 9 | } 10 | \arguments{ 11 | \item{pNetworks}{(matrix) rows are patients, columns are network 12 | filenames. a[i,j] = 1 if patient i has a structural variant in network 13 | j; else a[i,j] = 0} 14 | 15 | \item{pheno_DF}{(data.frame) Column "ID" has unique patient identifiers; 16 | column "STATUS" has patient class} 17 | 18 | \item{predClass}{(char) Class for which predictor is being built} 19 | 20 | \item{netFile}{(char) vector of networks of interest (e.g. those 21 | passing feature selection)} 22 | 23 | \item{verbose}{(logical) print messages} 24 | } 25 | \value{ 26 | List. 1) stats: statistics on group overlap with the networks of interest. 27 | This is a 2xK matrix, where rows are classes (predClass,other), and 28 | columns are: total samples, samples overlapping nets, \% overlap 29 | 2) relEnr: relative enrichment of \code{predClass} over other 30 | } 31 | \description{ 32 | Get relative proportion of patient classes that contribute to a set of 33 | networks 34 | } 35 | \details{ 36 | Feature selected networks should have the property of being 37 | enriched in the class of interest; e.g. be enriched in 'case' relative 38 | to 'control'. When given a list of networks N, this method computes the 39 | number and proportion of patients that overlap N. A high relative 40 | fraction of the predicted class indicates successful feature selection. 41 | To create a ROC or precision-recall curve, several calls can be made 42 | to this function, one per cutoff. 
43 | } 44 | \examples{ 45 | d <- tempdir() 46 | options(stringsAsFactors=FALSE) 47 | pids <- paste("P",seq_len(5),sep="") 48 | pheno <- data.frame(ID=pids,STATUS=c(rep("case",3),rep("control",2))) 49 | 50 | # write PSN 51 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),byrow=FALSE,ncol=3) 52 | write.table(m1,file=paste(d,"net1.txt",sep=getFileSep()),sep="\t", 53 | col.names=FALSE,row.names=FALSE,quote=FALSE) 54 | m2 <- matrix(c("P3","P4",1),nrow=1) 55 | write.table(m2,file=paste(d,"net2.txt",sep=getFileSep()),sep="\t", 56 | col.names=FALSE,row.names=FALSE,quote=FALSE) 57 | 58 | # compute enrichment 59 | x <- countPatientsInNet(d,dir(d,pattern=c("net1.txt","net2.txt")), pids) 60 | getOR(x,pheno,"case",colnames(x)) # should give large RelEnr 61 | } 62 | -------------------------------------------------------------------------------- /man/getPSN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper.R 3 | \name{getPSN} 4 | \alias{getPSN} 5 | \title{get the integrated patient similarity network made of selected features} 6 | \usage{ 7 | getPSN( 8 | dat, 9 | groupList, 10 | makeNets, 11 | selectedFeatures, 12 | plotCytoscape = FALSE, 13 | aggFun = "MEAN", 14 | prune_pctX = 0.3, 15 | prune_useTop = TRUE, 16 | numCores = 1L, 17 | calcShortestPath = FALSE 18 | ) 19 | } 20 | \arguments{ 21 | \item{dat}{(MultiAssayExperiment) input data} 22 | 23 | \item{groupList}{(list) feature groups, identical to groupList provided for buildPredictor()} 24 | 25 | \item{makeNets}{(function) Function used to create patient similarity networks. Identical to 26 | makeNets provided to buildPredictor()} 27 | 28 | \item{selectedFeatures}{(list) selected features for each class (key of list). This object is returned as 29 | part of a call to getResults(), after running buildPredictor().} 30 | 31 | \item{plotCytoscape}{(logical) If TRUE, plots network in Cytoscape. 
32 | Requires Cytoscape software to be installed and running on the computer 33 | when the function call is being made.} 34 | 35 | \item{aggFun}{(char) function to aggregate edges from different PSN (e.g. mean)} 36 | 37 | \item{prune_pctX}{(numeric between 0 and 1) fraction of strongest/weakest 38 | edges to keep when pruning the integrated PSN for visualization. 39 | Must be used in conjunction with prune_useTop=TRUE/FALSE 40 | e.g. Setting prune_pctX=0.2 and prune_useTop=TRUE will keep 20\% top edges} 41 | 42 | \item{prune_useTop}{(logical) when pruning integrated PSN for visualization, 43 | determines whether to keep strongest edges (prune_useTop=TRUE) or weakest edges 44 | (prune_useTop=FALSE)} 45 | 46 | \item{numCores}{(integer) number of cores for parallel processing} 47 | 48 | \item{calcShortestPath}{(logical) if TRUE, computes weighted shortest path. 49 | Unless you plan to analyse these separately from looking at the shortest 50 | path violin plots or integrated PSN in Cytoscape, probably good to set to 51 | FALSE.} 52 | } 53 | \value{ 54 | (list) information about the integrated network 55 | similarity network 56 | 2) patientDistNetwork_pruned (matrix) the network plotted in 57 | Cytoscape. Also note that this is a dissimilarity network, 58 | so that more similar nodes have smaller edge weights 59 | 3) colLegend (data.frame): legend for the patient network 60 | plotted in Cytoscape. Columns are node labels (STATUS) and 61 | colours (colour) 62 | 6) outDir (char) value of outDir parameter 63 | } 64 | \description{ 65 | get the integrated patient similarity network made of selected features 66 | } 67 | \details{ 68 | An integrated patient similarity network can be built using combined 69 | top features for each patient class. Such a network is created by taking the union of selected features for 70 | all patient labels, and aggregating pairwise edges for all of them using a user-specified function (aggFun). 
71 | The network is then pruned prior to visualization, using a user-specified fraction of strongest edges 72 | (prune_pctX, prune_useTop). In addition, the user may quantify the distance between patients of the 73 | same class, relative to those of other classes, using Dijkstra distance (calcShortestPath flag). 74 | } 75 | -------------------------------------------------------------------------------- /man/getPatientPredictions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getPatientPredictions.R 3 | \name{getPatientPredictions} 4 | \alias{getPatientPredictions} 5 | \title{Calculates patient-level classification accuracy across train/test splits} 6 | \usage{ 7 | getPatientPredictions(predFiles, pheno, plotAccuracy = FALSE) 8 | } 9 | \arguments{ 10 | \item{predFiles}{(char) vector of paths to all test predictions 11 | (e.g. 100 files for a 100 train/test split design). 12 | Alternatively, the user can also provide a single directory name, and allow 13 | the script to retrieve prediction files. 14 | Format is 'rootDir/rngX/predictionResults.txt'} 15 | 16 | \item{pheno}{(data.frame) ID=patient ID, STATUS=ground truth (known class 17 | label). This table is required to get the master list of all patients, as 18 | not every patient is classified in every split.} 19 | 20 | \item{plotAccuracy}{(logical) if TRUE, shows fraction of times 21 | patient is misclassified, using a dot plot} 22 | } 23 | \value{ 24 | (list) of length 2. 25 | 1) (data.frame) rows are patients, (length(predFiles)+2) columns. 26 | Columns seq_len(length(predFiles)): Predicted labels for a given split (NA 27 | if patient was training sample for the split). 28 | Column (length(predFiles)+1): 29 | split, value is NA. Columns are: ID, REAL_STATUS, predStatus1,... 30 | predStatusN. 31 | Side effect of plotting a dot plot of \% accuracy. 
Each dot is a patient, 32 | and the value is '\% splits for which patient was classified correctly'. 33 | } 34 | \description{ 35 | Calculates patient-level classification accuracy across train/test splits 36 | } 37 | \details{ 38 | Takes all the predictions across the different train/test splits, 39 | and for each patient, generates a score indicating how many times they were 40 | classified by netDx as belonging to each of the classes. The result is that 41 | we get a measure of individual classification accuracy across the different 42 | train/test splits. 43 | } 44 | \examples{ 45 | inDir <- system.file("extdata","example_output",package="netDx") 46 | data(pheno) 47 | all_rngs <- list.dirs(inDir, recursive = FALSE) 48 | all_pred_files <- unlist(lapply(all_rngs, function(x) { 49 | paste(x, 'predictionResults.txt', 50 | sep = getFileSep())})) 51 | pred_mat <- getPatientPredictions(all_pred_files, pheno) 52 | } 53 | -------------------------------------------------------------------------------- /man/getPatientRankings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getPatientRankings.R 3 | \name{getPatientRankings} 4 | \alias{getPatientRankings} 5 | \title{Process GM PRANK files to get the ROC curve for the query} 6 | \usage{ 7 | getPatientRankings(pFile, pheno_DF, predClass, plotIt = FALSE, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{pFile}{(char) path to PRANK file} 11 | 12 | \item{pheno_DF}{(data.frame) patient IDs ('ID') and label('STATUS')} 13 | 14 | \item{predClass}{(character) class label for which predictor is built} 15 | 16 | \item{plotIt}{(logical) if TRUE plots ROC curve} 17 | 18 | \item{verbose}{(logical) print messages} 19 | } 20 | \value{ 21 | (list) 22 | 1) predLbl: GeneMANIA scores (predicted labels). Higher score for 23 | higher ranked patient. 
24 | 2) realLbl: binary value indicating if patient label matches predictor 25 | label (real labels) 26 | 3) fullmat: pheno_DF merged with similarity scores ('similarityScore') 27 | and real label ('isPredClass') 28 | 4) roc: output of ROCRs performance(,'tpr','fpr') - ROC curve 29 | 5) auc: output of ROCRs auc() 30 | 6) precall: output of ROCRs performance(, 'prec','rec') 31 | 7) f: output of ROCRs performance(,'f') 32 | If < 2 patients in PRANK file, roc,auc, precall, f are all returned as 33 | NA. 34 | } 35 | \description{ 36 | Process GM PRANK files to get the ROC curve for the query 37 | } 38 | \examples{ 39 | data(pheno) 40 | prankFile <- system.file("extdata", 41 | paste("GM_PRANK","CV_1.query-results.report.txt.PRANK",sep=getFileSep()), 42 | package="netDx") 43 | x <- getPatientRankings(prankFile, pheno, 'LumA') 44 | } 45 | -------------------------------------------------------------------------------- /man/getPerformance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotPerf_simple.R 3 | \name{getPerformance} 4 | \alias{getPerformance} 5 | \title{performance metrics for model} 6 | \usage{ 7 | getPerformance(res, predClasses) 8 | } 9 | \arguments{ 10 | \item{res}{(data.frame) result from predicting labels on held-out test set. output of predict() function. 
11 | columns include ID, STATUS (ground truth) and PRED_CLASS (predicted label)} 12 | 13 | \item{predClasses}{(character) patient labels used by classifier} 14 | } 15 | \value{ 16 | (list) 17 | 1) rocCurve: ROCR performance object for ROC curve 18 | 2) prCurve: ROCR performance object for PR curve 19 | 3) auroc: Area under ROC curve 20 | 4) aupr: Area under PR curve 21 | 5) accuracy: Accuracy 22 | } 23 | \description{ 24 | performance metrics for model 25 | } 26 | -------------------------------------------------------------------------------- /man/getRegionOL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getRegionOL.R 3 | \name{getRegionOL} 4 | \alias{getRegionOL} 5 | \title{Returns overlapping named ranges for input ranges} 6 | \usage{ 7 | getRegionOL(gr, rngList) 8 | } 9 | \arguments{ 10 | \item{gr}{(GRanges) query ranges} 11 | 12 | \item{rngList}{(list) keys are names, and values are GRanges, each range 13 | of which has a name (in 'name' column). Note: It is faster to provide 14 | a list of length 1 ; if the list is long, combining into a single GRanges 15 | object could prove slow.} 16 | } 17 | \value{ 18 | (GRanges) query ranges with the added column 'LOCUS_NAMES'. 19 | Where a range overlaps with multiple loci, the names are reported as a 20 | comma-separated vector 21 | } 22 | \description{ 23 | Returns overlapping named ranges for input ranges 24 | } 25 | \details{ 26 | Given a set of query GRanges, and a subject list-of-GRanges, 27 | updates the query with a column 'LOCUS_NAMES' containing the names of 28 | ranges overlapped by the query. One application is to map structural 29 | variants, such as CNVs, to genes in pathways of interest. In this 30 | scenario \code{gr} would contain the patient CNVs, and \code{rngList} 31 | would be a list of GenomicRanges objects, one per cellular pathway. 
32 | } 33 | \examples{ 34 | data(cnv_GR,pathway_GR) 35 | x <- getRegionOL(cnv_GR,pathway_GR) 36 | } 37 | -------------------------------------------------------------------------------- /man/getResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper.R 3 | \name{getResults} 4 | \alias{getResults} 5 | \title{Compiles performance and selected features for a trained model.} 6 | \usage{ 7 | getResults(res, status, featureSelCutoff = 1L, featureSelPct = 0) 8 | } 9 | \arguments{ 10 | \item{res}{(list) output of buildPredictor() function} 11 | 12 | \item{status}{(character) unique patient labels used by the classifier, found in colData()$STATUS} 13 | 14 | \item{featureSelCutoff}{(integer) cutoff score for feature selection. 15 | A feature must have minimum of this score for specified fraction of splits 16 | (see featureSelPct) to pass.} 17 | 18 | \item{featureSelPct}{(numeric between 0 and 1) cutoff percent for feature selection. 19 | A feature must have minimum score of featureSelCutoff for featureSelPct of 20 | train/test splits to pass.} 21 | } 22 | \value{ 23 | list of results. 24 | - selectedFeatures (list of character vectors): list, one per class 25 | - performance (list of mixed datatypes) including mean accuracy (meanAccuracy), 26 | split-level accuracy (splitAccuracy), split-level AUROC (auroc), 27 | split-level AUPR (splitAUR) 28 | Side effect of plotting ROC curve if binary classifier 29 | } 30 | \description{ 31 | Compiles performance and selected features for a trained model. 32 | } 33 | \details{ 34 | This function is run after training a model using buildPredictor(). 35 | It takes patient input data, model output, and returns performance and selected features. 
36 | } 37 | \examples{ 38 | data(toymodel) # load example results from binary breast classification 39 | patlabels <- names(toymodel$Split1$featureSelected) 40 | getResults(toymodel,patlabels,2,0.5) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/getSimilarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getSimilarity.R 3 | \name{getSimilarity} 4 | \alias{getSimilarity} 5 | \title{Measures of patient similarity} 6 | \usage{ 7 | getSimilarity(x, type = "pearson", customFunc, ...) 8 | } 9 | \arguments{ 10 | \item{x}{(matrix) matrix for which pairwise patient similarity is to be 11 | computed. Expects one column per patient, and one measurement per row.} 12 | 13 | \item{type}{(character) name of similarity measure. Currently supports 14 | Pearson correlation ('pearson') or a custom measure ('custom')} 15 | 16 | \item{customFunc}{(function) custom similarity function. Only used when 17 | \code{type='custom'}. The function takes \code{x} as first argument and 18 | can take additional argument. 
It should return a symmetric matrix of 19 | pairwise patient similarities.} 20 | 21 | \item{...}{parameter for customFunc} 22 | } 23 | \value{ 24 | symmetric matrix of size N, where N is number of samples 25 | } 26 | \description{ 27 | Measures of patient similarity 28 | } 29 | \examples{ 30 | data(xpr) 31 | x <- getSimilarity(xpr) # similarity by Pearson corr 32 | mySim <- function(x) cor(x,method='kendall') 33 | x <- getSimilarity(xpr,customFunc=mySim) # custom similarity 34 | } 35 | -------------------------------------------------------------------------------- /man/makeInputForEnrichmentMap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper.R 3 | \name{makeInputForEnrichmentMap} 4 | \alias{makeInputForEnrichmentMap} 5 | \title{Wrapper to create input files for Enrichment Map} 6 | \usage{ 7 | makeInputForEnrichmentMap( 8 | model, 9 | results, 10 | pathwayList, 11 | EMapMinScore = 0L, 12 | EMapMaxScore = 1L, 13 | EMapPctPass = 0.5, 14 | outDir 15 | ) 16 | } 17 | \arguments{ 18 | \item{model}{(list) Output of training model, generated by running buildPredictor()} 19 | 20 | \item{results}{(list) Model results. 
output of getResults()} 21 | 22 | \item{pathwayList}{(list) output of readPathwayFile() used to make pathway-level features for predictor} 23 | 24 | \item{EMapMinScore}{(integer) minimum score for Enrichment Map} 25 | 26 | \item{EMapMaxScore}{(integer) maximum score for Enrichment Map} 27 | 28 | \item{EMapPctPass}{(numeric between 0 and 1) percent of splits for which feature must have score in range 29 | [EMapMinScore,EMapMaxScore] to be included for EnrichmentMap visualization} 30 | 31 | \item{outDir}{(char) directory where files should be written} 32 | } 33 | \value{ 34 | 35 | } 36 | \description{ 37 | Wrapper to create input files for Enrichment Map 38 | } 39 | \details{ 40 | An Enrichment Map is a network-based visualization of top-scoring pathway features 41 | and themes. It is generated in Cytoscape. This script generates the input files needed 42 | for Cytoscape to create an Enrichment Map visualization. 43 | } 44 | -------------------------------------------------------------------------------- /man/makePSN_RangeSets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/makePSN_RangeSets.R 3 | \name{makePSN_RangeSets} 4 | \alias{makePSN_RangeSets} 5 | \title{Create patient similarity interaction networks based on range sets} 6 | \usage{ 7 | makePSN_RangeSets( 8 | gr, 9 | rangeSet, 10 | netDir = tempdir(), 11 | simMetric = "coincide", 12 | quorum = 2L, 13 | verbose = TRUE, 14 | numCores = 1L 15 | ) 16 | } 17 | \arguments{ 18 | \item{gr}{(GRanges) patient ranges. Metadata should contain: 19 | ID: (char) unique patient ID 20 | LOCUS_NAME: (comma-separated char) named ranges overlapped} 21 | 22 | \item{rangeSet}{(list) list of GRanges, one entry per range set. 
23 | Key is the name of the range set, and value is a GRanges object with 24 | corresponding ranges} 25 | 26 | \item{netDir}{(char) path to directory where networks should be written} 27 | 28 | \item{simMetric}{(char) Similarity metric. Currently only 'coincide' 29 | is supported; two patients share an edge if they overlap elements in 30 | the same gene set. E.g. Two patients with CNVs that overlap different 31 | genes of the same pathway would be related, but patients overlapping 32 | genes that don't share a pathway (or, more accurately, a named-set 33 | grouping) would not be related. The edge weight is therefore binary.} 34 | 35 | \item{quorum}{(integer) minimum number of patients in a network for the 36 | network to be constructed} 37 | 38 | \item{verbose}{(logical) print detailed messages} 39 | 40 | \item{numCores}{(integer) num cores for parallel processing} 41 | } 42 | \value{ 43 | Vector of network filenames 44 | } 45 | \description{ 46 | Create patient similarity interaction networks based on range sets 47 | } 48 | \details{ 49 | Creates patient similarity networks when data consist of 50 | genomic events associated with patients. Examples include CNV or 51 | indel data for patients. To generate networks from full matrices such as 52 | gene expression data, use \code{makePSN_NamedMatrix} instead. 53 | Genomic ranges corresponding to events in patients (gr) should be named. 54 | One network is created per named range set (rangeSet). Each set 55 | reflects a group of related loci ; for example, genomic ranges associated 56 | with genes in the same cellular pathway. 57 | Currently, the only similarity measure supported is binary; two patients 58 | are related in a network N if they both overlap elements of set N. 59 | } 60 | \examples{ 61 | data(pathway_GR,cnv_GR) 62 | ### # example commented out to avoid build errors because of parallel 63 | ### # execution. Uncomment to run. 
64 | ### netList <- makePSN_RangeSets(cnv_GR,pathway_GR,'.') 65 | } 66 | -------------------------------------------------------------------------------- /man/makeQueries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/makeQueries.R 3 | \name{makeQueries} 4 | \alias{makeQueries} 5 | \title{Randomly select patients for queries for feature selection} 6 | \usage{ 7 | makeQueries(incPat, featScoreMax = 10L, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{incPat}{(char) vector of patient IDs to be included in query} 11 | 12 | \item{featScoreMax}{(integer) Number of times to run query, usually equal 13 | to the max score for features in the design (e.g. if featScoreMax=10, then 14 | this value is 10).} 15 | 16 | \item{verbose}{(logical) print messages} 17 | } 18 | \value{ 19 | (list) of length \code{featScoreMax}, containing names of patients 20 | in query file for each fold 21 | } 22 | \description{ 23 | Randomly select patients for queries for feature selection 24 | } 25 | \examples{ 26 | data(pheno) 27 | x <- makeQueries(pheno$ID) 28 | } 29 | -------------------------------------------------------------------------------- /man/makeSymmetric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/makeSymmetric.R 3 | \name{makeSymmetric} 4 | \alias{makeSymmetric} 5 | \title{Convert a network in source-target-weight format to symmetric matrix} 6 | \usage{ 7 | makeSymmetric(x, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{(data.frame) three columns, with source node, target node, and 11 | edge weight. 
Entries must include universe of nodes; those with missing 12 | edges must be included as having edge weight NA} 13 | 14 | \item{verbose}{(logical) print messages} 15 | } 16 | \value{ 17 | (matrix) symmetric adjacency matrix 18 | } 19 | \description{ 20 | Convert a network in source-target-weight format to symmetric matrix 21 | } 22 | \details{ 23 | A common format for network representation is to use a three 24 | column table listing source node, target node, and weight. 25 | This is the format netDx uses for network integration and visualization 26 | in Cytoscape. However, some functionality requires a square symmetric 27 | adjacency matrix. This function takes as input the three-column format 28 | and converts to the adjacency matrix. 29 | NOTE: Symmetric attribute is assumed, and the function automatically sets 30 | a[i,j] = a[j,i]. Diagonal is assumed to have value of 1.0. Finally, 31 | missing edges will be assigned NA values. 32 | } 33 | \examples{ 34 | src <- c("A","B"); tgt <- c("C","C") 35 | cur <- data.frame(source=src,target=tgt,weight=c(0.3,0.8)) 36 | makeSymmetric(cur) 37 | } 38 | -------------------------------------------------------------------------------- /man/mapNamedRangesToSets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mapNamedRangesToSets.R 3 | \name{mapNamedRangesToSets} 4 | \alias{mapNamedRangesToSets} 5 | \title{Map named ranges to corresponding set of named ranges} 6 | \usage{ 7 | mapNamedRangesToSets(gr, rangeList, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{gr}{(GRanges) named ranges to be grouped} 11 | 12 | \item{rangeList}{(list) sets of range names} 13 | 14 | \item{verbose}{(logical) print detailed messages} 15 | } 16 | \value{ 17 | RangeList. 
keys are names of \code{rangeList}, values are GRanges 18 | } 19 | \description{ 20 | Map named ranges to corresponding set of named ranges 21 | } 22 | \details{ 23 | Example application is when we have named ranges each 24 | corresponding to genes or regulatory elements, and we wish to group 25 | these ranges based on metabolic pathway. 26 | } 27 | \examples{ 28 | data(genes,pathwayList); 29 | gene_GR<-GenomicRanges::GRanges(genes$chrom, 30 | IRanges::IRanges(genes$txStart,genes$txEnd), 31 | name=genes$name2) 32 | path_GRList <- mapNamedRangesToSets(gene_GR,pathwayList) 33 | } 34 | -------------------------------------------------------------------------------- /man/matrix_getIJ.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/matrix_getIJ.R 3 | \name{matrix_getIJ} 4 | \alias{matrix_getIJ} 5 | \title{Converts matrix index (1 to m*n) to row (m) and column (n) number} 6 | \usage{ 7 | matrix_getIJ(dimMat, idx) 8 | } 9 | \arguments{ 10 | \item{dimMat}{(integer vector of length 2) output of \code{dim()} for 11 | matrix in question} 12 | 13 | \item{idx}{(integer vector of length n) matrix indices} 14 | } 15 | \value{ 16 | (matrix) n-by-2, first column has row indices ; second column 17 | has col indices 18 | } 19 | \description{ 20 | Converts matrix index (1 to m*n) to row (m) and column (n) number 21 | } 22 | -------------------------------------------------------------------------------- /man/modelres.Rd: -------------------------------------------------------------------------------- 1 | \name{modelres} 2 | \alias{modelres} 3 | \docType{data} 4 | \title{ 5 | Sample output of getResults() 6 | } 7 | \description{ 8 | Output of getResults() generated by running toymodel. 9 | toymodel is itself the output of buildPredictor() run on a simple dataset for binary breast tumour classification using two genomic data sources. 
10 | BRCA data were downloaded using curatedTCGAData for mRNA and miRNA expression. buildPredictor() was run by scoring features out of 2, with selected features passing 1 out of 2. 11 | Tumours were labelled either "Luminal.A" or "other". 12 | See details of getResults() for output format. 13 | } 14 | \usage{data(modelres)} 15 | \examples{ 16 | data(modelres) 17 | head(modelres) 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /man/moveInteractionNets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/moveInteractionNets.R 3 | \name{moveInteractionNets} 4 | \alias{moveInteractionNets} 5 | \title{moves interaction networks when compiling database for sparse genetic 6 | workflow} 7 | \usage{ 8 | moveInteractionNets(netDir, outDir, pheno, fileSfx = "_cont.txt") 9 | } 10 | \arguments{ 11 | \item{netDir}{(char) source directory} 12 | 13 | \item{outDir}{(char) target directory} 14 | 15 | \item{pheno}{(data.frame) contains patient ID and STATUS} 16 | 17 | \item{fileSfx}{(char) suffix to strip from network file names before 18 | registering in metadata tables} 19 | } 20 | \value{ 21 | No value. 
Side effect of moving interaction nets to target 22 | directory and creating network-related metadata files used to compile 23 | feature database 24 | } 25 | \description{ 26 | moves interaction networks when compiling database for sparse genetic 27 | workflow 28 | } 29 | -------------------------------------------------------------------------------- /man/normDiff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/similarities.R 3 | \name{normDiff} 4 | \alias{normDiff} 5 | \title{Similarity metric of normalized difference} 6 | \usage{ 7 | normDiff(x) 8 | } 9 | \arguments{ 10 | \item{x}{(numeric) vector of values, one per patient (e.g. ages)} 11 | } 12 | \value{ 13 | symmetric matrix of size ncol(dat) (number of patients) containing 14 | pairwise patient similarities 15 | } 16 | \description{ 17 | Similarity metric of normalized difference 18 | } 19 | \details{ 20 | Similarity metric used when data for a network consists of 21 | exactly 1 continuous variable (e.g. a network based only on 'age'). 22 | When number of variables is 2-5, use avgNormDiff() which 23 | takes the average of normalized difference for individual variables 24 | } 25 | \examples{ 26 | sim <- normDiff(rnorm(10)) 27 | } 28 | -------------------------------------------------------------------------------- /man/npheno.Rd: -------------------------------------------------------------------------------- 1 | \name{npheno} 2 | \alias{npheno} 3 | \docType{data} 4 | \title{ 5 | Toy sample metadata table 6 | } 7 | \description{ 8 | data.frame with patient ID ("ID") and label ("STATUS"). 
100 "cases" and 100 "controls" 9 | } 10 | \usage{data(npheno)} 11 | \examples{ 12 | data(npheno) 13 | head(npheno) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/pathwayList.Rd: -------------------------------------------------------------------------------- 1 | \name{pathwayList} 2 | \alias{pathwayList} 3 | \docType{data} 4 | \title{ 5 | Sample list of pathways 6 | } 7 | \description{ 8 | List where keys are pathway names and values are character vectors comprising of member genes for corresponding pathways 9 | } 10 | \usage{data(pathwayList)} 11 | \examples{ 12 | data(pathwayList) 13 | head(pathwayList) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/pathway_GR.Rd: -------------------------------------------------------------------------------- 1 | \name{pathway_GR} 2 | \alias{pathway_GR} 3 | \docType{data} 4 | \title{ 5 | List of genomic ranges mapped to pathways 6 | } 7 | \description{ 8 | List object. Keys are pathway names, values are GRanges objects with coordinates of corresponding genes. Small subset of pathways sufficient for package examples. 9 | } 10 | \usage{data(pathway_GR)} 11 | \examples{ 12 | data(pathway_GR) 13 | head(pathway_GR) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/perfCalc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/perfCalc.R 3 | \name{perfCalc} 4 | \alias{perfCalc} 5 | \title{Computes variety of predictor evaluation measures based on the confusion 6 | matrix} 7 | \usage{ 8 | perfCalc(dat) 9 | } 10 | \arguments{ 11 | \item{dat}{(data.frame): 5 columns: score, tp, fp, tn, fn. 
12 | One row per cutoff 13 | score for feature selection} 14 | } 15 | \value{ 16 | (list) 17 | stats (data.frame): score, f1, ppv, precision and recall. One row 18 | per cutoff for feature selection 19 | auc (numeric between 0 and 1): AUC of overall ROC curve 20 | prauc (numeric between 0 and 1): AUC of overall precision-recall curve 21 | } 22 | \description{ 23 | Computes variety of predictor evaluation measures based on the confusion 24 | matrix 25 | } 26 | \examples{ 27 | data(confmat) 28 | x <- perfCalc(confmat) 29 | } 30 | -------------------------------------------------------------------------------- /man/pheno.Rd: -------------------------------------------------------------------------------- 1 | \name{pheno} 2 | \alias{pheno} 3 | \docType{data} 4 | \title{ 5 | Sample metadata table 6 | } 7 | \description{ 8 | data.frame with patient ID (ID), sample type (Type), tumour subtype (STATUS). 9 | From TCGA 2012 breast cancer paper (see reference). 10 | } 11 | \usage{data(pheno)} 12 | \source{ 13 | The Cancer Genome Atlas. (2012). Nature 490:61-70. 14 | } 15 | \references{ 16 | The Cancer Genome Atlas. (2012). Nature 490:61-70. 17 | } 18 | \examples{ 19 | data(pheno) 20 | head(pheno) 21 | } 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /man/pheno_full.Rd: -------------------------------------------------------------------------------- 1 | \name{pheno_full} 2 | \alias{pheno_full} 3 | \docType{data} 4 | \title{ 5 | Subsample of TCGA breast cancer data used for netDx function examples 6 | } 7 | \description{ 8 | Patient ID and tumour status in "pheno", subsample of gene expression in "xpr" and CNV data in "cnv_GR" 9 | } 10 | \usage{data(pheno_full)} 11 | \source{ 12 | The Cancer Genome Atlas. (2012). Nature 490:61-70. 13 | } 14 | \references{ 15 | The Cancer Genome Atlas. (2012). Nature 490:61-70. 
16 | } 17 | \examples{ 18 | data(pheno_full) 19 | head(pheno_full) 20 | } 21 | \keyword{datasets} 22 | -------------------------------------------------------------------------------- /man/plotPerf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotPerf.R 3 | \name{plotPerf} 4 | \alias{plotPerf} 5 | \title{Plots various measures of predictor performance for binary classifiers} 6 | \usage{ 7 | plotPerf(resList = NULL, inFiles, predClasses, plotSEM = FALSE) 8 | } 9 | \arguments{ 10 | \item{resList}{(list) list of prediction results. If provided, the method 11 | will ignore inFiles} 12 | 13 | \item{inFiles}{(char) path to predictionResults.txt files. 14 | A vector of absolute paths to predictionResults.txt files} 15 | 16 | \item{predClasses}{(char) vector of class names.} 17 | 18 | \item{plotSEM}{(logical) metric for error bars. If set to TRUE, plots SEM; 19 | else plots SD.} 20 | } 21 | \value{ 22 | (list) each key corresponds to an input file in inFiles. 23 | Value is a list with: 24 | 1) stats: 'stats' component of perfCalc 25 | 2) rocCurve: ROCR performance object for ROC curve 26 | 3) prCurve: ROCR performance object for PR curve 27 | 4) auroc: Area under ROC curve 28 | 5) aupr: Area under PR curve 29 | 6) accuracy: Accuracy 30 | 31 | Side effect of plotting in a 2x2 format: 32 | 1) mean+/-SEM or (mean+/-SD) AUROC 33 | 2) mean+/-SEM or (mean+/-SD) AUPR 34 | 3) ROC curve for all runs plus average 35 | 4) PR curve for all runs plus average 36 | } 37 | \description{ 38 | Plots various measures of predictor performance for binary classifiers 39 | } 40 | \details{ 41 | Plots individual and average ROC/PR curves. mean+/-SEM performance 42 | for a predictor run using nested 43 | cross-validation or a similar repeated design. 
44 | predictionResults.txt contains a (data.frame) 45 | } 46 | \examples{ 47 | inDir <- system.file("extdata","example_output",package='netDx') 48 | inFiles <- paste(rep(inDir,3), sprintf("rng\%i",seq_len(3)),"predictionResults.txt", 49 | sep=getFileSep()) 50 | resList <- list() 51 | for (k in seq_len(length(inFiles))) { 52 | resList[[k]] <- read.delim(inFiles[k],sep="\t",header=TRUE,as.is=TRUE) 53 | } 54 | plotPerf(resList, predClasses = c('LumA','notLumA')) 55 | } 56 | -------------------------------------------------------------------------------- /man/plotPerf_multi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotPerf_multi.R 3 | \name{plotPerf_multi} 4 | \alias{plotPerf_multi} 5 | \title{Plots a set of ROC/PR curves with average.} 6 | \usage{ 7 | plotPerf_multi( 8 | inList, 9 | plotTitle = "performance", 10 | plotType = "ROC", 11 | xlab = "TPR", 12 | ylab = "FPR", 13 | meanCol = "darkblue", 14 | xlim = c(0, 1), 15 | ylim = c(0, 1) 16 | ) 17 | } 18 | \arguments{ 19 | \item{inList}{(list or ROCR::performance object) ROCR::performance objects, one per iteration} 20 | 21 | \item{plotTitle}{(char) plot title} 22 | 23 | \item{plotType}{(char) one of ROC | PR | custom. Affects x/y labels} 24 | 25 | \item{xlab}{(char) x-axis label} 26 | 27 | \item{ylab}{(char) y-axis label} 28 | 29 | \item{meanCol}{(char) colour for mean trendline} 30 | 31 | \item{xlim}{(numeric) min/max extent for x-axis} 32 | 33 | \item{ylim}{(numeric) min/max extent for y-axis} 34 | } 35 | \value{ 36 | No value. Side effect of plotting ROC and PR curves 37 | } 38 | \description{ 39 | Plots a set of ROC/PR curves with average. 40 | } 41 | \details{ 42 | Plots average curves with individual curves superimposed. 
43 | } 44 | \examples{ 45 | inDir <- system.file("extdata","example_output",package="netDx") 46 | all_rng <- list.files(path = inDir, pattern = 'rng.') 47 | fList <- paste(inDir,all_rng,'predictionResults.txt',sep=getFileSep()) 48 | rocList <- list() 49 | for (k in seq_len(length(fList))) { 50 | dat <- read.delim(fList[1],sep='\t',header=TRUE,as.is=TRUE) 51 | predClasses <- c('LumA', 'notLumA') 52 | pred_col1 <- sprintf('\%s_SCORE',predClasses[1]) 53 | pred_col2 <- sprintf('\%s_SCORE',predClasses[2]) 54 | idx1 <- which(colnames(dat) == pred_col1) 55 | idx2 <- which(colnames(dat) == pred_col2) 56 | pred <- ROCR::prediction(dat[,idx1]-dat[,idx2], 57 | dat$STATUS==predClasses[1]) 58 | rocList[[k]] <- ROCR::performance(pred,'tpr','fpr') 59 | } 60 | plotPerf_multi(rocList,'ROC') 61 | } 62 | -------------------------------------------------------------------------------- /man/predRes.Rd: -------------------------------------------------------------------------------- 1 | \name{predRes} 2 | \alias{predRes} 3 | \docType{data} 4 | \title{ 5 | Example output of getPatientRankings, used to call labels for test patients. 6 | } 7 | \description{ 8 | List of lists. First level is a list of size 4, with one key entry for each tumour type in example medulloblastoma dataset (WNT,SHH,Group3,Group4). Each list in the second level is of length 8, with structure corresponding to the output of getPatientRankings(). 
9 | } 10 | \usage{data(predRes)} 11 | \examples{ 12 | data(predRes) 13 | summary(predRes) 14 | summary(predRes[[1]]) 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/predict.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict} 4 | \alias{predict} 5 | \title{predict patient labels} 6 | \usage{ 7 | predict( 8 | trainMAE, 9 | testMAE, 10 | groupList, 11 | featSel, 12 | makeNetFunc, 13 | outDir, 14 | impute = FALSE, 15 | verbose = FALSE, 16 | numCores = 1L, 17 | JavaMemory = 4L, 18 | debugMode = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{trainMAE}{(MultiAssayExperiment) patient data for training samples. Same as provided to buildPredictor()} 23 | 24 | \item{testMAE}{(MultiAssayExperiment) new patient dataset for testing model. Assays must be the same as for trainMAE.} 25 | 26 | \item{groupList}{(list) list of features used to train the model. Keys are data types, and values are lists for groupings within those datatypes. 27 | e.g. keys could include {'clinical','rna','methylation'}, and values within 'rna' could include pathway names {'cell cycle', 'DNA repair'}, etc., 28 | featSel will be used to subset} 29 | 30 | \item{featSel}{(list) selected features to be used in the predictive model. 31 | keys are patient labels (e.g. "responder/nonresponder"), and values are feature names 32 | identified by running buildPredictor(). Feature names must correspond to names of groupList, from which they will be subset.} 33 | 34 | \item{makeNetFunc}{(function) function to create PSN features from patient data. See makeNetFunc in buildPredictor() for details} 35 | 36 | \item{outDir}{(char) directory for results} 37 | 38 | \item{impute}{(logical) if TRUE imputes train and test samples separately before creating features. 
Currently unsupported.} 39 | 40 | \item{verbose}{(logical) print messages} 41 | 42 | \item{numCores}{(integer) number of CPU cores for parallel processing} 43 | 44 | \item{JavaMemory}{(integer) memory in (Gb) used for each fold of CV} 45 | 46 | \item{debugMode}{(logical) Set to TRUE for detailed messages. Used for debugging.} 47 | } 48 | \value{ 49 | (data.frame) predicted patient similarities and labels 50 | columns are: 1) ID, 2) STATUS (ground truth), 3)