├── .BBSoptions
├── .Rbuildignore
├── .github
    ├── .gitignore
    └── workflows
    │   └── check-bioc.yml
├── .gitignore
├── DESCRIPTION
├── Dockerfile
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS
├── R
    ├── Nets_writeSIF.R
    ├── RR_featureTally.R
    ├── buildPredictor.R
    ├── buildPredictor_sparseGenetic.R
    ├── callFeatSel.R
    ├── cleanPathwayName.R
    ├── compareShortestPath.R
    ├── compileFeatureScores.R
    ├── compileFeatures.R
    ├── countPatientsInNet.R
    ├── createPSN_MultiData.R
    ├── dataList2list.R
    ├── enrichLabelNets.R
    ├── fileCache.R
    ├── getCorrType.R
    ├── getEmapInput.R
    ├── getEmapInput_many.R
    ├── getFeatureScores.R
    ├── getNetConsensus.R
    ├── getPatientPredictions.R
    ├── getPatientRankings.R
    ├── getRegionOL.R
    ├── getSimilarity.R
    ├── globals.R
    ├── helper.R
    ├── makePSN_NamedMatrix.R
    ├── makePSN_RangeSets.R
    ├── makeQueries.R
    ├── makeSymmetric.R
    ├── mapNamedRangesToSets.R
    ├── matrix_getIJ.R
    ├── moveInteractionNets.R
    ├── perfCalc.R
    ├── plotEmap.R
    ├── plotIntegratedPatientNetwork.R
    ├── plotPerf.R
    ├── plotPerf_multi.R
    ├── plotPerf_simple.R
    ├── predict.R
    ├── predictPatientLabels.R
    ├── pruneNetByStrongest.R
    ├── pruneNet_pctX.R
    ├── pruneNets.R
    ├── readPathways.R
    ├── runFeatureSelection.R
    ├── runProfileToNetworks.R
    ├── runQuery.R
    ├── setupFeatureDB.R
    ├── similarities.R
    ├── simpleCap.R
    ├── smooMutationPropagation.R
    ├── sparsenet_enrichment_functions.R
    ├── sparsify2.R
    ├── sparsify3.R
    ├── splitTestTrain_partition.R
    ├── splitTestTrain_resampling.R
    ├── updateNets.R
    ├── utils.R
    ├── writeQueryBatchFile.R
    ├── writeQueryFile.R
    ├── writeWeightedNets.R
    └── zzz.R
├── README.md
├── data
    ├── MB.pheno.rda
    ├── cnv_GR.rda
    ├── cnv_TTstatus.rda
    ├── cnv_netPass.rda
    ├── cnv_netScores.rda
    ├── cnv_patientNetCount.rda
    ├── cnv_pheno.rda
    ├── confmat.rda
    ├── featScores.rda
    ├── genes.rda
    ├── modelres.rda
    ├── npheno.rda
    ├── pathwayList.rda
    ├── pathway_GR.rda
    ├── pheno.rda
    ├── pheno_full.rda
    ├── predRes.rda
    ├── silh.rda
    ├── toymodel.rda
    └── xpr.rda
├── inst
    ├── CITATION
    └── extdata
    │   ├── AGP1_CNV.txt
    │   ├── GM_NRANK
    │       ├── CV_1.query-results.report.txt.NRANK
    │       └── CV_2.query-results.report.txt.NRANK
    │   ├── GM_PRANK
    │       ├── CV_1.query-results.report.txt.PRANK
    │       └── CV_2.query-results.report.txt.PRANK
    │   ├── GM_query.txt
    │   ├── INSTALL
    │       ├── Dockerfile
    │       ├── INSTALL_OSX.sh
    │       └── INSTALL_Unix.sh
    │   ├── TGCT_mutSmooth_geno.txt
    │   ├── TGCT_mutSmooth_pheno.txt
    │   ├── dbPath
    │       ├── 1
    │       │   ├── _0.cfs
    │       │   ├── metadata.xml
    │       │   ├── segments.gen
    │       │   └── segments_2
    │       ├── base
    │       │   ├── _0.cfs
    │       │   ├── segments.gen
    │       │   └── segments_2
    │       ├── cache
    │       │   └── CORE
    │       │   │   └── 1
    │       │   │       ├── 1.ser
    │       │   │       ├── 2.ser
    │       │   │       ├── 3.ser
    │       │   │       ├── 4.ser
    │       │   │       ├── DatasetInfo.ser
    │       │   │       ├── attributeGroups.ser
    │       │   │       ├── networkIds.ser
    │       │   │       └── nodeIds.ser
    │       ├── genemania.xml
    │       └── user
    │       │   ├── segments.gen
    │       │   └── segments_1
    │   ├── example_nets
    │       ├── BIG_CASE.txt
    │       ├── BIG_CONTROL.txt
    │       ├── BOTH_EQUAL.txt
    │       ├── MOSTLY_CASE.txt
    │       ├── SMALL_CASE.txt
    │       └── SMALL_CONTROL.txt
    │   ├── example_output
    │       ├── inputNets.txt
    │       ├── rng1
    │       │   ├── LumA
    │       │   │   └── GM_results
    │       │   │   │   └── LumA_pathway_CV_score.txt
    │       │   ├── notLumA
    │       │   │   └── GM_results
    │       │   │   │   └── notLumA_pathway_CV_score.txt
    │       │   └── predictionResults.txt
    │       ├── rng2
    │       │   ├── LumA
    │       │   │   └── GM_results
    │       │   │   │   └── LumA_pathway_CV_score.txt
    │       │   ├── notLumA
    │       │   │   └── GM_results
    │       │   │   │   └── notLumA_pathway_CV_score.txt
    │       │   └── predictionResults.txt
    │       └── rng3
    │       │   ├── LumA
    │       │       └── GM_results
    │       │       │   └── LumA_pathway_CV_score.txt
    │       │   ├── notLumA
    │       │       └── GM_results
    │       │       │   └── notLumA_pathway_CV_score.txt
    │       │   └── predictionResults.txt
    │   ├── genemania.xml
    │   ├── pathway_ex3.gmt
    │   ├── pathways.gmt
    │   └── plots
    │       ├── SURVIVENO.gmt
    │       ├── SURVIVENO_nodeAttrs.txt
    │       ├── SURVIVEYES.gmt
    │       └── SURVIVEYES_nodeAttrs.txt
├── man
    ├── MB.pheno.Rd
    ├── RR_featureTally.Rd
    ├── avgNormDiff.Rd
    ├── buildPredictor.Rd
    ├── buildPredictor_sparseGenetic.Rd
    ├── callFeatSel.Rd
    ├── callOverallSelectedFeatures.Rd
    ├── cleanPathwayName.Rd
    ├── cnv_GR.Rd
    ├── cnv_TTstatus.Rd
    ├── cnv_netPass.Rd
    ├── cnv_netScores.Rd
    ├── cnv_patientNetCount.Rd
    ├── cnv_pheno.Rd
    ├── compareShortestPath.Rd
    ├── compileFeatureScores.Rd
    ├── compileFeatures.Rd
    ├── confmat.Rd
    ├── confusionMatrix.Rd
    ├── convertProfileToNetworks.Rd
    ├── countIntType.Rd
    ├── countIntType_batch.Rd
    ├── countPatientsInNet.Rd
    ├── createPSN_MultiData.Rd
    ├── dataList2List.Rd
    ├── dot-get_cache.Rd
    ├── enrichLabelNets.Rd
    ├── featScores.Rd
    ├── fetchPathwayDefinitions.Rd
    ├── genes.Rd
    ├── getCorrType.Rd
    ├── getEMapInput.Rd
    ├── getEMapInput_many.Rd
    ├── getEnr.Rd
    ├── getFeatureScores.Rd
    ├── getFileSep.Rd
    ├── getGMjar_path.Rd
    ├── getNetConsensus.Rd
    ├── getOR.Rd
    ├── getPSN.Rd
    ├── getPatientPredictions.Rd
    ├── getPatientRankings.Rd
    ├── getPerformance.Rd
    ├── getRegionOL.Rd
    ├── getResults.Rd
    ├── getSimilarity.Rd
    ├── makeInputForEnrichmentMap.Rd
    ├── makePSN_NamedMatrix.Rd
    ├── makePSN_RangeSets.Rd
    ├── makeQueries.Rd
    ├── makeSymmetric.Rd
    ├── mapNamedRangesToSets.Rd
    ├── matrix_getIJ.Rd
    ├── modelres.Rd
    ├── moveInteractionNets.Rd
    ├── normDiff.Rd
    ├── npheno.Rd
    ├── pathwayList.Rd
    ├── pathway_GR.Rd
    ├── perfCalc.Rd
    ├── pheno.Rd
    ├── pheno_full.Rd
    ├── plotEmap.Rd
    ├── plotIntegratedPatientNetwork.Rd
    ├── plotPerf.Rd
    ├── plotPerf_multi.Rd
    ├── predRes.Rd
    ├── predict.Rd
    ├── predictPatientLabels.Rd
    ├── pruneNet.Rd
    ├── pruneNet_pctX.Rd
    ├── pruneNets.Rd
    ├── randAlphanumString.Rd
    ├── readPathways.Rd
    ├── replacePattern.Rd
    ├── runFeatureSelection.Rd
    ├── runQuery.Rd
    ├── setupFeatureDB.Rd
    ├── silh.Rd
    ├── sim.eucscale.Rd
    ├── sim.pearscale.Rd
    ├── simpleCap.Rd
    ├── smoothMutations_LabelProp.Rd
    ├── sparsify2.Rd
    ├── sparsify3.Rd
    ├── splitTestTrain.Rd
    ├── splitTestTrain_resampling.Rd
    ├── tSNEPlotter.Rd
    ├── thresholdSmoothedMutations.Rd
    ├── toymodel.Rd
    ├── updateNets.Rd
    ├── writeNetsSIF.Rd
    ├── writeQueryBatchFile.Rd
    ├── writeQueryFile.Rd
    ├── writeWeightedNets.Rd
    └── xpr.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test_buildpredictor.R
    │   └── test_suite.R
└── vignettes
    ├── BuildPredictor.Rmd
    ├── EMap_realworld.png
    ├── Emap_example_screenshot.png
    ├── Predict_CaseControl_from_CNV.Rmd.old
    ├── ThreeWayClassifier.Rmd
    ├── ValidateNew.Rmd
    ├── images
        └── vignette1_design.jpg
    ├── integratedPSN_MEAN_top0.10.png
    └── prepare_data.R


/.BBSoptions:
--------------------------------------------------------------------------------
1 | # The reason this package is marked as unsupported on win32 is that
2 | # it requires the Java JDK. However it seems that Oracle no longer
3 | # provides the JDK for 32-bit windows:
4 | # https://www.oracle.com/java/technologies/javase-jdk14-downloads.html
5 | UnsupportedPlatforms: win32
6 | # Unsupported on Windows per maintainer request until package gets
7 | # switched to rJava.
8 | UnsupportedPlatforms: win
9 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^LICENSE\.md$
2 | .git
3 | ^doc$
4 | ^Meta$
5 | ^\.github$
6 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | doc
6 | Meta
7 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: netDx
 2 | Title: Network-based patient classifier
 3 | Version: 1.5.3
 4 | Authors@R: c(person("Shraddha", "Pai", 
 5 | 		email = "shraddha.pai@utoronto.ca", 
 6 | 		role = c("aut", "cre"),
 7 | 	  comment = c(ORCID = "0000-0002-1048-581X")), 
 8 | 	person("Philipp","Weber",role="aut"),
 9 | 	person("Ahmad","Shah", role="aut"),
10 | 	person("Luca","Giudice",role="aut"),
11 | 	person("Shirley","Hui",role="aut"),
12 | 	person("Ruth","Isserlin",role="aut"),
13 | 	person("Hussam","Kaka", role="aut"),
14 | 	person("Gary","Bader",role="aut"))
15 | Description: netDx is a general-purpose algorithm to build a patient classifier from heterogeneous patient data. The method converts data into patient similarity networks at the level of features. Feature selection identifies features of predictive value to each class. Methods are provided for versatile predictor design and performance evaluation using standard measures. netDx natively groups molecular data into pathway-level features and connects with Cytoscape for network visualization of pathway themes. For method details see: Pai et al. (2019). netDx: interpretable patient classification using integrated patient similarity networks. Molecular Systems Biology. 15, e8497
16 | Depends:
17 |     R (>= 3.6)
18 | Suggests: 
19 |   curatedTCGAData,
20 | 	TCGAutils,
21 | 	rmarkdown,
22 | 	testthat,
23 | 	knitr,
24 | 	BiocStyle,
25 | 	RCy3,
26 | 	clusterExperiment,
27 | 	netSmooth,
28 | 	scater
29 | Imports: ROCR,pracma,ggplot2,glmnet,igraph,reshape2,
30 | 	parallel,stats,utils,MultiAssayExperiment,graphics,grDevices,
31 | 	methods,BiocFileCache,GenomicRanges,
32 | 	bigmemory,doParallel,foreach,
33 | 	combinat,rappdirs,GenomeInfoDb,S4Vectors,
34 | 	IRanges,RColorBrewer,Rtsne,httr,plotrix
35 | VignetteBuilder: knitr 
36 | Encoding: UTF-8
37 | License: MIT + file LICENSE
38 | LazyData: false
39 | URL: http://netdx.org
40 | biocViews: Classification, BiomedicalInformatics, Network, SystemsBiology
41 | RoxygenNote: 7.1.2
42 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | 
 2 | FROM bioconductor/bioconductor_docker:devel
 3 | 
 4 | WORKDIR /home/rstudio
 5 | 
 6 | COPY --chown=rstudio:rstudio . /home/rstudio/
 7 | 
 8 | RUN Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); BiocManager::install(ask=FALSE)"
 9 | 
10 | RUN Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); devtools::install('.', dependencies=TRUE, build_vignettes=TRUE, repos = BiocManager::repositories())"
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2019
2 | COPYRIGHT HOLDER: netDx
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2019 netDx
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | netDx 1.5.3
 2 | ==================
 3 | * Moved RCy3, scater, clusterExperiment and netSmooth to "Suggests" to reduce dependency burden
 4 | * Sped up vignettes by limiting all to binary classification and limiting number of layers
 5 | * Removed TL;DR from vignettes, as its usefulness was in question while its maintenance burden was high.
 6 | 
 7 | Developers notes:
 8 | -------------------
 9 | * Added Dockerfile and Github Actions for automated testing
10 | * GHA auto-generates a Docker image with netDx which gets pushed to shraddhapai/netdx_devenv
11 | 
12 | netDx 1.5.2
13 | ==================
14 | * Added wrapper functions for ease-of-use. Includes:
15 | 	* getResults() to plot results of running the predictor
16 | 	* getPSN() for creating and visualizing integrated PSN
17 | 	* confusionMatrix() to visualize confusion matrix
18 | 	* tSNEPlotter() to visualize tSNE of integrated PSN (doesn't require Cytoscape)
19 | * Added CITATION file with citations to netDx methods and software paper
20 | 
21 | netDx 1.5.1
22 | ==================
23 | * Adding support for Java 16. 
24 | * Disabling CNV-based vignette to allow other three vignettes to run without causing build timeout on devel system
25 | 
26 | netDx 1.1.4
27 | ==================
28 | Changes:
29 | * New functionality to smooth mutations over interaction, starting from sparse
30 | somatic mutations
31 | * BiocFileCache usage update
32 | 
33 | Changes in version 0.99.0 (2019-11-11)
34 | + Submitted to Bioconductor
35 | 


--------------------------------------------------------------------------------
/R/Nets_writeSIF.R:
--------------------------------------------------------------------------------
 1 | #' write patient networks in Cytoscape's .sif format
 2 | #'
 3 | #' @details Converts a set of binary interaction networks into Cytoscape's
 4 | #' sif format.
 5 | #' (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats)
 6 | #' This utility permits visualization of feature selected networks.
 7 | #' Each input edge 'A B 1' is written as 'A <network name> B'. Any
 8 | #' existing \code{outFile} is replaced, on every platform.
 9 | #'
10 | #' @param netPath (char): vector of paths to network files; file suffix
11 | #' should be '_cont.txt' 
12 | #' networks should be in format: A B 1
13 | #' where A and B are nodes, and 1 indicates an edge between them
14 | #' @param outFile (char) path to .sif file 
15 | #' @param netSfx (char) suffix for network file name. It is matched
16 | #' literally (not as a regular expression) and removed only from the end
17 | #' of the file name to obtain the network name
18 | #' @return No value. Side effect of writing all networks to \code{outFile}
19 | #' @examples
20 | #' netDir <- system.file("extdata","example_nets",package="netDx")
21 | #' netFiles <- paste(netDir,dir(netDir,pattern='txt$'),
22 | #'	sep=getFileSep())
23 | #' writeNetsSIF(netFiles,'merged.sif',netSfx='.txt')
24 | #' @export
25 | writeNetsSIF <- function(netPath, 
26 |     outFile=paste(tempdir(),"out.sif",sep=getFileSep()),
27 |     netSfx = "_cont.txt") {
28 |     # Rows are appended per network below, so always start from an empty
29 |     # file; a stale file would otherwise leak edges from an earlier run.
30 |     if (file.exists(outFile)) unlink(outFile)
31 |     file.create(outFile)
32 |     for (n in netPath) {
33 |         # strip the suffix as a literal string, and only at the end
34 |         netName <- basename(n)
35 |         if (nzchar(netSfx) && endsWith(netName, netSfx)) {
36 |             netName <- substr(netName, 1, nchar(netName) - nchar(netSfx))
37 |         }
38 |         message(sprintf("%s\n", netName))
39 |         dat <- read.delim(n, sep = "\t", header = FALSE, as.is = TRUE)
40 |         dat2 <- cbind(dat[, 1], netName, dat[, 2])
41 |         write.table(dat2, file = outFile, append = TRUE, sep = "\t", 
42 |             col.names = FALSE, row.names = FALSE, quote = FALSE)
43 |     }
44 |     invisible(NULL)
45 | }
46 | 


--------------------------------------------------------------------------------
/R/callFeatSel.R:
--------------------------------------------------------------------------------
 1 | #' Return feature selected nets based on given criteria
 2 | #'
 3 | #' @details given the output of genNetScores.R and criteria for defining
 4 | #' feature-selected (FS) nets, returns subset of nets that pass criteria.
 5 | #' Net must score <fsCutoff> for at least <fsPctPass> % of splits, to be
 6 | #' considered feature-selected. Splits in which a net has no score (NA)
 7 | #' count as not passing.
 8 | #' @param netScores (data.frame or matrix) first column holds the net
 9 | #' names; every other column holds the net scores for one split
10 | #' @param fsCutoff (integer) net must score at least this much in a split to
11 | #'  'pass' the threshold
12 | #' @param fsPctPass (numeric 0 to 1) net must pass at least this percent of
13 | #' splits to be considered feature-selected
14 | #' @return (char) names of nets that pass feature-selection; NULL if no
15 | #' net passes
16 | #' @examples
17 | #' data(featScores)
18 | #' passed <- lapply(featScores, function(x) {
19 | #'    callFeatSel(x,10,0.7) # score 10/10 in >=70% of trials
20 | #' })
21 | #' print(passed)
22 | #' @export
23 | callFeatSel <- function(netScores, fsCutoff, fsPctPass) {
24 |     # need the name column plus at least one split to score anything
25 |     if (nrow(netScores) < 1 || ncol(netScores) < 2) return(NULL)
26 |     # Use the score columns only: the name column must neither be
27 |     # compared against fsCutoff nor counted as a split in the denominator.
28 |     scores <- as.matrix(netScores[, -1, drop = FALSE])
29 |     storage.mode(scores) <- "numeric"
30 |     pctPass <- rowSums(scores >= fsCutoff, na.rm = TRUE) / ncol(scores)
31 |     fs_nets <- netScores[, 1][pctPass >= fsPctPass]
32 |     if (length(fs_nets) < 1) return(NULL)
33 |     fs_nets
34 | }
35 | 


--------------------------------------------------------------------------------
/R/cleanPathwayName.R:
--------------------------------------------------------------------------------
 1 | #' Clean pathway name so it can be a filename.
 2 | #' 
 3 | #' @param curP (char) pathway name
 4 | #' @export
 5 | #' @examples
 6 | #' cleanPathwayName('7-(3-AMINO-3-CARBOXYPROPYL)-WYOSINE BIOSYNTHESIS%HUMANC')
 7 | #' @return (char) Cleaned pathway name: space < > ( ) & ; : / become '_', '+' becomes 'plus' and ',' becomes '.'
 8 | cleanPathwayName <- function(curP) {
 9 |     pforfile <- gsub(" ", "_", curP)  # every substitution replaces all occurrences
10 |     pforfile <- gsub("<", "_", pforfile)
11 |     pforfile <- gsub(">", "_", pforfile)
12 |     pforfile <- gsub("\\(", "_", pforfile)  # escaped: "(" is a regex metacharacter
13 |     pforfile <- gsub("\\)", "_", pforfile)
14 |     pforfile <- gsub("&", "_", pforfile)
15 |     pforfile <- gsub(";", "_", pforfile)
16 |     pforfile <- gsub(":", "_", pforfile)
17 |     pforfile <- gsub("\\/", "_", pforfile)
18 |     pforfile <- gsub("\\\xec", "X", pforfile)  # backslash-escaped raw byte 0xEC -> "X"; NOTE(review): confirm these raw-byte patterns behave as intended in a UTF-8 locale
19 |     pforfile <- gsub("\\\xc2\\\xa0", "_", pforfile)  # byte pair 0xC2 0xA0 (presumably a UTF-8 no-break space) -> "_"; runs before the lone-0xA0 rule on the next line
20 |     pforfile <- gsub("\\\xa0", "X", pforfile)  # any remaining lone 0xA0 byte -> "X"
21 |     pforfile <- gsub("\\\xca", "_", pforfile)  # raw byte 0xCA -> "_"
22 |     pforfile <- gsub("\\+", "plus", pforfile)  # "+" is spelled out rather than replaced by "_"
23 |     pforfile <- gsub(",",".",pforfile)
24 |     
25 |     return(pforfile)
26 | }
27 | 


--------------------------------------------------------------------------------
/R/compileFeatureScores.R:
--------------------------------------------------------------------------------
 1 | #' Tally the score of networks through cross-validation
 2 | #'
 3 | #' @details Files that cannot be read, or that contain no networks, are
 4 | #' skipped. In each remaining file networks are ordered by decreasing
 5 | #' weight and the top-ranked ones are kept, up to and including the one
 6 | #' whose cumulative weight is closest to \code{filter_WtSum}. A network's
 7 | #' score is the number of times it is kept across \code{fList}.
 8 | #' @param fList (char) Vector of paths to GeneMANIA NRANK files
 9 | #' @param filter_WtSum (numeric between 5-100) Limit to top-ranked 
10 | #' networks such that cumulative weight is closest to this parameter. 
11 | #' e.g. If filter_WtSum=20, first order networks by decreasing weight; 
12 | #' then keep networks up to the one whose cumulative weight is closest
13 | #' to 20. Values below 5 are reset to 5.
14 | #' @param verbose (logical) print messages
15 | #' @return (data.frame) Feature name and score; includes features that occur
16 | #' at least once in \code{fList}. Has zero rows if no networks were found.
17 | #' @examples
18 | #' netDir <- system.file("extdata","GM_NRANK",package="netDx")
19 | #' netFiles <- sprintf('%s/%s', netDir,dir(netDir,pattern='NRANK$'))
20 | #' pTally <- compileFeatureScores(netFiles,verbose=TRUE)
21 | #' print(head(pTally))
22 | #' @export
23 | compileFeatureScores <- function(fList, filter_WtSum = 100, verbose = FALSE) {
24 |     if (filter_WtSum < 5) {
25 |         message("filter_WtSum cannot be < 5 ; setting to 5")
26 |         filter_WtSum <- 5
27 |     }
28 |     # networks kept from each file; entries for skipped files stay NULL
29 |     kept <- vector("list", length(fList))
30 |     for (i in seq_along(fList)) {
31 |         # Assign the value of try() itself. Assigning inside try() leaves
32 |         # `dat` untouched on failure, so the previous file's networks
33 |         # would be tallied a second time.
34 |         dat <- try(read.delim(fList[[i]], sep = "\t", header = TRUE, 
35 |             as.is = TRUE, skip = 1), silent = TRUE)
36 |         if (inherits(dat, "try-error") || nrow(dat) < 1) next
37 |         if (verbose) {
38 |             message("Net weight distribution:")
39 |             print(summary(dat$Weight))
40 |         }
41 |         # sort explicitly rather than rely on the order within the file
42 |         dat <- dat[order(dat$Weight, decreasing = TRUE), ]
43 |         cs <- cumsum(dat$Weight)
44 |         # cut at the rank whose cumulative weight is nearest the target
45 |         keep_max <- which.min(abs(cs - filter_WtSum))
46 |         dat <- dat[seq_len(keep_max), ]
47 |         if (verbose) {
48 |             message(sprintf(paste("filter_WtSum = %1.1f; ",
49 |                 "%i of %i networks left",sep=""),
50 |                 filter_WtSum, nrow(dat), length(cs)))
51 |         }
52 |         kept[[i]] <- as.character(dat$Network)
53 |     }
54 |     keptNets <- unlist(kept)
55 |     if (length(keptNets) < 1) {
56 |         return(data.frame(name = character(0), score = integer(0),
57 |             stringsAsFactors = FALSE))
58 |     }
59 |     # one point per appearance of a network among the kept sets
60 |     nets <- unique(keptNets)
61 |     tally <- tabulate(match(keptNets, nets), nbins = length(nets))
62 |     names(tally) <- nets
63 |     tally <- sort(tally, decreasing = TRUE)
64 |     data.frame(name = names(tally), score = as.integer(tally),
65 |         stringsAsFactors = FALSE)
66 | }
67 | 


--------------------------------------------------------------------------------
/R/countPatientsInNet.R:
--------------------------------------------------------------------------------
 1 | #' Count number of patients in a network
 2 | #' 
 3 | #' @details This functionality is needed to count patient overlap when 
 4 | #' input data is in a form that results in highly missing data, rather than
 5 | #' when the same measures are available for almost all patients. An example
 6 | #' application is when patient networks are based on unique genomic events
 7 | #' in each patient (e.g. CNVs or indels), rather than 'full-matrix' data
 8 | #' (e.g. questionnaires or gene expression matrices). The former scenario
 9 | #' requires an update in the list of eligible networks each time some type
10 | #' of patient subsetting is applied (e.g. label enrichment, or train/test
11 | #' split). A matrix with patient/network membership serves as a lookup
12 | #' table to prune networks as feature selection proceeds
13 | #' @param netDir (char) dir with network set
14 | #' @param fList (char) filenames of interaction networks to count in
15 | #' @param ids (char) patient IDs to look for
16 | #' @return (matrix) Size P by N, where P is num patients and N is 
17 | #' number of networks; a[i,j] =1 if patient i in network j, else 0
18 | #' @examples
19 | #' d <- tempdir()
20 | #' pids <- paste("P",1:5,sep="")
21 | #' m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),
22 | #' 	byrow=FALSE,ncol=3)
23 | #' write.table(m1,
24 | #'	file=paste(d,"net1.txt",sep=getFileSep()),sep="\t",
25 | #'	col.names=FALSE,row.names=FALSE,quote=FALSE)
26 | #' m2 <- matrix(c("P3","P4",1),nrow=1)
27 | #' write.table(m2,
28 | #'	file=paste(d,"net2.txt",sep=getFileSep()),sep="\t",
29 | #'	col.names=FALSE,row.names=FALSE,quote=FALSE)
30 | #' x <- countPatientsInNet(d,c("net1.txt","net2.txt"), pids)
31 | #' @export
32 | countPatientsInNet <- function(netDir, fList, ids) {
33 |     # patients in rows, networks in columns; everything starts as 'absent'
34 |     membership <- matrix(0, nrow = length(ids), ncol = length(fList))
35 |     rownames(membership) <- ids
36 |     colnames(membership) <- fList
37 |     for (j in seq_along(fList)) {
38 |         netFile <- paste(netDir, fList[[j]], sep = getFileSep())
39 |         edges <- read.delim(netFile, sep = "\t", header = FALSE,
40 |             as.is = TRUE)
41 |         # a patient is a member if it appears at either end of any edge
42 |         inNet <- ids %in% c(edges[, 1], edges[, 2])
43 |         membership[inNet, j] <- 1
44 |     }
45 |     membership
46 | }
47 | 


--------------------------------------------------------------------------------
/R/dataList2list.R:
--------------------------------------------------------------------------------
 1 | #' Convert MultiAssayExperiment object to list and data.frame
 2 | #' 
 3 | #' @details Used by internal routines in netDx. Assay columns are renamed
 4 | #' from assay-specific sample names to patient IDs using the sample map.
 5 | #' @param dat (MultiAssayExperiment) Patient data and metadata
 6 | #' @param groupList (list) variable groupings used for feature construction. See groupList arg in buildPredictor().
 7 | #' If it has a "clinical" entry, the colData variables listed there are
 8 | #' added as a "clinical" layer with one row per variable.
 9 | #' @return (list) Keys are:
10 | #' 1) assays: list of matrices, each corresponding to data from a particular
11 | #' layer
12 | #' 2) pheno: (data.frame) sample metadata 
13 | #' @examples
14 | #' data(xpr,pheno)
15 | #' require(MultiAssayExperiment)
16 | #' objlist <- list("RNA"=SummarizedExperiment(xpr))
17 | #' mae <- MultiAssayExperiment(objlist,pheno)
18 | #' groupList <- list(RNA=rownames(xpr))
19 | #' dl <- dataList2List(mae,groupList)
20 | #' summary(dl) 
21 | #' @export
22 | dataList2List <- function(dat,groupList) {
23 |     # convert assays to list of matrices, replacing assay-specific sample
24 |     # name with patient ID
25 |     exprs <- experiments(dat)
26 |     smap <- sampleMap(dat)  # fetched once instead of twice per assay
27 |     datList2 <- list()
28 |     for (k in seq_along(exprs)) {
29 |         assayName <- names(exprs)[k]
30 |         tmp <- exprs[[k]]
31 |         df <- smap[which(smap$assay == assayName), ]
32 |         # Look up each assay column in the sample map, in assay column
33 |         # order. Matching the other way round only labels columns
34 |         # correctly when both happen to be in the same order.
35 |         colnames(tmp) <- df$primary[match(colnames(tmp), df$colname)]
36 |         if (any(c("SimpleList", "SummarizedExperiment") %in% class(tmp))) {
37 |             tmp <- as.matrix(assays(tmp)[[1]])  # convert to matrix
38 |         }
39 |         datList2[[assayName]] <- tmp
40 |     }
41 |     if ("clinical" %in% names(groupList)) {
42 |         vars <- unique(unlist(groupList[["clinical"]]))
43 |         # transpose so the clinical layer has variables in rows
44 |         datList2[["clinical"]] <- t(as.matrix(
45 |             colData(dat)[, vars, drop = FALSE]))
46 |     }
47 |     list(
48 |         assays = datList2,
49 |         pheno = as.data.frame(colData(dat)))
50 | }
51 | 


--------------------------------------------------------------------------------
/R/getCorrType.R:
--------------------------------------------------------------------------------
 1 | #' Counts the relative correlation of (+,+) and (+,-)(-,-) interactions
 2 | #' 
 3 | #' @param inFile (character): path to interaction networks
 4 | #' @param plusID (character) vector of + nodes
 5 | #' @param minusID (character) vector of - nodes. Currently unused: every
 6 | #' edge that is not (+,+) is grouped as 'other'
 7 | #' @return (numeric) mean edge weight for (+,+) and other edges
 8 | getCorrType <- function(inFile, plusID, minusID) {
 9 |     dat <- read.delim(inFile, sep = "\t", header = FALSE, as.is = TRUE)
10 |     # TRUE for edges whose two endpoints are both + nodes
11 |     pp_idx <- dat[, 1] %in% plusID & dat[, 2] %in% plusID
12 |     pp_corr <- mean(dat[pp_idx, 3])
13 |     # Negate the logical mask to get the other edges. setdiff() on row
14 |     # numbers would coerce the mask to 0/1 and only ever drop row 1.
15 |     pm_corr <- mean(dat[!pp_idx, 3])
16 |     c(pp_corr, pm_corr)
17 | }
18 | 


--------------------------------------------------------------------------------
/R/getEmapInput_many.R:
--------------------------------------------------------------------------------
 1 | #' Wrapper to generate multiple EnrichmentMaps (perhaps one per class)
 2 | #'
 3 | #' @details Calls getEMapInput() once for each named entry of
 4 | #' \code{featScores} and collects the results under the same names.
 5 | #' @param featScores (list) keys are classes, and values are data.frames of
 6 | #' network scores across cross-validation (output of getFeatScores()).
 7 | #' @param namedSets_valid (list) Grouped unit variables limited to the
 8 | #' units contained in the dataset. e.g. keys are pathways and values are
 9 | #' the genes measured in this dataset.
10 | #' e.g.:
11 | #' $`MISSPLICED_GSK3BETA_MUTANTS_STABILIZE_BETA-CATENIN`
12 | #' [1] 'PPP2R5E' 'PPP2CB'  'APC'     'AXIN1'   'PPP2R1B' 'PPP2R1A' 'CSNK1A1'
13 | #' [8] 'PPP2R5D' 'PPP2R5C' 'PPP2R5B' 'PPP2R5A' 'PPP2CA'  'GSK3B'
14 | #' @param netTypes (data.frame) 'inputNets.txt' file
15 | #' generated by NetDx. Dataframe has two columns, network type and
16 | #' network  name. I.E:
17 | #'  clinical                                          clinical
18 | #'       rna GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS
19 | #'       rna                              RETINOL_BIOSYNTHESIS
20 | #' @param outDir (char) path to output directory. Accepted for
21 | #' compatibility; this wrapper does not pass it on to getEMapInput()
22 | #' @param ... parameters for getEMapInput()
23 | #' @examples
24 | #' data(featScores)
25 | #' 
26 | #' pathwayList <- readPathways(fetchPathwayDefinitions("October",2020))
27 | #' pathwayList <- pathwayList[seq_len(5)]
28 | #' 
29 | #' netInfoFile <- system.file("extdata","example_output/inputNets.txt",package="netDx")
30 | #' netTypes <- read.delim(netInfoFile,sep='\t',h=FALSE,as.is=TRUE)
31 | #' outDir <- paste(tempdir(),'plots',sep='/')
32 | #' if (!file.exists(outDir)) dir.create(outDir)
33 | #' EMap_input <- getEMapInput_many(featScores,pathwayList,
34 | #'      netTypes,outDir=outDir)
35 | #' @return (list) of length g, where g is the number of groups in featScores.
36 | #' Values are lists, corresponding to the output of getEmapInput.R
37 | #' @export
38 | getEMapInput_many <- function(featScores, namedSets_valid, netTypes, 
39 |     outDir, ...) {
40 |     emapInputs <- list()
41 |     # one getEMapInput() call per class, stored under the class name
42 |     for (className in names(featScores)) {
43 |         emapInputs[[className]] <- getEMapInput(featScores[[className]],
44 |             namedSets_valid, netTypes, ...)
45 |     }
46 |     emapInputs
47 | }
48 | 


--------------------------------------------------------------------------------
/R/getNetConsensus.R:
--------------------------------------------------------------------------------
 1 | #' compile net score across a set of predictor results
 2 | #'
 3 | #' @details used to compare how individual nets score for different
 4 | #' predictor configurations
 5 | #' @param scorelist (list) key is dataset name, value is a data.frame 
 6 | #' containing PATHWAY_NAME and SCORE. This is the output of
 7 | #'  compileFeatureScores(). Must contain at least one entry.
 8 | #' @return (data.frame) The PATHWAY_NAME column holds the union of all
 9 | #' nets in the input list. The remaining columns, named after the keys of
10 | #' the input list, show the net scores for each key. Where a 
11 | #' net is not found in a given list, it is assigned the value of NA
12 | #' @examples
13 | #' pathways <- paste("PATHWAY_",1:100,sep="")
14 | #' highrisk <- list()
15 | #' for (k in 1:10) { 
16 | #' 	highrisk[[k]] <- data.frame(PATHWAY_NAME=pathways, 
17 | #'		SCORE=runif(length(pathways),min=0,max=10),
18 | #'				stringsAsFactors=FALSE);
19 | #' }
20 | #' names(highrisk) <- sprintf("Split%i",1:length(highrisk))
21 | #' x <- getNetConsensus(highrisk)
22 | #' @export
23 | getNetConsensus <- function(scorelist) {
24 |     if (length(scorelist) < 1) {
25 |         stop("scorelist must contain at least one data.frame")
26 |     }
27 |     out <- scorelist[[1]]
28 |     colnames(out)[2] <- names(scorelist)[1]
29 |     # seq_along()[-1] is empty for a single-entry list; 2:length() would
30 |     # count down (2, 1) and index past the end of the list.
31 |     for (k in seq_along(scorelist)[-1]) {
32 |         # full outer join, so nets missing from either side get NA
33 |         out <- merge(x = out, y = scorelist[[k]], by = "PATHWAY_NAME", 
34 |             all.x = TRUE, all.y = TRUE)
35 |         # the score column just merged in is the last one, at k + 1
36 |         colnames(out)[k + 1] <- names(scorelist)[k]
37 |     }
38 |     out
39 | }
40 | 
41 | 


--------------------------------------------------------------------------------
/R/getRegionOL.R:
--------------------------------------------------------------------------------
 1 | #' Returns overlapping named ranges for input ranges
 2 | #'
 3 | #' @details Given a set of query GRanges, and a subject list-of-GRanges,
 4 | #' updates the query with a column 'LOCUS_NAMES' containing the names of
 5 | #' ranges overlapped by the query. One application is to map structural
 6 | #' variants, such as CNVs, to genes in pathways of interest. In this 
 7 | #' scenario \code{gr} would contain the patient CNVs, and \code{rngList}
 8 | #' would be a list of GenomicRanges objects, one per cellular pathway.
 9 | #' @param gr (GRanges) query ranges
10 | #' @param rngList (list) keys are names, and values are GRanges, each range
11 | #' of which has a name (in 'name' column). Note: It is faster to provide
12 | #' a list of length 1 ; if the list is long, combining into a single GRanges
13 | #' object could prove slow.
14 | #' @return (GRanges) query ranges with the added column 'LOCUS_NAMES'. 
15 | #' Where a range overlaps with multiple loci, the names are reported as a 
16 | #' comma-separated vector
17 | #' @examples
18 | #' data(cnv_GR,pathway_GR)
19 | #' x <- getRegionOL(cnv_GR,pathway_GR)
20 | #' @export
21 | #' @importFrom GenomeInfoDb seqlevels seqlevels<-
22 | #' @importFrom GenomicRanges GRanges
23 | #' @importFrom S4Vectors queryHits subjectHits
getRegionOL <- function(gr, rngList) {
    # Pool every element of rngList into a single GRanges object. Before each
    # append, the pooled object's seqlevels are extended with those of the
    # incoming element.
    pooled <- GRanges()
    for (i in seq_along(rngList)) {
        nextRng <- rngList[[i]]
        seqlevels(pooled) <- unique(c(seqlevels(pooled), seqlevels(nextRng)))
        pooled <- c(pooled, nextRng)
    }

    # Keep only pooled ranges whose sequence name is one of the query's
    # seqlevels, then give the pooled object the query's seqlevels.
    querySeqs <- as.character(seqlevels(gr))
    onQuerySeq <- as.character(seqnames(pooled)) %in% querySeqs
    pooled <- pooled[which(onQuerySeq)]
    seqlevels(pooled) <- seqlevels(gr)

    # Two-column matrix of (query index, pooled-range index) overlap pairs.
    hits <- findOverlaps(gr, pooled)
    hitPairs <- cbind(queryHits(hits), subjectHits(hits))
    hitNames <- pooled$name[hitPairs[, 2]]

    # For each query with at least one hit, join its unique locus names with
    # commas; queries without hits keep the empty string.
    LOCUS_NAMES <- rep("", length(gr))
    startTime <- Sys.time()
    for (qIdx in unique(hitPairs[, 1])) {
        mine <- which(hitPairs[, 1] == qIdx)
        LOCUS_NAMES[qIdx] <- paste(unique(hitNames[mine]), collapse = ",")
    }
    # Elapsed time of the name-collapsing loop is printed to the console.
    print(Sys.time() - startTime)
    gr$LOCUS_NAMES <- LOCUS_NAMES

    gr
}
52 | 


--------------------------------------------------------------------------------
/R/getSimilarity.R:
--------------------------------------------------------------------------------
 1 | #' Measures of patient similarity
 2 | #' 
 3 | #' @param x (matrix) matrix for which pairwise patient similarity is to be
 4 | #' computed. Expects one column per patient, and one measurement per row.
 5 | #' @param type (character) name of similarity measure. Currently supports 
 6 | #' Pearson correlation ('pearson') or a custom measure ('custom')
 7 | #' @param customFunc (function) custom similarity function. Only used when 
 8 | #' \code{type='custom'}. The function takes \code{x} as first argument and 
 9 | #' can take additional argument. It should return a symmetric matrix of 
10 | #' pairwise patient similarities.
11 | #' @param ... parameter for customFunc
12 | #' @return symmetric matrix of size N, where N is number of samples
13 | #' @examples
14 | #' data(xpr) 
15 | #' x <- getSimilarity(xpr) # similarity by Pearson corr
16 | #' mySim <- function(x) cor(x,method='kendall')
17 | #' x <- getSimilarity(xpr,customFunc=mySim) # custom similarity
18 | #' @importFrom stats cor
19 | #' @export
20 | getSimilarity <- function(x, type = "pearson", customFunc, ...) {
21 |     switch(type, pearson = round(cor(na.omit(x), method = "pearson"), 
22 | 			digits = 3), custom = customFunc(x, ...))
23 | }
24 | 


--------------------------------------------------------------------------------
/R/globals.R:
--------------------------------------------------------------------------------
1 | 
# Symbols registered with globalVariables(), and the file each one is
# associated with:
#   curSet    - makePSN_NamedMatrix.R
#   spos      - makePSN_RangeSets.R
#   groupList - dataList2list.R
globalVariables(
    c("curSet", "spos", "groupList")
)
6 | 


--------------------------------------------------------------------------------
/R/makeQueries.R:
--------------------------------------------------------------------------------
 1 | #' Randomly select patients for queries for feature selection
 2 | #' 
 3 | #' @param incPat (char) vector of patient IDs to be included in query
 4 | #' @param featScoreMax (integer) Number of times to run query, usually equal 
 5 | #' to the max score for features in the design (e.g. if featScoreMax=10, then 
 6 | #' this value is 10).
 7 | #' @param verbose (logical) print messages
 8 | #' @return (list) of length \code{featScoreMax}, containing names of patients 
 9 | #' in query file for each fold
10 | #' @examples 
11 | #' data(pheno)
12 | #' x <- makeQueries(pheno$ID)
13 | #' @export
makeQueries <- function(incPat, featScoreMax = 10L, verbose = TRUE) {
    
    # randomly reorder for N-fold partitioning.
    incPat <- sample(incPat, replace = FALSE)
    numPat <- length(incPat)
    # num in query file
    num2samp <- floor(((featScoreMax - 1)/featScoreMax) * numPat)
    # num held out of the query (to be retrieved) in each iteration
    csize <- round((1/featScoreMax) * numPat)
    
    if (verbose) {
        message(sprintf("\t\t%i IDs; %i queries (%i sampled, %i test)", 
            numPat, featScoreMax, num2samp, csize))
    }
    
    out <- list()
    for (k in seq_len(featScoreMax)) {
        # Fold k holds out positions sidx..eidx of the shuffled IDs; the
        # last fold absorbs whatever remains.
        sidx <- ((k - 1) * csize) + 1
        eidx <- if (k == featScoreMax) numPat else min(k * csize, numPat)

        # Guard against sidx > eidx (csize rounded to 0, or earlier folds
        # already consumed every ID): sidx:eidx would then count DOWN and
        # hold out the wrong patients instead of none.
        testIdx <- if (sidx <= eidx) seq.int(sidx, eidx) else integer(0)
        p1 <- sprintf("\t\tQ%i: %i test; ", k, length(testIdx))
        
        out[[k]] <- setdiff(incPat, incPat[testIdx])
        if (verbose) 
            message(sprintf("%s %i query", p1, length(out[[k]])))
    }
    
    out
}
44 | 


--------------------------------------------------------------------------------
/R/makeSymmetric.R:
--------------------------------------------------------------------------------
 1 | #' Convert a network in source-target-weight format to symmetric matrix
 2 | #'
 3 | #' @details A common format for network representation is to use a three
 4 | #' column table listing source node, target node, and weight.  
 5 | #' This is the format netDx uses for network integration and visualization
 6 | #' in Cytoscape. However, some functionality requires a square symmetric
 7 | #' adjacency matrix. This function takes as input the three-column format
 8 | #' and converts to the adjacency matrix. 
 9 | #' NOTE: Symmetric attribute is assumed, and the function automatically sets
10 | #' a[i,j] = a[j,i]. Diagonal is assumed to have value of 1.0. Finally
11 | #' missing edges will be assigned NA values.
12 | #' @param x (data.frame) three columns, with source node, target node, and 
13 | #' edge weight. Entries must include universe of nodes; those with missing
14 | #' edges must be included as having edge weight NA
15 | #' @param verbose (logical) print messages
16 | #' @return (matrix) symmetric adjacency matrix
17 | #' @examples
18 | #' src <- c("A","B"); tgt <- c("C","C")
19 | #' cur <- data.frame(source=src,target=tgt,weight=c(0.3,0.8))
20 | #' makeSymmetric(cur)
21 | #' @export
makeSymmetric <- function(x,verbose=FALSE) {
    # Coerce node columns to character up front: combining factor columns
    # with c() yields integer codes on older R versions, which would turn
    # node names into numbers.
    src <- as.character(x[, 1])
    tgt <- as.character(x[, 2])
    wt <- x[, 3]

    # Node universe, in order of first appearance (sources, then targets).
    samps <- unique(c(src, tgt))
    newmat <- matrix(NA, nrow = length(samps), ncol = length(samps))
    rownames(newmat) <- samps
    colnames(newmat) <- samps

    if (verbose) {
        for (k in samps) message(k)
    }

    if (length(src) > 0) {
        i <- match(src, samps)
        j <- match(tgt, samps)
        # Visit edges grouped by source node (ties keep input order), and
        # write (i,j) immediately followed by (j,i) for each edge. With
        # repeated edges the last one visited therefore wins in BOTH
        # triangles, keeping the result symmetric.
        ord <- order(i)
        rowIdx <- as.vector(rbind(i[ord], j[ord]))
        colIdx <- as.vector(rbind(j[ord], i[ord]))
        newmat[cbind(rowIdx, colIdx)] <- rep(wt[ord], each = 2)
    }

    # Diagonal is defined to be 1 regardless of any self-edges in x.
    diag(newmat) <- 1
    newmat
}
43 | 
44 | 


--------------------------------------------------------------------------------
/R/mapNamedRangesToSets.R:
--------------------------------------------------------------------------------
 1 | #' Map named ranges to corresponding set of named ranges
 2 | #'
 3 | #' @details Example application is when we have named ranges each
 4 | #' corresponding to genes or regulatory elements, and we wish to group
 5 | #' these ranges based on metabolic pathway.
 6 | #' @param gr (GRanges) named ranged to be grouped
 7 | #' @param rangeList (list) sets of range names
 8 | #' @param verbose (logical) print detailed messages
 9 | #'
10 | #' @return RangeList. keys are names of \code{rangeList}, values are GRanges
11 | #' @importFrom GenomicRanges GRanges
12 | #' @importFrom IRanges IRanges
13 | #' @examples 
14 | #' data(genes,pathwayList); 
15 | #' gene_GR<-GenomicRanges::GRanges(genes$chrom,
16 | #'   IRanges::IRanges(genes$txStart,genes$txEnd),
17 | #' 		name=genes$name2)
18 | #' path_GRList <- mapNamedRangesToSets(gene_GR,pathwayList)
19 | #' @export
mapNamedRangesToSets <- function(gr, rangeList, verbose = FALSE) {
    # For every named set in rangeList, keep the elements of gr whose `name`
    # value appears in that set; results are keyed by set name.
    grouped <- list()
    setLabels <- names(rangeList)
    for (setLabel in setLabels) {
        inSet <- gr$name %in% rangeList[[setLabel]]
        members <- gr[which(inSet)]
        if (verbose) {
            message(sprintf("%s: %i ranges\n", setLabel, length(members)))
        }
        grouped[[setLabel]] <- members
    }
    grouped
}
30 | 


--------------------------------------------------------------------------------
/R/matrix_getIJ.R:
--------------------------------------------------------------------------------
 1 | #' Converts matrix index (1 to m*n) to row (m) and column (n) number
 2 | #'
 3 | #' @param dimMat (integer vector of length 2) output of \code{dim()} for
 4 | #' matrix in question
 5 | #' @param idx (integer vector of length n) matrix indices
 6 | #' @return (matrix) n-by-2, first column has row indices ; second column
 7 | #' has col indices
 8 | matrix_getIJ <- function(dimMat, idx) {
 9 |     nr <- dimMat[1]
10 |     nc <- dimMat[2]
11 |     
12 |     out <- matrix(NA, nrow = length(idx), ncol = 2)
13 |     out[, 1] <- idx%%nr
14 |     if (any(out[, 1] %in% 0)) {
15 |         out[which(out[, 1] %in% 0)] <- nr
16 |     }
17 |     
18 |     out[, 2] <- ceiling(idx/nr)
19 |     
20 |     out
21 | }
22 | 


--------------------------------------------------------------------------------
/R/moveInteractionNets.R:
--------------------------------------------------------------------------------
 1 | #' moves interaction networks when compiling database for sparse genetic
 2 | #' workflow
 3 | #'
 4 | #' @param netDir (char) source directory
 5 | #' @param outDir (char) target directory
 6 | #' @param pheno (data.frame) contains patient ID and STATUS
 7 | #' @param fileSfx (char) suffix to strip from network file names before
 8 | #' registering in metadata tables
 9 | #' @return No value. Side effect of moving interaction nets to target
10 | #' directory and creating network-related metadata files used to compile
11 | #' feature database
12 | #' @importFrom utils write.table
moveInteractionNets <- function(netDir,outDir,pheno,fileSfx="_cont.txt") {
    # NOTE(review): every path below is built from netDir; `outDir` is not
    # referenced in this function body.
    inNetDir <- function(...) paste(netDir, ..., sep = getFileSep())

    # Network files are those in netDir whose name matches fileSfx (used as
    # a pattern by dir()).
    netFiles <- dir(path = netDir, pattern = fileSfx)
    netIdx <- seq_len(length(netFiles))
    netID <- data.frame(ID = netIdx, name = netFiles, ID = netIdx,
        name2 = netFiles, 0, 1, stringsAsFactors = TRUE)

    dir.create(inNetDir("INTERACTIONS"))
    for (k in netIdx) {
        curFile <- netFiles[k]
        edges <- read.delim(inNetDir(curFile), sep = "\t", header = FALSE,
            as.is = TRUE)
        # Replace the IDs in the first two columns by pheno$INTERNAL_ID,
        # looked up through pheno$ID.
        recoded <- edges
        recoded[, 1] <- pheno$INTERNAL_ID[match(edges[, 1], pheno$ID)]
        recoded[, 2] <- pheno$INTERNAL_ID[match(edges[, 2], pheno$ID)]
        # Output is named 1.<ID>.txt, where <ID> is this file's row in netID.
        outName <- sprintf("1.%i.txt", netID$ID[which(netID$name == curFile)])
        write.table(recoded, file = inNetDir("INTERACTIONS", outName),
            sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
    }

    # write NETWORKS.txt
    write.table(netID, file = inNetDir("NETWORKS.txt"), sep = "\t",
        col.names = FALSE, row.names = FALSE, quote = FALSE)

    # write NETWORK_GROUPS.txt (a single fixed record)
    groupCon <- file(inNetDir("NETWORK_GROUPS.txt"), "w")
    groupLine <- paste(1, "dummy_group", "geneset_1", "dummy_group", 1,
        sep = "\t")
    write(groupLine, file = groupCon)
    close(groupCon)

    # write NETWORK_METADATA.txt: one tab-separated record per network ID,
    # with every field empty except two zeros.
    metaCon <- file(inNetDir("NETWORK_METADATA.txt"), "w")
    metaLines <- paste(netID$ID, "", "", "", "", "", "", "",
        "", "", 0, "", "", 0, "",
        "", "", "", "", sep = "\t")
    write.table(metaLines, file = metaCon, sep = "\t", col.names = FALSE,
        row.names = FALSE, quote = FALSE)
    close(metaCon)
}
52 | 


--------------------------------------------------------------------------------
/R/perfCalc.R:
--------------------------------------------------------------------------------
 1 | #' Computes variety of predictor evaluation measures based on the confusion
 2 | #' matrix
 3 | #'
 4 | #' @param dat (data.frame): 5 columns: score, tp, fp, tn, fn. 
 5 | #' One row per cutoff
 6 | #' score for feature selection
 7 | #' @return (list)
 8 | #' stats (data.frame): score, f1, ppv, precision and recall. One row
 9 | #' per cutoff for feature selection
10 | #' auc (numeric between 0 and 1): AUC of overall ROC curve
11 | #' prauc (numeric between 0 and 1): AUC of overall precision-recall curve
12 | #' @importFrom pracma trapz
13 | #' @examples
14 | #' data(confmat)
15 | #' x <- perfCalc(confmat)
16 | #' @export
perfCalc <- function(dat) {
    # Rows with any NA are dropped before computing the measures.
    dat <- na.omit(dat)
    tp <- dat$tp
    fp <- dat$fp
    tn <- dat$tn
    fn <- dat$fn

    # F1 written directly in terms of confusion-matrix counts.
    twiceTP <- 2 * tp
    f1 <- twiceTP/(twiceTP + fp + fn)

    # Precision (= positive predictive value) and recall, one per cutoff.
    precision <- tp/(tp + fp)
    recall <- tp/(tp + fn)

    # Area under the precision-recall curve. Both vectors are reversed
    # before integration (original author's note: otherwise the area comes
    # out negative).
    prauc <- pracma::trapz(rev(recall), rev(precision))

    # Area under the ROC curve: false-positive rate against recall, with the
    # curve anchored at (0,0) and (1,1).
    fpr <- fp/(fp + tn)
    rocX <- c(0, rev(fpr), 1)
    rocY <- c(0, rev(recall), 1)
    auc <- pracma::trapz(rocX, rocY)

    perCutoff <- data.frame(score = dat$score, ppv = precision, f1 = f1,
        rec = recall)
    list(stats = perCutoff, auc = auc, prauc = prauc)
}
45 | 


--------------------------------------------------------------------------------
/R/plotPerf_simple.R:
--------------------------------------------------------------------------------
 1 | #' performance metrics for model
 2 | #' @param res (data.frame) result from predicting labels on held-out test set. output of predict() function. 
 3 | #' columns include ID, STATUS (ground truth) and PRED_CLASS (predicted label)
 4 | #' @param predClasses (character) patient labels used by classifier
 5 | #' @return (list)
 6 | #' 1) rocCurve: ROCR performance object for ROC curve
 7 | #' 2) prCurve: ROCR performance object for PR curve
 8 | #' 3) auroc: Area under ROC curve
 9 | #' 4) aupr: Area under PR curve
10 | #' 5) accuracy: Accuracy
11 | #' @import ROCR
12 | #' @export
getPerformance <- function(res, predClasses) {

  # given output of performance('prec','rec') compute AUC-PR, skipping
  # points where precision is NaN
  prauc <- function(prPerf) {
    recall <- prPerf@x.values[[1]]
    precision <- prPerf@y.values[[1]]
    keep <- !is.nan(precision)
    pracma::trapz(recall[keep], precision[keep])
  }

  # Scores are read from the "<class>_SCORE" columns of the first two
  # entries of predClasses; fail early with a clear message if either
  # column is absent.
  scoreCols <- sprintf("%s_SCORE", predClasses[seq_len(2)])
  absent <- setdiff(scoreCols, colnames(res))
  if (length(absent) > 0) {
    stop(sprintf("getPerformance: res is missing score column(s): %s",
      paste(absent, collapse = ", ")))
  }

  # Rank patients by (class-1 score minus class-2 score); the positive
  # label is "STATUS equals the first class".
  scoreDiff <- res[[scoreCols[1]]] - res[[scoreCols[2]]]
  pred <- ROCR::prediction(scoreDiff, res$STATUS == predClasses[1])

  # entire curves
  curRoc <- ROCR::performance(pred, "tpr", "fpr")
  curPr <- ROCR::performance(pred, "prec", "rec")

  # summary statistics
  auroc <- ROCR::performance(pred, "auc")@y.values[[1]]
  aupr <- prauc(curPr)
  # accuracy is reported as a percentage of rows where the predicted class
  # equals STATUS
  numCorrect <- sum(res$STATUS == res$PRED_CLASS)
  acc <- (numCorrect/nrow(res))*100

  list(rocCurve=curRoc,prCurve=curPr,auroc=auroc,aupr=aupr,accuracy=acc)
}


--------------------------------------------------------------------------------
/R/predictPatientLabels.R:
--------------------------------------------------------------------------------
 1 | #' assign patient class when ranked by multiple GM predictors
 2 | #'
 3 | #' @param resSet (list) output of getPatientRankings, each key for a different
 4 | #' predictor. names(resSet) contain predictor label
 5 | #' @param verbose (logical) print detailed messages
 6 | #' @return data.frame: ID, similarityScore, PRED_CLASS
 7 | #' @examples 
 8 | #' data(predRes); predClass <- predictPatientLabels(predRes)
 9 | #' @export
predictPatientLabels <- function(resSet, verbose = TRUE) {
    if (length(resSet) < 1) {
        stop("predictPatientLabels: resSet is empty")
    }

    # Build a table with ID plus one "<predictor>_SCORE" column per entry
    # of resSet, taken from each entry's fullmat.
    type_rank <- NULL
    for (k in seq_along(resSet)) {
        x <- resSet[[k]]$fullmat
        # accept "GM_score" as an alias for "similarityScore"
        idx <- which(colnames(x) == "GM_score")
        if (length(idx) > 0) 
            colnames(x)[idx] <- "similarityScore"
        if (is.null(type_rank)) {
            type_rank <- x[, c("ID", "similarityScore")]
        } else {
            # all.equal() returns TRUE or a character vector describing the
            # differences, so it must be wrapped in isTRUE() to be usable
            # as an if() condition.
            if (!isTRUE(all.equal(x$ID, type_rank$ID))) {
                stop("predictPatientLabels: ids don't match")
            }
            type_rank <- cbind(type_rank, x[, "similarityScore"])
        }
        rnkCol <- paste(names(resSet)[k], "SCORE", sep = "_")
        colnames(type_rank)[ncol(type_rank)] <- rnkCol
    }
    
    # drop = FALSE keeps a data.frame even when there is a single predictor,
    # which rowSums() requires.
    na_sum <- rowSums(is.na(type_rank[, -1, drop = FALSE]))
    if (verbose) {
        if (any(na_sum > 0)) 
            message(sprintf(paste("*** %i rows have an NA prediction ", 
							"(probably query samples that were not not ranked\n", 
                sep = ""), sum(na_sum > 0)))
    }
    type_rank <- na.omit(type_rank)
    
    # finally, select the class with the highest score as the subject label
    # (the first such column wins on ties).
    scoreCols <- colnames(type_rank)[-1]
    maxScore <- rep(NA, nrow(type_rank))
    for (k in seq_len(nrow(type_rank))) {
        rowScores <- unlist(type_rank[k, -1, drop = FALSE])
        maxScore[k] <- scoreCols[which.max(rowScores)]
    }
    patClass <- sub("_SCORE", "", maxScore)
    type_rank <- cbind(type_rank, PRED_CLASS = patClass)
    type_rank$PRED_CLASS <- as.character(type_rank$PRED_CLASS)
    
    type_rank
}
48 | 


--------------------------------------------------------------------------------
/R/pruneNetByStrongest.R:
--------------------------------------------------------------------------------
 1 | #' Prune network by retaining strongest edges
 2 | #'
 3 | #' @param net (data.frame)  Network to prune. Columns are: source,target,weight
 4 | #' @param vertices (char) node names. Should match those in net[,1:2]
 5 | #' @param pctX (numeric 0 to 1) Fraction of top/bottom edges to retain
 6 | #' @param useTop (logical) if TRUE prunes to top pctX edges; else
 7 | #' prunes to bottom pctX edges
 8 | #' @return (data.frame) pruned network. Three columns: AliasA, AliasB, and 
 9 | #' weight
10 | #' @importFrom igraph graph_from_data_frame
11 | #' @importFrom igraph delete.edges
12 | #' @importFrom igraph get.edgelist
13 | #' @importFrom igraph edge_attr
14 | #' @importFrom igraph E
15 | #' @export
pruneNet <- function(net,vertices, pctX=0.1, useTop=TRUE) {
	g <- igraph::graph_from_data_frame(net,vertices=vertices)
	edgeWt <- E(g)$weight
	wt <- sort(edgeWt, decreasing=TRUE)
	numEdges <- length(wt)

	# Position in the descending weight vector that serves as the cutoff.
	# The raw product can be fractional, or fall just below a whole number
	# through floating-point error (e.g. 10 * (1 - 0.9)); an index that
	# truncates to 0 selects nothing, so no edge would be pruned. Round
	# down with a small tolerance and clamp to a valid position.
	cutPos <- function(frac) {
		pos <- floor(numEdges * frac + 1e-9)
		min(max(pos, 1), max(numEdges, 1))
	}

	if (useTop) { # keep topmost edges
		thresh <- wt[cutPos(pctX)]
		g2 <- delete.edges(g,which(edgeWt < thresh))

	} else { # keep bottom-most edges 
		thresh <- wt[cutPos(1-pctX)]
		g2 <- delete.edges(g,which(edgeWt > thresh))
	}

	# Convert the surviving edges back to a source/target/weight table.
	df <- as.data.frame(get.edgelist(g2))
	df[,1] <- as.character(df[,1])
	df[,2] <- as.character(df[,2])
	df$weight <- edge_attr(g2,name="weight")
	colnames(df) <- c("AliasA","AliasB","weight")

	df
}
37 | 
38 | 


--------------------------------------------------------------------------------
/R/pruneNet_pctX.R:
--------------------------------------------------------------------------------
 1 | #' Prune network by retaining strongest edges
 2 | #'
 3 | #' @param net (data.frame)  Network to prune. Columns are: source,target,weight
 4 | #' @param vertices (char) node names. Should match those in net[,1:2]
 5 | #' @param pctX (numeric 0 to 1) Fraction of top/bottom edges to retain
 6 | #' @param useTop (logical) if TRUE prunes to top pctX edges; else
 7 | #' prunes to bottom pctX edges
 8 | #' @return (data.frame) pruned network. Three columns: AliasA, AliasB, and 
 9 | #' weight
10 | #' @importFrom igraph graph_from_data_frame
11 | #' @importFrom igraph delete.edges
12 | #' @importFrom igraph get.edgelist
13 | #' @importFrom igraph edge_attr
14 | #' @importFrom igraph E
15 | #' @export
pruneNet_pctX <- function(net,vertices, pctX=0.1, useTop=TRUE) {
	g <- igraph::graph_from_data_frame(net,vertices=vertices)
	edgeWt <- E(g)$weight
	wt <- sort(edgeWt, decreasing=TRUE)
	numEdges <- length(wt)

	# Position in the descending weight vector that serves as the cutoff.
	# The raw product can be fractional, or fall just below a whole number
	# through floating-point error (e.g. 10 * (1 - 0.9)); an index that
	# truncates to 0 selects nothing, so no edge would be pruned. Round
	# down with a small tolerance and clamp to a valid position.
	cutPos <- function(frac) {
		pos <- floor(numEdges * frac + 1e-9)
		min(max(pos, 1), max(numEdges, 1))
	}

	if (useTop) { # keep topmost edges
		thresh <- wt[cutPos(pctX)]
		g2 <- delete.edges(g,which(edgeWt < thresh))

	} else { # keep bottom-most edges 
		thresh <- wt[cutPos(1-pctX)]
		g2 <- delete.edges(g,which(edgeWt > thresh))
	}

	# Convert the surviving edges back to a source/target/weight table.
	df <- as.data.frame(get.edgelist(g2))
	df[,1] <- as.character(df[,1])
	df[,2] <- as.character(df[,2])
	df$weight <- edge_attr(g2,name="weight")
	colnames(df) <- c("AliasA","AliasB","weight")

	df
}
37 | 
38 | 


--------------------------------------------------------------------------------
/R/runFeatureSelection.R:
--------------------------------------------------------------------------------
 1 | #' Run GeneMANIA cross-validation with a provided subset of networks
 2 | #'
 3 | #' @details Creates query files, runs GM for 10-fold cross validation.
 4 | #' @param trainID_pred (char) vector with universe of predictor class
 5 | #' patients (ie all that can possibly be included in the query file
 6 | #' @param outDir (char) directory to store query file and GM results
 7 | #' @param dbPath (char) path to GeneMANIA generic database with
 8 | #' training population
 9 | #' @param numTrainSamps (integer) number of training samples in total
10 | #' leave blank to use 5 training samples in order to save memory
11 | #' @param incNets (char) vector of networks to include in this analysis
12 | #' (features/pathway names). Useful for subset-based feature selection
13 | #' @param orgName (char) organism name for GeneMANIA generic database.
14 | #' The default value will likely never need to be changed.
15 | #' @param fileSfx (char) file suffix
16 | #' @param verbose (logical) print messages
17 | #' @param numCores (logical) num parallel threads for cross-validation
18 | #' @param JavaMemory (integer) memory for GeneMANIA run, in Gb.
19 | #' @param verbose_runQuery (logical) print messages for runQuery()
20 | #' @param debugMode (logical) when TRUE runs jobs in serial instead of parallel and 
21 | #' prints verbose messages. Also prints system Java calls and prints all standard out
22 | #' and error output associated with these calls.
23 | #' @param ... args for \code{makeQueries()}
24 | #' @return No value. Side effect of generating feature scores.
25 | #' @examples
26 | #' data(MB.pheno)
27 | #' dbPath <- system.file("extdata","dbPath",package="netDx")
28 | #' runFeatureSelection(MB.pheno$ID[which(MB.pheno$STATUS%in% 'WNT')],
29 | #' 		tempdir(),dbPath,103L)
30 | #' @export
runFeatureSelection <- function(trainID_pred, outDir, dbPath, 
		numTrainSamps = NULL, incNets = "all", orgName = "predictor", 
		fileSfx = "CV", verbose = FALSE, numCores = 2L, 
    JavaMemory = 6L, verbose_runQuery = FALSE, debugMode=FALSE, ...) {

    if (!file.exists(outDir)) {
        dir.create(outDir)
    }

    # Choose the patients that go into each query.
    if (verbose) {
        message("\tWriting queries:\n")
    }
    qSamps <- makeQueries(trainID_pred, verbose = verbose, ...)

    # Path of the m-th query file: <outDir><sep><fileSfx>_<m>.query
    queryPath <- function(m) {
        paste(outDir, sprintf("%s_%i.query", fileSfx, m), sep = getFileSep())
    }

    # Fall back to 5 training samples when none were given; this is only
    # reached (and announced) when there is at least one query to write.
    if (length(qSamps) > 0 && is.null(numTrainSamps)) {
        numTrainSamps <- 5
        message("Memory saver option: using 5 training samples for CV")
    }

    # Write one query file per entry of qSamps and collect the paths.
    qFiles <- list()
    for (m in seq_along(qSamps)) {
        curPath <- queryPath(m)
        writeQueryFile(qSamps[[m]], incNets, numTrainSamps, curPath, orgName)
        qFiles <- append(qFiles, curPath)
    }

    # Hand all query files to runQuery() in a single call.
    runQuery(dbPath, qFiles, outDir, JavaMemory = JavaMemory,
        verbose = verbose_runQuery, numCores = numCores,
        debugMode = debugMode)
}
67 | 


--------------------------------------------------------------------------------
/R/runProfileToNetworks.R:
--------------------------------------------------------------------------------
 1 | #' Convert profiles to interaction networks before integration
 2 | #'
 3 | #' @details In preparation for network integration. When using GeneMANIA's
 4 | #' built-in functionality to create PSN using ProfileToNetworkDriver, this
 5 | #' step needs to run to process profiles to networks. These are currently used
 6 | #' for Pearson correlation-based networks and those using mutual information.
 7 | #' @param netDir (char) directory with .profile files
 8 | #' @param outDir (char) path to directory where interaction networks are to be printed
 9 | #' @param simMetric (char) similarity measure to use in converting 
10 | #' profiles to interaction networks. 
11 | #' @param numCores (integer) number of cores for parallel processing 
12 | #' @param P2N_threshType (char) Most users shouldn't have to change this.
13 | #' ProfileToNetworkDriver's threshold option. One of 'off|auto'. 
14 | #' unit testing
15 | #' @param P2N_maxMissing (integer 5-100)
16 | #' @param JavaMemory (integer) Memory for GeneMANIA (in Gb)
17 | #' @param GM_jar (char) path to GeneMANIA jar file
18 | #' @param netSfx (char) pattern for finding network files in \code{netDir}.
19 | #' @param debugMode (logical) if TRUE runs profile generation in serial 
20 | #' rather than parallel, allowing debugging
21 | #' @return No value. Side effect of creating interaction networks in outDir.
22 | #' @export
convertProfileToNetworks <- function(netDir,outDir=tempdir(),
	simMetric="pearson",numCores=1L,
	JavaMemory=4L,GM_jar=NULL,P2N_threshType="off",P2N_maxMissing=100,
	netSfx="txt$",debugMode=FALSE) {
# NOTE(review): `netSfx` is accepted but has no effect on the work done
# below; profiles are located with the fixed pattern "profile$".

if (is.null(GM_jar)) GM_jar <- getGMjar_path()

# Map the similarity name onto the value passed to the driver's -cor
# option. Validate before any cluster is started so that an unsupported
# value fails immediately with a clear message.
corType <- switch(simMetric,
	pearson = "PEARSON",
	MI = "MUTUAL_INFORMATION",
	stop(sprintf(paste("convertProfileToNetworks: unsupported simMetric",
		"'%s' (expected 'pearson' or 'MI')"), simMetric)))

cl <- makeCluster(numCores, 
	outfile = paste(netDir, "P2N_log.txt",sep=getFileSep()))
# Shut the workers down however the function exits, including on error.
on.exit(stopCluster(cl), add = TRUE)
registerDoParallel(cl)

# Arguments shared by every java invocation.
args <- c(sprintf("-Xmx%iG", JavaMemory), "-cp", GM_jar)
args <- c(args, 
	paste("org.genemania.engine.core.",
	"evaluation.ProfileToNetworkDriver",sep=""))
args <- c(args, c("-proftype", "continuous", "-cor", corType))
args <- c(args, c("-threshold", P2N_threshType, 
	"-maxmissing", 
	sprintf("%1.1f", P2N_maxMissing)))
profDir <- netDir

curProf <- ""

# Serial foreach operator in debug mode, parallel otherwise.
`%myinfix%` <- if (debugMode) `%do%` else `%dopar%`
foreach(curProf = dir(path = profDir, pattern = "profile$")) %myinfix% {
	if (debugMode) print(curProf)
	# Per-profile arguments: input file, output file, synonyms file.
	args2 <- c("-in", paste(profDir, curProf,sep=getFileSep()))
	args2 <- c(args2, "-out", paste(outDir, 
		sub(".profile", ".txt", curProf),sep=getFileSep()))
	args2 <- c(args2, "-syn", 
		paste(netDir,"..","1.synonyms",sep=getFileSep()),
		"-keepAllTies", "-limitTies")

	if (debugMode) stdout <- "" else stdout <- NULL
	system2("java", args = c(args, args2), wait = TRUE, 
		stdout = stdout)
}

# Inspect the first line of the first .txt file in outDir; if it contains
# a comma, run replacePattern() over the .txt files of outDir.
tmp <- dir(path=outDir,pattern="txt$")[1]
tmp <- sprintf("%s/%s",outDir,tmp)
if (any(grepl(pattern=",",readLines(tmp,n=1)))) { # detect comma
	replacePattern(path=outDir,fileType="txt$")
}

invisible(NULL)
}
76 | 


--------------------------------------------------------------------------------
/R/runQuery.R:
--------------------------------------------------------------------------------
 1 | #' Run a query
 2 | #'
 3 | #' @param dbPath (char) path to directory with GeneMANIA generic database
 4 | #' @param queryFiles (list(char)) paths to query files
 5 | #' @param resDir (char) path to output directory
 6 | #' @param verbose (logical) print messages
 7 | #' @param JavaMemory (integer) Memory for GeneMANIA (in Gb) - a total of 
 8 | #' numCores*GMmemory will be used and distributed for all GM threads
 9 | #' @param numCores (integer) number of CPU cores for parallel processing
10 | #' @param debugMode (logical) when TRUE runs jobs in serial instead of parallel and 
11 | #' prints verbose messages. Also prints system Java calls.
12 | #' @return (char) path to GeneMANIA query result files with patient similarity
13 | #' rankings (*PRANK) and feature weights (*NRANK)
14 | #' of results file
15 | #' @examples
16 | #' dbPath <- system.file("extdata","dbPath",package="netDx")
17 | #' queryFile <- system.file("extdata","GM_query.txt",package="netDx")
18 | #' runQuery(dbPath, queryFile,tempdir())
19 | #' @export
runQuery <- function(dbPath, queryFiles, resDir, verbose = TRUE, 
		JavaMemory = 6L, numCores = 1L,debugMode=FALSE) {
    
    GM_jar <- getGMjar_path()
    qBase <- basename(queryFiles[[1]][1])

    # Ask the JVM for its version. The output is empty when the call fails
    # or prints nothing to stdout; that case is treated as Java 8 below.
    java_ver <- suppressWarnings(system2("java", 
        args="--version",stdout=TRUE,stderr=NULL))
    # Major version = first run of digits on the first output line
    # (NOTE(review): assumes lines shaped like "openjdk 17.0.2 ..." -
    # confirm against the JVMs in use).
    javaMajor <- NA_integer_
    if (length(java_ver) > 0) {
        firstNum <- regmatches(java_ver[1], regexpr("[0-9]+", java_ver[1]))
        if (length(firstNum) == 1) 
            javaMajor <- suppressWarnings(as.integer(firstNum))
    }

    # Comparing the parsed number (rather than matching a fixed list of
    # versions) keeps every release from 11 upwards on the branch that
    # omits -d64.
    args <- c()
    if (!is.na(javaMajor) && javaMajor >= 11L) {
        if (verbose) message("Java 11 or later detected")
    } else {
        if (verbose) message("Java 8 detected")
        args <- c(args,"-d64")
    }

    args <- c(args, sprintf("-Xmx%iG", JavaMemory * numCores), "-cp", GM_jar)
    args <- c(args, "org.genemania.plugin.apps.QueryRunner")
    args <- c(args, "--data", dbPath, "--in", "flat", "--out", "flat")
    args <- c(args, "--threads", numCores, "--results", resDir, 
        unlist(queryFiles))
    args <- c(args, "--netdx-flag", "true")
    
    # file is not actually created - is already split in PRANK and 
    # NRANK segments on GeneMANIA side
    resFile <- paste(resDir,sprintf("%s-results.report.txt",qBase),
        sep=getFileSep())
    t0 <- Sys.time()
    if (debugMode) {
        message(sprintf("java %s",paste(args,collapse=" ")))
        system2("java", args, wait = TRUE)
    } else {
        system2("java", args, wait = TRUE, stdout = NULL, stderr = NULL)
    }
    if (verbose) {
        # Force seconds: a bare difftime picks its own unit (secs, mins,
        # ...), which would make the "s" label wrong for long runs.
        elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
        message(sprintf("QueryRunner time taken: %1.1f s", elapsed))
    }
    Sys.sleep(3)
    resFile
}
62 | 


--------------------------------------------------------------------------------
/R/simpleCap.R:
--------------------------------------------------------------------------------
 1 | #' simple capitalization
 2 | #' @details used to format feature names so they are not in all-caps
 3 | #' @param x (char) name
 4 | #' @return (char) Changes case so start of each word is in upper-case, and
 5 | #' the rest is in lowercase
 6 | #' @examples simpleCap('this IS a TEST sEnTenCe')
 7 | #' @export
 8 | simpleCap <- function(x) {
 9 |     x <- tolower(x)
10 |     s <- strsplit(x, " ")[[1]]
11 |     x <- paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", 
12 | 			collapse = " ")
13 |     x
14 | }
15 | 


--------------------------------------------------------------------------------
/R/splitTestTrain_resampling.R:
--------------------------------------------------------------------------------
 1 | #' Split samples into train/test
 2 | #' 
 3 | #' @param pheno_DF (data.frame) patient information
 4 | #' Must contain the following columns:
 5 | #' 1. ID: (char) patient IDs
 6 | #' 2. STATUS: (char) patient classes. Values not equal to \code{predClass}
 7 | #' will be considered as 'other'
 8 | #' Expects rows with unique IDs
 9 | #' @param pctT (numeric between 0 and 1) Fraction of patients to randomly
10 | #' assign to the training set. The remainder will be used for blind test 
11 | #' set
12 | #' @param verbose (logical) print messages
13 | #' @return (char) vector of length \code{nrow(pheno_DF)}, with values of 
14 | #' 'TRAIN' or 'TEST'. The order corresponds to pheno_DF; a patient labelled
15 | #' 'TRAIN' has been assigned to the training set, and one labelled 'TEST'
16 | #' as been assigned to the test set.
17 | #' @examples
18 | #' data(pheno)
19 | #' x <- splitTestTrain(pheno)
20 | #' @export
21 | splitTestTrain <- function(pheno_DF, pctT = 0.7, verbose = FALSE) {
22 |     
23 |     lvls <- unique(pheno_DF$STATUS)
24 |     IS_TRAIN <- rep("TEST", nrow(pheno_DF))
25 |     for (lv in lvls) {
26 |         idx <- which(pheno_DF$STATUS %in% lv)
27 |         IS_TRAIN[sample(idx, floor(pctT * length(idx)), FALSE)] <- "TRAIN"
28 |     }
29 |     
30 |     IS_TRAIN <- factor(IS_TRAIN, levels = c("TRAIN", "TEST"))
31 |     
32 |     pheno_DF <- cbind(pheno_DF, IS_TRAIN = IS_TRAIN)
33 |     if (verbose) 
34 |         print(table(pheno_DF[, c("STATUS", "IS_TRAIN")]))
35 |     
36 |     return(IS_TRAIN)
37 | }
38 | 


--------------------------------------------------------------------------------
/R/updateNets.R:
--------------------------------------------------------------------------------
 1 | #' Synchronize patient set in sample table and network table.
 2 | #'
 3 | #' @details This function is useful in applications with highly missing
 4 | #' data or where each patient contributes data points not present in the
 5 | #' others; e.g. networks based on individual
 6 | #' patient CNVs, which are highly sparse. In such a scenario, any kind of
 7 | #' patient subsetting - for example, limiting to training samples - changes
 8 | #' the population of eligible networks for analysis. Networks that no longer
 9 | #' have samples, or that have one patient with the neighbour removed, have
10 | #' to be excluded. This function updates networks and patients so that 
11 | #' each network contains at least two patients and only patients in 
12 | #' networks are retained. In other words, it keeps pheno_DF and p_net in 
13 | #' sync.
14 | #' @param p_net (matrix) rows are patients, columns are networks.
15 | #' a[i,j] = 1 if patient i occurs in network j, else 0.
16 | #' @param pheno_DF (data.frame) patient ID and STATUS. 
17 | #' @param writeNewNets (logical) if TRUE writes new networks to 
18 | #' \code{newNetDir}.
19 | #' @param oldNetDir (char) path to directory with networks to be updated
20 | #' @param newNetDir (char) path to directory where updated networks are
21 | #' to be written
22 | #' @param verbose (logical) print messages
23 | #' @param ... passed to pruneNets()
24 | #' @return list with updated p_net and pheno_DF. pheno_DF will contain IDs
25 | #' in the updated p_net. p_net will contain only those networks with 
26 | #' 2+ patients and those patients present in 1+ network.
27 | #' @export
28 | #' @examples
29 | #' data(npheno)
30 | #' netDir <- system.file("extdata","example_nets",package="netDx")
31 | #' netmat <- countPatientsInNet(netDir,dir(netDir,pattern='txt$'), npheno[,1])
32 | #' x <- updateNets(netmat, npheno,writeNewNets=FALSE)
33 | updateNets <- function(p_net, pheno_DF, writeNewNets = TRUE, oldNetDir, 
34 | 		newNetDir, verbose = TRUE, ...) {
35 |     idx <- which(colSums(p_net) >= 2)
36 |     p_net <- p_net[, idx]
37 |     idx <- which(rowSums(p_net) >= 1)
38 |     p_net <- p_net[idx, ]
39 |     if (verbose) {
40 |         message("Update: (num patients) x (num networks)")
41 |         print(dim(p_net))
42 |     }
43 |     
44 |     # training samples are only those that occur in label-enriched networks
45 |     pheno_DF <- pheno_DF[which(pheno_DF$ID %in% rownames(p_net)), ]
46 |     
47 |     if (writeNewNets) {
48 |         pruneNets(oldNetDir, newNetDir, filterNets = colnames(p_net), 
49 | 					filterIDs = rownames(p_net),  ...)
50 |     }
51 |     
52 |     return(list(p_net = p_net, pheno_DF = pheno_DF))
53 | }
54 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | #' platform-specific file separator
 2 | #' 
 3 | #' @description Returns OS-specific file separator
 4 | #' @return (char) "\\" if Windows, else "/"
 5 | #' @examples
 6 | #' getFileSep()
 7 | #' @export
 8 | getFileSep <- function(){
 9 |   if (.Platform$OS.type=="windows") return("\\")
10 |   else return(.Platform$file.sep)
11 | }
12 | 
#' Generate random alphanumerical strings of length 10
#'
#' @details Used to create multiple temporary directories during an R session.
#' Each string consists of five upper-case letters, a zero-padded number
#' drawn from 1..9999, and one trailing upper-case letter.
#' @param numStrings (integer) number of strings to generate
#' @return (char) vector of length \code{numStrings}, each element a
#' 10-char alphanumerical string
#' @examples
#' randAlphanumString()
#' @export
randAlphanumString <- function(numStrings = 1L) {
  # draw the five leading letters position by position
  letterCols <- lapply(seq_len(5), function(i) {
    sample(LETTERS, numStrings, replace = TRUE)
  })
  prefix <- do.call(paste0, letterCols)
  digits <- sprintf("%04d", sample(9999, numStrings, replace = TRUE))
  suffix <- sample(LETTERS, numStrings, replace = TRUE)
  paste0(prefix, digits, suffix)
}
26 | 


--------------------------------------------------------------------------------
/R/writeQueryBatchFile.R:
--------------------------------------------------------------------------------
 1 | #' Write batch.txt file required to create GeneMANIA database
 2 | #'
 3 | #' @details This file is used to compile features into a single database
 4 | #' for feature selection. 
 5 | #' @param netDir (char) path to dir with networks
 6 | #' @param netList (char) vector of network names
 7 | #' @param outDir (char) directory to write batch file
 8 | #' @param idFile (char) path to file with patient IDs
 9 | #' @param orgName (char) organism name. Don't change the default unless
10 | #' you know what you are doing.
11 | #' @param orgDesc (char) organism description. Similar to \code{orgName},
12 | #' don't change the default
13 | #' @param orgAlias (char) organism alias. Similar to \code{orgName}, don't
14 | #' change the default.
15 | #' @param taxID (integer) taxonomyID required for GeneMANIA . Similar to 
16 | #' \code{orgName}, don't change the default.
17 | #' @return No value. Side effect of writing batch file to 
18 | #' \code{<outDir>/batch.txt}.
19 | #' @export
20 | #' @examples
21 | #' data(npheno)
22 | #' netDir <- system.file("extdata","example_nets",package="netDx")
23 | #' netList <- dir(netDir,pattern='txt$')
24 | #' writeQueryBatchFile(netDir,netList, tempdir(), npheno$ID)
25 | writeQueryBatchFile <- function(netDir, netList, outDir = tempdir(), idFile, 
26 | 		orgName = "predictor", 
27 |     orgDesc = "my_predictor", orgAlias = "my_predictor", taxID = 1339) {
28 |     
29 |     outF <- paste(outDir,"batch.txt",sep=getFileSep())
30 |     fileConn <- file(outF, "w")
31 |     
32 |     # organism info
33 |     tmp <- c("#organism", "id", "file", "name", "description", "alias", 
34 | 				"taxonomyid")
35 |     tmp2 <- c("organism", basename(idFile), orgName, orgDesc, orgAlias, 
36 | 				as.character(taxID))
37 |     writeLines(sprintf("%s", paste(tmp, collapse = "\t")), con = fileConn)
38 |     writeLines(sprintf("%s\n", paste(tmp2, collapse = "\t")), con = fileConn)
39 |     rm(tmp, tmp2)
40 |     
41 |     # group info
42 |     groupName <- "dummy_group"
43 |     groupCode <- "geneset_1"
44 |     groupDesc <- "dummy_group"
45 |     tmp <- c("#group", "name", "code", "description", "RRGGBB colour", 
46 | 				"organism")
47 |     tmp2 <- c("group", groupName, groupCode, groupDesc, "ff00ff", orgName)
48 |     writeLines(sprintf("%s", paste(tmp, collapse = "\t")), con = fileConn)
49 |     writeLines(sprintf("%s\n", paste(tmp2, collapse = "\t")), con = fileConn)
50 |     rm(tmp, tmp2)
51 |     
52 |     # network info - header
53 |     tmp <- c("#network", "filename", "name", "description", "group code")
54 |     writeLines(sprintf("%s", paste(tmp, collapse = "\t")), fileConn)
55 |     rm(tmp)
56 |     close(fileConn)
57 |     
58 |     # write networks
59 |     net_DF <- data.frame(type = "network", filename = netList, 
60 | 				name = sub(".txt", 
61 |         "", netList), description = netList, groupCode = groupCode)
62 |     write.table(net_DF, file = outF, sep = "\t", col.names = FALSE, 
63 | 				row.names = FALSE, 
64 |         quote = FALSE, append = TRUE)
65 | }
66 | 


--------------------------------------------------------------------------------
/R/writeQueryFile.R:
--------------------------------------------------------------------------------
 1 | #' Wrapper to write GeneMANIA query file
 2 | #'
 3 | #' @param qSamps (char) vector of patient IDs in query
 4 | #' @param incNets (char) vector of networks to include in this analysis
 5 | #' (features/pathway names). Useful for subset-based feature selection
 6 | #' @param numReturn (integer) number of patients to return in ranking file
 7 | #' @param outFile (char) path to output file
 8 | #' @param orgName (char) organism name
 9 | #' @return No value. Side effect of writing the query file to
10 | #' \code{outFile}
11 | #' @examples
12 | #' data(pheno)
13 | #' writeQueryFile(pheno$ID[seq_len(5)], 'all',nrow(pheno), 'myquery.txt')
14 | #' @export
15 | writeQueryFile <- function(qSamps, incNets = "all", numReturn = 1L, outFile, 
16 | 		orgName = "predictor") {
17 |     fileConn <- file(outFile, "w")
18 |     writeLines(sprintf("%s", orgName), con = fileConn)  # org name
19 |     writeLines(sprintf("%s", paste(qSamps, collapse = "\t")), con = fileConn)
20 |     # networks
21 |     writeLines(sprintf("%s", paste(incNets, collapse = "\t")), con = fileConn)
22 |     writeLines(sprintf("%i", numReturn), con = fileConn)  #num2return
23 |     writeLines("automatic", con = fileConn)  # combining
24 |     close(fileConn)
25 | }
26 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
# Package load hook: sets the `java.parameters` option so that a 10 Gb
# maximum Java heap is requested.
# NOTE(review): presumably read by rJava when the JVM is started - confirm.
.onLoad <- function(libname, pkgname) {
  javaHeap <- "-Xmx10G"
  options(java.parameters = javaHeap)
}


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Update: Sep 2021: netDx is now maintained at [https://github.com/realpailab/netdx](https://github.com/realpailab/netdx).
 3 | ## Development in this repo is now frozen.
 4 | 
 5 | netDx is a general-purpose algorithm for building patient classifiers by using patient similarity networks as features. It excels at interpretability and handling missing data. It also allows custom grouping rules for features, notably grouping genes into pathways. It integrates with RCy3 for network visualization of predictive pathways.
 6 | 
 7 | As of February 2020, netDx is available via the BioConductor repository. 
 8 | Visit http://bioconductor.org/packages/release/bioc/html/netDx.html to install the package and see worked examples.
 9 | 
10 | Contact Shraddha Pai at shraddha.pai@utoronto.ca in case of questions.
11 | 
12 | References: 
13 | 
14 | 1. Pai S, Hui S, Isserlin R, Shah MA, Kaka H and GD Bader (2019). netDx: Interpretable patient classification using patient similarity networks. *Mol Sys Biol*. 15: e8497. [Read the paper here](https://www.embopress.org/doi/full/10.15252/msb.20188497).
15 | 2. Pai S, Weber P, Isserlin R, Kaka H, Hui S, Shah MA, Giudice L, Giugno R, Nøhr AK, Baumbach J, GD Bader (2021). netDx: Software for building interpretable patient classifiers by multi-'omic data integration using patient similarity networks. *F1000 Research*. 9:1239.
16 | 


--------------------------------------------------------------------------------
/data/MB.pheno.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/MB.pheno.rda


--------------------------------------------------------------------------------
/data/cnv_GR.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_GR.rda


--------------------------------------------------------------------------------
/data/cnv_TTstatus.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_TTstatus.rda


--------------------------------------------------------------------------------
/data/cnv_netPass.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_netPass.rda


--------------------------------------------------------------------------------
/data/cnv_netScores.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_netScores.rda


--------------------------------------------------------------------------------
/data/cnv_patientNetCount.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_patientNetCount.rda


--------------------------------------------------------------------------------
/data/cnv_pheno.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/cnv_pheno.rda


--------------------------------------------------------------------------------
/data/confmat.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/confmat.rda


--------------------------------------------------------------------------------
/data/featScores.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/featScores.rda


--------------------------------------------------------------------------------
/data/genes.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/genes.rda


--------------------------------------------------------------------------------
/data/modelres.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/modelres.rda


--------------------------------------------------------------------------------
/data/npheno.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/npheno.rda


--------------------------------------------------------------------------------
/data/pathwayList.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/pathwayList.rda


--------------------------------------------------------------------------------
/data/pathway_GR.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/pathway_GR.rda


--------------------------------------------------------------------------------
/data/pheno.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/pheno.rda


--------------------------------------------------------------------------------
/data/pheno_full.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/pheno_full.rda


--------------------------------------------------------------------------------
/data/predRes.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/predRes.rda


--------------------------------------------------------------------------------
/data/silh.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/silh.rda


--------------------------------------------------------------------------------
/data/toymodel.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/toymodel.rda


--------------------------------------------------------------------------------
/data/xpr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/data/xpr.rda


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | c(bibentry(bibtype = "Article",
 2 |            key = "netDx-methods",
 3 |            title = "{netDx: interpretable patient classification using integrated patient similarity networks}",
 4 |            author = c(
 5 |                person("Shraddha", "Pai"),
 6 |                person("Shirley", "Hui"),
 7 |                person("Ruth", "Isserlin"),
 8 |                person(c("Muhammad","A"), "Shah"),
 9 |                person("Hussam","Kaka"),
10 |                person(c("Gary", "D."), "Bader")
11 |            ),
12 |            year = 2019,
13 |            journal = "Molecular Systems Biology",
14 |            volume = "15",
15 |            pages = "e8497",
16 |            doi = "10.15252/msb.20188497",
17 |            pubmed = "30872331",
18 |            header = "The netDx algorithm is described in:"),
19 | bibentry(bibtype = "Article",
20 |            key = "netDx-software",
21 |            title = "{netDx: Software for building interpretable patient classifiers by multi-'omic data integration using patient similarity networks}",
22 |            author = c(
23 |                person("Shraddha", "Pai"),
24 |                person("Philipp", "Weber"),
25 |                person("Ruth", "Isserlin"),
26 |                person("Hussam","Kaka"),
27 |                person("Shirley", "Hui"),
28 |                person(c("Muhammad","A"), "Shah"),
29 |                person("Luca", "Giudice"),
30 |                person("Rosalba", "Giugno"),
31 |                person(c("Anne","Krogh"), "Nøhr"),
32 |                person("Jan", "Baumbach"),
33 |                person(c("Gary", "D."), "Bader")
34 |            ),
35 |            year = 2021,
36 |            journal = "F1000Research",
37 |            volume = "9",
38 |            pages = "1239",
39 |            doi = "10.12688/f1000research.26429.2",
40 |            pubmed = "33628435",
41 |            header = "The netDx package is described in:")
42 | 
43 | )


--------------------------------------------------------------------------------
/inst/extdata/GM_NRANK/CV_1.query-results.report.txt.NRANK:
--------------------------------------------------------------------------------
1 | #This Report has been generated with a netDx-specific version of GeneMania v3.5.
2 | Network	Weight
3 | GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS.profile	56.37
4 | MUCIN_CORE_1_AND_CORE_2__I_O__I_-GLYCOSYLATION.profile	31.50
5 | RETINOL_BIOSYNTHESIS.profile	12.13
6 | 
7 | 


--------------------------------------------------------------------------------
/inst/extdata/GM_NRANK/CV_2.query-results.report.txt.NRANK:
--------------------------------------------------------------------------------
1 | #This Report has been generated with a netDx-specific version of GeneMania v3.5.
2 | Network	Weight
3 | GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS.profile	58.25
4 | MUCIN_CORE_1_AND_CORE_2__I_O__I_-GLYCOSYLATION.profile	31.57
5 | RETINOL_BIOSYNTHESIS.profile	10.19
6 | 
7 | 


--------------------------------------------------------------------------------
/inst/extdata/GM_query.txt:
--------------------------------------------------------------------------------
1 | predictor
2 | MB.128	MB.145	MB.147	MB.15	MB.178
3 | all
4 | 103
5 | automatic
6 | 


--------------------------------------------------------------------------------
/inst/extdata/INSTALL/Dockerfile:
--------------------------------------------------------------------------------
 1 | # use Dockerized R ("Rocker") as parent image
 2 | FROM ubuntu
 3 | 
 4 | USER root
 5 | 
 6 | 
 7 | ENV DEBIAN_FRONTEND=noninteractive
 8 | ENV TZ 'America/New York'
 9 | 
10 | # R pre-requisites
11 | RUN echo $TZ > /etc/timezone && \ 
12 | 	apt-get update && \
13 | 	apt-get install -y tzdata && \
14 |     apt-get install -y --no-install-recommends \
15 |     gfortran \
16 | 	r-base \ 
17 | 	openjdk-8-jre \ 
18 |     gcc make g++ \
19 | 	zlib1g-dev libssl-dev libssh2-1-dev libcurl4-openssl-dev \
20 | 	liblapack-dev liblapack3 libopenblas-base libopenblas-dev \
21 | 	libxml2-dev
22 | # && apt-get clean && \
23 | #    rm -rf /var/lib/apt/lists/*
24 | 
25 | 
26 | RUN echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile
27 | RUN Rscript -e "install.packages(c('devtools','curl','bigmemory','foreach','combinat','doParallel','ROCR','pracma','RColorBrewer','reshape2','ggplot2', 'caroline', 'rmarkdown'))"
28 | RUN Rscript -e "source('http://bioconductor.org/biocLite.R');biocLite(c('Biobase','GenomicRanges', 'RCy3'))";
29 | RUN Rscript -e "install.packages(c('pheatmap','RColorBrewer','gProfileR','ggplot2','glmnet','igraph'))"
30 | RUN Rscript -e "devtools::install_github('cytoscape/r2cytoscape')"
31 | 
32 | # python required for using genemania, pandoc for compiling the html vignettes
33 | RUN apt-get install -y python2.7 python-pip vim git pandoc
34 | 
35 | # move netDx package and code
36 | WORKDIR /examples
37 | ADD . /examples
38 | 
39 | # clone the most recent netDx version and install the R package
40 | RUN git clone https://github.com/BaderLab/netDx.git                                                                                                                                                                              
41 | RUN cd netDx && R CMD INSTALL netDx                                                                                                  
42 | RUN cd netDx && R CMD INSTALL netDx.examples
43 | 


--------------------------------------------------------------------------------
/inst/extdata/INSTALL/INSTALL_OSX.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Script to automate dependency install for netDx
 4 | 
 5 | echo "* Checking if Java installed ..."
 6 | if java -version 2>&1 > /dev/null |  grep -q "java version" ; then
 7 |   echo -e "\tdone."
 8 | else {
 9 |   echo -e "*** ERROR: Java not found; install (https://www.java.com/en/download/) or add to path"
10 |   exit 0;
11 |  }
12 | fi
13 | 
14 | echo "* Checking if Python installed ..."
15 | if [[ $(python --version 2>&1)  ]]
16 |     then
17 |         echo -e "\tdone"
18 |     else {
19 |         echo -e "*** ERROR: Python not found; install (https://www.python.org/downloads/) or add to path"
20 | 	exit 0;
21 | 	}
22 | fi
23 | 
24 | echo "* Checking if R installed ..."
25 | if R --version | grep -q "R version" ;  
26 |     then
27 | 			 ver=`R --version | grep "R version" | cut -f 3 -d " "`
28 | 			echo -e "\tversion found: $ver"
29 | 	 	   ver1=`echo $ver | cut -f1 -d"."`
30 | 		   ver2=`echo $ver | cut -f2 -d"."`
31 | 			if [ $ver1 -ge 3 ] &&  [ $ver2 -ge 6 ]; then
32 |         echo -e "\tdone"
33 | 		  else {
34 | 				echo ""
35 | 				echo -e "\t*** ERROR: Version 3.6+ of R required. Install from https://cran.r-project.org/, or add to path"
36 | 			  exit 0
37 | 		}
38 | 			fi
39 |     else {
40 | 				echo -e "\t*** ERROR: R not found. Install R 3.6+ from https://cran.r-project.org/, or add to path"
41 | 	exit 0;
42 | 	}
43 | fi
44 | 
45 | # install R packages
46 | echo "* Installing R dependencies"
47 | echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile
48 | 
49 | declare -a PKGS=( devtools curl bigmemory foreach combinat doParallel ROCR pracma RColorBrewer reshape2 ggplot2 caroline rmarkdown igraph glmnet );
50 | for p in ${PKGS[@]};do 
51 | 	echo -e "\t* Checking for $p"
52 | 	Rscript -e "if(!requireNamespace(\"$p\",quietly=TRUE)){ install.packages(\"$p\")}"
53 | done
54 | 
55 | echo "* Installing BioConductor if required"
56 | Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)){install.packages("BiocManager")}'
57 | 
58 | echo "* Installing BioConductor dependencies if required"
59 | declare -a PKGS=( GenomicRanges RCy3 );
60 | for p in ${PKGS[@]};do 
61 | 	echo -e "\t* Checking for $p"
62 | 	Rscript -e "if(!requireNamespace(\"$p\",quietly=TRUE)){ BiocManager::install(\"$p\")}"
63 | done
64 | 
65 | echo "* Checking if pandoc installed (needed to run tutorials) ..."
66 | if pandoc -v | grep -q "^pandoc " ;  
67 |     then
68 | 			 ver=`pandoc -v | grep "^pandoc " | cut -f 2 -d " "`
69 | 			echo -e "\tversion found: $ver"
70 | 	 	   ver1=`echo $ver | cut -f1 -d"."`
71 | 		   ver2=`echo $ver | cut -f2 -d"."`
72 | 			if [ $ver1 -ge 2 ] ; then
73 |         echo -e "\tdone"
74 | 		  else {
75 | 				echo ""
76 | 				echo -e "\t*** Version 1.12.3+ of pandoc not found! Installing..."
77 | 				curl -L https://github.com/jgm/pandoc/releases/download/2.7.2/pandoc-2.7.2-macOS.pkg -o pandoc.pkg
78 | 				sudo installer -pkg pandoc.pkg -target /
79 | 		}
80 | 			fi
81 |    else {
82 | 				echo -e "\t*** Version 1.12.3+ of pandoc not found! Installing..."
83 | 				curl -L https://github.com/jgm/pandoc/releases/download/2.7.2/pandoc-2.7.2-macOS.pkg -o pandoc.pkg
84 | 				sudo installer -pkg pandoc.pkg -target /
85 | 	}
86 | fi
87 | 
88 | cd ..
89 | echo "* Installing netDx"
90 | R CMD INSTALL netDx
91 | 


--------------------------------------------------------------------------------
/inst/extdata/INSTALL/INSTALL_Unix.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Script to automate dependency install for netDx
 4 | 
 5 | echo "* Installing Unix dependencies"
 6 | ENV TZ 'America/New York'
 7 | echo $TZ > /etc/timezone && \ 
 8 | 	apt-get update && \
 9 | 	apt-get install -y tzdata && \
10 |   apt-get install -y --no-install-recommends \
11 |     gfortran \
12 | 	  r-base \ 
13 | 	  openjdk-8-jre \ 
14 |     gcc make g++ \
15 | 	  zlib1g-dev libssl-dev libssh2-1-dev libcurl4-openssl-dev \
16 | 	  liblapack-dev liblapack3 libopenblas-base libopenblas-dev \
17 | 	  libxml2-dev
18 | 
19 | echo "* Checking if Java installed ..."
20 | if java -version 2>&1 > /dev/null |  grep -q "java version" ; then
21 |   echo -e "\tdone."
22 | else {
23 |   echo -e "*** ERROR: Java not found; install (https://www.java.com/en/download/) or add to path"
24 |   exit 0;
25 |  }
26 | fi
27 | 
28 | echo "* Checking if R installed ..."
29 | if R --version | grep -q "R version" ;  
30 |     then
31 | 			 ver=`R --version | grep "R version" | cut -f 3 -d " "`
32 | 			echo -e "\tversion found: $ver"
33 | 	 	   ver1=`echo $ver | cut -f1 -d"."`
34 | 		   ver2=`echo $ver | cut -f2 -d"."`
35 | 			if [ $ver1 -ge 3 ] &&  [ $ver2 -ge 6 ]; then
36 |         echo -e "\tdone"
37 | 		  else {
38 | 				echo ""
39 | 				echo -e "\t*** ERROR: Version 3.6+ of R required. Install from https://cran.r-project.org/, or add to path"
40 | 				echo -e "\t*** If upgrading, install r-base and r-base-dev"
41 | 				echo -e "\t*** Visit https://cran.r-project.org/bin/linux/ubuntu/README.html for details"
42 | 			  exit 0
43 | 		}
44 | 			fi
45 |     else {
46 | 				echo -e "\t*** ERROR: R not found. Install R 3.6+ from https://cran.r-project.org/, or add to path"
47 | 	exit 0;
48 | 	}
49 | fi
50 | 
51 | # install R packages
52 | echo "* Installing R dependencies"
53 | echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile
54 | 
55 | declare -a PKGS=( devtools curl bigmemory foreach combinat doParallel ROCR pracma RColorBrewer reshape2 ggplot2 caroline rmarkdown igraph glmnet );
56 | for p in ${PKGS[@]};do 
57 | 	echo -e "\t* Checking for $p"
58 | 	Rscript -e "if(!requireNamespace(\"$p\",quietly=TRUE)){ install.packages(\"$p\")}"
59 | done
60 | 
61 | echo "* Installing BioConductor if required"
62 | Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)){install.packages("BiocManager")}'
63 | 
64 | echo "* Installing BioConductor dependencies if required"
65 | declare -a PKGS=( GenomicRanges RCy3 );
66 | for p in ${PKGS[@]};do 
67 | 	echo -e "\t* Checking for $p"
68 | 	Rscript -e "if(!requireNamespace(\"$p\",quietly=TRUE)){ BiocManager::install(\"$p\")}"
69 | done
70 | 
71 | cd ..
72 | echo "* Installing netDx"
73 | R CMD INSTALL netDx
74 | 


--------------------------------------------------------------------------------
/inst/extdata/TGCT_mutSmooth_pheno.txt:
--------------------------------------------------------------------------------
 1 | ID	STATUS
 2 | TCGA.2G.AAF4	EARLY
 3 | TCGA.2G.AAF8	EARLY
 4 | TCGA.2G.AAFH	EARLY
 5 | TCGA.2G.AAFI	EARLY
 6 | TCGA.2G.AAFL	EARLY
 7 | TCGA.2G.AAFM	EARLY
 8 | TCGA.2G.AAFO	EARLY
 9 | TCGA.2G.AAFY	EARLY
10 | TCGA.2G.AAG8	EARLY
11 | TCGA.2G.AAGA	EARLY
12 | TCGA.2G.AAGJ	EARLY
13 | TCGA.2G.AAGP	EARLY
14 | TCGA.2G.AAGX	EARLY
15 | TCGA.2G.AAH3	EARLY
16 | TCGA.2G.AAH4	EARLY
17 | TCGA.2G.AAHA	EARLY
18 | TCGA.2G.AAHG	EARLY
19 | TCGA.2G.AAHL	EARLY
20 | TCGA.2G.AAHN	EARLY
21 | TCGA.2G.AAKG	EARLY
22 | TCGA.2G.AAKH	EARLY
23 | TCGA.2G.AAKL	EARLY
24 | TCGA.2G.AAKM	EARLY
25 | TCGA.2G.AAL5	EARLY
26 | TCGA.2X.A9D5	EARLY
27 | TCGA.2X.A9D6	EARLY
28 | TCGA.4K.AA1H	EARLY
29 | TCGA.4K.AA1I	EARLY
30 | TCGA.S6.A8JX	EARLY
31 | TCGA.SB.A6J6	EARLY
32 | TCGA.SO.A8JP	EARLY
33 | TCGA.VF.A8A8	EARLY
34 | TCGA.VF.A8A9	EARLY
35 | TCGA.VF.A8AA	EARLY
36 | TCGA.VF.A8AB	EARLY
37 | TCGA.VF.A8AC	EARLY
38 | TCGA.VF.A8AD	EARLY
39 | TCGA.VF.A8AE	EARLY
40 | TCGA.WZ.A7V3	EARLY
41 | TCGA.WZ.A7V4	EARLY
42 | TCGA.WZ.A7V5	EARLY
43 | TCGA.XE.A8H4	EARLY
44 | TCGA.XE.A9SE	EARLY
45 | TCGA.XE.AANR	EARLY
46 | TCGA.XE.AANV	EARLY
47 | TCGA.XE.AAO3	EARLY
48 | TCGA.XE.AAO4	EARLY
49 | TCGA.XE.AAO6	EARLY
50 | TCGA.XE.AAOC	EARLY
51 | TCGA.XE.AAOD	EARLY
52 | TCGA.XE.AAOF	EARLY
53 | TCGA.XE.AAOL	EARLY
54 | TCGA.XY.A89B	EARLY
55 | TCGA.XY.A8S2	EARLY
56 | TCGA.YU.A90P	EARLY
57 | TCGA.YU.A90Q	EARLY
58 | TCGA.YU.A90S	EARLY
59 | TCGA.YU.A90W	EARLY
60 | TCGA.YU.A94I	EARLY
61 | TCGA.ZM.AA05	EARLY
62 | TCGA.ZM.AA06	EARLY
63 | TCGA.ZM.AA0B	EARLY
64 | TCGA.ZM.AA0D	EARLY
65 | TCGA.ZM.AA0E	EARLY
66 | TCGA.ZM.AA0F	EARLY
67 | TCGA.ZM.AA0N	EARLY
68 | TCGA.2G.AAFN	LATE
69 | TCGA.2G.AAFZ	LATE
70 | TCGA.2G.AAG9	LATE
71 | TCGA.2G.AAGG	LATE
72 | TCGA.2G.AAGN	LATE
73 | TCGA.2G.AAGS	LATE
74 | TCGA.2G.AAGZ	LATE
75 | TCGA.2G.AAH8	LATE
76 | TCGA.2G.AAKD	LATE
77 | TCGA.2G.AALP	LATE
78 | TCGA.YU.A90Y	LATE
79 | TCGA.YU.A912	LATE
80 | TCGA.YU.A94D	LATE
81 | TCGA.YU.AA4L	LATE
82 | 


--------------------------------------------------------------------------------
/inst/extdata/dbPath/1/_0.cfs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/1/_0.cfs


--------------------------------------------------------------------------------
/inst/extdata/dbPath/1/metadata.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
3 | <properties>
4 | <entry key="short_name">predictor</entry>
5 | <entry key="organism_id">1</entry>
6 | <entry key="common_name">my_predictor</entry>
7 | </properties>
8 | 


--------------------------------------------------------------------------------
/inst/extdata/dbPath/1/segments.gen:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/1/segments.gen


--------------------------------------------------------------------------------
/inst/extdata/dbPath/1/segments_2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/1/segments_2


--------------------------------------------------------------------------------
/inst/extdata/dbPath/base/_0.cfs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/base/_0.cfs


--------------------------------------------------------------------------------
/inst/extdata/dbPath/base/segments.gen:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/base/segments.gen


--------------------------------------------------------------------------------
/inst/extdata/dbPath/base/segments_2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/base/segments_2


--------------------------------------------------------------------------------
/inst/extdata/dbPath/cache/CORE/1/1.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/1.ser


--------------------------------------------------------------------------------
/inst/extdata/dbPath/cache/CORE/1/2.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/2.ser


--------------------------------------------------------------------------------
/inst/extdata/dbPath/cache/CORE/1/3.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/3.ser


--------------------------------------------------------------------------------
/inst/extdata/dbPath/cache/CORE/1/4.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/4.ser


--------------------------------------------------------------------------------
/inst/extdata/dbPath/cache/CORE/1/DatasetInfo.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/DatasetInfo.ser


--------------------------------------------------------------------------------
/inst/extdata/dbPath/cache/CORE/1/attributeGroups.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/attributeGroups.ser


--------------------------------------------------------------------------------
/inst/extdata/dbPath/cache/CORE/1/networkIds.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/networkIds.ser


--------------------------------------------------------------------------------
/inst/extdata/dbPath/cache/CORE/1/nodeIds.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/cache/CORE/1/nodeIds.ser


--------------------------------------------------------------------------------
/inst/extdata/dbPath/genemania.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <genemania>
3 |     <type>org.genemania.data.lucene.LuceneDataSet</type>
4 |     <data-version>custom</data-version>
5 |     <access-mode>compact</access-mode>
6 | </genemania>
7 | 


--------------------------------------------------------------------------------
/inst/extdata/dbPath/user/segments.gen:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/user/segments.gen


--------------------------------------------------------------------------------
/inst/extdata/dbPath/user/segments_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/inst/extdata/dbPath/user/segments_1


--------------------------------------------------------------------------------
/inst/extdata/example_nets/BIG_CASE.txt:
--------------------------------------------------------------------------------
 1 | P1	P2	1
 2 | P1	P3	1
 3 | P1	P4	1
 4 | P1	P5	1
 5 | P1	P6	1
 6 | P1	P7	1
 7 | P1	P8	1
 8 | P1	P9	1
 9 | P1	P10	1
10 | P2	P3	1
11 | P2	P4	1
12 | P2	P5	1
13 | P2	P6	1
14 | P2	P7	1
15 | P2	P8	1
16 | P2	P9	1
17 | P2	P10	1
18 | P3	P4	1
19 | P3	P5	1
20 | P3	P6	1
21 | P3	P7	1
22 | P3	P8	1
23 | P3	P9	1
24 | P3	P10	1
25 | P4	P5	1
26 | P4	P6	1
27 | P4	P7	1
28 | P4	P8	1
29 | P4	P9	1
30 | P4	P10	1
31 | P5	P6	1
32 | P5	P7	1
33 | P5	P8	1
34 | P5	P9	1
35 | P5	P10	1
36 | P6	P7	1
37 | P6	P8	1
38 | P6	P9	1
39 | P6	P10	1
40 | P7	P8	1
41 | P7	P9	1
42 | P7	P10	1
43 | P8	P9	1
44 | P8	P10	1
45 | P9	P10	1
46 | 


--------------------------------------------------------------------------------
/inst/extdata/example_nets/BIG_CONTROL.txt:
--------------------------------------------------------------------------------
 1 | P101	P102	1
 2 | P101	P103	1
 3 | P101	P104	1
 4 | P101	P105	1
 5 | P101	P106	1
 6 | P101	P107	1
 7 | P101	P108	1
 8 | P101	P109	1
 9 | P101	P110	1
10 | P102	P103	1
11 | P102	P104	1
12 | P102	P105	1
13 | P102	P106	1
14 | P102	P107	1
15 | P102	P108	1
16 | P102	P109	1
17 | P102	P110	1
18 | P103	P104	1
19 | P103	P105	1
20 | P103	P106	1
21 | P103	P107	1
22 | P103	P108	1
23 | P103	P109	1
24 | P103	P110	1
25 | P104	P105	1
26 | P104	P106	1
27 | P104	P107	1
28 | P104	P108	1
29 | P104	P109	1
30 | P104	P110	1
31 | P105	P106	1
32 | P105	P107	1
33 | P105	P108	1
34 | P105	P109	1
35 | P105	P110	1
36 | P106	P107	1
37 | P106	P108	1
38 | P106	P109	1
39 | P106	P110	1
40 | P107	P108	1
41 | P107	P109	1
42 | P107	P110	1
43 | P108	P109	1
44 | P108	P110	1
45 | P109	P110	1
46 | 


--------------------------------------------------------------------------------
/inst/extdata/example_nets/BOTH_EQUAL.txt:
--------------------------------------------------------------------------------
 1 | P1	P2	1
 2 | P1	P3	1
 3 | P1	P4	1
 4 | P1	P5	1
 5 | P1	P101	1
 6 | P1	P102	1
 7 | P1	P103	1
 8 | P1	P104	1
 9 | P1	P105	1
10 | P2	P3	1
11 | P2	P4	1
12 | P2	P5	1
13 | P2	P101	1
14 | P2	P102	1
15 | P2	P103	1
16 | P2	P104	1
17 | P2	P105	1
18 | P3	P4	1
19 | P3	P5	1
20 | P3	P101	1
21 | P3	P102	1
22 | P3	P103	1
23 | P3	P104	1
24 | P3	P105	1
25 | P4	P5	1
26 | P4	P101	1
27 | P4	P102	1
28 | P4	P103	1
29 | P4	P104	1
30 | P4	P105	1
31 | P5	P101	1
32 | P5	P102	1
33 | P5	P103	1
34 | P5	P104	1
35 | P5	P105	1
36 | P101	P102	1
37 | P101	P103	1
38 | P101	P104	1
39 | P101	P105	1
40 | P102	P103	1
41 | P102	P104	1
42 | P102	P105	1
43 | P103	P104	1
44 | P103	P105	1
45 | P104	P105	1
46 | 


--------------------------------------------------------------------------------
/inst/extdata/example_nets/MOSTLY_CASE.txt:
--------------------------------------------------------------------------------
 1 | P1	P2	1
 2 | P1	P3	1
 3 | P1	P4	1
 4 | P1	P5	1
 5 | P1	P6	1
 6 | P1	P7	1
 7 | P1	P101	1
 8 | P1	P102	1
 9 | P1	P103	1
10 | P2	P3	1
11 | P2	P4	1
12 | P2	P5	1
13 | P2	P6	1
14 | P2	P7	1
15 | P2	P101	1
16 | P2	P102	1
17 | P2	P103	1
18 | P3	P4	1
19 | P3	P5	1
20 | P3	P6	1
21 | P3	P7	1
22 | P3	P101	1
23 | P3	P102	1
24 | P3	P103	1
25 | P4	P5	1
26 | P4	P6	1
27 | P4	P7	1
28 | P4	P101	1
29 | P4	P102	1
30 | P4	P103	1
31 | P5	P6	1
32 | P5	P7	1
33 | P5	P101	1
34 | P5	P102	1
35 | P5	P103	1
36 | P6	P7	1
37 | P6	P101	1
38 | P6	P102	1
39 | P6	P103	1
40 | P7	P101	1
41 | P7	P102	1
42 | P7	P103	1
43 | P101	P102	1
44 | P101	P103	1
45 | P102	P103	1
46 | 


--------------------------------------------------------------------------------
/inst/extdata/example_nets/SMALL_CASE.txt:
--------------------------------------------------------------------------------
1 | P1	P2	1
2 | P1	P3	1
3 | P2	P3	1
4 | 


--------------------------------------------------------------------------------
/inst/extdata/example_nets/SMALL_CONTROL.txt:
--------------------------------------------------------------------------------
1 | P101	P102	1
2 | P101	P103	1
3 | P102	P103	1
4 | 


--------------------------------------------------------------------------------
/inst/extdata/genemania.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <genemania>
3 |     <type>org.genemania.data.lucene.LuceneDataSet</type>
4 |     <data-version>custom</data-version>
5 |     <access-mode>compact</access-mode>
6 | </genemania>
7 | 


--------------------------------------------------------------------------------
/inst/extdata/pathway_ex3.gmt:
--------------------------------------------------------------------------------
1 | STEARATE BIOSYNTHESIS I (ANIMALS)%HUMANCYC%PWY-5972	stearate biosynthesis I (animals)	ELOVL1	ACOT7	ACSL1	ACSL5	ELOVL6	ACSL4	ACSL3	ACOT2	ACOT1	ACSBG1	ACSBG2	SLC27A2	ACOT4	
2 | PUTRESCINE DEGRADATION III%HUMANCYC%PWY-0	putrescine degradation III	ALDH3A2	ALDH3B2	ALDH3A1	ALDH1B1	MAOB	ALDH2	MAOA	ALDH3B1	SAT2	SAT1	
3 | TRYPTOPHAN DEGRADATION III (EUKARYOTIC)%HUMANCYC%TRYPTOPHAN-DEGRADATION-1	tryptophan degradation III (eukaryotic)	ACAT1	HADHB	GCDH	TDO2	KYNU	HAAO	AFMID	KMO	ACAA1	ACAT2	ACMSD	
4 | 


--------------------------------------------------------------------------------
/inst/extdata/plots/SURVIVENO.gmt:
--------------------------------------------------------------------------------
 1 | Abacavir_transport_and_metabolism	Abacavir_transport_and_metabolism	Abacavir_transport_and_metabolism
 2 | Androgen_biosynthesis	Androgen_biosynthesis	Androgen_biosynthesis
 3 | Aquaporin-mediated_transport	Aquaporin-mediated_transport	Aquaporin-mediated_transport
 4 | Bile_salt_and_organic_anion_slc_transporters	Bile_salt_and_organic_anion_slc_transporters	Bile_salt_and_organic_anion_slc_transporters
 5 | Calnexin_calreticulin_cycle	Calnexin_calreticulin_cycle	Calnexin_calreticulin_cycle
 6 | Class_c_3__metabotropic_glutamate_pheromone_receptors_	Class_c_3__metabotropic_glutamate_pheromone_receptors_	Class_c_3__metabotropic_glutamate_pheromone_receptors_
 7 | Hormone_ligand-binding_receptors	Hormone_ligand-binding_receptors	Hormone_ligand-binding_receptors
 8 | Metabolism_of_folate_and_pterines	Metabolism_of_folate_and_pterines	Metabolism_of_folate_and_pterines
 9 | Metabolism_of_water-soluble_vitamins_and_cofactors	Metabolism_of_water-soluble_vitamins_and_cofactors	Metabolism_of_water-soluble_vitamins_and_cofactors
10 | Platelet_adhesion_to_exposed_collagen	Platelet_adhesion_to_exposed_collagen	Platelet_adhesion_to_exposed_collagen
11 | Pou5f1__oct4_,_sox2,_nanog_activate_genes_related_to_proliferation	Pou5f1__oct4_,_sox2,_nanog_activate_genes_related_to_proliferation	Pou5f1__oct4_,_sox2,_nanog_activate_genes_related_to_proliferation
12 | Regulation_of_gene_expression_by_hypoxia-inducible_factor	Regulation_of_gene_expression_by_hypoxia-inducible_factor	Regulation_of_gene_expression_by_hypoxia-inducible_factor
13 | Regulation_of_ifna_signaling	Regulation_of_ifna_signaling	Regulation_of_ifna_signaling
14 | Thyroxine_biosynthesis	Thyroxine_biosynthesis	Thyroxine_biosynthesis
15 | Vasopressin_regulates_renal_water_homeostasis_via_aquaporins	Vasopressin_regulates_renal_water_homeostasis_via_aquaporins	Vasopressin_regulates_renal_water_homeostasis_via_aquaporins
16 | Vitamin_b5__pantothenate__metabolism	Vitamin_b5__pantothenate__metabolism	Vitamin_b5__pantothenate__metabolism
17 | 


--------------------------------------------------------------------------------
/inst/extdata/plots/SURVIVENO_nodeAttrs.txt:
--------------------------------------------------------------------------------
 1 | netName	maxScore	netType
 2 | ABACAVIR_TRANSPORT_AND_METABOLISM	10	rna
 3 | ANDROGEN_BIOSYNTHESIS	8	rna
 4 | AQUAPORIN-MEDIATED_TRANSPORT	9	rna
 5 | BILE_SALT_AND_ORGANIC_ANION_SLC_TRANSPORTERS	10	rna
 6 | CALNEXIN_CALRETICULIN_CYCLE	9	rna
 7 | CLASS_C_3__METABOTROPIC_GLUTAMATE_PHEROMONE_RECEPTORS_	9	rna
 8 | HORMONE_LIGAND-BINDING_RECEPTORS	4	rna
 9 | METABOLISM_OF_FOLATE_AND_PTERINES	9	rna
10 | METABOLISM_OF_WATER-SOLUBLE_VITAMINS_AND_COFACTORS	10	rna
11 | PLATELET_ADHESION_TO_EXPOSED_COLLAGEN	10	rna
12 | POU5F1__OCT4_,_SOX2,_NANOG_ACTIVATE_GENES_RELATED_TO_PROLIFERATION	9	rna
13 | REGULATION_OF_GENE_EXPRESSION_BY_HYPOXIA-INDUCIBLE_FACTOR	8	rna
14 | REGULATION_OF_IFNA_SIGNALING	10	rna
15 | THYROXINE_BIOSYNTHESIS	10	rna
16 | VASOPRESSIN_REGULATES_RENAL_WATER_HOMEOSTASIS_VIA_AQUAPORINS	9	rna
17 | VITAMIN_B5__PANTOTHENATE__METABOLISM	9	rna
18 | 


--------------------------------------------------------------------------------
/inst/extdata/plots/SURVIVEYES.gmt:
--------------------------------------------------------------------------------
 1 | Activation_of_the_pre-replicative_complex	Activation_of_the_pre-replicative_complex	Activation_of_the_pre-replicative_complex
 2 | Androgen_biosynthesis	Androgen_biosynthesis	Androgen_biosynthesis
 3 | Biocarta_stem_pathway	Biocarta_stem_pathway	Biocarta_stem_pathway
 4 | Calnexin_calreticulin_cycle	Calnexin_calreticulin_cycle	Calnexin_calreticulin_cycle
 5 | Defects_in_cobalamin__b12__metabolism	Defects_in_cobalamin__b12__metabolism	Defects_in_cobalamin__b12__metabolism
 6 | Defects_in_vitamin_and_cofactor_metabolism	Defects_in_vitamin_and_cofactor_metabolism	Defects_in_vitamin_and_cofactor_metabolism
 7 | Fgfr2_ligand_binding_and_activation	Fgfr2_ligand_binding_and_activation	Fgfr2_ligand_binding_and_activation
 8 | Gamma-carboxylation,_transport,_and_amino-terminal_cleavage_of_proteins	Gamma-carboxylation,_transport,_and_amino-terminal_cleavage_of_proteins	Gamma-carboxylation,_transport,_and_amino-terminal_cleavage_of_proteins
 9 | Glypican_1_network	Glypican_1_network	Glypican_1_network
10 | Hedgehog_ligand_biogenesis	Hedgehog_ligand_biogenesis	Hedgehog_ligand_biogenesis
11 | Hedgehog_off_state	Hedgehog_off_state	Hedgehog_off_state
12 | Metabolism_of_folate_and_pterines	Metabolism_of_folate_and_pterines	Metabolism_of_folate_and_pterines
13 | Metabolism_of_water-soluble_vitamins_and_cofactors	Metabolism_of_water-soluble_vitamins_and_cofactors	Metabolism_of_water-soluble_vitamins_and_cofactors
14 | Platelet_adhesion_to_exposed_collagen	Platelet_adhesion_to_exposed_collagen	Platelet_adhesion_to_exposed_collagen
15 | Reactions_specific_to_the_complex_n-glycan_synthesis_pathway	Reactions_specific_to_the_complex_n-glycan_synthesis_pathway	Reactions_specific_to_the_complex_n-glycan_synthesis_pathway
16 | Regulation_of_cholesterol_biosynthesis_by_srebp__srebf_	Regulation_of_cholesterol_biosynthesis_by_srebp__srebf_	Regulation_of_cholesterol_biosynthesis_by_srebp__srebf_
17 | Regulation_of_pyruvate_dehydrogenase__pdh__complex	Regulation_of_pyruvate_dehydrogenase__pdh__complex	Regulation_of_pyruvate_dehydrogenase__pdh__complex
18 | Removal_of_aminoterminal_propeptides_from_gamma-carboxylated_proteins	Removal_of_aminoterminal_propeptides_from_gamma-carboxylated_proteins	Removal_of_aminoterminal_propeptides_from_gamma-carboxylated_proteins
19 | Retinol_biosynthesis	Retinol_biosynthesis	RDH10	DHRS4	LRAT	LIPC	CES5A	DHRS9	RDH11	DHRS3	CES1	RBP1	CES4A	RBP2	PNLIP	RBP5	RBP4	CES2
20 | Rora_activates_gene_expression	Rora_activates_gene_expression	Rora_activates_gene_expression
21 | Synthesis_of_pc	Synthesis_of_pc	Synthesis_of_pc
22 | Tak1_activates_nfkb_by_phosphorylation_and_activation_of_ikks_complex	Tak1_activates_nfkb_by_phosphorylation_and_activation_of_ikks_complex	Tak1_activates_nfkb_by_phosphorylation_and_activation_of_ikks_complex
23 | The_nlrp3_inflammasome	The_nlrp3_inflammasome	The_nlrp3_inflammasome
24 | Thyroxine_biosynthesis	Thyroxine_biosynthesis	Thyroxine_biosynthesis
25 | Vegf_and_vegfr_signaling_network	Vegf_and_vegfr_signaling_network	Vegf_and_vegfr_signaling_network
26 | 


--------------------------------------------------------------------------------
/inst/extdata/plots/SURVIVEYES_nodeAttrs.txt:
--------------------------------------------------------------------------------
 1 | netName	maxScore	netType
 2 | ACTIVATION_OF_THE_PRE-REPLICATIVE_COMPLEX	8	rna
 3 | ANDROGEN_BIOSYNTHESIS	5	rna
 4 | BIOCARTA_STEM_PATHWAY	7	rna
 5 | CALNEXIN_CALRETICULIN_CYCLE	7	rna
 6 | DEFECTS_IN_COBALAMIN__B12__METABOLISM	9	rna
 7 | DEFECTS_IN_VITAMIN_AND_COFACTOR_METABOLISM	9	rna
 8 | FGFR2_LIGAND_BINDING_AND_ACTIVATION	8	rna
 9 | GAMMA-CARBOXYLATION,_TRANSPORT,_AND_AMINO-TERMINAL_CLEAVAGE_OF_PROTEINS	3	rna
10 | GLYPICAN_1_NETWORK	7	rna
11 | HEDGEHOG_LIGAND_BIOGENESIS	6	rna
12 | HEDGEHOG_OFF_STATE	7	rna
13 | METABOLISM_OF_FOLATE_AND_PTERINES	9	rna
14 | METABOLISM_OF_WATER-SOLUBLE_VITAMINS_AND_COFACTORS	5	rna
15 | PLATELET_ADHESION_TO_EXPOSED_COLLAGEN	9	rna
16 | REACTIONS_SPECIFIC_TO_THE_COMPLEX_N-GLYCAN_SYNTHESIS_PATHWAY	10	rna
17 | REGULATION_OF_CHOLESTEROL_BIOSYNTHESIS_BY_SREBP__SREBF_	6	rna
18 | REGULATION_OF_PYRUVATE_DEHYDROGENASE__PDH__COMPLEX	8	rna
19 | REMOVAL_OF_AMINOTERMINAL_PROPEPTIDES_FROM_GAMMA-CARBOXYLATED_PROTEINS	4	rna
20 | RETINOL_BIOSYNTHESIS	7	rna
21 | RORA_ACTIVATES_GENE_EXPRESSION	8	rna
22 | SYNTHESIS_OF_PC	6	rna
23 | TAK1_ACTIVATES_NFKB_BY_PHOSPHORYLATION_AND_ACTIVATION_OF_IKKS_COMPLEX	8	rna
24 | THE_NLRP3_INFLAMMASOME	5	rna
25 | THYROXINE_BIOSYNTHESIS	10	rna
26 | VEGF_AND_VEGFR_SIGNALING_NETWORK	7	rna
27 | 


--------------------------------------------------------------------------------
/man/MB.pheno.Rd:
--------------------------------------------------------------------------------
 1 | \name{MB.pheno}
 2 | \alias{MB.pheno}
 3 | \docType{data}
 4 | \title{
 5 | Sample metadata table for medulloblastoma dataset.
 6 | 
 7 | }
 8 | \description{
 9 | data.frame with patient ID and tumour subtype (STATUS)
10 | }
11 | \usage{data(MB.pheno)}
12 | \source{
13 | Northcott et al. (2011). J Clin Oncol. 29 (11):1408.
14 | }
15 | \references{
16 | Northcott et al. (2011). J Clin Oncol. 29 (11):1408.
17 | }
18 | \examples{
19 | data(MB.pheno)
20 | head(MB.pheno)
21 | }
22 | \keyword{datasets}
23 | 


--------------------------------------------------------------------------------
/man/avgNormDiff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/similarities.R
 3 | \name{avgNormDiff}
 4 | \alias{avgNormDiff}
 5 | \title{takes average of normdiff of each row in x}
 6 | \usage{
 7 | avgNormDiff(x)
 8 | }
 9 | \arguments{
10 | \item{x}{(numeric) matrix of values, one column per patient (e.g. ages)}
11 | }
12 | \value{
13 | symmetric matrix of size ncol(x) (number of patients) containing
14 | pairwise patient similarities
15 | }
16 | \description{
17 | takes average of normdiff of each row in x
18 | }
19 | \examples{
20 | data(xpr)
21 | sim <- avgNormDiff(xpr[,seq_len(2)])
22 | }
23 | 


--------------------------------------------------------------------------------
/man/callFeatSel.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/callFeatSel.R
 3 | \name{callFeatSel}
 4 | \alias{callFeatSel}
 5 | \title{Return feature selected nets based on given criteria}
 6 | \usage{
 7 | callFeatSel(netScores, fsCutoff, fsPctPass)
 8 | }
 9 | \arguments{
10 | \item{netScores}{(matrix) matrix of net scores}
11 | 
12 | \item{fsCutoff}{(integer) net must score at least this much in a split to
13 | 'pass' the threshold}
14 | 
15 | \item{fsPctPass}{(numeric 0 to 1) net must pass at least this percent of
16 | splits to be considered feature-selected}
17 | }
18 | \value{
19 | (char) names of nets that pass feature-selection
20 | }
21 | \description{
22 | Return feature selected nets based on given criteria
23 | }
24 | \details{
25 | Given the output of genNetScores.R and criteria for defining
26 | feature-selected (FS) nets, returns the subset of nets that pass the criteria.
27 | A net must score at least <fsCutoff> in at least <fsPctPass> (a fraction
28 | between 0 and 1) of splits to be considered feature-selected.
29 | }
30 | \examples{
31 | data(featScores)
32 | passed <- lapply(featScores, function(x) {
33 |    callFeatSel(x,10,0.7) # score 10/10 in >=70\% of trials
34 | })
35 | print(passed)
36 | }
37 | 


--------------------------------------------------------------------------------
/man/callOverallSelectedFeatures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper.R
 3 | \name{callOverallSelectedFeatures}
 4 | \alias{callOverallSelectedFeatures}
 5 | \title{Wrapper to call selected features}
 6 | \usage{
 7 | callOverallSelectedFeatures(
 8 |   featScores,
 9 |   featureSelCutoff,
10 |   featureSelPct,
11 |   cleanNames = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{featScores}{(list of lists): matrix of feature scores across all splits, separated
16 | by patient label. First level: patient labels. Second level: matrix of scores for 
17 | corresponding label.}
18 | 
19 | \item{featureSelCutoff}{(integer) cutoff score for feature selection.
20 | A feature must have minimum of this score for specified fraction of splits 
21 | (see featureSelPct) to pass.}
22 | 
23 | \item{featureSelPct}{(numeric between 0 and 1) cutoff percent for feature selection.
24 | A feature must have minimum score of featureSelCutoff for featureSelPct of 
25 | train/test splits to pass.}
26 | 
27 | \item{cleanNames}{(logical) remove internal suffixes for human readability}
28 | }
29 | \value{
30 | (list) Feature scores for all splits, plus those passing selection for overall predictor
31 | featScores: (matrix) feature scores for each split
32 | selectedFeatures: (list) features passing selection for each class; one key per class
33 | }
34 | \description{
35 | Wrapper to call selected features
36 | }
37 | \details{
38 | Calls features that are consistently high-scoring for predicting 
39 | each class. The context for this is as follows: 
40 | The original model runs feature selection over multiple splits of data
41 | into train/test samples, and each such split generates scores for all features.
42 | This function identifies features with scores that exceed a threshold for a fraction
43 | of train/test splits; the threshold and fraction are both user-specified. This
44 | function is called by the wrapper getResults(), which returns both the matrix of 
45 | feature scores across splits and list of features that pass the user-specified cutoffs.
46 | }
47 | \examples{
48 | pathways <- paste("PATHWAY_",1:100,sep="")
49 | highrisk <- list()
50 | lowrisk <- list()
51 | for (k in 1:10) { 
52 | 	highrisk[[k]] <- data.frame(PATHWAY_NAME=pathways, 
53 | 	        SCORE=floor(runif(length(pathways),min=0,max=10)),
54 | 			stringsAsFactors=FALSE);
55 |     lowrisk[[k]] <- data.frame(PATHWAY_NAME=pathways, 
56 | 	        SCORE=floor(runif(length(pathways),min=0,max=10)),
57 | 			stringsAsFactors=FALSE);
58 | }
59 | names(highrisk) <- sprintf("Split\%i",1:length(highrisk))
60 | names(lowrisk) <- sprintf("Split\%i",1:length(lowrisk))
61 | callOverallSelectedFeatures(list(highrisk=highrisk,lowrisk=lowrisk), 5,0.5)
62 | }
63 | 


--------------------------------------------------------------------------------
/man/cleanPathwayName.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cleanPathwayName.R
 3 | \name{cleanPathwayName}
 4 | \alias{cleanPathwayName}
 5 | \title{Clean pathway name so it can be a filename.}
 6 | \usage{
 7 | cleanPathwayName(curP)
 8 | }
 9 | \arguments{
10 | \item{curP}{(char) pathway name}
11 | }
12 | \value{
13 | (char) Cleaned pathway name
14 | }
15 | \description{
16 | Clean pathway name so it can be a filename.
17 | }
18 | \examples{
19 | cleanPathwayName('7-(3-AMINO-3-CARBOXYPROPYL)-WYOSINE BIOSYNTHESIS\%HUMANC')
20 | }
21 | 


--------------------------------------------------------------------------------
/man/cnv_GR.Rd:
--------------------------------------------------------------------------------
 1 | \name{cnv_GR}
 2 | \alias{cnv_GR}
 3 | \docType{data}
 4 | \title{
 5 | CNV locations for breast cancer (subset)
 6 | }
 7 | \description{
 8 | Subset of CNV locations for TCGA breast tumour. Each range is 
 9 |  associated with a patient (ID)
10 | }
11 | \usage{data(cnv_GR)}
12 | \source{
13 | The Cancer Genome Atlas. (2012). Nature 490:61-70.
14 | }
15 | \references{
16 | The Cancer Genome Atlas. (2012). Nature 490:61-70.
17 | }
18 | \examples{
19 | data(cnv_GR)
20 | head(cnv_GR)
21 | }
22 | \keyword{datasets}
23 | 


--------------------------------------------------------------------------------
/man/cnv_TTstatus.Rd:
--------------------------------------------------------------------------------
 1 | \name{cnv_TTstatus}
 2 | \alias{cnv_TTstatus}
 3 | \docType{data}
 4 | \title{
 5 | list of train/test statuses for CNV example
 6 | }
 7 | \description{
 8 | list of train/test statuses for CNV example
 9 | }
10 | \usage{data(cnv_TTstatus)}
11 | \examples{
12 | data(cnv_TTstatus)
13 | head(cnv_TTstatus)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/cnv_netPass.Rd:
--------------------------------------------------------------------------------
 1 | \name{cnv_netScores}
 2 | \alias{cnv_netScores}
 3 | \docType{data}
 4 | \title{
 5 | List of pathway-level feature selection scores
 6 | }
 7 | \description{
 8 | List of pathway-level feature selection scores
 9 | }
10 | \usage{data(cnv_netScores)}
11 | \examples{
12 | data(cnv_netScores)
13 | summary(cnv_netScores)
14 | head(cnv_netScores[[1]])
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/man/cnv_netScores.Rd:
--------------------------------------------------------------------------------
 1 | \name{cnv_netPass}
 2 | \alias{cnv_netPass}
 3 | \docType{data}
 4 | \title{
 5 | Vector of pathways that pass class enrichment
 6 | }
 7 | \description{
 8 | Vector of pathways that pass class enrichment
 9 | }
10 | \usage{data(cnv_netPass)}
11 | \examples{
12 | data(cnv_netPass)
13 | head(cnv_netPass)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/cnv_patientNetCount.Rd:
--------------------------------------------------------------------------------
 1 | \name{cnv_patientNetCount}
 2 | \alias{cnv_patientNetCount}
 3 | \docType{data}
 4 | \title{
 5 | Binary matrix of patient occurrence in networks
 6 | }
 7 | \description{
 8 | Binary matrix of patient occurrence in networks
 9 | }
10 | \usage{data(cnv_patientNetCount)}
11 | \examples{
12 | data(cnv_patientNetCount)
13 | head(cnv_patientNetCount)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/cnv_pheno.Rd:
--------------------------------------------------------------------------------
 1 | \name{cnv_pheno}
 2 | \alias{cnv_pheno}
 3 | \docType{data}
 4 | \title{
 5 | data.frame of patient labels and status for CNV example
 6 | }
 7 | \description{
 8 | data.frame of patient labels and status for CNV example
 9 | }
10 | \usage{data(cnv_pheno)}
11 | \examples{
12 | data(cnv_pheno)
13 | head(cnv_pheno)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/compareShortestPath.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compareShortestPath.R
 3 | \name{compareShortestPath}
 4 | \alias{compareShortestPath}
 5 | \title{compare intra-cluster shortest distance to overall shortest distance of the
 6 |  network}
 7 | \usage{
 8 | compareShortestPath(net, pheno, plotDist = FALSE, verbose = TRUE)
 9 | }
10 | \arguments{
11 | \item{net}{(data.frame) network on which to compute shortest path. 
12 | SOURCE, TARGET, WEIGHTS. 
13 | Column names are ignored but expects a header row. Distances will be 
14 | computed based on the third column}
15 | 
16 | \item{pheno}{(data.frame) Node information. ID (node name) and GROUP
17 | (cluster name)}
18 | 
19 | \item{plotDist}{(logical) if TRUE, creates a violin plot showing the 
20 | shortest path distributions for each group.}
21 | 
22 | \item{verbose}{(logical) print messages}
23 | }
24 | \value{
25 | (list) Two lists, 'avg' and 'all'. Keys are cluster names. 
26 | Values for 'avg' are the mean shortest path; for 'all', values are all pairwise
27 | shortest paths
28 | for subnetworks that contain only the edges where source and target both 
29 | belong to the corresponding cluster. In addition, there is an 'overall' 
30 | entry for the mean shortest distance for the entire network.
31 | }
32 | \description{
33 | compare intra-cluster shortest distance to overall shortest distance of the
34 |  network
35 | }
36 | \details{
37 | Uses Dijkstra's algorithm for weighted edges. Pairwise nodes with
38 | infinite distances are excluded before computing average shortest path 
39 | for a network. This function requires the igraph package to be installed.
40 | }
41 | \examples{
42 | data(silh); 
43 | colnames(silh$net)[3] <- 'weight'
44 | compareShortestPath(silh$net, silh$groups)
45 | }
46 | 


--------------------------------------------------------------------------------
/man/compileFeatureScores.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compileFeatureScores.R
 3 | \name{compileFeatureScores}
 4 | \alias{compileFeatureScores}
 5 | \title{Tally the score of networks through cross-validation}
 6 | \usage{
 7 | compileFeatureScores(fList, filter_WtSum = 100, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{fList}{(char) Vector of paths to GeneMANIA NRANK files}
11 | 
12 | \item{filter_WtSum}{(numeric between 5-100) Limit to top-ranked 
13 | networks such that cumulative weight is less than this parameter. 
14 | e.g. If filter_WtSum=20, first order networks by decreasing weight; 
15 | then keep those whose cumulative weight <= 20.}
16 | 
17 | \item{verbose}{(logical) print messages}
18 | }
19 | \value{
20 | (data.frame) Feature name and score; includes features that occur
21 | at least once in \code{fList}.
22 | }
23 | \description{
24 | Tally the score of networks through cross-validation
25 | }
26 | \examples{
27 | netDir <- system.file("extdata","GM_NRANK",package="netDx")
28 | netFiles <- sprintf('\%s/\%s', netDir,dir(netDir,pattern='NRANK$'))
29 | pTally <- compileFeatureScores(netFiles,verbose=TRUE)
30 | print(head(pTally))
31 | }
32 | 


--------------------------------------------------------------------------------
/man/confmat.Rd:
--------------------------------------------------------------------------------
 1 | \name{confmat}
 2 | \alias{confmat}
 3 | \docType{data}
 4 | \title{
 5 | 	Confusion matrix example
 6 | }
 7 | \description{
 8 | Sample table of True/False Positives and Negatives for various feature 
 9 | selection cutoffs
10 | tp: true positive rate, 
11 | fp: false positive rate, 
12 | tn: true negative rate, 
13 | fn: false negative rate
14 | }
15 | \usage{data(confmat)}
16 | \examples{
17 | data(confmat)
18 | head(confmat)
19 | }
20 | \keyword{datasets}
21 | 


--------------------------------------------------------------------------------
/man/confusionMatrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper.R
 3 | \name{confusionMatrix}
 4 | \alias{confusionMatrix}
 5 | \title{Make confusion matrix}
 6 | \usage{
 7 | confusionMatrix(model)
 8 | }
 9 | \arguments{
10 | \item{model}{(list) output of buildPredictor()}
11 | }
12 | \value{
13 | (list) confusion matrix for all train/test splits and final averaged matrix.
14 | Side effect of plotting the averaged matrix.
15 | }
16 | \description{
17 | Make confusion matrix
18 | }
19 | \details{
20 | Creates a confusion matrix, a square matrix which indicates the fraction of times
21 | patients in a class are correctly classified, versus misclassified as each of the other classes.
22 | Here, the confusion matrix is computed once per train-test split and the average is displayed. 
23 | For this reason, the fractions may not cleanly add up to 100\%.
24 | }
25 | \examples{
26 | data(toymodel)
27 | confusionMatrix(toymodel)
28 | }
29 | 


--------------------------------------------------------------------------------
/man/convertProfileToNetworks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/runProfileToNetworks.R
 3 | \name{convertProfileToNetworks}
 4 | \alias{convertProfileToNetworks}
 5 | \title{Convert profiles to interaction networks before integration}
 6 | \usage{
 7 | convertProfileToNetworks(
 8 |   netDir,
 9 |   outDir = tempdir(),
10 |   simMetric = "pearson",
11 |   numCores = 1L,
12 |   JavaMemory = 4L,
13 |   GM_jar = NULL,
14 |   P2N_threshType = "off",
15 |   P2N_maxMissing = 100,
16 |   netSfx = "txt$",
17 |   debugMode = FALSE
18 | )
19 | }
20 | \arguments{
21 | \item{netDir}{(char) directory with .profile files}
22 | 
23 | \item{outDir}{(char) path to directory where interaction networks are to be printed}
24 | 
25 | \item{simMetric}{(char) similarity measure to use in converting 
26 | profiles to interaction networks.}
27 | 
28 | \item{numCores}{(integer) number of cores for parallel processing}
29 | 
30 | \item{JavaMemory}{(integer) Memory for GeneMANIA (in Gb)}
31 | 
32 | \item{GM_jar}{(char) path to GeneMANIA jar file}
33 | 
34 | \item{P2N_threshType}{(char) Most users shouldn't have to change this.
35 | ProfileToNetworkDriver's threshold option. One of 'off|auto'. 
36 | unit testing}
37 | 
38 | \item{P2N_maxMissing}{(integer 5-100)}
39 | 
40 | \item{netSfx}{(char) pattern for finding network files in \code{netDir}.}
41 | 
42 | \item{debugMode}{(logical) if TRUE runs profile generation in serial 
43 | rather than parallel, allowing debugging}
44 | }
45 | \value{
46 | No value. Side effect of creating interaction networks in outDir.
47 | }
48 | \description{
49 | Convert profiles to interaction networks before integration
50 | }
51 | \details{
52 | In preparation for network integration. When using GeneMANIA's
53 | built-in functionality to create PSN using ProfileToNetworkDriver, this
54 | step needs to run to process profiles to networks. These are currently used
55 | for Pearson correlation-based networks and those using mutual information.
56 | }
57 | 


--------------------------------------------------------------------------------
/man/countIntType.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sparsenet_enrichment_functions.R
 3 | \name{countIntType}
 4 | \alias{countIntType}
 5 | \title{Counts the number of (+,+) and (+,-) interactions in a single network}
 6 | \usage{
 7 | countIntType(inFile, plusID, minusID)
 8 | }
 9 | \arguments{
10 | \item{inFile}{(char) path to interaction networks}
11 | 
12 | \item{plusID}{(char) vector of + nodes}
13 | 
14 | \item{minusID}{(char) vector of - nodes}
15 | }
16 | \value{
17 | (numeric of length 2) Number of (+,+) interactions, and 
18 | non-(+,+) interactions
19 | (i.e. (+,-) and (-,-) interactions)
20 | }
21 | \description{
22 | Counts the number of (+,+) and (+,-) interactions in a single network
23 | }
24 | \examples{
25 | d <- tempdir()
26 | # write PSN
27 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),byrow=FALSE,ncol=3)
28 | write.table(m1,file=paste(d,"net1.txt",sep=getFileSep()),
29 | sep="\t",
30 | col.names=FALSE,row.names=FALSE,quote=FALSE)
31 | 
32 | countIntType(paste(d,"net1.txt",sep=getFileSep()),c("P1","P2","P3"),
33 | c("P4","P5"))
34 | }
35 | 


--------------------------------------------------------------------------------
/man/countIntType_batch.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sparsenet_enrichment_functions.R
 3 | \name{countIntType_batch}
 4 | \alias{countIntType_batch}
 5 | \title{Counts number of (+,+) and (+,-) interactions in a set of networks}
 6 | \usage{
 7 | countIntType_batch(
 8 |   inFiles,
 9 |   plusID,
10 |   minusID,
11 |   tmpDir = tempdir(),
12 |   enrType = "binary",
13 |   numCores = 1L
14 | )
15 | }
16 | \arguments{
17 | \item{inFiles}{(char) path to interaction networks to process}
18 | 
19 | \item{plusID}{(char) IDs of + nodes}
20 | 
21 | \item{minusID}{(char) IDs of - nodes}
22 | 
23 | \item{tmpDir}{(char) path to dir where temporary files can be stored}
24 | 
25 | \item{enrType}{(char) see \code{getEnr()}}
26 | 
27 | \item{numCores}{(integer) number of cores for parallel processing}
28 | }
29 | \value{
30 | (matrix) two columns, one row per network.
31 | If \code{enrType="binary"}, number of (+,+) and other interactions.
32 | Otherwise if \code{enrType="corr"}, mean edge weight of (+,+) edges and
33 | of other edges.
34 | }
35 | \description{
36 | Counts number of (+,+) and (+,-) interactions in a set of networks
37 | }
38 | \examples{
39 | d <- tempdir()
40 | # write PSN
41 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),byrow=FALSE,ncol=3)
42 | write.table(m1,file=paste(d,"net1.txt",sep=getFileSep()),sep="\t",
43 | col.names=FALSE,row.names=FALSE,quote=FALSE)
44 | m2 <- matrix(c("P3","P4",1),nrow=1)
45 | write.table(m2,file=paste(d,"net2.txt",sep=getFileSep()),sep="\t",
46 | col.names=FALSE,row.names=FALSE,quote=FALSE)
47 | 
48 | countIntType_batch(paste(d,c("net1.txt","net2.txt"),sep=getFileSep()),
49 | 	c("P1","P2","P3"),c("P4","P5"))
50 | }
51 | 


--------------------------------------------------------------------------------
/man/countPatientsInNet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/countPatientsInNet.R
 3 | \name{countPatientsInNet}
 4 | \alias{countPatientsInNet}
 5 | \title{Count number of patients in a network}
 6 | \usage{
 7 | countPatientsInNet(netDir, fList, ids)
 8 | }
 9 | \arguments{
10 | \item{netDir}{(char) dir with network set}
11 | 
12 | \item{fList}{(char) filenames of interaction networks to count in}
13 | 
14 | \item{ids}{(char) patient IDs to look for}
15 | }
16 | \value{
17 | (matrix) Size P by N, where P is num patients and N is 
18 | number of networks; a[i,j] = 1 if patient i is in network j, else 0
19 | }
20 | \description{
21 | Count number of patients in a network
22 | }
23 | \details{
24 | This functionality is needed to count patient overlap when 
25 | input data is in a form that results in highly missing data, rather than
26 | when the same measures are available for almost all patients. An example
27 | application is when patient networks are based on unique genomic events
28 | in each patient (e.g. CNVs or indels), rather than 'full-matrix' data
29 | (e.g. questionnaires or gene expression matrices). The former scenario
30 | requires an update in the list of eligible networks each time some type
31 | of patient subsetting is applied (e.g. label enrichment, or train/test
32 | split). A matrix with patient/network membership serves as a lookup
33 | table to prune networks as feature selection proceeds
34 | }
35 | \examples{
36 | d <- tempdir()
37 | pids <- paste("P",1:5,sep="")
38 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),
39 | 	byrow=FALSE,ncol=3)
40 | write.table(m1,
41 | file=paste(d,"net1.txt",sep=getFileSep()),sep="\t",
42 | col.names=FALSE,row.names=FALSE,quote=FALSE)
43 | m2 <- matrix(c("P3","P4",1),nrow=1)
44 | write.table(m2,
45 | file=paste(d,"net2.txt",sep=getFileSep()),sep="\t",
46 | col.names=FALSE,row.names=FALSE,quote=FALSE)
47 | x <- countPatientsInNet(d,c("net1.txt","net2.txt"), pids)
48 | }
49 | 


--------------------------------------------------------------------------------
/man/dataList2List.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataList2list.R
 3 | \name{dataList2List}
 4 | \alias{dataList2List}
 5 | \title{Convert MultiAssayExperiment object to list and data.frame}
 6 | \usage{
 7 | dataList2List(dat, groupList)
 8 | }
 9 | \arguments{
10 | \item{dat}{(MultiAssayExperiment) Patient data and metadata}
11 | 
12 | \item{groupList}{(list) variable groupings used for feature construction. See groupList arg in buildPredictor().}
13 | }
14 | \value{
15 | (list) Keys are:
16 | 1) assays: list of matrices, each corresponding to data from a particular
17 | layer
18 | 2) pheno: (data.frame) sample metadata
19 | }
20 | \description{
21 | Convert MultiAssayExperiment object to list and data.frame
22 | }
23 | \details{
24 | Used by internal routines in netDx
25 | }
26 | \examples{
27 | data(xpr,pheno)
28 | require(MultiAssayExperiment)
29 | objlist <- list("RNA"=SummarizedExperiment(xpr))
30 | mae <- MultiAssayExperiment(objlist,pheno)
31 | groupList <- list(RNA=rownames(xpr))
32 | dl <- dataList2List(mae,groupList)
33 | summary(dl) 
34 | }
35 | 


--------------------------------------------------------------------------------
/man/dot-get_cache.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fileCache.R
 3 | \name{.get_cache}
 4 | \alias{.get_cache}
 5 | \title{wrapper function for getting BiocFileCache associated with netDx package}
 6 | \usage{
 7 | .get_cache()
 8 | }
 9 | \value{
10 | BiocFileCache object associated with netDx
11 | }
12 | \description{
13 | wrapper function for getting BiocFileCache associated with netDx package
14 | }
15 | 


--------------------------------------------------------------------------------
/man/enrichLabelNets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/enrichLabelNets.R
 3 | \name{enrichLabelNets}
 4 | \alias{enrichLabelNets}
 5 | \title{Score networks based on their edge bias towards (+,+) interactions}
 6 | \usage{
 7 | enrichLabelNets(
 8 |   netDir,
 9 |   pheno_DF,
10 |   outDir,
11 |   numReps = 50L,
12 |   minEnr = -1,
13 |   outPref = "enrichLabelNets",
14 |   verbose = TRUE,
15 |   setSeed = 42L,
16 |   enrType = "binary",
17 |   numCores = 1L,
18 |   predClass,
19 |   tmpDir = tempdir(),
20 |   netGrep = "_cont.txt$",
21 |   getShufResults = FALSE,
22 |   ...
23 | )
24 | }
25 | \arguments{
26 | \item{netDir}{(char) path to dir containing all networks}
27 | 
28 | \item{pheno_DF}{(data.frame) for details see \code{getEnr()}}
29 | 
30 | \item{outDir}{(char) path to dir where output/log files are written}
31 | 
32 | \item{numReps}{(integer) Max num reps for shuffling class status. 
33 | Adaptive permutation is
34 | used so in practice, few networks would be evaluated to this extent}
35 | 
36 | \item{minEnr}{(numeric from -1 to 1) Only include networks with ENR
37 | value greater than this threshold.}
38 | 
39 | \item{outPref}{(char) prefix for log file (not counting the dir name)}
40 | 
41 | \item{verbose}{(logical) print messages}
42 | 
43 | \item{setSeed}{(integer) if not NULL, integer is set as seed
44 | to ensure reproducibility in random number generation}
45 | 
46 | \item{enrType}{(char) see getEnr()}
47 | 
48 | \item{numCores}{(integer) num cores for parallel ENR computation of
49 | all networks}
50 | 
51 | \item{predClass}{(char) see \code{getEnr()}}
52 | 
53 | \item{tmpDir}{(char) path to dir where temporary work can be stored}
54 | 
55 | \item{netGrep}{(char) pattern to grep for network files in netDir}
56 | 
57 | \item{getShufResults}{(logical) if TRUE, returns the ENR for each
58 | permutation, for all networks. Warning: this is likely to be huge. Use
59 | this flag for debugging purposes only.}
60 | 
61 | \item{...}{parameters for \code{countIntType_batch()}.}
62 | }
63 | \value{
64 | (data.frame) networks stats from clique-filtering, one record 
65 | per network
66 | }
67 | \description{
68 | Score networks based on their edge bias towards (+,+) interactions
69 | }
70 | \details{
71 | Determines which networks are statistically enriched for 
72 | interactions between the class of interest. The resulting \code{ENR} 
73 | score and corresponding p-value serve as a filter to exclude random-like
74 | interaction networks before using feature selection. This filter is
75 | known to be important when patient networks are sparse and binary; e.g.
76 | networks based on shared overlap of CNV locations.  If the filter is 
77 | not applied, GeneMANIA WILL promote networks with slight bias towards 
78 | (+,+) edges, even if these are small and random-like.
79 | 
80 | The measure of (+,+)-enrichment is defined as: 
81 | ENR(network N) = ((num (+,+) edges) - (num other edges))/(num edges).
82 | A p-value for per-network ENR is obtained non-parametrically by
83 | measuring a null distribution for ENR following multiple permutations
84 | of case-control labels.
85 | }
86 | \examples{
87 | data(npheno)
88 | netDir <- system.file("extdata","example_nets",package="netDx")
89 | x <- enrichLabelNets(netDir,npheno,".",predClass="case",netGrep="txt$",
90 | 	numReps=5)
91 | print(x)
92 | 
93 | }
94 | 


--------------------------------------------------------------------------------
/man/featScores.Rd:
--------------------------------------------------------------------------------
 1 | \name{featScores}
 2 | \alias{featScores}
 3 | \docType{data}
 4 | \title{
 5 | Demo feature-level scores from running feature selection on two-class problem
 6 | }
 7 | \description{
 8 | List with one entry per patient label ("SURVIVEYES" and "SURVIVENO"). Each entry contains scores obtained through feature-selection across 100 train/test splits. Scores range from 0 to 10. Scores in data.frame format, with rows corresponding to features and columns to a particular train/test split.
 9 | }
10 | \usage{data(featScores)}
11 | \examples{
12 | data(featScores)
13 | head(featScores)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/fetchPathwayDefinitions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fileCache.R
 3 | \name{fetchPathwayDefinitions}
 4 | \alias{fetchPathwayDefinitions}
 5 | \title{fetch pathway definitions from download.baderlab.org}
 6 | \usage{
 7 | fetchPathwayDefinitions(month = NULL, year = NULL, day = 1, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{month}{(numeric or char) month of pathway definition file. Can be
11 | numeric or text (e.g. "January","April"). If NULL, fails.}
12 | 
13 | \item{year}{(numeric) year of pathway definition file. Must be in
14 | yyyy format (e.g. 2018). If NULL, fails.}
15 | 
16 | \item{day}{(integer)}
17 | 
18 | \item{verbose}{(logical) print messages}
19 | }
20 | \value{
21 | (char) Path to local cached copy of GMT file. If no cached copy
22 | exists, an initial download is required.
23 | }
24 | \description{
25 | fetch pathway definitions from download.baderlab.org
26 | }
27 | \details{
28 | Fetches genesets compiled from multiple curated pathway
29 | databases. Downloaded from: https://download.baderlab.org/EM_Genesets/
30 | The file contains pathways from HumanCyc, NetPath, Reactome, NCI
31 | Curated Pathways and mSigDB.
32 | For details see Merico D, Isserlin R, Stueker O, Emili A and GD Bader.
33 | (2010). PLoS One. 5(11):e13984.
34 | }
35 | \examples{
36 | fetchPathwayDefinitions("October",2020)
37 | fetchPathwayDefinitions("January",2018)
38 | fetchPathwayDefinitions(month=10,year=2020)
39 | }
40 | 


--------------------------------------------------------------------------------
/man/genes.Rd:
--------------------------------------------------------------------------------
 1 | \name{genes}
 2 | \alias{genes}
 3 | \docType{data}
 4 | \title{
 5 | 	Table of gene definitions (small subsample of human genes)
 6 | }
 7 | \description{
 8 | data.frame object with columns of (gene) RefSeq ID (name), chromosome (chrom), strand, transcription start site (txStart), transcription end site (txEnd), and gene symbol (name2)
 9 | }
10 | \usage{data(genes)}
11 | \examples{
12 | data(genes)
13 | head(genes)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/getCorrType.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getCorrType.R
 3 | \name{getCorrType}
 4 | \alias{getCorrType}
 5 | \title{Counts the relative correlation of (+,+) and (+,-)(-,-) interactions}
 6 | \usage{
 7 | getCorrType(inFile, plusID, minusID)
 8 | }
 9 | \arguments{
10 | \item{inFile}{(character): path to interaction networks}
11 | 
12 | \item{plusID}{(character) vector of + nodes}
13 | 
14 | \item{minusID}{(character) vector of - nodes}
15 | }
16 | \value{
17 | (numeric) mean edge weight for (+,+) and other edges
18 | }
19 | \description{
20 | Counts the relative correlation of (+,+) and (+,-)(-,-) interactions
21 | }
22 | 


--------------------------------------------------------------------------------
/man/getEMapInput.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getEmapInput.R
 3 | \name{getEMapInput}
 4 | \alias{getEMapInput}
 5 | \title{write enrichment map for consensus nets}
 6 | \usage{
 7 | getEMapInput(
 8 |   featScores,
 9 |   namedSets,
10 |   netInfo,
11 |   pctPass = 0.7,
12 |   minScore = 1,
13 |   maxScore = 10,
14 |   trimFromName = c(".profile", "_cont"),
15 |   verbose = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{featScores}{(data.frame) network scores across rounds of cross
20 | validation. Rows are networks and columns are network name followed by
21 | scores for cross-validation rounds. Output of getFeatureScores()}
22 | 
23 | \item{namedSets}{(list) list of nets and units (e.g. pathway names and
24 | genes). Should only contain units profiled in this dataset}
25 | 
26 | \item{netInfo}{(data.frame) Table of network name (netName) and type
27 | (netType). Type is used to assign shapes to nodes:
28 |  clinical                                          clinical
29 |       rna GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS
30 |       rna                              RETINOL_BIOSYNTHESIS}
31 | 
32 | \item{pctPass}{(numeric between 0 and 1) fraction of splits for which
33 | the highest score for the network is required, for that to be the network's
34 | maxScore}
35 | 
36 | \item{minScore}{(integer) features with score below this cutoff are
37 | excluded from downstream analyses}
38 | 
39 | \item{maxScore}{(integer) maximum possible score in one round of cross-
40 | validation. e.g. for 10-fold cross-validation, maxScore=10.}
41 | 
42 | \item{trimFromName}{(char) strings to trim from name with sub()}
43 | 
44 | \item{verbose}{(logical) print messages}
45 | }
46 | \value{
47 | (list) Length two. 1) nodeAttrs: data.frame of node attributes
48 | 2) featureSets: key-value pairs of selected feature sets (e.g. if pathway
49 | features are used, keys are pathway names, and values are member genes).
50 | }
51 | \description{
52 | write enrichment map for consensus nets
53 | }
54 | \examples{
55 | inDir <- system.file("extdata","example_output",package="netDx")
56 | outDir <- paste(tempdir(),'plots',sep='/')
57 | if (!file.exists(outDir)) dir.create(outDir)
58 | featScores <- getFeatureScores(inDir,predClasses=c('LumA','notLumA'))
59 | gp <- names(featScores)[1]
60 | pathwayList <- readPathways(fetchPathwayDefinitions("October",2020))
61 | pathwayList <- pathwayList[seq_len(5)]
62 | netInfoFile <- system.file("extdata","example_output/inputNets.txt",package="netDx")
63 | netInfo <- read.delim(netInfoFile,sep='\t',h=FALSE,as.is=TRUE)
64 | emap_input <- getEMapInput(featScores[[gp]],pathwayList,netInfo)
65 | summary(emap_input)
66 | }
67 | 


--------------------------------------------------------------------------------
/man/getEMapInput_many.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getEmapInput_many.R
 3 | \name{getEMapInput_many}
 4 | \alias{getEMapInput_many}
 5 | \title{Wrapper to generate multiple EnrichmentMaps (perhaps one per class)}
 6 | \usage{
 7 | getEMapInput_many(featScores, namedSets_valid, netTypes, outDir, ...)
 8 | }
 9 | \arguments{
10 | \item{featScores}{(list) keys are classes, and values are data.frames of
11 | network scores across cross-validation (output of getFeatureScores()).}
12 | 
13 | \item{namedSets_valid}{(list) Grouped unit variables limited to the
14 | units contained in the dataset. e.g. keys are pathways and values are
15 | the genes measured in this dataset.
16 | e.g.:
17 | $`MISSPLICED_GSK3BETA_MUTANTS_STABILIZE_BETA-CATENIN`
18 | [1] 'PPP2R5E' 'PPP2CB'  'APC'     'AXIN1'   'PPP2R1B' 'PPP2R1A' 'CSNK1A1'
19 | [8] 'PPP2R5D' 'PPP2R5C' 'PPP2R5B' 'PPP2R5A' 'PPP2CA'  'GSK3B'}
20 | 
21 | \item{netTypes}{(data.frame) 'inputNets.txt' file
22 | generated by NetDx. Dataframe has two columns, network type and
23 | network name. e.g.:
24 |  clinical                                          clinical
25 |       rna GUANOSINE_NUCLEOTIDES__I_DE_NOVO__I__BIOSYNTHESIS
26 |       rna                              RETINOL_BIOSYNTHESIS}
27 | 
28 | \item{outDir}{(char) path to output directory}
29 | 
30 | \item{...}{parameters for getEMapInput()}
31 | }
32 | \value{
33 | (list) of length g, where g is the number of groups in featScores.
34 | Values are lists, corresponding to the output of getEMapInput()
35 | }
36 | \description{
37 | Wrapper to generate multiple EnrichmentMaps (perhaps one per class)
38 | }
39 | \examples{
40 | data(featScores)
41 | 
42 | pathwayList <- readPathways(fetchPathwayDefinitions("October",2020))
43 | pathwayList <- pathwayList[seq_len(5)]
44 | 
45 | netInfoFile <- system.file("extdata","example_output/inputNets.txt",package="netDx")
46 | netTypes <- read.delim(netInfoFile,sep='\t',h=FALSE,as.is=TRUE)
47 | outDir <- paste(tempdir(),'plots',sep='/')
48 | if (!file.exists(outDir)) dir.create(outDir)
49 | EMap_input <- getEMapInput_many(featScores,pathwayList,
50 |      netTypes,outDir=outDir)
51 | }
52 | 


--------------------------------------------------------------------------------
/man/getEnr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sparsenet_enrichment_functions.R
 3 | \name{getEnr}
 4 | \alias{getEnr}
 5 | \title{Get ENR for all networks in a specified directory}
 6 | \usage{
 7 | getEnr(
 8 |   netDir,
 9 |   pheno_DF,
10 |   predClass,
11 |   netGrep = "_cont.txt$",
12 |   enrType = "binary",
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{netDir}{(char) directory containing interaction networks}
18 | 
19 | \item{pheno_DF}{(data.frame) table with patient ID and status.
20 | Must contain columns for Patient ID (named "ID") and class
21 | (named "STATUS"). Status should be a char; value of predictor class 
22 | should be specified in \code{predClass} param; 
23 | all other values are considered non-predictor class.
24 | Rows with duplicate IDs will be excluded.}
25 | 
26 | \item{predClass}{(char) value for patients in predictor class}
27 | 
28 | \item{netGrep}{(char) pattern for grep-ing network text files, used in
29 | dir(pattern=..) argument}
30 | 
31 | \item{enrType}{(char) how enrichment should be computed. Options are:
32 | 1) binary: Skew of number of (+,+) interactions relative to other
33 | interactions. Used when all edges in network are set to 1 (e.g. 
34 | shared CNV overlap)
35 | 2) corr: 0.5*((mean weight of (+,+) edges)-(mean weight of other edges))}
36 | 
37 | \item{...}{arguments for \code{countIntType_batch}}
38 | }
39 | \value{
40 | (list):
41 | 1) plusID (char) vector of + nodes
42 | 2) minusID (char) vector of - nodes
43 | 3) orig_rat (numeric) \code{ENR} for data networks
44 | 4) fList (char) set of networks processed
45 | 5) orig (data.frame) output of \code{countIntType_batch} for input
46 | networks
47 | }
48 | \description{
49 | Get ENR for all networks in a specified directory
50 | }
51 | \details{
52 | For each network, compute the number of (+,+) and other 
53 | {(+,-),(-,+),(-,-)} interactions. 
54 |  From this compute network ENR.
55 | The measure of (+,+)-enrichment is defined as: 
56 | ENR(network N) = ((num (+,+) edges) - (num other edges))/(num edges).
57 | A network with only (+,+) interactions has an ENR=1 ; a network with
58 | no (+,+) interactions has an ENR=-1; a network with a balance of the two
59 | has ENR=0.
60 | }
61 | \examples{
62 | d <- tempdir()
63 | options(stringsAsFactors=FALSE)
64 | pids <- paste("P",seq_len(5),sep="")
65 | pheno <- data.frame(ID=pids,STATUS=c(rep("case",3),rep("control",2)))
66 | 
67 | # write PSN
68 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),byrow=FALSE,ncol=3)
69 | write.table(m1,file=paste(d,"net1.nettxt",sep=getFileSep()),sep="\t",
70 | col.names=FALSE,row.names=FALSE,quote=FALSE)
71 | m2 <- matrix(c("P3","P4",1),nrow=1)
72 | write.table(m2,file=paste(d,"net2.nettxt",sep=getFileSep()),sep="\t",
73 | col.names=FALSE,row.names=FALSE,quote=FALSE)
74 | 
75 | # compute enrichment
76 | x <- countPatientsInNet(d,dir(d,pattern=c("net1.nettxt","net2.nettxt")), pids)
77 | getEnr(d,pheno,"case","nettxt$")
78 | }
79 | 


--------------------------------------------------------------------------------
/man/getFeatureScores.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getFeatureScores.R
 3 | \name{getFeatureScores}
 4 | \alias{getFeatureScores}
 5 | \title{Compile network scores into a matrix}
 6 | \usage{
 7 | getFeatureScores(inDir, predClasses, getFullCons = TRUE)
 8 | }
 9 | \arguments{
10 | \item{inDir}{(char/list) directory containing directories with all split 
11 | info or list of all CV score files.
12 | If inDir is a single directory then the expected format for CV score files 
13 | is '<inDir>/rngX/predClassX/GM_results/predClassX_pathway_CV_score.txt'.
14 | If inDir is a list, it should have one key per class. The value should be 
15 | the corresponding set of filenames for pathway_CV_score.txt}
16 | 
17 | \item{predClasses}{(char) possible STATUS for patients}
18 | 
19 | \item{getFullCons}{(logical) if TRUE, does not remove rows with NA.
20 | Recommended only when the number of input features is extensively 
21 | pruned by first-pass feature selection.}
22 | }
23 | \value{
24 | (list) one key per patient class. Value is matrix of network
25 | scores across all train/test splits. Each score is the output of
26 | the inner fold of CV.
27 | }
28 | \description{
29 | Compile network scores into a matrix
30 | }
31 | \details{
32 | Given network scores over a set of train/test splits, compiles 
33 | these into a matrix for downstream analysis. See the section on 
34 | 'Output Files'
35 | }
36 | \examples{
37 | inDir <- system.file("extdata","example_output",package="netDx")
38 | netScores <- getFeatureScores(inDir, predClasses = c('LumA','notLumA'))
39 | }
40 | 


--------------------------------------------------------------------------------
/man/getFileSep.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{getFileSep}
 4 | \alias{getFileSep}
 5 | \title{platform-specific file separator}
 6 | \usage{
 7 | getFileSep()
 8 | }
 9 | \value{
10 | (char) "\\" if Windows, else "/"
11 | }
12 | \description{
13 | Returns OS-specific file separator
14 | }
15 | \examples{
16 | getFileSep()
17 | }
18 | 


--------------------------------------------------------------------------------
/man/getGMjar_path.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fileCache.R
 3 | \name{getGMjar_path}
 4 | \alias{getGMjar_path}
 5 | \title{download and update GeneMANIA jar file}
 6 | \usage{
 7 | getGMjar_path(verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{verbose}{(logical) print messages}
11 | }
12 | \value{
13 | (char) Path to local cached copy of GeneMANIA jar file.
14 | The file is downloaded first if an initial download is required
15 | }
16 | \description{
17 | download and update GeneMANIA jar file
18 | }
19 | \examples{
20 | getGMjar_path()
21 | }
22 | 


--------------------------------------------------------------------------------
/man/getNetConsensus.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getNetConsensus.R
 3 | \name{getNetConsensus}
 4 | \alias{getNetConsensus}
 5 | \title{compile net score across a set of predictor results}
 6 | \usage{
 7 | getNetConsensus(scorelist)
 8 | }
 9 | \arguments{
10 | \item{scorelist}{(list) key is dataset name, value is a data.frame 
11 | containing PATHWAY_NAME and SCORE. This is the output of
12 |  compileFeatureScores()}
13 | }
14 | \value{
15 | (data.frame) Rownames are union of all nets in the input list.
16 | Columns show net scores for each key of the input list. Where a 
17 | net is not found in a given list, it is assigned the value of NA
18 | }
19 | \description{
20 | compile net score across a set of predictor results
21 | }
22 | \details{
23 | used to compare how individual nets score for different
24 | predictor configurations
25 | }
26 | \examples{
27 | pathways <- paste("PATHWAY_",1:100,sep="")
28 | highrisk <- list()
29 | for (k in 1:10) { 
30 | 	highrisk[[k]] <- data.frame(PATHWAY_NAME=pathways, 
31 | 	SCORE=runif(length(pathways),min=0,max=10),
32 | 			stringsAsFactors=FALSE);
33 | }
34 | names(highrisk) <- sprintf("Split\%i",1:length(highrisk))
35 | x <- getNetConsensus(highrisk)
36 | }
37 | 


--------------------------------------------------------------------------------
/man/getOR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sparsenet_enrichment_functions.R
 3 | \name{getOR}
 4 | \alias{getOR}
 5 | \title{Get relative proportion of patient classes that contribute to a set of
 6 | networks}
 7 | \usage{
 8 | getOR(pNetworks, pheno_DF, predClass, netFile, verbose = TRUE)
 9 | }
10 | \arguments{
11 | \item{pNetworks}{(matrix) rows are patients, columns are network file
12 | filenames. a[i,j] = 1 if patient i has a structural variant in network
13 | j; else a[i,j] = 0}
14 | 
15 | \item{pheno_DF}{(data.frame) Column "ID" has unique patient identifiers;
16 | column "STATUS" has patient class}
17 | 
18 | \item{predClass}{(char) Class for which predictor is being built}
19 | 
20 | \item{netFile}{(char) vector of networks of interest (e.g. those 
21 | passing feature selection)}
22 | 
23 | \item{verbose}{(logical) print messages}
24 | }
25 | \value{
26 | List. 1) stats: statistics on group overlap with the networks of interest.
27 | This is a 2xK matrix, where rows are classes (predClass,other), and 
28 | columns are: total samples, samples overlapping nets, \% overlap
29 | 2) relEnr: relative enrichment of \code{predClass} over other
30 | }
31 | \description{
32 | Get relative proportion of patient classes that contribute to a set of
33 | networks
34 | }
35 | \details{
36 | Feature selected networks should have the property of being
37 | enriched in the class of interest; e.g. be enriched in 'case' relative
38 | to 'control'. When given a list of networks N, this method computes the
39 | number and proportion of patients that overlap N. A high relative 
40 | fraction of the predicted class indicates successful feature selection.
41 | To create a ROC or precision-recall curve, several calls can be made
42 | to this function, one per cutoff.
43 | }
44 | \examples{
45 | d <- tempdir()
46 | options(stringsAsFactors=FALSE)
47 | pids <- paste("P",seq_len(5),sep="")
48 | pheno <- data.frame(ID=pids,STATUS=c(rep("case",3),rep("control",2)))
49 | 
50 | # write PSN
51 | m1 <- matrix(c("P1","P1","P2","P2","P3","P4",1,1,1),byrow=FALSE,ncol=3)
52 | write.table(m1,file=paste(d,"net1.txt",sep=getFileSep()),sep="\t",
53 | col.names=FALSE,row.names=FALSE,quote=FALSE)
54 | m2 <- matrix(c("P3","P4",1),nrow=1)
55 | write.table(m2,file=paste(d,"net2.txt",sep=getFileSep()),sep="\t",
56 | col.names=FALSE,row.names=FALSE,quote=FALSE)
57 | 
58 | # compute enrichment
59 | x <- countPatientsInNet(d,dir(d,pattern=c("net1.txt","net2.txt")), pids)
60 | getOR(x,pheno,"case",colnames(x)) # should give large RelEnr
61 | }
62 | 


--------------------------------------------------------------------------------
/man/getPSN.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper.R
 3 | \name{getPSN}
 4 | \alias{getPSN}
 5 | \title{get the integrated patient similarity network made of selected features}
 6 | \usage{
 7 | getPSN(
 8 |   dat,
 9 |   groupList,
10 |   makeNets,
11 |   selectedFeatures,
12 |   plotCytoscape = FALSE,
13 |   aggFun = "MEAN",
14 |   prune_pctX = 0.3,
15 |   prune_useTop = TRUE,
16 |   numCores = 1L,
17 |   calcShortestPath = FALSE
18 | )
19 | }
20 | \arguments{
21 | \item{dat}{(MultiAssayExperiment) input data}
22 | 
23 | \item{groupList}{(list) feature groups, identical to groupList provided for buildPredictor()}
24 | 
25 | \item{makeNets}{(function) Function used to create patient similarity networks. Identical to 
26 | makeNets provided to buildPredictor()}
27 | 
28 | \item{selectedFeatures}{(list) selected features for each class (key of list). This object is returned as
29 | part of a call to getResults(), after running buildPredictor().}
30 | 
31 | \item{plotCytoscape}{(logical) If TRUE, plots network in Cytoscape.
32 | Requires Cytoscape software to be installed and running on the computer
33 | when the function call is being made.}
34 | 
35 | \item{aggFun}{(char) function to aggregate edges from different PSN (e.g. mean)}
36 | 
37 | \item{prune_pctX}{(numeric between 0 and 1) fraction of strongest/weakest 
38 | edges to keep when pruning the integrated PSN for visualization.
39 | Must be used in conjunction with prune_useTop=TRUE/FALSE
40 | e.g. Setting prune_pctX=0.2 and prune_useTop=TRUE will keep the top 20\% of edges}
41 | 
42 | \item{prune_useTop}{(logical) when pruning integrated PSN for visualization,
43 | determines whether to keep strongest edges (useTop=TRUE) or weakest edges
44 | (useTop=FALSE)}
45 | 
46 | \item{numCores}{(integer) number of cores for parallel processing}
47 | 
48 | \item{calcShortestPath}{(logical) if TRUE, computes weighted shortest path.
49 | Unless you plan to analyse these separately from looking at the shortest 
50 | path violin plots or integrated PSN in Cytoscape, it is probably best to 
51 | set this to FALSE.}
52 | }
53 | \value{
54 | (list) information about the integrated network
55 | similarity network
56 | 2) patientDistNetwork_pruned (matrix) the network plotted in
57 | Cytoscape. Also note that this is a dissimilarity network, 
58 | so that more similar nodes have smaller edge weights
59 | 3) colLegend (data.frame): legend for the patient network
60 | plotted in Cytoscape. Columns are node labels (STATUS) and
61 | colours (colour)
62 | 6) outDir (char) value of outDir parameter
63 | }
64 | \description{
65 | get the integrated patient similarity network made of selected features
66 | }
67 | \details{
68 | An integrated patient similarity network can be built using combined
69 | top features for each patient class. Such a network is created by taking the union of selected features for
70 | all patient labels, and aggregating pairwise edges for all of them using a user-specified function (aggFun).
71 | The network is then pruned prior to visualization, using a user-specified fraction of strongest edges
72 | (prune_pctX, prune_useTop). In addition, the user may quantify the distance between patients of the 
73 | same class, relative to those of other classes, using Dijkstra distance (calcShortestPath flag).
74 | }
75 | 


--------------------------------------------------------------------------------
/man/getPatientPredictions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getPatientPredictions.R
 3 | \name{getPatientPredictions}
 4 | \alias{getPatientPredictions}
 5 | \title{Calculates patient-level classification accuracy across train/test splits}
 6 | \usage{
 7 | getPatientPredictions(predFiles, pheno, plotAccuracy = FALSE)
 8 | }
 9 | \arguments{
10 | \item{predFiles}{(char) vector of paths to all test predictions
11 | (e.g. 100 files for a 100 train/test split design).
12 | Alternately, the user can also  provide a single directory name, and allow
13 | the script to retrieve prediction files.
14 | Format is 'rootDir/rngX/predictionResults.txt'}
15 | 
16 | \item{pheno}{(data.frame) ID=patient ID, STATUS=ground truth (known class
17 | label). This table is required to get the master list of all patients, as
18 | not every patient is classified in every split.}
19 | 
20 | \item{plotAccuracy}{(logical) if TRUE, shows fraction of times
21 | patient is misclassified, using a dot plot}
22 | }
23 | \value{
24 | (list) of length 2.
25 | 1) (data.frame) rows are patients, (length(predFiles)+2) columns.
26 | Columns seq_len(length(predFiles)): Predicted labels for a given split (NA 
27 | if patient was training sample for the split).
28 | Column (length(predFiles)+1):
29 | split, value is NA. Columns are : ID, REAL_STATUS, predStatus1,...
30 | predStatusN.
31 | Side effect of plotting a dot plot of \% accuracy. Each dot is a patient, 
32 | and the value is '\% splits for which patient was classified correctly'.
33 | }
34 | \description{
35 | Calculates patient-level classification accuracy across train/test splits
36 | }
37 | \details{
38 | Takes all the predictions across the different train/test splits,
39 | and for each patient, generates a score indicating how many times they were
40 | classified by netDx as belonging to each of the classes. The result is that
41 | we get a measure of individual classification accuracy across the different
42 | train/test splits.
43 | }
44 | \examples{
45 | inDir <- system.file("extdata","example_output",package="netDx")
46 | data(pheno)
47 | all_rngs <- list.dirs(inDir, recursive = FALSE)
48 | all_pred_files <- unlist(lapply(all_rngs, function(x) {
49 |     paste(x, 'predictionResults.txt', 
50 | 	sep = getFileSep())}))
51 | pred_mat <- getPatientPredictions(all_pred_files, pheno)
52 | }
53 | 


--------------------------------------------------------------------------------
/man/getPatientRankings.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getPatientRankings.R
 3 | \name{getPatientRankings}
 4 | \alias{getPatientRankings}
 5 | \title{Process GM PRANK files to get the ROC curve for the query}
 6 | \usage{
 7 | getPatientRankings(pFile, pheno_DF, predClass, plotIt = FALSE, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{pFile}{(char) path to PRANK file}
11 | 
12 | \item{pheno_DF}{(data.frame) patient IDs ('ID') and label('STATUS')}
13 | 
14 | \item{predClass}{(character) class label for which predictor is built}
15 | 
16 | \item{plotIt}{(logical) if TRUE plots ROC curve}
17 | 
18 | \item{verbose}{(logical) print messages}
19 | }
20 | \value{
21 | (list) 
22 | 1) predLbl: GeneMANIA scores (predicted labels). Higher score for
23 | higher ranked patient. 
24 | 2) realLbl: binary value indicating if patient label matches predictor
25 | label (real labels)
26 | 3) fullmat: pheno_DF merged with similarity scores ('similarityScore') 
27 | and real label ('isPredClass')
28 | 4) roc: output of ROCRs performance(,'tpr','fpr') - ROC curve
29 | 5) auc: output of ROCRs auc() 
30 | 6) precall: output of ROCRs performance(, 'prec','rec')
31 | 7) f: output of ROCRs performance(,'f')
32 | If < 2 patients in PRANK file, roc,auc, precall, f are all returned as
33 | NA.
34 | }
35 | \description{
36 | Process GM PRANK files to get the ROC curve for the query
37 | }
38 | \examples{
39 | data(pheno)
40 | prankFile <- system.file("extdata",
41 | paste("GM_PRANK","CV_1.query-results.report.txt.PRANK",sep=getFileSep()),
42 | 	package="netDx")
43 | x <- getPatientRankings(prankFile, pheno, 'LumA')
44 | }
45 | 


--------------------------------------------------------------------------------
/man/getPerformance.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotPerf_simple.R
 3 | \name{getPerformance}
 4 | \alias{getPerformance}
 5 | \title{performance metrics for model}
 6 | \usage{
 7 | getPerformance(res, predClasses)
 8 | }
 9 | \arguments{
10 | \item{res}{(data.frame) result from predicting labels on held-out test set. output of predict() function. 
11 | columns include ID, STATUS (ground truth) and PRED_CLASS (predicted label)}
12 | 
13 | \item{predClasses}{(character) patient labels used by classifier}
14 | }
15 | \value{
16 | (list)
17 | 1) rocCurve: ROCR performance object for ROC curve
18 | 2) prCurve: ROCR performance object for PR curve
19 | 3) auroc: Area under ROC curve
20 | 4) aupr: Area under PR curve
21 | 5) accuracy: Accuracy
22 | }
23 | \description{
24 | performance metrics for model
25 | }
26 | 


--------------------------------------------------------------------------------
/man/getRegionOL.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getRegionOL.R
 3 | \name{getRegionOL}
 4 | \alias{getRegionOL}
 5 | \title{Returns overlapping named ranges for input ranges}
 6 | \usage{
 7 | getRegionOL(gr, rngList)
 8 | }
 9 | \arguments{
10 | \item{gr}{(GRanges) query ranges}
11 | 
12 | \item{rngList}{(list) keys are names, and values are GRanges, each range
13 | of which has a name (in 'name' column). Note: It is faster to provide
14 | a list of length 1 ; if the list is long, combining into a single GRanges
15 | object could prove slow.}
16 | }
17 | \value{
18 | (GRanges) query ranges with the added column 'LOCUS_NAMES'. 
19 | Where a range overlaps with multiple loci, the names are reported as a 
20 | comma-separated vector
21 | }
22 | \description{
23 | Returns overlapping named ranges for input ranges
24 | }
25 | \details{
26 | Given a set of query GRanges, and a subject list-of-GRanges,
27 | updates the query with a column 'LOCUS_NAMES' containing the names of
28 | ranges overlapped by the query. One application is to map structural
29 | variants, such as CNVs, to genes in pathways of interest. In this 
30 | scenario \code{gr} would contain the patient CNVs, and \code{rngList}
31 | would be a list of GenomicRanges objects, one per cellular pathway.
32 | }
33 | \examples{
34 | data(cnv_GR,pathway_GR)
35 | x <- getRegionOL(cnv_GR,pathway_GR)
36 | }
37 | 


--------------------------------------------------------------------------------
/man/getResults.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper.R
 3 | \name{getResults}
 4 | \alias{getResults}
 5 | \title{Compiles performance and selected features for a trained model.}
 6 | \usage{
 7 | getResults(res, status, featureSelCutoff = 1L, featureSelPct = 0)
 8 | }
 9 | \arguments{
10 | \item{res}{(list) output of buildPredictor() function}
11 | 
12 | \item{status}{(character) unique patient labels used by the classifier, found in colData()$STATUS}
13 | 
14 | \item{featureSelCutoff}{(integer) cutoff score for feature selection.
15 | A feature must have minimum of this score for specified fraction of splits 
16 | (see featureSelPct) to pass.}
17 | 
18 | \item{featureSelPct}{(numeric between 0 and 1) cutoff percent for feature selection.
19 | A feature must have minimum score of featureSelCutoff for featureSelPct of 
20 | train/test splits to pass.}
21 | }
22 | \value{
23 | list of results.
24 | - selectedFeatures (list of character vectors): list, one per class
25 | - performance (list of mixed datatypes) including mean accuracy (meanAccuracy), 
26 | split-level accuracy (splitAccuracy), split-level AUROC (auroc),
27 | split-level AUPR (splitAUR)
28 | Side effect of plotting ROC curve if binary classifier
29 | }
30 | \description{
31 | Compiles performance and selected features for a trained model.
32 | }
33 | \details{
34 | This function is run after training a model using buildPredictor(). 
35 | It takes patient input data, model output, and returns performance and selected features.
36 | }
37 | \examples{
38 | data(toymodel) # load example results from binary breast classification
39 | patlabels <- names(toymodel$Split1$featureSelected)
40 | getResults(toymodel,patlabels,2,0.5)
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/man/getSimilarity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getSimilarity.R
 3 | \name{getSimilarity}
 4 | \alias{getSimilarity}
 5 | \title{Measures of patient similarity}
 6 | \usage{
 7 | getSimilarity(x, type = "pearson", customFunc, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{(matrix) matrix for which pairwise patient similarity is to be
11 | computed. Expects one column per patient, and one measurement per row.}
12 | 
13 | \item{type}{(character) name of similarity measure. Currently supports 
14 | Pearson correlation ('pearson') or a custom measure ('custom')}
15 | 
16 | \item{customFunc}{(function) custom similarity function. Only used when 
17 | \code{type='custom'}. The function takes \code{x} as first argument and 
18 | can take additional argument. It should return a symmetric matrix of 
19 | pairwise patient similarities.}
20 | 
21 | \item{...}{parameter for customFunc}
22 | }
23 | \value{
24 | symmetric matrix of size N, where N is number of samples
25 | }
26 | \description{
27 | Measures of patient similarity
28 | }
29 | \examples{
30 | data(xpr) 
31 | x <- getSimilarity(xpr) # similarity by Pearson corr
32 | mySim <- function(x) cor(x,method='kendall')
33 | x <- getSimilarity(xpr,customFunc=mySim) # custom similarity
34 | }
35 | 


--------------------------------------------------------------------------------
/man/makeInputForEnrichmentMap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper.R
 3 | \name{makeInputForEnrichmentMap}
 4 | \alias{makeInputForEnrichmentMap}
 5 | \title{Wrapper to create input files for Enrichment Map}
 6 | \usage{
 7 | makeInputForEnrichmentMap(
 8 |   model,
 9 |   results,
10 |   pathwayList,
11 |   EMapMinScore = 0L,
12 |   EMapMaxScore = 1L,
13 |   EMapPctPass = 0.5,
14 |   outDir
15 | )
16 | }
17 | \arguments{
18 | \item{model}{(list) Output of training model, generated by running buildPredictor()}
19 | 
20 | \item{results}{(list) Model results. output of getResults()}
21 | 
22 | \item{pathwayList}{(list) output of readPathwayFile() used to make pathway-level features for predictor}
23 | 
24 | \item{EMapMinScore}{(integer) minimum score for Enrichment Map}
25 | 
26 | \item{EMapMaxScore}{(integer) maximum score for Enrichment Map}
27 | 
28 | \item{EMapPctPass}{(numeric between 0 and 1) percent of splits for which feature must have score in range
29 | [EMapMinScore,EMapMaxScore] to be included for EnrichmentMap visualization}
30 | 
31 | \item{outDir}{(char) directory where files should be written}
32 | }
33 | \value{
34 | 
35 | }
36 | \description{
37 | Wrapper to create input files for Enrichment Map
38 | }
39 | \details{
40 | An Enrichment Map is a network-based visualization of top-scoring pathway features
41 | and themes. It is generated in Cytoscape. This script generates the input files needed
42 | for Cytoscape to create an Enrichment Map visualization.
43 | }
44 | 


--------------------------------------------------------------------------------
/man/makePSN_RangeSets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/makePSN_RangeSets.R
 3 | \name{makePSN_RangeSets}
 4 | \alias{makePSN_RangeSets}
 5 | \title{Create patient similarity interaction networks based on range sets}
 6 | \usage{
 7 | makePSN_RangeSets(
 8 |   gr,
 9 |   rangeSet,
10 |   netDir = tempdir(),
11 |   simMetric = "coincide",
12 |   quorum = 2L,
13 |   verbose = TRUE,
14 |   numCores = 1L
15 | )
16 | }
17 | \arguments{
18 | \item{gr}{(GRanges) patient ranges. Metadata should contain:
19 | ID: (char) unique patient ID
20 | LOCUS_NAME: (comma-separated char) named ranges overlapped}
21 | 
22 | \item{rangeSet}{(list) list of GRanges, one entry per range set.
23 | Key is the name of the range set, and value is a GRanges object with
24 | corresponding ranges}
25 | 
26 | \item{netDir}{(char) path to directory where networks should be written}
27 | 
28 | \item{simMetric}{(char) Similarity metric. Currently only 'coincide' 
29 | is supported; two patients share an edge if they overlap elements in the
30 | same gene set. E.g. Two patients with CNVs that overlap different
31 | genes of the same pathway would be related, but patients overlapping
32 | genes that don't share a pathway (or, more accurately, a named-set 
33 | grouping) would not be related. The edge weight is therefore binary.}
34 | 
35 | \item{quorum}{(integer) minimum number of patients in a network for the 
36 | network to be constructed}
37 | 
38 | \item{verbose}{(logical) print detailed messages}
39 | 
40 | \item{numCores}{(integer) num cores for parallel processing}
41 | }
42 | \value{
43 | Vector of network filenames
44 | }
45 | \description{
46 | Create patient similarity interaction networks based on range sets
47 | }
48 | \details{
49 | Creates patient similarity networks when data consist of 
50 | genomic events associated with patients. Examples include CNV or 
51 | indel data for patients. To generate networks from full matrices such as
52 | gene expression data, use \code{makePSN_NamedMatrix} instead.
53 | Genomic ranges corresponding to events in patients (gr) should be named.
54 | One network is created per named range set (rangeSet). Each set
55 | reflects a group of related loci ; for example, genomic ranges associated
56 | with genes in the same cellular pathway. 
57 | Currently, the only similarity measure supported is binary; two patients
58 | are related in a network N if they both overlap elements of set N.
59 | }
60 | \examples{
61 | data(pathway_GR,cnv_GR)
62 | ### # example commented out to avoid build errors because of parallel
63 | ### # execution. Uncomment to run.
64 | ### netList <- makePSN_RangeSets(cnv_GR,pathway_GR,'.')
65 | }
66 | 


--------------------------------------------------------------------------------
/man/makeQueries.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/makeQueries.R
 3 | \name{makeQueries}
 4 | \alias{makeQueries}
 5 | \title{Randomly select patients for queries for feature selection}
 6 | \usage{
 7 | makeQueries(incPat, featScoreMax = 10L, verbose = TRUE)
 8 | }
 9 | \arguments{
10 | \item{incPat}{(char) vector of patient IDs to be included in query}
11 | 
12 | \item{featScoreMax}{(integer) Number of times to run query, usually equal 
13 | to the max score for features in the design (e.g. if features are scored 
14 | out of 10, then this value is 10).}
15 | 
16 | \item{verbose}{(logical) print messages}
17 | }
18 | \value{
19 | (list) of length \code{featScoreMax}, containing names of patients 
20 | in query file for each fold
21 | }
22 | \description{
23 | Randomly select patients for queries for feature selection
24 | }
25 | \examples{
26 | data(pheno)
27 | x <- makeQueries(pheno$ID)
28 | }
29 | 


--------------------------------------------------------------------------------
/man/makeSymmetric.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/makeSymmetric.R
 3 | \name{makeSymmetric}
 4 | \alias{makeSymmetric}
 5 | \title{Convert a network in source-target-weight format to symmetric matrix}
 6 | \usage{
 7 | makeSymmetric(x, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{x}{(data.frame) three columns, with source node, target node, and 
11 | edge weight. Entries must include universe of nodes; those with missing
12 | edges must be included as having edge weight NA}
13 | 
14 | \item{verbose}{(logical) print messages}
15 | }
16 | \value{
17 | (matrix) symmetric adjacency matrix
18 | }
19 | \description{
20 | Convert a network in source-target-weight format to symmetric matrix
21 | }
22 | \details{
23 | A common format for network representation is to use a three
24 | column table listing source node, target node, and weight.  
25 | This is the format netDx uses for network integration and visualization
26 | in Cytoscape. However, some functionality requires a square symmetric
27 | adjacency matrix. This function takes as input the three-column format
28 | and converts to the adjacency matrix. 
29 | NOTE: Symmetric attribute is assumed, and the function automatically sets
30 | a[i,j] = a[j,i]. Diagonal is assumed to have value of 1.0. Finally
31 | missing edges will be assigned NA values.
32 | }
33 | \examples{
34 | src <- c("A","B"); tgt <- c("C","C")
35 | cur <- data.frame(source=src,target=tgt,weight=c(0.3,0.8))
36 | makeSymmetric(cur)
37 | }
38 | 


--------------------------------------------------------------------------------
/man/mapNamedRangesToSets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mapNamedRangesToSets.R
 3 | \name{mapNamedRangesToSets}
 4 | \alias{mapNamedRangesToSets}
 5 | \title{Map named ranges to corresponding set of named ranges}
 6 | \usage{
 7 | mapNamedRangesToSets(gr, rangeList, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{gr}{(GRanges) named ranges to be grouped}
11 | 
12 | \item{rangeList}{(list) sets of range names}
13 | 
14 | \item{verbose}{(logical) print detailed messages}
15 | }
16 | \value{
17 | RangeList. keys are names of \code{rangeList}, values are GRanges
18 | }
19 | \description{
20 | Map named ranges to corresponding set of named ranges
21 | }
22 | \details{
23 | Example application is when we have named ranges each
24 | corresponding to genes or regulatory elements, and we wish to group
25 | these ranges based on metabolic pathway.
26 | }
27 | \examples{
28 | data(genes,pathwayList); 
29 | gene_GR<-GenomicRanges::GRanges(genes$chrom,
30 |   IRanges::IRanges(genes$txStart,genes$txEnd),
31 | 		name=genes$name2)
32 | path_GRList <- mapNamedRangesToSets(gene_GR,pathwayList)
33 | }
34 | 


--------------------------------------------------------------------------------
/man/matrix_getIJ.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/matrix_getIJ.R
 3 | \name{matrix_getIJ}
 4 | \alias{matrix_getIJ}
 5 | \title{Converts matrix index (1 to m*n) to row (m) and column (n) number}
 6 | \usage{
 7 | matrix_getIJ(dimMat, idx)
 8 | }
 9 | \arguments{
10 | \item{dimMat}{(integer vector of length 2) output of \code{dim()} for
11 | matrix in question}
12 | 
13 | \item{idx}{(integer vector of length n) matrix indices}
14 | }
15 | \value{
16 | (matrix) n-by-2, first column has row indices ; second column
17 | has col indices
18 | }
19 | \description{
20 | Converts matrix index (1 to m*n) to row (m) and column (n) number
21 | }
22 | 


--------------------------------------------------------------------------------
/man/modelres.Rd:
--------------------------------------------------------------------------------
 1 | \name{modelres}
 2 | \alias{modelres}
 3 | \docType{data}
 4 | \title{
 5 | 	Sample output of getResults()
 6 | }
 7 | \description{
 8 | Output of getResults() generated by running toymodel.
 9 | toymodel is itself the output of buildPredictor() run on a simple dataset for binary breast tumour classification using two genomic data sources.
10 | BRCA data were downloaded using curatedTCGAData for mRNA and miRNA expression. buildPredictor() was run by scoring features out of 2, with selected features passing 1 out of 2.
11 | Tumours were labelled either "Luminal.A" or "other".
12 | See details of getResults() for output format.
13 | }
14 | \usage{data(modelres)}
15 | \examples{
16 | data(modelres)
17 | head(modelres)
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/moveInteractionNets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/moveInteractionNets.R
 3 | \name{moveInteractionNets}
 4 | \alias{moveInteractionNets}
 5 | \title{moves interaction networks when compiling database for sparse genetic
 6 | workflow}
 7 | \usage{
 8 | moveInteractionNets(netDir, outDir, pheno, fileSfx = "_cont.txt")
 9 | }
10 | \arguments{
11 | \item{netDir}{(char) source directory}
12 | 
13 | \item{outDir}{(char) target directory}
14 | 
15 | \item{pheno}{(data.frame) contains patient ID and STATUS}
16 | 
17 | \item{fileSfx}{(char) suffix to strip from network file names before
18 | registering in metadata tables}
19 | }
20 | \value{
21 | No value. Side effect of moving interaction nets to target
22 | directory and creating network-related metadata files used to compile
23 | feature database
24 | }
25 | \description{
26 | moves interaction networks when compiling database for sparse genetic
27 | workflow
28 | }
29 | 


--------------------------------------------------------------------------------
/man/normDiff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/similarities.R
 3 | \name{normDiff}
 4 | \alias{normDiff}
 5 | \title{Similarity metric of normalized difference}
 6 | \usage{
 7 | normDiff(x)
 8 | }
 9 | \arguments{
10 | \item{x}{(numeric) vector of values, one per patient (e.g. ages)}
11 | }
12 | \value{
13 | symmetric matrix of size length(x) (number of patients) containing
14 | pairwise patient similarities
15 | }
16 | \description{
17 | Similarity metric of normalized difference
18 | }
19 | \details{
20 | Similarity metric used when data for a network consists of
21 | exactly 1 continuous variable  (e.g. a network based only on 'age'). 
22 | When number of variables is 2-5, use avgNormDiff() which 
23 | takes the average of normalized difference for individual variables
24 | }
25 | \examples{
26 | sim <- normDiff(rnorm(10))
27 | }
28 | 


--------------------------------------------------------------------------------
/man/npheno.Rd:
--------------------------------------------------------------------------------
 1 | \name{npheno}
 2 | \alias{npheno}
 3 | \docType{data}
 4 | \title{
 5 | 	Toy sample metadata table
 6 | }
 7 | \description{
 8 | data.frame with patient ID ("ID") and label ("STATUS"). 100 "cases" and 100 "controls"
 9 | }
10 | \usage{data(npheno)}
11 | \examples{
12 | data(npheno)
13 | head(npheno)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/pathwayList.Rd:
--------------------------------------------------------------------------------
 1 | \name{pathwayList}
 2 | \alias{pathwayList}
 3 | \docType{data}
 4 | \title{
 5 | 	Sample list of pathways
 6 | }
 7 | \description{
 8 | List where keys are pathway names and values are character vectors of member genes for the corresponding pathways
 9 | }
10 | \usage{data(pathwayList)}
11 | \examples{
12 | data(pathwayList)
13 | head(pathwayList)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/pathway_GR.Rd:
--------------------------------------------------------------------------------
 1 | \name{pathway_GR}
 2 | \alias{pathway_GR}
 3 | \docType{data}
 4 | \title{
 5 | 	List of genomic ranges mapped to pathways
 6 | }
 7 | \description{
 8 | List object. Keys are pathway names, values are GRanges objects with coordinates of corresponding genes. Small subset of pathways sufficient for package examples.
 9 | }
10 | \usage{data(pathway_GR)}
11 | \examples{
12 | data(pathway_GR)
13 | head(pathway_GR)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/perfCalc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/perfCalc.R
 3 | \name{perfCalc}
 4 | \alias{perfCalc}
 5 | \title{Computes variety of predictor evaluation measures based on the confusion
 6 | matrix}
 7 | \usage{
 8 | perfCalc(dat)
 9 | }
10 | \arguments{
11 | \item{dat}{(data.frame): 5 columns: score, tp, fp, tn, fn. 
12 | One row per cutoff
13 | score for feature selection}
14 | }
15 | \value{
16 | (list)
17 | stats (data.frame): score, f1, ppv, precision and recall. One row
18 | per cutoff for feature selection
19 | auc (numeric between 0 and 1): AUC of overall ROC curve
20 | prauc (numeric between 0 and 1): AUC of overall precision-recall curve
21 | }
22 | \description{
23 | Computes variety of predictor evaluation measures based on the confusion
24 | matrix
25 | }
26 | \examples{
27 | data(confmat)
28 | x <- perfCalc(confmat)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/pheno.Rd:
--------------------------------------------------------------------------------
 1 | \name{pheno}
 2 | \alias{pheno}
 3 | \docType{data}
 4 | \title{
 5 | Sample metadata table
 6 | }
 7 | \description{
 8 | data.frame with patient ID (ID), sample type (Type), tumour subtype (STATUS).
 9 | From TCGA 2012 breast cancer paper (see reference).
10 | }
11 | \usage{data(pheno)}
12 | \source{
13 | The Cancer Genome Atlas. (2012). Nature 490:61-70.
14 | }
15 | \references{
16 | The Cancer Genome Atlas. (2012). Nature 490:61-70.
17 | }
18 | \examples{
19 | data(pheno)
20 | head(pheno)
21 | }
22 | \keyword{datasets}
23 | 


--------------------------------------------------------------------------------
/man/pheno_full.Rd:
--------------------------------------------------------------------------------
 1 | \name{pheno_full}
 2 | \alias{pheno_full}
 3 | \docType{data}
 4 | \title{
 5 | Subsample of TCGA breast cancer data used for netDx function examples
 6 | }
 7 | \description{
 8 | Patient ID and tumour status in "pheno", subsample of gene expression in "xpr" and CNV data in "cnv_GR"
 9 | }
10 | \usage{data(pheno_full)}
11 | \source{
12 | The Cancer Genome Atlas. (2012). Nature 490:61-70.
13 | }
14 | \references{
15 | The Cancer Genome Atlas. (2012). Nature 490:61-70.
16 | }
17 | \examples{
18 | data(pheno_full)
19 | head(pheno_full)
20 | }
21 | \keyword{datasets}
22 | 


--------------------------------------------------------------------------------
/man/plotPerf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotPerf.R
 3 | \name{plotPerf}
 4 | \alias{plotPerf}
 5 | \title{Plots various measures of predictor performance for binary classifiers}
 6 | \usage{
 7 | plotPerf(resList = NULL, inFiles, predClasses, plotSEM = FALSE)
 8 | }
 9 | \arguments{
10 | \item{resList}{(list) list of prediction results. If provided, the method
11 | will ignore inFiles}
12 | 
13 | \item{inFiles}{(char) path to predictionResults.txt files.
14 | A vector, each with absolute paths to predictionResults.txt}
15 | 
16 | \item{predClasses}{(char) vector of class names.}
17 | 
18 | \item{plotSEM}{(logical) metric for error bars. If set to TRUE, plots SEM;
19 | else plots SD.}
20 | }
21 | \value{
22 | (list) each key corresponds to an input file in inFiles.
23 | Value is a list with:
24 | 1) stats: 'stats' component of perfCalc
25 | 2) rocCurve: ROCR performance object for ROC curve
26 | 3) prCurve: ROCR performance object for PR curve
27 | 4) auroc: Area under ROC curve
28 | 5) aupr: Area under PR curve
29 | 6) accuracy: Accuracy
30 | 
31 | Side effect of plotting in a 2x2 format:
32 | 1) mean+/-SEM or (mean+/-SD) AUROC
33 | 2) mean+/-SEM or (mean+/-SD) AUPR
34 | 3) ROC curve for all runs plus average
35 | 4) PR curve for all runs plus average
36 | }
37 | \description{
38 | Plots various measures of predictor performance for binary classifiers
39 | }
40 | \details{
41 | Plots individual and average ROC/PR curves. mean+/-SEM performance 
42 | for a predictor run using nested
43 | cross-validation or a similar repeated design.
44 | predictionResults.txt contains a (data.frame)
45 | }
46 | \examples{
47 | inDir <- system.file("extdata","example_output",package='netDx')
48 | inFiles <- paste(rep(inDir,3), sprintf("rng\%i",seq_len(3)),"predictionResults.txt",
49 |  sep=getFileSep())
50 | resList <- list()
51 | for (k in seq_len(length(inFiles))) {
52 | 	resList[[k]] <- read.delim(inFiles[k],sep="\t",header=TRUE,as.is=TRUE)
53 | }
54 | plotPerf(resList, predClasses = c('LumA','notLumA'))
55 | }
56 | 


--------------------------------------------------------------------------------
/man/plotPerf_multi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotPerf_multi.R
 3 | \name{plotPerf_multi}
 4 | \alias{plotPerf_multi}
 5 | \title{Plots a set of ROC/PR curves with average.}
 6 | \usage{
 7 | plotPerf_multi(
 8 |   inList,
 9 |   plotTitle = "performance",
10 |   plotType = "ROC",
11 |   xlab = "TPR",
12 |   ylab = "FPR",
13 |   meanCol = "darkblue",
14 |   xlim = c(0, 1),
15 |   ylim = c(0, 1)
16 | )
17 | }
18 | \arguments{
19 | \item{inList}{(list or ROCR::performance object) ROCR::performance objects, one per iteration}
20 | 
21 | \item{plotTitle}{(char) plot title}
22 | 
23 | \item{plotType}{(char) one of ROC | PR | custom. Affects x/y labels}
24 | 
25 | \item{xlab}{(char) x-axis label}
26 | 
27 | \item{ylab}{(char) y-axis label}
28 | 
29 | \item{meanCol}{(char) colour for mean trendline}
30 | 
31 | \item{xlim}{(numeric) min/max extent for x-axis}
32 | 
33 | \item{ylim}{(numeric) min/max extent for y-axis}
34 | }
35 | \value{
36 | No value. Side effect of plotting ROC and PR curves
37 | }
38 | \description{
39 | Plots a set of ROC/PR curves with average.
40 | }
41 | \details{
42 | Plots average curves with individual curves superimposed.
43 | }
44 | \examples{
45 | inDir <- system.file("extdata","example_output",package="netDx")
46 | all_rng <- list.files(path = inDir, pattern = 'rng.')
47 | fList <- paste(inDir,all_rng,'predictionResults.txt',sep=getFileSep())
48 | rocList <- list()
49 | for (k in seq_len(length(fList))) {
50 |   dat <- read.delim(fList[1],sep='\t',header=TRUE,as.is=TRUE)
51 |   predClasses <- c('LumA', 'notLumA')
52 |   pred_col1 <- sprintf('\%s_SCORE',predClasses[1])
53 |   pred_col2 <- sprintf('\%s_SCORE',predClasses[2])
54 |   idx1 <- which(colnames(dat) == pred_col1)
55 |   idx2 <- which(colnames(dat) == pred_col2)
56 |  pred <- ROCR::prediction(dat[,idx1]-dat[,idx2], 
57 | 		dat$STATUS==predClasses[1])
58 |  rocList[[k]] <- ROCR::performance(pred,'tpr','fpr')
59 | }
60 | plotPerf_multi(rocList,'ROC')
61 | }
62 | 


--------------------------------------------------------------------------------
/man/predRes.Rd:
--------------------------------------------------------------------------------
 1 | \name{predRes}
 2 | \alias{predRes}
 3 | \docType{data}
 4 | \title{
 5 | 	Example output of getPatientRankings, used to call labels for test patients.
 6 | }
 7 | \description{
 8 | List of lists. First level is a list of size 4, with one key entry for each tumour type in example medulloblastoma dataset (WNT,SHH,Group3,Group4). Each list in the second level is of length 8, with structure corresponding to the output of getPatientRankings().
 9 | }
10 | \usage{data(predRes)}
11 | \examples{
12 | data(predRes)
13 | summary(predRes)
14 | summary(predRes[[1]])
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/man/predict.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/predict.R
 3 | \name{predict}
 4 | \alias{predict}
 5 | \title{predict patient labels}
 6 | \usage{
 7 | predict(
 8 |   trainMAE,
 9 |   testMAE,
10 |   groupList,
11 |   featSel,
12 |   makeNetFunc,
13 |   outDir,
14 |   impute = FALSE,
15 |   verbose = FALSE,
16 |   numCores = 1L,
17 |   JavaMemory = 4L,
18 |   debugMode = FALSE
19 | )
20 | }
21 | \arguments{
22 | \item{trainMAE}{(MultiAssayExperiment) patient data for training samples. Same as provided to buildPredictor()}
23 | 
24 | \item{testMAE}{(MultiAssayExperiment) new patient dataset for testing model. Assays must be the same as for trainMAE.}
25 | 
26 | \item{groupList}{(list) list of features used to train the model. Keys are data types, and values are lists for groupings within those datatypes.
27 | e.g. keys could include {'clinical','rna','methylation'}, and values within 'rna' could include pathway names {'cell cycle', 'DNA repair'}, etc.,
28 | featSel will be used to subset}
29 | 
30 | \item{featSel}{(list) selected features to be used in the predictive model. 
31 | keys are patient labels (e.g. "responder/nonresponder"), and values are feature names 
32 | identified by running buildPredictor(). Feature names must correspond to names of groupList, from which they will be subset.}
33 | 
34 | \item{makeNetFunc}{(function) function to create PSN features from patient data. See makeNetFunc in buildPredictor() for details}
35 | 
36 | \item{outDir}{(char) directory for results}
37 | 
38 | \item{impute}{(logical) if TRUE imputes train and test samples separately before creating features. Currently unsupported.}
39 | 
40 | \item{verbose}{(logical) print messages}
41 | 
42 | \item{numCores}{(integer) number of CPU cores for parallel processing}
43 | 
44 | \item{JavaMemory}{(integer) memory in (Gb) used for each fold of CV}
45 | 
46 | \item{debugMode}{(logical) Set to TRUE for detailed messages. Used for debugging.}
47 | }
48 | \value{
49 | (data.frame) predicted patient similarities and labels
50 | columns are:  1) ID, 2) STATUS (ground truth), 3) <label>_SCORE: similarity score for the corresponding label,
51 | 4) PRED_CLASS: predicted class
52 | }
53 | \description{
54 | Once a model is trained, this function is used to classify new patients using selected features
55 | }
56 | 


--------------------------------------------------------------------------------
/man/predictPatientLabels.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/predictPatientLabels.R
 3 | \name{predictPatientLabels}
 4 | \alias{predictPatientLabels}
 5 | \title{assign patient class when ranked by multiple GM predictors}
 6 | \usage{
 7 | predictPatientLabels(resSet, verbose = TRUE)
 8 | }
 9 | \arguments{
10 | \item{resSet}{(list) output of getPatientRankings, each key for a different
11 | predictor. names(resSet) contain predictor label}
12 | 
13 | \item{verbose}{(logical) print detailed messages}
14 | }
15 | \value{
16 | data.frame: ID, similarityScore, PRED_CLASS
17 | }
18 | \description{
19 | assign patient class when ranked by multiple GM predictors
20 | }
21 | \examples{
22 | data(predRes); predClass <- predictPatientLabels(predRes)
23 | }
24 | 


--------------------------------------------------------------------------------
/man/pruneNet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pruneNetByStrongest.R
 3 | \name{pruneNet}
 4 | \alias{pruneNet}
 5 | \title{Prune network by retaining strongest edges}
 6 | \usage{
 7 | pruneNet(net, vertices, pctX = 0.1, useTop = TRUE)
 8 | }
 9 | \arguments{
10 | \item{net}{(data.frame)  Network to prune. Columns are: source,target,weight}
11 | 
12 | \item{vertices}{(char) node names. Should match those in net[,1:2]}
13 | 
14 | \item{pctX}{(numeric 0 to 1) Fraction of top/bottom edges to retain}
15 | 
16 | \item{useTop}{(logical) if TRUE prunes to top pctX edges; else
17 | prunes to bottom pctX edges}
18 | }
19 | \value{
20 | (data.frame) pruned network. Three columns: AliasA, AliasB, and 
21 | weight
22 | }
23 | \description{
24 | Prune network by retaining strongest edges
25 | }
26 | 


--------------------------------------------------------------------------------
/man/pruneNet_pctX.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pruneNet_pctX.R
 3 | \name{pruneNet_pctX}
 4 | \alias{pruneNet_pctX}
 5 | \title{Prune network by retaining strongest edges}
 6 | \usage{
 7 | pruneNet_pctX(net, vertices, pctX = 0.1, useTop = TRUE)
 8 | }
 9 | \arguments{
10 | \item{net}{(data.frame)  Network to prune. Columns are: source,target,weight}
11 | 
12 | \item{vertices}{(char) node names. Should match those in net[,1:2]}
13 | 
14 | \item{pctX}{(numeric 0 to 1) Fraction of top/bottom edges to retain}
15 | 
16 | \item{useTop}{(logical) if TRUE prunes to top pctX edges; else
17 | prunes to bottom pctX edges}
18 | }
19 | \value{
20 | (data.frame) pruned network. Three columns: AliasA, AliasB, and 
21 | weight
22 | }
23 | \description{
24 | Prune network by retaining strongest edges
25 | }
26 | 


--------------------------------------------------------------------------------
/man/pruneNets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pruneNets.R
 3 | \name{pruneNets}
 4 | \alias{pruneNets}
 5 | \title{Prune interaction networks to keep only the networks and patients 
 6 | requested}
 7 | \usage{
 8 | pruneNets(
 9 |   oldDir,
10 |   newDir = tempdir(),
11 |   filterNets = "*",
12 |   filterIDs = "*",
13 |   netSfx = "_cont.txt$",
14 |   verbose = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{oldDir}{(char) path to directory with original networks}
19 | 
20 | \item{newDir}{(char) path to output directory for pruned networks}
21 | 
22 | \item{filterNets}{(char) vector of networks to include. These should 
23 | match filenames in \code{oldDir}. Value of '*' results in pruning all
24 | networks}
25 | 
26 | \item{filterIDs}{(char) patients to include in pruned networks. These
27 | should match nodes in the input interaction networks}
28 | 
29 | \item{netSfx}{(char) suffix for network file names. Only used if 
30 | \code{filterNets='*'}.}
31 | 
32 | \item{verbose}{(logical) print messages}
33 | }
34 | \value{
35 | (no value). Side effect of writing pruned network files to 
36 | \code{newDir}
37 | }
38 | \description{
39 | Prune interaction networks to keep only the networks and patients 
40 | requested
41 | }
42 | \details{
43 | This function is crucial for patient data that is highly 
44 | sparse; examples include patient CNVs or indels, as opposed to full matrix
45 | measures (gene expression, questionnaire data). Each step where the pool
46 | of patients is subset - e.g. limiting feature selection only to patients
47 | in training set - changes the set of networks that are eligible. 
48 | Some networks may only contain test patients, while others may contain
49 | a single edge between a training and a test patient. Upon subsetting,
50 | such networks are no longer eligible for downstream use, such as 
51 | feature selection. This function rewrites those subnetworks of the 
52 | original networks that consist of eligible patients.
53 | }
54 | \examples{
55 | data(npheno)
56 | netDir <- system.file("extdata","example_nets",package='netDx')
57 | pruneNets(netDir,tempdir(),filterIDs=npheno[seq_len(10),],
58 |  netSfx='txt$')
59 | }
60 | 


--------------------------------------------------------------------------------
/man/randAlphanumString.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{randAlphanumString}
 4 | \alias{randAlphanumString}
 5 | \title{Generate random alphanumerical string of length 10}
 6 | \usage{
 7 | randAlphanumString(numStrings = 1L)
 8 | }
 9 | \arguments{
10 | \item{numStrings}{(integer) number of strings to generate}
11 | }
12 | \value{
13 | vector of length numStrings, each element a 10-char alphanumerical string
14 | }
15 | \description{
16 | Generate random alphanumerical string of length 10
17 | }
18 | \details{
19 | Used to create multiple temporary directories during an R session
20 | }
21 | \examples{
22 | randAlphanumString()
23 | }
24 | 


--------------------------------------------------------------------------------
/man/readPathways.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/readPathways.R
 3 | \name{readPathways}
 4 | \alias{readPathways}
 5 | \title{Parse GMT file and return pathways as list}
 6 | \usage{
 7 | readPathways(
 8 |   fname,
 9 |   MIN_SIZE = 10L,
10 |   MAX_SIZE = 200L,
11 |   EXCLUDE_KEGG = TRUE,
12 |   IDasName = FALSE,
13 |   verbose = TRUE,
14 |   getOrigNames = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{fname}{(char) path to pathway file in gmt format
19 | pathway score to include pathway in the filter list}
20 | 
21 | \item{MIN_SIZE}{(integer) min num genes allowed in a pathway. Pathways
22 | with fewer number of genes are excluded from the output list}
23 | 
24 | \item{MAX_SIZE}{(integer) max num genes allowed in a pathway. Pathways
25 | with gene counts greater than this are excluded from the output list}
26 | 
27 | \item{EXCLUDE_KEGG}{(boolean) If TRUE exclude KEGG pathways. Our
28 | experience has been that some KEGG gene sets are too broad to be 
29 | physiologically relevant}
30 | 
31 | \item{IDasName}{(boolean) Value for key in output list. 
32 | If TRUE, uses db name and ID as name (e.g.  KEGG:hsa04940)
33 | If FALSE, pathway name.}
34 | 
35 | \item{verbose}{(logical) print detailed messages}
36 | 
37 | \item{getOrigNames}{(logical) when TRUE also returns a mapping of the
38 | cleaned pathway names to the original names}
39 | }
40 | \value{
41 | Depends on value of getOrigNames. If FALSE (Default), list with
42 | pathway name as key, vector of genes as value. If TRUE, returns list of
43 | length two, (1) geneSets: pathway-gene mappings as default, 
44 | (2) pNames: data.frame with original and cleaned names.
45 | }
46 | \description{
47 | Parse GMT file and return pathways as list
48 | }
49 | \details{
50 | The GMT file format currently supported should match the ones
51 | found at http://downloads.baderlab.org. The original GMT file format is:
52 | <set name><set description><member 1><member 2>...<member N>, 
53 | one row per set, with values tab-delimited.
54 | The version at baderlab.org has additional unique formatting of the
55 | <set name> column as follows:
56 | <pathway_full_name>\%<pathway_source>\%<pathway_source_id>
57 | This function requires the specific formatting of the first column
58 | to assign the key name of the output list (see \code{IDasName} 
59 | argument).
60 | }
61 | \examples{
62 | pathFile <- fetchPathwayDefinitions("October",2020)
63 | pathwayList    <- readPathways(pathFile)
64 | 
65 | }
66 | 


--------------------------------------------------------------------------------
/man/replacePattern.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compileFeatures.R
 3 | \name{replacePattern}
 4 | \alias{replacePattern}
 5 | \title{Replace pattern in all files in dir}
 6 | \usage{
 7 | replacePattern(pattern = ",", target = ".", path = getwd(), fileType = "txt$")
 8 | }
 9 | \arguments{
10 | \item{pattern}{(char) pattern to find}
11 | 
12 | \item{target}{(char) pattern to replace}
13 | 
14 | \item{path}{(char) dir to replace pattern in}
15 | 
16 | \item{fileType}{(char) pattern for files to replace pattern in}
17 | }
18 | \value{
19 | No value. Files have patterns replaced in place.
20 | }
21 | \description{
22 | find/replace pattern in all files of specified file type
23 | in specified directory. Needed to modify number format when interfacing
24 | with GeneMANIA, on French locale machines. Without this step,
25 | CacheBuilder throws error with commas.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/runFeatureSelection.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/runFeatureSelection.R
 3 | \name{runFeatureSelection}
 4 | \alias{runFeatureSelection}
 5 | \title{Run GeneMANIA cross-validation with a provided subset of networks}
 6 | \usage{
 7 | runFeatureSelection(
 8 |   trainID_pred,
 9 |   outDir,
10 |   dbPath,
11 |   numTrainSamps = NULL,
12 |   incNets = "all",
13 |   orgName = "predictor",
14 |   fileSfx = "CV",
15 |   verbose = FALSE,
16 |   numCores = 2L,
17 |   JavaMemory = 6L,
18 |   verbose_runQuery = FALSE,
19 |   debugMode = FALSE,
20 |   ...
21 | )
22 | }
23 | \arguments{
24 | \item{trainID_pred}{(char) vector with universe of predictor class
25 | patients (i.e. all that can possibly be included in the query file)}
26 | 
27 | \item{outDir}{(char) directory to store query file and GM results}
28 | 
29 | \item{dbPath}{(char) path to GeneMANIA generic database with
30 | training population}
31 | 
32 | \item{numTrainSamps}{(integer) number of training samples in total.
33 | Leave blank to use 5 training samples in order to save memory}
34 | 
35 | \item{incNets}{(char) vector of networks to include in this analysis
36 | (features/pathway names). Useful for subset-based feature selection}
37 | 
38 | \item{orgName}{(char) organism name for GeneMANIA generic database.
39 | The default value will likely never need to be changed.}
40 | 
41 | \item{fileSfx}{(char) file suffix}
42 | 
43 | \item{verbose}{(logical) print messages}
44 | 
45 | \item{numCores}{(integer) number of parallel threads for cross-validation}
46 | 
47 | \item{JavaMemory}{(integer) memory for GeneMANIA run, in Gb.}
48 | 
49 | \item{verbose_runQuery}{(logical) print messages for runQuery()}
50 | 
51 | \item{debugMode}{(logical) when TRUE runs jobs in serial instead of parallel and 
52 | prints verbose messages. Also prints system Java calls and prints all standard out
53 | and error output associated with these calls.}
54 | 
55 | \item{...}{args for \code{makeQueries()}}
56 | }
57 | \value{
58 | No value. Side effect of generating feature scores.
59 | }
60 | \description{
61 | Run GeneMANIA cross-validation with a provided subset of networks
62 | }
63 | \details{
64 | Creates query files, runs GM for 10-fold cross validation.
65 | }
66 | \examples{
67 | data(MB.pheno)
68 | dbPath <- system.file("extdata","dbPath",package="netDx")
69 | runFeatureSelection(MB.pheno$ID[which(MB.pheno$STATUS\%in\% 'WNT')],
70 | 		tempdir(),dbPath,103L)
71 | }
72 | 


--------------------------------------------------------------------------------
/man/runQuery.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/runQuery.R
 3 | \name{runQuery}
 4 | \alias{runQuery}
 5 | \title{Run a query}
 6 | \usage{
 7 | runQuery(
 8 |   dbPath,
 9 |   queryFiles,
10 |   resDir,
11 |   verbose = TRUE,
12 |   JavaMemory = 6L,
13 |   numCores = 1L,
14 |   debugMode = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{dbPath}{(char) path to directory with GeneMANIA generic database}
19 | 
20 | \item{queryFiles}{(list(char)) paths to query files}
21 | 
22 | \item{resDir}{(char) path to output directory}
23 | 
24 | \item{verbose}{(logical) print messages}
25 | 
26 | \item{JavaMemory}{(integer) Memory for GeneMANIA (in Gb) - a total of 
27 | numCores*JavaMemory will be used and distributed for all GM threads}
28 | 
29 | \item{numCores}{(integer) number of CPU cores for parallel processing}
30 | 
31 | \item{debugMode}{(logical) when TRUE runs jobs in serial instead of parallel and 
32 | prints verbose messages. Also prints system Java calls.}
33 | }
34 | \value{
35 | (char) path to GeneMANIA query result files with patient similarity
36 | rankings (*PRANK) and feature weights (*NRANK)
37 | of results file
38 | }
39 | \description{
40 | Run a query
41 | }
42 | \examples{
43 | dbPath <- system.file("extdata","dbPath",package="netDx")
44 | queryFile <- system.file("extdata","GM_query.txt",package="netDx")
45 | runQuery(dbPath, queryFile,tempdir())
46 | }
47 | 


--------------------------------------------------------------------------------
/man/setupFeatureDB.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/setupFeatureDB.R
 3 | \name{setupFeatureDB}
 4 | \alias{setupFeatureDB}
 5 | \title{setup database of features for feature selection}
 6 | \usage{
 7 | setupFeatureDB(pheno, prepDir = tempdir())
 8 | }
 9 | \arguments{
10 | \item{pheno}{(data.frame) patient metadata. Must contain ID column}
11 | 
12 | \item{prepDir}{(char) directory in which to setup database}
13 | }
14 | \value{
15 | (data.frame) internal numerical id for patients (INTERNAL_ID) and
16 | user-provided ID (ID)
17 | }
18 | \description{
19 | Creates all the input files for the collection of features used in 
20 | feature selection.
21 | }
22 | \examples{
23 | data(xpr,pheno)
24 | pathwayList <- list(pathA=rownames(xpr)[1:10],pathB=rownames(xpr)[21:50])
25 | 
26 | dataList <- list(rna=xpr)  #only one layer type
27 | groupList <- list(rna=pathwayList) # group genes by pathways
28 | 
29 | makeNets <- function(dataList, groupList, netDir,...) {
30 |     netList <- makePSN_NamedMatrix(dataList[['rna']],
31 | 		rownames(dataList[['rna']]),
32 |      groupList[['rna']],netDir,verbose=FALSE,
33 | 			writeProfiles=TRUE,...)
34 |     unlist(netList)
35 | }
36 | tmpDir <- tempdir(); netDir <- paste(tmpDir,"nets",sep=getFileSep())
37 | dir.create(netDir,recursive=TRUE)
38 | 
39 | pheno_id <- setupFeatureDB(pheno,netDir)
40 | }
41 | 


--------------------------------------------------------------------------------
/man/silh.Rd:
--------------------------------------------------------------------------------
 1 | \name{silh}
 2 | \alias{silh}
 3 | \docType{data}
 4 | \title{
 5 | 	Toy network.
 6 | }
 7 | \description{
 8 | List with two entries. 
 9 | net: Network specification. "X" and "Y" are source and target columns respectively. "DIST" specifies weights.
10 | groups: Node labels. A data.frame with columns "ID" and "GROUP"
11 | }
12 | \usage{data(silh)}
13 | \examples{
14 | data(silh)
15 | summary(silh)
16 | silh$net
17 | silh$groups
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/sim.eucscale.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/similarities.R
 3 | \name{sim.eucscale}
 4 | \alias{sim.eucscale}
 5 | \title{Similarity method. Euclidean distance followed by exponential scaling}
 6 | \usage{
 7 | sim.eucscale(dat, K = 20, alpha = 0.5)
 8 | }
 9 | \arguments{
10 | \item{dat}{(data.frame) Patient data; rows are measures, columns are 
11 | patients.}
12 | 
13 | \item{K}{(integer) Number of nearest neighbours to consider (K of KNN)}
14 | 
15 | \item{alpha}{(numeric) Scaling factor for exponential similarity kernel. 
16 | Recommended range between 0.3 and 0.8.}
17 | }
18 | \value{
19 | symmetric matrix of size ncol(dat) (number of patients) containing
20 | pairwise patient similarities
21 | }
22 | \description{
23 | Computes Euclidean distance between patients. A scaled 
24 | exponential similarity kernel is used to determine edge weight. The 
25 | exponential scaling considers the K nearest neighbours, so that 
26 | similarities between non-neighbours are set to zero. Alpha is a 
27 | hyperparameter that determines decay rate of the exponential. For details,
28 | see Wang et al. (2014). Nature Methods 11:333.
29 | }
30 | \examples{
31 | data(xpr)
32 | sim <- sim.eucscale(xpr)
33 | }
34 | 


--------------------------------------------------------------------------------
/man/sim.pearscale.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/similarities.R
 3 | \name{sim.pearscale}
 4 | \alias{sim.pearscale}
 5 | \title{various similarity functions
 6 | Similarity function: Pearson correlation followed by exponential scaling}
 7 | \usage{
 8 | sim.pearscale(dat, K = 20, alpha = 0.5)
 9 | }
10 | \arguments{
11 | \item{dat}{(data.frame) Patient data; rows are measures, columns are 
12 | patients.}
13 | 
14 | \item{K}{(integer) Number of nearest neighbours to consider (K of KNN)}
15 | 
16 | \item{alpha}{(numeric) Scaling factor for exponential similarity kernel. 
17 | Recommended range between 0.3 and 0.8.}
18 | }
19 | \value{
20 | symmetric matrix of size ncol(dat) (number of patients) containing
21 | pairwise patient similarities
22 | }
23 | \description{
24 | Computes Pearson correlation between patients. A scaled 
25 | exponential similarity kernel is used to determine edge weight. The 
26 | exponential scaling considers the K nearest neighbours, so that 
27 | similarities between non-neighbours are set to zero. Alpha is a 
28 | hyperparameter that determines decay rate of the exponential. For details,
29 | see Wang et al. (2014). Nature Methods 11:333.
30 | }
31 | \examples{
32 | data(xpr)
33 | sim <- sim.pearscale(xpr)
34 | }
35 | 


--------------------------------------------------------------------------------
/man/simpleCap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simpleCap.R
 3 | \name{simpleCap}
 4 | \alias{simpleCap}
 5 | \title{simple capitalization}
 6 | \usage{
 7 | simpleCap(x)
 8 | }
 9 | \arguments{
10 | \item{x}{(char) name}
11 | }
12 | \value{
13 | (char) Changes case so start of each word is in upper-case, and
14 | the rest is in lowercase
15 | }
16 | \description{
17 | simple capitalization
18 | }
19 | \details{
20 | used to format feature names so they are not in all-caps
21 | }
22 | \examples{
23 | simpleCap('this IS a TEST sEnTenCe')
24 | }
25 | 


--------------------------------------------------------------------------------
/man/sparsify2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sparsify2.R
 3 | \name{sparsify2}
 4 | \alias{sparsify2}
 5 | \title{cleaner sparsification routine}
 6 | \usage{
 7 | sparsify2(
 8 |   W,
 9 |   outFile = paste(tempdir(), "tmp.txt", sep = getFileSep()),
10 |   cutoff = 0.3,
11 |   maxInt = 50,
12 |   EDGE_MAX = 1000,
13 |   includeAllNodes = TRUE,
14 |   verbose = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{W}{(matrix) similarity matrix}
19 | 
20 | \item{outFile}{(char) path to file to write sparsified network}
21 | 
22 | \item{cutoff}{(numeric) edges with weight smaller than this are set to NA}
23 | 
24 | \item{maxInt}{(numeric) max num edges per node.}
25 | 
26 | \item{EDGE_MAX}{(numeric) max num edges in network}
27 | 
28 | \item{includeAllNodes}{(logical) if TRUE, ensures at least one edge is 
29 | present for each patient. This feature is required when sparsification 
30 | excludes test patients that are required to be classified. If the 
31 | sparsification rules exclude all edges for a patient and this flag is set, 
32 | then the strongest edge for each missing patient is added to the net. 
33 | Note that this condition results in the total number of edges potentially 
34 | exceeding EDGE_MAX}
35 | 
36 | \item{verbose}{(logical) print detailed messages, useful for debugging}
37 | }
38 | \value{
39 | writes SIF content to text file (node1,node2,edge weight)
40 | }
41 | \description{
42 | cleaner sparsification routine
43 | }
44 | \details{
45 | Sparsifies similarity matrix to keep strongest edges.
46 | Sets diagonal and edges < cutoff to NA. Keeps strongest maxInt edges
47 | per node. Ties are ignored. Keeps a max of EDGE_MAX edges in the network.
48 | }
49 | \examples{
50 | data(xpr); 
51 | sparsify2(cor(xpr))
52 | }
53 | 


--------------------------------------------------------------------------------
/man/sparsify3.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sparsify3.R
 3 | \name{sparsify3}
 4 | \alias{sparsify3}
 5 | \title{cleaner sparsification routine - faster, matrix-based version}
 6 | \usage{
 7 | sparsify3(
 8 |   W,
 9 |   outFile = sprintf("\%s/tmp.txt", tempdir()),
10 |   cutoff = 0.3,
11 |   maxInt = 50,
12 |   EDGE_MAX = Inf,
13 |   includeAllNodes = TRUE,
14 |   verbose = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{W}{(matrix) similarity matrix}
19 | 
20 | \item{outFile}{(char) path to file to write sparsified network}
21 | 
22 | \item{cutoff}{(numeric) edges with weight smaller than this are set to NA}
23 | 
24 | \item{maxInt}{(numeric) max num edges per node.}
25 | 
26 | \item{EDGE_MAX}{(numeric) max num edges in network}
27 | 
28 | \item{includeAllNodes}{(logical) if TRUE, ensures at least one edge is 
29 | present for each patient. This feature is required when sparsification 
30 | excludes test patients that are required to be classified. If the 
31 | sparsification rules exclude all edges for a patient and this flag is set, 
32 | then the strongest edge for each missing patient is added to the net. Note 
33 | that this condition results in the total number of edges potentially 
34 | exceeding EDGE_MAX}
35 | 
36 | \item{verbose}{(logical) print detailed messages, useful for debugging}
37 | }
38 | \value{
39 | writes SIF content to text file (node1,node2,edge weight)
40 | }
41 | \description{
42 | cleaner sparsification routine - faster, matrix-based version
43 | }
44 | \details{
45 | Sparsifies similarity matrix to keep strongest edges.
46 | Sets diagonal and edges < cutoff to NA. Keeps strongest maxInt edges
47 | per node. Ties are ignored. Keeps a max of EDGE_MAX edges in the network.
48 | }
49 | \examples{
50 | m <- matrix(runif(500*500),nrow=500)
51 | y <- sparsify2(m)
52 | m <- matrix(runif(500*500),nrow=500)
53 | y <- sparsify2(m)
54 | }
55 | 


--------------------------------------------------------------------------------
/man/splitTestTrain.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/splitTestTrain_resampling.R
 3 | \name{splitTestTrain}
 4 | \alias{splitTestTrain}
 5 | \title{Split samples into train/test}
 6 | \usage{
 7 | splitTestTrain(pheno_DF, pctT = 0.7, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{pheno_DF}{(data.frame) patient information
11 | Must contain the following columns:
12 | 1. ID: (char) patient IDs
13 | 2. STATUS: (char) patient classes. Values not equal to \code{predClass}
14 | will be considered as 'other'
15 | Expects rows with unique IDs}
16 | 
17 | \item{pctT}{(numeric between 0 and 1) Fraction of patients to randomly
18 | assign to the training set. The remainder will be used for blind test 
19 | set}
20 | 
21 | \item{verbose}{(logical) print messages}
22 | }
23 | \value{
24 | (char) vector of length \code{nrow(pheno_DF)}, with values of 
25 | 'TRAIN' or 'TEST'. The order corresponds to pheno_DF; a patient labelled
26 | 'TRAIN' has been assigned to the training set, and one labelled 'TEST'
27 | has been assigned to the test set.
28 | }
29 | \description{
30 | Split samples into train/test
31 | }
32 | \examples{
33 | data(pheno)
34 | x <- splitTestTrain(pheno)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/splitTestTrain_resampling.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/splitTestTrain_partition.R
 3 | \name{splitTestTrain_resampling}
 4 | \alias{splitTestTrain_resampling}
 5 | \title{Assign train/test labels over several resamplings of the data.}
 6 | \usage{
 7 | splitTestTrain_resampling(pheno_DF, nFold = 3L, predClass, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{pheno_DF}{(data.frame) table with patient ID and status.
11 | Must contain columns for Patient ID (named 'ID') and class
12 | (named 'STATUS'). Status should be a char; value of predictor class 
13 | should be specified in \code{predClass} param; 
14 | all other values are considered non-predictor class
15 | Expects rows with unique IDs
16 | Rows with duplicate IDs will be excluded.}
17 | 
18 | \item{nFold}{(integer) number of resamplings. Each sample will be a test
19 | sample exactly once.}
20 | 
21 | \item{predClass}{(char) name of predictor class}
22 | 
23 | \item{verbose}{(logical) print messages}
24 | }
25 | \value{
26 | (list) of length nFold, each with char vector of length 
27 | nrow(pheno_DF). Values of 'TRAIN' or 'TEST'
28 | }
29 | \description{
30 | Assign train/test labels over several resamplings of the data.
31 | }
32 | \details{
33 | This function is useful when feature selection needs to 
34 | occur over multiple resamplings of the data, as a strategy to reduce
35 | overfitting. Each sample serves as a test for exactly one resampling, 
36 | and as a training sample for the others. The method is provided with the
37 | positive label and splits the samples so that an even number of positive
38 | and negative classes are represented in all the resamplings (i.e. it
39 | avoids the situation where one resampling has too many positives and 
40 | another has too few).
41 | }
42 | \examples{
43 | data(pheno) 
44 | x <- splitTestTrain_resampling(pheno,predClass='LumA')
45 | }
46 | 


--------------------------------------------------------------------------------
/man/tSNEPlotter.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helper.R
 3 | \name{tSNEPlotter}
 4 | \alias{tSNEPlotter}
 5 | \title{Plot tSNE}
 6 | \usage{
 7 | tSNEPlotter(psn, pheno, ...)
 8 | }
 9 | \arguments{
10 | \item{psn}{(matrix) Patient similarity network represented as adjacency
11 | matrix (symmetric). Row and column names are patient IDs. Note that NA
12 | values will be replaced by very small number (effectively zero).}
13 | 
14 | \item{pheno}{(data.frame) Patient labels. ID column is patient ID and 
15 | STATUS is patient label of interest. tSNE will colour-code nodes by 
16 | patient label.}
17 | 
18 | \item{...}{Parameters for Rtsne() function.}
19 | }
20 | \value{
21 | (Rtsne) output of Rtsne call. Side effect of tSNE plot
22 | }
23 | \description{
24 | Plot tSNE
25 | }
26 | \details{
27 | Plots tSNE of integrated patient similarity network using Rtsne
28 | }
29 | \examples{
30 | pid <- paste("P",1:100,sep="")
31 | psn <- matrix(rnorm(100*100),nrow=100,dimnames=list(pid,pid))
32 | psn[lower.tri(psn)] <- NA; diag(psn) <- NA
33 | psn2 <- reshape2::melt(psn); psn2 <- psn2[-which(is.na(psn2[,3])),]
34 | colnames(psn2) <- c("SOURCE","TARGET","WEIGHT")
35 | pheno <- data.frame(ID=pid,STATUS=c(rep("control",50),rep("case",50)))
36 | tSNEPlotter(psn2,pheno)
37 | }
38 | 


--------------------------------------------------------------------------------
/man/thresholdSmoothedMutations.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/smooMutationPropagation.R
 3 | \name{thresholdSmoothedMutations}
 4 | \alias{thresholdSmoothedMutations}
 5 | \title{Apply discretization to the matrix resulted from the propagation on the
 6 | sparse patient matrix}
 7 | \usage{
 8 | thresholdSmoothedMutations(
 9 |   smoothedMutProfile,
10 |   unsmoothedMutProfile,
11 |   nameDataset,
12 |   n_topXmuts = c(10)
13 | )
14 | }
15 | \arguments{
16 | \item{smoothedMutProfile}{(data.frame) continuous matrix of patient profiles 
17 | resulting from applying the network-based propagation algorithm 
18 | (smoothMutations_LabelProp()) on a binary somatic mutation sparse matrix.}
19 | 
20 | \item{unsmoothedMutProfile}{(data.frame) binary somatic mutation sparse 
21 | matrix. Rownames are unique genes. Colnames are unique patients. A cell 
22 | contains a zero or a one.}
23 | 
24 | \item{nameDataset}{(char) for titles on plot}
25 | 
26 | \item{n_topXmuts}{(numeric between 0 and 100) percent of top mutations
27 | to keep. This function converts these to 1.0 when binarizing, so they
28 | remain in the thresholded output matrix; other mutations are set to zero.}
29 | }
30 | \value{
31 | (data.frame) binary somatic mutation matrix whose sparsity has been 
32 | decreased
33 | }
34 | \description{
35 | Apply discretization to the matrix resulted from the propagation on the
36 | sparse patient matrix
37 | }
38 | \details{
39 | This function is included in the netDx use case which involves
40 |   propagating the sparse matrix of patient's profiles to reduce its sparsity.
41 |   This function applies discretization on the propagated matrix of patient
42 |   profiles. It sets to 1 the genes which got the highest propagation value.
43 |   The remaining genes are set to 0. This discretization is driven by
44 |   the fact that the higher the propagation value, the higher the chance that
45 |   the gene is involved in the patient condition and expression/mutation
46 |   profile. In contrast, genes which got either a medium or a low value
47 |   are not trustworthy.
48 | }
49 | \examples{
50 | suppressWarnings(suppressMessages(require(MultiAssayExperiment)))
51 | require(doParallel)
52 | 
53 | # load mutation and phenotype data
54 | genoFile <- system.file("extdata","TGCT_mutSmooth_geno.txt",package="netDx")
55 | geno <- read.delim(genoFile,sep="\t",header=TRUE,as.is=TRUE)
56 | phenoFile <- system.file("extdata", "TGCT_mutSmooth_pheno.txt",
57 | 			package="netDx")
58 | pheno <- read.delim(phenoFile,sep="\t",header=TRUE,as.is=TRUE)
59 | rownames(pheno) <- pheno$ID
60 | 
61 | # load interaction nets to smooth over
62 | require(BiocFileCache)
63 | netFileURL <- paste("https://download.baderlab.org/netDx/",
64 | 	"supporting_data/CancerNets.txt",sep="")
65 | cache <- rappdirs::user_cache_dir(appname = "netDx")
66 | bfc <- BiocFileCache::BiocFileCache(cache,ask=FALSE)
67 | netFile <- bfcrpath(bfc,netFileURL)
68 | cancerNets <- read.delim(netFile,sep="\t",header=TRUE,as.is=TRUE)
69 | # smooth mutations
70 | prop_net <- smoothMutations_LabelProp(geno,cancerNets,numCores=1L)
71 | genoP <- thresholdSmoothedMutations(
72 |    prop_net,geno,"TGCT_CancerNets",c(20)
73 |   )
74 | }
75 | 


--------------------------------------------------------------------------------
/man/toymodel.Rd:
--------------------------------------------------------------------------------
 1 | \name{toymodel}
 2 | \alias{toymodel}
 3 | \docType{data}
 4 | \title{
 5 | 	Example model returned by a buildPredictor() call.
 6 | }
 7 | \description{
 8 | Output of buildPredictor() generated by a simple use-case of binary breast tumour classification using two genomic data sources.
 9 | BRCA data were downloaded using curatedTCGAData for mRNA and miRNA expression. buildPredictor() was run by scoring features out of 2, with selected features passing 1 out of 2.
10 | Tumours were labelled either "Luminal.A" or "other".
11 | }
12 | \usage{data(toymodel)}
13 | \examples{
14 | data(toymodel)
15 | head(toymodel)
16 | }
17 | \keyword{datasets}
18 | 


--------------------------------------------------------------------------------
/man/updateNets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/updateNets.R
 3 | \name{updateNets}
 4 | \alias{updateNets}
 5 | \title{Synchronize patient set in sample table and network table.}
 6 | \usage{
 7 | updateNets(
 8 |   p_net,
 9 |   pheno_DF,
10 |   writeNewNets = TRUE,
11 |   oldNetDir,
12 |   newNetDir,
13 |   verbose = TRUE,
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{p_net}{(matrix) rows are patients, columns are networks.
19 | a[i,j] = 1 if patient i occurs in network j, else 0.}
20 | 
21 | \item{pheno_DF}{(data.frame) patient ID and STATUS.}
22 | 
23 | \item{writeNewNets}{(logical) if TRUE writes new networks to 
24 | \code{newNetDir}.}
25 | 
26 | \item{oldNetDir}{(char) path to directory with networks to be updated}
27 | 
28 | \item{newNetDir}{(char) path to directory where updated networks are
29 | to be written}
30 | 
31 | \item{verbose}{(logical) print messages}
32 | 
33 | \item{...}{passed to pruneNets()}
34 | }
35 | \value{
36 | list with updated p_net and pheno_DF. pheno_DF will contain IDs
37 | in the updated p_net. p_net will contain only those networks with 
38 | 2+ patients and those patients present in 1+ network.
39 | }
40 | \description{
41 | Synchronize patient set in sample table and network table.
42 | }
43 | \details{
44 | This function is useful in applications with highly missing
45 | data or where each patient contributes data points not present in the
46 | others; e.g. networks based on individual
47 | patient CNVs, which are highly sparse. In such a scenario, any kind of
48 | patient subsetting - for example, limiting to training samples - changes
49 | the population of eligible networks for analysis. Networks that no longer
50 | have samples, or that have one patient with the neighbour removed, have
51 | to be excluded. This function updates networks and patients so that 
52 | each network contains at least two patients and only patients in 
53 | networks are retained. In other words, it keeps pheno_DF and p_net in 
54 | sync.
55 | }
56 | \examples{
57 | data(npheno)
58 | netDir <- system.file("extdata","example_nets",package="netDx")
59 | netmat <- countPatientsInNet(netDir,dir(netDir,pattern='txt$'), npheno[,1])
60 | x <- updateNets(netmat, npheno,writeNewNets=FALSE)
61 | }
62 | 


--------------------------------------------------------------------------------
/man/writeNetsSIF.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Nets_writeSIF.R
 3 | \name{writeNetsSIF}
 4 | \alias{writeNetsSIF}
 5 | \title{write patient networks in Cytoscape's .sif format}
 6 | \usage{
 7 | writeNetsSIF(
 8 |   netPath,
 9 |   outFile = paste(tempdir(), "out.sif", sep = getFileSep()),
10 |   netSfx = "_cont.txt"
11 | )
12 | }
13 | \arguments{
14 | \item{netPath}{(char): vector of path to network files; file suffix
15 | should be '_cont.txt' 
16 | networks should be in format: A B 1
17 | where A and B are nodes, and 1 indicates an edge between them}
18 | 
19 | \item{outFile}{(char) path to .sif file}
20 | 
21 | \item{netSfx}{(char) suffix for network file name}
22 | }
23 | \value{
24 | No value. Side effect of writing all networks to \code{outFile}
25 | }
26 | \description{
27 | write patient networks in Cytoscape's .sif format
28 | }
29 | \details{
30 | Converts a set of binary interaction networks into Cytoscape's
31 | sif format.
32 | (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats)
33 | This utility permits visualization of feature selected networks.
34 | }
35 | \examples{
36 | netDir <- system.file("extdata","example_nets",package="netDx")
37 | netFiles <- paste(netDir,dir(netDir,pattern='txt$'),
38 | sep=getFileSep())
39 | writeNetsSIF(netFiles,'merged.sif',netSfx='.txt')
40 | }
41 | 


--------------------------------------------------------------------------------
/man/writeQueryBatchFile.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/writeQueryBatchFile.R
 3 | \name{writeQueryBatchFile}
 4 | \alias{writeQueryBatchFile}
 5 | \title{Write batch.txt file required to create GeneMANIA database}
 6 | \usage{
 7 | writeQueryBatchFile(
 8 |   netDir,
 9 |   netList,
10 |   outDir = tempdir(),
11 |   idFile,
12 |   orgName = "predictor",
13 |   orgDesc = "my_predictor",
14 |   orgAlias = "my_predictor",
15 |   taxID = 1339
16 | )
17 | }
18 | \arguments{
19 | \item{netDir}{(char) path to dir with networks}
20 | 
21 | \item{netList}{(char) vector of network names}
22 | 
23 | \item{outDir}{(char) directory to write batch file}
24 | 
25 | \item{idFile}{(char) path to file with patient IDs}
26 | 
27 | \item{orgName}{(char) organism name. Don't change the default unless
28 | you know what you are doing.}
29 | 
30 | \item{orgDesc}{(char) organism description. Similar to \code{orgName},
31 | don't change the default}
32 | 
33 | \item{orgAlias}{(char) organism alias. Similar to \code{orgName}, don't
34 | change the default.}
35 | 
36 | \item{taxID}{(integer) taxonomyID required for GeneMANIA. Similar to 
37 | \code{orgName}, don't change the default.}
38 | }
39 | \value{
40 | No value. Side effect of writing batch file to 
41 | \code{<outDir>/batch.txt}.
42 | }
43 | \description{
44 | Write batch.txt file required to create GeneMANIA database
45 | }
46 | \details{
47 | This file is used to compile features into a single database
48 | for feature selection.
49 | }
50 | \examples{
51 | data(npheno)
52 | netDir <- system.file("extdata","example_nets",package="netDx")
53 | netList <- dir(netDir,pattern='txt$')
54 | writeQueryBatchFile(netDir,netList, tempdir(), npheno$ID)
55 | }
56 | 


--------------------------------------------------------------------------------
/man/writeQueryFile.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/writeQueryFile.R
 3 | \name{writeQueryFile}
 4 | \alias{writeQueryFile}
 5 | \title{Wrapper to write GeneMANIA query file}
 6 | \usage{
 7 | writeQueryFile(
 8 |   qSamps,
 9 |   incNets = "all",
10 |   numReturn = 1L,
11 |   outFile,
12 |   orgName = "predictor"
13 | )
14 | }
15 | \arguments{
16 | \item{qSamps}{(char) vector of patient IDs in query}
17 | 
18 | \item{incNets}{(char) vector of networks to include in this analysis
19 | (features/pathway names). Useful for subset-based feature selection}
20 | 
21 | \item{numReturn}{(integer) number of patients to return in ranking file}
22 | 
23 | \item{outFile}{(char) path to output file}
24 | 
25 | \item{orgName}{(char) organism name}
26 | }
27 | \value{
28 | No value. Side effect of writing the query file to
29 | \code{outFile}
30 | }
31 | \description{
32 | Wrapper to write GeneMANIA query file
33 | }
34 | \examples{
35 | data(pheno)
36 | writeQueryFile(pheno$ID[seq_len(5)], 'all',nrow(pheno), 'myquery.txt')
37 | }
38 | 


--------------------------------------------------------------------------------
/man/writeWeightedNets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/writeWeightedNets.R
 3 | \name{writeWeightedNets}
 4 | \alias{writeWeightedNets}
 5 | \title{Write an integrated similarity network consisting of selected networks.}
 6 | \usage{
 7 | writeWeightedNets(
 8 |   patientIDs,
 9 |   netIDs,
10 |   netDir,
11 |   keepNets,
12 |   filterEdgeWt = 0,
13 |   aggNetFunc = "MAX",
14 |   limitToTop = 50L,
15 |   plotEdgeDensity = FALSE,
16 |   verbose = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{patientIDs}{(data.frame) patient identifiers. Columns include
21 | internally-generated identifiers (GM_ID) and user-provided identifiers 
22 | (ID)}
23 | 
24 | \item{netIDs}{(data.frame) network metadata. Columns include
25 | internal network name (NET_ID), user-provided name (NETWORK).
26 | If a third optional column named "isBinary" is provided, and contains
27 | binary values (i.e. 1 and 0), that indicates that the network contains
28 | only binary weights and an alternate similarity computation (PropBinary)
29 | will be used (see description).}
30 | 
31 | \item{netDir}{(char) path to directory containing interaction networks.
32 | Note that these are networks where the node IDs have been recoded by 
33 | GeneMANIA (e.g. 1,2,3)}
34 | 
35 | \item{keepNets}{(char or data.frame) networks to include in integrated net.
36 | If data.frame, network names must be in the "NETWORK" column; other columns
37 | will be ignored. Mainly included as convenience so pathway scores can be passed
38 | in table format
39 | (NETWORK), and a multiplier constant for edges in that network (WEIGHT)}
40 | 
41 | \item{filterEdgeWt}{(numeric) keep edges with raw edge
42 | weight strictly greater than this value. Note that "raw" refers to 
43 | this filter being applied before the multiplier is applied.}
44 | 
45 | \item{aggNetFunc}{(char, one of: [MEAN|MAX]) Aggregate the network 
46 | 1) MEAN: average of weighted edges (raw x netDx score)
47 | 2) MAX: max of raw edge weight}
48 | 
49 | \item{limitToTop}{(integer) limit to top strongest connections. Set to
50 | Inf to list all connections.}
51 | 
52 | \item{plotEdgeDensity}{(logical) plot density plot of edge weights, one
53 | per input net. Used to troubleshoot problems introduced by specific nets.}
54 | 
55 | \item{verbose}{(logical) print messages if TRUE}
56 | }
57 | \value{
58 | (list) 
59 | 1) filterEdgeWt (numeric) Value of filterEdgeWt parameter
60 | 2) aggNetFunc (char) Value of aggNetFunc parameter
61 | 3) limitToTop (integer) Value of limitToTop parameter
62 | 4) aggNet (matrix) Integrated (aggregated) similarity network
63 | File format is:
64 | 1) source patient (SOURCE)
65 | 2) target patient (TARGET)
66 | 3) network name (NET_NAME)
67 | 4) weight similarity for the network (WT_SIM)
68 | }
69 | \description{
70 | Write an integrated similarity network consisting of selected networks.
71 | }
72 | 


--------------------------------------------------------------------------------
/man/xpr.Rd:
--------------------------------------------------------------------------------
 1 | \name{xpr}
 2 | \alias{xpr}
 3 | \docType{data}
 4 | \title{
 5 | Example expression matrix
 6 | }
 7 | \description{
 8 | data.frame with gene expression for 727 genes (rows) and 40 patients (columns).
 9 | Data from TCGA breast cancer subtyping study.
10 | }
11 | \usage{data(xpr)}
12 | \source{
13 | The Cancer Genome Atlas. (2012). Nature 490:61-70.
14 | }
15 | \references{
16 | The Cancer Genome Atlas. (2012). Nature 490:61-70.
17 | }
18 | \examples{
19 | data(xpr)
20 | head(xpr)
21 | }
22 | \keyword{datasets}
23 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
# Test entry point: attach testthat and the package under test, then hand
# control to test_check() for the "netDx" package.
library(testthat)
library(netDx)

test_check("netDx")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test_suite.R:
--------------------------------------------------------------------------------
 1 | # test utilities
 2 | 
 3 | test_that("readPathways works", {
 4 | 	  x <- readPathways(fetchPathwayDefinitions("February",2020),
 5 | 			MIN_SIZE=10L, MAX_SIZE=200L)
 6 | 		ln <- unlist(lapply(x,length))
 7 |     expect_that(x,is_a("list"))
 8 | 	  expect_that(x[[1]],is_a("character"))
 9 | 		expect_gt(min(ln),9)
10 | 		expect_lt(max(ln),201)
11 | })
12 | 
13 | 
14 | ###test_that("lasso filtering works", {
15 | ###	# make own subroutine
16 | ###})
17 | ###
18 | ###test_that("imputation works", {
19 | ###	# make own subroutine
20 | ###})
21 | ###
22 | #### ------------------------------------------
23 | #### feature creation
24 | #### ------------------------------------------
25 | ###
26 | #### similarity methods
27 | ###test_that("similarity works: normDiff", {
28 | ###})
29 | ###test_that("similarity works: AvgNormDiff", {
30 | ###})
31 | ###
32 | ###test_that("similarity works: euc + exp scaling", {
33 | ###})
34 | ###
35 | ###test_that("similarity works: Pearson", {
36 | ###})
37 | ###
38 | #### sparsification methods
39 | ###test_that("sparsification works: sparsify2", {
40 | ###})
41 | ###
42 | ###test_that("sparsification works: sparsify3", {
43 | ###})
44 | ###
45 | 


--------------------------------------------------------------------------------
/vignettes/EMap_realworld.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/vignettes/EMap_realworld.png


--------------------------------------------------------------------------------
/vignettes/Emap_example_screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/vignettes/Emap_example_screenshot.png


--------------------------------------------------------------------------------
/vignettes/images/vignette1_design.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/vignettes/images/vignette1_design.jpg


--------------------------------------------------------------------------------
/vignettes/integratedPSN_MEAN_top0.10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaderLab/netDx/39ef9af812b91072d94ed8ff988a8ec961c3d6c5/vignettes/integratedPSN_MEAN_top0.10.png


--------------------------------------------------------------------------------
/vignettes/prepare_data.R:
--------------------------------------------------------------------------------
 1 | # setup brca data
 2 | prepareData <- function(dat, setBinary=FALSE) {
 3 | ### clean up stage variable
 4 | staget <- sub("[abcd]","",sub("t","",colData(dat)$pathology_T_stage))
 5 | staget <- suppressWarnings(as.integer(staget))
 6 | colData(dat)$STAGE <- staget
 7 | 
 8 | ### remove NA PAM50 calls, remove normal samples
 9 | tmp <- colData(dat)$PAM50.mRNA
10 | if (!setBinary){
11 | 	idx <- which(tmp %in% c("Normal-like","HER2-enriched"))
12 | } else {
13 | 	idx <- union(which(tmp %in% c("Normal-like","HER2-enriched","Luminal B")),
14 | 			which(is.na(staget)))
15 | }
16 | idx <- union(idx, which(is.na(tmp)))
17 | pID <- colData(dat)$patientID
18 | tokeep <- setdiff(pID, pID[idx])
19 | dat <- dat[,tokeep,]
20 | pam50 <- colData(dat)$PAM50.mRNA
21 | 
22 | ### where a patient has multiple instances of the same assay
23 | ### just keep the first instance encountered
24 | smp <- sampleMap(dat)
25 | expr <- assays(dat)
26 | for (k in 1:length(expr)) {
27 | 	samps <- smp[which(smp$assay==names(expr)[k]),]
28 | 	notdup <- samps[which(!duplicated(samps$primary)),"colname"]
29 | 	#message(sprintf("%s: %i notdup", names(expr)[k], length(notdup)))
30 | 	dat[[k]] <- suppressMessages(dat[[k]][,notdup])
31 | }
32 | 
33 | ### create ID, STATUS columns, remove spaces/hyphens from patient labels
34 | pID <- colData(dat)$patientID
35 | colData(dat)$ID <- pID
36 | colData(dat)$STATUS <- pam50
37 | colData(dat)$STATUS <- gsub(" ",".",colData(dat)$STATUS)
38 | colData(dat)$STATUS <- gsub("-",".",colData(dat)$STATUS)
39 | 
40 | if (setBinary){
41 | 	st <- colData(dat)$STATUS
42 | 	st[which(!st %in% "Luminal.A")] <- "other"
43 | 	colData(dat)$STATUS <- st
44 | }
45 | 
46 | return(dat)
47 | }
48 | 
49 | 


--------------------------------------------------------------------------------