├── README.md ├── package ├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R │ ├── ALL_BEA_objects.R │ ├── BEA_Assessments.R │ ├── BEA_AssessmentsAPI.R │ ├── BEA_AssessmentsBatchcorr.R │ ├── BEA_AssessmentsBoxplot.R │ ├── BEA_AssessmentsCDP.R │ ├── BEA_AssessmentsHierclust.R │ ├── BEA_AssessmentsMANOVA.R │ ├── BEA_AssessmentsPca.R │ ├── BEA_AssessmentsPca_dsc.R │ ├── BEA_AssessmentsPca_pvalueDsc.R │ ├── BEA_AssessmentsSupervisedClust.R │ ├── BEA_CorrectionsAN.R │ ├── BEA_CorrectionsAPI.R │ ├── BEA_CorrectionsEB.R │ ├── BEA_CorrectionsEBNP.R │ ├── BEA_CorrectionsEBNPapi.R │ ├── BEA_CorrectionsEBNPdata.R │ ├── BEA_CorrectionsEBNplus.R │ ├── BEA_CorrectionsMP.R │ ├── BEA_CorrectionsRBN.R │ ├── BEA_CorrectionsRBNapi.R │ ├── BatchEffAssess.R │ ├── FilesAndDirs.R │ ├── Logging.R │ ├── ReadRJava.R │ └── zzz.R ├── inst │ ├── BoxplotJava │ │ ├── BoxplotJava.jar │ │ ├── LegendJava.jar │ │ ├── commons-lang3-3.3.2.jar │ │ ├── commons-math3-3.3.jar │ │ ├── jcommon-1.0.17.jar │ │ └── jfreechart-1.0.14.jar │ ├── DscJava │ │ ├── DscJava.jar │ │ ├── commons-lang3-3.1.jar │ │ └── commons-math3-3.3.jar │ ├── LegendJava │ │ ├── LegendJava.jar │ │ ├── jcommon-1.0.17.jar │ │ └── jfreechart-1.0.14.jar │ ├── ReadRJava │ │ └── ReadRJava.jar │ ├── doc │ │ ├── MBatch_04-06_PCA_DualBatch_Structures.R │ │ ├── MBatch_04-06_PCA_DualBatch_Structures.Rmd │ │ ├── MBatch_04-06_PCA_DualBatch_Structures.pdf │ │ ├── MBatch_04-07_SupervisedClustering_Pairs_Structures.R │ │ ├── MBatch_04-07_SupervisedClustering_Pairs_Structures.Rmd │ │ ├── MBatch_04-07_SupervisedClustering_Pairs_Structures.pdf │ │ ├── MBatch_04-08_Boxplot_AllSamplesRLE_Structures.R │ │ ├── MBatch_04-08_Boxplot_AllSamplesRLE_Structures.Rmd │ │ ├── MBatch_04-08_Boxplot_AllSamplesRLE_Structures.pdf │ │ ├── MBatch_04-09_Boxplot_AllSamplesData_Structures.R │ │ ├── MBatch_04-09_Boxplot_AllSamplesData_Structures.Rmd │ │ ├── MBatch_04-09_Boxplot_AllSamplesData_Structures.pdf │ │ ├── MBatch_05-05_EB_withNonParametricPriors.R │ │ ├── 
MBatch_05-05_EB_withNonParametricPriors.Rmd │ │ ├── MBatch_05-05_EB_withNonParametricPriors.pdf │ │ ├── MBatch_05-06_EB_withParametricPriors.R │ │ ├── MBatch_05-06_EB_withParametricPriors.Rmd │ │ ├── MBatch_05-06_EB_withParametricPriors.pdf │ │ ├── MBatch_05-07_MP_Overall.R │ │ ├── MBatch_05-07_MP_Overall.Rmd │ │ ├── MBatch_05-07_MP_Overall.pdf │ │ ├── MBatch_05-08_MP_ByBatch.R │ │ ├── MBatch_05-08_MP_ByBatch.Rmd │ │ ├── MBatch_05-08_MP_ByBatch.pdf │ │ ├── MBatch_05-09_AN_Adjusted.R │ │ ├── MBatch_05-09_AN_Adjusted.Rmd │ │ ├── MBatch_05-09_AN_Adjusted.pdf │ │ ├── MBatch_05-10_AN_Unadjusted.R │ │ ├── MBatch_05-10_AN_Unadjusted.Rmd │ │ ├── MBatch_05-10_AN_Unadjusted.pdf │ │ ├── MBatch_05-11_EBNPlus_CheckData_Structures.R │ │ ├── MBatch_05-11_EBNPlus_CheckData_Structures.Rmd │ │ ├── MBatch_05-11_EBNPlus_CheckData_Structures.pdf │ │ ├── MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.R │ │ ├── MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.Rmd │ │ ├── MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.pdf │ │ ├── MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.R │ │ ├── MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.Rmd │ │ └── MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.pdf │ └── gpl-2_0.txt ├── man │ ├── AN_Adjusted.Rd │ ├── AN_Unadjusted.Rd │ ├── BEA_DATA-class.Rd │ ├── Boxplot_AllSamplesData_Structures.Rd │ ├── Boxplot_AllSamplesRLE_Structures.Rd │ ├── Boxplot_Group_Structures.Rd │ ├── CDP_Files.Rd │ ├── CDP_Plot.Rd │ ├── CDP_Structures.Rd │ ├── EBNPlus_CheckData_Structures.Rd │ ├── EBNPlus_CombineBatches.Rd │ ├── EBNPlus_Correction_Files.Rd │ ├── EBNPlus_Correction_Structures.Rd │ ├── EBNPlus_TrainAndValidateFromVector_Structures.Rd │ ├── EBNPlus_TrainAndValidateReplicates_Structures.Rd │ ├── EB_withNonParametricPriors.Rd │ ├── EB_withParametricPriors.Rd │ ├── HierarchicalClustering_Structures.Rd │ ├── Logging-class.Rd │ ├── MP_ByBatch.Rd │ ├── MP_Overall.Rd │ ├── PCA_DualBatch_Structures.Rd │ 
├── PCA_Regular_Structures.Rd │ ├── RBN_Pseudoreplicates.Rd │ ├── RBN_Replicates.Rd │ ├── SupervisedClustering_Batches_Structures.Rd │ ├── SupervisedClustering_Pairs_Structures.Rd │ ├── buildDSCOverviewFile.Rd │ ├── clearDSCOverviewFiles.Rd │ ├── getReplicatesForRBN.Rd │ ├── mbatchFilterData.Rd │ ├── mbatchIncludeExcludeData.Rd │ ├── mbatchLoadFiles.Rd │ ├── mbatchLoadStructures.Rd │ ├── mbatchTrimData.Rd │ ├── mbatchWriteSuccessfulLog.Rd │ ├── readAsDataFrame.Rd │ ├── readAsGenericMatrix.Rd │ ├── readAsMatrix.Rd │ ├── setLogging.Rd │ ├── writeAsDataframe.Rd │ └── writeAsMatrix.Rd ├── tests │ ├── AN_Adjusted.R │ ├── AN_Unadjusted.R │ ├── Boxplot_AllSamplesData_Structures.R │ ├── Boxplot_AllSamplesRLE_Structures.R │ ├── Boxplot_Group_Structures.R │ ├── CDP_Files.R │ ├── CDP_Plot.R │ ├── CDP_Structures.R │ ├── EBNPlus_CombineBatches.R │ ├── EBNPlus_Correction_Files.R │ ├── EBNPlus_Correction_Structures.R │ ├── EB_withNonParametricPriors.R │ ├── EB_withParametricPriors.R │ ├── HierarchicalClustering_Structures.R │ ├── MP_ByBatch.R │ ├── MP_Overall.R │ ├── PCA_DualBatch_Structures.R │ ├── PCA_Regular_Structures.R │ ├── RBN_Pseudoreplicates.R │ ├── RBN_Replicates.R │ ├── SupervisedClustering_Batches_Structures.R │ └── SupervisedClustering_Pairs_Structures.R └── vignettes │ ├── BoxPlot_AllSample-Data_Diagram-TSS.png │ ├── BoxPlot_AllSample-RLE_Diagram-BatchId.png │ ├── MBatch_04-06_PCA_DualBatch_Structures.Rmd │ ├── MBatch_04-07_SupervisedClustering_Pairs_Structures.Rmd │ ├── MBatch_04-08_Boxplot_AllSamplesRLE_Structures.Rmd │ ├── MBatch_04-09_Boxplot_AllSamplesData_Structures.Rmd │ ├── MBatch_05-05_EB_withNonParametricPriors.Rmd │ ├── MBatch_05-06_EB_withParametricPriors.Rmd │ ├── MBatch_05-07_MP_Overall.Rmd │ ├── MBatch_05-08_MP_ByBatch.Rmd │ ├── MBatch_05-09_AN_Adjusted.Rmd │ ├── MBatch_05-10_AN_Unadjusted.Rmd │ ├── MBatch_05-11_EBNPlus_CheckData_Structures.Rmd │ ├── MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.Rmd │ ├── 
MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.Rmd │ ├── boxplot_dynamic.PNG │ ├── boxplot_dynamic_data.PNG │ ├── pca_plus.png │ └── supervised_clustering.png └── pdf ├── MBatch_01_InstallLinux.pdf ├── MBatch_01_InstallOSX.pdf ├── MBatch_01_InstallWindows.pdf ├── MBatch_02_RunningTests.pdf ├── MBatch_03_StandardizedData.pdf ├── MBatch_03_UserData.pdf ├── MBatch_04-00_ParametersBatchTypesValues.pdf ├── MBatch_04-01_SupervisedClusteringBatchesStructures.pdf ├── MBatch_04-02_PCA_Regular_Structures.pdf ├── MBatch_04-03_HierarchicalClustering_Structures.pdf ├── MBatch_04-04_Boxplot_Group_Structures.pdf ├── MBatch_04-05_CDP_Structures.pdf ├── MBatch_04-06_PCA_DualBatch_Structures.pdf ├── MBatch_04-07_SupervisedClustering_Pairs_Structures.pdf ├── MBatch_04-08_Boxplot_AllSamplesRLE_Structures.pdf ├── MBatch_04-09_Boxplot_AllSamplesData_Structures.pdf ├── MBatch_05-01_EBNPlus_CombineBatches.pdf ├── MBatch_05-02_EBNPlus_Correction_Structures.pdf ├── MBatch_05-03_RBN_Replicates.pdf ├── MBatch_05-04_RBN_Pseudoreplicates.pdf ├── MBatch_05-05_EB_withNonParametricPriors.pdf ├── MBatch_05-06_EB_withParametricPriors.pdf ├── MBatch_05-07_MP_Overall.pdf ├── MBatch_05-08_MP_ByBatch.pdf ├── MBatch_05-09_AN_Adjusted.pdf ├── MBatch_05-10_AN_Unadjusted.pdf ├── MBatch_05-11_EBNPlus_CheckData_Structures.pdf ├── MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.pdf └── MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.pdf /README.md: -------------------------------------------------------------------------------- 1 | Superseded by https://github.com/MD-Anderson-Bioinformatics/BatchEffectsPackage 2 | 3 | # MBatch R Package 4 | 5 | This is for educational and research purposes only. 6 | 7 | Samples from large research projects are often processed and run in multiple batches at different times. 
Because the samples are processed in batches rather than all at once, the data can be vulnerable to systematic noise such as batch effects (unwanted variation between batches) and trend effects (unwanted variation over time), which can lead to misleading analysis results. 8 | 9 | The MBatch R package is designed to help assess and correct for batch effects. It first allows the user to assess and quantify the presence of any batch effects via algorithms such as Hierarchical Clustering, Principal Component Analysis, and box plots. If significant batch effects are observed in the data, the user then has the option of selecting from a variety of correction algorithms, such as Empirical Bayes (aka ComBat), ANOVA and Median Polish. 10 | 11 | Additional information can be found at http://bioinformatics.mdanderson.org/main/TCGABatchEffects:Overview 12 | 13 | The documentation directory contains several kinds of documentation for MBatch: 14 | 15 | * Files that start with MBatch_01 are installation documentation. Current instructions are for Linux (Debian 9.1). We expect to provide Windows and OS X instructions in late 2017/early 2018. 16 | * Files that start with MBatch_02 provide additional details about the test files in the package. 17 | * Files that start with MBatch_03 detail the file formats used by MBatch and the associated "Standardized Data" files. 18 | * Files that start with MBatch_04 are documentation of assessment algorithms/plots. 19 | * Files that start with MBatch_05 are documentation of correction algorithms. 20 | 21 | Downloads and details on Standardized Data are available at http://bioinformatics.mdanderson.org/TCGA/databrowser/ 22 | 23 | If you have the equivalent of Java 8 and R 3.4+ installed on your machine, and are familiar with your OS prerequisites and R package installation, the following quickstart instructions may allow quick installation. 
24 | 25 | ```R 26 | # required CRAN packages 27 | install.packages(c("rJava", "devtools", "Cairo", "epiR", "gtools", "mclust", "squash", "httr"), dependencies=TRUE, repos = "http://cloud.r-project.org/") 28 | 29 | # required GitHub package 30 | library(devtools) 31 | install_github("js229/Vennerable") 32 | 33 | # required Bioconductor packages 34 | source("http://bioconductor.org/biocLite.R") 35 | biocLite(c("limma","RBGL","graph","Biobase"), ask="a") 36 | install.packages(c("oompaBase", "ClassDiscovery", "PreProcess"), dependencies=TRUE, repos=c("http://cloud.r-project.org", "http://silicovore.com/OOMPA/")) 37 | 38 | # MBatch package 39 | devtools::install_github("MD-Anderson-Bioinformatics/MBatch/package") 40 | ``` 41 | -------------------------------------------------------------------------------- /package/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: MBatch 2 | Title: MD Anderson Batch Assessment Tools (MBatch) Package 3 | Version: 1.4.17 4 | Date: 2018-06-22 5 | Author: Rehan Akbani 6 | Tod Casasent 7 | Bradley Broom 8 | John Weinstein 9 | Maintainer: Rehan Akbani 10 | Depends: R (>= 3.3.0), Cairo, cluster, mclust, oompaBase, PreProcess, ClassDiscovery, squash, gtools, methods, rJava, parallel, Vennerable, limma, stats, epiR 11 | Suggests: batchcorr, 12 | knitr, 13 | rmarkdown 14 | Description: MBatch is an R package for assessment of batch effects in high-throughput data such as microarray data. It includes R implementations of several conventional methods such as principal component analysis and clustering analysis, and also includes novel correlation-of-correlation based methods such as CZR, CZN, CZNS, CZNL, and CZS. This package is mainly intended for data with multiple batches and multiple batch effect sources, such as data from The Cancer Genome Atlas (TCGA). Some novel visualization methods such as PCA plot with centroid are also included in this package. It also requires Java 8 for DSC computations. 
See https://github.com/MD-Anderson-Bioinformatics/MBatch 15 | License: GPL (>=2) 16 | VignetteBuilder: knitr 17 | -------------------------------------------------------------------------------- /package/NAMESPACE: -------------------------------------------------------------------------------- 1 | importFrom("grDevices", "col2rgb", "colorRampPalette", "dev.off", "dev.set", "rainbow") 2 | importFrom("graphics", "abline", "arrows", "axis", "box", "boxplot", "frame", "layout", "lines", "mtext", "par", "plot.new", "points", "rect", "segments", "text", "title") 3 | importFrom("stats", "IQR", "as.dendrogram", "as.dist", "cor", "cor.test", "density", "dist", "dnorm", "hclust", "lm", "mad", "median", "medpolish", "order.dendrogram", "qqline", "qqnorm", "qqplot", "quantile", "reorder", "rgamma", "sd", "var") 4 | importFrom("utils", "URLencode", "dump.frames", "glob2rx", "installed.packages", "packageDescription", "read.csv", "sessionInfo", "write.table") 5 | importFrom("graphics", "legend") 6 | importFrom("epiR", "epi.ccc") 7 | importFrom("grDevices", "rgb") 8 | import(Cairo) 9 | import(cluster) 10 | import(mclust) 11 | import(oompaBase) 12 | import(PreProcess) 13 | import(ClassDiscovery) 14 | import(squash) 15 | import(gtools) 16 | import(methods) 17 | import(rJava) 18 | import(parallel) 19 | import(Vennerable) 20 | import(limma) 21 | 22 | # Utility classes 23 | exportClasses(BEA_DATA) 24 | exportClasses(Logging) 25 | 26 | # utility functions 27 | export(compareTwoMatrices) 28 | export(mbatchLoadStructures) 29 | export(mbatchLoadFiles) 30 | export(mbatchFilterData) 31 | export(mbatchTrimData) 32 | export(setLogging) 33 | export(clearDSCOverviewFiles) 34 | export(buildDSCOverviewFile) 35 | export(readAsMatrix) 36 | export(readAsDataFrame) 37 | export(writeAsMatrix) 38 | export(writeAsDataframe) 39 | export(mbatchIncludeExcludeData) 40 | export(readAsGenericMatrix) 41 | export(readAsGenericDataframe) 42 | export(mbatchWriteSuccessfulLog) 43 | 44 | # Assessments 45 | 
export(SupervisedClustering_Pairs_Structures) 46 | export(SupervisedClustering_Batches_Structures) 47 | export(PCA_Regular_Structures) 48 | export(PCA_DualBatch_Structures) 49 | export(HierarchicalClustering_Structures) 50 | export(Boxplot_Group_Structures) 51 | export(Boxplot_AllSamplesRLE_Structures) 52 | export(Boxplot_AllSamplesData_Structures) 53 | export(CDP_Plot) 54 | export(CDP_Files) 55 | export(CDP_Structures) 56 | 57 | # Corrections 58 | export(EBNPlus_Correction_Files) 59 | export(EBNPlus_CombineBatches) 60 | export(EBNPlus_Correction_Structures) 61 | export(EBNPlus_CheckData_Structures) 62 | export(EBNPlus_TrainAndValidateReplicates_Structures) 63 | export(EBNPlus_TrainAndValidateFromVector_Structures) 64 | export(EB_withNonParametricPriors) 65 | export(EB_withParametricPriors) 66 | export(MP_Overall) 67 | export(MP_ByBatch) 68 | export(AN_Adjusted) 69 | export(AN_Unadjusted) 70 | export(RBN_Replicates) 71 | export(RBN_Pseudoreplicates) 72 | export(getReplicatesForRBN) 73 | 74 | # used in testing 75 | export(getTestInputDir) 76 | export(getTestOutputDir) 77 | export(getTestCompareDir) 78 | -------------------------------------------------------------------------------- /package/R/ALL_BEA_objects.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
#
#You should have received a copy of the GNU General Public License along with this program. If not, see .

library(methods)

###Samples
###setClass("foo", representation(a = "character", b = "numeric"))
###setClass("bar", representation(d = "numeric", c = "numeric"))
###setClass("baz", contains = c("foo", "bar"))
###setMethod("initialize", "xx",
###  function(.Object, b)
###  {
###    .Object@b <- b
###    .Object@a <- nchar(b)
###    .Object
###  })

# BEA_DATA: S4 container bundling one data matrix with two data.frames.
#   mData       - matrix
#   mBatches    - data.frame (presumably per-sample batch annotations -- confirm against loaders)
#   mCovariates - data.frame (presumably per-sample covariates -- confirm against loaders)
setClass("BEA_DATA", representation(
  mData="matrix",
  mBatches="data.frame",
  mCovariates="data.frame"
))

# Initializer for BEA_DATA.
#   theData, theBatches, theCovariates - values copied into the slots of the
#   same name (mData, mBatches, mCovariates).
# Each slot is only overwritten when its argument is supplied, so that
# new("BEA_DATA") with no arguments returns the class prototype instead of
# failing with a "missing argument" error. Existing callers that pass all
# three arguments get exactly the same object as before.
setMethod("initialize", "BEA_DATA",
  function(.Object,
           theData, theBatches, theCovariates
  )
  {
    if (!missing(theData))
    {
      .Object@mData <- theData
    }
    if (!missing(theBatches))
    {
      .Object@mBatches <- theBatches
    }
    if (!missing(theCovariates))
    {
      .Object@mCovariates <- theCovariates
    }
    .Object
  })

# Corrections_EBNP: S4 record of settings whose slot names all carry the
# mEBNP_ prefix. The meaning of each slot is not visible in this file; see
# the EBNPlus correction code for how they are consumed.
setClass("Corrections_EBNP", representation(
  mEBNP_DoCorrectionFlag="logical",
  mEBNP_Data2="BEA_DATA",
  mEBNP_TrimBarcodesFunction="function",
  mEBNP_TrimGenesFunction="function",
  mEBNP_RemoveRowDuplicatesFunction="function",
  mEBNP_RemoveColDuplicatesFunction="function",
  mEBNP_Data1BatchId="character",
  mEBNP_Data2BatchId="character",
  mEBNP_BatchWithZero="character",
  mEBNP_FixDataSet="numeric",
  mEBNP_CorrectForZero="logical",
  mEBNP_ValidationRatio="numeric"
))


# Logging: S4 record describing where and what to log.
#   mFile            - character
#   mLevelNamesToLog - vector of level names
#   mLevelNames      - vector of level names
#   mSeparator       - character
#   mConsole         - logical
setClass("Logging", representation(
  mFile="character",
  mLevelNamesToLog="vector",
  mLevelNames="vector",
  mSeparator="character",
  mConsole="logical"
))

# Initializer for Logging; every argument has a default, so new("Logging")
# works without arguments. By default the file name is "", both level lists
# hold all six level names, the separator is a single space and the console
# flag is TRUE.
setMethod("initialize", "Logging",
  function(.Object,
           theFile="",
           theLevelNamesToLog=c('DEBUG', 'TIMING', 'INFO', 'WARN', 'PERCENT', 'ERROR'),
           theLevelNames= c('DEBUG', 'TIMING', 'INFO', 'WARN', 'PERCENT', 'ERROR'),
           theSeparator=" ",
           theConsole=TRUE)
  {
    .Object@mFile <- theFile
    .Object@mLevelNamesToLog <- theLevelNamesToLog
    .Object@mLevelNames <- theLevelNames
    .Object@mSeparator <- theSeparator
    .Object@mConsole <- theConsole
    .Object
  })

--------------------------------------------------------------------------------
/package/R/BEA_Assessments.R:
--------------------------------------------------------------------------------
#MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center
#
#This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version.
#
#This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License along with this program. If not, see .
8 | 9 | 10 | 11 | #################################################################### 12 | #################################################################### 13 | # 14 | # generateFinalImageTitle<-function(theTitle, tempCentroids, thePathSubdir) 15 | # { 16 | # ### cut off anything before the first occurance of thePathSubdir 17 | # index <- regexpr(thePathSubdir, tempCentroids) 18 | # resultString <- substr(tempCentroids,index-1, nchar(tempCentroids)) 19 | # ### get rid of '_Diagram.png', 20 | # resultString <- gsub("_Diagram.png", "", resultString, fixed=TRUE) 21 | # ### convert file underscores to spaces 22 | # ###resultString <- gsub("_", " ", resultString, fixed=TRUE) 23 | # ### add theTitle to replace what was removed 24 | # resultString <- paste(theTitle, resultString, sep=" ") 25 | # ### convert directory separators to spaces 26 | # ###resultString <- gsub(.Platform$file.sep, " ", resultString, fixed=TRUE) 27 | # ### convert directory separators to space/space 28 | # resultString <- gsub(.Platform$file.sep, paste(" ",.Platform$file.sep," ", sep=""), resultString, fixed=TRUE) 29 | # ### wrap to lengths 30 | # resultString <- breakIntoTitle(resultString, theOldChar=" ", theWidth=50) 31 | # return(resultString) 32 | # } 33 | 34 | #################################################################### 35 | #################################################################### 36 | -------------------------------------------------------------------------------- /package/R/BEA_CorrectionsMP.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 
#
#This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License along with this program. If not, see .

# Run MP() on a data matrix, reporting any error through
# handleIssuesFunction() instead of propagating it.
#   subMatrixGeneData     - data matrix handed to MP() as `dat`
#   subDataframeBatchData - batch data.frame; converted with
#                           convertDataFrameToSi() and handed to MP() as `si`
#   by                    - batch type column name(s), passed through to MP()
#   overall               - passed through to MP(); note the default here is
#                           TRUE whereas MP() itself defaults to FALSE
#   theIssuesFile         - forwarded to handleIssuesFunction() on error
# Returns the value of MP(), or NULL when MP() raised an error.
BeaMP <- function(subMatrixGeneData, subDataframeBatchData, by='Batch', overall=TRUE, theIssuesFile=NULL)
{
  logDebug("starting BeaMP")
  corrected <- tryCatch(
    MP(subMatrixGeneData, convertDataFrameToSi(subDataframeBatchData), by, overall),
    error=function(e)
    {
      handleIssuesFunction(e, theIssuesFile)
      # the handler's own return value was never used; an error always yields NULL
      NULL
    })
  logDebug("finishing BeaMP")
  return(corrected)
}

# Median polish correction.
#   dat     - data matrix; in by-batch mode its column names must equal
#             rownames(si), in the same order
#   si      - sample information, indexed as si[, by]; required unless
#             overall is TRUE
#   by      - name of the batch type column in si
#   overall - TRUE: polish the whole matrix once and return
#             residuals + overall effect.
#             FALSE: polish each batch's columns separately, keep the
#             residuals, then add the overall effect of a polish of the
#             whole matrix
#   ...     - extra arguments forwarded to medpolish()
# Returns a matrix shaped like dat.
MP <- function(dat, si, by='Batch', overall=FALSE, ...)
{
  logDebug("starting MP")
  # scalar condition, so use the short-circuit operator
  if (missing(si) && overall==FALSE)
  {
    stop('sample information is needed for batch-wise median polish corrections')
  }
  if (overall==TRUE)
  {
    logDebug("MP overall")
    polished <- medpolish(dat, eps=0.0001, trace.iter=FALSE, na.rm=TRUE, ...)
    final <- polished$residuals + polished$overall
  }
  else
  {
    logDebug("MP batch")
    stopifnotWithLogging("Data sample names should match and be in same order as those for batch data", all(colnames(dat)==rownames(si)))
    stopifnotWithLogging("All requested batch types should be in batch data", all(by %in% colnames(si)))
    batch <- table(si[,by])
    correctedByBatch <- dat
    for (batchId in names(batch))
    {
      # compute the column mask once per batch and reuse it for read and write
      inBatch <- si[,by]==batchId
      # drop=FALSE keeps a one-sample batch as a one-column matrix
      batchPolish <- medpolish(dat[, inBatch, drop=FALSE], eps=0.0001, trace.iter=FALSE, na.rm=TRUE, ...)
      correctedByBatch[, inBatch] <- batchPolish$residuals
    }
    # NOTE(review): `...` is not forwarded to this call (unchanged from the
    # original) -- confirm whether that is intentional.
    overallPolish <- medpolish(dat, eps=0.0001, trace.iter=FALSE, na.rm=TRUE)
    final <- correctedByBatch + overallPolish$overall
  }
  return(final)
}

--------------------------------------------------------------------------------
/package/R/FilesAndDirs.R:
--------------------------------------------------------------------------------
#MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center
#
#This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version.
#
#This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License along with this program. If not, see .


################################################################################

# Build a file path: all `...` pieces are pasted together without a
# separator to form the file name, which is appended to theDir.
createDirPlusFilename <- function(theDir, ...)
{
  file.path(theDir, paste(..., sep="", collapse=""))
}

# Join path components with the platform file separator.
createDirPath <- function(...)
{
  file.path(...)
}

# Create thePath and any missing ancestors, one level at a time.
# Recursion stops when dirname() no longer shortens the path.
checkDirForCreation <- function(thePath)
{
  parentPath <- dirname(thePath)
  if (thePath != parentPath)
  {
    # make sure the parent exists before creating this level
    checkDirForCreation(parentPath)
    if (!file.exists(thePath))
    {
      dir.create(thePath, recursive=FALSE)
    }
  }
}

# Join `...` into a directory path, create it if needed, and return the path.
checkCreateDir <- function(...)
{
  myDir <- createDirPath(...)
  logDebug("checkCreateDir: ", myDir)
  checkDirForCreation(myDir)
  return(myDir)
}

################################################################################

# Read the first line of theFile and return its tab-separated fields as a
# character vector (NULL when the file has no lines).
readAsGenericMatrix_Samples <- function(theFile)
{
  myFile <- file(theFile, "r")
  on.exit(close(myFile))
  myHeaderString <- readLines(con=myFile, n=1)
  myHeaderList <- unlist(strsplit(myHeaderString, "\t"))
  myHeaderList
}

# Read a tab-delimited numeric matrix file.
# The first header field is skipped and the remaining header fields become
# column names; in every later line the first field is the row name and the
# rest are read as doubles (empty fields become NA).
# Duplicate row/column names are disambiguated with make.unique(), and the
# result is sorted by column name and then by row name.
# Stops with an error when the header has no column names after its first
# field.
readAsGenericMatrix <- function(theFile)
{
  header <- readAsGenericMatrix_Samples(theFile)
  # header[-1] is empty for a one-field header, where 2:length() would
  # count backwards and produce c(NA, <first field>)
  samples <- header[-1]
  if (length(samples) < 1)
  {
    stop("readAsGenericMatrix: no sample columns found in header of file ", theFile)
  }
  # one character column (row names) followed by one double column per sample
  whatList <- c(list(character()), rep(list(double()), length(samples)))
  myScan <- scan(file=theFile, skip=1, what=whatList, quote="",
                 sep="\t", na.strings="", flush=TRUE, fill=FALSE, multi.line=FALSE)
  genes <- as.vector(unlist(myScan[1]))
  # scan() returns one vector per column, so this is column-major data
  data <- as.vector(unlist(myScan[-1]))
  temp <- matrixWithIssues(data,
                           ncol=length(samples),
                           nrow=length(genes),
                           dimnames=list(make.unique(genes), make.unique(samples)))
  # drop=FALSE: a single-column or single-row matrix must stay a matrix,
  # otherwise the second subset fails on the resulting vector
  temp <- temp[, sort(colnames(temp)), drop=FALSE]
  temp <- temp[sort(rownames(temp)), , drop=FALSE]
  temp
}

# Write theMatrix tab-delimited, unquoted, with row names and a blank
# header cell above the row-name column. Always returns TRUE.
writeAsGenericMatrix <- function(theFile, theMatrix)
{
  write.table(theMatrix, file=theFile, quote=FALSE, sep="\t", col.names=NA, row.names=TRUE)
  return(TRUE)
}

# Write theMatrix tab-delimited, unquoted, with a header row but without
# row names. Always returns TRUE.
writeAsGenericMatrixNoRows <- function(theFile, theMatrix)
{
  write.table(theMatrix, file=theFile, quote=FALSE, sep="\t", col.names=TRUE, row.names=FALSE)
  return(TRUE)
}

# Read a tab-delimited file with a header row into a data.frame in which
# every column is character; column names are kept as written and
# theNaString marks missing values.
readAsGenericDataframe <- function(theFile, theNaString="NA")
{
  return( read.csv(theFile, header=TRUE, sep="\t", as.is=TRUE, check.names=FALSE, stringsAsFactors=FALSE, colClasses="character", na.strings=theNaString) )
}

# Write theDataframe tab-delimited, unquoted, with a header row but without
# row names. Always returns TRUE.
writeAsGenericDataframe <- function(theFile, theDataframe)
{
  write.table(theDataframe, file=theFile, quote=FALSE, sep="\t", col.names=TRUE, row.names=FALSE)
  return(TRUE)
}

################################################################################

--------------------------------------------------------------------------------
/package/R/zzz.R:
--------------------------------------------------------------------------------
#MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center
#
#This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version.
#
#This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License along with this program. If not, see .

# Package attach hook: emits a startup message made of the collation notice
# followed, after one space, by the value of getMBatchVersion().
.onAttach <- function(libname, pkgname)
{
  localeNotice <- "All sorting in this package requires using a Sys.setlocale(\"LC_COLLATE\",\"C\")."
  startupText <- paste(localeNotice, getMBatchVersion(), sep=" ")
  packageStartupMessage(startupText)
}

--------------------------------------------------------------------------------
/package/inst/BoxplotJava/BoxplotJava.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/BoxplotJava/BoxplotJava.jar
--------------------------------------------------------------------------------
/package/inst/BoxplotJava/LegendJava.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/BoxplotJava/LegendJava.jar
--------------------------------------------------------------------------------
/package/inst/BoxplotJava/commons-lang3-3.3.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/BoxplotJava/commons-lang3-3.3.2.jar
--------------------------------------------------------------------------------
/package/inst/BoxplotJava/commons-math3-3.3.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/BoxplotJava/commons-math3-3.3.jar
--------------------------------------------------------------------------------
/package/inst/BoxplotJava/jcommon-1.0.17.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/BoxplotJava/jcommon-1.0.17.jar -------------------------------------------------------------------------------- /package/inst/BoxplotJava/jfreechart-1.0.14.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/BoxplotJava/jfreechart-1.0.14.jar -------------------------------------------------------------------------------- /package/inst/DscJava/DscJava.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/DscJava/DscJava.jar -------------------------------------------------------------------------------- /package/inst/DscJava/commons-lang3-3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/DscJava/commons-lang3-3.1.jar -------------------------------------------------------------------------------- /package/inst/DscJava/commons-math3-3.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/DscJava/commons-math3-3.3.jar -------------------------------------------------------------------------------- /package/inst/LegendJava/LegendJava.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/LegendJava/LegendJava.jar 
--------------------------------------------------------------------------------
/package/inst/LegendJava/jcommon-1.0.17.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/LegendJava/jcommon-1.0.17.jar
--------------------------------------------------------------------------------
/package/inst/LegendJava/jfreechart-1.0.14.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/LegendJava/jfreechart-1.0.14.jar
--------------------------------------------------------------------------------
/package/inst/ReadRJava/ReadRJava.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/ReadRJava/ReadRJava.jar
--------------------------------------------------------------------------------
/package/inst/doc/MBatch_04-06_PCA_DualBatch_Structures.R:
--------------------------------------------------------------------------------
## ---- echo=TRUE----------------------------------------------------------
# Example script: load the test matrix and batch files, then run
# PCA_DualBatch_Structures into a freshly emptied output directory.
# NOTE(review): the chunk header above suggests this file is extracted from
# the matching .Rmd vignette -- if so, edit the vignette, not this file.
{
  library(MBatch)

  # set the paths
  theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv"
  theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv"
  theOutputDir="/bea_testing/output/PCA_DualBatch_Structures"
  # NOTE(review): theRandomSeed is never referenced below; the call passes
  # theSeed=0 instead -- confirm which value is intended.
  theRandomSeed=314

  # make sure the output dir exists and is empty
  # (unlink deletes any existing output directory and its contents)
  unlink(theOutputDir, recursive=TRUE)
  dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE)
  # load data
  myData <- mbatchLoadFiles(theGeneFile, theBatchFile)
  # trim the data matrix; presumably 100000 is a size cap -- see mbatchTrimData
  myData@mData <- mbatchTrimData(myData@mData, 100000)
  # trend function to handle time data
  # returns TRUE only for the batch type named "ShipDate";
  # theListOfBatchIds is accepted but not used
  isTrendBatch<-function(theBatchTypeName, theListOfBatchIds)
  {
    return(is.element(theBatchTypeName, c("ShipDate")))
  }
  ###
  # do two plots PlateId versus TSS and BatchId versus TSS
  PCA_DualBatch_Structures(myData,
    "Example Title for Test Data", theOutputDir, list(), list(),
    theIsPcaTrendFunction=isTrendBatch,
    theListForDoCentroidDualBatchType=c("PlateId", "TSS", "BatchId", "TSS"),
    theDoDSCFlag=TRUE, theDoDscPermsFileFlag=TRUE, theDoSampleLocatorFlag=TRUE,
    theListOfComponentsToPlot=c(1, 2, 1, 3), theDSCPermutations=100,
    theDSCThreads=5, theMinBatchSize=0,
    theJavaParameters="-Xms1000m", theSeed=0, theMaxGeneCount=0)
  # list every file produced under the output directory
  print(dir(theOutputDir, recursive=TRUE))
}

--------------------------------------------------------------------------------
/package/inst/doc/MBatch_04-06_PCA_DualBatch_Structures.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_04-06_PCA_DualBatch_Structures.pdf
--------------------------------------------------------------------------------
/package/inst/doc/MBatch_04-07_SupervisedClustering_Pairs_Structures.R:
--------------------------------------------------------------------------------
## ---- echo=TRUE----------------------------------------------------------
# Example script: load the test matrix and batch files, then run
# SupervisedClustering_Pairs_Structures into a freshly emptied output directory.
{
  library(MBatch)

  # set the paths
  theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv"
  theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv"
  theOutputDir="/bea_testing/output/SupervisedClustering_Pairs_Structures"
  # NOTE(review): theRandomSeed is assigned but not referenced in this script.
  theRandomSeed=314

  # make sure the output dir exists and is empty
  # (unlink deletes any existing output directory and its contents)
  unlink(theOutputDir, recursive=TRUE)
  dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE)

  # load the data and reduce the amount of data to reduce run time
  myData <- mbatchLoadFiles(theGeneFile, theBatchFile)
  myData@mData <- mbatchTrimData(myData@mData, 100000)

  # here, we take most defaults
  # batch types are given as a flat vector: PlateId/TSS and BatchId/TSS
  SupervisedClustering_Pairs_Structures(theData=myData,
    theTitle="Test Data Title",
    theOutputPath=theOutputDir,
    theDoHeatmapFlag=TRUE,
    theListOfBatchPairs=c("PlateId", "TSS", "BatchId", "TSS"),
    theBatchTypeAndValuePairsToRemove=list(),
    theBatchTypeAndValuePairsToKeep=list() )
}

--------------------------------------------------------------------------------
/package/inst/doc/MBatch_04-07_SupervisedClustering_Pairs_Structures.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_04-07_SupervisedClustering_Pairs_Structures.pdf
--------------------------------------------------------------------------------
/package/inst/doc/MBatch_04-08_Boxplot_AllSamplesRLE_Structures.R:
--------------------------------------------------------------------------------
## ---- echo=TRUE----------------------------------------------------------
# Example script: load the test matrix and batch files, then run
# Boxplot_AllSamplesRLE_Structures into a freshly emptied output directory.
{
  library(MBatch)

  # set the paths
  theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv"
  theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv"
  theOutputDir="/bea_testing/output/Boxplot_AllSamplesRLE_Structures"
  # NOTE(review): theRandomSeed is assigned but not referenced in this script.
  theRandomSeed=314

  # make sure the output dir exists and is empty
  # (unlink deletes any existing output directory and its contents)
  unlink(theOutputDir, recursive=TRUE)
  dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE)

  # load the data and reduce the amount of data to reduce run time
  myData <- mbatchLoadFiles(theGeneFile, theBatchFile)
  myData@mData <- mbatchTrimData(myData@mData, 100000)

  # here, we take most defaults
  # arguments are positional: data, a title string, output directory, and two empty lists
  Boxplot_AllSamplesRLE_Structures(myData, "Disease/Data Type/Platform/Data Level", theOutputDir, list(), list())
}

-------------------------------------------------------------------------------- /package/inst/doc/MBatch_04-08_Boxplot_AllSamplesRLE_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_04-08_Boxplot_AllSamplesRLE_Structures.pdf -------------------------------------------------------------------------------- /package/inst/doc/MBatch_04-09_Boxplot_AllSamplesData_Structures.R: -------------------------------------------------------------------------------- 1 | ## ---- echo=TRUE---------------------------------------------------------- 2 | { 3 | library(MBatch) 4 | 5 | # set the paths 6 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 7 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 8 | theOutputDir="/bea_testing/output/Boxplot_AllSamplesData_Structures" 9 | theRandomSeed=314 10 | 11 | # make sure the output dir exists and is empty 12 | unlink(theOutputDir, recursive=TRUE) 13 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 14 | 15 | # load the data and reduce the amount of data to reduce run time 16 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 17 | myData@mData <- mbatchTrimData(myData@mData, 100000) 18 | 19 | # here, we take most defaults 20 | Boxplot_AllSamplesData_Structures(myData, "Disease/Data Type/Platform/Data Level", theOutputDir, list(), list()) 21 | } 22 | 23 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_04-09_Boxplot_AllSamplesData_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_04-09_Boxplot_AllSamplesData_Structures.pdf 
-------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-05_EB_withNonParametricPriors.R: -------------------------------------------------------------------------------- 1 | ## ---- echo=TRUE---------------------------------------------------------- 2 | { 3 | library(MBatch) 4 | 5 | # set the paths 6 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 7 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 8 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 9 | theRandomSeed=314 10 | 11 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 12 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 13 | theOutputDir="/bea_testing/output/EB_withNonParametricPriors" 14 | theRandomSeed=314 15 | theBatchType="TSS" 16 | 17 | # make sure the output dir exists and is empty 18 | unlink(theOutputDir, recursive=TRUE) 19 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 20 | # load data 21 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 22 | myData@mData <- mbatchTrimData(myData@mData, 100000) 23 | # call 24 | outputFile <- EB_withNonParametricPriors(theBeaData=myData, 25 | theBatchIdsNotToCorrect=c(""), 26 | theDoCheckPlotsFlag=TRUE, 27 | theBatchType=theBatchType, 28 | theThreads=1, 29 | thePath=theOutputDir, 30 | theWriteToFile=TRUE) 31 | correctedMatrix <- readAsGenericMatrix(outputFile) 32 | print(correctedMatrix[1:4, 1:4]) 33 | } 34 | 35 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-05_EB_withNonParametricPriors.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-05_EB_withNonParametricPriors.pdf -------------------------------------------------------------------------------- 
/package/inst/doc/MBatch_05-06_EB_withParametricPriors.R: -------------------------------------------------------------------------------- 1 | ## ---- echo=TRUE---------------------------------------------------------- 2 | { 3 | library(MBatch) 4 | 5 | # set the paths 6 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 7 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 8 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 9 | theRandomSeed=314 10 | 11 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 12 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 13 | theOutputDir="/bea_testing/output/EB_withParametricPriors" 14 | theRandomSeed=314 15 | theBatchType="TSS" 16 | 17 | # make sure the output dir exists and is empty 18 | unlink(theOutputDir, recursive=TRUE) 19 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 20 | # load data 21 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 22 | myData@mData <- mbatchTrimData(myData@mData, 100000) 23 | # call 24 | outputFile <- EB_withParametricPriors(theBeaData=myData, 25 | theBatchIdsNotToCorrect=c(""), 26 | theDoCheckPlotsFlag=TRUE, 27 | theBatchType=theBatchType, 28 | theThreads=1, 29 | thePath=theOutputDir, 30 | theWriteToFile=TRUE) 31 | correctedMatrix <- readAsGenericMatrix(outputFile) 32 | print(correctedMatrix[1:4, 1:4]) 33 | } 34 | 35 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-06_EB_withParametricPriors.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-06_EB_withParametricPriors.pdf -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-07_MP_Overall.R: -------------------------------------------------------------------------------- 1 | ## ---- 
echo=TRUE---------------------------------------------------------- 2 | { 3 | library(MBatch) 4 | 5 | # set the paths 6 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 7 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 8 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 9 | theRandomSeed=314 10 | 11 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 12 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 13 | theOutputDir="/bea_testing/output/MP_Overall" 14 | theRandomSeed=314 15 | theBatchType="TSS" 16 | 17 | # make sure the output dir exists and is empty 18 | unlink(theOutputDir, recursive=TRUE) 19 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 20 | # load data 21 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 22 | myData@mData <- mbatchTrimData(myData@mData, 100000) 23 | # call 24 | outputFile <- MP_Overall(theBeaData=myData, 25 | thePath=theOutputDir, 26 | theWriteToFile=TRUE) 27 | correctedMatrix <- readAsGenericMatrix(outputFile) 28 | print(correctedMatrix[1:4, 1:4]) 29 | } 30 | 31 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-07_MP_Overall.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using MBatch Corrections: MP_Overall" 3 | author: "Tod Casasent" 4 | date: "`r Sys.Date()`" 5 | #output: rmarkdown::html_vignette 6 | # install with vignettes using devtools::install(build_vignettes = TRUE) 7 | # build vignettes using devtools::build_vignettes() (inst/doc) 8 | output: 9 | pdf_document: 10 | number_sections: TRUE 11 | vignette: > 12 | %\VignetteIndexEntry{Using MBatch Corrections: MP_Overall} 13 | %\VignetteEngine{knitr::rmarkdown} 14 | %\VignetteEncoding{UTF-8} 15 | --- 16 | 17 | # Introduction 18 | 19 | These instructions are aimed at people familiar with R and familiar with TCGA/GDC platforms and data types. 
They are intended to introduce the reader to producing the given correction. These instructions will only rarely, if ever, touch on the appropriateness of the correction algorithm or interpretation of output. See MBatch_01_InstallLinux for instructions on downloading test data. 20 | 21 | # Algorithm 22 | 23 | MP Overall performs a Median Polish Overall correction taking a BEA_DATA object (with data matrix and batch dataframe) and returning either a corrected matrix or a string containing the path to where the data file was written. 24 | 25 | # Output 26 | 27 | The primary output method for MBatch is to view results in the Batch Effects Website. Correction algorithms generally do not create graphical output and instead create TSV output files. 28 | 29 | # Usage 30 | 31 | MP_Overall(theBeaData, thePath = NULL, theWriteToFile = FALSE) 32 | 33 | # Arguments 34 | 35 | ## theBeaData 36 | 37 | BEA_DATA objects can be created by calls of the form new("BEA_DATA", theData, theBatches, theCovariates). If you have no covariate data, use an empty data.frame created with data.frame() 38 | 39 | mData: 40 | Object of class "matrix" A matrix where the colnames are sample ids and the rownames are gene equivalents. All names should be strings, not factors. 41 | 42 | mBatches: 43 | Object of class "data.frame" A data.frame where the column "names" are batch types. The first batch "type" is "Sample". All names and values should be strings, not factors or numeric. 44 | 45 | mCovariates: 46 | Object of class "data.frame" A data.frame where the column "names" are covariate types. The first covariate "type" is "Sample". All names and values should be strings, not factors or numeric. 47 | 48 | ## thePath 49 | Output path for any files. 50 | 51 | ## theWriteToFile 52 | TRUE to write the corrected data to file and return the file pathname instead of the corrected matrix. 53 | 54 | # Example Call 55 | 56 | The following code is adapted from the tests/MP_Overall.R file.
Data used is from the testing data as per the MBatch_01_InstallLinux document. In the future, we plan to make the output from MBatch more user friendly, but currently, this produces the following output at the command line. 57 | 58 | ```{r, echo=TRUE} 59 | { 60 | library(MBatch) 61 | 62 | # set the paths 63 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 64 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 65 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 66 | theRandomSeed=314 67 | 68 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 69 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 70 | theOutputDir="/bea_testing/output/MP_Overall" 71 | theRandomSeed=314 72 | theBatchType="TSS" 73 | 74 | # make sure the output dir exists and is empty 75 | unlink(theOutputDir, recursive=TRUE) 76 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 77 | # load data 78 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 79 | myData@mData <- mbatchTrimData(myData@mData, 100000) 80 | # call 81 | outputFile <- MP_Overall(theBeaData=myData, 82 | thePath=theOutputDir, 83 | theWriteToFile=TRUE) 84 | correctedMatrix <- readAsGenericMatrix(outputFile) 85 | print(correctedMatrix[1:4, 1:4]) 86 | } 87 | ``` 88 | 89 | # Example File Output 90 | 91 | The above code creates the following output file. File is named using the following naming convention: 92 | ANY_Corrections-MPOverall.tsv 93 | The TSV file with the corrected dataset is written by the MBatch package. The end of the output shows a snippet from the corrected matrix. 
94 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-07_MP_Overall.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-07_MP_Overall.pdf -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-08_MP_ByBatch.R: -------------------------------------------------------------------------------- 1 | ## ---- echo=TRUE---------------------------------------------------------- 2 | { 3 | library(MBatch) 4 | 5 | # set the paths 6 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 7 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 8 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 9 | theRandomSeed=314 10 | 11 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 12 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 13 | theOutputDir="/bea_testing/output/MP_ByBatch" 14 | theRandomSeed=314 15 | theBatchType="TSS" 16 | 17 | # make sure the output dir exists and is empty 18 | unlink(theOutputDir, recursive=TRUE) 19 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 20 | # load data 21 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 22 | myData@mData <- mbatchTrimData(myData@mData, 100000) 23 | # call 24 | outputFile <- MP_ByBatch(theBeaData=myData, 25 | theBatchType=theBatchType, 26 | thePath=theOutputDir, 27 | theWriteToFile=TRUE) 28 | correctedMatrix <- readAsGenericMatrix(outputFile) 29 | print(correctedMatrix[1:4, 1:4]) 30 | } 31 | 32 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-08_MP_ByBatch.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using MBatch Corrections: MP_ByBatch" 3 | author: "Tod Casasent" 4 | 
date: "`r Sys.Date()`" 5 | #output: rmarkdown::html_vignette 6 | # install with vignettes using devtools::install(build_vignettes = TRUE) 7 | # build vignettes using devtools::build_vignettes() (inst/doc) 8 | output: 9 | pdf_document: 10 | number_sections: TRUE 11 | vignette: > 12 | %\VignetteIndexEntry{Using MBatch Corrections: MP_ByBatch} 13 | %\VignetteEngine{knitr::rmarkdown} 14 | %\VignetteEncoding{UTF-8} 15 | --- 16 | 17 | # Introduction 18 | 19 | These instructions are aimed at people familiar with R and familiar with TCGA/GDC platforms and data types. They are intended to introduce the reader to producing the given correction. These instructions will only rarely, if ever, touch on the appropriateness of the correction algorithm or interpretation of output. See MBatch_01_InstallLinux for instructions on downloading test data. 20 | 21 | # Algorithm 22 | 23 | MP ByBatch performs a Median Polish By Batch correction for the batch type given in theBatchType, taking a BEA_DATA object (with data matrix and batch dataframe) and returning either a corrected matrix or a string containing the path to where the data file was written. 24 | 25 | # Output 26 | 27 | The primary output method for MBatch is to view results in the Batch Effects Website. Correction algorithms generally do not create graphical output and instead create TSV output files. 28 | 29 | # Usage 30 | 31 | MP_ByBatch(theBeaData, theBatchType, thePath = NULL, theWriteToFile = FALSE) 32 | 33 | # Arguments 34 | 35 | ## theBeaData 36 | 37 | BEA_DATA objects can be created by calls of the form new("BEA_DATA", theData, theBatches, theCovariates). If you have no covariate data, use an empty data.frame created with data.frame() 38 | 39 | mData: 40 | Object of class "matrix" A matrix where the colnames are sample ids and the rownames are gene equivalents. All names should be strings, not factors. 41 | 42 | mBatches: 43 | Object of class "data.frame" A data.frame where the column "names" are batch types. The first batch "type" is "Sample".
All names and values should be strings, not factors or numeric. 44 | 45 | mCovariates: 46 | Object of class "data.frame" A data.frame where the column "names" are covariate types. The first covariate "type" is "Sample". All names and values should be strings, not factors or numeric. 47 | 48 | ## theBatchType 49 | A string identifying the batch type to correct. 50 | 51 | ## thePath 52 | Output path for any files. 53 | 54 | ## theWriteToFile 55 | TRUE to write the corrected data to file and return the file pathname instead of the corrected matrix. 56 | 57 | # Example Call 58 | 59 | The following code is adapted from the tests/MP_ByBatch.R file. Data used is from the testing data as per the MBatch_01_InstallLinux document. In the future, we plan to make the output from MBatch more user friendly, but currently, this produces the following output at the command line. 60 | 61 | ```{r, echo=TRUE} 62 | { 63 | library(MBatch) 64 | 65 | # set the paths 66 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 67 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 68 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 69 | theRandomSeed=314 70 | 71 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 72 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 73 | theOutputDir="/bea_testing/output/MP_ByBatch" 74 | theRandomSeed=314 75 | theBatchType="TSS" 76 | 77 | # make sure the output dir exists and is empty 78 | unlink(theOutputDir, recursive=TRUE) 79 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 80 | # load data 81 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 82 | myData@mData <- mbatchTrimData(myData@mData, 100000) 83 | # call 84 | outputFile <- MP_ByBatch(theBeaData=myData, 85 | theBatchType=theBatchType, 86 | thePath=theOutputDir, 87 | theWriteToFile=TRUE) 88 | correctedMatrix <- readAsGenericMatrix(outputFile) 89 | print(correctedMatrix[1:4, 1:4]) 90 | } 91 | ``` 92 | 93 | # Example File Output 94 | 95 | 
The above code creates the following output file. File is named using the following naming convention: 96 | ANY_Corrections-MPByBatch.tsv 97 | The TSV file with the corrected dataset is written by the MBatch package. The end of the output shows a snippet from the corrected matrix. 98 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-08_MP_ByBatch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-08_MP_ByBatch.pdf -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-09_AN_Adjusted.R: -------------------------------------------------------------------------------- 1 | ## ---- echo=TRUE---------------------------------------------------------- 2 | { 3 | library(MBatch) 4 | 5 | # set the paths 6 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 7 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 8 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 9 | theRandomSeed=314 10 | 11 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 12 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 13 | theOutputDir="/bea_testing/output/AN_Adjusted" 14 | theRandomSeed=314 15 | theBatchType="TSS" 16 | 17 | # make sure the output dir exists and is empty 18 | unlink(theOutputDir, recursive=TRUE) 19 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 20 | # load data 21 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 22 | myData@mData <- mbatchTrimData(myData@mData, 100000) 23 | # call 24 | outputFile <- AN_Adjusted(theBeaData=myData, 25 | theBatchType=theBatchType, 26 | thePath=theOutputDir, 27 | theWriteToFile=TRUE) 28 | correctedMatrix <- readAsGenericMatrix(outputFile) 29 | print(correctedMatrix[1:4, 1:4]) 30 | } 31 | 32 | 
-------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-09_AN_Adjusted.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using MBatch Corrections: AN_Adjusted" 3 | author: "Tod Casasent" 4 | date: "`r Sys.Date()`" 5 | #output: rmarkdown::html_vignette 6 | # install with vignettes using devtools::install(build_vignettes = TRUE) 7 | # build vignettes using devtools::build_vignettes() (inst/doc) 8 | output: 9 | pdf_document: 10 | number_sections: TRUE 11 | vignette: > 12 | %\VignetteIndexEntry{Using MBatch Corrections: AN_Adjusted} 13 | %\VignetteEngine{knitr::rmarkdown} 14 | %\VignetteEncoding{UTF-8} 15 | --- 16 | 17 | # Introduction 18 | 19 | These instructions are aimed at people familiar with R and familiar with TCGA/GDC platforms and data types. They are intended to introduce the reader to producing the given correction. These instructions will only rarely, if ever, touch on the appropriateness of the correction algorithm or interpretation of output. See MBatch_01_InstallLinux for instructions on downloading test data. 20 | 21 | # Algorithm 22 | 23 | AN Adjusted performs an ANOVA Adjusted correction taking a BEA_DATA object (with data matrix and batch dataframe) and returning either a corrected matrix or a string containing the path to where the data file was written. 24 | 25 | # Output 26 | 27 | The primary output method for MBatch is to view results in the Batch Effects Website. Correction algorithms generally do not create graphical output and instead create TSV output files. 28 | 29 | # Usage 30 | 31 | AN_Adjusted(theBeaData, theBatchType, thePath = NULL, theWriteToFile = FALSE) 32 | 33 | # Arguments 34 | 35 | ## theBeaData 36 | 37 | BEA_DATA objects can be created by calls of the form new("BEA_DATA", theData, theBatches, theCovariates).
If you have no covariate data, use an empty data.frame created with data.frame() 38 | 39 | mData: 40 | Object of class "matrix" A matrix where the colnames are sample ids and the rownames are gene equivalents. All names should be strings, not factors. 41 | 42 | mBatches: 43 | Object of class "data.frame" A data.frame where the column "names" are batch types. The first batch "type" is "Sample". All names and values should be strings, not factors or numeric. 44 | 45 | mCovariates: 46 | Object of class "data.frame" A data.frame where the column "names" are covariate types. The first covariate "type" is "Sample". All names and values should be strings, not factors or numeric. 47 | 48 | ## theBatchType 49 | A string identifying the batch type to correct. 50 | 51 | ## thePath 52 | Output path for any files. 53 | 54 | ## theWriteToFile 55 | TRUE to write the corrected data to file and return the file pathname instead of the corrected matrix. 56 | 57 | # Example Call 58 | 59 | The following code is adapted from the tests/AN_Adjusted.R file. Data used is from the testing data as per the MBatch_01_InstallLinux document. In the future, we plan to make the output from MBatch more user friendly, but currently, this produces the following output at the command line. 
60 | 61 | ```{r, echo=TRUE} 62 | { 63 | library(MBatch) 64 | 65 | # set the paths 66 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 67 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 68 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 69 | theRandomSeed=314 70 | 71 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 72 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 73 | theOutputDir="/bea_testing/output/AN_Adjusted" 74 | theRandomSeed=314 75 | theBatchType="TSS" 76 | 77 | # make sure the output dir exists and is empty 78 | unlink(theOutputDir, recursive=TRUE) 79 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 80 | # load data 81 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 82 | myData@mData <- mbatchTrimData(myData@mData, 100000) 83 | # call 84 | outputFile <- AN_Adjusted(theBeaData=myData, 85 | theBatchType=theBatchType, 86 | thePath=theOutputDir, 87 | theWriteToFile=TRUE) 88 | correctedMatrix <- readAsGenericMatrix(outputFile) 89 | print(correctedMatrix[1:4, 1:4]) 90 | } 91 | ``` 92 | 93 | # Example File Output 94 | 95 | The above code creates the following output file. File is named using the following naming convention: 96 | ANY_Corrections-ANAdjusted.tsv 97 | The TSV file with the corrected dataset is written by the MBatch package. The end of the output shows a snippet from the corrected matrix. 
98 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-09_AN_Adjusted.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-09_AN_Adjusted.pdf -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-10_AN_Unadjusted.R: -------------------------------------------------------------------------------- 1 | ## ---- echo=TRUE---------------------------------------------------------- 2 | { 3 | library(MBatch) 4 | 5 | # set the paths 6 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 7 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 8 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 9 | theRandomSeed=314 10 | 11 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 12 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 13 | theOutputDir="/bea_testing/output/AN_Unadjusted" 14 | theRandomSeed=314 15 | theBatchType="TSS" 16 | 17 | # make sure the output dir exists and is empty 18 | unlink(theOutputDir, recursive=TRUE) 19 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 20 | # load data 21 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 22 | myData@mData <- mbatchTrimData(myData@mData, 100000) 23 | # call 24 | outputFile <- AN_Unadjusted(theBeaData=myData, 25 | theBatchType=theBatchType, 26 | thePath=theOutputDir, 27 | theWriteToFile=TRUE) 28 | correctedMatrix <- readAsGenericMatrix(outputFile) 29 | print(correctedMatrix[1:4, 1:4]) 30 | } 31 | 32 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-10_AN_Unadjusted.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-10_AN_Unadjusted.pdf -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-11_EBNPlus_CheckData_Structures.R: -------------------------------------------------------------------------------- 1 | ## ---- echo=TRUE---------------------------------------------------------- 2 | { 3 | library(MBatch) 4 | 5 | # set the paths 6 | theDataFile1="/bea_testing/MATRIX_DATA/brca_rnaseq2_matrix_data.tsv" 7 | theDataFile2="/bea_testing/MATRIX_DATA/brca_agi4502_matrix_data.tsv" 8 | 9 | # trimGenes: reduce each gene name to the text before the first "|"; names starting with "?" are returned unchanged 10 | trimGenes <- function(theGenes) 11 | { 12 | foo <- as.vector(unlist( 13 | sapply(theGenes, function(theGene) 14 | { 15 | # keep the same if it starts with ? 16 | if (TRUE==grepl("^[?]+", theGene)) 17 | { 18 | return(theGene) 19 | } 20 | else 21 | { 22 | # split on the | and take the first argument 23 | # this makes no change if no pipe 24 | return(strsplit(theGene, "|", fixed=TRUE)[[1]][1]) 25 | } 26 | }) 27 | )) 28 | foo 29 | } 30 | 31 | # removeDuplicatesFromColumns: drop every column (sample) whose name repeats an earlier column name; the first occurrence is kept 32 | removeDuplicatesFromColumns <- function(theMatrix) 33 | { 34 | indexOfDuplicates <- which(duplicated(colnames(theMatrix))) 35 | if (length(indexOfDuplicates) > 0) 36 | { 37 | # negative index removes the duplicated columns; drop=FALSE keeps a matrix (with dimnames) even if a single column remains 38 | theMatrix <- theMatrix[ , -indexOfDuplicates, drop=FALSE] 39 | } 40 | return(theMatrix) 41 | } 42 | 43 | # removeDuplicatesFromRows: drop every row (gene/probe) whose name repeats an earlier row name; the first occurrence is kept 44 | removeDuplicatesFromRows <- function(theMatrix) 45 | { 46 | indexOfDuplicates <- which(duplicated(rownames(theMatrix))) 47 | if (length(indexOfDuplicates) > 0) 48 | { 49 | # negative index removes the duplicated rows; drop=FALSE keeps a matrix (with dimnames) even if a single row remains 50 | theMatrix <- theMatrix[-indexOfDuplicates, , drop=FALSE] 51 | } 52 | return(theMatrix) 53 | } 54 | # NOTE(review): dir.exists() is FALSE for regular files and for missing paths alike, so this guard only skips when a path is a directory; it does not verify the inputs exist - confirm whether file.exists() was intended 55 | if ((!dir.exists(theDataFile1))&&(!dir.exists(theDataFile2))) 56 | { 57 | warnLevel<-getOption("warn") 58 | 
on.exit(options(warn=warnLevel)) 59 | # warnings are errors 60 | options(warn=3) 61 | # if there is a warning, show the calls leading up to it 62 | options(showWarnCalls=TRUE) 63 | # if there is an error, show the calls leading up to it 64 | options(showErrorCalls=TRUE) 65 | # 66 | # read the files in. This can be done however you want 67 | theDataMatrix1 <- readAsGenericMatrix(theDataFile1) 68 | theDataMatrix2 <- readAsGenericMatrix(theDataFile2) 69 | # reduce the row names to just gene symbols, handling those from standardized data 70 | rownames(theDataMatrix1) <- trimGenes(rownames(theDataMatrix1)) 71 | rownames(theDataMatrix2) <- trimGenes(rownames(theDataMatrix2)) 72 | # remove any duplicates (this is a requirement for EBNplus) 73 | theDataMatrix1 <- removeDuplicatesFromColumns(removeDuplicatesFromRows(theDataMatrix1)) 74 | theDataMatrix2 <- removeDuplicatesFromColumns(removeDuplicatesFromRows(theDataMatrix2)) 75 | print("Is this data acceptable?") 76 | EBNPlus_CheckData_Structures(theDataMatrix1, theDataMatrix2) 77 | print("If you see this, it is.") 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-11_EBNPlus_CheckData_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-11_EBNPlus_CheckData_Structures.pdf -------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.pdf
-------------------------------------------------------------------------------- /package/inst/doc/MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/inst/doc/MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.pdf -------------------------------------------------------------------------------- /package/man/AN_Adjusted.Rd: -------------------------------------------------------------------------------- 1 | \name{AN_Adjusted} 2 | \alias{AN_Adjusted} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{AN_Adjusted} 5 | \description{ 6 | Perform ANOVA Adjusted corrections. 7 | } 8 | \usage{ 9 | AN_Adjusted(theBeaData, theBatchType, thePath = NULL, theWriteToFile = FALSE) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{theBeaData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 14 | \item{theBatchType}{A string identifying the batch type to correct.} 15 | \item{thePath}{Output path for any files.} 16 | \item{theWriteToFile}{TRUE to write the corrected data to file and return 17 | the file pathname instead of the corrected matrix.} 18 | } 19 | \details{ 20 | Perform ANOVA adjusted correction for the given batch type. 21 | Returning the corrected matrix or a path to the filename. 22 | } 23 | \value{ 24 | Return the corrected matrix (columns are sample ids and rows are features) 25 | or a path to the filename. 26 | } 27 | \references{} 28 | \author{} 29 | \note{} 30 | \seealso{} 31 | \examples{} 32 | % Add one or more standard keywords, see file 'KEYWORDS' in the 33 | % R documentation directory. 
34 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 35 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 36 | -------------------------------------------------------------------------------- /package/man/AN_Unadjusted.Rd: -------------------------------------------------------------------------------- 1 | \name{AN_Unadjusted} 2 | \alias{AN_Unadjusted} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{AN_Unadjusted} 5 | \description{ 6 | Perform ANOVA Unadjusted corrections. 7 | } 8 | \usage{ 9 | AN_Unadjusted(theBeaData, theBatchType, thePath = NULL, theWriteToFile = FALSE) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{theBeaData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 14 | \item{theBatchType}{A string identifying the batch type to correct.} 15 | \item{thePath}{Output path for any files.} 16 | \item{theWriteToFile}{TRUE to write the corrected data to file and return 17 | the file pathname instead of the corrected matrix.} 18 | } 19 | \details{ 20 | Perform ANOVA unadjusted correction for the given batch type. 21 | Returning the corrected matrix or a path to the filename. 22 | } 23 | \value{ 24 | Return the corrected matrix (columns are sample ids and rows are features) 25 | or a path to the filename. 26 | } 27 | \references{} 28 | \author{} 29 | \note{} 30 | \seealso{} 31 | \examples{} 32 | % Add one or more standard keywords, see file 'KEYWORDS' in the 33 | % R documentation directory. 
34 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 35 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 36 | -------------------------------------------------------------------------------- /package/man/BEA_DATA-class.Rd: -------------------------------------------------------------------------------- 1 | \name{BEA_DATA-class} 2 | \Rdversion{1.1} 3 | \docType{class} 4 | \alias{BEA_DATA-class} 5 | \alias{initialize,BEA_DATA-method} 6 | 7 | \title{Class \code{"BEA_DATA"}} 8 | \description{ 9 | An S4 class, BEA_DATA provides slots for the MBatch data matrix, batch data.frame and covariate data.frame. 10 | } 11 | \section{Objects from the Class}{ 12 | Objects can be created by calls of the form \code{new("BEA_DATA", theData, theBatches, theCovariates)}. 13 | See the Slots section for a description of the objects. 14 | If you have no covariate data, use an empty data.frame created with \code{data.frame()} 15 | } 16 | \section{Slots}{ 17 | \describe{ 18 | \item{\code{mData}:}{Object of class \code{"matrix"} A matrix where the colnames are sample ids and the rownames are gene equivalents. All names should be strings, not factors. } 19 | \item{\code{mBatches}:}{Object of class \code{"data.frame"} A data.frame where the column "names" are batch types. The first batch "type" is "Sample". All names and values should be strings, not factors or numeric. } 20 | \item{\code{mCovariates}:}{Object of class \code{"data.frame"} A data.frame where the column "names" are covariate types. The first covariate "type" is "Sample". All names and values should be strings, not factors or numeric. } 21 | } 22 | } 23 | \section{Methods}{ 24 | \describe{ 25 | \item{initialize}{\code{signature(.Object = "BEA_DATA")}: ... 
} 26 | } 27 | } 28 | \references{ 29 | %% ~~put references to the literature/web site here~~ 30 | } 31 | \author{ 32 | Tod Casasent \email{tdcasasent@mdanderson.org} 33 | } 34 | \note{ 35 | %% ~~further notes~~ 36 | } 37 | 38 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 39 | 40 | \seealso{ 41 | %% ~~objects to See Also as \code{\link{~~fun~~}}, ~~~ 42 | %% ~~or \code{\linkS4class{CLASSNAME}} for links to other classes ~~~ 43 | } 44 | \examples{ 45 | #showClass("BEA_DATA") 46 | } 47 | \keyword{classes} 48 | -------------------------------------------------------------------------------- /package/man/Boxplot_AllSamplesData_Structures.Rd: -------------------------------------------------------------------------------- 1 | \name{Boxplot_AllSamplesData_Structures} 2 | \alias{Boxplot_AllSamplesData_Structures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{Boxplot_AllSamplesData_Structures} 5 | \description{ 6 | A function to perform Box Plot all samples and all data analysis from data in Structures. 7 | } 8 | \usage{ 9 | Boxplot_AllSamplesData_Structures (theData, theTitle, theOutputPath, 10 | theBatchTypeAndValuePairsToRemove, theBatchTypeAndValuePairsToKeep, 11 | theMaxGeneCount=20000, theJavaParameters = "-Xms8000m") 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{theData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 16 | \item{theTitle}{Object of class \code{"character"} Title to use in PNG files. } 17 | \item{theOutputPath}{Object of class \code{"character"} Directory in which to place output PNG 18 | files. } 19 | \item{theBatchTypeAndValuePairsToRemove}{Object of class \code{"list"} A list of vectors 20 | containing the batch type (or * for all types) and the value to remove. list() indicates 21 | none while NULL will cause an error. 
} 22 | \item{theBatchTypeAndValuePairsToKeep}{Object of class \code{"list"} A list of vectors 23 | containing the batch type (or * for all types) and a vector of the value(s) to keep. 24 | list() indicates none while NULL will cause an error. } 25 | \item{theMaxGeneCount}{Integer giving maximum number of features (genes) to keep. Default is 20000. 0 means keep all.} 26 | \item{theJavaParameters}{Object of class \code{"character"} String for initializing JVM. 27 | Defaults to Xms8000m. } 28 | } 29 | \details{ 30 | %% ~~ If necessary, more details than the description above ~~ 31 | } 32 | \value{ 33 | %% ~Describe the value returned 34 | %% If it is a LIST, use 35 | %% \item{comp1 }{Description of 'comp1'} 36 | %% \item{comp2 }{Description of 'comp2'} 37 | %% ... 38 | } 39 | \references{ 40 | %% ~put references to the literature/web site here ~ 41 | } 42 | \author{ 43 | %% ~~who you are~~ 44 | } 45 | \note{ 46 | %% ~~further notes~~ 47 | } 48 | 49 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 50 | 51 | \seealso{ 52 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 53 | } 54 | \examples{ 55 | # ############################################################################# 56 | # mydir <- file.path(getwd(), "working", "Boxplot_AllSamplesData_Structures") 57 | # dir.create(mydir, recursive=TRUE) 58 | # setLogging(new("Logging", theFile=file.path(mydir, "mbatch.log"))) 59 | # ## path for output 60 | # myOutputPath <- file.path(mydir, "output") 61 | # dir.create(myOutputPath, recursive=TRUE) 62 | # ############################################################################# 63 | # ## this is the title used for the images 64 | # myTitle <- "Disease/Data Type/Platform/Data Level" 65 | # ############################################################################# 66 | # # mbatchLoadFiles 67 | # ############################################################################# 68 | # ## This is the single gene data file to be read and processed 69
| # myGeneDataFile <- system.file("extdata", "matrix_data.tsv", package="MBatch") 70 | # ## This is the single batch file to be read and processed 71 | # myBatchFile <- system.file("extdata", "batches.tsv", package="MBatch") 72 | # ## This is the single covariate file to be read and processed 73 | # myCovariateFile <- system.file("extdata", "covariates.tsv", package="MBatch") 74 | # ## this loads the files 75 | # myData <- mbatchLoadFiles(myGeneDataFile, myBatchFile, myCovariateFile) 76 | # ### 77 | # 78 | # Boxplot_AllSamplesData_Structures(myData, myTitle, myOutputPath, list(), list()) 79 | } 80 | % Add one or more standard keywords, see file 'KEYWORDS' in the 81 | % R documentation directory. 82 | \keyword{ ~kwd1 } 83 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 84 | -------------------------------------------------------------------------------- /package/man/Boxplot_AllSamplesRLE_Structures.Rd: -------------------------------------------------------------------------------- 1 | \name{Boxplot_AllSamplesRLE_Structures} 2 | \alias{Boxplot_AllSamplesRLE_Structures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{Boxplot_AllSamplesRLE_Structures} 5 | \description{ 6 | A function to perform Box Plot RLE all data analysis from data in Structures. 7 | } 8 | \usage{ 9 | Boxplot_AllSamplesRLE_Structures(theData, theTitle, theOutputPath, 10 | theBatchTypeAndValuePairsToRemove, theBatchTypeAndValuePairsToKeep, 11 | theMaxGeneCount=20000, theJavaParameters = "-Xms8000m") 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{theData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 16 | \item{theTitle}{Object of class \code{"character"} Title to use in PNG files. } 17 | \item{theOutputPath}{Object of class \code{"character"} 18 | Directory in which to place output PNG files.
} 19 | \item{theBatchTypeAndValuePairsToRemove}{Object of class \code{"list"} A list of vectors 20 | containing the batch type (or * for all types) and the value to remove. list() indicates 21 | none while NULL will cause an error. } 22 | \item{theBatchTypeAndValuePairsToKeep}{Object of class \code{"list"} A list of vectors 23 | containing the batch type (or * for all types) and a vector of the value(s) to keep. 24 | list() indicates none while NULL will cause an error. } 25 | \item{theMaxGeneCount}{Integer giving maximum number of features (genes) to keep. Default is 20000. 0 means keep all.} 26 | \item{theJavaParameters}{Object of class \code{"character"} String for initializing JVM. 27 | Defaults to Xms8000m. } 28 | } 29 | \details{ 30 | %% ~~ If necessary, more details than the description above ~~ 31 | } 32 | \value{ 33 | %% ~Describe the value returned 34 | %% If it is a LIST, use 35 | %% \item{comp1 }{Description of 'comp1'} 36 | %% \item{comp2 }{Description of 'comp2'} 37 | %% ...
38 | } 39 | \references{ 40 | %% ~put references to the literature/web site here ~ 41 | } 42 | \author{ 43 | %% ~~who you are~~ 44 | } 45 | \note{ 46 | %% ~~further notes~~ 47 | } 48 | 49 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 50 | 51 | \seealso{ 52 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 53 | } 54 | \examples{ 55 | # ############################################################################# 56 | # mydir <- file.path(getwd(), "working", "Boxplot_AllSamplesRLE_Structures") 57 | # dir.create(mydir, recursive=TRUE) 58 | # setLogging(new("Logging", theFile=file.path(mydir, "mbatch.log"))) 59 | # ## path for output 60 | # myOutputPath <- file.path(mydir, "output") 61 | # dir.create(myOutputPath, recursive=TRUE) 62 | # ############################################################################# 63 | # ## this is the title used for the images 64 | # myTitle <- "Disease/Data Type/Platform/Data Level" 65 | # ############################################################################# 66 | # # mbatchLoadFiles 67 | # ############################################################################# 68 | # ## This is the single gene data file to be read and processed 69 | # myGeneDataFile <- system.file("extdata", "matrix_data.tsv", package="MBatch") 70 | # ## This is the single batch file to be read and processed 71 | # myBatchFile <- system.file("extdata", "batches.tsv", package="MBatch") 72 | # ## This is the single covariate file to be read and processed 73 | # myCovariateFile <- system.file("extdata", "covariates.tsv", package="MBatch") 74 | # ## this loads the files 75 | # myData <- mbatchLoadFiles(myGeneDataFile, myBatchFile, myCovariateFile) 76 | # ### 77 | # 78 | # Boxplot_AllSamplesRLE_Structures(myData, myTitle, myOutputPath, list(), list()) 79 | } 80 | % Add one or more standard keywords, see file 'KEYWORDS' in the 81 | % R documentation directory.
82 | \keyword{ ~kwd1 } 83 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 84 | -------------------------------------------------------------------------------- /package/man/Boxplot_Group_Structures.Rd: -------------------------------------------------------------------------------- 1 | \name{Boxplot_Group_Structures} 2 | \alias{Boxplot_Group_Structures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{Boxplot_Group_Structures} 5 | \description{ 6 | A function to perform Box Plot group analysis from data in Structures. 7 | } 8 | \usage{ 9 | Boxplot_Group_Structures(theData,theTitle, theOutputPath, 10 | theBatchTypeAndValuePairsToRemove, theBatchTypeAndValuePairsToKeep, 11 | theListOfGroupBoxFunction, theListOfGroupBoxLabels, 12 | theMaxGeneCount=20000, theJavaParameters = "-Xms8000m") 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{theData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 17 | \item{theTitle}{Object of class \code{"character"} 18 | Title to use in PNG files. } 19 | \item{theOutputPath}{Object of class \code{"character"} 20 | Directory in which to place output PNG files. } 21 | \item{theBatchTypeAndValuePairsToRemove}{Object of class \code{"list"} 22 | A list of vectors containing the batch type (or * for all types) and 23 | the value to remove. list() indicates none while NULL will cause an error. } 24 | \item{theBatchTypeAndValuePairsToKeep}{Object of class \code{"list"} 25 | A list of vectors containing the batch type (or * for all types) and 26 | a vector of the value(s) to keep. 27 | list() indicates none while NULL will cause an error. } 28 | \item{theListOfGroupBoxFunction}{Object of class \code{"vector"} 29 | A list of functions to use for group assessments. } 30 | \item{theListOfGroupBoxLabels}{Object of class \code{"vector"} 31 | A list of strings giving the labels to use for group assessments.
} 32 | \item{theMaxGeneCount}{Integer giving maximum number of features (genes) to keep. Default is 20000. 0 means keep all.} 33 | \item{theJavaParameters}{Object of class \code{"character"} 34 | String for initializing JVM. Defaults to Xms8000m. } 35 | } 36 | \details{ 37 | %% ~~ If necessary, more details than the description above ~~ 38 | } 39 | \value{ 40 | %% ~Describe the value returned 41 | %% If it is a LIST, use 42 | %% \item{comp1 }{Description of 'comp1'} 43 | %% \item{comp2 }{Description of 'comp2'} 44 | %% ... 45 | } 46 | \references{ 47 | %% ~put references to the literature/web site here ~ 48 | } 49 | \author{ 50 | %% ~~who you are~~ 51 | } 52 | \note{ 53 | %% ~~further notes~~ 54 | } 55 | 56 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 57 | 58 | \seealso{ 59 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 60 | } 61 | \examples{ 62 | # ############################################################################# 63 | # mydir <- file.path(getwd(), "working", "Boxplot_Group_Structures") 64 | # dir.create(mydir, recursive=TRUE) 65 | # setLogging(new("Logging", theFile=file.path(mydir, "mbatch.log"))) 66 | # ## path for output 67 | # myOutputPath <- file.path(mydir, "output") 68 | # dir.create(myOutputPath, recursive=TRUE) 69 | # ############################################################################# 70 | # ## this is the title used for the images 71 | # myTitle <- "Disease/Data Type/Platform/Data Level" 72 | # ############################################################################# 73 | # # mbatchLoadFiles 74 | # ############################################################################# 75 | # ## This is the single gene data file to be read and processed 76 | # myGeneDataFile <- system.file("extdata", "matrix_data.tsv", package="MBatch") 77 | # ## This is the single batch file to be read and processed 78 | # myBatchFile <- system.file("extdata", "batches.tsv", package="MBatch") 79 | # ## This is the single 
covariate file to be read and processed 80 | # myCovariateFile <- system.file("extdata", "covariates.tsv", package="MBatch") 81 | # ## this loads the files 82 | # myData <- mbatchLoadFiles(myGeneDataFile, myBatchFile, myCovariateFile) 83 | # ### 84 | # 85 | # Boxplot_Group_Structures(myData, myTitle, myOutputPath, list(), list(), 86 | # theListOfGroupBoxFunction=c(mean), theListOfGroupBoxLabels=c("mean")) 87 | # 88 | } 89 | % Add one or more standard keywords, see file 'KEYWORDS' in the 90 | % R documentation directory. 91 | \keyword{ ~kwd1 } 92 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 93 | -------------------------------------------------------------------------------- /package/man/CDP_Files.Rd: -------------------------------------------------------------------------------- 1 | \name{CDP_Files} 2 | \alias{CDP_Files} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{CDP_Files} 5 | \description{Make a Correlation Density Plot for given data.} 6 | \usage{ 7 | CDP_Files(theFilePath, theDataFile1, theDataFile2, theSubTitle, 8 | theUnmatchedCount = 1000, theMethod = "pearson", 9 | theUse = "pairwise.complete.obs", theSeed = NULL, 10 | theUseReplicatesUnpaired=FALSE, 11 | theLinePlot=TRUE, theHistPlot=TRUE, theBinWidth=NULL) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{theFilePath}{Full path and filename for PNG output file} 16 | \item{theDataFile1}{Matrix file with columns as samples 17 | (with initial tab) and rows as features.} 18 | \item{theDataFile2}{Matrix file with columns as samples 19 | (with initial tab) and rows as features.} 20 | \item{theSubTitle}{Subtitle for image, giving data type being displayed.} 21 | \item{theUnmatchedCount}{Number of iterations for unpaired samples.} 22 | \item{theMethod}{Defaults to pearson. 23 | Valid values are: concordance, pearson, kendall, spearman.} 24 | \item{theUse}{Defaults to pairwise.complete.obs.
25 | Valid values are those accepted by the use parameter of cor.} 26 | \item{theSeed}{Defaults to NULL.} 27 | \item{theUseReplicatesUnpaired}{Defaults to FALSE. 28 | If TRUE, use both the replicates and non-replicates for the unpaired plot.} 29 | \item{theLinePlot}{Defaults to TRUE. TRUE means plot the lines for Correlation Density Plots.} 30 | \item{theHistPlot}{Defaults to TRUE. TRUE means plot the histogram for Correlation Density Plots.} 31 | \item{theBinWidth}{Defaults to NULL. Non-NULL means to use the given width for bins. Otherwise, use default for hist.} 32 | } 33 | \details{Make a Correlation Density Plot for given data.} 34 | \value{No useful values} 35 | \references{ 36 | %% ~put references to the literature/web site here ~ 37 | } 38 | \author{ 39 | %% ~~who you are~~ 40 | } 41 | \note{ 42 | %% ~~further notes~~ 43 | } 44 | 45 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 46 | 47 | \seealso{ 48 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 49 | } 50 | \examples{ 51 | ##---- Should be DIRECTLY executable !! ---- 52 | ##-- ==> Define data, use random, 53 | ##-- or do help(data=index) for the standard data sets. 54 | 55 | ## The function is currently defined as 56 | function (theFilePath, theDataFile1, theDataFile2, theSubTitle, 57 | theUnmatchedCount = 1000, theMethod = "pearson", 58 | theUse = "pairwise.complete.obs", 59 | theSeed = NULL) 60 | { 61 | logInfo(paste("CDP_Files -- theDataFile1=", theDataFile1)) 62 | myData1 <- readAsGenericMatrix(theDataFile1) 63 | logInfo(paste("CDP_Files -- theDataFile2=", theDataFile2)) 64 | myData2 <- readAsGenericMatrix(theDataFile2) 65 | CDP_Structures(theFilePath, myData1, myData2, theSubTitle = theSubTitle, 66 | theUnmatchedCount = theUnmatchedCount, theMethod = theMethod, 67 | theUse = theUse, theSeed = theSeed) 68 | } 69 | } 70 | % Add one or more standard keywords, see file 'KEYWORDS' in the 71 | % R documentation directory.
72 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 73 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 74 | -------------------------------------------------------------------------------- /package/man/EBNPlus_CheckData_Structures.Rd: -------------------------------------------------------------------------------- 1 | \name{EBNPlus_CheckData_Structures} 2 | \alias{EBNPlus_CheckData_Structures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{EBNPlus_CheckData_Structures} 5 | \description{ 6 | Take two matrices and check that they will work as arguments to MBatch EBNPlus functions. 7 | } 8 | \usage{ 9 | EBNPlus_CheckData_Structures(theDataMatrix1, theDataMatrix2, theDataReplicates1 = NULL, 10 | theDataReplicates2 = NULL) 11 | } 12 | %- maybe also 'usage' for other objects documented here. 13 | \arguments{ 14 | \item{theDataMatrix1}{A matrix for data set 1 containing numeric values with columns being 15 | sample ids and rows being gene ids.} 16 | \item{theDataMatrix2}{A matrix for data set 2 containing numeric values with columns being 17 | sample ids and rows being gene ids.} 18 | \item{theDataReplicates1}{A vector of "replicates" in data set 1 used for corrections. 19 | Defaults to NULL. See "details" below.} 20 | \item{theDataReplicates2}{A vector of "replicates" in data set 2 used for corrections. 21 | Defaults to NULL. See "details" below.} 22 | } 23 | \details{ 24 | Performs a stop if conditions are not met. 25 | This function checks the following: 26 | Both matrix arguments pass is.matrix test. 27 | Both matrices have column names. 28 | Both matrices have row names and they intersect at least once. 29 | Both matrices have column names that intersect at least once or the replicate value vectors 30 | are the same size and exist in the column names. 31 | All data in the matrices is numeric. 32 | 33 | Please note, performing corrections using replicate lists is not yet supported.
34 | } 35 | \value{ 36 | No meaningful value. Performs a stop if conditions are not met. 37 | } 38 | \references{} 39 | \author{} 40 | \note{} 41 | \seealso{} 42 | \examples{} 43 | % Add one or more standard keywords, see file 'KEYWORDS' in the 44 | % R documentation directory. 45 | \keyword{ ~kwd1 } 46 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 47 | -------------------------------------------------------------------------------- /package/man/EBNPlus_CombineBatches.Rd: -------------------------------------------------------------------------------- 1 | \name{EBNPlus_CombineBatches} 2 | \alias{EBNPlus_CombineBatches} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{EBNPlus_CombineBatches} 5 | \description{ 6 | Take two different data sets' batch data.frames, combine them, and add an EBNPlus batch 7 | giving original data sets. 8 | } 9 | \usage{ 10 | EBNPlus_CombineBatches(theBeaBatches1, theBeaBatches2, theEBNP_Data1BatchId, 11 | theEBNP_Data2BatchId, theBarcodeTrimFunction = NULL, theSep=".") 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{theBeaBatches1}{The data.frame containing batch information for data set 1. 16 | The "Sample" column should contain barcodes and is required.} 17 | \item{theBeaBatches2}{The data.frame containing batch information for data set 2. 18 | The "Sample" column should contain barcodes and is required.} 19 | \item{theEBNP_Data1BatchId}{The Batch Id for data set 1, as passed to one of the 20 | other EBNPlus functions (for example, RNASeqV2)} 21 | \item{theEBNP_Data2BatchId}{The Batch Id for data set 2, as passed to one of the 22 | other EBNPlus functions (for example, Agilent4502)} 23 | \item{theBarcodeTrimFunction}{A function applied to trim barcodes if needed. 24 | This defaults to NULL (indicating no trimming) and should not be needed for TCGA data.} 25 | \item{theSep}{Separator used when adding ids to existing barcodes.
26 | This defaults to "."} 27 | } 28 | \details{ 29 | Combine the two data.frames. 30 | Add the new "batch names" as batches for the two data sets and the EBNPlus batch type. 31 | } 32 | \value{ 33 | Return a data.frame. 34 | Names for the data.frame are original names, with a new column "EBNPlus". 35 | } 36 | \references{} 37 | \author{} 38 | \note{} 39 | \seealso{} 40 | \examples{} 41 | % Add one or more standard keywords, see file 'KEYWORDS' in the 42 | % R documentation directory. 43 | \keyword{ ~kwd1 } 44 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 45 | -------------------------------------------------------------------------------- /package/man/EBNPlus_Correction_Files.Rd: -------------------------------------------------------------------------------- 1 | \name{EBNPlus_Correction_Files} 2 | \alias{EBNPlus_Correction_Files} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{EBNPlus_Correction_Files} 5 | \description{ 6 | Take two Standardized TCGA Data matrix files and two batch files, 7 | apply the normal TCGA Batch Effects Correction values to those files to 8 | correct the data into one dataset. 9 | } 10 | \usage{ 11 | EBNPlus_Correction_Files(theDataFile1, theDataFile2, theOutputDir, 12 | theBatchId1, theBatchId2, 13 | theSeed = NULL, theEBNP_PriorPlotsFlag=FALSE, theEBNP_MinSampleNum = 3, 14 | theEBNP_AddData1Rows = FALSE, theEBNP_AddData2Rows = FALSE) 15 | } 16 | %- maybe also 'usage' for other objects documented here.
17 | \arguments{ 18 | \item{theDataFile1}{A string containing the full path to a 19 | Standardized data matrix file for data set 1.} 20 | \item{theDataFile2}{A string containing the full path to a 21 | Standardized data matrix file for data set 2.} 22 | \item{theOutputDir}{ 23 | A string containing the full path to the output directory.} 24 | \item{theBatchId1}{The group name for data set 1 (for example, RNASeqV2)} 25 | \item{theBatchId2}{The group name for data set 2 (for example, Agilent4502)} 26 | \item{theSeed}{NULL or a seed to use. Defaults to NULL. 27 | This is passed to the standard R set.seed function.} 28 | \item{theEBNP_PriorPlotsFlag}{Defaults to FALSE. 29 | TRUE indicates prior plots PNG should be created.} 30 | \item{theEBNP_MinSampleNum}{ 31 | Defaults to 3. Any row (gene) with less than this 32 | number of samples is dropped.} 33 | \item{theEBNP_AddData1Rows}{Defaults to FALSE. 34 | TRUE indicates rows in data set 2 not in 1 should be added as all NA.} 35 | \item{theEBNP_AddData2Rows}{Defaults to FALSE. 36 | TRUE indicates rows in data set 1 not in 2 should be added as all NA.} 37 | } 38 | \details{ 39 | EBNPlus_Correction_Files does corrections and creates files as output 40 | for internal (MD Anderson) MBatch runs. While the batch files are not 41 | used here, the more general functions wrapped by this 42 | function require batch files. 43 | 44 | The function EBNPlus_Correction_Files takes data set 2 and corrects it to 45 | become part of data set 1 using replicates shared between the sets. 46 | EBNPlus_Correction_Files uses batch with zero as 1, fix data set as NA, 47 | and correct for zero as TRUE. It uses a validation ratio value of 0 48 | to use all replicates for training. Barcodes are kept uncut. Genes 49 | have pipes and any strings after the pipes removed. Any duplicates 50 | in genes or barcodes are removed. 51 | Genes that start with "?" are also removed. 
52 | 53 | Details on what the arguments mean can be found in 54 | EBNPlus_Correction_Structures and 55 | EBNPlus_TrainAndValidateReplicates_Structures. 56 | } 57 | \value{ 58 | EBNPlus_Correction_Files returns a list containing the full path to the 59 | matrix file with the corrected data set. The columns of the matrix are 60 | the barcodes with "." and the group name attached to the barcodes. 61 | This extra string is attached, since the correction algorithm requires 62 | replicates, some barcodes will be identical, so some method to preserve 63 | distinctness is needed. The row names are gene symbols. 64 | } 65 | \references{} 66 | \author{} 67 | \note{} 68 | \seealso{} 69 | \examples{} 70 | % Add one or more standard keywords, see file 'KEYWORDS' in the 71 | % R documentation directory. 72 | \keyword{ ~kwd1 } 73 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 74 | -------------------------------------------------------------------------------- /package/man/EBNPlus_Correction_Structures.Rd: -------------------------------------------------------------------------------- 1 | \name{EBNPlus_Correction_Structures} 2 | \alias{EBNPlus_Correction_Structures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{EBNPlus_Correction_Structures} 5 | \description{ 6 | Take two matrices and correct them into a single data set using 7 | the EBNPlus algorithm. 8 | } 9 | \usage{ 10 | EBNPlus_Correction_Structures(theDataMatrix1, theDataMatrix2, 11 | theBatchId1, theBatchId2, 12 | theEBNP_BatchWithZero, theEBNP_FixDataSet, theEBNP_CorrectForZero, 13 | theEBNP_ParametricPriorsFlag, 14 | theSeed = NULL, theEBNP_PriorPlotsFile=NULL, theEBNP_MinSampleNum = 3, 15 | theEBNP_AddData1Rows = FALSE, theEBNP_AddData2Rows = FALSE) 16 | } 17 | %- maybe also 'usage' for other objects documented here.
18 | \arguments{ 19 | \item{theDataMatrix1}{A matrix for data set 1 containing numeric values with 20 | columns being sample ids and rows being gene ids.} 21 | \item{theDataMatrix2}{A matrix for data set 2 containing numeric values with 22 | columns being sample ids and rows being gene ids.} 23 | \item{theBatchId1}{The group name for data set 1 (for example, RNASeqV2)} 24 | \item{theBatchId2}{The group name for data set 2 (for example, Agilent4502)} 25 | \item{theEBNP_BatchWithZero}{Batch With Zero indicates which data set 26 | contains zero values. This is a string "1", "2", or "both". 27 | These values will be changed in a future release.} 28 | \item{theEBNP_FixDataSet}{Fix Data Set indicates which data set should 29 | be set as invariant during the corrections. Values are numeric, 1 or 2 for 30 | the appropriate data set, or as.numeric(NA) for neither.} 31 | \item{theEBNP_CorrectForZero}{Correct For Zero indicates whether 32 | or not data sets 33 | marked as "Batch With Zero" should be modified from zero (TRUE or FALSE)} 34 | \item{theEBNP_ParametricPriorsFlag}{Use parametric adjustments 35 | for corrections (TRUE or FALSE)} 36 | \item{theSeed}{NULL or a seed to use. Defaults to NULL. 37 | This is passed to the standard R set.seed function.} 38 | \item{theEBNP_PriorPlotsFile}{Defaults to NULL and does not create PNG. 39 | Non-NULL should be the full path and filename for where 40 | to create the prior plots PNG.} 41 | \item{theEBNP_MinSampleNum}{Defaults to 3. Any row (gene) with less than 42 | this number of samples is dropped.} 43 | \item{theEBNP_AddData1Rows}{Defaults to FALSE. TRUE indicates rows in data 44 | set 2 not in 1 should be added as all NA.} 45 | \item{theEBNP_AddData2Rows}{Defaults to FALSE. TRUE indicates rows in data 46 | set 1 not in 2 should be added as all NA.} 47 | } 48 | \details{ 49 | Take two matrices and correct them into a single data set using the EBNPlus 50 | algorithm and the provided arguments, using all replicates for training.
51 | } 52 | \value{ 53 | EBNPlus_Correction_Structures returns a matrix. The columns of the matrix are the 54 | barcodes with "." and the group name attached to the barcodes. This extra 55 | string is attached because the correction algorithm requires replicates, 56 | so some barcodes will be identical and some method to preserve distinctness is needed. 57 | The row names are gene symbols. 58 | } 59 | \references{} 60 | \author{} 61 | \note{} 62 | \seealso{} 63 | \examples{} 64 | % Add one or more standard keywords, see file 'KEYWORDS' in the 65 | % R documentation directory. 66 | \keyword{ ~kwd1 } 67 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 68 | -------------------------------------------------------------------------------- /package/man/EB_withNonParametricPriors.Rd: -------------------------------------------------------------------------------- 1 | \name{EB_withNonParametricPriors} 2 | \alias{EB_withNonParametricPriors} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{EB_withNonParametricPriors} 5 | \description{ 6 | Perform Empirical Bayes corrections with non-parametric priors. 7 | } 8 | \usage{ 9 | EB_withNonParametricPriors(theBeaData, theBatchIdsNotToCorrect, 10 | theDoCheckPlotsFlag, theBatchType, theThreads = 1, thePath = NULL, 11 | theWriteToFile = FALSE) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{theBeaData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 16 | \item{theBatchIdsNotToCorrect}{A vector of strings giving batch names/ids 17 | within the batch type that should not be corrected} 18 | \item{theDoCheckPlotsFlag}{Defaults to FALSE. TRUE indicates a prior 19 | plots image should be created.} 20 | \item{theBatchType}{A string identifying the batch type to correct.} 21 | \item{theThreads}{Integer defaulting to 1.
Number of threads to use 22 | for calculating priors.} 23 | \item{thePath}{Output path for any files.} 24 | \item{theWriteToFile}{TRUE to write the corrected data to file and return 25 | the file pathname instead of the corrected matrix.} 26 | } 27 | \details{ 28 | Perform Empirical Bayes correction with NONParametric Priors. 29 | Returning the corrected matrix or a path to the filename. 30 | } 31 | \value{ 32 | Return the corrected matrix (columns are sample ids and rows are features) 33 | or a path to the filename. 34 | } 35 | \references{} 36 | \author{} 37 | \note{} 38 | \seealso{} 39 | \examples{} 40 | % Add one or more standard keywords, see file 'KEYWORDS' in the 41 | % R documentation directory. 42 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 43 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 44 | -------------------------------------------------------------------------------- /package/man/EB_withParametricPriors.Rd: -------------------------------------------------------------------------------- 1 | \name{EB_withParametricPriors} 2 | \alias{EB_withParametricPriors} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{EB_withParametricPriors} 5 | \description{ 6 | Perform Empirical Bayes with Parametric Priors corrections. 7 | } 8 | \usage{ 9 | EB_withParametricPriors(theBeaData, theBatchIdsNotToCorrect, 10 | theDoCheckPlotsFlag, theBatchType, theThreads = 1, thePath = NULL, 11 | theWriteToFile = FALSE) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{theBeaData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 16 | \item{theBatchIdsNotToCorrect}{A vector of strings giving batch names/ids 17 | within the batch type that should not be corrected} 18 | \item{theDoCheckPlotsFlag}{Defaults to FALSE. 
TRUE indicates a prior 19 | plots image should be created.} 20 | \item{theBatchType}{A string identifying the batch type to correct.} 21 | \item{theThreads}{Integer defaulting to 1. Number of threads to use 22 | for calculating priors.} 23 | \item{thePath}{Output path for any files.} 24 | \item{theWriteToFile}{TRUE to write the corrected data to file and return 25 | the file pathname instead of the corrected matrix.} 26 | } 27 | \details{ 28 | Perform Empirical Bayes correction with Parametric Priors. 29 | Returning the corrected matrix or a path to the filename. 30 | } 31 | \value{ 32 | Return the corrected matrix (columns are sample ids and rows are features) 33 | or a path to the filename. 34 | } 35 | \references{} 36 | \author{} 37 | \note{} 38 | \seealso{} 39 | \examples{} 40 | % Add one or more standard keywords, see file 'KEYWORDS' in the 41 | % R documentation directory. 42 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 43 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 44 | -------------------------------------------------------------------------------- /package/man/HierarchicalClustering_Structures.Rd: -------------------------------------------------------------------------------- 1 | \name{HierarchicalClustering_Structures} 2 | \alias{HierarchicalClustering_Structures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{HierarchicalClustering_Structures} 5 | \description{ 6 | A function to perform Hierarchical Clustering analysis from data in files. 7 | } 8 | \usage{ 9 | HierarchicalClustering_Structures(theData, theTitle, theOutputPath, 10 | theBatchTypeAndValuePairsToRemove, theBatchTypeAndValuePairsToKeep) 11 | } 12 | %- maybe also 'usage' for other objects documented here. 13 | \arguments{ 14 | \item{theData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 15 | \item{theTitle}{Object of class \code{"character"} Title to use in PNG files. 
} 16 | \item{theOutputPath}{Object of class \code{"character"} 17 | Directory in which to place output PNG files. } 18 | \item{theBatchTypeAndValuePairsToRemove}{Object of class \code{"list"} A list of vectors 19 | containing the batch type (or * for all types) and the value to remove. list() indicates 20 | none while NULL will cause an error. } 21 | \item{theBatchTypeAndValuePairsToKeep}{Object of class \code{"list"} A list of vectors 22 | containing the batch type (or * for all types) and a vector of the value(s) to keep. 23 | list() indicates none while NULL will cause an error. } 24 | } 25 | \details{ 26 | %% ~~ If necessary, more details than the description above ~~ 27 | } 28 | \value{ 29 | %% ~Describe the value returned 30 | %% If it is a LIST, use 31 | %% \item{comp1 }{Description of 'comp1'} 32 | %% \item{comp2 }{Description of 'comp2'} 33 | %% ... 34 | } 35 | \references{ 36 | %% ~put references to the literature/web site here ~ 37 | } 38 | \author{ 39 | %% ~~who you are~~ 40 | } 41 | \note{ 42 | %% ~~further notes~~ 43 | } 44 | 45 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 46 | 47 | \seealso{ 48 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 49 | } 50 | \examples{ 51 | # ############################################################################# 52 | # mydir <- file.path(getwd(), "working", "HierarchicalClustering_Structures") 53 | # dir.create(mydir, recursive=TRUE) 54 | # setLogging(new("Logging", theFile=file.path(mydir, "mbatch.log"))) 55 | # ## path for output 56 | # myOutputPath <- file.path(mydir, "output") 57 | # dir.create(myOutputPath, recursive=TRUE) 58 | # ############################################################################# 59 | # ## this is the title used for the images 60 | # myTitle <- "Disease/Data Type/Platform/Data Level" 61 | # ############################################################################# 62 | # # mbatchLoadFiles 63 | #
############################################################################# 64 | # ## This is the single gene data file to be read and processed 65 | # myGeneDataFile <- system.file("extdata", "matrix_data.tsv", package="MBatch") 66 | # ## This is the single batch file to be read and processed 67 | # myBatchFile <- system.file("extdata", "batches.tsv", package="MBatch") 68 | # ## This is the single covariate file to be read and processed 69 | # myCovariateFile <- system.file("extdata", "covariates.tsv", package="MBatch") 70 | # ## this loads the files 71 | # myData <- mbatchLoadFiles(myGeneDataFile, myBatchFile, myCovariateFile) 72 | # ### 73 | # 74 | # HierarchicalClustering_Structures(myData, myTitle, myOutputPath, list(), list()) 75 | } 76 | % Add one or more standard keywords, see file 'KEYWORDS' in the 77 | % R documentation directory. 78 | \keyword{ ~kwd1 } 79 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 80 | -------------------------------------------------------------------------------- /package/man/Logging-class.Rd: -------------------------------------------------------------------------------- 1 | \name{Logging-class} 2 | \Rdversion{1.1} 3 | \docType{class} 4 | \alias{Logging-class} 5 | 6 | \title{Class \code{"Logging"}} 7 | \description{ 8 | An S4 object detailing logging settings. 9 | } 10 | \section{Objects from the Class}{ 11 | See also \code{\link{new}} 12 | Objects can be created by calls of the form \code{ 13 | setLogging(new("Logging", 14 | theFile=system.file("logging", "mbatch.log", package="MBatch"))) 15 | } 16 | } 17 | \section{Slots}{ 18 | \describe{ 19 | \item{\code{mFile}:}{Object of class \code{"character"} 20 | The file to write log data to. 21 | To log only to the console, set the file equal to an empty string, "" } 22 | \item{\code{mLevelNamesToLog}:}{Object of class \code{"vector"} 23 | Vector of strings naming the types of logging to perform.
Should be limited to the 24 | following logging types: 25 | 'DEBUG', 'TIMING', 'INFO', 'WARN', 'PERCENT', 'ERROR' } 26 | \item{\code{mLevelNames}:}{Object of class \code{"vector"} 27 | Vector of the possible logging types for which there are corresponding 28 | logging functions. Should be left as a 29 | vector of the following strings: 30 | 'DEBUG', 'TIMING', 'INFO', 'WARN', 'PERCENT', 'ERROR' } 31 | \item{\code{mSeparator}:}{Object of class \code{"character"} 32 | Separator character(s) to use 33 | between logging items. Defaults to single space. } 34 | \item{\code{mConsole}:}{Object of class \code{"logical"} 35 | Determines if logged values should always be sent to console. 36 | Set to true if you wish to have the log messages 37 | sent to the console when a log file has been specified. 38 | If the log file is set to "", then logging 39 | messages will default to the console. } 40 | } 41 | } 42 | \section{Methods}{ 43 | No methods defined with class "Logging" in the signature. 44 | } 45 | \references{ 46 | %% ~~put references to the literature/web site here~~ 47 | } 48 | \author{ 49 | Tod Casasent \email{tdcasasent@mdanderson.org} 50 | Nianxiang Zhang \email{nzhang@mdanderson.org} 51 | } 52 | \note{ 53 | %% ~~further notes~~ 54 | } 55 | 56 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 57 | 58 | \seealso{ 59 | See also \code{\link{new}} 60 | %% ~~objects to See Also as \code{\link{~~fun~~}}, ~~~ 61 | %% ~~or \code{\linkS4class{CLASSNAME}} for links to other classes ~~~ 62 | } 63 | \examples{ 64 | \dontrun{ 65 | # ###Set new logging object logging all 6 types 66 | # ###of messages to the console (default values). 67 | # logger<-new("Logging") 68 | # 69 | # ###Set up new logging object to write only error 70 | # ###and warning messages to the file 71 | # ###test.log and not to the console.
72 | # logger<-new("Logging", theFile="test.log", 73 | # mLevelNamesToLog=c("WARN","ERROR"), 74 | # theConsole=FALSE) 75 | } 76 | } 77 | \keyword{classes} 78 | -------------------------------------------------------------------------------- /package/man/MP_ByBatch.Rd: -------------------------------------------------------------------------------- 1 | \name{MP_ByBatch} 2 | \alias{MP_ByBatch} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{MP_ByBatch} 5 | \description{ 6 | Perform Median Polish by Batch Corrections 7 | } 8 | \usage{ 9 | MP_ByBatch(theBeaData, theBatchType, thePath = NULL, theWriteToFile = FALSE) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{theBeaData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 14 | \item{theBatchType}{A string identifying the batch type to correct.} 15 | \item{thePath}{Output path for any files.} 16 | \item{theWriteToFile}{TRUE to write the corrected data to file and return 17 | the file pathname instead of the corrected matrix.} 18 | } 19 | \details{ 20 | Perform Median Polish correction by Batch. 21 | Returning the corrected matrix or a path to the filename. 22 | } 23 | \value{ 24 | Return the corrected matrix (columns are sample ids and rows are features) 25 | or a path to the filename. 26 | } 27 | \references{} 28 | \author{} 29 | \note{} 30 | \seealso{} 31 | \examples{} 32 | % Add one or more standard keywords, see file 'KEYWORDS' in the 33 | % R documentation directory. 34 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 35 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 36 | -------------------------------------------------------------------------------- /package/man/MP_Overall.Rd: -------------------------------------------------------------------------------- 1 | \name{MP_Overall} 2 | \alias{MP_Overall} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 
4 | \title{MP_Overall} 5 | \description{ 6 | Perform Median Polish Overall Corrections 7 | } 8 | \usage{ 9 | MP_Overall(theBeaData, thePath = NULL, theWriteToFile = FALSE) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{theBeaData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 14 | \item{thePath}{Output path for any files.} 15 | \item{theWriteToFile}{TRUE to write the corrected data to file and return 16 | the file pathname instead of the corrected matrix.} 17 | } 18 | \details{ 19 | Perform Median Polish correction overall. 20 | Returning the corrected matrix or a path to the filename. 21 | } 22 | \value{ 23 | Return the corrected matrix (columns are sample ids and rows are features) 24 | or a path to the filename. 25 | } 26 | \references{} 27 | \author{} 28 | \note{} 29 | \seealso{} 30 | \examples{} 31 | % Add one or more standard keywords, see file 'KEYWORDS' in the 32 | % R documentation directory. 33 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 34 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 35 | -------------------------------------------------------------------------------- /package/man/RBN_Pseudoreplicates.Rd: -------------------------------------------------------------------------------- 1 | \name{RBN_Pseudoreplicates} 2 | \alias{RBN_Pseudoreplicates} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{RBN_Pseudoreplicates} 5 | \description{ 6 | Do RBN correction using replicates with different identifiers (pseudoreplicates). 7 | } 8 | \usage{ 9 | RBN_Pseudoreplicates(theInvariantMatrix, theVariantMatrix, 10 | theInvariantReplicates, theVariantReplicates, theInvariantGroupId = "", 11 | theVariantGroupId = "", theMatchedReplicatesFlag = TRUE, 12 | theCombineOnlyFlag = FALSE, thePath = NULL, theWriteToFile = FALSE) 13 | } 14 | %- maybe also 'usage' for other objects documented here. 
15 | \arguments{ 16 | \item{theInvariantMatrix}{Matrix with sample names in colnames and features (like genes) in rownames. This matrix is invariant.} 17 | \item{theVariantMatrix}{Matrix with sample names in colnames and features (like genes) in rownames. This matrix is variant.} 18 | \item{theInvariantReplicates}{Vector of feature ids indicating replicates for invariant data.} 19 | \item{theVariantReplicates}{Vector of feature ids indicating replicates for variant data.} 20 | \item{theInvariantGroupId}{Group name used for labelling invariant features when combining matrixes.} 21 | \item{theVariantGroupId}{Group name used for labelling variant features when combining matrixes.} 22 | \item{theMatchedReplicatesFlag}{If TRUE, indicates that NAs should be added for missing replicates. Defaults to TRUE.} 23 | \item{theCombineOnlyFlag}{If TRUE, only combine the matrixes, do not correct. Defaults to FALSE.} 24 | \item{thePath}{Location for output.} 25 | \item{theWriteToFile}{TRUE means write corrected data to thePath.} 26 | } 27 | \details{Uses RBN (Replicates Based Normalization) to correct one data group to match another. 28 | } 29 | \value{Returns the corrected matrix unless thePath is given and theWriteToFile is TRUE, in which case the full path and filename to the corrected data file is returned.} 30 | \references{ 31 | %% ~put references to the literature/web site here ~ 32 | } 33 | \author{ 34 | %% ~~who you are~~ 35 | } 36 | \note{ 37 | %% ~~further notes~~ 38 | } 39 | 40 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 41 | 42 | \seealso{ 43 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 44 | } 45 | \examples{ 46 | } 47 | % Add one or more standard keywords, see file 'KEYWORDS' in the 48 | % R documentation directory.
49 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 50 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 51 | -------------------------------------------------------------------------------- /package/man/RBN_Replicates.Rd: -------------------------------------------------------------------------------- 1 | \name{RBN_Replicates} 2 | \alias{RBN_Replicates} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{RBN_Replicates} 5 | \description{ 6 | Do RBN correction using exact replicates. 7 | } 8 | \usage{ 9 | RBN_Replicates(theInvariantMatrix, theVariantMatrix, theInvariantGroupId = "", 10 | theVariantGroupId = "", theMatchedReplicatesFlag = TRUE, 11 | theCombineOnlyFlag = FALSE, thePath = NULL, theWriteToFile = FALSE) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{theInvariantMatrix}{Matrix with sample names in colnames and features (like genes) in rownames. This matrix is invariant.} 16 | \item{theVariantMatrix}{Matrix with sample names in colnames and features (like genes) in rownames. This matrix is variant.} 17 | \item{theInvariantGroupId}{Group name used for labelling invariant features when combining matrixes.} 18 | \item{theVariantGroupId}{Group name used for labelling variant features when combining matrixes.} 19 | \item{theMatchedReplicatesFlag}{If TRUE, indicates that NAs should be added for missing replicates. Defaults to TRUE.} 20 | \item{theCombineOnlyFlag}{If TRUE, only combine the matrixes, do not correct. Defaults to FALSE.} 21 | \item{thePath}{Location for output.} 22 | \item{theWriteToFile}{TRUE means write corrected data to thePath.} 23 | } 24 | \details{Uses RBN (Replicates Based Normalization) to correct one data group to match another.
25 | } 26 | \value{Returns the corrected matrix unless thePath is given and theWriteToFile is TRUE, in which case the full path and filename to the corrected data file is returned.} 27 | \references{ 28 | %% ~put references to the literature/web site here ~ 29 | } 30 | \author{ 31 | %% ~~who you are~~ 32 | } 33 | \note{ 34 | %% ~~further notes~~ 35 | } 36 | 37 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 38 | 39 | \seealso{ 40 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 41 | } 42 | \examples{ 43 | } 44 | % Add one or more standard keywords, see file 'KEYWORDS' in the 45 | % R documentation directory. 46 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 47 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 48 | -------------------------------------------------------------------------------- /package/man/SupervisedClustering_Batches_Structures.Rd: -------------------------------------------------------------------------------- 1 | \name{SupervisedClustering_Batches_Structures} 2 | \alias{SupervisedClustering_Batches_Structures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{SupervisedClustering_Batches_Structures} 5 | \description{ 6 | A function to perform Supervised Clustering for batch types from data in files. 7 | } 8 | \usage{ 9 | SupervisedClustering_Batches_Structures(theData, theTitle, theOutputPath, 10 | theDoHeatmapFlag, 11 | theBatchTypeAndValuePairsToRemove = list(), 12 | theBatchTypeAndValuePairsToKeep = list()) 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{theData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 17 | \item{theTitle}{Object of class \code{"character"} 18 | Title to use in PNG files. } 19 | \item{theOutputPath}{Object of class \code{"character"} 20 | Directory in which to place output PNG files.
} 21 | \item{theDoHeatmapFlag}{Object of class \code{"logical"} 22 | A flag indicating whether or not 23 | to create HC heatmap, where TRUE means to create heatmap. } 24 | \item{theBatchTypeAndValuePairsToRemove}{Object of class \code{"list"} 25 | A list of vectors containing the batch type (or * for all types) 26 | and the value to remove. list() indicates 27 | none while NULL will cause an error. } 28 | \item{theBatchTypeAndValuePairsToKeep}{Object of class \code{"list"} 29 | A list of vectors containing the batch type (or * for all types) and a 30 | vector of the value(s) to keep. 31 | list() indicates none while NULL will cause an error. } 32 | } 33 | \details{ 34 | %% ~~ If necessary, more details than the description above ~~ 35 | } 36 | \value{ 37 | %% ~Describe the value returned 38 | %% If it is a LIST, use 39 | %% \item{comp1 }{Description of 'comp1'} 40 | %% \item{comp2 }{Description of 'comp2'} 41 | %% ... 42 | } 43 | \references{ 44 | %% ~put references to the literature/web site here ~ 45 | } 46 | \author{ 47 | %% ~~who you are~~ 48 | } 49 | \note{ 50 | %% ~~further notes~~ 51 | } 52 | 53 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 54 | 55 | \seealso{ 56 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 57 | } 58 | \examples{ 59 | # ############################################################################# 60 | # mydir<-file.path(getwd(),"working","SupervisedClustering_Batches_Structures") 61 | # dir.create(mydir, recursive=TRUE) 62 | # setLogging(new("Logging", theFile=file.path(mydir, "mbatch.log"))) 63 | # ## path for output 64 | # myOutputPath <- file.path(mydir, "output") 65 | # dir.create(myOutputPath, recursive=TRUE) 66 | # ############################################################################# 67 | # ## this is the title used for the images 68 | # myTitle <- "Disease/Data Type/Platform/Data Level" 69 | # ############################################################################# 70 | #
mbatchLoadFiles 71 | # ############################################################################# 72 | # ## This is the single gene data file to be read and processed 73 | # myGeneDataFile <- system.file("extdata", "matrix_data.tsv", package="MBatch") 74 | # ## This is the single batch file to be read and processed 75 | # myBatchFile <- system.file("extdata", "batches.tsv", package="MBatch") 76 | # ## this loads the files 77 | # myData <- mbatchLoadFiles(myGeneDataFile, myBatchFile) 78 | # ### 79 | # SupervisedClustering_Batches_Structures(myData, myTitle, myOutputPath, 80 | #list(), list(), 81 | # theDoHeatmapFlag=TRUE) 82 | } 83 | % Add one or more standard keywords, see file 'KEYWORDS' in the 84 | % R documentation directory. 85 | \keyword{ ~kwd1 } 86 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 87 | -------------------------------------------------------------------------------- /package/man/SupervisedClustering_Pairs_Structures.Rd: -------------------------------------------------------------------------------- 1 | \name{SupervisedClustering_Pairs_Structures} 2 | \alias{SupervisedClustering_Pairs_Structures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{SupervisedClustering_Pairs_Structures} 5 | \description{ 6 | A function to perform Supervised Clustering for pairs of batch types 7 | from data in files. 8 | } 9 | \usage{ 10 | SupervisedClustering_Pairs_Structures(theData, theTitle, theOutputPath, 11 | theDoHeatmapFlag, theListOfBatchPairs, 12 | theBatchTypeAndValuePairsToRemove=list(), 13 | theBatchTypeAndValuePairsToKeep=list() 14 | ) 15 | } 16 | %- maybe also 'usage' for other objects documented here. 17 | \arguments{ 18 | \item{theData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 19 | \item{theTitle}{Object of class \code{"character"} 20 | Title to use in PNG files. } 21 | \item{theOutputPath}{Object of class \code{"character"} 22 | Directory in which to place output PNG files. 
} 23 | \item{theDoHeatmapFlag}{Object of class \code{"logical"} 24 | A flag indicating whether or 25 | not to create HC heatmap, where TRUE means to create heatmap. } 26 | \item{theListOfBatchPairs}{Object of class \code{"vector"} 27 | A list of strings, where pairs of strings give batch types 28 | to use for pairs assessment. Use c("") for empty set. } 29 | \item{theBatchTypeAndValuePairsToRemove}{Object of class \code{"list"} 30 | A list of vectors containing the batch type (or * for all types) 31 | and the value to remove. 32 | list() indicates none while NULL will cause an error. 33 | Defaults to list().} 34 | \item{theBatchTypeAndValuePairsToKeep}{Object of class \code{"list"} 35 | A list of vectors containing the batch type (or * for all types) and a 36 | vector of the value(s) to keep. 37 | list() indicates none while NULL will cause an error. 38 | Defaults to list().} 39 | } 40 | \details{ 41 | %% ~~ If necessary, more details than the description above ~~ 42 | } 43 | \value{ 44 | %% ~Describe the value returned 45 | %% If it is a LIST, use 46 | %% \item{comp1 }{Description of 'comp1'} 47 | %% \item{comp2 }{Description of 'comp2'} 48 | %% ...
49 | } 50 | \references{ 51 | %% ~put references to the literature/web site here ~ 52 | } 53 | \author{ 54 | %% ~~who you are~~ 55 | } 56 | \note{ 57 | %% ~~further notes~~ 58 | } 59 | 60 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 61 | 62 | \seealso{ 63 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 64 | } 65 | \examples{ 66 | # ############################################################################# 67 | # mydir <- file.path(getwd(),"working","SupervisedClustering_Pairs_Structures") 68 | # dir.create(mydir, recursive=TRUE) 69 | # setLogging(new("Logging", theFile=file.path(mydir, "mbatch.log"))) 70 | # ## path for output 71 | # myOutputPath <- file.path(mydir, "output") 72 | # dir.create(myOutputPath, recursive=TRUE) 73 | # ############################################################################# 74 | # ## this is the title used for the images 75 | # myTitle <- "Disease/Data Type/Platform/Data Level" 76 | # ############################################################################# 77 | # # mbatchLoadFiles 78 | # ############################################################################# 79 | # ## This is the single gene data file to be read and processed 80 | # myGeneDataFile <- system.file("extdata", "matrix_data.tsv", package="MBatch") 81 | # ## This is the single batch file to be read and processed 82 | # myBatchFile <- system.file("extdata", "batches.tsv", package="MBatch") 83 | # ## this loads the files 84 | # myData <- mbatchLoadFiles(myGeneDataFile, myBatchFile) 85 | # ### 86 | # SupervisedClustering_Pairs_Structures(myData, myTitle, myOutputPath, list(), 87 | # list(), 88 | # theDoHeatmapFlag=TRUE, 89 | # theListOfBatchPairs=c("PlateId", "TSS", "BatchId", "TSS")) 90 | } 91 | % Add one or more standard keywords, see file 'KEYWORDS' in the 92 | % R documentation directory. 
93 | \keyword{ ~kwd1 } 94 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 95 | -------------------------------------------------------------------------------- /package/man/buildDSCOverviewFile.Rd: -------------------------------------------------------------------------------- 1 | \name{buildDSCOverviewFile} 2 | \alias{buildDSCOverviewFile} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Create DSC Text Output from DSC.RData Files 6 | } 7 | \description{ 8 | MBatch can create a human readable file containing all the DSC values from a run. These are built based on *DSC.RData files created during a call to mbatchAssess. The file uses the data's directory structure to create a table listing the existing data sets and the DSC results for each data set. This requires the data directory structure to match the download directories from the DCC for TCGA data, plus a top level "run date" directory. Additional directories are added to this to create places for the output. 9 | 10 | For example, this is a valid directory structure for use with this output: 11 | 12 | 2011_11_30_0904\\GBM\\Expression-Genes\\BI__HT_HG-U133A\\Level_3\\original\\PCA\\PlateId\\ManyToMany\\PCA-Plus 13 | 14 | This portion comes from the original data's directory structure: GBM\\Expression-Genes\\BI__HT_HG-U133A\\Level_3 15 | Note that this matches the DCC Data Access Matrix directory structure for downloaded data. 16 | 17 | The output file contains the following columns: "run-date", "disease-type", "data-type", "platform", "data-level", "correction-type", "PCA", "batch-type", "Overall-DSC", and "Overall-DSC-pvalue". 
18 | 19 | So, for the GBM output listed above, you end up with these values in the output, which makes it easier to track down interesting datasets using DSC values: 20 | 21 | "run-date" 2011_11_30_0904 <- this is the date of the data run 22 | "disease-type" GBM <- GBM disease data 23 | "data-type" Expression-Genes <- type of data 24 | "platform" BI__HT_HG-U133A <- platform 25 | "data-level" Level_3 <- level 3 DCC data 26 | "correction-type" original <- this is "original" or uncorrected data. Assessments of corrected data will list the correction algorithm. 27 | "PCA" PCA <- PCA assessment algorithm 28 | "batch-type" PlateId <- assessment of the PlateId batch type 29 | 30 | After running mbatchAssess, there are *DSC.RData files created -- these can either be used to create a DSC Overview file with buildDSCOverviewFile, or they can be deleted using clearDSCOverviewFiles. This is handled outside of these functions, so all data sets can be processed by multiple calls to mbatchAssess in a parallel or distributed environment, and then the DSC Overview created from these files at the end of the processing. 31 | } 32 | \usage{ 33 | buildDSCOverviewFile(theStartDir, theOutputFile) 34 | } 35 | %- maybe also 'usage' for other objects documented here. 36 | \arguments{ 37 | \item{theStartDir}{String indicating top of directory structure to search for *DSC.RData objects.} 38 | \item{theOutputFile}{String containing only the filename of output text file. Will be placed in theStartDir.} 39 | } 40 | \details{ 41 | %% ~~ If necessary, more details than the description above ~~ 42 | } 43 | \value{ 44 | %% ~Describe the value returned 45 | %% If it is a LIST, use 46 | %% \item{comp1 }{Description of 'comp1'} 47 | %% \item{comp2 }{Description of 'comp2'} 48 | %% ...
49 | } 50 | \references{ 51 | %% ~put references to the literature/web site here ~ 52 | } 53 | \author{ 54 | Tod Casasent \email{tdcasasent@mdanderson.org} 55 | Nianxiang Zhang \email{nzhang@mdanderson.org} 56 | } 57 | \note{ 58 | %% ~~further notes~~ 59 | } 60 | 61 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 62 | 63 | \seealso{ 64 | See also \code{\link{clearDSCOverviewFiles}} 65 | } 66 | \examples{ 67 | \dontrun{ 68 | example in doRunBEA_Files 69 | }} 70 | % Add one or more standard keywords, see file 'KEYWORDS' in the 71 | % R documentation directory. 72 | \keyword{ ~kwd1 } 73 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 74 | -------------------------------------------------------------------------------- /package/man/clearDSCOverviewFiles.Rd: -------------------------------------------------------------------------------- 1 | \name{clearDSCOverviewFiles} 2 | \alias{clearDSCOverviewFiles} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Delete All DSC.RData Files 6 | } 7 | \description{ 8 | Deletes all *DSC.RData files from the directory structure passed in to it. 9 | 10 | After running mbatchAssess, there are *DSC.RData files created -- these can either be used to create a DSC Overview file with buildDSCOverviewFile, or they can be deleted using clearDSCOverviewFiles. This is handled outside of these functions, so all data sets can be processed by multiple calls to mbatchAssess in a parallel or distributed environment, and then the DSC Overview created from these files at the end of the processing. 11 | } 12 | \usage{ 13 | clearDSCOverviewFiles(theStartDir) 14 | } 15 | %- maybe also 'usage' for other objects documented here.
16 | \arguments{ 17 | \item{theStartDir}{String indicating top of directory structure to search for *DSC.RData objects.} 18 | } 19 | \details{ 20 | %% ~~ If necessary, more details than the description above ~~ 21 | } 22 | \value{ 23 | %% ~Describe the value returned 24 | %% If it is a LIST, use 25 | %% \item{comp1 }{Description of 'comp1'} 26 | %% \item{comp2 }{Description of 'comp2'} 27 | %% ... 28 | } 29 | \references{ 30 | %% ~put references to the literature/web site here ~ 31 | } 32 | \author{ 33 | Tod Casasent \email{tdcasasent@mdanderson.org} 34 | Nianxiang Zhang \email{nzhang@mdanderson.org} 35 | } 36 | \note{ 37 | %% ~~further notes~~ 38 | } 39 | 40 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 41 | 42 | \seealso{ 43 | See also \code{\link{buildDSCOverviewFile}} 44 | } 45 | \examples{ 46 | \dontrun{ 47 | example in doRunBEA_Structures 48 | }} 49 | % Add one or more standard keywords, see file 'KEYWORDS' in the 50 | % R documentation directory. 51 | \keyword{ ~kwd1 } 52 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 53 | -------------------------------------------------------------------------------- /package/man/getReplicatesForRBN.Rd: -------------------------------------------------------------------------------- 1 | \name{getReplicatesForRBN} 2 | \alias{getReplicatesForRBN} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{getReplicatesForRBN} 5 | \description{Function to find exact replicates in sample (column) names.} 6 | \usage{ 7 | getReplicatesForRBN(matrix1, matrix2) 8 | } 9 | %- maybe also 'usage' for other objects documented here. 10 | \arguments{ 11 | \item{matrix1}{First matrix, with sample ids in colnames() and 12 | feature names (like genes) in rownames().} 13 | \item{matrix2}{Second matrix, with sample ids in colnames() and 14 | feature names (like genes) in rownames().} 15 | } 16 | \details{ 17 | Find colnames that match between matrixes. 
18 | } 19 | \value{ 20 | A vector of the sample ids (column names) that appear in both matrices. 21 | } 22 | \references{ 23 | %% ~put references to the literature/web site here ~ 24 | } 25 | \author{ 26 | %% ~~who you are~~ 27 | } 28 | \note{ 29 | %% ~~further notes~~ 30 | } 31 | 32 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 33 | 34 | \seealso{ 35 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 36 | } 37 | \examples{ 38 | } 39 | % Add one or more standard keywords, see file 'KEYWORDS' in the 40 | % R documentation directory. 41 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 42 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 43 | -------------------------------------------------------------------------------- /package/man/mbatchFilterData.Rd: -------------------------------------------------------------------------------- 1 | \name{mbatchFilterData} 2 | \alias{mbatchFilterData} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{mbatchFilterData} 5 | \description{ 6 | Filter data based on IQR, SD, MAD and batches to remove and keep. 7 | } 8 | \usage{ 9 | mbatchFilterData(theBeaData, 10 | theBatchTypeAndValuePairsToRemove=list(c("*", "unknown"), c("*", "Unknown")), 11 | theBatchTypeAndValuePairsToKeep=list(list("Type", c("01", "03", "05"))), 12 | theBatchTypesToRemove=NULL, theMinIqr = 0, theMinSd = 0, theMinMad = 0) 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{theBeaData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 17 | \item{theBatchTypeAndValuePairsToRemove}{Object of class \code{"list"} 18 | A list of vectors containing the batch type (or * for all types) and 19 | the value to remove. list() indicates none while NULL will cause an error. } 20 | \item{theBatchTypeAndValuePairsToKeep}{Object of class \code{"list"} 21 | A list of lists, each containing the batch type (or * for all types) and 22 | a vector of the value(s) to keep. 
list() indicates none while 23 | NULL will cause an error. } 24 | \item{theBatchTypesToRemove}{Object of class \code{"vector"} 25 | A vector of strings giving batch types to remove. NULL means none, 26 | c("Type") means remove Type.} 27 | \item{theMinIqr}{Object of class \code{"numeric"} Minimum acceptable 28 | Interquartile Range (MinIqr) value for gene values. (See \code{\link{IQR}} 29 | from the R stats package.) Zero (0) means use all samples. } 30 | \item{theMinSd}{Object of class \code{"numeric"} Minimum acceptable 31 | Standard Deviation (MinSd) value for gene values. (See \code{\link{sd}} 32 | from the R stats package.) Zero (0) means use all samples. } 33 | \item{theMinMad}{Object of class \code{"numeric"} Minimum acceptable 34 | Median Absolute Deviation (MinMad) value for gene values. 35 | (See \code{\link{mad}} from the R stats package.). 36 | Zero (0) means use all samples.} 37 | } 38 | \details{ 39 | %% ~~ If necessary, more details than the description above ~~ 40 | } 41 | \value{ 42 | An instance of BEA_DATA \code{\link{BEA_DATA-class}} 43 | } 44 | \references{ 45 | %% ~put references to the literature/web site here ~ 46 | } 47 | \author{ 48 | Tod Casasent \email{tdcasasent@mdanderson.org} 49 | } 50 | \note{ 51 | } 52 | 53 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 54 | 55 | \seealso{ 56 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 57 | } 58 | \examples{ 59 | } 60 | % Add one or more standard keywords, see file 'KEYWORDS' in the 61 | % R documentation directory. 62 | \keyword{ ~kwd1 } 63 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 64 | -------------------------------------------------------------------------------- /package/man/mbatchIncludeExcludeData.Rd: -------------------------------------------------------------------------------- 1 | \name{mbatchIncludeExcludeData} 2 | \alias{mbatchIncludeExcludeData} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 
4 | \title{mbatchIncludeExcludeData} 5 | \description{Makes sure that the appropriate list(s) of included/excluded 6 | samples/genes are enforced and returns the updated BEA_DATA object.} 7 | \usage{ 8 | mbatchIncludeExcludeData(theBeaData, theIncludeSamples = NULL, 9 | theIncludeGenes = NULL, theExcludeSamples = NULL, theExcludeGenes = NULL) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{theBeaData}{An instance of BEA_DATA \code{\link{BEA_DATA-class}}} 14 | \item{theIncludeSamples}{A list of sample ids. 15 | Any sample ids not in this list are removed from the data.} 16 | \item{theIncludeGenes}{A list of gene ids. 17 | Any gene ids not in this list are removed from the data.} 18 | \item{theExcludeSamples}{A list of sample ids. 19 | Any sample ids in this list are removed from the data.} 20 | \item{theExcludeGenes}{A list of gene ids. 21 | Any gene ids in this list are removed from the data.} 22 | } 23 | \details{ 24 | %% ~~ If necessary, more details than the description above ~~ 25 | } 26 | \value{ 27 | Returns an instance of BEA_DATA \code{\link{BEA_DATA-class}} 28 | with the updated data and batch information. 29 | } 30 | \references{ 31 | %% ~put references to the literature/web site here ~ 32 | } 33 | \author{ 34 | %% ~~who you are~~ 35 | } 36 | \note{ 37 | %% ~~further notes~~ 38 | } 39 | 40 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 41 | 42 | \seealso{ 43 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 44 | } 45 | \examples{ 46 | 47 | } 48 | % Add one or more standard keywords, see file 'KEYWORDS' in the 49 | % R documentation directory. 
50 | \keyword{ ~kwd1 } 51 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 52 | -------------------------------------------------------------------------------- /package/man/mbatchLoadFiles.Rd: -------------------------------------------------------------------------------- 1 | \name{mbatchLoadFiles} 2 | \alias{mbatchLoadFiles} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{mbatchLoadFiles} 5 | \description{ 6 | Load the given files into a BEA_DATA object, sorting and checking the data for consistency. 7 | } 8 | \usage{ 9 | mbatchLoadFiles(theGeneDataFile, theBatchFile, theCovariateFile=NULL, theNaStrings="NA") 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{theGeneDataFile}{Object of class \code{"character"} This is a string containing the full path to the gene data file which is tab delimited (without quotes) with sample ids across the top and gene-equivalents on the left. The sample ids start with a leading tab. } 14 | \item{theBatchFile}{Object of class \code{"character"} This is a string containing the full path to the batch data file which is tab delimited (without quotes). The first row gives the batch types. The first batch "type" is "Sample". } 15 | \item{theCovariateFile}{Object of class \code{"character"} This is a string containing the full path to the covariate data file which is tab delimited (without quotes). The first row gives the covariate types. The first covariate "type" is "Sample". } 16 | \item{theNaStrings}{This is the na.strings argument to read.csv. This is used because some data contains "NA" values that are code strings and not NA values. 
} 17 | } 18 | \details{ 19 | } 20 | \value{ 21 | An instance of BEA_DATA \code{\link{BEA_DATA-class}} 22 | } 23 | \references{ 24 | %% ~put references to the literature/web site here ~ 25 | } 26 | \author{ 27 | Tod Casasent \email{tdcasasent@mdanderson.org} 28 | } 29 | \note{ 30 | %% ~~further notes~~ 31 | } 32 | 33 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 34 | 35 | \seealso{ 36 | } 37 | \examples{ 38 | } 39 | % Add one or more standard keywords, see file 'KEYWORDS' in the 40 | % R documentation directory. 41 | \keyword{ ~kwd1 } 42 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 43 | -------------------------------------------------------------------------------- /package/man/mbatchLoadStructures.Rd: -------------------------------------------------------------------------------- 1 | \name{mbatchLoadStructures} 2 | \alias{mbatchLoadStructures} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{mbatchLoadStructures} 5 | \description{ 6 | Take the given structures and build a BEA_DATA object to contain them, after doing sorting and making sure data is consistent. 7 | } 8 | \usage{ 9 | mbatchLoadStructures(theGeneMatrix, theBatchDataframe, theCovariatedataframe=NULL) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{theGeneMatrix}{Object of class \code{"matrix"} A matrix where the colnames are sample ids and the rownames are gene equivalents. All names should be strings, not factors. } 14 | \item{theBatchDataframe}{Object of class \code{"data.frame"} A data.frame where the column "names" are batch types. The first batch "type" is "Sample". All names and values should be strings, not factors or numeric. } 15 | \item{theCovariatedataframe}{Object of class \code{"data.frame"} NULL or a data.frame where the column "names" are covariate types. The first covariate "type" is "Sample". All names and values should be strings, not factors or numeric. 
} 16 | } 17 | \details{ 18 | } 19 | \value{ 20 | An instance of BEA_DATA \code{\link{BEA_DATA-class}} 21 | } 22 | \references{ 23 | %% ~put references to the literature/web site here ~ 24 | } 25 | \author{ 26 | Tod Casasent \email{tdcasasent@mdanderson.org} 27 | } 28 | \note{ 29 | %% ~~further notes~~ 30 | } 31 | 32 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 33 | 34 | \seealso{ 35 | } 36 | \examples{ 37 | } 38 | % Add one or more standard keywords, see file 'KEYWORDS' in the 39 | % R documentation directory. 40 | \keyword{ ~kwd1 } 41 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 42 | -------------------------------------------------------------------------------- /package/man/mbatchTrimData.Rd: -------------------------------------------------------------------------------- 1 | \name{mbatchTrimData} 2 | \alias{mbatchTrimData} 3 | \title{mbatchTrimData} 4 | \description{ 5 | Based on IQR, reduce the number of columns (genes) to keep the data below the given size. 6 | } 7 | \usage{ 8 | mbatchTrimData(theMatrix, theMaxSize = 1.5e+07) 9 | } 10 | %- maybe also 'usage' for other objects documented here. 11 | \arguments{ 12 | \item{theMatrix}{A matrix with row names being sample identifier strings, column names being gene strings, and the data being numeric.} 13 | \item{theMaxSize}{An integer giving the maximum number of cells to be in the matrix.} 14 | } 15 | \details{ 16 | This function can be used to reduce the size of a data set to ensure that it will fit within memory. 17 | } 18 | \value{ 19 | A matrix such that the row names (samples) are unchanged, while the columns (genes) are reduced in number such that row count times column count is less than theMaxSize. 20 | Genes are selected for inclusion based on IQR, with the highest-IQR genes selected first. This is useful for batch effects, but may not be desirable for other computations. 
21 | } 22 | \references{ 23 | %% ~put references to the literature/web site here ~ 24 | } 25 | \author{ 26 | Tod Casasent \email{tdcasasent@mdanderson.org} 27 | } 28 | \note{ 29 | %% ~~further notes~~ 30 | } 31 | 32 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 33 | 34 | \seealso{ 35 | } 36 | \examples{ 37 | ##---- Should be DIRECTLY executable !! ---- 38 | ##-- ==> Define data, use random, 39 | ##-- or do help(data=index) for the standard data sets. 40 | } 41 | % Add one or more standard keywords, see file 'KEYWORDS' in the 42 | % R documentation directory. 43 | \keyword{ ~kwd1 } 44 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 45 | -------------------------------------------------------------------------------- /package/man/mbatchWriteSuccessfulLog.Rd: -------------------------------------------------------------------------------- 1 | \name{mbatchWriteSuccessfulLog} 2 | \alias{mbatchWriteSuccessfulLog} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{mbatchWriteSuccessfulLog} 5 | \description{Internal use. Writes a known string to end of a log file.} 6 | \usage{ 7 | mbatchWriteSuccessfulLog() 8 | } 9 | %- maybe also 'usage' for other objects documented here. 10 | \details{ 11 | %% ~~ If necessary, more details than the description above ~~ 12 | } 13 | \value{ 14 | %% ~Describe the value returned 15 | %% If it is a LIST, use 16 | %% \item{comp1 }{Description of 'comp1'} 17 | %% \item{comp2 }{Description of 'comp2'} 18 | %% ... 19 | } 20 | \references{ 21 | %% ~put references to the literature/web site here ~ 22 | } 23 | \author{ 24 | %% ~~who you are~~ 25 | } 26 | \note{ 27 | %% ~~further notes~~ 28 | } 29 | 30 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 31 | 32 | \seealso{ 33 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 34 | } 35 | \examples{ 36 | ##---- Should be DIRECTLY executable !! 
---- 37 | ##-- ==> Define data, use random, 38 | ##-- or do help(data=index) for the standard data sets. 39 | 40 | ## The function is currently defined as 41 | function () 42 | { 43 | logInfo("mbatchAssess Finishing") 44 | } 45 | } 46 | % Add one or more standard keywords, see file 'KEYWORDS' in the 47 | % R documentation directory. 48 | \keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") 49 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 50 | -------------------------------------------------------------------------------- /package/man/readAsDataFrame.Rd: -------------------------------------------------------------------------------- 1 | \name{readAsDataFrame} 2 | \alias{readAsDataFrame} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{readAsDataFrame} 5 | \description{ 6 | This function takes a cross-platform tab-delimited matrix file with column 7 | and optional row labels and reads it in as a data.frame. 8 | The data.frame data is handled as strings. 9 | If row labels are included, there is an initial tab on the column label row. 10 | } 11 | \usage{ 12 | readAsDataFrame(theFile, thePar = "-Xmx2000m") 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{theFile}{The cross-platform tab-delimited data.frame file with 17 | columns and optional row labels to be read. If row labels are included, 18 | there is an initial tab on the column label row.} 19 | \item{thePar}{Parameters to pass to the Java VM -- 20 | by default this requests a maximum heap of 2000 MB (about 2GB) of memory.} 21 | } 22 | \details{ 23 | %% ~~ If necessary, more details than the description above ~~ 24 | } 25 | \value{ 26 | Returns the data.frame read from file. 
27 | } 28 | \references{ 29 | %% ~put references to the literature/web site here ~ 30 | } 31 | \author{ 32 | %% ~~who you are~~ 33 | } 34 | \note{ 35 | %% ~~further notes~~ 36 | } 37 | 38 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 39 | 40 | \seealso{ 41 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 42 | } 43 | \examples{ 44 | ##---- Should be DIRECTLY executable !! ---- 45 | ##-- ==> Define data, use random, 46 | ##-- or do help(data=index) for the standard data sets. 47 | 48 | ## The function is currently defined as 49 | #function (theFile, thePar = "-Xmx2000m") 50 | #{ 51 | # myClass1 <- system.file("ReadRJava", "ReadRJava.jar", package = "MBatch") 52 | # myJavaJars <- file.path(myClass1, fsep = .Platform$path.sep) 53 | # logDebug("readAsDataFrame - thePar ", thePar) 54 | # logDebug("readAsDataFrame - theFile ", theFile) 55 | # logDebug("readAsDataFrame - Calling .jinit ", myJavaJars) 56 | # .jinit(classpath = myJavaJars, force.init = TRUE, parameters = thePar) 57 | # logDebug("readAsDataFrame - .jinit complete") 58 | # logDebug("readAsDataFrame before java") 59 | # objJavaFile <- .jcall("org/mda/readrjava/ReadRJava", 60 | # returnSig = "Lorg/mda/readrjava/JavaFile;", 61 | # method = "loadStringData", .jnew("java/lang/String", 62 | # theFile), TRUE, FALSE, TRUE) 63 | # logDebug("readAsDataFrame after java") 64 | # myData <- .jcall(objJavaFile, "[S", "getmStringData") 65 | # myCols <- .jcall(objJavaFile, "[S", "getmColumns") 66 | # logDebug("readAsDataFrame - length(myData) ", length(myData)) 67 | # logDebug("readAsDataFrame - length(myCols) ", length(myCols)) 68 | # return(data.frame(matrixWithIssues(myData, ncol = length(myCols), 69 | # byrow = TRUE, dimnames = list(NULL, myCols)))) 70 | # } 71 | } 72 | % Add one or more standard keywords, see file 'KEYWORDS' in the 73 | % R documentation directory. 
74 | \keyword{ ~kwd1 } 75 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 76 | -------------------------------------------------------------------------------- /package/man/readAsGenericMatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{readAsGenericMatrix} 2 | \alias{readAsGenericMatrix} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{readAsGenericMatrix} 5 | \description{ 6 | Given a tab-delimited file with column and row headers, read it into a matrix. 7 | } 8 | \usage{ 9 | readAsGenericMatrix(theFile) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{theFile}{full path to file to load into matrix} 14 | } 15 | \details{ 16 | Reads a tab-delimited file using R in an efficient manner, into a matrix. First line should start with a tab. 17 | } 18 | \value{ 19 | a matrix based on the file read in 20 | } 21 | \references{} 22 | \author{} 23 | \note{} 24 | \seealso{} 25 | \examples{ 26 | } 27 | % Add one or more standard keywords, see file 'KEYWORDS' in the 28 | % R documentation directory. 29 | \keyword{ ~kwd1 } 30 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 31 | -------------------------------------------------------------------------------- /package/man/readAsMatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{readAsMatrix} 2 | \alias{readAsMatrix} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{readAsMatrix} 5 | \description{ 6 | This function takes a cross-platform tab-delimited matrix file with 7 | column and optional row labels and reads it in as a matrix. 8 | The data is handled as double (numeric) values. 9 | If row labels are included, there is an initial tab on the column label row. 10 | } 11 | \usage{ 12 | readAsMatrix(theFile, thePar = "-Xmx2000m") 13 | } 14 | %- maybe also 'usage' for other objects documented here. 
15 | \arguments{ 16 | \item{theFile}{The cross-platform tab-delimited matrix file with columns 17 | and optional row labels to be read. If row labels are included, there 18 | is an initial tab on the column label row.} 19 | \item{thePar}{Parameters to pass to the Java VM -- 20 | by default this requests a maximum heap of 2000 MB (about 2GB) of memory.} 21 | } 22 | \details{ 23 | %% ~~ If necessary, more details than the description above ~~ 24 | } 25 | \value{ 26 | The matrix read in. 27 | } 28 | \references{ 29 | %% ~put references to the literature/web site here ~ 30 | } 31 | \author{ 32 | %% ~~who you are~~ 33 | } 34 | \note{ 35 | %% ~~further notes~~ 36 | } 37 | 38 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 39 | 40 | \seealso{ 41 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 42 | } 43 | \examples{ 44 | ##---- Should be DIRECTLY executable !! ---- 45 | ##-- ==> Define data, use random, 46 | ##-- or do help(data=index) for the standard data sets. 47 | 48 | ## The function is currently defined as 49 | #function (theFile, thePar = "-Xmx2000m") 50 | #{ 51 | # myClass1 <- system.file("ReadRJava", "ReadRJava.jar", package = "MBatch") 52 | # myJavaJars <- file.path(myClass1, fsep = .Platform$path.sep) 53 | # logDebug("readAsMatrix - thePar ", thePar) 54 | # logDebug("readAsMatrix - theFile ", theFile) 55 | # logDebug("readAsMatrix - Calling .jinit ", myJavaJars) 56 | # .jinit(classpath = myJavaJars, force.init = TRUE, parameters = thePar) 57 | # logDebug("readAsMatrix - .jinit complete") 58 | # logDebug("readAsMatrix before java") 59 | # objJavaFile <- .jcall("org/mda/readrjava/ReadRJava", 60 | # returnSig = "Lorg/mda/readrjava/JavaFile;", 61 | # method = "loadDoubleData", .jnew("java/lang/String", 62 | # theFile), TRUE, TRUE, TRUE) 63 | # logDebug("readAsMatrix after java") 64 | # myData <- .jcall(objJavaFile, "[D", "getmDoubleData") 65 | # myCols <- .jcall(objJavaFile, "[S", "getmColumns") 66 | # myRows <- .jcall(objJavaFile, "[S", "getmRows") 67 | # myCols <- 
myCols[2:length(myCols)] 68 | # logDebug("readAsMatrix - length(myData) ", length(myData)) 69 | # logDebug("readAsMatrix - length(myCols) ", length(myCols)) 70 | # logDebug("readAsMatrix - length(myRows) ", length(myRows)) 71 | # return(matrixWithIssues(myData, nrow = length(myRows), 72 | # ncol = length(myCols), 73 | # byrow = TRUE, dimnames = list(myRows, myCols))) 74 | # } 75 | } 76 | % Add one or more standard keywords, see file 'KEYWORDS' in the 77 | % R documentation directory. 78 | \keyword{ ~kwd1 } 79 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 80 | -------------------------------------------------------------------------------- /package/man/setLogging.Rd: -------------------------------------------------------------------------------- 1 | \name{setLogging} 2 | \alias{setLogging} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | setLogging 6 | } 7 | \description{ 8 | Used to set up a shared logging object to be used by logging functions and 9 | other routines in the package. See the Logging class for details. 10 | } 11 | \usage{ 12 | setLogging(theLogger) 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{theLogger}{The logger object to share.} 17 | } 18 | \details{ 19 | %% ~~ If necessary, more details than the description above ~~ 20 | } 21 | \value{ 22 | %% ~Describe the value returned 23 | %% If it is a LIST, use 24 | %% \item{comp1 }{Description of 'comp1'} 25 | %% \item{comp2 }{Description of 'comp2'} 26 | %% ... 
27 | } 28 | \references{ 29 | %% ~put references to the literature/web site here ~ 30 | } 31 | \author{ 32 | Tod Casasent \email{tdcasasent@mdanderson.org} 33 | Nianxiang Zhang \email{nzhang@mdanderson.org} 34 | } 35 | \note{ 36 | %% ~~further notes~~ 37 | } 38 | 39 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 40 | 41 | \seealso{ 42 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 43 | See Also as \code{\link{Logging-class}} 44 | } 45 | \examples{ 46 | \dontrun{ 47 | # ## see examples in doRunBEA_Files and doRunBEA_Structures 48 | # setLogging(new("Logging", theFile=theLogFile, 49 | #theLevelNamesToLog=c('TIMING', 'ERROR', 'WARN', 'INFO'))) 50 | # 51 | # ###setLogging(new("Logging", theFile=theLogFile)) 52 | # 53 | }} 54 | % Add one or more standard keywords, see file 'KEYWORDS' in the 55 | % R documentation directory. 56 | \keyword{ ~kwd1 } 57 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 58 | -------------------------------------------------------------------------------- /package/man/writeAsDataframe.Rd: -------------------------------------------------------------------------------- 1 | \name{writeAsDataframe} 2 | \alias{writeAsDataframe} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{writeAsDataframe} 5 | \description{ 6 | This function takes a data.frame and writes it out as a cross-platform 7 | tab-delimited matrix file with column and row labels. 8 | The data.frame data is handled as strings. 9 | If row labels are included, there is an initial tab on the column label row. 10 | } 11 | \usage{ 12 | writeAsDataframe(theFile, theDataframe, thePar = "-Xmx2000m", 13 | theIncludeRowNamesFlag = FALSE) 14 | } 15 | %- maybe also 'usage' for other objects documented here. 
16 | \arguments{ 17 | \item{theFile}{Full path to the output file.} 18 | \item{theDataframe}{The data.frame to write out--requires column names.} 19 | \item{thePar}{Parameters to pass to the Java VM -- 20 | by default this requests a maximum heap of 2000 MB (about 2GB) of memory.} 21 | \item{theIncludeRowNamesFlag}{By default this is false, 22 | which skips the row names.} 23 | } 24 | \details{ 25 | %% ~~ If necessary, more details than the description above ~~ 26 | } 27 | \value{ 28 | Returns TRUE if the save succeeded. 29 | } 30 | \references{ 31 | %% ~put references to the literature/web site here ~ 32 | } 33 | \author{ 34 | %% ~~who you are~~ 35 | } 36 | \note{ 37 | %% ~~further notes~~ 38 | } 39 | 40 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 41 | 42 | \seealso{ 43 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 44 | } 45 | \examples{ 46 | ##---- Should be DIRECTLY executable !! ---- 47 | ##-- ==> Define data, use random, 48 | ##-- or do help(data=index) for the standard data sets. 
49 | 50 | ## The function is currently defined as 51 | #function (theFile, theDataframe, thePar = "-Xmx2000m", 52 | #theIncludeRowNamesFlag = FALSE) 53 | #{ 54 | # myClass1 <- system.file("ReadRJava", "ReadRJava.jar", package = "MBatch") 55 | # myJavaJars <- file.path(myClass1, fsep = .Platform$path.sep) 56 | # logDebug("writeAsDataframe - thePar ", thePar) 57 | # logDebug("writeAsDataframe - theFile ", theFile) 58 | # myCols <- as.vector(colnames(theDataframe)) 59 | # myRows <- NULL 60 | # myData <- as.vector(t(theDataframe)) 61 | # if (TRUE == theIncludeRowNamesFlag) { 62 | # myRows <- as.vector(rownames(theDataframe)) 63 | # } 64 | # logDebug("writeAsDataframe - length(myData) ", length(myData)) 65 | # logDebug("writeAsDataframe - length(myCols) ", length(myCols)) 66 | # logDebug("writeAsDataframe - length(myRows) ", length(myRows)) 67 | # logDebug("writeAsDataframe - Calling .jinit ", myJavaJars) 68 | # .jinit(classpath = myJavaJars, force.init = TRUE, parameters = thePar) 69 | # logDebug("writeAsDataframe - .jinit complete") 70 | # logDebug("writeAsDataframe before java") 71 | # if (is.null(myRows)) { 72 | # success <- .jcall("org/mda/readrjava/ReadRJava", returnSig = "Z", 73 | # method = "writeStringData_Column", .jnew("java/lang/String", 74 | # theFile), .jcastToArray(myCols), .jcastToArray(myData)) 75 | # } 76 | # else { 77 | # success <- .jcall("org/mda/readrjava/ReadRJava", returnSig = "Z", 78 | # method = "writeStringData_All", .jnew("java/lang/String", 79 | # theFile), .jcastToArray(myCols), .jcastToArray(myRows), 80 | # .jcastToArray(myData)) 81 | # } 82 | # logDebug("writeAsDataframe after java") 83 | # logDebug("writeAsDataframe success=", success) 84 | # return(success) 85 | # } 86 | } 87 | % Add one or more standard keywords, see file 'KEYWORDS' in the 88 | % R documentation directory. 
89 | \keyword{ ~kwd1 } 90 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 91 | -------------------------------------------------------------------------------- /package/man/writeAsMatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{writeAsMatrix} 2 | \alias{writeAsMatrix} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{writeAsMatrix} 5 | \description{ 6 | This function takes a matrix and writes it out as a cross-platform tab-delimited matrix file with column and row labels. 7 | The matrix data should be doubles. 8 | If row labels are included, there is an initial tab on the column label row. 9 | } 10 | \usage{ 11 | writeAsMatrix(theFile, theMatrix, thePar = "-Xmx2000m") 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{theFile}{Full path to the output file.} 16 | \item{theMatrix}{The matrix to write out--requires column names and double (numeric) data.} 17 | \item{thePar}{Parameters to pass to the Java VM -- by default this requests a maximum heap of 2000 MB (about 2GB) of memory.} 18 | } 19 | \details{ 20 | %% ~~ If necessary, more details than the description above ~~ 21 | } 22 | \value{ 23 | Returns TRUE if the save succeeded. 24 | } 25 | \references{ 26 | %% ~put references to the literature/web site here ~ 27 | } 28 | \author{ 29 | %% ~~who you are~~ 30 | } 31 | \note{ 32 | %% ~~further notes~~ 33 | } 34 | 35 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 36 | 37 | \seealso{ 38 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 39 | } 40 | \examples{ 41 | ##---- Should be DIRECTLY executable !! ---- 42 | ##-- ==> Define data, use random, 43 | ##-- or do help(data=index) for the standard data sets. 
44 | 45 | ## The function is currently defined as 46 | #function (theFile, theMatrix, thePar = "-Xmx2000m") 47 | #{ 48 | # myClass1 <- system.file("ReadRJava", "ReadRJava.jar", package = "MBatch") 49 | # myJavaJars <- file.path(myClass1, fsep = .Platform$path.sep) 50 | # logDebug("writeAsMatrix - thePar ", thePar) 51 | # logDebug("writeAsMatrix - theFile ", theFile) 52 | # myCols <- as.vector(colnames(theMatrix)) 53 | # myRows <- rownames(theMatrix) 54 | # myData <- as.numeric(as.vector(t(theMatrix))) 55 | # if (!is.null(myRows)) { 56 | # myRows <- as.vector(myRows) 57 | # } 58 | # logDebug("writeAsMatrix - length(myData) ", length(myData)) 59 | # logDebug("writeAsMatrix - length(myCols) ", length(myCols)) 60 | # logDebug("writeAsMatrix - length(myRows) ", length(myRows)) 61 | # logDebug("writeAsMatrix - Calling .jinit ", myJavaJars) 62 | # .jinit(classpath = myJavaJars, force.init = TRUE, parameters = thePar) 63 | # logDebug("writeAsMatrix - .jinit complete") 64 | # logDebug("writeAsMatrix before java") 65 | # if (is.null(myRows)) { 66 | # success <- .jcall("org/mda/readrjava/ReadRJava", returnSig = "Z", 67 | # method = "writeDoubleData_Column", .jnew("java/lang/String", 68 | # theFile), .jarray(myCols), .jarray(myData)) 69 | # } 70 | # else { 71 | # success <- .jcall("org/mda/readrjava/ReadRJava", returnSig = "Z", 72 | # method = "writeDoubleData_All", .jnew("java/lang/String", 73 | # theFile), .jarray(myCols), .jarray(myRows), .jarray(myData)) 74 | # } 75 | # logDebug("writeAsMatrix after java") 76 | # logDebug("writeAsMatrix success=", success) 77 | # return(success) 78 | # } 79 | } 80 | % Add one or more standard keywords, see file 'KEYWORDS' in the 81 | % R documentation directory. 
82 | \keyword{ ~kwd1 } 83 | \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 84 | -------------------------------------------------------------------------------- /package/tests/AN_Adjusted.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "AN_Adjusted") 18 | theCompareFile=file.path(compareDir, "AN_Adjusted.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | if (!is.null(inputDir)) 26 | { 27 | warnLevel<-getOption("warn") 28 | on.exit(options(warn=warnLevel)) 29 | # warnings are errors 30 | options(warn=3) 31 | # if there is a warning, show the calls leading up to it 32 | options(showWarnCalls=TRUE) 33 | # if there is an error, show the calls leading up to it 34 | options(showErrorCalls=TRUE) 35 | # 36 | unlink(theOutputDir, recursive=TRUE) 37 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 38 | # load data 39 | myData <- 
mbatchLoadFiles(theGeneFile, theBatchFile) 40 | myData@mData <- mbatchTrimData(myData@mData, 100000) 41 | # run the AN_Adjusted correction for the chosen batch type, writing its output files to the output path 42 | AN_Adjusted(theBeaData=myData, 43 | theBatchType=theBatchType, 44 | thePath=theOutputDir, 45 | theWriteToFile=TRUE) 46 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "ANY_Corrections-ANAdjusted.tsv")) 47 | compareMatrix <- readAsGenericMatrix(theCompareFile) 48 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 49 | print(compared) 50 | compared 51 | } else { 52 | message("No test data. Skip test.") 53 | TRUE 54 | } 55 | -------------------------------------------------------------------------------- /package/tests/AN_Unadjusted.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "AN_Unadjusted") 18 | theCompareFile=file.path(compareDir, "AN_Unadjusted.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | if (!is.null(inputDir)) 26 | { 27 | warnLevel<-getOption("warn") 28 | on.exit(options(warn=warnLevel)) 29 | # warnings are errors 30 | options(warn=3) 31 | # if there is a warning, show the calls leading up to it 32 | options(showWarnCalls=TRUE) 33 | # if there is an error, show the calls leading up to it 34 | options(showErrorCalls=TRUE) 35 | # 36 | unlink(theOutputDir, recursive=TRUE) 37 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 38 | # load data 39 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 40 | myData@mData <- mbatchTrimData(myData@mData, 100000) 41 | # run the AN_Unadjusted correction for the chosen batch type, writing its output files to the output path 42 | AN_Unadjusted(theBeaData=myData, 43 | theBatchType=theBatchType, 44 | thePath=theOutputDir, 45 | theWriteToFile=TRUE) 46 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "ANY_Corrections-ANUnadjusted.tsv")) 47 | compareMatrix <- readAsGenericMatrix(theCompareFile) 48 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 49 | print(compared) 50 | compared 51 | } else { 52 | message("No test data. Skip test.") 53 | TRUE 54 | } 55 | -------------------------------------------------------------------------------- /package/tests/Boxplot_AllSamplesData_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright (c) 
2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "Boxplot_AllSamplesData_Structures") 18 | theCompareFile=file.path(compareDir, "Boxplot_AllSamplesData_Structures.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | #theBatchType="TSS" 23 | 24 | if (!is.null(inputDir)) 25 | { 26 | warnLevel<-getOption("warn") 27 | on.exit(options(warn=warnLevel)) 28 | # warnings are errors 29 | options(warn=3) 30 | # if there is a warning, show the calls leading up to it 31 | options(showWarnCalls=TRUE) 32 | # if there is an error, show the calls leading up to it 33 | options(showErrorCalls=TRUE) 34 | # 35 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 36 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 37 | # load data 38 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 39 | myData@mData <- mbatchTrimData(myData@mData, 100000) 40 | # here, we take most defaults 41 | Boxplot_AllSamplesData_Structures(theData=myData, 42 | theTitle="Test", 43 | theOutputPath=theOutputDir, 44 | 
theBatchTypeAndValuePairsToRemove=NULL, 45 | theBatchTypeAndValuePairsToKeep=NULL, 46 | theJavaParameters="-Xms8000m", 47 | theMaxGeneCount=10000) 48 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "AllSample-Data", "BoxPlot_AllSample-Data_BoxData-BatchId.tsv")) 49 | compareMatrix <- readAsGenericMatrix(theCompareFile) 50 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 51 | print(compared) 52 | compared 53 | } else { 54 | message("No test data. Skip test.") 55 | TRUE 56 | } 57 | -------------------------------------------------------------------------------- /package/tests/Boxplot_AllSamplesRLE_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 
8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "Boxplot_AllSamplesRLE_Structures") 18 | theCompareFile=file.path(compareDir, "Boxplot_AllSamplesRLE_Structures.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | #theBatchType="TSS" 23 | 24 | if (!is.null(inputDir)) 25 | { 26 | warnLevel<-getOption("warn") 27 | on.exit(options(warn=warnLevel)) 28 | # warnings are errors 29 | options(warn=3) 30 | # if there is a warning, show the calls leading up to it 31 | options(showWarnCalls=TRUE) 32 | # if there is an error, show the calls leading up to it 33 | options(showErrorCalls=TRUE) 34 | # 35 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 36 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 37 | # load data 38 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 39 | myData@mData <- mbatchTrimData(myData@mData, 100000) 40 | # here, we take most defaults 41 | Boxplot_AllSamplesRLE_Structures(theData=myData, 42 | theTitle="Test", 43 | theOutputPath=theOutputDir, 44 | theBatchTypeAndValuePairsToRemove=NULL, 45 | theBatchTypeAndValuePairsToKeep=NULL, 46 | theJavaParameters="-Xms8000m", 47 | theMaxGeneCount=10000) 48 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "AllSample-RLE", "BoxPlot_AllSample-RLE_BoxData-BatchId.tsv")) 49 | compareMatrix <- readAsGenericMatrix(theCompareFile) 50 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 51 | print(compared) 52 | compared 53 | } else { 54 | message("No test data. 
Skip test.") 55 | TRUE 56 | } 57 | -------------------------------------------------------------------------------- /package/tests/Boxplot_Group_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "Boxplot_Group_Structures") 18 | theCompareFile=file.path(compareDir, "Boxplot_Group_Structures.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | #theBatchType="TSS" 23 | 24 | if (!is.null(inputDir)) 25 | { 26 | warnLevel<-getOption("warn") 27 | on.exit(options(warn=warnLevel)) 28 | # warnings are errors 29 | options(warn=3) 30 | # if there is a warning, show the calls leading up to it 31 | options(showWarnCalls=TRUE) 32 | # if there is an error, show the calls leading up to it 33 | options(showErrorCalls=TRUE) 34 | # 35 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 36 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 37 | # load data 38 | myData <- 
mbatchLoadFiles(theGeneFile, theBatchFile) 39 | myData@mData <- mbatchTrimData(myData@mData, 100000) 40 | # here, we take most defaults 41 | Boxplot_Group_Structures(theData=myData, 42 | theTitle="Test", 43 | theOutputPath=theOutputDir, 44 | theBatchTypeAndValuePairsToRemove=NULL, 45 | theBatchTypeAndValuePairsToKeep=NULL, 46 | theListOfGroupBoxFunction=c(mean), 47 | theListOfGroupBoxLabels=c("Mean"), 48 | theJavaParameters="-Xms8000m", 49 | theMaxGeneCount=10000) 50 | correctedMatrix <- readAsDataFrame(file.path(theOutputDir, "Group-MEAN", "BoxPlot_Group-MEAN_BoxData-BatchId.tsv")) 51 | compareMatrix <- readAsDataFrame(theCompareFile) 52 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 53 | print(compared) 54 | compared 55 | } else { 56 | message("No test data. Skip test.") 57 | TRUE 58 | } 59 | -------------------------------------------------------------------------------- /package/tests/CDP_Files.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 
8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile1=file.path(inputDir, "CDP_allrep_data1.tsv") 16 | theGeneFile2=file.path(inputDir, "CDP_allrep_data2.tsv") 17 | theOutputDir=file.path(outputDir, "CDP_Files") 18 | theRandomSeed=314 19 | 20 | if (!is.null(inputDir)) 21 | { 22 | warnLevel<-getOption("warn") 23 | on.exit(options(warn=warnLevel)) 24 | # warnings are errors 25 | options(warn=3) 26 | # if there is a warning, show the calls leading up to it 27 | options(showWarnCalls=TRUE) 28 | # if there is an error, show the calls leading up to it 29 | options(showErrorCalls=TRUE) 30 | # 31 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 32 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 33 | # load data 34 | #theData1 <- readAsGenericMatrix(theGeneFile1) 35 | #theData2 <- readAsGenericMatrix(theGeneFile2) 36 | ############################################################################## 37 | theUseReplicatesUnpaired <- FALSE 38 | theUnmatchedCount <- 1000 39 | CDP_Files(file.path(theOutputDir, "CDP_Plot.png"), theGeneFile1, theGeneFile2, 40 | theSubTitle="all replicates", theMethod="pearson", theUse="pairwise.complete.obs", theSeed=theRandomSeed, 41 | theLinePlot=TRUE, theHistPlot=TRUE, theBinWidth=NULL) 42 | message("No error means test was OK.") 43 | TRUE 44 | } else { 45 | message("No test data. Skip test.") 46 | TRUE 47 | } 48 | -------------------------------------------------------------------------------- /package/tests/CDP_Plot.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 
2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile1=file.path(inputDir, "CDP_norep_data1.tsv") 16 | theGeneFile2=file.path(inputDir, "CDP_norep_data2.tsv") 17 | theOutputDir=file.path(outputDir, "CDP_Plot") 18 | theCompareFile=file.path(compareDir, "CDP_Plot.tsv") 19 | theRandomSeed=314 20 | 21 | if (!is.null(inputDir)) 22 | { 23 | warnLevel<-getOption("warn") 24 | on.exit(options(warn=warnLevel)) 25 | # warnings are errors 26 | options(warn=3) 27 | # if there is a warning, show the calls leading up to it 28 | options(showWarnCalls=TRUE) 29 | # if there is an error, show the calls leading up to it 30 | options(showErrorCalls=TRUE) 31 | # 32 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 33 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 34 | # load data 35 | theData1 <- readAsGenericMatrix(theGeneFile1) 36 | theData2 <- readAsGenericMatrix(theGeneFile2) 37 | ############################################################################## 38 | theUseReplicatesUnpaired <- FALSE 39 | theUnmatchedCount <- 1000 40 | # get list of natural replicates 41 | pairedSamples <- colnames(theData1)[colnames(theData1) %in% colnames(theData2)] 42 | # get list of unmatched 
replicates 43 | unpairedSamples1 <- NULL 44 | unpairedSamples2 <- NULL 45 | if (TRUE==theUseReplicatesUnpaired) 46 | { 47 | unpairedSamples1 <- sample(colnames(theData1), theUnmatchedCount, replace=TRUE) 48 | unpairedSamples2 <- sample(colnames(theData2), theUnmatchedCount, replace=TRUE) 49 | } 50 | else 51 | { 52 | data1samples <- colnames(theData1)[!colnames(theData1) %in% pairedSamples] 53 | data2samples <- colnames(theData2)[!colnames(theData2) %in% pairedSamples] 54 | if (is.null(pairedSamples)) 55 | { 56 | data1samples <- colnames(theData1) 57 | data2samples <- colnames(theData2) 58 | } 59 | if ((0==length(data1samples))||(0==length(data2samples))) 60 | { 61 | unpairedSamples1 <- c() 62 | unpairedSamples2 <- c() 63 | } 64 | else 65 | { 66 | unpairedSamples1 <- sample(data1samples, theUnmatchedCount, replace=TRUE) 67 | unpairedSamples2 <- sample(data2samples, theUnmatchedCount, replace=TRUE) 68 | } 69 | } 70 | CDP_Plot(file.path(theOutputDir, "CDP_Plot.png"), theData1, theData2, pairedSamples, pairedSamples, unpairedSamples1, unpairedSamples2, 71 | theSubTitle="no replicates", theMethod="pearson", theUse="pairwise.complete.obs", theSeed=theRandomSeed, 72 | theLinePlot=TRUE, theHistPlot=TRUE, theBinWidth=NULL) 73 | message("No error means test was OK.") 74 | TRUE 75 | } else { 76 | message("No test data. Skip test.") 77 | TRUE 78 | } 79 | -------------------------------------------------------------------------------- /package/tests/CDP_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 
4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile1=file.path(inputDir, "CDP_reuserep_data1.tsv") 16 | theGeneFile2=file.path(inputDir, "CDP_reuserep_data2.tsv") 17 | theOutputDir=file.path(outputDir, "CDP_Structures") 18 | theRandomSeed=314 19 | 20 | if (!is.null(inputDir)) 21 | { 22 | warnLevel<-getOption("warn") 23 | on.exit(options(warn=warnLevel)) 24 | # warnings are errors 25 | options(warn=3) 26 | # if there is a warning, show the calls leading up to it 27 | options(showWarnCalls=TRUE) 28 | # if there is an error, show the calls leading up to it 29 | options(showErrorCalls=TRUE) 30 | # 31 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 32 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 33 | # load data 34 | theData1 <- readAsGenericMatrix(theGeneFile1) 35 | theData2 <- readAsGenericMatrix(theGeneFile2) 36 | ############################################################################## 37 | theUseReplicatesUnpaired <- FALSE 38 | theUnmatchedCount <- 1000 39 | CDP_Structures(file.path(theOutputDir, "CDP_Plot.png"), theData1, theData2, 40 | theSubTitle="reuse replicates", theMethod="pearson", theUse="pairwise.complete.obs", theSeed=theRandomSeed, 41 | theLinePlot=TRUE, theHistPlot=TRUE, theBinWidth=NULL, theUseReplicatesUnpaired=TRUE) 42 | message("No error means test was OK.") 43 | TRUE 44 | } else { 45 | message("No test data. 
Skip test.") 46 | TRUE 47 | } 48 | -------------------------------------------------------------------------------- /package/tests/EBNPlus_CombineBatches.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | 12 | 13 | inputDir <- getTestInputDir() 14 | outputDir <- getTestOutputDir() 15 | compareDir <- getTestCompareDir() 16 | 17 | theBatchFile=file.path(inputDir, "brca_rnaseq2_batches.tsv") 18 | theBatchFile2=file.path(inputDir, "brca_agi4502_batches.tsv") 19 | theOutputDir=file.path(outputDir, "ebnplus", "EBNPlus_CombineBatches") 20 | theCompareFile=file.path(compareDir, "EBNPlus_CombineBatches.tsv") 21 | theBatchId1="RNASeqV2" 22 | theBatchId2="Agilent4502" 23 | #myRandomSeed <- 314 24 | #myTestSeed <- 42 25 | 26 | if (!is.null(inputDir)) 27 | { 28 | unlink(theOutputDir, recursive=TRUE) 29 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 30 | dataBatches <- EBNPlus_CombineBatches(readAsDataFrame(theBatchFile), readAsDataFrame(theBatchFile2), theBatchId1, theBatchId2) 31 | writeAsDataframe(file.path(theOutputDir, "BatchData.tsv"), dataBatches) 32 | compareDF <- readAsDataFrame(theCompareFile) 33 | print(all(dataBatches==compareDF, na.rm=TRUE)) 34 | (all(dataBatches==compareDF, 
na.rm=TRUE)) 35 | } else { 36 | message("No test data. Skip test.") 37 | TRUE 38 | } 39 | -------------------------------------------------------------------------------- /package/tests/EBNPlus_Correction_Files.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 
8 | 9 | library(MBatch) 10 | 11 | 12 | 13 | inputDir <- getTestInputDir() 14 | outputDir <- getTestOutputDir() 15 | compareDir <- getTestCompareDir() 16 | 17 | theDataFile1=file.path(inputDir, "brca_rnaseq2_matrix_data.tsv") 18 | theDataFile2=file.path(inputDir, "brca_agi4502_matrix_data.tsv") 19 | theOutputDir=file.path(outputDir, "ebnplus") 20 | theCompareFile=file.path(compareDir, "EBNPlus_Correction_Files.tsv") 21 | theBatchId1="RNASeqV2" 22 | theBatchId2="Agilent4502" 23 | theRandomSeed=314 24 | #myRandomSeed <- 314 25 | #myTestSeed <- 42 26 | 27 | if (!is.null(inputDir)) 28 | { 29 | message("EBNPlus_Correction_Files") 30 | warnLevel<-getOption("warn") 31 | on.exit(options(warn=warnLevel)) 32 | # warnings are errors 33 | options(warn=3) 34 | # if there is a warning, show the calls leading up to it 35 | options(showWarnCalls=TRUE) 36 | # if there is an error, show the calls leading up to it 37 | options(showErrorCalls=TRUE) 38 | # 39 | outdir <- file.path(theOutputDir, "EBNPlus_Correction_Files") 40 | unlink(outdir, recursive=TRUE) 41 | dir.create(outdir, showWarnings=FALSE, recursive=TRUE) 42 | #setLogging(new("Logging", theFile=file.path(outdir, "mbatch.log"))) 43 | # this is an MDA function that starts with and processes standardized data files 44 | myCorrectedFile <- EBNPlus_Correction_Files( 45 | theDataFile1=theDataFile1, 46 | theDataFile2=theDataFile2, 47 | theOutputDir=outdir, 48 | theBatchId1=theBatchId1, 49 | theBatchId2=theBatchId2, 50 | theSeed=theRandomSeed, 51 | theEBNP_PriorPlotsFlag=TRUE) 52 | message("after correction-load file") 53 | myCorrectedFile <- myCorrectedFile[[1]] 54 | myRenamedFile <- file.path(dirname(myCorrectedFile), "corrected.tsv") 55 | file.rename(myCorrectedFile, myRenamedFile) 56 | correctedMatrix <- readAsGenericMatrix(myRenamedFile) 57 | compareMatrix <- readAsGenericMatrix(theCompareFile) 58 | message("myRenamedFile=",myRenamedFile) 59 | message("theCompareFile=",theCompareFile) 60 | message("correctedMatrix") 61 | 
print(dim(correctedMatrix)) 62 | print(correctedMatrix[1:4,1:3]) 63 | message("compareMatrix") 64 | print(dim(compareMatrix)) 65 | print(compareMatrix[1:4,1:3]) 66 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 67 | print(compared) 68 | compared 69 | } else { 70 | message("No test data. Skip test.") 71 | TRUE 72 | } -------------------------------------------------------------------------------- /package/tests/EB_withNonParametricPriors.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 
8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "EB_withNonParametricPriors") 18 | theCompareFile=file.path(compareDir, "EB_withNonParametricPriors.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | if (!is.null(inputDir)) 26 | { 27 | warnLevel<-getOption("warn") 28 | on.exit(options(warn=warnLevel)) 29 | # warnings are errors 30 | options(warn=3) 31 | # if there is a warning, show the calls leading up to it 32 | options(showWarnCalls=TRUE) 33 | # if there is an error, show the calls leading up to it 34 | options(showErrorCalls=TRUE) 35 | # 36 | unlink(theOutputDir, recursive=TRUE) 37 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 38 | # load data 39 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 40 | myData@mData <- mbatchTrimData(myData@mData, 100000) 41 | # run the EB_withNonParametricPriors correction (check plots enabled, one thread), writing its output files to the output path 42 | EB_withNonParametricPriors(theBeaData=myData, 43 | theBatchIdsNotToCorrect=c(""), 44 | theDoCheckPlotsFlag=TRUE, 45 | theBatchType=theBatchType, 46 | theThreads=1, 47 | thePath=theOutputDir, 48 | theWriteToFile=TRUE) 49 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "ANY_Corrections-EBwithNonParametricPriors.tsv")) 50 | compareMatrix <- readAsGenericMatrix(theCompareFile) 51 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 52 | print(compared) 53 | compared 54 | } else { 55 | message("No test data. 
Skip test.") 56 | TRUE 57 | } 58 | -------------------------------------------------------------------------------- /package/tests/EB_withParametricPriors.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "EB_withParametricPriors") 18 | theCompareFile=file.path(compareDir, "EB_withParametricPriors.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | if (!is.null(inputDir)) 26 | { 27 | warnLevel<-getOption("warn") 28 | on.exit(options(warn=warnLevel)) 29 | # warnings are errors 30 | options(warn=3) 31 | # if there is a warning, show the calls leading up to it 32 | options(showWarnCalls=TRUE) 33 | # if there is an error, show the calls leading up to it 34 | options(showErrorCalls=TRUE) 35 | # 36 | unlink(theOutputDir, recursive=TRUE) 37 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 38 | # load data 39 | myData <- mbatchLoadFiles(theGeneFile, 
theBatchFile) 40 | myData@mData <- mbatchTrimData(myData@mData, 100000) 41 | # run the EB_withParametricPriors correction (check plots enabled, one thread), writing its output files to the output path 42 | EB_withParametricPriors(theBeaData=myData, 43 | theBatchIdsNotToCorrect=c(""), 44 | theDoCheckPlotsFlag=TRUE, 45 | theBatchType=theBatchType, 46 | theThreads=1, 47 | thePath=theOutputDir, 48 | theWriteToFile=TRUE) 49 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "ANY_Corrections-EBwithParametricPriors.tsv")) 50 | compareMatrix <- readAsGenericMatrix(theCompareFile) 51 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 52 | print(compared) 53 | compared 54 | } else { 55 | message("No test data. Skip test.") 56 | TRUE 57 | } 58 | -------------------------------------------------------------------------------- /package/tests/HierarchicalClustering_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "HierarchicalClustering_Structures") 18 | theCompareFile=file.path(compareDir, "HierarchicalClustering_Structures.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | if (!is.null(inputDir)) 25 | { 26 | warnLevel<-getOption("warn") 27 | on.exit(options(warn=warnLevel)) 28 | # warnings are errors 29 | options(warn=3) 30 | # if there is a warning, show the calls leading up to it 31 | options(showWarnCalls=TRUE) 32 | # if there is an error, show the calls leading up to it 33 | options(showErrorCalls=TRUE) 34 | # 35 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 36 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 37 | # load data 38 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 39 | myData@mData <- mbatchTrimData(myData@mData, 100000) 40 | # here, we take most defaults 41 | HierarchicalClustering_Structures(theData=myData, 42 | theTitle="Test PCA", 43 | theOutputPath=theOutputDir) 44 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "HCData.tsv")) 45 | compareMatrix <- readAsGenericMatrix(theCompareFile) 46 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 47 | print(compared) 48 | compared 49 | } else { 50 | message("No test data. Skip test.") 51 | TRUE 52 | } 53 | -------------------------------------------------------------------------------- /package/tests/MP_ByBatch.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 
2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "MP_ByBatch") 18 | theCompareFile=file.path(compareDir, "MP_ByBatch.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | if (!is.null(inputDir)) 26 | { 27 | warnLevel<-getOption("warn") 28 | on.exit(options(warn=warnLevel)) 29 | # warnings are errors 30 | options(warn=3) 31 | # if there is a warning, show the calls leading up to it 32 | options(showWarnCalls=TRUE) 33 | # if there is an error, show the calls leading up to it 34 | options(showErrorCalls=TRUE) 35 | # 36 | unlink(theOutputDir, recursive=TRUE) 37 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 38 | # load data 39 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 40 | myData@mData <- mbatchTrimData(myData@mData, 100000) 41 | # here, we take all the defaults to hierarchical clustering, passing a title and an output path 42 | MP_ByBatch(theBeaData=myData, 43 | theBatchType=theBatchType, 44 | thePath=theOutputDir, 45 | theWriteToFile=TRUE) 
46 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "ANY_Corrections-MPByBatch.tsv")) 47 | compareMatrix <- readAsGenericMatrix(theCompareFile) 48 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 49 | print(compared) 50 | compared 51 | } else { 52 | message("No test data. Skip test.") 53 | TRUE 54 | } 55 | -------------------------------------------------------------------------------- /package/tests/MP_Overall.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 
8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "MP_Overall") 18 | theCompareFile=file.path(compareDir, "MP_Overall.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | if (!is.null(inputDir)) 26 | { 27 | warnLevel<-getOption("warn") 28 | on.exit(options(warn=warnLevel)) 29 | # warnings are errors 30 | options(warn=3) 31 | # if there is a warning, show the calls leading up to it 32 | options(showWarnCalls=TRUE) 33 | # if there is an error, show the calls leading up to it 34 | options(showErrorCalls=TRUE) 35 | # 36 | unlink(theOutputDir, recursive=TRUE) 37 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 38 | # load data 39 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 40 | myData@mData <- mbatchTrimData(myData@mData, 100000) 41 | # here, we take all the defaults to hierarchical clustering, passing a title and an output path 42 | MP_Overall(theBeaData=myData, 43 | thePath=theOutputDir, 44 | theWriteToFile=TRUE) 45 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "ANY_Corrections-MPOverall.tsv")) 46 | compareMatrix <- readAsGenericMatrix(theCompareFile) 47 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 48 | print(compared) 49 | compared 50 | } else { 51 | message("No test data. Skip test.") 52 | TRUE 53 | } 54 | -------------------------------------------------------------------------------- /package/tests/PCA_DualBatch_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 
2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "PCA_DualBatch_Structures") 18 | theCompareFile=file.path(compareDir, "PCA_DualBatch_Structures.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | isTrendBatch<-function(theBatchTypeName, theListOfBatchIds) 26 | { 27 | return(is.element(theBatchTypeName, c("ShipDate"))) 28 | } 29 | 30 | if (!is.null(inputDir)) 31 | { 32 | warnLevel<-getOption("warn") 33 | on.exit(options(warn=warnLevel)) 34 | # warnings are errors 35 | options(warn=3) 36 | # if there is a warning, show the calls leading up to it 37 | options(showWarnCalls=TRUE) 38 | # if there is an error, show the calls leading up to it 39 | options(showErrorCalls=TRUE) 40 | # 41 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 42 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 43 | # load data 44 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 45 | myData@mData <- mbatchTrimData(myData@mData, 100000) 46 | # here, we take most defaults 47 | 
PCA_DualBatch_Structures(theData=myData, 48 | theTitle="Test PCA", 49 | theOutputPath=theOutputDir, 50 | theBatchTypeAndValuePairsToRemove=NULL, 51 | theBatchTypeAndValuePairsToKeep=NULL, 52 | theListForDoCentroidDualBatchType=c("BatchId", "PlateId", "TSS", "ShipDate"), 53 | theIsPcaTrendFunction=isTrendBatch, 54 | theDSCPermutations=1000, 55 | theDSCThreads=1, 56 | theMinBatchSize=2, 57 | theJavaParameters="-Xms2000m", 58 | theSeed=theRandomSeed, 59 | theMaxGeneCount=10000) 60 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "BatchIdwithPlateId", "DualBatch", "PCAValues.tsv")) 61 | compareMatrix <- readAsGenericMatrix(theCompareFile) 62 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 63 | print(compared) 64 | compared 65 | } else { 66 | message("No test data. Skip test.") 67 | TRUE 68 | } 69 | -------------------------------------------------------------------------------- /package/tests/PCA_Regular_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 
8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "PCA_Regular_Structures") 18 | theCompareFile=file.path(compareDir, "PCA_Regular_Structures.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | isTrendBatch<-function(theBatchTypeName, theListOfBatchIds) 26 | { 27 | return(is.element(theBatchTypeName, c("ShipDate"))) 28 | } 29 | 30 | if (!is.null(inputDir)) 31 | { 32 | warnLevel<-getOption("warn") 33 | on.exit(options(warn=warnLevel)) 34 | # warnings are errors 35 | options(warn=3) 36 | # if there is a warning, show the calls leading up to it 37 | options(showWarnCalls=TRUE) 38 | # if there is an error, show the calls leading up to it 39 | options(showErrorCalls=TRUE) 40 | # 41 | unlink(theOutputDir, recursive=TRUE, force=TRUE) 42 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 43 | # load data 44 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 45 | myData@mData <- mbatchTrimData(myData@mData, 100000) 46 | # here, we take most defaults 47 | PCA_Regular_Structures(theData=myData, 48 | theTitle="Test PCA", 49 | theOutputPath=theOutputDir, 50 | theBatchTypeAndValuePairsToRemove=NULL, 51 | theBatchTypeAndValuePairsToKeep=NULL, 52 | theDoDscPermsFileFlag = TRUE, 53 | theIsPcaTrendFunction=isTrendBatch, 54 | theDSCPermutations=1000, 55 | theDSCThreads=1, 56 | theMinBatchSize=2, 57 | theJavaParameters="-Xms2000m", 58 | theSeed=theRandomSeed, 59 | theMaxGeneCount=10000) 60 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "BatchId", "ManyToMany", "PCAValues.tsv")) 61 | compareMatrix <- readAsGenericMatrix(theCompareFile) 62 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 63 | print(compared) 64 | compared 
65 | } else { 66 | message("No test data. Skip test.") 67 | TRUE 68 | } 69 | -------------------------------------------------------------------------------- /package/tests/RBN_Replicates.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | invariantFile=file.path(inputDir, "rbn-test6-iset.tsv") 16 | variantFile=file.path(inputDir, "rbn-test6-vset.tsv") 17 | theOutputDir=file.path(outputDir, "RBN_Replicates") 18 | theCompareFile=file.path(compareDir, "rbn-test6-output.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | 23 | resolveDuplicates <- function(theNames) 24 | { 25 | # keep first instance of a name 26 | # number subsequent ones starting with .1 27 | make.unique(theNames) 28 | } 29 | 30 | readRPPAdataAsMatrix_WithTab <- function(theFile) 31 | { 32 | # read RPPA data as a dataframe 33 | # column rppaDF[,1] contains row names that may contain duplicates 34 | rppaDF <- readAsGenericDataframe(theFile) 35 | # resolve duplicates in row names here 36 | myRownames <- rppaDF[,1] 37 | myRownames <- resolveDuplicates(myRownames) 38 | # convert to matrix 
39 | myMatrix <- data.matrix(rppaDF[,-1]) 40 | rownames(myMatrix) <- myRownames 41 | t(myMatrix) 42 | } 43 | 44 | readRPPAdataAsMatrix_NoInitialTab <- function(theFile) 45 | { 46 | # read RPPA data as a dataframe 47 | # column rppaDF[,1] contains row names that may contain duplicates 48 | rppaDF <- read.table(theFile, header=TRUE, sep="\t", as.is=TRUE, 49 | check.names=FALSE, stringsAsFactors=FALSE, 50 | colClasses="character", na.strings="NA", 51 | row.names=NULL) 52 | # resolve duplicates in row names here 53 | myRownames <- rppaDF[,1] 54 | myRownames <- resolveDuplicates(myRownames) 55 | # convert to matrix 56 | myMatrix <- data.matrix(rppaDF[,-1]) 57 | rownames(myMatrix) <- myRownames 58 | t(myMatrix) 59 | } 60 | 61 | if (!is.null(inputDir)) 62 | { 63 | warnLevel<-getOption("warn") 64 | on.exit(options(warn=warnLevel)) 65 | # warnings are errors 66 | options(warn=3) 67 | # if there is a warning, show the calls leading up to it 68 | options(showWarnCalls=TRUE) 69 | # if there is an error, show the calls leading up to it 70 | options(showErrorCalls=TRUE) 71 | # 72 | unlink(theOutputDir, recursive=TRUE) 73 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 74 | 75 | message("Reading invariant file") 76 | invMatrix = readRPPAdataAsMatrix_WithTab(invariantFile) 77 | message("Reading variant file") 78 | varMatrix = readRPPAdataAsMatrix_WithTab(variantFile) 79 | filename <- RBN_Replicates(theInvariantMatrix=invMatrix, 80 | theVariantMatrix=varMatrix, 81 | theInvariantGroupId="Grp1", 82 | theVariantGroupId="Grp2", 83 | theMatchedReplicatesFlag=TRUE, 84 | theCombineOnlyFlag=FALSE, 85 | thePath=theOutputDir, 86 | theWriteToFile=TRUE) 87 | correctedMatrix <- readAsGenericMatrix(file.path(theOutputDir, "ANY_Corrections-RBN_Replicates.tsv")) 88 | compareMatrix <- readAsGenericMatrix(theCompareFile) 89 | compared <- compareTwoMatrices(correctedMatrix, compareMatrix) 90 | print(compared) 91 | compared 92 | } else { 93 | message("No test data. 
Skip test.") 94 | TRUE 95 | } 96 | -------------------------------------------------------------------------------- /package/tests/SupervisedClustering_Batches_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "SupervisedClustering_Batches_Structures") 18 | #theCompareFile=file.path(compareDir, "SupervisedClustering_Batches_Structures.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | if (!is.null(inputDir)) 26 | { 27 | warnLevel<-getOption("warn") 28 | on.exit(options(warn=warnLevel)) 29 | # warnings are errors 30 | options(warn=3) 31 | # if there is a warning, show the calls leading up to it 32 | options(showWarnCalls=TRUE) 33 | # if there is an error, show the calls leading up to it 34 | options(showErrorCalls=TRUE) 35 | # 36 | unlink(theOutputDir, recursive=TRUE) 37 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 38 | # load data 
39 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 40 | myData@mData <- mbatchTrimData(myData@mData, 100000) 41 | # here, we take most defaults 42 | SupervisedClustering_Batches_Structures(theData=myData, 43 | theTitle="Test Supervised Clustering", 44 | theOutputPath=theOutputDir, 45 | theDoHeatmapFlag=TRUE) 46 | message("No comparable output--no error means 'OK' for now.") 47 | TRUE 48 | } else { 49 | message("No test data. Skip test.") 50 | TRUE 51 | } 52 | -------------------------------------------------------------------------------- /package/tests/SupervisedClustering_Pairs_Structures.R: -------------------------------------------------------------------------------- 1 | #MBatch Copyright ? 2011, 2012, 2013, 2014, 2015, 2016, 2017 University of Texas MD Anderson Cancer Center 2 | # 3 | #This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. 4 | # 5 | #This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 6 | # 7 | #You should have received a copy of the GNU General Public License along with this program. If not, see . 
8 | 9 | library(MBatch) 10 | 11 | inputDir <- getTestInputDir() 12 | outputDir <- getTestOutputDir() 13 | compareDir <- getTestCompareDir() 14 | 15 | theGeneFile=file.path(inputDir, "matrix_data-Tumor.tsv") 16 | theBatchFile=file.path(inputDir, "batches-Tumor.tsv") 17 | theOutputDir=file.path(outputDir, "SupervisedClustering_Pairs_Structures") 18 | #theCompareFile=file.path(compareDir, "SupervisedClustering_Pairs_Structures.tsv") 19 | theRandomSeed=314 20 | #myRandomSeed <- 314 21 | #myTestSeed <- 42 22 | theBatchType="TSS" 23 | 24 | 25 | if (!is.null(inputDir)) 26 | { 27 | warnLevel<-getOption("warn") 28 | on.exit(options(warn=warnLevel)) 29 | # warnings are errors 30 | options(warn=3) 31 | # if there is a warning, show the calls leading up to it 32 | options(showWarnCalls=TRUE) 33 | # if there is an error, show the calls leading up to it 34 | options(showErrorCalls=TRUE) 35 | # 36 | unlink(theOutputDir, recursive=TRUE) 37 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 38 | # load data 39 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 40 | myData@mData <- mbatchTrimData(myData@mData, 100000) 41 | # here, we take most defaults 42 | SupervisedClustering_Pairs_Structures(theData=myData, 43 | theTitle="Test Supervised Clustering", 44 | theOutputPath=theOutputDir, 45 | theDoHeatmapFlag=TRUE, 46 | theListOfBatchPairs=c("BatchId", "PlateId", "TSS", "ShipDate"), 47 | theBatchTypeAndValuePairsToRemove=NULL, 48 | theBatchTypeAndValuePairsToKeep=NULL) 49 | message("No comparable output--no error means 'OK' for now.") 50 | TRUE 51 | } else { 52 | message("No test data. 
Skip test.") 53 | TRUE 54 | } 55 | -------------------------------------------------------------------------------- /package/vignettes/BoxPlot_AllSample-Data_Diagram-TSS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/vignettes/BoxPlot_AllSample-Data_Diagram-TSS.png -------------------------------------------------------------------------------- /package/vignettes/BoxPlot_AllSample-RLE_Diagram-BatchId.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/vignettes/BoxPlot_AllSample-RLE_Diagram-BatchId.png -------------------------------------------------------------------------------- /package/vignettes/MBatch_05-07_MP_Overall.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using MBatch Corrections: MP_Overall" 3 | author: "Tod Casasent" 4 | date: "`r Sys.Date()`" 5 | #output: rmarkdown::html_vignette 6 | # install with vignettes using devtools::install(build_vignettes = TRUE) 7 | # build vignettes using devtools::build_vignettes() (inst/doc) 8 | output: 9 | pdf_document: 10 | number_sections: TRUE 11 | vignette: > 12 | %\VignetteIndexEntry{Using MBatch Corrections: MP_Overall} 13 | %\VignetteEngine{knitr::rmarkdown} 14 | %\VignetteEncoding{UTF-8} 15 | --- 16 | 17 | # Introduction 18 | 19 | These instructions are aimed at people familiar with R and familiar with TCGA/GDC platforms and data types. They are intended to introduce the reader to applying the given correction. These instructions will only rarely, if ever, touch on the appropriateness of the correction algorithm or interpretation of output. See MBatch_01_InstallLinux for instructions on downloading test data.
20 | 21 | # Algorithm 22 | 23 | MP Overall performs a Median Polish Overall correction taking a BEA_DATA object (with data matrix and batch dataframe) and returning either a corrected matrix or a string containing the path to where the data file was written. 24 | 25 | # Output 26 | 27 | The primary output method for MBatch is to view results in the Batch Effects Website. Correction algorithms generally do not create graphical output and instead create TSV output files. 28 | 29 | # Usage 30 | 31 | MP_Overall(theBeaData, thePath = NULL, theWriteToFile = FALSE) 32 | 33 | # Arguments 34 | 35 | ## theBeaData 36 | 37 | BEA_DATA objects can be created by calls of the form new("BEA_DATA", theData, theBatches, theCovariates). If you have no covariate data, use an empty data.frame created with data.frame() 38 | 39 | mData: 40 | Object of class "matrix" A matrix where the colnames are sample ids and the rownames are gene equivalents. All names should be strings, not factors. 41 | 42 | mBatches: 43 | Object of class "data.frame" A data.frame where the column "names" are batch types. The first batch "type" is "Sample". All names and values should be strings, not factors or numeric. 44 | 45 | mCovariates: 46 | Object of class "data.frame" A data.frame where the column "names" are covariate types. The first covariate "type" is "Sample". All names and values should be strings, not factors or numeric. 47 | 48 | ## thePath 49 | Output path for any files. 50 | 51 | ## theWriteToFile 52 | TRUE to write the corrected data to file and return the file pathname instead of the corrected matrix. 53 | 54 | # Example Call 55 | 56 | The following code is adapted from the tests/MP_Overall.R file. Data used is from the testing data as per the MBatch_01_InstallLinux document. In the future, we plan to make the output from MBatch more user friendly, but currently, this produces the following output at the command line. 
57 | 58 | ```{r, echo=TRUE} 59 | { 60 | library(MBatch) 61 | 62 | # set the paths 63 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 64 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 65 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 66 | theRandomSeed=314 67 | 68 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 69 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 70 | theOutputDir="/bea_testing/output/MP_Overall" 71 | theRandomSeed=314 72 | theBatchType="TSS" 73 | 74 | # make sure the output dir exists and is empty 75 | unlink(theOutputDir, recursive=TRUE) 76 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 77 | # load data 78 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 79 | myData@mData <- mbatchTrimData(myData@mData, 100000) 80 | # call 81 | outputFile <- MP_Overall(theBeaData=myData, 82 | thePath=theOutputDir, 83 | theWriteToFile=TRUE) 84 | correctedMatrix <- readAsGenericMatrix(outputFile) 85 | print(correctedMatrix[1:4, 1:4]) 86 | } 87 | ``` 88 | 89 | # Example File Output 90 | 91 | The above code creates the following output file. File is named using the following naming convention: 92 | ANY_Corrections-MPOverall.tsv 93 | The TSV file with the corrected dataset is written by the MBatch package. The end of the output shows a snippet from the corrected matrix. 
94 | -------------------------------------------------------------------------------- /package/vignettes/MBatch_05-08_MP_ByBatch.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using MBatch Corrections: MP_ByBatch" 3 | author: "Tod Casasent" 4 | date: "`r Sys.Date()`" 5 | #output: rmarkdown::html_vignette 6 | # install with vignettes using devtools::install(build_vignettes = TRUE) 7 | # build vignettes using devtools::build_vignettes() (inst/doc) 8 | output: 9 | pdf_document: 10 | number_sections: TRUE 11 | vignette: > 12 | %\VignetteIndexEntry{Using MBatch Corrections: MP_ByBatch} 13 | %\VignetteEngine{knitr::rmarkdown} 14 | %\VignetteEncoding{UTF-8} 15 | --- 16 | 17 | # Introduction 18 | 19 | These instructions are aimed at people familiar with R and familiar with TCGA/GDC platforms and data types. They are intended to introduce the reader to applying the given correction. These instructions will only rarely, if ever, touch on the appropriateness of the correction algorithm or interpretation of output. See MBatch_01_InstallLinux for instructions on downloading test data. 20 | 21 | # Algorithm 22 | 23 | MP ByBatch performs a Median Polish By Batch correction for the batch type given in theBatchType, taking a BEA_DATA object (with data matrix and batch dataframe) and returning either a corrected matrix or a string containing the path to where the data file was written. 24 | 25 | # Output 26 | 27 | The primary output method for MBatch is to view results in the Batch Effects Website. Correction algorithms generally do not create graphical output and instead create TSV output files. 28 | 29 | # Usage 30 | 31 | MP_ByBatch(theBeaData, theBatchType, thePath = NULL, theWriteToFile = FALSE) 32 | 33 | # Arguments 34 | 35 | ## theBeaData 36 | 37 | BEA_DATA objects can be created by calls of the form new("BEA_DATA", theData, theBatches, theCovariates).
If you have no covariate data, use an empty data.frame created with data.frame() 38 | 39 | mData: 40 | Object of class "matrix" A matrix where the colnames are sample ids and the rownames are gene equivalents. All names should be strings, not factors. 41 | 42 | mBatches: 43 | Object of class "data.frame" A data.frame where the column "names" are batch types. The first batch "type" is "Sample". All names and values should be strings, not factors or numeric. 44 | 45 | mCovariates: 46 | Object of class "data.frame" A data.frame where the column "names" are covariate types. The first covariate "type" is "Sample". All names and values should be strings, not factors or numeric. 47 | 48 | ## theBatchType 49 | A string identifying the batch type to correct. 50 | 51 | ## thePath 52 | Output path for any files. 53 | 54 | ## theWriteToFile 55 | TRUE to write the corrected data to file and return the file pathname instead of the corrected matrix. 56 | 57 | # Example Call 58 | 59 | The following code is adapted from the tests/MP_ByBatch.R file. Data used is from the testing data as per the MBatch_01_InstallLinux document. In the future, we plan to make the output from MBatch more user friendly, but currently, this produces the following output at the command line. 
60 | 61 | ```{r, echo=TRUE} 62 | { 63 | library(MBatch) 64 | 65 | # set the paths 66 | invariantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-iset.tsv" 67 | variantFile="/bea_testing/MATRIX_DATA/rbn-pseudo-vset.tsv" 68 | theOutputDir="/bea_testing/output/RBN_Pseudoreplicates" 69 | theRandomSeed=314 70 | 71 | theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv" 72 | theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv" 73 | theOutputDir="/bea_testing/output/MP_ByBatch" 74 | theRandomSeed=314 75 | theBatchType="TSS" 76 | 77 | # make sure the output dir exists and is empty 78 | unlink(theOutputDir, recursive=TRUE) 79 | dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE) 80 | # load data 81 | myData <- mbatchLoadFiles(theGeneFile, theBatchFile) 82 | myData@mData <- mbatchTrimData(myData@mData, 100000) 83 | # call 84 | outputFile <- MP_ByBatch(theBeaData=myData, 85 | theBatchType=theBatchType, 86 | thePath=theOutputDir, 87 | theWriteToFile=TRUE) 88 | correctedMatrix <- readAsGenericMatrix(outputFile) 89 | print(correctedMatrix[1:4, 1:4]) 90 | } 91 | ``` 92 | 93 | # Example File Output 94 | 95 | The above code creates the following output file. File is named using the following naming convention: 96 | ANY_Corrections-MPByBatch.tsv 97 | The TSV file with the corrected dataset is written by the MBatch package. The end of the output shows a snippet from the corrected matrix. 
98 | -------------------------------------------------------------------------------- /package/vignettes/MBatch_05-09_AN_Adjusted.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using MBatch Corrections: AN_Adjusted" 3 | author: "Tod Casasent" 4 | date: "`r Sys.Date()`" 5 | #output: rmarkdown::html_vignette 6 | # install with vignettes using devtools::install(build_vignettes = TRUE) 7 | # build vignettes using devtools::build_vignettes() (inst/doc) 8 | output: 9 | pdf_document: 10 | number_sections: TRUE 11 | vignette: > 12 | %\VignetteIndexEntry{Using MBatch Corrections: AN_Adjusted} 13 | %\VignetteEngine{knitr::rmarkdown} 14 | %\VignetteEncoding{UTF-8} 15 | --- 16 | 17 | # Introduction 18 | 19 | These instructions are aimed at people familiar with R and familiar with TCGA/GDC platforms and data types. They are intended to introduce the reader to applying the given correction. These instructions will only rarely, if ever, touch on the appropriateness of the correction algorithm or interpretation of output. See MBatch_01_InstallLinux for instructions on downloading test data. 20 | 21 | # Algorithm 22 | 23 | AN Adjusted performs an ANOVA Adjusted correction taking a BEA_DATA object (with data matrix and batch dataframe) and returning either a corrected matrix or a string containing the path to where the data file was written. 24 | 25 | # Output 26 | 27 | The primary output method for MBatch is to view results in the Batch Effects Website. Correction algorithms generally do not create graphical output and instead create TSV output files. 28 | 29 | # Usage 30 | 31 | AN_Adjusted(theBeaData, theBatchType, thePath = NULL, theWriteToFile = FALSE) 32 | 33 | # Arguments 34 | 35 | ## theBeaData 36 | 37 | BEA_DATA objects can be created by calls of the form new("BEA_DATA", theData, theBatches, theCovariates).
If you have no covariate data, use an empty data.frame created with data.frame() 38 | 39 | mData: 40 | Object of class "matrix" A matrix where the colnames are sample ids and the rownames are gene equivalents. All names should be strings, not factors. 41 | 42 | mBatches: 43 | Object of class "data.frame" A data.frame where the column "names" are batch types. The first batch "type" is "Sample". All names and values should be strings, not factors or numeric. 44 | 45 | mCovariates: 46 | Object of class "data.frame" A data.frame where the column "names" are covariate types. The first covariate "type" is "Sample". All names and values should be strings, not factors or numeric. 47 | 48 | ## theBatchType 49 | A string identifying the batch type to correct. 50 | 51 | ## thePath 52 | Output path for any files. 53 | 54 | ## theWriteToFile 55 | TRUE to write the corrected data to file and return the file pathname instead of the corrected matrix. 56 | 57 | # Example Call 58 | 59 | The following code is adapted from the tests/AN_Adjusted.R file. Data used is from the testing data as per the MBatch_01_InstallLinux document. In the future, we plan to make the output from MBatch more user friendly, but currently, this produces the following output at the command line. 

```{r, echo=TRUE}
{
  # Example: run the AN_Adjusted correction for one batch type, write the
  # corrected data to a file, then read it back and print a small corner of it.
  library(MBatch)

  # set the paths
  theGeneFile="/bea_testing/MATRIX_DATA/matrix_data-Tumor.tsv"
  theBatchFile="/bea_testing/MATRIX_DATA/batches-Tumor.tsv"
  theOutputDir="/bea_testing/output/AN_Adjusted"
  theRandomSeed=314
  theBatchType="TSS"

  # make sure the output dir exists and is empty
  unlink(theOutputDir, recursive=TRUE)
  dir.create(theOutputDir, showWarnings=FALSE, recursive=TRUE)
  # load data
  myData <- mbatchLoadFiles(theGeneFile, theBatchFile)
  myData@mData <- mbatchTrimData(myData@mData, 100000)
  # call; with theWriteToFile=TRUE the return value is the pathname of the
  # written file rather than the corrected matrix
  outputFile <- AN_Adjusted(theBeaData=myData,
                            theBatchType=theBatchType,
                            thePath=theOutputDir,
                            theWriteToFile=TRUE)
  # read the written file back in and show the first 4 rows and 4 columns
  correctedMatrix <- readAsGenericMatrix(outputFile)
  print(correctedMatrix[1:4, 1:4])
}
```

# Example File Output

The above code creates the following output file. File is named using the following naming convention:
ANY_Corrections-ANAdjusted.tsv
The TSV file with the corrected dataset is written by the MBatch package. The end of the output shows a snippet from the corrected matrix.
98 | -------------------------------------------------------------------------------- /package/vignettes/boxplot_dynamic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/vignettes/boxplot_dynamic.PNG -------------------------------------------------------------------------------- /package/vignettes/boxplot_dynamic_data.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/vignettes/boxplot_dynamic_data.PNG -------------------------------------------------------------------------------- /package/vignettes/pca_plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/vignettes/pca_plus.png -------------------------------------------------------------------------------- /package/vignettes/supervised_clustering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/package/vignettes/supervised_clustering.png -------------------------------------------------------------------------------- /pdf/MBatch_01_InstallLinux.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_01_InstallLinux.pdf -------------------------------------------------------------------------------- /pdf/MBatch_01_InstallOSX.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_01_InstallOSX.pdf -------------------------------------------------------------------------------- /pdf/MBatch_01_InstallWindows.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_01_InstallWindows.pdf -------------------------------------------------------------------------------- /pdf/MBatch_02_RunningTests.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_02_RunningTests.pdf -------------------------------------------------------------------------------- /pdf/MBatch_03_StandardizedData.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_03_StandardizedData.pdf -------------------------------------------------------------------------------- /pdf/MBatch_03_UserData.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_03_UserData.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-00_ParametersBatchTypesValues.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-00_ParametersBatchTypesValues.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-01_SupervisedClusteringBatchesStructures.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-01_SupervisedClusteringBatchesStructures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-02_PCA_Regular_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-02_PCA_Regular_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-03_HierarchicalClustering_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-03_HierarchicalClustering_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-04_Boxplot_Group_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-04_Boxplot_Group_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-05_CDP_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-05_CDP_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-06_PCA_DualBatch_Structures.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-06_PCA_DualBatch_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-07_SupervisedClustering_Pairs_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-07_SupervisedClustering_Pairs_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-08_Boxplot_AllSamplesRLE_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-08_Boxplot_AllSamplesRLE_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_04-09_Boxplot_AllSamplesData_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_04-09_Boxplot_AllSamplesData_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-01_EBNPlus_CombineBatches.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-01_EBNPlus_CombineBatches.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-02_EBNPlus_Correction_Structures.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-02_EBNPlus_Correction_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-03_RBN_Replicates.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-03_RBN_Replicates.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-04_RBN_Pseudoreplicates.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-04_RBN_Pseudoreplicates.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-05_EB_withNonParametricPriors.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-05_EB_withNonParametricPriors.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-06_EB_withParametricPriors.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-06_EB_withParametricPriors.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-07_MP_Overall.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-07_MP_Overall.pdf 
-------------------------------------------------------------------------------- /pdf/MBatch_05-08_MP_ByBatch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-08_MP_ByBatch.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-09_AN_Adjusted.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-09_AN_Adjusted.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-10_AN_Unadjusted.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-10_AN_Unadjusted.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-11_EBNPlus_CheckData_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-11_EBNPlus_CheckData_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-12_EBNPlus_TrainAndValidateReplicates_Structures.pdf -------------------------------------------------------------------------------- /pdf/MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MD-Anderson-Bioinformatics/MBatch/18135408c91084eec02115dc791e12e771e8eec2/pdf/MBatch_05-13_EBNPlus_TrainAndValidateFromVector_Structures.pdf --------------------------------------------------------------------------------