├── DESCRIPTION
├── NAMESPACE
├── R
    ├── 00compMS2-class.R
    ├── HMDB.R
    ├── LMSD.R
    ├── MS1MatchSpectra.R
    ├── MS2fileInfo.R
    ├── PMIDsearch.R
    ├── ReSpect.R
    ├── Substructure_masses.R
    ├── T3DB.R
    ├── addNoMS2.R
    ├── adduct2mass.R
    ├── annoCompoundClass.R
    ├── cfmFragGraphGen.R
    ├── cleanAbs.R
    ├── combineMS2.Ions.R
    ├── combineMS2.R
    ├── combineMS2.Spectra.R
    ├── combineMS2.removeContam.R
    ├── compMS2Construct.R
    ├── compMS2Create.R
    ├── compMS2Miner.R
    ├── compMS2explorer.R
    ├── couchDBcurate.R
    ├── couchDBpingReq.R
    ├── deconvNoise.DNF.R
    ├── deconvNoise.R
    ├── deconvNoise.maxInt.R
    ├── dotProdMatrix.R
    ├── drugBank.R
    ├── dynamicNoiseFilter.R
    ├── exactMassEle.R
    ├── example_mzXML_MS1features.R
    ├── falsePosIdentify.R
    ├── getAbs.R
    ├── getLoginDetails.R
    ├── getTitles.R
    ├── lipidAbbrev.R
    ├── metFragAdducts.R
    ├── metFragCl.R
    ├── metID.CFM.R
    ├── metID.R
    ├── metID.buildConsensus.R
    ├── metID.chemSim.R
    ├── metID.compMS2ToMsp.R
    ├── metID.corrNetwork.R
    ├── metID.dbAnnotate.R
    ├── metID.dbProb.R
    ├── metID.matchSpectralDB.R
    ├── metID.metFrag.R
    ├── metID.optimConsensus.R
    ├── metID.predSMILES.R
    ├── metID.rtPred.R
    ├── metID.specSimNetwork.R
    ├── mfSearchPUG.R
    ├── monoMassMatch.R
    ├── optimCutOff.R
    ├── pubMedSearch.R
    ├── publishApp.R
    ├── runGitHubApp.R
    ├── signalGrouping.R
    ├── smiles2Form.R
    ├── smiles2MonoMassForm.R
    ├── subFormulae.R
    ├── subStructure.Annotate.R
    ├── subStructure.R
    ├── subStructure.prob.R
    ├── subStructure.probSummary.R
    ├── subsetCompMS2.R
    └── trueFalseSum.R
├── README.md
├── data
    ├── HMDB.RData
    ├── LMSD.RData
    ├── ReSpect.RData
    ├── Substructure_masses.RData
    ├── T3DB.RData
    ├── compMS2Example.RData
    ├── drugBank.RData
    ├── exactMassEle.RData
    ├── lipidAbbrev.RData
    ├── metFragAdducts.RData
    ├── negESIAdducts.RData
    └── posESIAdducts.RData
├── inst
    ├── doc
    │   └── compMS2Miner_Workflow.pdf
    ├── extdata
    │   ├── DDA_ACN_80.mzXML
    │   ├── DDA_MeOH_80.mzXML
    │   ├── MS1features_example.csv
    │   ├── MetFrag2.3-CL.jar
    │   ├── fraggraph-gen.exe
    │   ├── license.txt
    │   └── lpsolve55.dll
    ├── rmarkdown
    │   └── templates
    │   │   └── compMS2Template
    │   │       ├── skeleton
    │   │           ├── compMS2MinerLogo.png
    │   │           └── skeleton.Rmd
    │   │       └── template.yaml
    └── shiny-apps
    │   └── compMS2Explorer
    │       ├── global.R
    │       ├── server.R
    │       ├── ui.R
    │       └── www
    │           ├── DNFanimation.gif
    │           ├── compMS2MinerLogo.png
    │           ├── compMS2MinerLogoExApp.png
    │           ├── compMS2MinerLogoTutorial.png
    │           ├── dynamicNoiseFilterVideo.mp4
    │           ├── optimConsensusAnimation.gif
    │           ├── optimConsensusAnimation.mp4
    │           └── screenshotCompMS2Explorer_260_120.png
├── man
    ├── HMDB.Rd
    ├── LMSD.Rd
    ├── MS1MatchSpectra.Rd
    ├── MS2fileInfo.Rd
    ├── PMIDsearch.Rd
    ├── ReSpect.Rd
    ├── Substructure_masses.Rd
    ├── T3DB.Rd
    ├── addNoMS2.Rd
    ├── adduct2mass.Rd
    ├── annoCompoundClass.Rd
    ├── cfmFragGraphGen.Rd
    ├── cleanAbs.Rd
    ├── combineMS2.Ions.Rd
    ├── combineMS2.Rd
    ├── combineMS2.Spectra.Rd
    ├── combineMS2.removeContam.Rd
    ├── compMS2-class.Rd
    ├── compMS2Construct.Rd
    ├── compMS2Create.Rd
    ├── compMS2Explorer.Rd
    ├── compMS2Miner.Rd
    ├── couchDBcurate.Rd
    ├── couchDBpingReq.Rd
    ├── deconvNoise.DNF.Rd
    ├── deconvNoise.Rd
    ├── dotProdMatrix.Rd
    ├── drugBank.Rd
    ├── dynamicNoiseFilter.Rd
    ├── exactMassEle.Rd
    ├── example_mzXML_MS1features.Rd
    ├── falsePosIdentify.Rd
    ├── form2Mat.Rd
    ├── formulaFilter.Rd
    ├── getAbs.Rd
    ├── getLoginDetails.Rd
    ├── getTitles.Rd
    ├── lipidAbbrev.Rd
    ├── metFragAdducts.Rd
    ├── metFragCl.Rd
    ├── metID.CFM.Rd
    ├── metID.Rd
    ├── metID.buildConsensus.Rd
    ├── metID.chemSim.Rd
    ├── metID.compMS2toMsp.Rd
    ├── metID.corrNetwork.Rd
    ├── metID.dbAnnotate.Rd
    ├── metID.dbProb.Rd
    ├── metID.matchSpectralDB.Rd
    ├── metID.metFrag.Rd
    ├── metID.optimConsensus.Rd
    ├── metID.predSMILES.Rd
    ├── metID.rtPred.Rd
    ├── metID.specSimNetwork.Rd
    ├── mfSearchPUG.Rd
    ├── monoMassMatch.Rd
    ├── optimCutOff.Rd
    ├── pubMedSearch.Rd
    ├── publishApp.Rd
    ├── runGitHubApp.Rd
    ├── signalGrouping.Rd
    ├── smiles2Form.Rd
    ├── smiles2MonoMassForm.Rd
    ├── subFormulae.Rd
    ├── subStructure.Annotate.Rd
    ├── subStructure.Rd
    ├── subStructure.prob.Rd
    ├── subStructure.probSummary.Rd
    ├── subsetCompMS2.Rd
    └── trueFalseSum.Rd
└── vignettes
    ├── compMS2Miner_Workflow.R
    ├── compMS2Miner_Workflow.Rmd
    ├── compMS2Miner_Workflow.html
    └── compMS2Miner_Workflow.pdf


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: compMS2Miner
 2 | Type: Package
 3 | Title: an automatable metabolite identification, visualization and data-sharing
 4 |   R package for high-resolution LC-MS datasets
 5 | Version: 2.3.0
 6 | Date: 2017-02-22
 7 | Authors@R: c(
 8 |     person("William MB", "Edmands", role = c("aut", "cre"), email="edmandsw@berkeley.edu"),
 9 |     person("Lauren", "Petrick", role = "aut", email = "lpetrick@berkeley.edu"),
10 |     person("Dinesh K", "Barupal", role = "aut", email = "dinkumar@ucdavis.edu"),
11 |     person("Augustin", "Scalbert", role = "aut", email = "scalberta@iarc.fr"),
12 |     person("Mark J", "Wilson", role = "aut", email = "mwilson9@tulane.edu"),
13 |     person("Jeffrey K", "Wickliffe", role = "aut", email = "jwicklif@tulane.edu"),
14 |     person("Stephen M", "Rappaport", role = "aut", email = "srappaport@berkeley.edu"))
15 | Author: William MB Edmands [aut, cre], Lauren Petrick [aut], Dinesh K Barupal
16 |     [aut], Augustin Scalbert [aut], Mark J Wilson [aut], Jeffrey K Wickliffe [aut], Stephen M Rappaport [aut]
17 | Maintainer: William MB Edmands <edmandsw@berkeley.edu>
18 | Description: an automatable metabolite identification, visualization and 
19 |   data-sharing R package for high-resolution LC-MS datasets
20 | Depends:
21 | 	R (>= 3.2.0)
22 | Imports:
23 | 	mzR (>= 2.8.1),
24 | 	foreach (>= 1.4.3),
25 | 	Rcpp (>= 0.12.9),
26 | 	shiny (>= 1.0.1),
27 | 	couchDB (>= 1.4.1),
28 | 	fastcluster (>= 1.1.22),
29 | 	data.table (>= 1.10.4),
30 | 	doSNOW (>= 1.0.14),
31 | 	DT (>= 0.2),
32 | 	methods (>= 3.3.2),
33 | 	parallel (>= 3.3.2),
34 | 	RcppEigen (>= 0.3.2.9.1),
35 | 	reshape2 (>= 1.4.2),
36 | 	rjson (>= 0.2.15),
37 | 	igraph (>= 1.0.1),
38 | 	rhandsontable (>= 0.3.4),
39 | 	PubMedWordcloud (>= 0.3.3),
40 | 	RCurl (>= 1.95.4.8),
41 | 	XML (>= 3.98.1.5)
42 | Suggests:
43 | 	knitr (>= 1.15.1),
44 | 	rmarkdown (>= 1.3),
45 | 	ChemmineR (>= 2.26.1),
46 | 	ChemmineOB (>= 1.12.0),
47 | 	rsconnect (>= 0.7.0.2),
48 | 	devtools (>= 1.12.0),
49 | 	fingerprint (>= 3.5.4),
50 | 	rcdk (>= 3.3.8),
51 | 	tcltk2 (>= 1.2.11),
52 | 	randomForest (>= 4.6.12),
53 | 	caret (>= 6.0.73),
54 | 	DEoptim (>= 2.2.4),
55 | 	splashR (>= 0.0.4)
56 | BugReports: https://github.com/WMBEdmands/compMS2Miner/issues/new
57 | URL: https://doi.org/10.1021/acs.analchem.6b02394
58 | LazyData: true
59 | LazyLoad: true
60 | VignetteBuilder: knitr
61 | License: GPL (>= 3) 
62 | RoxygenNote: 6.0.1
63 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export("BestAnno<-")
 4 | export("CFM<-")
 5 | export("Comments<-")
 6 | export("DBanno<-")
 7 | export("MetFrag<-")
 8 | export("Parameters<-")
 9 | export("compSpectra<-")
10 | export("couchDBconn<-")
11 | export("filePaths<-")
12 | export("metaData<-")
13 | export("network<-")
14 | export("rtPred<-")
15 | export("spectralDB<-")
16 | export("subStrAnno<-")
17 | export(BestAnno)
18 | export(CFM)
19 | export(Comments)
20 | export(DBanno)
21 | export(MS2fileInfo)
22 | export(MetFrag)
23 | export(PMIDsearch)
24 | export(Parameters)
25 | export(adduct2mass)
26 | export(annoCompoundClass)
27 | export(cleanAbs)
28 | export(combineMS2)
29 | export(combineMS2.Ions)
30 | export(combineMS2.Spectra)
31 | export(combineMS2.removeContam)
32 | export(compMS2Construct)
33 | export(compMS2Explorer)
34 | export(compSpectra)
35 | export(couchDBconn)
36 | export(couchDBcurate)
37 | export(couchDBpingReq)
38 | export(deconvNoise)
39 | export(deconvNoise.DNF)
40 | export(dotProdMatrix)
41 | export(dynamicNoiseFilter)
42 | export(falsePosIdentify)
43 | export(filePaths)
44 | export(getAbs)
45 | export(getLoginDetails)
46 | export(getTitles)
47 | export(metID)
48 | export(metID.CFM)
49 | export(metID.buildConsensus)
50 | export(metID.chemSim)
51 | export(metID.compMS2toMsp)
52 | export(metID.corrNetwork)
53 | export(metID.dbAnnotate)
54 | export(metID.dbProb)
55 | export(metID.matchSpectralDB)
56 | export(metID.metFrag)
57 | export(metID.optimConsensus)
58 | export(metID.predSMILES)
59 | export(metID.rtPred)
60 | export(metID.specSimNetwork)
61 | export(metaData)
62 | export(mfSearchPUG)
63 | export(network)
64 | export(optimCutOff)
65 | export(pubMedSearch)
66 | export(publishApp)
67 | export(rtPred)
68 | export(runGitHubApp)
69 | export(signalGrouping)
70 | export(spectralDB)
71 | export(subStrAnno)
72 | export(subStructure)
73 | export(subStructure.Annotate)
74 | export(subStructure.prob)
75 | export(subStructure.probSummary)
76 | export(subsetCompMS2)
77 | export(trueFalseSum)
78 | exportClasses(compMS2)
79 | 


--------------------------------------------------------------------------------
/R/MS2fileInfo.R:
--------------------------------------------------------------------------------
 1 | #' MS2 file information
 2 | #' 
 3 | #' Extract precursor mass-to-charge ratio, retention time, scan type and total
 4 | #' ion current for each MS2 file scan.
 5 | #' 
 6 | #' @param MS2file MS2 file (.mzXML) object whose scan headers can be read with
 7 | #' \code{mzR::header} and whose \code{length} is the number of scans.
 8 | #' @param TICfilter numeric minimum total ion current. Scans with a total ion
 9 | #' current greater than or equal to this value are flagged in "TICaboveFilter".
10 | #' @return data frame with number of rows equal to the number of scans and 11 
11 | #' numeric columns:
12 | #' 1. "MS.scanType" = MS scan type, 2 if the scan msLevel is 2, otherwise 1.
13 | #' 2. "precursorMz" = numeric mass-to-charge ratio for each MS2 scan
14 | #' 3. "retentionTime" = numeric precursor retention time for each MS2 scan
15 | #' 4. "TIC" = Total ion current for this scan.
16 | #' 5. "TICaboveFilter" = boolean if TIC above minimum TIC filter equal 1, else 0
17 | #' 6. "precursorIntensity" = MS2 precursor intensity, if MS1 scan returns
18 | #' zero
19 | #' 7. "collisionEnergy" = collision energy (eV)
20 | #' 8. "basePeakMz" = mass-to-charge ratio of the base peak for the scan
21 | #' 9. "basePeakIntensity" = intensity of the base peak for the scan 
22 | #' 10. "acquisitionNum" = acquisition number reported in the scan header
23 | #' 11. "precursorScanNum" = precursor scan number reported in the scan header
24 | #' @export
25 | MS2fileInfo <- function(MS2file=NULL, TICfilter=NULL){
26 |   # error handling 
27 |   if(is.null(TICfilter)){
28 |     stop("A TICfilter value must be supplied (e.g. 10,000)")
29 |   }
30 |   if(is.null(MS2file)){
31 |     stop("an MS2file (.mzXML) object must be supplied")
32 |   }
33 |   infoNames <- c("MS.scanType", "precursorMz", "retentionTime", "TIC", 
34 |                  "TICaboveFilter", "precursorIntensity", 
35 |                  "collisionEnergy", "basePeakMz", "basePeakIntensity",
36 |                  "acquisitionNum", "precursorScanNum")
37 |   # summarise one scan; the header is read once per scan rather than once per
38 |   # extracted field
39 |   Info <- function(x){
40 |     hd <- mzR::header(MS2file, x)
41 |     c((hd$msLevel == 2) + 1,
42 |       round(hd$precursorMZ, digits = 6),
43 |       round(hd$retentionTime, digits = 4),
44 |       hd$totIonCurrent,
45 |       (hd$totIonCurrent >= TICfilter) * 1,
46 |       hd$precursorIntensity,
47 |       round(hd$collisionEnergy, digits = 1),
48 |       round(hd$basePeakMZ, digits = 4),
49 |       round(hd$basePeakIntensity, digits = 1),
50 |       hd$acquisitionNum,
51 |       hd$precursorScanNum)
52 |   }
53 |   # vapply always returns a (number of fields) x (number of scans) matrix, also
54 |   # for zero or one scan, and fails loudly if a header field is missing instead
55 |   # of silently producing misaligned columns
56 |   tmp.mat <- vapply(seq_len(length(MS2file)), FUN = Info, 
57 |                     FUN.VALUE = numeric(length(infoNames)))
58 |   # create data frame, one row per scan
59 |   tmp.df <- as.data.frame(t(tmp.mat), stringsAsFactors = FALSE) 
60 |   # add column names
61 |   colnames(tmp.df) <- infoNames
62 |   
63 |   return(tmp.df)
64 | }
65 | 


--------------------------------------------------------------------------------
/R/PMIDsearch.R:
--------------------------------------------------------------------------------
 1 | #' customized PMID search function adapted from PubMedWordcloud package v 0.3.2
 2 | #' 
 3 | #' Queries the NCBI E-utilities esearch service (db=pubmed) for a quoted search
 4 | #' phrase and returns the hit count together with the PubMed ids found.
 5 | #' @param keys character compound name (search phrase) to search pubmed with.
 6 | #' Spaces are replaced by "+" before the phrase is inserted into the query URL.
 7 | #' @param n numeric maximum number of results to return
 8 | #' @return 0 if the service reports 'QuotedPhraseNotFound', otherwise a vector
 9 | #' whose first element is taken from the Count node of the search result and
10 | #' whose remaining elements are the values found in the IdList node.
11 | #' @export
12 | PMIDsearch <- function(keys=NULL, n=1000){
13 |   # error handling
14 |   if(is.null(keys)){
15 |     stop("argument keys (compound name to search pubmed with) must be supplied")
16 |   }
17 |   # the search phrase is quoted so that it is searched as a phrase; the tool
18 |   # and email parameters are plain (unquoted) values, the previous string left
19 |   # an unbalanced quote in the query
20 |   searchUrl <- paste0('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term="',
21 |                     gsub(" ", "+", keys), paste0('"&retmax=', n), 
22 |                     '&tool=compMS2Miner&email=edmandsw@berkeley.edu')
23 |   hlpURL <- RCurl::getURL(searchUrl, .opts=RCurl::curlOptions(followlocation=TRUE))
24 |   doc <- XML::xmlTreeParse(hlpURL, asText = TRUE)
25 |   searchRes <- doc[["doc"]][["eSearchResult"]]
26 |   IdlistHlp <- unlist(searchRes["IdList"])
27 |   if('QuotedPhraseNotFound' %in% unlist(searchRes[["WarningList"]])){
28 |     # the phrase was not found as quoted: report zero hits
29 |     Idlist <- 0
30 |   } else {
31 |     # NOTE(review): the third element of the unlisted Count node is presumably
32 |     # the text value of the node - confirm against the XML package output
33 |     Count <- unlist(searchRes[["Count"]])[3]
34 |     Idlist <- c(Count, IdlistHlp[grep("value$", names(IdlistHlp))])
35 |   }
36 |   return(Idlist)
37 | } # end function
38 | 


--------------------------------------------------------------------------------
/R/Substructure_masses.R:
--------------------------------------------------------------------------------
 1 | #' Collection of mass spectral fragment/ neutral loss/ adduct substructures from 
 2 | #' literature sources
 3 | #'
 4 | #' This dataset contains information on substructures and Electrospray adducts/
 5 | #' artefacts manually curated from literature sources. 
 6 | #' The variables are as follows:
 7 | #'
 8 | #' \itemize{
 9 | #'  \item Entry_no.  substructure number (1 -- 268)          
10 | #'  \item SubStructure. numeric logical (0 and 1) is the entry a fragment/neutral
11 | #'  loss substructure.       
12 | #'  \item ESI_adduct.  numeric logical (0 and 1) is the entry an atmospheric 
13 | #'  pressure/ electrospray adduct.          
14 | #'  \item SubStructure_type. parent substructure type (e.g. sulfate, glucuronide) 
15 | #'  \item Abbrev_name. abbreviated name of substructure type within square 
16 | #'  brackets. This abbreviated version can be displayed more easily within a 
17 | #'  results table for instance (e.g. [PC_184]).       
18 | #'  \item name. Full descriptive name of the neutral loss/ fragment/ electrospray
19 | #'  adduct within a square bracket (e.g. [NAcCysteine Acetamide]).              
20 | #'  \item IUPAC. IUPAC name for fragment/ neutral loss.               
21 | #'  \item SMILES. Canonical SMILES code of fragment/ neutral losses.             
22 | #'  \item molecular_formula. Molecular formula of fragment/ neutral losses.           
23 | #'  \item monoisotopic_weight. Monoisotopic weight of fragment/ neutral losses.
24 | #'  (0.9840156 -- 388.0853100)
25 | #'  \item Neut_loss. numeric logical (0 and 1) is the entry a neutral loss.             
26 | #'  \item frag. numeric logical (0 and 1) is the entry a fragment.               
27 | #'  \item pos. numeric logical (0 and 1) is the entry found in positive mode.                 
28 | #'  \item neg. numeric logical (0 and 1) is the entry found in negative mode.                
29 | #'  \item mass_shift. the expected monoisotopic mass shift associated with this
30 | #'  substructure/ adduct type. (17.02655 -- 360.12678)   
31 | #'  \item ref. the literature reference for the entry.
32 | #'  }
33 | #' 
34 | #' @docType data
35 | #' @keywords datasets
36 | #' @name Substructure_masses
37 | #' @usage data(Substructure_masses)
38 | #' @source 
39 | #' 1. Levsen, K. et. al. Structure elucidation of phase II metabolites by 
40 | #' tandem mass spectrometry: an overview, Journal of Chromatography A, 
41 | #' Volume 1067, Issues 1-2, 4 March 2005, Pages 55-72, ISSN 0021-9673
42 | #' \url{http://dx.doi.org/10.1016/j.chroma.2004.08.165}.
43 | #' 
44 | #' 2. Even-electron ions: a systematic study of the neutral species lost in the 
45 | #' dissociation of quasi-molecular ions.
46 | #' 
47 | #' 3. Brügger, B. et. al. Quantitative analysis of biological membrane lipids at 
48 | #' the low picomole level by nano-electrospray ionization tandem mass 
49 | #' spectrometry. Proc. Natl. Acad. Sci. U.S.A., 94, 2339-2344 (1997).
50 | #' 
51 | #' 4. Fouquet, T et. al. Electrospray tandem mass spectrometry combined with 
52 | #' authentic compound synthesis for structural characterization of an 
53 | #' octamethylcyclotetrasiloxane plasma polymer.
54 | #' 
55 | #' @format A data frame with 263 rows and 16 variables
56 | NULL
57 | 


--------------------------------------------------------------------------------
/R/addNoMS2.R:
--------------------------------------------------------------------------------
 1 | #' add no MS2 data to compMS2 class object internal to corrNetwork function
 2 | #' 
 3 | #' Appends empty composite spectra and the corresponding MS1 metaData for 
 4 | #' features without matched MS2 spectra. Any existing network is reset and, if
 5 | #' present, DBanno and Comments are extended with empty entries.
 6 | #' 
 7 | #' @param object a "compMS2" class object.
 8 | #' @param specNames character vector of composite spectrum names.
 9 | #' @param eicMzRt data.frame of EICnos/unique id, mz values, rt values and (if applicable)
10 | #' ESI adducts/in-source fragments in 4 columns. One row for each entry of
11 | #' specNames, in the same order.
12 | #' @param ... option arguments to be passed along.
13 | #' @return a "compMS2" class object with noMS2 data added to the appropriate slots.
14 | setGeneric("addNoMS2", function(object, ...) standardGeneric("addNoMS2"))
15 | 
16 | setMethod("addNoMS2", signature = "compMS2", function(object, specNames=NULL,
17 |                                                       eicMzRt=NULL, ...){
18 |   # error handling
19 |   stopifnot(!is.null(object))
20 |   # methods::is gives a single TRUE/FALSE, class(object) != "compMS2" does not
21 |   # when an object has more than one class
22 |   if(!methods::is(object, "compMS2")){
23 |     stop('argument object must be a "compMS2" class object')
24 |   }
25 |   stopifnot(!is.null(specNames))
26 |   if(!is.character(specNames)){
27 |     stop('argument specNames must be a character vector') 
28 |   }
29 |   stopifnot(!is.null(eicMzRt))
30 |   if(!is.data.frame(eicMzRt)){
31 |     stop('argument eicMzRt must be a data.frame') 
32 |   }
33 |   # the element names below are built assuming exactly 4 values per spectrum
34 |   if(ncol(eicMzRt) != 4 || nrow(eicMzRt) != length(specNames)){
35 |     stop('argument eicMzRt must have 4 columns and one row for each entry of specNames')
36 |   }
37 |   warning('N.B. You must recalculate any networks after adding features with no MS2 spectra matched.\n', immediate. = TRUE)
38 |   network(object) <- list()
39 |   # empty placeholders, one per new composite spectrum name
40 |   emptyList <- vector('list', length(specNames))
41 |   names(emptyList) <- specNames
42 |   compSpectra(object) <- c(compSpectra(object), emptyList)
43 |   
44 |   metaEleNames <- paste0(rep(specNames, each=4), c('_MS1_EICno', '_MS1_mz', '_MS1_RT', '_MS1_adduct'))
45 |   # flatten row-wise so that each run of 4 values belongs to one spectrum
46 |   eicMzRt <- as.vector(t(eicMzRt))
47 |   names(eicMzRt) <- metaEleNames
48 |   
49 |   metaDataTmp <- split(eicMzRt, rep(specNames, each=4))
50 |   # split returns the groups in sorted factor level order: restore the order
51 |   # of specNames so that metaData stays aligned with the compSpectra appended
52 |   # above
53 |   metaDataTmp <- metaDataTmp[specNames]
54 |   # convert to integer and numeric
55 |   metaDataTmp <- lapply(metaDataTmp, function(x){
56 |     tmpNames <- names(x)
57 |     names(x) <- NULL
58 |     x <- split(x, 1:4)
59 |     names(x) <- tmpNames
60 |     x[[1]] <- as.integer(x[[1]])
61 |     x[[2]] <- round(as.numeric(x[[2]]), 4)
62 |     x[[3]] <- round(as.numeric(x[[3]]), 3)
63 |     return(x)
64 |   })
65 |   metaData(object) <- c(metaData(object), metaDataTmp)
66 |   
67 |   if(length(DBanno(object)) > 0){
68 |     DBanno(object) <- c(DBanno(object), emptyList)
69 |   }
70 |   # if(length(BestAnno(object)) > 0){
71 |   #   BestAnno(object) <- c(BestAnno(object), emptyList)
72 |   # }
73 |   if(nrow(Comments(object)) > 0){
74 |     commentsTmp <- Comments(object)
75 |     # blank comment rows for the new spectra, only the name column is filled
76 |     emptyDf <- data.frame(matrix('', nrow=length(specNames), ncol=ncol(commentsTmp)), 
77 |                           stringsAsFactors = FALSE)
78 |     colnames(emptyDf) <- colnames(commentsTmp)
79 |     emptyDf$compSpectrum <- specNames
80 |     commentsTmp <- rbind(commentsTmp, emptyDf)
81 |     Comments(object) <- commentsTmp
82 |   }
83 |   return(object)
84 | }) # end function
85 | 


--------------------------------------------------------------------------------
/R/cfmFragGraphGen.R:
--------------------------------------------------------------------------------
 1 | #' cfm fragment graph generation from table of annotations
 2 | #'@param bestAnnoSubRow unique compMS2@BestAnno entries (only M-H (neg mode) and M+H (pos mode) can be in silico fragmented by CFM and no SubStr_types).
 3 | #'@param fragGraphGenExe character full path to fraggraph-gen.exe file (internal to compMS2Miner package).
 4 | #'@param compSpecAll data.frame 3 columns mass, intensity and comp spectrum index number. 
 5 | #'@param keepTempFiles logical if TRUE the fraggraph-gen output is written to a
 6 | #' 'CFM_results' sub-directory of the working directory and kept, otherwise it
 7 | #' is written to a temporary directory which is removed when the function exits.
 8 | #'@param mode character ionization polarity (either 'pos' or 'neg').
 9 | #'@param frag_mzabs numeric delta predicted-observed fragment mass accuracy for matching.
10 | #'@return if fraggraph-gen process completed and fragments were matched then a 
11 | #' list of fragments matched to the corresponding composite spectra is returned,
12 | #' otherwise NULL.
13 | cfmFragGraphGen <- function(bestAnnoSubRow=NULL, fragGraphGenExe=NULL, compSpecAll=NULL, keepTempFiles=FALSE, mode='pos', frag_mzabs=0.05){
14 |   # polarity dependent part of the fraggraph-gen command line
15 |   fragMode <- ifelse(mode == 'pos', ' 1 + ', ' 1 - ')
16 |   if(keepTempFiles == TRUE){
17 |     tmpCFMdir <-  paste0(getwd(), '/CFM_results/')
18 |     suppressWarnings(dir.create(tmpCFMdir))
19 |     # the sub-directory may already exist from a previous run
20 |     dir.create(paste0(tmpCFMdir, bestAnnoSubRow$DBid), showWarnings = FALSE)
21 |     csvNameTmp <- paste0(tmpCFMdir, bestAnnoSubRow$DBid, "/cfmFragOutput.csv")  
22 |   } else {
23 |     tmpCFMdir <-  tempfile(pattern = "compMS2Miner")
24 |     dir.create(tmpCFMdir)
25 |     # do not leave one temporary directory behind for every annotation
26 |     on.exit(unlink(tmpCFMdir, recursive = TRUE), add = TRUE)
27 |     csvNameTmp <- paste0(tmpCFMdir, "/tempCFMoutput", bestAnnoSubRow$DBid, ".csv")   
28 |   }
29 |   commandTmp <- paste0('"', fragGraphGenExe, '" "',  bestAnnoSubRow$SMILES, '"', fragMode, ' fragonly "', csvNameTmp,'"')  
30 |   # intern = TRUE captures the console output of the executable
31 |   system(commandTmp, intern = TRUE)
32 |   
33 |   # file.info returns NA size if the csv was never written, isTRUE avoids an
34 |   # error in that case
35 |   nonZeroSize <- isTRUE(file.info(csvNameTmp)$size > 0)
36 |   if(!nonZeroSize){
37 |     return(invisible(NULL))
38 |   }
39 |   # load csv, space delimited: rank, mass and fragment SMILES
40 |   resTmp <- readLines(csvNameTmp)
41 |   resTmp <- do.call(rbind, strsplit(resTmp, ' '))
42 |   specIdsTmp <- as.numeric(strsplit(as.character(bestAnnoSubRow$querySpecDbId), ' ')[[1]])
43 |   compSpecSubTmp <- compSpecAll[compSpecAll$featureSubSet %in% specIdsTmp, , drop=FALSE]
44 |   # total intensity of each composite spectrum
45 |   sumTic <- tapply(compSpecSubTmp$intensity, compSpecSubTmp$featureSubSet, sum)  
46 |   indxTmp <- match(compSpecSubTmp$featureSubSet, names(sumTic))
47 |   compSpecSubTmp$sumTIC <- sumTic[indxTmp]
48 |   # row indices of matched peaks, named by the row of the matching fragment
49 |   matchedPeaks <- numeric(0)
50 |   for(j in seq_len(nrow(resTmp))){
51 |     # calc mass difference
52 |     indxTmp <- which(abs(compSpecSubTmp$mass - as.numeric(resTmp[j, 2])) < frag_mzabs)
53 |     if(length(indxTmp) > 0){
54 |       names(indxTmp) <- rep(j, length(indxTmp))
55 |       matchedPeaks <- c(matchedPeaks, indxTmp)
56 |     }
57 |   }
58 |   # no predicted fragment matched any peak
59 |   if(length(matchedPeaks) == 0){
60 |     return(invisible(NULL))
61 |   }
62 |   compSpecSubTmp <- compSpecSubTmp[matchedPeaks, , drop=FALSE]
63 |   compSpecSubTmp <- cbind(compSpecSubTmp, resTmp[as.numeric(names(matchedPeaks)), , drop=FALSE])
64 |   featureSubSetTmp <- compSpecSubTmp$featureSubSet
65 |   compSpecSubTmp$featureSubSet <- NULL
66 |   colnames(compSpecSubTmp)[4:ncol(compSpecSubTmp)] <- c('CFM_rank', 'CFM_mass', 'CFM_fragSMILES')
67 |   compSpecSubTmp$CFM_rank <- paste0(compSpecSubTmp$CFM_rank, ' of ', nrow(resTmp), ' fragments')
68 |   # proportion of the spectrum intensity explained by each matched peak
69 |   compSpecSubTmp$CFM_fragPropEx <- compSpecSubTmp$intensity/compSpecSubTmp$sumTIC
70 |   # count every peak only once per spectrum even if several fragments match it
71 |   maxFragIntCompSpec <- paste0(featureSubSetTmp, '_', compSpecSubTmp$mass)
72 |   maxFragIntCompSpec <- tapply(compSpecSubTmp$CFM_fragPropEx, maxFragIntCompSpec, max)
73 |   sumExTmp <- tapply(maxFragIntCompSpec, gsub('_.+', '', names(maxFragIntCompSpec)), sum)
74 |   
75 |   indxTmp <- match(featureSubSetTmp, names(sumExTmp))
76 |   compSpecSubTmp$CFM_totPropEx <- sumExTmp[indxTmp]
77 |   # add in best Anno info
78 |   compSpecSubTmp <- cbind(compSpecSubTmp, bestAnnoSubRow[rep(1, nrow(compSpecSubTmp)), c('WebAddress', 'DBid', 'DBname', 'SMILES'), drop=FALSE])
79 |   compSpecSubTmp <- split(compSpecSubTmp, f=featureSubSetTmp)
80 |   return(compSpecSubTmp)
81 | } # end cfmFragGraphGen function
82 | 


--------------------------------------------------------------------------------
/R/cleanAbs.R:
--------------------------------------------------------------------------------
 1 | #' Adapted from PubMedWordCloud (cleanAbstracts) to work with compMS2Miner
 2 | #' 
 3 | #' Punctuation is always removed, the remaining clean up steps are optional.
 4 | #' @param Abs	output of getAbs, or just a paragraph of text
 5 | #' @param rmNum logical if TRUE numbers are removed.
 6 | #' @param tolw logical if TRUE text is converted to lower case.
 7 | #' @param toup logical if TRUE text is converted to upper case.
 8 | #' @param rmWords logical if TRUE english stop words (and yrWords) are removed.
 9 | #' @param yrWords character vector of further words to remove, only used if
10 | #' rmWords is TRUE.
11 | #' @param stemDoc logical if TRUE words are stemmed.
12 | #' @return data.frame of words ("word") and their frequency ("freq") sorted by
13 | #' decreasing frequency.
14 | #' @export
15 | cleanAbs <- function(Abs, rmNum = TRUE, tolw = TRUE, toup = FALSE, 
16 |           rmWords = TRUE, yrWords = NULL, stemDoc = FALSE){
17 |   # build corpus and strip punctuation
18 |   absCorpus <- tm::tm_map(tm::Corpus(tm::VectorSource(Abs)), tm::removePunctuation)
19 |   # optional clean up steps, applied in this fixed order
20 |   if (rmNum == TRUE) {
21 |     absCorpus <- tm::tm_map(absCorpus, function(x) tm::removeNumbers(x))
22 |   }
23 |   if (tolw == TRUE) {
24 |     absCorpus <- tm::tm_map(absCorpus, tolower)
25 |   }
26 |   if (toup == TRUE) {
27 |     absCorpus <- tm::tm_map(absCorpus, toupper)
28 |   }
29 |   if (rmWords == TRUE) {
30 |     absCorpus <- tm::tm_map(absCorpus, tm::removeWords, tm::stopwords("english"))
31 |     if (!is.null(yrWords)) {
32 |       absCorpus <- tm::tm_map(absCorpus, tm::removeWords, yrWords)
33 |     }
34 |   }
35 |   if (stemDoc == TRUE) {
36 |     absCorpus <- tm::tm_map(absCorpus, tm::stemDocument)
37 |   }
38 |   absCorpus <- tm::tm_map(absCorpus, tm::PlainTextDocument)
39 |   
40 |   # split the cleaned text on spaces and discard empty tokens
41 |   tokens <- unlist(strsplit(absCorpus$content$content, ' '))
42 |   tokens <- gsub(' ', '', tokens)
43 |   tokens <- tokens[tokens != '']
44 |   # word frequency table, most frequent word first
45 |   wordCounts <- as.matrix(table(tokens))
46 |   wordFreq <- sort(rowSums(wordCounts), decreasing = TRUE)
47 |   return(data.frame(word = names(wordFreq), freq = wordFreq))
48 | } # end function
49 | 


--------------------------------------------------------------------------------
/R/combineMS2.Ions.R:
--------------------------------------------------------------------------------
  1 | #' Combine spectra peaks within individual spectra
  2 | #' 
  3 | #' @details group ions according to absolute m/z error. 
  4 | #' The default parameters are suitable for a high-resolution Q-ToF.
  5 | #' Following ion grouping, signal intensities are summed and an average m/z 
  6 | #' calculated for each ion group. This signal summing serves to increase the
  7 | #' overall intensity of true ion signal across multiple scans and reduce the 
  8 | #' contribution of noise within the spectrum. Calculation of the central tendency
  9 | #' of each ion group serve to homogenize the random error and improve the mass
 10 | #' accuracy of each spectrum peak.    
 11 | #' 
 12 | #' @param object a "compMS2" class object.
 13 | #' @param mzError interpeak absolute m/z error for spectra signal grouping (default = 0.01).
 14 | #' @param minPeaks Minimum number of peaks per spectrum (default = 1). 
 15 | #' @param ... option arguments to be passed along.
 16 | #' @param verbose logical if TRUE display progress bars.
 17 | #' @return A compMS2 object with ion grouped composite spectra. Spectra for
 18 | #' which signalGrouping returned a character value (no peaks returned) are 
 19 | #' removed together with their metaData entries.
 20 | #' @export
 21 | setGeneric("combineMS2.Ions", function(object, ...) standardGeneric("combineMS2.Ions"))
 22 | 
 23 | setMethod("combineMS2.Ions", signature = "compMS2", function(object, 
 24 |                                                              mzError=0.01, 
 25 |                                                              minPeaks=1,
 26 |                                                              verbose=TRUE){
 27 |   # error handling
 28 |   if(!methods::is(object, "compMS2")){
 29 |     stop("argument object is not an CompMS2 class object")
 30 |   }
 31 |   # extract the spectra once, they are needed by both branches below
 32 |   specList <- compSpectra(object)
 33 |   nSpec <- length(specList)
 34 |   message(paste0("Grouping ions in ", nSpec, " spectra..."))
 35 |   flush.console()
 36 |   
 37 |   nCores <- Parameters(object)$nCores
 38 |   # isTRUE: a missing (NULL/NA) nCores falls back to the serial branch
 39 |   if(isTRUE(nCores > 0)){
 40 |     if(!require(foreach)){
 41 |       stop('package foreach must be installed to use this function in parallel')
 42 |     }
 43 |     if(!require(doSNOW)){
 44 |       stop('package doSNOW must be installed to use this function in parallel')
 45 |     }
 46 |     # create a cluster using the doSNOW package
 47 |     message(paste0("Starting SNOW cluster with ", nCores, " local sockets..."))
 48 |     flush.console()
 49 |     
 50 |     cl <- parallel::makeCluster(nCores, outfile='') 
 51 |     # stop SNOW cluster when the function exits, also if a worker fails
 52 |     on.exit(parallel::stopCluster(cl), add = TRUE)
 53 |     doSNOW::registerDoSNOW(cl)
 54 |     if(verbose == TRUE){
 55 |       # report progress in steps of 5 percent
 56 |       progSeq <- round({nSpec * seq(0, 1, 0.05)}, 0)
 57 |       progSeq[1] <- 1
 58 |       cat(paste0('Progress (', nSpec, ' spectra):\n'))
 59 |       progress <- function(n){if(n %in% progSeq){cat(paste0(round({n/nSpec} * 100, 0), '%  '))}}
 60 |       opts <- list(progress=progress)
 61 |     } else {
 62 |       opts <- list(progress=NULL)
 63 |     }
 64 |     # foreach and dopar from foreach package
 65 |     sign.group <- foreach(j = seq_len(nSpec),
 66 |                           .packages = c('stats'), .options.snow=opts) %dopar% {
 67 |                             signalGrouping(spectrum.df = specList[[j]], 
 68 |                                            mzError=mzError,
 69 |                                            minPeaks=minPeaks)}
 70 |   } else {
 71 |     # create list to store results
 72 |     sign.group <- vector("list", nSpec)
 73 |     # create progress bar (txtProgressBar needs max > min)
 74 |     showBar <- verbose == TRUE && nSpec > 0
 75 |     if(showBar){ pb <- txtProgressBar(min=0, max=nSpec, style=3)}
 76 |     
 77 |     for(j in seq_len(nSpec)){
 78 |       #progress bar
 79 |       if(showBar){setTxtProgressBar(pb, j)}
 80 |       flush.console()
 81 |       
 82 |       sign.group[[j]] <- signalGrouping(spectrum.df = specList[[j]], 
 83 |                                         mzError=mzError, 
 84 |                                         minPeaks = minPeaks)
 85 |     }
 86 |   }
 87 |   
 88 |   # logical, FALSE if no peaks returned (character value instead of spectrum)
 89 |   groupIndx <- vapply(sign.group, function(x) !is.character(x), logical(1))
 90 |   
 91 |   message("...done")
 92 |   flush.console()
 93 |   # number of comp spectra returned
 94 |   message(sum(groupIndx), " spectra contained more than or equal to ",
 95 |           minPeaks," peaks following ion grouping")
 96 |   flush.console()
 97 |   
 98 |   # calculate number of interfragment difference lower than 0.1 m/z and inform
 99 |   # user, skipped if no spectrum remains (range of nothing is an error)
100 |   if(any(groupIndx)){
101 |     # NOTE(review): the 0 appended to the differences is always counted, each
102 |     # spectrum therefore contributes at least 1 - confirm this is intended
103 |     nInterFragless <- sapply(sign.group[groupIndx], function(x){
104 |       intfrag.diff <- as.numeric(c(diff(x[, "mass"]), 0))
105 |       length(which(intfrag.diff < 0.1))})
106 |     message("The range of interfragment differences less than 0.1 m/z in the spectra is ",
107 |             paste(c("min :", " max :"), range(nInterFragless)), "\n")
108 |     flush.console()
109 |     message("The average number of interfragment differences less than 0.1 m/z in the spectra is ",
110 |             round(mean(nInterFragless), digits=0), "\n")
111 |     flush.console()
112 |     
113 |     if(round(mean(nInterFragless), digits=0) > 2){
114 |       warning("The average number of interfragment differences less than 0.1 m/z is greater than 2: please consider increasing the mzError parameter above ", 
115 |               mzError)
116 |     }
117 |   }
118 |   
119 |   # return grouped, keeping only spectra (and metaData) with peaks returned
120 |   names(sign.group) <- names(specList)
121 |   compSpectra(object) <- sign.group[groupIndx]
122 |   metaData(object) <- metaData(object)[groupIndx]
123 |   
124 |   return(object)
125 | })
126 | 


--------------------------------------------------------------------------------
/R/combineMS2.R:
--------------------------------------------------------------------------------
 1 | #' Combine MS2 ions within and/or between composite spectra
 2 | #' 
 3 | #' Dispatches to the \code{combineMS2.*} function selected by \code{method},
 4 | #' forwarding \code{object} and any further arguments in \code{...}.
 5 | #' 
 6 | #' @param object a compMS2 class object obtained from the function CompMSset
 7 | #' @param method character, one of "Ions", "Spectra" or "removeContam"
 8 | #' (default = "Ions"). "Ions" performs intra-spectrum ion grouping. "Spectra"
 9 | #' performs inter-spectra ion grouping, in which composite spectra from
10 | #' multiple MS2 files matched to the same MS1 feature are matched.
11 | #' "removeContam" removes possible contaminants, defined as sequences of
12 | #' isobars with sufficient spectral similarity and separated by a maximum
13 | #' retention time gap; it can be used before or after the metabolite
14 | #' identification methods.
15 | #' @param ... optional arguments to be passed along to the selected function.
16 | #' 
17 | #' @return A compMS2 object with intra-composite spectrum or inter-composite 
18 | #' spectra, grouped, signal summed and group mass-to-charge averaged spectra.
19 | #' The value is returned invisibly.
20 | #' @seealso \code{\link{combineMS2.Ions}}, \code{\link{combineMS2.Spectra}},
21 | #' \code{\link{combineMS2.removeContam}}.
22 | #' @export
23 | setGeneric("combineMS2", function(object, ...) standardGeneric("combineMS2"))
24 | 
25 | setMethod("combineMS2", signature = "compMS2", function(object, method="Ions", ...) {
26 |   # match.arg validates method against the three supported choices; the
27 |   # function to call is then named "combineMS2.<method>"
28 |   workerName <- paste("combineMS2", 
29 |                       match.arg(method, c("Ions","Spectra", 'removeContam')),
30 |                       sep=".")
31 |   # alist() leaves object and ... unevaluated so do.call forwards them as written
32 |   grouped <- do.call(workerName, alist(object, ...))
33 |   invisible(grouped)
34 | })
35 | 


--------------------------------------------------------------------------------
/R/compMS2Create.R:
--------------------------------------------------------------------------------
  1 | #' Match MS1 features to the MS2 precursor scans of one MS2 data file
  2 | #'
  3 | #' Opens the MS2 file with \code{mzR::openMSfile}, extracts the scan header
  4 | #' table, flags the scans whose total ion current reaches \code{TICfilter} and
  5 | #' calls \code{MS1MatchSpectra} once for every row of \code{MS1features}.
  6 | #'
  7 | #' @param MS2file character path to a single MS2 data file (e.g. mzXML).
  8 | #' @param MS1features table of MS1 features: column 1 is passed to
  9 | #' \code{MS1MatchSpectra} as \code{EIC}, column 2 as \code{mz}, column 3 as
 10 | #' \code{RT} and column 4 as \code{adduct}.
 11 | #' @param TICfilter numeric minimum total ion current for a scan to be flagged
 12 | #' as above the TIC filter (default = 10000).
 13 | #' @param precursorPpm numeric passed to \code{MS1MatchSpectra} (default = 10);
 14 | #' presumably the precursor mass tolerance in ppm - confirm in MS1MatchSpectra.
 15 | #' @param ret numeric passed to \code{MS1MatchSpectra} (default = 10);
 16 | #' presumably the retention time tolerance - confirm in MS1MatchSpectra.
 17 | #' @param adducts passed unchanged to \code{MS1MatchSpectra} (default = FALSE).
 18 | #' @param isoWid passed unchanged to \code{MS1MatchSpectra} (default = 4).
 19 | #'
 20 | #' @return a list named \code{<MS2 file name>_<MS1features column 1>} holding
 21 | #' only the \code{MS1MatchSpectra} results of length 2 (these are counted as
 22 | #' matched features). If the file has no MS2 level scans, or none at or above
 23 | #' the TIC filter, a warning is raised and \code{NULL} is returned invisibly.
 24 | compMS2Create <- function(MS2file = NULL, MS1features = NULL, 
 25 |                           TICfilter = 10000, precursorPpm = 10, ret = 10, 
 26 |                           adducts=FALSE, isoWid=4){
 27 |   # fail early with a clear message rather than with an obscure error raised
 28 |   # from basename() or from indexing NULL further down
 29 |   if(is.null(MS2file)){
 30 |     stop("argument MS2file is missing with no default")
 31 |   }
 32 |   if(is.null(MS1features)){
 33 |     stop("argument MS1features is missing with no default")
 34 |   }
 35 |   
 36 |   # MS2 file name
 37 |   MS2fileName <- basename(MS2file)
 38 |   
 39 |   message(paste0("Reading ", MS2fileName, "..."))
 40 |   flush.console()
 41 |   # read MS2 file; from here on MS2file is the opened file handle
 42 |   # NOTE(review): the handle is not closed in this function - confirm whether
 43 |   # it should be closed once the matching has finished
 44 |   MS2file <- mzR::openMSfile(MS2file)
 45 |   
 46 |   message("...DONE")
 47 |   flush.console()
 48 |   
 49 |   message("extracting metaData from MS2 file")
 50 |   flush.console()
 51 |   
 52 |   metaData <- mzR::header(MS2file)
 53 |   metaData <- metaData[, c('msLevel', 'precursorMZ', 'retentionTime',
 54 |                            'totIonCurrent', 'precursorIntensity',
 55 |                            'collisionEnergy', 'basePeakMZ', 'basePeakIntensity',
 56 |                            'acquisitionNum', 'precursorScanNum')]
 57 |   # 1 when the scan TIC reaches the user supplied threshold, otherwise 0
 58 |   metaData$TICaboveFilter <- {metaData$totIonCurrent >= TICfilter} * 1
 59 |   colnames(metaData) <- c("MS.scanType", "precursorMz", "retentionTime", "TIC", 
 60 |                           "precursorIntensity", 
 61 |                           "collisionEnergy", "basePeakMz", "basePeakIntensity",
 62 |                           "acquisitionNum","precursorScanNum", "TICaboveFilter")
 63 |   
 64 |   # reorder so that TICaboveFilter sits next to TIC
 65 |   metaData <- metaData[, c("MS.scanType", "precursorMz", "retentionTime", "TIC", 
 66 |                            "TICaboveFilter", "precursorIntensity", 
 67 |                            "collisionEnergy", "basePeakMz", "basePeakIntensity",
 68 |                            "acquisitionNum","precursorScanNum")]
 69 |   message("...DONE")
 70 |   flush.console()
 71 |   
 72 |   # cond if no MS2 level scans detected
 73 |   if(all(metaData$MS.scanType == 1)){
 74 |     # no MS2 level scans detected cond
 75 |     warning(paste0("No MS2 levels scans within ", MS2fileName, ",  check that the 
 76 |                    file has been converted to the mzXML format correctly."), 
 77 |             immediate.=TRUE)
 78 |     flush.console()
 79 |     message("...moving to next MS2 file")
 80 |     flush.console()
 81 |     return(invisible(NULL))
 82 |   } else {
 83 |     # remove all MS/MS scans where the TIC is less than the minimum TIC threshold 
 84 |     # set by the user
 85 |     message(paste0("Of a total of ", length(which(metaData$MS.scanType == 2)),
 86 |                    " MS2 spectra..."))
 87 |     flush.console()
 88 |     # index ms2 scan and above TIC filter
 89 |     metaData$MS2TICfilt.indx <- (metaData$MS.scanType == 2 & 
 90 |                                      metaData$TICaboveFilter == 1) * 1
 91 |     nAboveTIC <- length(which(metaData$MS2TICfilt.indx == 1))
 92 |     message(paste0(nAboveTIC, " MS2 spectra were above the TIC filter of ", 
 93 |                    TICfilter))
 94 |     flush.console()
 95 |     # cond if no scan above the TIC filter; nAboveTIC is a count, so its value
 96 |     # (not its length, which is always 1) has to be compared with zero
 97 |     if(nAboveTIC == 0){ 
 98 |       warning(paste0("No MS2 levels scans above TIC filter of ", TICfilter, " in ", 
 99 |                      MS2fileName, ",  reduce the TIC filter parameter or check that 
100 |                      the file has been converted to the mzXML format correctly."), 
101 |               immediate.=TRUE)
102 |       flush.console()
103 |       message("...moving to next MS2 file")
104 |       flush.console()
105 |       return(invisible(NULL))
106 |     } else { 
107 |       
108 |       message("matching MS1 peak table to precursors of MS2 spectra...")
109 |       flush.console()
110 |       # mapply MS1 feature match: one MS1MatchSpectra call per MS1 feature row
111 |       MS1MS2match <- mapply(MS1MatchSpectra, EIC=MS1features[, 1], 
112 |                             mz=MS1features[, 2], RT=MS1features[, 3], 
113 |                             adduct=MS1features[, 4],
114 |                             precursorPpm=precursorPpm, ret=ret, 
115 |                             MoreArgs=list(metaData=metaData, 
116 |                                           MS2file=MS2file, adducts=adducts, 
117 |                                           isoWid=isoWid))
118 |       message("...done")
119 |       flush.console()
120 |       
121 |       # results of length 2 are counted as MS1 features matched to a precursor
122 |       match.indx <- which(sapply(MS1MS2match, length) == 2)
123 |       message(paste0(length(match.indx), " MS1 features were matched to MS2 precursors"))
124 |       flush.console()
125 |       names(MS1MS2match) <- paste0(MS2fileName, "_", MS1features[, 1])
126 |       
127 |       # keep the matched features only
128 |       MS1MS2match <- MS1MS2match[match.indx]
129 |       return(MS1MS2match)
130 |     } # cond if no scan above the TIC filter
131 |   } # cond if no MS2 level scans detected
132 | } # end func
133 | 


--------------------------------------------------------------------------------
/R/compMS2Miner.R:
--------------------------------------------------------------------------------
 1 | #' compMS2Miner: a package to identify/ visualize unknowns in metabolomic datasets based on MS2 fragmentation data.
 2 | #'
 3 | #' @description Matches MS1 features to MS2 spectra (.mzXML) files based on a 
 4 | #'mass-to-charge and retention time tolerance. Composite spectra and other data
 5 | #'can subsequently be visualized during any stage of the compMS2Miner
 6 | #'processing workflow. Composite spectra can be denoised, ion signals grouped 
 7 | #'and summed, substructure groups identified, common Phase II metabolites
 8 | #'predicted and features matched to database monoisotopic mass data 
 9 | #'and in silico MS2 fragmentation data.
10 | #'The resulting data can then be readily curated by sending to a local or online
11 | #'couchDB database.
12 | #' 
13 | #' @details An example workflow is available in the following vignette:
14 | #' compMS2MinerWorkFlow (source, pdf)
15 | #' 
16 | #' @author WMB Edmands \email{edmandsw@@berkeley.edu}
17 | #' @docType package
18 | #' @name compMS2Miner
19 | NULL
20 | 


--------------------------------------------------------------------------------
/R/compMS2explorer.R:
--------------------------------------------------------------------------------
 1 | #' Visualize your compMS2Miner results output using a shiny app.
 2 | #' 
 3 | #' @param object a compMS2 class object or a character full path to a compMS2Miner zip file
 4 | #' @param browserLaunch logical launch app in web browser (default = TRUE).
 5 | #' @param ... further arguments passed on to the methods.
 6 | #' @export
 7 | setGeneric("compMS2Explorer", function(object, ...) standardGeneric("compMS2Explorer"))
 8 | 
 9 | # character method: object is the path to a zip archive which is extracted to a 
10 | # temporary directory and then run as a shiny app
11 | setMethod("compMS2Explorer", signature = "character", function(object, 
12 |                                                                 browserLaunch = TRUE){
13 |   # exactly one existing path is required; checking the length first stops 
14 |   # if() being handed a condition of length greater than one
15 |   if(length(object) != 1 || !file.exists(object)){
16 |     stop('a full path to a compMS2Miner .zip file archive must be supplied')
17 |   }
18 |   if(!requireNamespace('shiny', quietly = TRUE)){
19 |     stop('The package shiny must be installed to use the compMS2Explorer function...')
20 |   }
21 |   outdir <- tempfile(pattern = "compMS2Miner")
22 |   dir.create(outdir)
23 |   
24 |   pathTmp <- utils::unzip(object, exdir = outdir)
25 |   # unzip returns the extracted file paths; nothing extracted means there is 
26 |   # no app directory to run
27 |   if(length(pathTmp) == 0){
28 |     stop('no files could be extracted from the zip file archive: ', object)
29 |   }
30 |   # the app is run from the directory of the first extracted file
31 |   shiny::runApp(dirname(pathTmp[1]), launch.browser = browserLaunch)
32 | })
33 |     
34 | # compMS2 method: the shiny app files shipped with the package are copied to a 
35 | # temporary directory together with the saved object and the app is run there
36 | setMethod("compMS2Explorer", signature = "compMS2", function(object, 
37 |                                                                 browserLaunch = TRUE){
38 |   # error handling; is() also accepts classes that extend compMS2 and always
39 |   # gives a single logical, unlike comparing class(object) with a string
40 |   if(!methods::is(object, "compMS2")){
41 |     stop("argument object is not an CompMS2 class object")
42 |   } else if (length(filePaths(object)) == 0) {
43 |     stop("The CompMS2 class file is empty")
44 |   }
45 |   # shiny is attached here, as in earlier versions of this method
46 |   if(!require(shiny)){
47 |     stop('The package shiny must be installed to use the compMS2Explorer function...')
48 |   }
49 |   appDir <- system.file("shiny-apps", "compMS2Explorer", package = "compMS2Miner")
50 |   if (appDir == "") {
51 |     stop("Could not find example directory. Try re-installing `compMS2Miner`.", call. = FALSE)
52 |   }
53 |   # add readOnly = F to parameters
54 |   Parameters(object)$readOnly <- FALSE
55 |   # create temporary directory to hold the app bundle
56 |   outDir <- tempfile(pattern = "compMS2Miner")
57 |   dir.create(outDir)
58 |   wwwOutDir <- paste0(outDir, '/www')
59 |   dir.create(wwwOutDir)
60 |   # copy latest version of shiny app from package
61 |   rFilesMoved <- file.copy(dir(appDir, full.names = TRUE, pattern = '\\.R$'), 
62 |                            outDir, overwrite = TRUE)
63 |   wwwFilesMoved <- file.copy(dir(paste0(appDir, '/www'), full.names = TRUE, 
64 |                                  pattern = '\\.mp4$|\\.png$'), 
65 |                              wwwOutDir, overwrite = TRUE)
66 |   # an empty result means no .R app files were found at all, which must also
67 |   # be treated as a failed copy
68 |   if(length(rFilesMoved) == 0 || !all(c(rFilesMoved, wwwFilesMoved))){
69 |     stop('The shiny-app file(s) were not copied to the bundle please check the compMS2Miner package is properly installed.\n')
70 |   }
71 |   
72 |   # the object is saved next to the copied app files
73 |   save(object, file=paste0(outDir, '/compMS2object.RData'))  
74 |   shiny::runApp(outDir, display.mode = "normal", launch.browser = browserLaunch)
75 | }) # end function
76 | 


--------------------------------------------------------------------------------
/R/couchDBcurate.R:
--------------------------------------------------------------------------------
 1 | #' CouchDB login
 2 | #' 
 3 | #' @description establishes a connection with CouchDB, creates the named 
 4 | #' database if it is not yet listed and records the connection details in the 
 5 | #' compMS2 object.
 6 | #' @param object a compMS2 class object.
 7 | #' @param couchDBname New or existing CouchDB database name (must be all lower case,  can contain underscores).
 8 | #' The name is converted to lower case before use. 
 9 | #' @param nSlaves currently not used by this method.
10 | #' @param Username character CouchDB user name.
11 | #' @param Password character CouchDB password.
12 | #' @param Host character CouchDB host. If any of Username, Password or Host is
13 | #' NULL the login details are requested with \code{getLoginDetails}.
14 | #' @return the compMS2 object. If the object held no CouchDB connection the 
15 | #' connection details (base_url, myConn) are added to it and Username and 
16 | #' couchDBname are added to its Parameters.
17 | #'  
18 | #' @export
19 | setGeneric("couchDBcurate", function(object, ...) standardGeneric("couchDBcurate"))
20 | 
21 | setMethod("couchDBcurate", signature = "compMS2", function(object,
22 |                                                            couchDBname=NULL, 
23 |                                                            nSlaves=NULL, 
24 |                                                            Username=NULL, 
25 |                                                            Password=NULL, 
26 |                                                            Host = NULL){
27 |   # error handling
28 |   if(!methods::is(object, "compMS2")){
29 |     stop("argument object is not an CompMS2 class object")
30 |   } else  if(is.null(couchDBname)){
31 |     stop("argument couchDBname is missing with no default")
32 |   }
33 |   
34 |   # log in only if the object does not already hold couchDB connection details
35 |   if(length(couchDBconn(object)) == 0){
36 |     # if username, password or host null get login details for couchDB
37 |     if(is.null(Username) || is.null(Password) || is.null(Host)){    
38 |       credentials <- getLoginDetails()
39 |     } else {
40 |       credentials <- c(Host=Host,Username=Username,Password=Password) 
41 |     }
42 |     # TRUE when the ping result carries the couchDB error response
43 |     pingFailed <- function(pingRes){
44 |       any((pingRes == "Error in response from CouchDB") == TRUE)
45 |     }
46 |     
47 |     message("Establishing connection with CouchDB...")
48 |     flush.console()
49 |     # login to couchDB and send ping
50 |     pingReq <- couchDBpingReq(credentials = credentials)
51 |     # up to two further attempts: the user has 3 chances in total to get 
52 |     # host,  username and password correct
53 |     nRetries <- 0
54 |     while(pingFailed(pingReq) && nRetries < 2){
55 |       # if incorrect credentials then give user a message
56 |       tcltk::tkmessageBox(message = 'Connection could not be made with CouchDB make sure the localhost server is running, and username/password are correct see futon interface "http://localhost:5984/_utils"')
57 |       # run getLoginDetails if wrong details supplied
58 |       credentials <- getLoginDetails()
59 |       # login to couchDB and send ping
60 |       pingReq <- couchDBpingReq(credentials = credentials)
61 |       nRetries <- nRetries + 1
62 |     }
63 |     # final error if not able to login
64 |     if(pingFailed(pingReq)){
65 |       tcltk::tkmessageBox(message = 'Too many failed attempts to connect with 
66 |                         CouchDB try again')
67 |       stop('Too many failed attempts to connect with CouchDB try again')
68 |     }
69 |     message("Connection with CouchDB made...")
70 |     flush.console()
71 |     
72 |     # the connection is built here from the accepted credentials rather than 
73 |     # read from the global variable myConn assigned by couchDBpingReq
74 |     myConn <- couchDB::couch_http_connection(host=credentials['Host'], 
75 |                                              user=credentials['Username'], 
76 |                                              password=credentials['Password'])
77 |     # create base_url; no "user:password@" prefix for an empty user name
78 |     auth <- ifelse(credentials["Username"]=="", "", 
79 |                    paste0(credentials["Username"], ":", credentials["Password"],
80 |                           "@"))
81 |     base_url  <-  paste0("http",  "://",  auth,  myConn$couch_http_host, 
82 |                          ":", myConn$couch_http_port)
83 |     # data bases
84 |     path  <-  paste(base_url,  "_all_dbs",  sep = "/")
85 |     DBs <- rjson::fromJSON(file=path)
86 |     # convert to lower case
87 |     couchDBname <- tolower(couchDBname)
88 |     # if database name is not in the database list then create new 
89 |     if(!any(DBs == couchDBname)){
90 |       DBcreate <- couchDB::couch_create_database(myConn, couchDBname)
91 |     }
92 |     # add couchDB connection to compMS2 object
93 |     couchDBconn(object) <- list(base_url = base_url, myConn = myConn)
94 |     Parameters(object) <- data.frame(Username = credentials["Username"], 
95 |                                      couchDBname = couchDBname, Parameters(object)) 
96 |   }
97 |   return(object)
98 | })
99 | 


--------------------------------------------------------------------------------
/R/couchDBpingReq.R:
--------------------------------------------------------------------------------
 1 | #' CouchDBpingReq
 2 | #' 
 3 | #' Builds a couchDB http connection from the login credentials, pings the
 4 | #' server and returns the result of the ping.
 5 | #' 
 6 | #' @param credentials either output from GetLoginDetails or a named character vector with the names "Host", "Username", "Password" containing user Login parameters (e.g. c( Host = "localhost", Username = "", Password = ""))
 7 | #' @return ping request result 
 8 | #' @details As side effects the connection is assigned to \code{myConn} and 
 9 | #' the string "Username:Password@" to \code{auth}, both with the 
10 | #' superassignment operator \code{<<-}, i.e. outside of this function.
11 | #' @export
12 | couchDBpingReq<-function(credentials){
13 |   # couchDB http connection, superassigned so that it outlives this call
14 |   myConn <<- couchDB::couch_http_connection(host=credentials['Host'], 
15 |                                             user=credentials['Username'], 
16 |                                             password=credentials['Password'])
17 |   # authorization prefix, also superassigned
18 |   auth <<- paste(credentials['Username'], ":", credentials['Password'], "@", 
19 |                  sep="")
20 |   # send ping and hand its result back to the caller
21 |   pingResult <- couchDB::couch_ping(myConn)
22 |   return(pingResult)
23 | }
24 | 


--------------------------------------------------------------------------------
/R/deconvNoise.DNF.R:
--------------------------------------------------------------------------------
  1 | #' Spectral noise filtration using dynamic noise filter
  2 | #' 
  3 | #' @description uses the dynamic noise filtration algorithm adapted from the method described 
  4 | #' in Xu H. and Frietas M. "A Dynamic Noise Level Algorithm for Spectral 
  5 | #' Screening of Peptide MS/MS Spectra" 2010 BMC Bioinformatics. 
  6 | #' 
  7 | #' @param object a compMS2 class object.
  8 | #' @param DNF numeric dynamic noise filter minimum signal to noise threshold 
  9 | #' (default = 2), calculated as the ratio between the linear model predicted 
 10 | #' intensity value and the actual intensity.
 11 | #' @param minPeaks integer minimum number of signal peaks following dynamic 
 12 | #' noise filtration (default = 1).
 13 | #' @param maxPeaks integer maximum number of signal peaks the function will continue
 14 | #' until both the minimum DNF signal to noise ratio is exceeding and the number
 15 | #' of peaks is lower than the maximum (default = 20).
 16 | #' @param minInt numeric minimum intensity to commence the dynamic noise filter
 17 | #' algorithm. Low values will increase computation time and increase the chance
 18 | #' that the DNF algorithm will terminate prematurely (default = 250).
 19 | #' @param verbose logical if TRUE display progress bars.
 20 | #' @return a compMS2 object in which the composite spectra are replaced by 
 21 | #' their noise filtered versions and the metaData returned by 
 22 | #' \code{dynamicNoiseFilter} is appended to the existing metaData of each 
 23 | #' spectrum. Spectra not flagged as reaching \code{minPeaks} are removed.
 24 | #' @details the spectra are processed in parallel on a SNOW cluster when the 
 25 | #' nCores entry of the object Parameters is greater than zero, otherwise 
 26 | #' they are processed one after the other.
 27 | #' @source Xu H. and Frietas M. "A Dynamic Noise Level Algorithm for Spectral 
 28 | #' Screening of Peptide MS/MS Spectra" 2010 BMC Bioinformatics.
 29 | #' @export 
 30 | setGeneric("deconvNoise.DNF", function(object, ...) standardGeneric("deconvNoise.DNF"))
 31 | 
 32 | setMethod("deconvNoise.DNF", signature = "compMS2", function(object, DNF=2, 
 33 |                                                              minPeaks=1,
 34 |                                                              maxPeaks=20,
 35 |                                                              minInt=250, 
 36 |                                                              verbose=TRUE){
 37 |   # error handling
 38 |   if(!methods::is(object, "compMS2")){
 39 |     stop("argument object is not an CompMS2 class object")
 40 |   }
 41 |   specList <- compSpectra(object)
 42 |   nSpec <- length(specList)
 43 |   showProgress <- isTRUE(verbose)
 44 |   
 45 |   message(paste0("Applying dynamic noise filter to ", nSpec,
 46 |                  " spectra..."))
 47 |   flush.console()
 48 |   
 49 |   # a missing (NULL) nCores parameter is treated as serial processing
 50 |   nCores <- Parameters(object)$nCores
 51 |   if(nSpec > 0 && isTRUE(nCores > 0)){
 52 |     if(!requireNamespace('foreach', quietly = TRUE)){
 53 |       stop('package foreach must be installed to use this function in parallel')
 54 |     }
 55 |     if(!requireNamespace('doSNOW', quietly = TRUE)){
 56 |       stop('package doSNOW must be installed to use this function in parallel')
 57 |     }
 58 |     # foreach is not attached so its infix operator is bound locally
 59 |     `%dopar%` <- foreach::`%dopar%`
 60 |     # create a cluster using the doSNOW package
 61 |     message(paste0("Starting SNOW cluster with ", nCores,
 62 |                    " local sockets..."))
 63 |     flush.console()
 64 |     
 65 |     cl <- parallel::makeCluster(nCores, outfile='') 
 66 |     # stop SNOW cluster however the function exits, including on error
 67 |     on.exit(parallel::stopCluster(cl), add=TRUE)
 68 |     doSNOW::registerDoSNOW(cl)
 69 |     if(showProgress){
 70 |       # report progress at (roughly) every 5 percent of the spectra
 71 |       progSeq <- round({nSpec * seq(0, 1, 0.05)}, 0)
 72 |       progSeq[1] <- 1
 73 |       cat(paste0('Progress (', nSpec, ' spectra):\n'))
 74 |       progress <- function(n){if(n %in% progSeq){cat(paste0(round({n/nSpec} * 100, 0), '%  '))}}
 75 |       opts <- list(progress=progress)
 76 |     } else {
 77 |       opts <- list(progress=NULL)
 78 |     }
 79 |     
 80 |     # foreach and dopar from foreach package
 81 |     noise.filt <- foreach::foreach(j = seq_len(nSpec),
 82 |                                    .packages = c('RcppEigen', 'compMS2Miner'), 
 83 |                                    .options.snow=opts) %dopar% {
 84 |                                      dynamicNoiseFilter(spectrum.df=specList[[j]], 
 85 |                                                         DNF=DNF, minPeaks=minPeaks, 
 86 |                                                         maxPeaks=maxPeaks, 
 87 |                                                         minInt=minInt)}
 88 |   } else {
 89 |     # create list to store results
 90 |     noise.filt <- vector("list", nSpec)
 91 |     # create progress bar (it cannot be created for zero spectra)
 92 |     showBar <- showProgress && nSpec > 0
 93 |     if(showBar){ pb <- txtProgressBar(min=0, max=nSpec, style=3)}
 94 |     
 95 |     # seq_len gives an empty loop when there are no spectra
 96 |     for(j in seq_len(nSpec)){
 97 |       #progress bar
 98 |       if(showBar){setTxtProgressBar(pb, j)}
 99 |       flush.console()
100 |       
101 |       noise.filt[[j]] <- dynamicNoiseFilter(spectrum.df=specList[[j]], 
102 |                                             DNF=DNF, minPeaks=minPeaks, 
103 |                                             maxPeaks=maxPeaks, minInt=minInt)
104 |     }
105 |     if(showBar){close(pb)}
106 |   }
107 |   # logical if no peaks returned
108 |   # NOTE(review): aboveMinPeak presumably relies on partial matching of a list
109 |   # element named aboveMinPeaks - confirm against dynamicNoiseFilter
110 |   noiseIndx <- vapply(noise.filt, function(x) isTRUE(x$aboveMinPeak), logical(1))
111 |   
112 |   message("...done")
113 |   flush.console()
114 |   # number of comp spectra returned
115 |   message(sum(noiseIndx), " spectra contained more than or equal to ",
116 |           minPeaks," peaks following dynamic noise filtration")
117 |   flush.console()
118 |   # return noise filtered
119 |   compSpec.tmp <- lapply(noise.filt, function(x) x$Above.noise)
120 |   prevMetaData.tmp <- metaData(object)
121 |   # append the noise filter metaData to the existing metaData of each spectrum
122 |   metaData.tmp <- lapply(seq_along(noise.filt), function(x) 
123 |     c(prevMetaData.tmp[[x]],
124 |       data.frame(noise.filt[[x]]$metaData, stringsAsFactors = FALSE)))
125 |   
126 |   names.tmp <- names(specList)
127 |   names(compSpec.tmp) <- names.tmp
128 |   names(metaData.tmp) <- names.tmp
129 |   # keep only the spectra flagged as reaching the minimum number of peaks
130 |   compSpectra(object) <- compSpec.tmp[noiseIndx]
131 |   metaData(object) <- metaData.tmp[noiseIndx]
132 |   
133 |   return(object)
134 | }) # end function
135 | 
136 | 


--------------------------------------------------------------------------------
/R/deconvNoise.R:
--------------------------------------------------------------------------------
 1 | #' Filter spectral noise from a CompMS2 class object 
 2 | #'
 3 | #' @param object a compMS2 class object obtained from the function CompMSset
 4 | #' @param method dynamic noise filter "DNF" (default) or fixed maximum 
 5 | #' intensity "maxInt". An error is raised if no function named 
 6 | #' deconvNoise.<method> can be found.
 7 | #' @param ... optional arguments to be passed along.
 8 | #' 
 9 | #' @return A compMS2 object with noise filtered composite spectra (returned 
10 | #' invisibly).
11 | #' @seealso \code{\link{deconvNoise.DNF}}, \code{\link{deconvNoise.maxInt}}
12 | #' @export
13 | setGeneric("deconvNoise", function(object, ...) standardGeneric("deconvNoise"))
14 | 
15 | setMethod("deconvNoise", signature = "compMS2", function(object, method="DNF", ...) {
16 |   
17 |   method <- match.arg(method, c("DNF","maxInt"))
18 |   # the function to call is named "deconvNoise.<method>"
19 |   method <- paste("deconvNoise", method, sep=".")
20 |   # match.arg accepts every listed method name, so check that a function of 
21 |   # that name is actually available before trying to call it
22 |   if(!exists(method, mode="function")){
23 |     stop("the function ", method, 
24 |          " implementing this noise filtration method could not be found")
25 |   }
26 |   # alist() leaves object and ... unevaluated so do.call forwards them as written
27 |   invisible(do.call(method, alist(object, ...)))
28 | })
29 | 


--------------------------------------------------------------------------------
/R/deconvNoise.maxInt.R:
--------------------------------------------------------------------------------
 1 | # ' deconvolute noise according to a maximum allowed intensity 
 2 | #   
 3 | #   setGeneric("deconvNoise.maxInt", function(object, ...) standardGeneric("deconvNoise.maxInt"))
 4 | #   
 5 | #   setMethod("deconvNoise.maxInt", signature = "compMS2", function(object) {
 6 | #     # error handling
 7 | #     if(class(object) != "compMS2"){
 8 | #       stop("argument object is not an CompMS2 class object")
 9 | #     } else {
10 | #       
11 | #     }
12 | #   })
13 | 


--------------------------------------------------------------------------------
/R/dotProdMatrix.R:
--------------------------------------------------------------------------------
 1 | #' dot product matrix calculation
 2 | #' 
 3 | #' @description bins the mass values of every spectrum into fixed width bins, 
 4 | #' sums the intensities falling in each bin and returns the normalized dot 
 5 | #' product between every pair of binned spectra.
 6 | #' @param allSpectra a numeric matrix consisting of two columns 1. mass and 2. intensity
 7 | #' @param spectraNames character names of individual spectra to compare must equal number of rows of allSpectra
 8 | #' @param binSizeMS2 numeric the MS2 bin size to bin MS2 data prior to dot product calculation (default = 0.1 Da).
 9 | #' A value of NULL is treated as the default of 0.1.
10 | #' @details Bins are the right-closed intervals between successive multiples 
11 | #' of binSizeMS2, from binSizeMS2 up to at most the largest mass rounded down 
12 | #' plus 10. Peaks with a mass outside of this range (e.g. less than or equal 
13 | #' to binSizeMS2) fall in no bin and do not contribute. A spectrum without any 
14 | #' binned intensity has a norm of zero and therefore gives NaN values.
15 | #' @export
16 | #' @return a matrix of equal dimension corresponding to the number of unique spectrum names.
17 | #' Rows and columns are named by the unique spectrum names in order of first appearance.
18 | dotProdMatrix <- function(allSpectra=NULL, spectraNames=NULL, binSizeMS2=NULL){
19 |   # error handling
20 |   stopifnot(is.matrix(allSpectra))
21 |   stopifnot(is.character(spectraNames))
22 |   if(ncol(allSpectra) < 2 || nrow(allSpectra) == 0){
23 |     stop('allSpectra must have at least one row and two columns (mass and intensity)')
24 |   }
25 |   if(length(spectraNames) != nrow(allSpectra)){
26 |     stop('spectraNames must contain one name for every row of allSpectra')
27 |   }
28 |   # documented default bin size
29 |   if(is.null(binSizeMS2)){
30 |     binSizeMS2 <- 0.1
31 |   }
32 |   stopifnot(is.numeric(binSizeMS2), length(binSizeMS2) == 1, binSizeMS2 > 0)
33 |   
34 |   uniqNames <- unique(spectraNames)
35 |   message('Calculating dot product matrix ', length(uniqNames), 
36 |           ' spectra\n')
37 |   flush.console()
38 |   maxMass <- floor(max(allSpectra[, 1])) + 10
39 |   # bin edges; one sequence defines every bin so the number of bins always 
40 |   # agrees with the number of breaks
41 |   binBreaks <- seq(binSizeMS2, maxMass, binSizeMS2)
42 |   nBins <- length(binBreaks) - 1
43 |   if(nBins < 1){
44 |     stop('binSizeMS2 is too large for the mass range of allSpectra')
45 |   }
46 |   # integer bin number of every peak, NA if the mass lies outside of all bins
47 |   binIndx <- cut(allSpectra[, 1], breaks=binBreaks, labels=FALSE)
48 |   # column number of the spectrum every peak belongs to
49 |   specIndx <- match(spectraNames, uniqNames)
50 |   inRange <- !is.na(binIndx)
51 |   
52 |   # bins (rows) by spectra (columns), absent bins stay zero
53 |   indivSpecMat <- matrix(0, nrow=nBins, ncol=length(uniqNames))
54 |   if(any(inRange)){
55 |     # column-major position of every peak in indivSpecMat
56 |     cellIndx <- (specIndx[inRange] - 1) * nBins + binIndx[inRange]
57 |     # sum the intensities of peaks sharing a bin within a spectrum
58 |     cellSums <- tapply(allSpectra[inRange, 2], cellIndx, sum)
59 |     indivSpecMat[as.numeric(names(cellSums))] <- cellSums
60 |   }
61 |   # all pairwise dotproducts
62 |   dotProdMat <- crossprod(indivSpecMat)
63 |   # euclidean norm of every binned spectrum
64 |   specNorms <- sqrt(colSums(indivSpecMat^2))
65 |   # divide each dot product by the product of the two spectrum norms
66 |   dotProdsTmp <- dotProdMat / outer(specNorms, specNorms)
67 |   row.names(dotProdsTmp) <- uniqNames
68 |   colnames(dotProdsTmp) <- uniqNames
69 |   return(dotProdsTmp)
70 | } # end function
71 | 


--------------------------------------------------------------------------------
/R/dynamicNoiseFilter.R:
--------------------------------------------------------------------------------
  1 | #' Dynamic Noise filtration 
  2 | #' 
  3 | #' @param spectrum.df a dataframe or matrix with two columns:
  4 | #' 1. Mass/ Mass-to-charge ratio
  5 | #' 2. Intensity
  6 | #' @param DNF dynamic noise filter minimum signal to noise threshold 
  7 | #' (default = 2), calculated as the ratio between the linear model predicted 
  8 | #' intensity value and the actual intensity.
  9 | #' @param minPeaks minimum number of signal peaks following dynamic 
 10 | #' noise filtration (default = 5).
 11 | #' @param maxPeaks maximum number of signal peaks the function will continue
 12 | #' until both the minimum DNF signal to noise ratio is exceeding and the number
 13 | #' of peaks is lower than the maximum (default = 5).
 14 | #' 
 15 | #' @return a list containing 3 objects:
 16 | #' \enumerate{
 17 | #' \item Above.noise The dynamic noise filtered matrix/ dataframe 
 18 | #' \item metaData a dataframe with the following column names:
 19 | #'        1. Noise.level the noise level determined by the dynamic noise filter 
 20 | #'           function.
 21 | #'        2. IntCompSpec Total intensity composite spectrum.
 22 | #'        3. TotalIntSNR Sparse ion signal to noise ratio 
 23 | #'        (mean intensity/ stdev intensity)
 24 | #'        4. nPeaks number of peaks in composite spectrum
 25 | #' \item aboveMinPeaks Logical are the number of signals above the minimum level}
 26 | #' @details  Dynamic noise filter adapted from the method described in Xu H. and 
 27 | #' Frietas M. "A Dynamic Noise Level Algorithm for Spectral Screening of 
 28 | #' Peptide MS/MS Spectra" 2010 BMC Bioinformatics. 
 29 | #' 
 30 | #' The function iteratively calculates linear models starting from 
 31 | #' the median value of the lower half of all intensities in the spectrum.df. 
 32 | #' The linear model is used to predict the next peak intensity and ratio is 
 33 | #' calculated between the predicted and actual intensity value. 
 34 | #' 
 35 | #' Assuming that all preceeding intensities included in the linear model 
 36 | #' are noise, the signal to noise ratio between the predicted and actual values 
 37 | #' should exceed the minimum signal to noise ratio (default DNF = 2). 
 38 | #' 
 39 | #' The function continues until either the DNF value minimum has been exceeded 
 40 | #' and is also below the maxPeaks or maximum number of peaks value. As the 
 41 | #' function must necessarily calculate potentially hundreds of linear models the 
 42 | #' RcppEigen package is used to increase the speed of computation.
 43 | #' 
 44 | #' @export
 45 | dynamicNoiseFilter <- function(spectrum.df=NULL, DNF=2, minPeaks=5, 
 46 |                                maxPeaks=20, minInt=100){
 47 |   # error handling
 48 |   if(is.null(spectrum.df)){
 49 |     stop("No spectrum matrix/dataframe supplied")    
 50 |   } else {
 51 |     # rank matrix/ dataframe by intensity
 52 |     intOrder<-order(spectrum.df[, 2])
 53 |     spectrum.df <- spectrum.df[intOrder, , drop=FALSE]
 54 |     # median bottom half of intensity values
 55 | #     medBottomHalf <- median(head(spectrum.df[, 2],
 56 | #                                  n=nrow(spectrum.df)/2))
 57 | #     medBottomHalf <- which(spectrum.df[, 2] >= medBottomHalf)[1]
 58 |     minIntIndx <- which(spectrum.df[, 2] >= minInt)[1]
 59 |     peakIndx <- seq(1, nrow(spectrum.df), 1)
 60 |     minIntIndx <- ifelse(is.na(minIntIndx), nrow(spectrum.df), minIntIndx)
 61 |     minIntIndx <- ifelse(minIntIndx == 1, 2, minIntIndx)
 62 |     # break loop if higher DNF and also less than maxPeaks
 63 | #     for(k in medBottomHalf:(nrow(spectrum.df)-1)){
 64 | if(minIntIndx < (nrow(spectrum.df)-1)){
 65 | for(k in minIntIndx:(nrow(spectrum.df)-1)){
 66 |       # calc linear model rcppeigen 
 67 |       fit <- coef(RcppEigen::fastLm(as.numeric(spectrum.df[1:k, 2]) 
 68 |                                     ~ peakIndx[1:k]))
 69 |       # predicted intensity model from intercept
 70 |       PredInt <- fit[1]+(fit[2])*(k+1)
 71 |       # calc Signal to noise ratio predicted vs. actual
 72 |       SNR <- spectrum.df[k+1, 2]/PredInt
 73 |       # if SNR reached and below max number of peaks break loop
 74 |       if(SNR >= DNF & nrow(spectrum.df) - (k+1) < maxPeaks){
 75 |         Noise.level <- as.numeric(spectrum.df[k+1, 2])
 76 |         break
 77 |       } else {
 78 |         Noise.level <- as.numeric(spectrum.df[k+1, 2])
 79 |       }
 80 |     }
 81 | } else {
 82 |   Noise.level <- spectrum.df[minIntIndx, 2]
 83 | }
 84 |     # filter by DNF noise filter level
 85 |     Noise.indx <- which(spectrum.df[, 2] >= Noise.level)
 86 |     spectrum.df <- spectrum.df[Noise.indx, , drop=FALSE]
 87 |     # sort by m/z
 88 |     spectrum.df <- spectrum.df[order(spectrum.df[,1]), , drop=FALSE]
 89 |     # number of peaks higher than minimum
 90 |     aboveMinPeaks <- nrow(spectrum.df) >= minPeaks
 91 |     # Total intensity composite spectrum 
 92 |     IntCompSpec <- sum(spectrum.df[, 2])
 93 |     # Sparse ion signal to noise ratio (mean intensity/ stdev intensity)
 94 |     if(nrow(spectrum.df) <= 1){
 95 |       TotalIntSNR <- 0
 96 |     } else {
 97 |       TotalIntSNR <- mean(spectrum.df[, 2], sd(spectrum.df[, 2]))
 98 |     }
 99 |     DNF.tmp<-list(Above.noise=spectrum.df, 
100 |                   metaData=data.frame(Noise.level=Noise.level, 
101 |                                       IntCompSpec=IntCompSpec,
102 |                                       TotalIntSNR=TotalIntSNR,
103 |                                       nPeaks=nrow(spectrum.df), 
104 |                                       stringsAsFactors = FALSE),
105 |                   aboveMinPeaks=aboveMinPeaks)
106 |     return(DNF.tmp)
107 |   }  
108 | }
109 | 


--------------------------------------------------------------------------------
/R/exactMassEle.R:
--------------------------------------------------------------------------------
 1 | #' elemental monoisotopic masses and natural abundances
 2 | #'
 3 | #' This dataset contains data taken from the table available at (\url{http://www.sisweb.com/referenc/source/exactmas.htm})
 4 | #' The variables are as follows:
 5 | #'
 6 | #' \itemize{
 7 | #'  \item eleName full element name (e.g. sodium).
 8 | #'  \item eleSymbol element symbol (e.g. Na).
 9 | #'  \item monoMass character of isotope mass(es) separated by a space.
10 | #'  \item natAbund character of natural isotope abundance(s) separated by a space.
11 | #'  }
12 | #' 
13 | #' @docType data
14 | #' @keywords datasets
15 | #' @name exactMassEle
16 | #' @usage data(exactMassEle)
17 | #' @source \url{http://www.sisweb.com/referenc/source/exactmas.htm}
18 | #' @format A data frame with 83 rows and 4 columns
19 | NULL
20 | 


--------------------------------------------------------------------------------
/R/example_mzXML_MS1features.R:
--------------------------------------------------------------------------------
 1 | #' example mzXML files and MS1 feature table (subset to 820 -- 940 seconds)
 2 | #' 
 3 | #' @description MS1features_example.csv 3720 MS1 features from XCMS diffreport peak table
 4 | #' from a study comparing repeat extractions of human dried blood spot samples
 5 | #' using 80\% acetonitrile (ACN) to 80\% methanol (MeOH).
 6 | #' Both extraction solvents consist of repeat preparations of the same 
 7 | #' sample (A, B, C) and repeat injections (1, 2) of each preparation 
 8 | #' (i.e. A1, A2, B1, B2, C1, C2).
 9 | #' The variables are as follows :
10 | #' \itemize{
11 | #'  \item EICno XCMS extracted ion chromatograms from XCMS peak tables 
12 | #'  (62 -- 24328).
13 | #'  \item mzmed median mass-to-charge (71.0853 -- 999.6138) 
14 | #'  \item rtmed retention time in seconds (820.01 -- 939.907)
15 | #'  \item ACN_80_A1 80\% acetonitrile extract peak areas prep. replicate A inj. 1
16 | #'  \item ACN_80_A2 80\% acetonitrile extract peak areas prep. replicate A inj. 2
17 | #'  \item ACN_80_B1 80\% acetonitrile extract peak areas prep. replicate B inj. 1
18 | #'  \item ACN_80_B2 80\% acetonitrile extract peak areas prep. replicate B inj. 2
19 | #'  \item ACN_80_C1 80\% acetonitrile extract peak areas prep. replicate C inj. 1
20 | #'  \item ACN_80_C2 80\% acetonitrile extract peak areas prep. replicate C inj. 2
21 | #'  \item MeOH_80_A1 80\% methanol extract peak areas prep. replicate A inj. 1
22 | #'  \item MeOH_80_A2 80\% methanol extract peak areas prep. replicate A inj. 2
23 | #'  \item MeOH_80_B1 80\% methanol extract peak areas prep. replicate B inj. 1
24 | #'  \item MeOH_80_B2 80\% methanol extract peak areas prep. replicate B inj. 2
25 | #'  \item MeOH_80_C1 80\% methanol extract peak areas prep. replicate C inj. 1
26 | #'  \item MeOH_80_C2 80\% methanol extract peak areas prep. replicate C inj. 2
27 | #'  }
28 | #' @docType data
29 | #' @keywords datasets
30 | #' @name example_mzXML_MS1features
31 | #' @format A comma delimited text file with 3720 rows and 15 variables and two
32 | #' data-dependent MS2 files in centroid mode converted to the mzXML open format 
33 | #' using MSConvert software (ProteoWizard 3.0.6965 64 bit) for each extraction 
34 | #' type.
35 | #' Data were acquired on an Agilent 6550 q-tof interfaced with a nano-flow chip 
36 | #' cube running a small molecule C18-chip.  
37 | NULL
38 | 


--------------------------------------------------------------------------------
/R/getAbs.R:
--------------------------------------------------------------------------------
 1 | #' Adapted from PubMedWordCloud to work with compMS2Miner
 2 | #' @param PMID character vector of pubMed ids to get abstracts for.
 3 | #' @details if the query sequence is too long than 500 this function will not work
 4 | #' @export
 5 | getAbs <- function(PMID){
 6 |   if(!require(XML)){
 7 |     stop('The XML package is required to use this function')
 8 |   }
 9 |   if(length(PMID) > 500){
10 |     stop('The maximum PMID length is 500')
11 |   }
12 |   if (length(PMID) > 0) {
13 |     eDDownload <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="
14 |     hlp1 <- paste(eDDownload, paste(PMID, collapse = ",", 
15 |                                     sep = ""), sep = "")
16 |     hlp2 <- paste(hlp1, "&rettype=abstract", sep = "")
17 |     hlpURL <- RCurl::getURL(hlp2, .opts=RCurl::curlOptions(followlocation=TRUE))
18 |     testDoc <- XML::xmlTreeParse(hlpURL, useInternalNodes = TRUE)
19 |     topFetch <- XML::xmlRoot(testDoc)
20 |     abst <- XML::xpathSApply(topFetch, "//Abstract", xmlValue)
21 |   }
22 |   else {
23 |     abst = c("Zero", "Articles", "Found")
24 |   }
25 |   return(abst)
26 | } # end function
27 | 


--------------------------------------------------------------------------------
/R/getLoginDetails.R:
--------------------------------------------------------------------------------
 1 | #' Get Login details for CouchDB
 2 | #' 
 3 | #' tcltk GUI to get login details for couchDB
 4 | #' 
 5 | #' @param Host couchDB host name, defaults to localhost. Can be online repository.
 6 | #' @param Name couchDB administrator username (used for http commands, not stored).
 7 | #' @param Password couchDB administrator password (used for http commands, not stored).
 8 | #' @return login details for couchDB
 9 | #' @export
10 | getLoginDetails  <-  function(){
11 |   # Based on code by Barry Rowlingson
12 |   # http://r.789695.n4.nabble.com/tkentry-that-exits-after-RETURN-tt854721.html
13 |   #none
14 |   tt  <-  tcltk::tktoplevel()
15 |   tcltk::tkwm.title(tt,  "Get login details")
16 |   Host <- tcltk::tclVar("localhost")
17 |   Name  <-  tcltk::tclVar("")
18 |   Password  <-  tcltk::tclVar("")
19 |   entry.Host  <-  tcltk::tkentry(tt, width="20",  textvariable=Host)
20 |   entry.Name  <-  tcltk::tkentry(tt, width="20",  textvariable=Name)
21 |   entry.Password  <-  tcltk::tkentry(tt,  width="20",  show="*",  
22 |                               textvariable=Password)
23 |   tcltk::tkgrid(tcltk::tklabel(tt,  text="Please enter your login details."))
24 |   tcltk::tkgrid(tcltk::tklabel(tt,  text="couchDB host: "), entry.Host)
25 |   tcltk::tkgrid(tcltk::tklabel(tt,  text="username: "), entry.Name)
26 |   tcltk::tkgrid(tcltk::tklabel(tt,  text="password: "), entry.Password)
27 |   
28 |   OnOK  <-  function()
29 |   { 
30 |     tcltk::tkdestroy(tt) 
31 |   }
32 |   OK.but  <- tcltk::tkbutton(tt, text=" OK ",  command=OnOK)
33 |   tcltk::tkbind(entry.Password,  "<Return>",  OnOK)
34 |   tcltk::tkgrid(OK.but)
35 |   tcltk::tkfocus(tt)
36 |   tcltk::tkwait.window(tt)
37 |   
38 |   invisible(c(Host=tcltk::tclvalue(Host), Username=tcltk::tclvalue(Name),  
39 |               Password=tcltk::tclvalue(Password)))
40 | }
41 | 


--------------------------------------------------------------------------------
/R/getTitles.R:
--------------------------------------------------------------------------------
 1 | #' get PubMed title function adapted from PubChemWordcloud package v 0.3.2
 2 | #' @param pmid pubmed id number
 3 | #' @export
 4 | getTitles <- function(pmid){
 5 |   if(length(pmid) > 0){
 6 |     eDDownload <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="
 7 |     hlp1 <- paste(eDDownload, paste(pmid, collapse = ",", 
 8 |                                     sep = ""), sep = "")
 9 |     hlp2 <- paste(hlp1, "&rettype=Abstract", sep = "")
10 |     hlpURL <- RCurl::getURL(hlp2, .opts=RCurl::curlOptions(followlocation=TRUE))
11 |     testDoc <- XML::xmlTreeParse(hlpURL, useInternalNodes = TRUE)
12 |     topFetch <- XML::xmlRoot(testDoc)
13 |     Title <- XML::xpathSApply(topFetch, "//ArticleTitle", XML::xmlValue)
14 |   } else {
15 |     Title<-c("Zero", "Articles", "Found")
16 |   }
17 |   return(Title)
18 | }
19 | 


--------------------------------------------------------------------------------
/R/lipidAbbrev.R:
--------------------------------------------------------------------------------
 1 | #' lipid abbreviations table for pubmed text mining
 2 | #' 
 3 | #' This dataset contains fields from the LMSD database classifications and 
 4 | #' abbreviations (\url{http://www.lipidmaps.org/data/classification/lipid_cns.html})
 5 | #' The variables are as follows:
 6 | #'
 7 | #' \enumerate{
 8 | #'  \item Class lipid class name this string is intended to be searched in PubMed.
 9 | #'  \item Abbreviation abbreviation for lipid class.
10 | #'  \item regexpr an R regular expression to try to detect the lipid class in a
11 | #'  database compound name. e.g. Searching PubMed for the string "SM(18:1/14:0)"
12 | #'  for example will return no PubMed ids however searching using "sphingomyelin"
13 | #'  will return a more representative number of PubMed abstract Ids. 
14 | #'  }
15 | #' @docType data
16 | #' @keywords datasets
17 | #' @name lipidAbbrev
18 | #' @usage data(lipidAbbrev)
19 | #' @source \url{http://www.lipidmaps.org/data/classification/lipid_cns.html} 
20 | #' @format A data frame with 18 rows and 3 columns
21 | NULL
22 | # common phospholipid abbreviations
23 | # lipidmaps
24 | # dated 07/15/2016
25 | # require(XML)
26 | # htmlTables <- XML::readHTMLTable('http://www.lipidmaps.org/data/classification/lipid_cns.html', stringsAsFactors = FALSE)
27 | # lipidAbbrev <- data.frame(stringsAsFactors = FALSE)
28 | # for(i in 2:4){
29 | # lipidTableTmp <- htmlTables[[i]]
30 | # # Glycosphingolipids
31 | # lipidTableTmp <- lipidTableTmp[grepl('Glycosphingolipids', lipidTableTmp$Class) == FALSE, , drop=FALSE]
32 | # lysoIndx <- grepl('lyso', lipidTableTmp$Abbreviation)
33 | # indxTmp <-  grepl('[A-Z]', lipidTableTmp$Examples) | lysoIndx
34 | # lipidTableTmp <- lipidTableTmp[indxTmp, , drop=FALSE]
35 | # lysoIndx <- grepl('lyso', lipidTableTmp$Abbreviation)
36 | # 
37 | # lipidTableTmp$Class <- gsub('glycerophospho', 'phosphatidyl', lipidTableTmp$Class, ignore.case = TRUE)
38 | # lipidTableTmp$Class <- gsub('radylglycerolipids', 'glycerides', lipidTableTmp$Class)
39 | # lipidTableTmp$Abbreviation <- gsub(' \\(.+', '', lipidTableTmp$Abbreviation)
40 | # lipidTableTmp$Examples <- NULL
41 | # lipidTableTmp$regexpr <- gsub('$', '\\\\(', lipidTableTmp$Abbreviation)
42 | # lipidTableTmp$regexpr <- gsub('^', '\\^', lipidTableTmp$regexpr)
43 | # if(any(lysoIndx)){
44 | # lysoLipids <- lipidTableTmp[lysoIndx, , drop=FALSE]
45 | # lysoLipids$Class <- paste0('lyso', lysoLipids$Class)
46 | # lysoLipids$Abbreviation <- paste(paste0('L', lysoLipids$Abbreviation), 
47 | #                                  paste0('lyso', lysoLipids$Abbreviation), sep=' ')
48 | # lysoLipids$regexpr <- gsub(' ', '\\\\(|\\^',  lysoLipids$Abbreviation)
49 | # lysoLipids$regexpr <- gsub('$', '\\\\(', lysoLipids$regexpr)
50 | # lysoLipids$regexpr <- gsub('^', '\\^', lysoLipids$regexpr)
51 | # 
52 | # lipidTableTmp <- rbind(lipidTableTmp, lysoLipids)  
53 | # }
54 | # clIndx <- grep('^CL$', lipidTableTmp$Abbreviation)
55 | # if(length(clIndx) > 0){
56 | # lipidTableTmp$Class[clIndx] <- 'cardiolipin'  
57 | # }
58 | # lipidAbbrev <- rbind(lipidAbbrev, lipidTableTmp)
59 | # }
60 | # lipidAbbrev$Class <- tolower(gsub('s$', '', lipidAbbrev$Class))
61 | 


--------------------------------------------------------------------------------
/R/metFragAdducts.R:
--------------------------------------------------------------------------------
 1 | #' data.frame of customizable metFrag adduct types and codes
 2 | #' 
 3 | #' This dataset is the default adduct type table for the \code{\link{metID.metFrag}}
 4 | #' function. A custom table can be created following this format as more adduct
 5 | #' types are added in future versions of the metFrag command line tool.
 6 | #' N.B. adduct names must match those supplied to the \code{\link{adduct2mass}} function internal
 7 | #' to the \code{\link{metID.dbAnnotate}} function.
 8 | #'
 9 | #' \enumerate{
10 | #'  \item adduct adduct name string must match that supplied to adduct2mass.
11 | #'  \item metFragCode the MetFrag command line tool code for the adduct.
12 | #'  \item mode polarity. must be either 'pos' or 'neg'. 
13 | #'  }
14 | #' @docType data
15 | #' @keywords datasets
16 | #' @name metFragAdducts
17 | #' @usage data(metFragAdducts)
18 | #' @source \url{http://c-ruttkies.github.io/MetFrag/projects/metfragcl/} 
19 | #' @format A data frame with 9 rows and 3 columns
20 | NULL
21 | 


--------------------------------------------------------------------------------
/R/mfSearchPUG.R:
--------------------------------------------------------------------------------
 1 | #' Search pubmed compound for molecular formula using the pubchem power user gateway (PUG)
 2 | #' @param mf molecular formula character vector of length one e.g. 'C10H21N'. 
 3 | #' @return returns a character vector of pubmed compound cids matching the molecular
 4 | #' formula
 5 | #' @export
 6 | mfSearchPUG <- function(mf='C10H21N'){
 7 |   if(!require(XML)){
 8 |     stop('package XML must be installed to use this function.')
 9 |   }
10 |   qUrl <- paste0('http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/formula/', mf, '/XML')
11 |   parsedhtml <- XML::htmlParse(qUrl)
12 |   
13 |   listKeyTmp <- parsedhtml['//listkey', fun=xmlValue][[1]]
14 |   
15 |   checkUrl <- paste0('http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/listkey/', 
16 |                      listKeyTmp, '/cids/XML')
17 |   parsedCheck <- XML::htmlParse(checkUrl)
18 |   
19 |   messageTmp <- tryCatch(parsedCheck['//message', fun=xmlValue][[1]], error=function(cond){
20 |     message('query complete')
21 |   })
22 |   
23 |   message(paste0(messageTmp, '...\n'))
24 |   flush.console()
25 |   # loop until query completed
26 |   while(messageTmp == 'Your request is running'){
27 |     Sys.sleep(3)
28 |     parsedCheck <- tryCatch(XML::htmlParse(checkUrl), error=function(cond){
29 |       return('no results returned')
30 |     })
31 |     
32 |     if(is.character(parsedCheck)){
33 |       message(paste0(parsedCheck, '...\n'))
34 |       flush.console()
35 |       break
36 |     }
37 |     messageTmp <- tryCatch(parsedCheck['//message', fun=xmlValue][[1]], error=function(cond){
38 |       return('query complete')
39 |     })
40 |     # when query complete break loop
41 |     if(messageTmp == 'query complete'){
42 |       message(paste0(messageTmp, '...\n'))
43 |       flush.console()
44 |       break
45 |     }
46 |   }
47 |   
48 |   if(!is.character(parsedCheck)){
49 |   cidsTmp <- parsedCheck['//cid', fun=xmlValue]
50 |   
51 |   # n cids returned
52 |   message(length(cidsTmp), ' pubChem compound ids returned...\n')
53 |   flush.console()
54 |   return(as.numeric(unlist(cidsTmp)))
55 |   } 
56 |   # obtain identifiers for all cids
57 |   # https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1,2,3,4,5/property/IUPACname,MolecularFormula,InChIKey,MonoisotopicMass,CanonicalSMILES/XML
58 |   # 
59 | } # end function
60 | 


--------------------------------------------------------------------------------
/R/optimCutOff.R:
--------------------------------------------------------------------------------
 1 | #' optimum cutoff for a correlation or spectral similarity matrix
 2 | #' @param x matrix of correlation coefficients or spectral similarity values
 3 | #' column and row names must match
 4 | #' @param cutOffSeq numeric a vector of cut-off values to test. (default=seq(0.01, 1, 0.01) a numeric vector of length 100)
 5 | #' @param diffConsecVals numeric the scaled difference between consecutive values
 6 | #' to identify the plateau in the network density (default=1.5*10^-3 or 0.0015).
 7 | #' @return a list containing two named elements "estCutOff" a numeric estimated optimal cut-off value and "testData" a matrix
 8 | #' of the test results at each cut-off value. The function also plots the result.
 9 | #' @references Koh Aoki, Yoshiyuki Ogata, and Daisuke Shibata
10 | #' Approaches for Extracting Practical Information from Gene Co-expression Networks in Plant Biology
11 | #' Plant Cell Physiol (2007) 48 (3): 381-390 first published online January 23, 2007 doi:10.1093/pcp/pcm013
12 | #' @export
13 | optimCutOff <- function(x=NULL, cutOffSeq=seq(0.01, 1, 0.01), diffConsecVals=1.5*10^-3,
14 |                         maxCutOff=0.95){
15 |   if(!require(igraph)){
16 |     stop('igraph package must be installed to use this function')
17 |   }
18 |   if(is.null(row.names(x)) | is.null(colnames(x))){
19 |     stop('The matrix must have row and column names')
20 |   }
21 |   if(!all.equal(row.names(x), colnames(x))){
22 |     stop('The row and column names must be equal')
23 |   }
24 |   # remove upper tri
25 |   x[upper.tri(x, diag=TRUE)] <- 0
26 |   x <- abs(x)
27 |   # ID features above below corrThresh
28 |   netData <- matrix(0, ncol=4, nrow=length(cutOffSeq))
29 |   colnames(netData) <- c('nNodes', 'nEdges', 'nClust', 'netDense')
30 |   row.names(netData) <- cutOffSeq
31 |   pb <- txtProgressBar(max=length(cutOffSeq), style=3)
32 |   for(i in 1:length(cutOffSeq)){
33 |     setTxtProgressBar(pb, i)
34 |     arrIdxTmp <- which(x >= cutOffSeq[i], arr.ind = TRUE)
35 |     arrIdxTmp <- cbind(colnames(x)[arrIdxTmp[, 1]], colnames(x)[arrIdxTmp[, 2]])
36 |     if(nrow(arrIdxTmp) > 0){
37 |       netTmp <- igraph::graph(as.vector(t(arrIdxTmp[, c(1, 2)])))
38 |       netData[i, 'nNodes'] <- length(igraph::V(netTmp))
39 |       netData[i, 'nEdges'] <- length(igraph::E(netTmp))
40 |       netData[i, 'nClust'] <- igraph::clusters(netTmp)$no
41 |     } 
42 |   }
43 |   # network density
44 |   pConn <- {netData[, 'nNodes'] * {netData[, 'nNodes'] - 1}}/2
45 |   netData[, 'netDense'] <- netData[, 'nEdges']/pConn
46 |   netData[is.na(netData)] <- 0
47 |   # estimate best cutoff value
48 |   # identify plateau in the network density
49 |   estCutOff <- 0
50 |   multVal <- 1
51 |   while(estCutOff == 0){
52 |   diffConsecSeq <- abs(diff(netData[, 'netDense']/max(netData[, 'netDense'])))
53 |   diffConsecIdx <- which(diffConsecSeq < {diffConsecVals * multVal} & diffConsecSeq > {diffConsecVals * {diffConsecVals * 0.1}} & as.numeric(names(diffConsecSeq)) > 0.6)
54 |   firstDiffConsec <- ifelse(length(diffConsecIdx) == 0, NA, min(diffConsecIdx))
55 |   if(!is.na(firstDiffConsec)){
56 |   # at least two consecutive values to establish plateau
57 |   firstDiffConsec <- ifelse({firstDiffConsec + 1} %in% diffConsecIdx, firstDiffConsec, NA)
58 |   }
59 |   multVal <- multVal + 0.01
60 |   estCutOff <- ifelse(is.na(firstDiffConsec), 0, cutOffSeq[firstDiffConsec + 1])
61 |     if(multVal > 2){
62 |       estCutOff <- NA
63 |       break
64 |     }
65 |   }
66 |   if(is.na(estCutOff)){
67 |     warning('\nFailed to find a plateau in the network density. Returning an approximation close to the maximum number of clusters on the upward slope.\n', immediate. = TRUE)
68 |     flush.console()
69 |     nClustTmp <- netData[, 'nClust']
70 |     maxClustTmp <- max(nClustTmp)
71 |     minClustTmp <- min(nClustTmp[cutOffSeq <= ifelse(min(cutOffSeq) > 0.4, min(cutOffSeq), 0.4)])
72 |     idxTmp <- c(max(which(nClustTmp == minClustTmp)), which(nClustTmp == maxClustTmp))
73 |     medUpSlope <- round(quantile(idxTmp[1]:idxTmp[2], probs = seq(0, 1, 0.33))['66%'], 0)
74 |     estCutOff <- cutOffSeq[medUpSlope]
75 |   }
76 |   if(estCutOff >= maxCutOff){
77 |     warning('\nEstimated cut-off value greater than ', round(maxCutOff, 2), '. This indicates that a large proportion of the network nodes are highly related. In the case of spectral similarity for example this could indicate large numbers of similar spectra which were not removed during the combineMS2.removeContam step for example or biologically related such as lipids.\n', immediate. = TRUE)
78 |     estCutOff <- maxCutOff
79 |     }
80 |   # estimate cutoff
81 |   par(mfrow=c(2, 2))
82 |   for(j in 1:ncol(netData)){
83 |     plot(x=cutOffSeq, y=netData[, j], xlab='cut off value', pch=19, col='red', main=colnames(netData)[j], ylab=colnames(netData)[j])
84 |     abline(v=rep(estCutOff, length(cutOffSeq)), col='blue')
85 |     text(x=estCutOff, y=max(netData[, j])/2, paste0('estCutOff-', round(estCutOff, 2)), col='blue')  
86 |   }
87 |   par(mfrow=c(1, 1))
88 |   return(list(estCutOff=estCutOff, testData=netData))
89 | } # end function
90 | 


--------------------------------------------------------------------------------
/R/pubMedSearch.R:
--------------------------------------------------------------------------------
 1 | #' return cleaned abstracts from pubmed from searched key words
 2 | #' @param keys character vector of compound names to search pubmed with
 3 | #' @param n numeric maximum number of results to return. The maximum and default is 500.
 4 | #' @param maxChar numeric maximum number of characters in cleaned abstract words to return.
 5 | #' @param ... further arguments to the \code{\link{cleanAbstracts}} function.
 6 | #' @return a list containing 3 named elements:
 7 | #' 1. titles character vector of Abstract title(s)
 8 | #' 2. abs character vector of abstract text(s).
 9 | #' 3. clAbs clean abstract word frequency data.frame with column names 'word' and 'freq'. 
10 | #' @seealso PubMedWordcloud, \code{\link{getAbstracts}}, \code{\link{cleanAbstracts}}.
11 | #' @export
12 | pubMedSearch <- function(keys=NULL, n=500, maxChar=50, ...){
13 |   #error handling
14 |   if(is.null(keys)){
15 |     stop('argument keys is missing with no default')
16 |   }
17 |   if(n > 500){
18 |     stop('The maximum PMID key length is 500')
19 |   }
20 |   message('searching pubMed for the following key words :\n', paste0(keys, "\n"))
21 |   flush.console()
22 |   # search key words against pub med abstracts 
23 |   PMIDs <- PMIDsearch(keys, n)
24 |   message(length(PMIDs) - 1, ' pubmed IDs returned')
25 |   flush.console()
26 |   
27 |   if(length(PMIDs) > 0)
28 |   { 
29 |     message('obtaining abstract text and titles from pubmed...')
30 |     flush.console()
31 |     # obtain abstract text
32 |     Abs <- getAbs(PMIDs[-1])
33 |     # return titles
34 |     titles <- getTitles(PMIDs[-1])
35 |     if(length(Abs) > 0){
36 |       message('cleaning abstracts (removing punctuations, numbers, translate characters to lower or upper case, remove stopwords, stemming words...')
37 |       flush.console()
38 |       
39 |       ClAbs <- PubMedWordcloud::cleanAbstracts(Abs, ...)
40 |     
41 |       # only keep word which are less than max characters
42 |       ClAbs <- ClAbs[which(sapply(as.character(ClAbs$word), nchar) < maxChar), , drop=FALSE]
43 |       # return results
44 |       return(list(titles=titles, Abstracts=Abs, ClAbs=ClAbs))
45 |    
46 |     } else {
47 |       stop("No abstract text was returned for the keyword(s) :\n", paste0(keys, "\n")) 
48 |     }
49 |   } else {
50 |   stop("No pubmed ids returned for the keyword(s) :\n", paste0(keys, "\n")) 
51 |   }
52 | } # end function
53 | 


--------------------------------------------------------------------------------
/R/runGitHubApp.R:
--------------------------------------------------------------------------------
 1 | #' run github shiny modified from shiny and devtools
 2 | #' @param repo character github username and repository name. in the form "username/repositoryName"
 3 | #' @param subdir character sub-directory of the repo containing the shiny and data.
 4 | #' @param dirPath character full-path to a directory in which to save the contents of the zip file. If unsupplied shiny app will be opened from a temporary directory. 
 5 | #' @param auth_token character private repo authorization token. 
 6 | #' @param browserLaunch logical launch app in web browser (default = TRUE).
 7 | #'
 8 | #' @export
 9 | runGitHubApp <- function(repo=NULL, subdir=NULL, dirPath=NULL, auth_token=NULL, browserLaunch=TRUE){
10 |   # error handling
11 |   stopifnot(is.character(repo))
12 |   
13 |     res <- strsplit(repo, "/")[[1]]
14 |     if(length(res) != 2){ 
15 |       stop("'repo' must be of the form 'username/repo'")
16 |     }
17 |     username <- res[1]
18 |     repo <- res[2]
19 |   
20 |   remote <- devtools:::remote("github", host = "api.github.com", repo = repo, subdir =
21 |                               subdir, username = username, ref = NULL, sha = NULL, 
22 |                               auth_token = auth_token)
23 |   
24 |   bundle <- devtools:::remote_download(remote, quiet = FALSE)
25 |   on.exit(unlink(bundle), add = TRUE)
26 |   outdir <- tempfile(pattern = "compMS2Miner")
27 |   dir.create(outdir)
28 |   
29 |   pathTmp <- utils::unzip(bundle, exdir = outdir) 
30 |   # if sub directory not supplied then print list of options to console
31 |   if(is.null(subdir)){
32 |    availOpts <- basename(pathTmp) 
33 |    availOpts <- availOpts[grep('\\.zip$', availOpts)]
34 |    availOpts <- gsub('\\.zip', '', availOpts)
35 |    message('\n"subdir" argument not supplied. Available directories within the repo include:\n',
36 |            paste0(availOpts, '\n'), '\nPlease type a directory name without quotations and press [enter] to continue:')
37 |    flush.console()
38 |    subdir <- readline()
39 |   }
40 |   tmpIndx <- grepl(paste0(subdir, '.zip$'), pathTmp)
41 |   if(!any(tmpIndx)){
42 |     stop(subdir, ' sub-directory name not found please check and try again...')
43 |   }
44 |   tmpAppDir <- tempfile(pattern="compMS2Miner")
45 |   if(!is.null(dirPath)){
46 |   dirPath <- gsub("/$|\\\\$", '', dirPath)
47 |   }
48 |   appPathTmp <- utils::unzip(pathTmp[tmpIndx], exdir=ifelse(is.null(dirPath), tmpAppDir, dirPath))
49 |   object <- shiny::runApp(dirname(appPathTmp[1]), launch.browser = browserLaunch)
50 |   return(object)
51 | } # end function
52 | 


--------------------------------------------------------------------------------
/R/signalGrouping.R:
--------------------------------------------------------------------------------
 1 | #' Signal grouping
 2 | #'
 3 | #' Euclidean distances between m/z signals are hierarchically clustering using 
 4 | #' the average method and the composite spectrum groups determined by a absolute
 5 | #' error cutoff
 6 | #' 
 7 | #' @param spectrum.df a dataframe or matrix with two or more columns:
 8 | #' 1. Mass/ Mass-to-charge ratio
 9 | #' 2. Intensity
10 | #' @param mzError interpeak absolute m/z error for signal grouping 
11 | #' (Default = 0.001)
12 | #' @return dataframe of m/z grouped signals, the m/z values of the input 
13 | #' dataframe/ matrix peak groups are averaged and the signal intensities summed.
14 | #' @export
15 | signalGrouping <- function(spectrum.df=NULL, mzError=0.001, 
16 |                            minPeaks = 5){
17 |   # error handling
18 |   if(is.null(spectrum.df)){
19 |     stop("No spectrum matrix/dataframe supplied")    
20 |   } else if(nrow(spectrum.df) > 1){
21 |     hr <- fastcluster::hclust(dist(spectrum.df[, 1]), method = "median", members=NULL)
22 |     # cut tree according to absolute m/z error
23 |     spectrum_group <- cutree(hr, h=mzError)
24 |      # calculate weighted mean of the m/z and sum signal within each peak group
25 |     mass <- do.call(c, as.list(by(spectrum.df, as.factor(spectrum_group), function(x){
26 |                                                    weighted.mean(x[, 1], x[, 2])})))
27 |     grouped.df <- data.frame(mass = mass, 
28 |                             intensity = tapply(spectrum.df[, 2], 
29 |                                                  as.factor(spectrum_group), sum),
30 |                               stringsAsFactors = FALSE)
31 |     #average any additional columns i.e. retention time
32 |     if(ncol(spectrum.df) > 2){
33 |     groupedCols <- apply(spectrum.df[, 3:ncol(spectrum.df), drop=FALSE], 2, function(x) 
34 |                          tapply(x, as.factor(spectrum_group), mean))
35 |     grouped.df <- cbind(grouped.df, groupedCols)
36 |     }
37 |   } else {
38 |     grouped.df <- spectrum.df
39 |   }
40 |     if(nrow(grouped.df) >= minPeaks){
41 |       if(!is.null(colnames(spectrum.df))){
42 |       colnames(grouped.df) <- colnames(spectrum.df)
43 |       }
44 |       return(grouped.df)
45 |     } else {
46 |       return("Less than minPeak")  
47 |     }
48 | }
49 | 


--------------------------------------------------------------------------------
/R/smiles2Form.R:
--------------------------------------------------------------------------------
 1 | #' Convert SMILES code to atomic formula
 2 | #' 
 3 | #' Each distinct SMILES code is converted once with \code{ChemmineR::smiles2sdf}
 4 | #' and \code{ChemmineR::MF} (\code{addH=TRUE}) and the result is mapped back on
 5 | #' to every position of the input vector.
 6 | #' @param SMILES character vector of SMILES codes to convert
 7 | #' @return a character vector of the formula(e), in the same order as SMILES.
 8 | smiles2Form <- function(SMILES=NULL){
 9 |   if(is.null(SMILES)){
10 |     stop('SMILES argument is missing with no default')
11 |   }
12 |   if(!require(ChemmineR)){
13 |     stop('ChemmineR package must be installed to use this function.')
14 |   }
15 |   # nothing to convert
16 |   if(length(SMILES) == 0){
17 |     return(character(0))
18 |   }
19 |   # Identify replicates by the SMILES code itself. Implicit hydrogens are not
20 |   # written in SMILES, so grouping codes by their heavy atom composition would
21 |   # give e.g. 'CC' and 'C=C' the same formula.
22 |   nonRedSmiIdx <- duplicated(SMILES) == FALSE
23 |   # only convert non duplicates
24 |   sdfTmp <- suppressWarnings(ChemmineR::smiles2sdf(SMILES[nonRedSmiIdx]))
25 |   formTmp <- ChemmineR::MF(sdfTmp, addH=TRUE)
26 |   names(formTmp) <- SMILES[nonRedSmiIdx]
27 |   # map the unique conversions back on to the full input vector
28 |   convFormulae <- formTmp[SMILES]
29 |   names(convFormulae) <- NULL
30 |   return(convFormulae)
31 | } # end function
32 | 


--------------------------------------------------------------------------------
/R/smiles2MonoMassForm.R:
--------------------------------------------------------------------------------
 1 | #' Convert SMILES code to monoisotopic mass and formula
 2 | #' 
 3 | #' The formula of each SMILES code is obtained with \code{ChemmineR::MF} 
 4 | #' (\code{addH=TRUE}). Its mass is the sum over all atoms of the mass of the
 5 | #' most naturally abundant isotope of the element in the \code{exactMassEle} data.
 6 | #' @param SMILES character vector of SMILES codes to convert
 7 | #' @return a named numeric vector of same length as the SMILES input containing the
 8 | #' monoisotopic mass(es) and named using the formula(e).
 9 | smiles2MonoMassForm <- function(SMILES=NULL){
10 |   if(is.null(SMILES)){
11 |     stop('SMILES argument is missing with no default')
12 |   }
13 |   if(!require(ChemmineR)){
14 |     stop('ChemmineR package must be installed to use this function.')
15 |   }
16 |   sdfTmp <- suppressWarnings(ChemmineR::smiles2sdf(SMILES))
17 |   formTmp <- ChemmineR::MF(sdfTmp, addH=TRUE)
18 |   # Split each formula in to space separated element groups, a space is put
19 |   # before every upper case letter so the case of the formula must be kept
20 |   # (two letter symbols such as 'Cl' stay together). The trailing space makes
21 |   # sure a final element without a count still gives an (empty) count token.
22 |   eleGroupsStr <- gsub('([[:upper:]])', ' \\1', formTmp)
23 |   eleGroupsStr <- gsub('$', ' ', eleGroupsStr)
24 |   eleGroupsStr <- gsub("^ ", '', eleGroupsStr)
25 |   # element symbols and number of atoms of each group (no number means 1)
26 |   eleOnly <- gsub('[[:punct:]]|[0-9]', '', eleGroupsStr)
27 |   nEleOnly <- gsub('[[:punct:]]|[A-Za-z]', "", eleGroupsStr)
28 |   eleOnly <- strsplit(eleOnly, ' ')
29 |   nEleOnly <- lapply(strsplit(nEleOnly, ' '), function(x) as.numeric(ifelse(x == '', 1, x)))
30 |   
31 |   # mass of the most abundant isotope of every element in the table
32 |   data("exactMassEle")
33 |   massMostAbund <- apply(exactMassEle, 1, function(x){
34 |     maxAbundTmp <- which.max(as.numeric(strsplit(x['natAbund'], ' ')[[1]]))
35 |     mass <- as.numeric(strsplit(x['monoMass'], ' ')[[1]])[maxAbundTmp]
36 |     return(mass)
37 |   })
38 |   names(massMostAbund) <- exactMassEle$eleSymbol
39 |   # monoisotopic mass
40 |   monoMass <- vector('numeric', length(eleOnly))
41 |   for(i in seq_along(eleOnly)){
42 |     # repeat every element symbol by its number of atoms and sum the masses
43 |     atomsTmp <- rep(eleOnly[[i]], times=nEleOnly[[i]])
44 |     monoMass[i] <- sum(massMostAbund[atomsTmp])
45 |   }
46 |   names(monoMass) <- formTmp
47 |   return(monoMass)
48 | } # end function
49 | 


--------------------------------------------------------------------------------
/R/subFormulae.R:
--------------------------------------------------------------------------------
 1 | #' subtract atomic formula y from atomic formula x
 2 | #' 
 3 | #' Formulae are subtracted pair-wise (x[i] - y[i]). An element is read as an 
 4 | #' upper case letter, any following lower case letters and an optional count.
 5 | #' An element written more than once in a formula has its counts summed.
 6 | #' @param x character vector of atomic formulae (must be same length as y).
 7 | #' @param y character vector of atomic formulae (must be same length as x).
 8 | #' @return character vector, same length as x, of the remaining formulae. 
 9 | #' Elements are ordered by first appearance in x then y, elements with a 
10 | #' remaining count of zero are dropped and a count of one is not written. If y
11 | #' contains more atoms of an element than x the count is negative (e.g. 'H-2').
12 | subFormulae <- function(x=NULL, y=NULL){
13 |   if(length(x) != length(y)){
14 |     stop('x and y must be the same length.')
15 |   }
16 |   # split a vector of formulae in to element symbols and atom numbers
17 |   splitFormulae <- function(form){
18 |     # a space before every upper case letter separates the elements, the 
19 |     # trailing space makes sure a final element without a number still 
20 |     # gives an (empty) number token
21 |     eleGrStr <- gsub('([[:upper:]])', ' \\1', form)
22 |     eleGrStr <- gsub('$', ' ', eleGrStr)
23 |     eleGrStr <- gsub("^ ", '', eleGrStr)
24 |     list(ele=strsplit(gsub('[[:punct:]]|[0-9]', '', eleGrStr), ' '),
25 |          nEle=strsplit(gsub('[[:punct:]]|[A-Za-z]', "", eleGrStr), ' '))
26 |   }
27 |   # atom numbers of one formula as a vector named by all the unique elements
28 |   countAtoms <- function(ele, nEle, constAtoms){
29 |     # no number means one atom
30 |     nEle[nEle == ''] <- 1
31 |     if(length(ele) > 0){
32 |       # sum the numbers of any element occurring more than once (e.g. CH3CH2OH)
33 |       sumAts <- tapply(as.numeric(nEle), factor(ele, levels=unique(ele)), sum)
34 |       constAtoms[names(sumAts)] <- as.numeric(sumAts)
35 |     }
36 |     return(constAtoms)
37 |   }
38 |   xSplit <- splitFormulae(x)
39 |   ySplit <- splitFormulae(y)
40 |   # unique elements
41 |   uniEle <- unique(c(unlist(xSplit$ele), unlist(ySplit$ele)))
42 |   
43 |   constAtoms <- vector('numeric', length(uniEle))
44 |   names(constAtoms) <- uniEle
45 |   remFormulae <- vector('character', length(x))
46 |   # seq_along so that zero length input returns character(0)
47 |   for(i in seq_along(x)){
48 |     xAts <- countAtoms(xSplit$ele[[i]], xSplit$nEle[[i]], constAtoms)
49 |     yAts <- countAtoms(ySplit$ele[[i]], ySplit$nEle[[i]], constAtoms)
50 |     # subtract y from x
51 |     remAts <- xAts - yAts
52 |     # remove zeros
53 |     remAts <- remAts[remAts != 0]
54 |     remAts[remAts == 1] <- ''
55 |     # collapse to make new formula
56 |     remFormulae[i] <- paste(paste0(names(remAts), remAts), collapse='')
57 |   }
58 |   return(remFormulae)
59 | } # end function


--------------------------------------------------------------------------------
/R/subStructure.Annotate.R:
--------------------------------------------------------------------------------
 1 | #' composite spectra substructure annotation 
 2 | #' @param object a compMS2 class object.
 3 | #' @param Frag_mzabs Absolute mass accuracy difference to identify neutral losses 
 4 | #' and fragments in composite spectra (default = 0.01).
 5 | #' @param SubStrs substructure data frame (default = Substructure_masses)
 6 | #' see ?Substructure_masses for details of the mandatory table fields/ format
 7 | #' @param minRelInt minimum relative intensity to consider a spectral signal
 8 | #' for substructure annotation (default = 5 i.e. 5\% rel. int.).
 9 | #' @return the compMS2 object in which every composite spectrum has gained the
10 | #' relative intensity, interfragment and precursor-fragment differences and the
11 | #' name, SMILES and substructure type of any matching fragment/ neutral loss.
12 | #' @export
13 | setGeneric("subStructure.Annotate", function(object, ...) standardGeneric("subStructure.Annotate"))
14 | 
15 | setMethod("subStructure.Annotate", signature = "compMS2", 
16 |           function(object, Frag_mzabs = 0.01, SubStrs = Substructure_masses, 
17 |                    minRelInt = 5){
18 |   
19 |   # error handling
20 |   if(class(object) != "compMS2"){
21 |     stop("argument object is not an CompMS2 class object")
22 |   } 
23 |     # every mandatory column must be present in the supplied table, any 
24 |     # additional columns are not used
25 |     if(!all(colnames(Substructure_masses) %in% colnames(SubStrs))){
26 |       stop('column names for the substructure masses data frame supplied do not match the required
27 |            naming structure the required column names are as follows : \n',
28 |            paste0(1:ncol(Substructure_masses), ". ", colnames(Substructure_masses), "\n"))
29 |     }
30 |     # add parameters into object
31 |     Parameters(object)$Frag_mzabs <- Frag_mzabs
32 |     Parameters(object)$minRelInt <- minRelInt
33 |     # mode indx: rows flagged (== 1) in the column named by the mode parameter
34 |     mode.indx <- SubStrs[, Parameters(object)$mode] == 1
35 |     # frag indx
36 |     Fragments <- SubStrs[SubStrs$frag == 1 & mode.indx, , drop = FALSE]
37 |     # neut loss indx
38 |     Neutral.losses <- SubStrs[SubStrs$Neut.loss == 1 & mode.indx, , drop = FALSE]
39 |     # obtain MS1 mzs from compMS2 object
40 |     MS1_mzs <- sapply(metaData(object), function(x) unlist(x[grep("MS1_mz", names(x))])[1])
41 |     # comp spectra
42 |     comp_spectra.tmp <- compSpectra(object)
43 |     # indices of the substructure masses within +/- Frag_mzabs of a value, only 
44 |     # returned if the signal is above the minimum relative intensity
45 |     matchIndx <- function(value, subStrMasses, aboveMin){
46 |       value <- as.numeric(value)
47 |       which(value < subStrMasses+Frag_mzabs & value > subStrMasses-Frag_mzabs & aboveMin)
48 |     }
49 |     # collapse a column of the matched rows, empty entries replaced by a label
50 |     collapseCol <- function(subStrTab, indx, colName, emptyLab){
51 |       paste(ifelse(subStrTab[indx, colName] == '', emptyLab, subStrTab[indx, colName]), collapse=";")
52 |     }
53 |     message("matching Precursor to fragment and interfragment neutral losses and fragments in ",
54 |             length(compSpectra(object)), " composite spectra")
55 |     flush.console()
56 |    
57 |     comp_spectra.tmp <- lapply(1:length(comp_spectra.tmp), function(x){
58 |       # add relative intensity
59 |       cSpectrum.tmp <- comp_spectra.tmp[[x]]
60 |       cSpectrum.tmp$Rel_Intensity <- 100 * (cSpectrum.tmp$intensity / max(cSpectrum.tmp$intensity))
61 |       cSpectrum.tmp <- cSpectrum.tmp[order(cSpectrum.tmp$mass), , drop = FALSE]
62 |       # calculate the interfragment and precursor to fragment m/z differences
63 |       # (the highest mass signal has no following signal and is given 0)
64 |       cSpectrum.tmp$interfrag.diff <- as.numeric(c(diff(cSpectrum.tmp$mass), 0))
65 |       cSpectrum.tmp$Precursorfrag.diff <- format(as.numeric(MS1_mzs[x]  - cSpectrum.tmp$mass), scientific=FALSE)
66 |       Above.minRelInt <- cSpectrum.tmp$Rel_Intensity > minRelInt
67 |       # identify fragments,  neutral losses and interfragment differences
68 |       fragNLdiff <- t(sapply(c(1:nrow(cSpectrum.tmp)), function(y){
69 |         # indices of frag,  interfrag and NLs
70 |         FragIndx_tmp <- matchIndx(cSpectrum.tmp$mass[y], Fragments$monoisotopic_mass, Above.minRelInt[y])
71 |         InterFragIndx_tmp <- matchIndx(cSpectrum.tmp$interfrag.diff[y], Neutral.losses$monoisotopic_mass, Above.minRelInt[y])
72 |         NeutLossIndx_tmp <- matchIndx(cSpectrum.tmp$Precursorfrag.diff[y], Neutral.losses$monoisotopic_mass, Above.minRelInt[y])
73 |         # collapse names and smiles of any matches
74 |         PA_tmp <- c(Frag.ID=collapseCol(Fragments, FragIndx_tmp, "name", 'noID'),
75 |                     Frag.ID.SMILES=collapseCol(Fragments, FragIndx_tmp, "SMILES", 'noSMILES'), 
76 |                     interfrag.loss=collapseCol(Neutral.losses, InterFragIndx_tmp, "name", 'noID'),
77 |                     interfrag.loss.SMILES=collapseCol(Neutral.losses, InterFragIndx_tmp, "SMILES", 'noSMILES'), 
78 |                     Neutral.loss=collapseCol(Neutral.losses, NeutLossIndx_tmp, "name", 'noID'), 
79 |                     Neutral.loss.SMILES=collapseCol(Neutral.losses, NeutLossIndx_tmp, "SMILES", 'noSMILES'),
80 |                     Frag.ID.type=collapseCol(Fragments, FragIndx_tmp, "SubStructure_type", 'noID'), 
81 |                     interfrag.loss.type=collapseCol(Neutral.losses, InterFragIndx_tmp, "SubStructure_type", 'noID'),
82 |                     Neutral.loss.type=collapseCol(Neutral.losses, NeutLossIndx_tmp, "SubStructure_type", 'noID'))
83 |         
84 |         PA_tmp <- gsub(";$", "", PA_tmp)
85 |         return(PA_tmp)}))
86 |       cSpectrum.tmp[, colnames(fragNLdiff)] <- fragNLdiff
87 |       return(cSpectrum.tmp)})
88 |    
89 |     names(comp_spectra.tmp)  <- names(compSpectra(object))
90 |     compSpectra(object) <- comp_spectra.tmp
91 |     return(object)
92 |  }) # end function
93 | 


--------------------------------------------------------------------------------
/R/subStructure.R:
--------------------------------------------------------------------------------
 1 | #' Identify substructures within composite spectra
 2 | #' 
 3 | #' Wrapper which dispatches to the subStructure.<method> function selected by 
 4 | #' the method argument.
 5 | #' @param object a compMS2 class object.
 6 | #' @param method one of "Annotate" (annotation of possible substructure neutral 
 7 | #' losses/ fragments in composite spectra), "prob" (identify most probable 
 8 | #' substructure identification for a composite spectra) or "probSummary" 
 9 | #' (summary of probable substructure annotations for each composite spectrum). 
10 | #' Default = "Annotate".
11 | #' @param ... option arguments to be passed along to the selected function.
12 | #' 
13 | #' @return the value of the selected function, returned invisibly.
14 | #' @seealso \link{subStructure.Annotate}, \link{subStructure.prob}, \link{subStructure.probSummary}
15 | #' @export
16 | setGeneric("subStructure", function(object, ...) standardGeneric("subStructure"))
17 | 
18 | setMethod("subStructure", signature = "compMS2", function(object, method="Annotate", 
19 |                                                     ...) {
20 |   # validate the method and build the name of the function to call
21 |   method <- match.arg(method, c("Annotate","prob","probSummary"))
22 |   subStrFun <- paste("subStructure", method, sep=".")
23 |   # alist leaves object and ... unevaluated until the call is made
24 |   subStrRes <- do.call(subStrFun, alist(object, ...))
25 |   invisible(subStrRes)
26 | }) 
27 | 


--------------------------------------------------------------------------------
/R/subStructure.probSummary.R:
--------------------------------------------------------------------------------
 1 | #' summarizes most probable substructure type within all composite spectra
 2 | #' 
 3 | #' @param object a compMS2 class object
 4 | #' @param n number of top substructure types to print (default = 10).
 5 | #' @param minSumRelInt numeric (default = 30) minimum summed relative intensity to consider a probable
 6 | #' substructure type identification. 
 7 | #' 
 8 | #' @return a named numeric vector of frequency of most probable substructure types
 9 | #' identified. The most highly ranked probable substructure type for each
10 | #' composite spectra is based on the largest summed relative intensity explained
11 | #' by the characteristic substructure neutral losses and fragments. If the 
12 | #' substructure annotation table is empty a message is printed instead.
13 | #' @export
14 | setGeneric("subStructure.probSummary", function(object, ...) standardGeneric("subStructure.probSummary"))
15 | 
16 | setMethod("subStructure.probSummary", signature = "compMS2", function(object, n = 10, 
17 |                                                      minSumRelInt=30) {
18 |   # error handling
19 |   if(class(object) != "compMS2"){
20 |     stop("argument object is not an CompMS2 class object")
21 |   } else {
22 |   if(nrow(subStrAnno(object)) > 0){
23 |     cat("Substructure annotation summary : \n")
24 |     tmp.df <- subStrAnno(object)
25 |     # first row of every composite spectrum
26 |     indx.tmp <- !duplicated(tmp.df$compSpecName)
27 |     noSubStrDet.indx <- tmp.df$SubStrType != "no substructure detected"
28 |     # spectra without a detected substructure can never pass the threshold
29 |     tmp.df$SumRelInt[noSubStrDet.indx == FALSE] <- 0
30 |     aboveMinSumRI <- as.numeric(tmp.df$SumRelInt) > minSumRelInt
31 |     cat(sum(indx.tmp), "composite spectra \n")
32 |     SubStr.table <- tmp.df$SubStrType[indx.tmp & noSubStrDet.indx & aboveMinSumRI]
33 |     SubStr.table <- sort(table(SubStr.table), decreasing=TRUE)
34 |     cat(sum(SubStr.table), "substructures identified above minimum sum relative intensity of", 
35 |         minSumRelInt, "\n\n") 
36 |     # seq_len rather than 1:n so that an empty table (or n = 0) prints no
37 |     # spurious NA entry
38 |     print(SubStr.table[seq_len(min(n, length(SubStr.table)))])
39 |     return(SubStr.table)
40 |   } else {
41 |     cat("The \"subStructure.prob\" function has not yet been run")
42 |   }
43 |   }
44 | })  
45 | 


--------------------------------------------------------------------------------
/R/subsetCompMS2.R:
--------------------------------------------------------------------------------
 1 | #' subset compMS2 class object using a vector of spectra names
 2 | #' 
 3 | #' @param object a "compMS2" class object.
 4 | #' @param specNames character vector of composite spectrum names.
 5 | #' @param corrNetworkNodes logical should all the first correlation network 
 6 | #' nodes of the composite spectrum names also be returned? 
 7 | #' (default = FALSE).
 8 | #' @param ... currently unused by this method.
 9 | #' @return a "compMS2" class object containing only the requested composite 
10 | #' spectra (plus their first neighbours if corrNetworkNodes = TRUE) and the 
11 | #' metID information belonging to them.
12 | #' @export
13 | setGeneric("subsetCompMS2", function(object, ...) standardGeneric("subsetCompMS2"))
14 | 
15 | setMethod("subsetCompMS2", signature = "compMS2", function(object, 
16 |                                                            specNames=NULL, 
17 |                                                            corrNetworkNodes=FALSE, 
18 |                                                            ...){
19 |   # error handling
20 |   stopifnot(!is.null(object))
21 |   if(class(object) != "compMS2"){
22 |     stop('argument object must be a "compMS2" class object')
23 |   }
24 |   stopifnot(!is.null(specNames))
25 |   if(!is.character(specNames)){
26 |     stop('argument specNames must be a character vector') 
27 |   }
28 |   if(length(network(object)) > 0){
29 |     if(!require(igraph)){
30 |       stop('The igraph package must be installed to use this function.\n')
31 |     }
32 |   }
33 |   if(corrNetworkNodes == TRUE){
34 |     if(length(network(object)$corrNetworkGraph) == 0){
35 |       stop('The function metID.corrNetwork must be run if all connected nodes should be included.')
36 |     }
37 |   }
38 |   matIndxTmp <- match(specNames, names(compSpectra(object))) 
39 |   if(any(is.na(matIndxTmp))){
40 |     stop('The following composite spectrum names do not match:\n', paste0(specNames[is.na(matIndxTmp)], collapse = '\n'), '\nPlease Check and try again.')  
41 |   }
42 | 
43 |   if(!is.null(network(object)$corrNetworkGraph)){
44 |     corrNetTmp <- network(object)$corrNetworkGraph
45 |     corrNetIdx <- match(specNames, names(igraph::V(corrNetTmp)))
46 |     corrNetIdx <- corrNetIdx[!is.na(corrNetIdx)]
47 |     # NOTE(review): the graph is only subset when corrNetworkNodes is TRUE, 
48 |     # otherwise the full graph is kept - confirm this is intended.
49 |     if(corrNetworkNodes){
50 |       # id first neighbours. lapply (not sapply) so the result is always a 
51 |       # list: sapply simplifies equal length results to a vector/ matrix
52 |       # which do.call(c, ...) rejects as it is not a list.
53 |       neighSel <- lapply(corrNetIdx, function(x) names(igraph::neighbors(corrNetTmp, x)))
54 |       # add first neighbours to specNames
55 |       specNames <- unique(c(specNames, unlist(neighSel)))
56 |       # subset corr network
57 |       corrNetIdx <- match(specNames, names(igraph::V(corrNetTmp)))
58 |       corrNetIdx <- corrNetIdx[!is.na(corrNetIdx)]
59 |       # subset
60 |       corrNetTmp <- igraph::induced_subgraph(corrNetTmp, corrNetIdx)
61 |     } 
62 |     # subset layout
63 |     layoutTmp <- network(object)$corrLayout
64 |     corrNetIdx <- match(gsub('.+_', '', names(igraph::V(corrNetTmp))), layoutTmp[, 3])
65 |     corrNetIdx <- corrNetIdx[!is.na(corrNetIdx)]
66 |     layoutTmp <- layoutTmp[corrNetIdx, , drop=FALSE]
67 |     # add back to object
68 |     network(object)$corrLayout <- layoutTmp
69 |     network(object)$corrNetworkGraph <- corrNetTmp
70 |   }
71 |   
72 |   compSpectra(object) <- compSpectra(object)[specNames]
73 |   metaData(object) <- metaData(object)[specNames]
74 |   if(length(DBanno(object)) > 0){
75 |     DBanno(object) <- DBanno(object)[specNames]
76 |   }
77 |   if(length(BestAnno(object)) > 0){
78 |     BestAnno(object) <- BestAnno(object)[specNames]
79 |   }
80 |   if(nrow(subStrAnno(object)) > 0){
81 |     subStrAnno(object) <- subStrAnno(object)[subStrAnno(object)$compSpecName %in% specNames, , drop=FALSE]
82 |   }
83 |   if(length(object@spectralDB) > 0){
84 |     object@spectralDB <- object@spectralDB[specNames]
85 |   }
86 |   if(length(object@inSilico) > 0){
87 |     if(!is.null(object@inSilico$MetFrag)){
88 |       object@inSilico$MetFrag <- object@inSilico$MetFrag[specNames]  
89 |     }
90 |     if(!is.null(object@inSilico$CFM)){
91 |       object@inSilico$CFM <- object@inSilico$CFM[specNames]  
92 |     }
93 |   }
94 |   if(nrow(Comments(object)) > 0){
95 |     Comments(object) <- Comments(object)[Comments(object)$compSpectrum %in% specNames, , drop=FALSE]
96 |   }
97 |   return(object)
98 | }) # end function
99 | 


--------------------------------------------------------------------------------
/data/HMDB.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/HMDB.RData


--------------------------------------------------------------------------------
/data/LMSD.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/LMSD.RData


--------------------------------------------------------------------------------
/data/ReSpect.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/ReSpect.RData


--------------------------------------------------------------------------------
/data/Substructure_masses.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/Substructure_masses.RData


--------------------------------------------------------------------------------
/data/T3DB.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/T3DB.RData


--------------------------------------------------------------------------------
/data/compMS2Example.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/compMS2Example.RData


--------------------------------------------------------------------------------
/data/drugBank.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/drugBank.RData


--------------------------------------------------------------------------------
/data/exactMassEle.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/exactMassEle.RData


--------------------------------------------------------------------------------
/data/lipidAbbrev.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/lipidAbbrev.RData


--------------------------------------------------------------------------------
/data/metFragAdducts.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/metFragAdducts.RData


--------------------------------------------------------------------------------
/data/negESIAdducts.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/negESIAdducts.RData


--------------------------------------------------------------------------------
/data/posESIAdducts.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/data/posESIAdducts.RData


--------------------------------------------------------------------------------
/inst/doc/compMS2Miner_Workflow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/doc/compMS2Miner_Workflow.pdf


--------------------------------------------------------------------------------
/inst/extdata/MetFrag2.3-CL.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/extdata/MetFrag2.3-CL.jar


--------------------------------------------------------------------------------
/inst/extdata/fraggraph-gen.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/extdata/fraggraph-gen.exe


--------------------------------------------------------------------------------
/inst/extdata/lpsolve55.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/extdata/lpsolve55.dll


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/compMS2Template/skeleton/compMS2MinerLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/rmarkdown/templates/compMS2Template/skeleton/compMS2MinerLogo.png


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/compMS2Template/template.yaml:
--------------------------------------------------------------------------------
1 | name: compMS2 workflow template
2 | description: >
3 |   compMS2 workflow template
4 | create_dir: true
5 | 


--------------------------------------------------------------------------------
/inst/shiny-apps/compMS2Explorer/www/DNFanimation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/shiny-apps/compMS2Explorer/www/DNFanimation.gif


--------------------------------------------------------------------------------
/inst/shiny-apps/compMS2Explorer/www/compMS2MinerLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/shiny-apps/compMS2Explorer/www/compMS2MinerLogo.png


--------------------------------------------------------------------------------
/inst/shiny-apps/compMS2Explorer/www/compMS2MinerLogoExApp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/shiny-apps/compMS2Explorer/www/compMS2MinerLogoExApp.png


--------------------------------------------------------------------------------
/inst/shiny-apps/compMS2Explorer/www/compMS2MinerLogoTutorial.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/shiny-apps/compMS2Explorer/www/compMS2MinerLogoTutorial.png


--------------------------------------------------------------------------------
/inst/shiny-apps/compMS2Explorer/www/dynamicNoiseFilterVideo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/shiny-apps/compMS2Explorer/www/dynamicNoiseFilterVideo.mp4


--------------------------------------------------------------------------------
/inst/shiny-apps/compMS2Explorer/www/optimConsensusAnimation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/shiny-apps/compMS2Explorer/www/optimConsensusAnimation.gif


--------------------------------------------------------------------------------
/inst/shiny-apps/compMS2Explorer/www/optimConsensusAnimation.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/shiny-apps/compMS2Explorer/www/optimConsensusAnimation.mp4


--------------------------------------------------------------------------------
/inst/shiny-apps/compMS2Explorer/www/screenshotCompMS2Explorer_260_120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/inst/shiny-apps/compMS2Explorer/www/screenshotCompMS2Explorer_260_120.png


--------------------------------------------------------------------------------
/man/MS1MatchSpectra.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MS1MatchSpectra.R
 3 | \name{MS1MatchSpectra}
 4 | \alias{MS1MatchSpectra}
 5 | \title{MS1featureMatch}
 6 | \usage{
 7 | MS1MatchSpectra(metaData = NULL, MS2file = NULL, mz = NULL, RT = NULL,
 8 |   EIC = NULL, adduct = NULL, precursorPpm = 10, ret = 10,
 9 |   adducts = FALSE, isoWid = 4)
10 | }
11 | \arguments{
12 | \item{mz}{mass to charge ratio (from the MS1 peak table).}
13 | 
14 | \item{RT}{retention time in seconds (from the MS1 peak table).}
15 | 
16 | \item{precursorPpm}{parts per million mass accuracy for match (default = 10).}
17 | 
18 | \item{ret}{retention time window (+/- seconds) for match}
19 | 
20 | \item{isoWid}{isolation width of ions.}
21 | 
22 | \item{mz.v}{numeric vector of mass to charge ratio (from MS2 precursors)}
23 | 
24 | \item{RT.v}{numeric vector of retention time in seconds (from MS2 precursors)}
25 | }
26 | \value{
27 | returns indices of matches as a list object
28 | }
29 | \description{
30 | match ms1 features defined by mz and Rt to vector of ms2 features
31 | }
32 | 


--------------------------------------------------------------------------------
/man/MS2fileInfo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MS2fileInfo.R
 3 | \name{MS2fileInfo}
 4 | \alias{MS2fileInfo}
 5 | \title{MS2 file information}
 6 | \usage{
 7 | MS2fileInfo(MS2file = NULL, TICfilter = NULL)
 8 | }
 9 | \arguments{
10 | \item{MS2file}{MS2 file imported into R via the readMzXml package as a list}
11 | }
12 | \value{
13 | data frame with number of rows equal to the number of scans and 10 
14 | variables:
15 | 1. "MS.scanType" = MS scan type, 1 or 2.
16 | 2. "precursorMz" = numeric mass-to-charge ratio for each MS2 scan
17 | 3. "retentionTime" = numeric precursor retention time for each MS2 scan
18 | 4. "TIC" = Total ion current for this scan.
19 | 5. "TICaboveFilter" = boolean if TIC above minimum TIC filter equal 1, else 0
20 | 6. "precursorIntensity" = MS2 precursor intensity, if MS1 scan returns
21 | zero
22 | 7. "collisionEnergy" = collision energy (eV)
23 | 8. "basePeakMz" = mass-to-charge ratio of the base peak for the scan
24 | 9. "basePeakIntensity" = intensity of the base peak for the scan 
25 | 10. "precursorScanNum" = precursor scan number (MS2) or scan number (MS1)
26 | }
27 | \description{
28 | Extract precursor mass-to-charge ratio, retention time, scan type and total
29 | ion current for each MS2 file scan.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/PMIDsearch.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/PMIDsearch.R
 3 | \name{PMIDsearch}
 4 | \alias{PMIDsearch}
 5 | \title{customized PMID search function adapted from PubChemWordcloud package v 0.3.2}
 6 | \usage{
 7 | PMIDsearch(keys = NULL, n = 1000)
 8 | }
 9 | \arguments{
10 | \item{keys}{character vector of compound names to search pubmed with}
11 | 
12 | \item{n}{numeric maximum number of results to return}
13 | }
14 | \description{
15 | customized PMID search function adapted from PubChemWordcloud package v 0.3.2
16 | }
17 | 


--------------------------------------------------------------------------------
/man/Substructure_masses.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Substructure_masses.R
 3 | \docType{data}
 4 | \name{Substructure_masses}
 5 | \alias{Substructure_masses}
 6 | \title{Collection of mass spectral fragment/ neutral loss/ adduct substructures from 
 7 | literature sources}
 8 | \format{A data frame with 263 rows and 16 variables}
 9 | \source{
10 | 1. Levsen, K. et. al. Structure elucidation of phase II metabolites by 
11 | tandem mass spectrometry: an overview, Journal of Chromatography A, 
12 | Volume 1067, Issues 1-2, 4 March 2005, Pages 55-72, ISSN 0021-9673
13 | \url{http://dx.doi.org/10.1016/j.chroma.2004.08.165}.
14 | 
15 | 2. Even-electron ions: a systematic study of the neutral species lost in the 
16 | dissociation of quasi-molecular ions.
17 | 
18 | 3. Brügger, B. et al. Quantitative analysis of biological membrane lipids at 
19 | the low picomole level by nano-electrospray ionization tandem mass 
20 | spectrometry. Proc. Natl. Acad. Sci. U.S.A., 94, 2339-2344 (1997).
21 | 
22 | 4. Fouquet, T. et al. Electrospray tandem mass spectrometry combined with 
23 | authentic compound synthesis for structural characterization of an 
24 | octamethylcyclotetrasiloxane plasma polymer.
25 | }
26 | \usage{
27 | data(Substructure_masses)
28 | }
29 | \description{
30 | This dataset contains information on substructures and Electrospray adducts/
31 | artefacts manually curated from literature sources. 
32 | The variables are as follows:
33 | }
34 | \details{
35 | \itemize{
36 |  \item Entry_no.  substructure number (1 -- 268)          
37 |  \item SubStructure. numeric logical (0 and 1) is the entry a fragment/neutral
38 |  loss substructure.       
39 |  \item ESI_adduct.  numeric logical (0 and 1) is the entry an atmospheric 
40 |  pressure/ electrospray adduct.          
41 |  \item SubStructure_type. parent substructure type (e.g. sulfate, glucuronide) 
42 |  \item Abbrev_name. abbreviated name of substructure type within square 
43 |  brackets. This abbreviated version can be displayed more easily within a 
44 |  results table for instance (e.g. [PC_184]).       
45 |  \item name. Full descriptive name of the neutral loss/ fragment/ electrospray
46 |  adduct within a square bracket (e.g. [NAcCysteine Acetamide]).              
47 |  \item IUPAC. IUPAC name for fragment/ neutral loss.               
48 |  \item SMILES. Canonical SMILES code of fragment/ neutral losses.             
49 |  \item molecular_formula. Molecular formula of fragment/ neutral losses.           
50 |  \item monoisotopic_weight. Monoisotopic weight of fragment/ neutral losses.
51 |  (0.9840156 -- 388.0853100)
52 |  \item Neut_loss. numeric logical (0 and 1) is the entry a neutral loss.             
53 |  \item frag. numeric logical (0 and 1) is the entry a fragment.               
54 |  \item pos. numeric logical (0 and 1) is the entry found in positive mode.                 
55 |  \item neg. numeric logical (0 and 1) is the entry found in negative mode.                
56 |  \item mass_shift. the expected monoisotopic mass shift associated with this
57 |  substructure/ adduct type. (17.02655 -- 360.12678)   
58 |  \item ref. the literature reference for the entry.
59 |  }
60 | }
61 | \keyword{datasets}
62 | 


--------------------------------------------------------------------------------
/man/addNoMS2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/addNoMS2.R
 3 | \name{addNoMS2}
 4 | \alias{addNoMS2}
 5 | \title{add no MS2 data to compMS2 class object internal to corrNetwork function}
 6 | \usage{
 7 | addNoMS2(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a "compMS2" class object.}
11 | 
12 | \item{specNames}{character vector of composite spectrum names.}
13 | 
14 | \item{eicMzRt}{data.frame of EICnos/unique id, mz values, rt values and (if applicable)
15 | ESI adducts/in-source fragments in 4 columns.}
16 | }
17 | \value{
18 | a "compMS2" class object with noMS2 data added to the appropriate slots.
19 | }
20 | \description{
21 | add no MS2 data to compMS2 class object internal to corrNetwork function
22 | }
23 | 


--------------------------------------------------------------------------------
/man/adduct2mass.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/adduct2mass.R
 3 | \name{adduct2mass}
 4 | \alias{adduct2mass}
 5 | \title{converts ESI adduct names into table of monoisotopic masses}
 6 | \source{
 7 | element monoisotopic masses and natural abundances taken from \url{http://www.sisweb.com/referenc/source/exactmas.htm}, see ?exactMassEle.
 8 | }
 9 | \usage{
10 | adduct2mass(adductNames = c("[M-H]-", "[2M+2CH3OH]2-", "[M-H+C2H4O2+Na]-"))
11 | }
12 | \arguments{
13 | \item{adductNames}{character vector of ESI adduct names in a specific form for example see default names c('[M-H]-', '[2M+2CH3OH]2-', '[M-H+C2H4O2+Na]-'). The 
14 | function was developed and tested against the 128 and 133 different ESI 
15 | adducts and in-source fragment names for accuracy.}
16 | }
17 | \value{
18 | data.frame of n rows corresponding to each adduct name and 5 columns:
19 | \enumerate{
20 | \item "name" adduct name    
21 | \item "nmol" number of molecules (e.g. 2M-H = 2, M-H = 1)   
22 | \item "Ch" charge state      
23 | \item "massDiff" summed monoisotopic mass difference.
24 | \item "mode" ion polarity based on adduct name (e.g. "]-" = "neg", and "]+" = "pos")
25 | }
26 | }
27 | \description{
28 | converts ESI adduct names into table of monoisotopic masses
29 | }
30 | \details{
31 | this function can be used to generate a table of ESI adducts for the
32 | \code{\link{metID.dbAnnotate}} function.
33 | }
34 | \examples{
35 | massDiffs <- adduct2mass(c('[M-H]-', '[2M+2CH3OH]2-', '[M-H+C2H4O2+Na]-'))
36 | }
37 | \references{
38 | \enumerate{
39 | \item Stanstrup, J., Gerlich, M., Dragsted, L.O. et al. 
40 | Anal Bioanal Chem (2013) 405: 5037. doi:10.1007/s00216-013-6954-6
41 | }
42 | }
43 | 


--------------------------------------------------------------------------------
/man/annoCompoundClass.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/annoCompoundClass.R
 3 | \name{annoCompoundClass}
 4 | \alias{annoCompoundClass}
 5 | \title{automatically annotate compound classes comments table}
 6 | \usage{
 7 | annoCompoundClass(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a compMS2 class object.}
11 | 
12 | \item{overWrite}{logical should any existing compound_class types be overwritten in the
13 | comments table (default = FALSE).}
14 | 
15 | \item{minSimScore}{numeric minimum chemical similarity score (default = 0.8),
16 | all chemical fingerprints with similarity above this score to entries of HMDB,
17 | drugBank and T3DB will be considered for the compound class. The highest 
18 | chemical similarity score above this threshold will be considered as the 
19 | predicted compound class. In the event of multiple tied maximum chemical
20 | similarity scores the most frequent compound class will be added to the comments
21 | table.}
22 | }
23 | \value{
24 | the compMS2 class object with compound classes (currently only phospholipids supported) contained in
25 | data(lipidAbbrev) annotated and interpreted lipid tail information added to the Comments table
26 | accessible by Comments(object).
27 | 
28 | a compMS2 class object with the compound class of the identified
29 | compound added to the metID comments table. Comments(object). Additionally
30 | a barplot is generated summarizing the identified compound class information
31 | returned.
32 | }
33 | \description{
34 | automatically annotate compound classes comments table
35 | }
36 | \details{
37 | This function attempts to automatically add chemical taxonomy information
38 | to the metID comments table accessible using Comments(object). Pre-existing
39 | compound class information can be overwritten. Additionally a predicted
40 | compound class is added for any compound not contained in HMDB, drugBank or
41 | T3DB above a minimum chemical similarity score.
42 | }
43 | 


--------------------------------------------------------------------------------
/man/cfmFragGraphGen.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cfmFragGraphGen.R
 3 | \name{cfmFragGraphGen}
 4 | \alias{cfmFragGraphGen}
 5 | \title{cfm fragment graph generation from table of annotations}
 6 | \usage{
 7 | cfmFragGraphGen(bestAnnoSubRow = NULL, fragGraphGenExe = NULL,
 8 |   compSpecAll = NULL, keepTempFiles = FALSE, mode = "pos",
 9 |   frag_mzabs = 0.05)
10 | }
11 | \arguments{
12 | \item{bestAnnoSubRow}{unique compMS2@BestAnno entries (only M-H (neg mode) and M+H (pos mode) can be in silico fragmented by CFM and no SubStr_types).}
13 | 
14 | \item{fragGraphGenExe}{character full path to fraggraph-gen.exe file (internal to compMS2Miner package).}
15 | 
16 | \item{compSpecAll}{data.frame 3 columns mass, intensity and comp spectrum index number.}
17 | 
18 | \item{mode}{character ionization polarity (either 'pos' or 'neg').}
19 | 
20 | \item{frag_mzabs}{numeric delta predicted-observed fragment mass accuracy for matching.}
21 | }
22 | \value{
23 | if the fraggraph-gen process completed then a list of fragments matched to the corresponding composite spectra is returned.
24 | }
25 | \description{
26 | cfm fragment graph generation from table of annotations
27 | }
28 | 


--------------------------------------------------------------------------------
/man/cleanAbs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cleanAbs.R
 3 | \name{cleanAbs}
 4 | \alias{cleanAbs}
 5 | \title{Adapted from PubMedWordCloud (cleanAbstracts) to work with compMS2Miner}
 6 | \usage{
 7 | cleanAbs(Abs, rmNum = TRUE, tolw = TRUE, toup = FALSE, rmWords = TRUE,
 8 |   yrWords = NULL, stemDoc = FALSE)
 9 | }
10 | \arguments{
11 | \item{Abs}{output of getAbs, or just a paragraph of text}
12 | }
13 | \description{
14 | Adapted from PubMedWordCloud (cleanAbstracts) to work with compMS2Miner
15 | }
16 | 


--------------------------------------------------------------------------------
/man/combineMS2.Ions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/combineMS2.Ions.R
 3 | \name{combineMS2.Ions}
 4 | \alias{combineMS2.Ions}
 5 | \title{Combine spectra peaks within individual spectra}
 6 | \usage{
 7 | combineMS2.Ions(object, ...)
 8 | }
 9 | \arguments{
10 | \item{...}{option arguments to be passed along.}
11 | 
12 | \item{mzError}{interpeak absolute m/z error for spectra signal grouping (default = 0.01).}
13 | 
14 | \item{minPeaks}{Minimum number of peaks per spectrum (default = 1).}
15 | 
16 | \item{verbose}{logical if TRUE display progress bars.}
17 | }
18 | \value{
19 | A compMS2 object with ion grouped composite spectra.
20 | }
21 | \description{
22 | Combine spectra peaks within individual spectra
23 | }
24 | \details{
25 | group ions according to absolute m/z error. 
26 | The default parameters are suitable for a high-resolution Q-ToF.
27 | Following ion grouping, signal intensities are summed and an average m/z 
28 | calculated for each ion group. This signal summing serves to increase the
29 | overall intensity of true ion signal across multiple scans and reduce the 
30 | contribution of noise within the spectrum. Calculation of the central tendency
31 | of each ion group serves to homogenize the random error and improve the mass
32 | accuracy of each spectrum peak.
33 | }
34 | 


--------------------------------------------------------------------------------
/man/combineMS2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/combineMS2.R
 3 | \name{combineMS2}
 4 | \alias{combineMS2}
 5 | \title{combineMS2 ions either within and/ or between composite spectra}
 6 | \usage{
 7 | combineMS2(object, ...)
 8 | }
 9 | \arguments{
10 | \item{...}{option arguments to be passed along.}
11 | 
12 | \item{object.}{a compMS2 class object obtained from the function CompMSset}
13 | 
14 | \item{method.}{"Ions" intra-spectrum ions grouping or "Spectra" inter-spectra
15 | ion grouping, composite spectra from multiple MS2 files matched to the same
16 | MS1 feature are matched. The method "removeContam" will remove possible 
17 | contaminants defined as sequences of isobars with sufficient spectral similarity
18 | and separated by a maximum retention time gap. This function can be used
19 | to identify and remove contaminants before or after metabolite identification methods.}
20 | }
21 | \value{
22 | A compMS2 object with intra-composite spectrum or inter-composite 
23 | spectra, grouped, signal summed and group mass-to-charge averaged spectra.
24 | }
25 | \description{
26 | combineMS2 ions either within and/ or between composite spectra
27 | }
28 | \seealso{
29 | \code{\link{combineMS2.Ions}}, \code{\link{combineMS2.Spectra}},
30 | \code{\link{combineMS2.removeContam}}.
31 | }
32 | 


--------------------------------------------------------------------------------
/man/combineMS2.Spectra.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/combineMS2.Spectra.R
 3 | \name{combineMS2.Spectra}
 4 | \alias{combineMS2.Spectra}
 5 | \title{combine ions across spectra matched to the same MS1 feature}
 6 | \usage{
 7 | combineMS2.Spectra(object, ...)
 8 | }
 9 | \arguments{
10 | \item{mzError}{interpeak absolute m/z error for composite spectra signal grouping (default = 0.01)}
11 | 
12 | \item{minPeaks}{Minimum number of peaks per composite spectrum (default = 1)}
13 | 
14 | \item{specSimFilter}{numeric minimum spectral similarity score (dot product score) between spectra matched to the same MS1 feature (values between 0 to 1) if argument is supplied spectra are only combined if they have a minimum spectral similarity score (default = NULL). If all spectra matched to an MS1 feature are dissimilar from one another the MS2 spectrum with the highest precursor intensity will be returned.}
15 | 
16 | \item{binSizeMS2}{numeric MS2 bin size for spectral similarity matching (default = 0.1)}
17 | 
18 | \item{verbose}{logical if TRUE display progress bars.}
19 | }
20 | \description{
21 | combine ions across spectra matched to the same MS1 feature
22 | }
23 | 


--------------------------------------------------------------------------------
/man/combineMS2.removeContam.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/combineMS2.removeContam.R
 3 | \name{combineMS2.removeContam}
 4 | \alias{combineMS2.removeContam}
 5 | \title{remove any possible contaminants identified as repeating sequences of isobaric ions of 
 6 | high spectral similarity.}
 7 | \usage{
 8 | combineMS2.removeContam(object, ...)
 9 | }
10 | \arguments{
11 | \item{object}{a "compMS2" class object.}
12 | 
13 | \item{ms1Abs}{numeric ms1 mass to charge absolute error for hierarchical 
14 | clustering of MS1 masses (default = 0.01). Utilizes the median method of the
15 | \code{\link{hclust}} function of the fastcluster package.}
16 | 
17 | \item{maxRtGap}{numeric maximum retention time gap (in seconds) between two 
18 | isobaric ions. Sequences of possible contaminants will be identified using this
19 | difference in retention time (default = 60).}
20 | 
21 | \item{nContams}{numeric number of isobaric ions in a sequence separated by a maximum
22 | retention time gap (maxRtGap). Any sequences of isobaric ions greater than or equal 
23 | to this number will be removed (default = 10).}
24 | 
25 | \item{minSimScore}{numeric minimum spectral similarity score (values between 0-1).
26 | If any isobar in a sequence of possible contaminants is below this minimum 
27 | mean dot product similarity score
28 | then it will not be removed. This is to ensure that only true isobaric contaminants
29 | are removed and spectra which have been grouped amongst them are not erroneously
30 | removed (default = 0.8).}
31 | 
32 | \item{remContam}{logical should possible contaminant spectra be automatically
33 | removed from the object (default = TRUE), If FALSE the contaminant plot
34 | will still be printed but the spectra will not be removed. In this way the user
35 | can interactively determine suitable parameters prior to spectrum removal.}
36 | }
37 | \value{
38 | a "compMS2" class object with the composite spectra and all metID 
39 | information of any contaminants identified removed. Any correlation or spectral
40 | similarity networks will also have to be recalculated.
41 | }
42 | \description{
43 | remove any possible contaminants identified as repeating sequences of isobaric ions of 
44 | high spectral similarity.
45 | }
46 | \details{
47 | The function produces plots to visualize the contaminants identified.
48 | If you suspect all isobaric ions across the gradient are contaminants then
49 | you can set the argument maxRtGap to infinity (maxRtGap=Inf). N.B. Any spectrum which is not sufficiently similar (specSimScore) to the rest
50 | of the potential contaminants across the gradient will not be erroneously removed.
51 | }
52 | \examples{
53 | compMS2contamRem <- combineMS2(compMS2Example, 'removeContam', maxRtGap=Inf, 
54 |                                nContams=4)
55 | }
56 | 


--------------------------------------------------------------------------------
/man/compMS2Construct.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compMS2Construct.R
 3 | \name{compMS2Construct}
 4 | \alias{compMS2Construct}
 5 | \title{Constructor for compMS2 class object from a peak table and MS2 mzXML/mzML/.mgf
 6 | file(s)}
 7 | \usage{
 8 | compMS2Construct(MS1features = NULL, msDataDir = NULL, MS2files = NULL,
 9 |   nCores = NULL, argsCorrNetwork = list(obsNames = NULL, corrThresh = 0.6,
10 |   corrMethod = "spearman", delta = 0.05, MTC = "BH"), mode = "pos",
11 |   precursorPpm = 10, ret = 10, TICfilter = 10000, minPeaks = 1,
12 |   isoWid = 4, verbose = TRUE)
13 | }
14 | \arguments{
15 | \item{MS1features}{either a data.frame or a full file path (as a character string) to a .csv file of an MS1 feature table with observations (samples) in columns and
16 | variables (Mass spectral signals) in rows, the first 3 columns must consist of:
17 | \enumerate{
18 |  \item EIC number or unique peak identifier.
19 |  \item mass-to-charge ratio of peak group.
20 |  \item median/ peak apex retention time in seconds. 
21 |  }
22 |  
23 |  optionally the 4th column of the MS1feature table may contain any adducts
24 |  and isotopes identified by for example the CAMERA R package.
25 |  If this column is present the adducts will be incorporated in to the compMS2
26 |  class object and used to guide the subsequent \code{\link{metID.dbAnnotate}}
27 |  function. This can be very useful for narrowing possible annotations in
28 |  subsequent stages of the compMS2miner workflow and particularly in reduction
29 |  of false positives annotations. The adduct annotations must
30 |  consist of the following notation style for example [M-H]-, [2M+2CH3OH]2-, 
31 |  [M-H+C2H4O2+Na]-. Abbreviations such as Hac (CH3COOH) for acetic acid 
32 |  and ACN (i.e. C2H3N) for acetonitrile 
33 |  are not acceptable; full molecular formulae must be used so that the correct
34 |  elemental composition can be determined. As is typical of the output of
35 |  CAMERA for example multiple possible adducts can appear for the same feature
36 |  where they have shared/similar expected masses.
37 |    
38 |  If argument is not supplied a GUI (tcltk) file selection window will open and a .csv file can then be selected.}
39 | 
40 | \item{msDataDir}{character full path to a directory containing LC-MS/MS data files
41 | in either the open framework .mzXML or newer .mzML file types also mascot generic format files (.mgf). If argument is
42 | not supplied a GUI (tcltk) file selection window will open and the directory 
43 | can be selected.}
44 | 
45 | \item{MS2files}{character vector of full paths to ms2 files (either .mzML, .mzXML or .mgf).
46 | Alternative to choosing the directory. In this way particular files within a directory
47 | or files from multiple directory locations can be specified.}
48 | 
49 | \item{nCores}{numeric Number of cores for parallel computation.}
50 | 
51 | \item{mode}{character Ionisation polarity must be either 'pos' or 'neg'.}
52 | 
53 | \item{precursorPpm}{numeric Parts per million mass accuracy to match MS1 features to MS2 spectra (ppm)}
54 | 
55 | \item{ret}{numeric retention time tolerance to match MS1 features to MS2 spectra (+/- seconds).}
56 | 
57 | \item{TICfilter}{numeric Minimum Total Ion Current to consider an MS2 spectrum. Any MS2 scan
58 | below this threshold will not be considered.}
59 | 
60 | \item{isoWid}{numeric isolation width of DDA precursor ions, utilized to identify
61 | potentially chimeric spectra.}
62 | 
63 | \item{verbose}{logical if TRUE display progress bars.}
64 | 
65 | \item{minPeaks.}{minimum number of fragment ions for a spectrum to be 
66 | considered (default = 1).}
67 | }
68 | \value{
69 | A compMS2 object
70 | }
71 | \description{
72 | Matches MS1 features to MS2 spectra (.mzXML/.mzML/.mgf) files 
73 | based on a mass-to-charge and retention time tolerance. Composite spectra 
74 | and other data can subsequently be visualized during any stage of the compMS2Miner
75 | processing workflow. Composite spectra can be denoised, ion signals grouped 
76 | and summed, substructure groups identified, common Phase II metabolites
77 | predicted and features matched to database monoisotopic mass data 
78 | and in silico MS2 fragmentation data.
79 | The resulting data can then be readily curated by sending to a local or online
80 | couchDB database.
81 | }
82 | 


--------------------------------------------------------------------------------
/man/compMS2Create.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compMS2Create.R
 3 | \name{compMS2Create}
 4 | \alias{compMS2Create}
 5 | \title{create a compMS2 object}
 6 | \usage{
 7 | compMS2Create(MS2file = NULL, MS1features = NULL, TICfilter = 10000,
 8 |   precursorPpm = 10, ret = 10, adducts = FALSE, isoWid = 4)
 9 | }
10 | \arguments{
11 | \item{MSfiles}{character vector of mzXML file locations}
12 | }
13 | \description{
14 | create a compMS2 object
15 | }
16 | 


--------------------------------------------------------------------------------
/man/compMS2Explorer.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compMS2Explorer.R
 3 | \name{compMS2Explorer}
 4 | \alias{compMS2Explorer}
 5 | \title{Visualize your compMS2Miner results output using a shiny app.}
 6 | \usage{
 7 | compMS2Explorer(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a compMS2 class object or a character full path to a compMS2Miner zip file}
11 | 
12 | \item{browserLaunch}{logical launch app in web browser (default = TRUE).}
13 | }
14 | \description{
15 | Visualize your compMS2Miner results output using a shiny app.
16 | }
17 | 


--------------------------------------------------------------------------------
/man/compMS2Miner.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compMS2Miner.R
 3 | \docType{package}
 4 | \name{compMS2Miner}
 5 | \alias{compMS2Miner}
 6 | \alias{compMS2Miner-package}
 7 | \title{compMS2Miner: a package to identify/ visualize unknowns in metabolomic datasets based on MS2 fragmentation data.}
 8 | \description{
 9 | Matches MS1 features to MS2 spectra (.mzXML) files based on a 
10 | mass-to-charge and retention time tolerance. Composite spectra and other data
11 | can subsequently be visualized during any stage of the compMS2Miner
12 | processing workflow. Composite spectra can be denoised, ion signals grouped 
13 | and summed, substructure groups identified, common Phase II metabolites
14 | predicted and features matched to database monoisotopic mass data 
15 | and in silico MS2 fragmentation data.
16 | The resulting data can then be readily curated by sending to a local or online
17 | couchDB database.
18 | }
19 | \details{
20 | An example workflow is available in the following vignette:
21 | compMS2MinerWorkFlow (source, pdf)
22 | }
23 | \author{
24 | WMB Edmands \email{edmandsw@berkeley.edu}
25 | }
26 | 


--------------------------------------------------------------------------------
/man/couchDBcurate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/couchDBcurate.R
 3 | \name{couchDBcurate}
 4 | \alias{couchDBcurate}
 5 | \title{CouchDB login
 6 | send CompMS2 data set to couchDB}
 7 | \usage{
 8 | couchDBcurate(object, ...)
 9 | }
10 | \arguments{
11 | \item{couchDBname}{New or existing CouchDB database name (must be all lower case,  can contain underscores)}
12 | }
13 | \value{
14 | CouchDB records : All Results from the current stage of the compMS2Miner
15 |  are sent to the already established/ newly created couchDB database. 
16 |  The following documents are sent to couchDB :
17 | }
18 | \description{
19 | CouchDB login
20 | send CompMS2 data set to couchDB
21 | }
22 | 


--------------------------------------------------------------------------------
/man/couchDBpingReq.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/couchDBpingReq.R
 3 | \name{couchDBpingReq}
 4 | \alias{couchDBpingReq}
 5 | \title{CouchDBpingReq
 6 | send ping to couch db using log in credentials from GetLoginDetails or
 7 | named character vector}
 8 | \usage{
 9 | couchDBpingReq(credentials)
10 | }
11 | \arguments{
12 | \item{credentials}{either output from GetLoginDetails or named character vector "host", "Username", "Password" containing user Login parameters (e.g. c( host = "localhost", Username = "", Password = ""))}
13 | }
14 | \value{
15 | ping request result
16 | }
17 | \description{
18 | CouchDBpingReq
19 | send ping to couch db using log in credentials from GetLoginDetails or
20 | named character vector
21 | }
22 | 


--------------------------------------------------------------------------------
/man/deconvNoise.DNF.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deconvNoise.DNF.R
 3 | \name{deconvNoise.DNF}
 4 | \alias{deconvNoise.DNF}
 5 | \title{Spectral noise filtration using dynamic noise filter}
 6 | \source{
 7 | Xu H. and Freitas M. "A Dynamic Noise Level Algorithm for Spectral 
 8 | Screening of Peptide MS/MS Spectra" 2010 BMC Bioinformatics.
 9 | }
10 | \usage{
11 | deconvNoise.DNF(object, ...)
12 | }
13 | \arguments{
14 | \item{DNF.}{numeric dynamic noise filter minimum signal to noise threshold 
15 | (default = 2), calculated as the ratio between the linear model predicted 
16 | intensity value and the actual intensity.}
17 | 
18 | \item{minPeaks.}{integer minimum number of signal peaks following dynamic 
19 | noise filtration (default = 1).}
20 | 
21 | \item{maxPeaks.}{integer maximum number of signal peaks the function will continue
22 | until both the minimum DNF signal to noise ratio is exceeded and the number
23 | of peaks is lower than the maximum (default = 60).}
24 | 
25 | \item{minInt.}{numeric minimum intensity to commence the dynamic noise filter
26 | algorithm. Low values will increase computation time and increase the chance
27 | that the DNF algorithm will terminate prematurely (default = 250).}
28 | 
29 | \item{verbose}{logical if TRUE display progress bars.}
30 | }
31 | \value{
32 | noise filtered MS2 spectra.
33 | }
34 | \description{
35 | uses the dynamic noise filtration algorithm adapted from the method described 
36 | in Xu H. and Freitas M. "A Dynamic Noise Level Algorithm for Spectral 
37 | Screening of Peptide MS/MS Spectra" 2010 BMC Bioinformatics.
38 | }
39 | 


--------------------------------------------------------------------------------
/man/deconvNoise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deconvNoise.R
 3 | \name{deconvNoise}
 4 | \alias{deconvNoise}
 5 | \title{Filter spectral noise from a CompMS2 class object}
 6 | \usage{
 7 | deconvNoise(object, ...)
 8 | }
 9 | \arguments{
10 | \item{...}{option arguments to be passed along.}
11 | 
12 | \item{object.}{a compMS2 class object obtained from the function CompMSset}
13 | 
14 | \item{method.}{dynamic noise filter "DNF" or fixed maximum intensity "maxInt"}
15 | }
16 | \value{
17 | A compMS2 object with noise filtered composite spectra.
18 | }
19 | \description{
20 | Filter spectral noise from a CompMS2 class object
21 | }
22 | \seealso{
23 | \code{\link{deconvNoise.DNF}}, \code{\link{deconvNoise.maxInt}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/dotProdMatrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dotProdMatrix.R
 3 | \name{dotProdMatrix}
 4 | \alias{dotProdMatrix}
 5 | \title{dot product matrix calculation}
 6 | \usage{
 7 | dotProdMatrix(allSpectra = NULL, spectraNames = NULL, binSizeMS2 = NULL)
 8 | }
 9 | \arguments{
10 | \item{allSpectra}{a numeric matrix consisting of two columns 1. mass and 2. intensity}
11 | 
12 | \item{spectraNames}{character names of individual spectra to compare must equal number of rows of allSpectra}
13 | 
14 | \item{binSizeMS2}{numeric the MS2 bin size to bin MS2 data prior to dot product calculation (default = 0.1 Da).}
15 | }
16 | \value{
17 | a matrix of equal dimension corresponding to the number of unique spectrum names
18 | }
19 | \description{
20 | dot product matrix calculation
21 | }
22 | 


--------------------------------------------------------------------------------
/man/dynamicNoiseFilter.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dynamicNoiseFilter.R
 3 | \name{dynamicNoiseFilter}
 4 | \alias{dynamicNoiseFilter}
 5 | \title{Dynamic Noise filtration}
 6 | \usage{
 7 | dynamicNoiseFilter(spectrum.df = NULL, DNF = 2, minPeaks = 5,
 8 |   maxPeaks = 20, minInt = 100)
 9 | }
10 | \arguments{
11 | \item{spectrum.df}{a dataframe or matrix with two columns:
12 | 1. Mass/ Mass-to-charge ratio
13 | 2. Intensity}
14 | 
15 | \item{DNF}{dynamic noise filter minimum signal to noise threshold 
16 | (default = 2), calculated as the ratio between the linear model predicted 
17 | intensity value and the actual intensity.}
18 | 
19 | \item{minPeaks}{minimum number of signal peaks following dynamic 
20 | noise filtration (default = 5).}
21 | 
22 | \item{maxPeaks}{maximum number of signal peaks the function will continue
23 | until both the minimum DNF signal to noise ratio is exceeded and the number
24 | of peaks is lower than the maximum (default = 20).}
25 | }
26 | \value{
27 | a list containing 3 objects:
28 | \enumerate{
29 | \item Above.noise The dynamic noise filtered matrix/ dataframe 
30 | \item metaData a dataframe with the following column names:
31 |        1. Noise.level the noise level determined by the dynamic noise filter 
32 |           function.
33 |        2. IntCompSpec Total intensity composite spectrum.
34 |        3. TotalIntSNR Sparse ion signal to noise ratio 
35 |        (mean intensity/ stdev intensity)
36 |        4. nPeaks number of peaks in composite spectrum
37 | \item aboveMinPeaks Logical are the number of signals above the minimum level}
38 | }
39 | \description{
40 | Dynamic Noise filtration
41 | }
42 | \details{
43 | Dynamic noise filter adapted from the method described in Xu H. and 
44 | Freitas M. "A Dynamic Noise Level Algorithm for Spectral Screening of 
45 | Peptide MS/MS Spectra" 2010 BMC Bioinformatics. 
46 | 
47 | The function iteratively calculates linear models starting from 
48 | the median value of the lower half of all intensities in the spectrum.df. 
49 | The linear model is used to predict the next peak intensity and ratio is 
50 | calculated between the predicted and actual intensity value. 
51 | 
52 | Assuming that all preceding intensities included in the linear model 
53 | are noise, the signal to noise ratio between the predicted and actual values 
54 | should exceed the minimum signal to noise ratio (default DNF = 2). 
55 | 
56 | The function continues until both the DNF value minimum has been exceeded 
57 | and the number of peaks is below the maxPeaks (maximum number of peaks) value. As the 
58 | function must necessarily calculate potentially hundreds of linear models the 
59 | RcppEigen package is used to increase the speed of computation.
60 | }
61 | 


--------------------------------------------------------------------------------
/man/exactMassEle.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/exactMassEle.R
 3 | \docType{data}
 4 | \name{exactMassEle}
 5 | \alias{exactMassEle}
 6 | \title{elemental monoisotopic masses and natural abundances}
 7 | \format{A data frame with 83 rows and 4 columns}
 8 | \source{
 9 | \url{http://www.sisweb.com/referenc/source/exactmas.htm}
10 | }
11 | \usage{
12 | data(exactMassEle)
13 | }
14 | \description{
15 | This dataset contains data taken from the table available at (\url{http://www.sisweb.com/referenc/source/exactmas.htm})
16 | The variables are as follows:
17 | }
18 | \details{
19 | \itemize{
20 |  \item eleName full element name (e.g. sodium).
21 |  \item eleSymbol element symbol (e.g. Na).
22 |  \item monoMass character of isotope mass(es) separated by a space.
23 |  \item natAbund character of natural isotope abundance(s) separated by a space.
24 |  }
25 | }
26 | \keyword{datasets}
27 | 


--------------------------------------------------------------------------------
/man/example_mzXML_MS1features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/example_mzXML_MS1features.R
 3 | \docType{data}
 4 | \name{example_mzXML_MS1features}
 5 | \alias{example_mzXML_MS1features}
 6 | \title{example mzXML files and MS1 feature table (subset to 820 -- 940 seconds)}
 7 | \format{A comma delimited text file with 3720 rows and 15 variables and two
 8 | data-dependent MS2 files in centroid mode converted to the mzXML open format 
 9 | using MSConvert software (ProteoWizard 3.0.6965 64 bit) for each extraction 
10 | type.
11 | Data were acquired on an Agilent 6550 q-tof interfaced with a nano-flow chip 
12 | cube running a small molecule C18-chip.}
13 | \description{
14 | MS1features_example.csv 3720 MS1 features from XCMS diffreport peak table
15 | from a study comparing repeat extractions of human dried blood spot samples
16 | using 80\% acetonitrile (ACN) to 80\% methanol (MeOH).
17 | Both extraction solvents consist of repeat preparations of the same 
18 | sample (A, B, C) and repeat injections (1, 2) of each preparation 
19 | (i.e. A1, A2, B1, B2, C1, C2).
20 | The variables are as follows :
21 | \itemize{
22 |  \item EICno XCMS extracted ion chromatograms from XCMS peak tables 
23 |  (62 -- 24328).
24 |  \item mzmed median mass-to-charge (71.0853 -- 999.6138) 
25 |  \item rtmed retention time in seconds (820.01 -- 939.907)
26 |  \item ACN_80_A1 80\% acetonitrile extract peak areas prep. replicate A inj. 1
27 |  \item ACN_80_A2 80\% acetonitrile extract peak areas prep. replicate A inj. 2
28 |  \item ACN_80_B1 80\% acetonitrile extract peak areas prep. replicate B inj. 1
29 |  \item ACN_80_B2 80\% acetonitrile extract peak areas prep. replicate B inj. 2
30 |  \item ACN_80_C1 80\% acetonitrile extract peak areas prep. replicate C inj. 1
31 |  \item ACN_80_C2 80\% acetonitrile extract peak areas prep. replicate C inj. 2
32 |  \item MeOH_80_A1 80\% methanol extract peak areas prep. replicate A inj. 1
33 |  \item MeOH_80_A2 80\% methanol extract peak areas prep. replicate A inj. 2
34 |  \item MeOH_80_B1 80\% methanol extract peak areas prep. replicate B inj. 1
35 |  \item MeOH_80_B2 80\% methanol extract peak areas prep. replicate B inj. 2
36 |  \item MeOH_80_C1 80\% methanol extract peak areas prep. replicate C inj. 1
37 |  \item MeOH_80_C2 80\% methanol extract peak areas prep. replicate C inj. 2
38 |  }
39 | }
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/man/falsePosIdentify.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/falsePosIdentify.R
 3 | \name{falsePosIdentify}
 4 | \alias{falsePosIdentify}
 5 | \title{Identify and/or remove false positives from metID comments table based on a prohibited
 6 | keyword search}
 7 | \usage{
 8 | falsePosIdentify(object, ...)
 9 | }
10 | \arguments{
11 | \item{object}{a "compMS2" class object.}
12 | 
13 | \item{prohibKeyWords}{character a regular expression of prohibited keywords.
14 | If any of these are discovered in any of the abstracts returned from the
15 | PubMed Entrez search then they will be removed from the metID comments table.}
16 | 
17 | \item{n}{integer number of pubmedids to return per metID comments possible_identity.
18 | (default = 50). Larger numbers may improve accuracy but increase computation time.
19 | The maximum allowed value is 500 (limited by entrez system).}
20 | 
21 | \item{meanFreqPerAbs}{numeric minimum mean frequency of the summed frequencies
22 | of the prohibited keywords. Default = 0.6 that is a mean summed frequency of 
23 | any of the key words in the regular expression of 0.6 for it to be
24 | considered. This limits the accidental removals of a true positive which
25 | has a low word count frequency by chance.}
26 | 
27 | \item{removeFP}{logical if TRUE remove possible false positive annotations
28 | from the possible_identity column of the metID comments table. If FALSE (default) the
29 | possible false positive annotation names will remain but they will be
30 | flagged in the user_comments column of the metID comments table as 
31 | "possible false positive (falsePosRemoval)".}
32 | 
33 | \item{maxChar}{numeric maximum number of characters in cleaned abstract words to return.}
34 | 
35 | \item{verbose}{logical if TRUE display progress bars.}
36 | }
37 | \description{
38 | Identify and/or remove false positives from metID comments table based on a prohibited
39 | keyword search
40 | }
41 | 


--------------------------------------------------------------------------------
/man/form2Mat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/form2Mat.R
 3 | \name{form2Mat}
 4 | \alias{form2Mat}
 5 | \title{mol formula to element count table}
 6 | \usage{
 7 | form2Mat(atomForm = NULL)
 8 | }
 9 | \arguments{
10 | \item{atomForm}{a character vector of atomic formulae.}
11 | }
12 | \value{
13 | list containing two matrices one matrix "nEleTable" with a column for each unique element and a row for each atomic
14 | formula with numbers of each element and another matrix "eleRatio" with the ratios of all elements in each atomic formula.
15 | }
16 | \description{
17 | mol formula to element count table
18 | }
19 | 


--------------------------------------------------------------------------------
/man/formulaFilter.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/formulaFilter.R
 3 | \name{formulaFilter}
 4 | \alias{formulaFilter}
 5 | \title{filter potential formulae based on maximum element by mass and ratios}
 6 | \usage{
 7 | formulaFilter(atomForm = NULL, mass = NULL)
 8 | }
 9 | \arguments{
10 | \item{atomForm}{character vector of atomic formulae}
11 | 
12 | \item{mass}{numeric the mass of all formulae considered.}
13 | }
14 | \description{
15 | filter potential formulae based on maximum element by mass and ratios
16 | }
17 | \details{
18 | maximum element numbers and element ratios taken from 5 chemical
19 | databases (n=84891 structures)
20 | }
21 | 


--------------------------------------------------------------------------------
/man/getAbs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getAbs.R
 3 | \name{getAbs}
 4 | \alias{getAbs}
 5 | \title{Adapted from PubMedWordCloud to work with compMS2Miner}
 6 | \usage{
 7 | getAbs(PMID)
 8 | }
 9 | \arguments{
10 | \item{PMID}{character vector of pubMed ids to get abstracts for.}
11 | }
12 | \description{
13 | Adapted from PubMedWordCloud to work with compMS2Miner
14 | }
15 | \details{
16 | if the query sequence is longer than 500 this function will not work
17 | }
18 | 


--------------------------------------------------------------------------------
/man/getLoginDetails.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getLoginDetails.R
 3 | \name{getLoginDetails}
 4 | \alias{getLoginDetails}
 5 | \title{Get Login details for CouchDB}
 6 | \usage{
 7 | getLoginDetails()
 8 | }
 9 | \arguments{
10 | \item{Host}{couchDB host name, defaults to localhost. Can be online repository.}
11 | 
12 | \item{Name}{couchDB administrator username (used for http commands, not stored).}
13 | 
14 | \item{Password}{couchDB administrator password (used for http commands, not stored).}
15 | }
16 | \value{
17 | login details for couchDB
18 | }
19 | \description{
20 | tcltk GUI to get login details for couchDB
21 | }
22 | 


--------------------------------------------------------------------------------
/man/getTitles.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getTitles.R
 3 | \name{getTitles}
 4 | \alias{getTitles}
 5 | \title{get PubMed title function adapted from PubMedWordcloud package v 0.3.2}
 6 | \usage{
 7 | getTitles(pmid)
 8 | }
 9 | \arguments{
10 | \item{pmid}{pubmed id number}
11 | }
12 | \description{
13 | get PubMed title function adapted from PubMedWordcloud package v 0.3.2
14 | }
15 | 


--------------------------------------------------------------------------------
/man/lipidAbbrev.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/lipidAbbrev.R
 3 | \docType{data}
 4 | \name{lipidAbbrev}
 5 | \alias{lipidAbbrev}
 6 | \title{lipid abbreviations table for pubmed text mining}
 7 | \format{A data frame with 18 rows and 3 columns}
 8 | \source{
 9 | \url{http://www.lipidmaps.org/data/classification/lipid_cns.html}
10 | }
11 | \usage{
12 | data(lipidAbbrev)
13 | }
14 | \description{
15 | This dataset contains fields from the LMSD database classifications and 
16 | abbreviations (\url{http://www.lipidmaps.org/data/classification/lipid_cns.html})
17 | The variables are as follows:
18 | }
19 | \details{
20 | \enumerate{
21 |  \item Class lipid class name this string is intended to be searched in PubMed.
22 |  \item Abbreviation abbreviation for lipid class.
23 |  \item regexpr an R regular expression to try to detect the lipid class in a
24 |  database compound name. e.g. Searching PubMed for the string "SM(18:1/14:0)"
25 |  for example will return no PubMed ids however searching using "sphingomyelin"
26 |  will return a more representative number of PubMed abstract Ids. 
27 |  }
28 | }
29 | \keyword{datasets}
30 | 


--------------------------------------------------------------------------------
/man/metFragAdducts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metFragAdducts.R
 3 | \docType{data}
 4 | \name{metFragAdducts}
 5 | \alias{metFragAdducts}
 6 | \title{data.frame of customizable metFrag adduct types and codes}
 7 | \format{A data frame with 9 rows and 3 columns}
 8 | \source{
 9 | \url{http://c-ruttkies.github.io/MetFrag/projects/metfragcl/}
10 | }
11 | \usage{
12 | data(metFragAdducts)
13 | }
14 | \description{
15 | This dataset is the default adduct type table for the \code{\link{metID.metFrag}}
16 | function. A custom table can be created following this format as more adduct
17 | types are added in future versions of the metFrag command line tool.
18 | N.B. adduct names must match those supplied to the \code{\link{adduct2mass}} function internal
19 | to the \code{\link{metID.dbAnnotate}} function.
20 | }
21 | \details{
22 | \enumerate{
23 |  \item adduct adduct name string must match that supplied to adduct2mass.
24 |  \item metFragCode the MetFrag command line tool code for the adduct.
25 |  \item mode polarity. must be either 'pos' or 'neg'. 
26 |  }
27 | }
28 | \keyword{datasets}
29 | 


--------------------------------------------------------------------------------
/man/metFragCl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metFragCl.R
 3 | \name{metFragCl}
 4 | \alias{metFragCl}
 5 | \title{metFrag command line function for localSDF files}
 6 | \source{
 7 | \url{http://c-ruttkies.github.io/MetFrag/projects/metfragcl/} developed based on the command line .jar file (MetFrag2.3-CL.jar) downloaded on 2016/07/12
 8 | \enumerate{
 9 | \item MetFrag relaunched: incorporating strategies beyond in silico fragmentation: C Ruttkies, E L Schymanski, S Wolf, J Hollender, S Neumann Journal of Cheminformatics 2016 8:3
10 | \item In silico fragmentation for computer assisted identification of metabolite mass spectra: S Wolf, S Schmidt, M Müller-Hannemann, S Neumann BMC bioinformatics 11 (1), 148
11 | }
12 | }
13 | \usage{
14 | metFragCl(massSpectrum = NULL, precMass = NULL, compSpecName = NULL,
15 |   dbEntryTable = NULL, metFragJar = NULL, SDFtmp = NULL,
16 |   keepTempFiles = FALSE, mode = "pos", frag_mzabs = 0.05,
17 |   esiList = NULL, maxTreeDepth = 1)
18 | }
19 | \arguments{
20 | \item{massSpectrum}{data.frame composite spectrum consisting of two columns mass and intensity.}
21 | 
22 | \item{precMass}{numeric the MS1 m/z (precursor mass).}
23 | 
24 | \item{compSpecName}{character name of composite spectrum for directory and file naming.}
25 | 
26 | \item{dbEntryTable}{data.frame with the requisite information for the SDFtmp localSDF database consisting of at least 4 columns named 1. 'WebAddress', 2. 'DBid', 3. 'DBname', 4. 'SMILES'.}
27 | 
28 | \item{metFragJar}{character full path to metFragCL.jar file (extdata in compMS2Miner package).}
29 | 
30 | \item{SDFtmp}{an "SDFset" class object of SDF file for the localSDF database search of metFragCL.}
31 | 
32 | \item{keepTempFiles}{logical default = FALSE, sdf, mf and results files will
33 | be created as temporary files otherwise if TRUE files will be retained in named subdirectories (see details).}
34 | 
35 | \item{mode}{character ionization polarity (either 'pos' or 'neg').}
36 | 
37 | \item{frag_mzabs}{numeric delta predicted-observed fragment mass accuracy for matching.}
38 | 
39 | \item{esiList}{named numeric vector of electrospray type numbers for metFrag params file. e.g. positive mode 
40 | \tabular{llll}{
41 | \cr M+H \tab M+NH4  \tab M+Na   \tab M+K 
42 | \cr 1    \tab 18    \tab 23    \tab 39
43 | }}
44 | 
45 | \item{maxTreeDepth}{numeric fragments of fragments? (default = 1 i.e. only direct daughter ions of the structure will be considered). Setting the tree depth to higher values may cause the metFragCL to take longer.}
46 | }
47 | \value{
48 | if MetFrag2.3-CL.jar process completed then a data.frame containing any fragments matched to the composite mass spectra are returned.
49 | MetFragPreProcessingCandidateFilter = UnconnectedCompoundFilter%\n',
50 | }
51 | \description{
52 | metFrag command line function for localSDF files
53 | }
54 | 


--------------------------------------------------------------------------------
/man/metID.CFM.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.CFM.R
 3 | \name{metID.CFM}
 4 | \alias{metID.CFM}
 5 | \title{Wrapper function for Competitive Fragmentation modelling (CFM) \emph{in silico} fragmentation software}
 6 | \source{
 7 | \enumerate{
 8 | \item Allen F, Pon A, Wilson M, Greiner R, and Wishart D. CFM-ID: a web server for annotation, spectrum prediction and metabolite identification from tandem mass spectra. Nucleic Acids Res. June 2014. \url{http://nar.oxfordjournals.org/content/early/2014/06/03/nar.gku436.full}.
 9 | \item fraggraph-gen.exe file in extdata downloaded (2016/07/09, cfm-id-2.2_win32.zip) from \url{https://sourceforge.net/p/cfm-id/wiki/Home/}. 
10 | \item lpsolve.dll in extdata downloaded (2016/07/09, lp_solve_5.5.2.3_IDE_Setup.exe) from \url{https://sourceforge.net/projects/lpsolve/}.
11 | }
12 | }
13 | \usage{
14 | metID.CFM(object, ...)
15 | }
16 | \arguments{
17 | \item{object}{a "compMS2" class object.}
18 | 
19 | \item{featureSubSet}{character vector of composite spectra names (e.g. CC_1, CC_2 etc.) otherwise the default is to perform CFM fragmentation on all composite spectra.}
20 | 
21 | \item{keepTempFiles}{logical default = FALSE fraggraph-gen .csv output file will
22 | be created as temporary files otherwise if TRUE file will be retained in subdirectories named by composite spectrum name.}
23 | 
24 | \item{minPropTicEx}{numeric minimum mean total ion current explained (default = 0.9)
25 | the candidate with the highest proportion of the total ion current explained
26 |  above this minimum will be automatically added to the Comments
27 | table. The argument autoPossId must also be set to TRUE.}
28 | 
29 | \item{autoPossId}{logical if TRUE the function will automatically add the name
30 | of the top annotation based on mean total ion current explained and metFrag score
31 |  (default = FALSE). Caution if TRUE
32 | this will overwrite any existing possible_identities in the "metID comments"
33 | table. This functionality is intended as an automatic annotation identification tool prior to thorough examination of the data in \code{\link{compMS2Explorer}}.
34 | The intention is that automatic annotations can be used in the metID.rtPred
35 | retention prediction function as part of a seamless first-pass workflow.}
36 | 
37 | \item{possContam}{numeric how many times does a possible annotation have
38 | to appear in the automatically generated possible annotations for it to be
39 | considered a contaminant and therefore not added to the "metID comment" table (default = 3, i.e. if a database name appears more than 3 times in the 
40 | automatic annotation table it will be removed).}
41 | 
42 | \item{verbose}{logical if TRUE display progress bars.}
43 | }
44 | \description{
45 | The function will automatically run as a parallel computation if the compMS2 object was created in parallel.
46 | }
47 | 


--------------------------------------------------------------------------------
/man/metID.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.R
 3 | \name{metID}
 4 | \alias{metID}
 5 | \title{Combinatorial metabolite identification methods}
 6 | \source{
 7 | Kenneth V. Price, Rainer M. Storn and Jouni A. Lampinen (2006). 
 8 | Differential Evolution - A Practical Approach to Global Optimization. 
 9 | Berlin Heidelberg: Springer-Verlag. ISBN 3540209506.
10 | }
11 | \usage{
12 | metID(object, ...)
13 | }
14 | \arguments{
15 | \item{...}{option arguments to be passed along.}
16 | 
17 | \item{object}{a compMS2 class object}
18 | 
19 | \item{method}{method to use for metabolite identification. See details.}
20 | }
21 | \value{
22 | A compMS2 object with various metabolite identification information.
23 | }
24 | \description{
25 | methods to facilitate metabolite identification including database
26 | monoisotopic mass matching, probable annotation filtration, mammalian Phase II
27 | metabolite prediction, molecular descriptor- random forest based retention time
28 | prediction, insilico metabolite fragmentation and nearest network neighbour
29 | metabolite chemical similarity scoring. In addition annotations can be 
30 | automatically ranked and possible identities selected based on a mean
31 | consensus score based on mass accuracy, spectral database similarity, 
32 | \emph{in silico} fragmentation similarity, predicted retention time similarity,
33 | nearest network neighbour chemical similarity and crude plausibility ranking
34 | by PubMed repository text-mining. Optionally a differential evolution approach
35 | can be used to globally optimize the contributory weight of each consensus score using
36 | a training set of possible annotations contained in the "metID comments" table.
37 | The metaheuristic attempts to weight the included consensus scores according to
38 | how well the correct annotations are ranked amongst the possible best annotations.
39 | }
40 | \details{
41 | Available methods:
42 | 
43 | \enumerate{
44 | \item monoisotopic mass annotation to data base resources (\code{\link{metID.dbAnnotate}}),
45 |    currently available databases include HMDB, LMSD (lipidMaps) DrugBank, T3DB and ReSpect.
46 |    possible metabolites electrospray adducts and substructure mass shifts 
47 |    are taken into account.
48 | 
49 | \item identifies most probable database annotations (\code{\link{metID.dbProb}}), 
50 |    taking into account substructure annotations identified by \code{\link{subStructure.Annotate}}.  
51 | 
52 | \item Phase II metabolite identification from canonical SMILES currently 
53 |    available phase II metabolite prediction types include: acyl-, hydroxyl- 
54 |    and amine- sulfates and glucuronides and glycine conjugates 
55 |    (\code{\link{metID.predSMILES}}). 
56 | 
57 | \item Retention time prediction using the molecular descriptors derived from the \pkg{rcdk} package and a randomForest recursive-feature elimination method of the \pkg{caret} package (\code{\link{metID.rtPred}}).
58 | 
59 | \item Combinatorial \emph{in silico} fragment prediction using the command line version of MetFrag (\code{\link{metID.metFrag}}) or Competitive fragmentation modelling (CFM \code{\link{metID.CFM}}).
60 | 
61 | \item Correlation network from a peak table. This function calculates a correlation matrix from the peak areas/ height sample columns and creates a prefuse force directed correlation network that can then be visualized in the \code{\link{compMS2Explorer}} application. \code{\link{metID.corrNetwork}}
62 | 
63 | \item Spectral similarity network. Inter-spectrum spectral similarity scores (dot product) are calculated. Both fragment ion and precursor - fragment neutral loss
64 | pattern similarity scores are calculated and used to identify clusters of spectra with similar fragmentation/neutral loss patterns. A spectral similarity network is then calculated based on a minimum dot product score (minDotProdThresh, default = 0.8). The resulting network can then be visualized in the \code{\link{compMS2Explorer}} application. \code{\link{metID.specSimNetwork}}.
65 | 
66 | \item Correlation and spectral similarity based 1st Neighbour maximum chemical similarity scoring and optional automatic annotation identification  (\code{\link{metID.chemSim}}). 
67 | 
68 | \item build consensus annotations (\code{\link{metID.buildConsensus}}). This function seeks to rank annotation 
69 | strength and automate metabolite identification based on 6 optional orthogonal
70 | annotation evidences, namely: mass accuracy, spectral database similarity (see function \code{\link{metID.matchSpectralDB}}),
71 | \emph{in silico} fragmentation similarity (both metFrag and CFM see functions \code{\link{metID.metFrag}} and  \code{\link{metID.CFM}}), 
72 | random forest predicted retention time similarity (see function \code{\link{metID.rtPred}}), 1st network neighbour chemical similarity (both 
73 | correlation and spectral similarity see function \code{\link{metID.chemSim}}) 
74 | and finally crude literature based metabolite annotation strength by text-mining
75 | the PubMed repository using the Entrez system.  
76 | The function can automatically add annotations to the "metID comments" table
77 | of the \code{\link{compMS2Explorer}} application and also ranks the individual
78 | "best annotations" tables by the mean consensus metabolite annotation score.
79 | 
80 | \item optimize consensus annotations using the differential evolution algorithm
81 | of the \code{\link{DEoptim}} package.
82 | }
83 | }
84 | \seealso{
85 | \code{\link{metID.dbAnnotate}}, \code{\link{metID.dbProb}}, 
86 | \code{\link{metID.predSMILES}}, 
87 | \code{\link{metID.reconSubStr}}, \code{\link{metID.metFrag}}, 
88 | \code{\link{metID.chemSim}}, \code{\link{metID.corrNetwork}},
89 | \code{\link{metID.specSimNetwork}}, \code{\link{metID.matchSpectralDB}}, 
90 | \code{\link{metID.rtPred}}, \code{\link{metID.buildConsensus}},
91 | \code{\link{metID.optimConsensus}},
92 | \code{\link{metID.compMS2toMsp}}.
93 | }
94 | 


--------------------------------------------------------------------------------
/man/metID.chemSim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.chemSim.R
 3 | \name{metID.chemSim}
 4 | \alias{metID.chemSim}
 5 | \title{tanimoto chemical similarity first network neighbours}
 6 | \usage{
 7 | metID.chemSim(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a "compMS2" class object.}
11 | 
12 | \item{autoPossId}{logical if TRUE the function will automatically add the name
13 | of the top annotation based on mean maximum 1st neighbour chemical similarity
14 | above the minimum chemical similarity score (default = FALSE). Caution if TRUE
15 | this will overwrite any existing possible_identities in the "metID comments"
16 | table. This functionality is intended as an automatic annotation identification tool prior to thorough examination of the data in \code{\link{compMS2Explorer}}.
17 | The intention is that automatic annotations can be used in the metID.rtPred
18 | retention prediction function as part of a seamless first-pass workflow.}
19 | 
20 | \item{minSimScore}{numeric must be values between 0-1 minimum tanimoto chemical similarity score (default = 0.8). Any mean maximum 1st neighbour chemical 
21 | similarity scores above this minimum will be considered for automatic possible identity 
22 | addition to the "metID comments"
23 |  table, the annotation with the highest mean maximum 1st neighbour chemical similarity score will then be automatically added to the "metID comments" table
24 |  in the \code{\link{compMS2Explorer}}.}
25 | 
26 | \item{possContam}{numeric how many times does a possible annotation have
27 | to appear in the automatically generated possible annotations for it to be
28 | considered a contaminant and therefore not added to the "metID comment" table (default = 3, i.e. if a database name appears more than 3 times in the 
29 | automatic annotation table it will be removed).}
30 | 
31 | \item{bitsChemFP}{numeric values between 1024-4096 number of most frequent
32 | atom-pairs in the DrugBank database see the ChemmineR function \code{\link{desc2fp}} for more details (default = 1024).}
33 | 
34 | \item{minEdges}{numeric minimum number of edges (i.e. connected/adjacent nodes/spectra) 
35 | to consider a node/spectrum for chemical similarity (default=2). 
36 | This filtration is performed after removal of isobaric spectra.
37 | For example nodes with only one edge/adjacent node are more likely to produce 
38 | false-positive annotations. For more robust chemical similarity scoring 
39 | consider increasing this number.}
40 | 
41 | \item{verbose}{logical if TRUE display progress bars.}
42 | }
43 | \description{
44 | tanimoto chemical similarity first network neighbours
45 | }
46 | \details{
47 | this function can only be utilized after running \code{\link{metID.corrNetwork}} and/or \code{\link{metID.specSimNetwork}}. The purpose of this function is to provide first-pass automatic metabolite annotation. The tanimoto chemical similarity score is first calculated from a 1024-4096 bit chemical fingerprint for every best annotation SMILES code. For annotations of each composite spectrum the maximum chemical similarity score with any first neighbours (either by correlation and/or spectral similarity) are identified and the weighted arithmetic mean maximum chemical similarity score of 1st neighbours calculated. The mean is weighted based on the mean spectral similarity and/or
48 | correlation coefficient value for an edge pair (i.e. if two spectra are connected by both spectral similarity and
49 | correlation then a mean value of the two will be calculated and used as the weight).
50 | This is to ensure that the more similar or highly correlated two composite spectra
51 | are the higher the contribution to the maximum chemical similarity score. 
52 | A new column "MMNNCSS" is added to the best annotation tables for any composite spectra with at least one composite spectrum network neighbour. Furthermore, the best Annotation table is
53 | sorted according to this new likely annotation score, this give the user a rapid
54 | means to establish a likely annotation based on chemical similarity with neighbouring node annotations.
55 |  
56 |  Optionally, the top annotations for a composite spectrum can be automatically 
57 |  added to the "metID comments" table in the compMS2Explorer application. If the 
58 |  argument \strong{autoPossId} is TRUE (default = FALSE) the function will automatically add the name
59 |  of the top annotation based on mean maximum 1st neighbour chemical similarity
60 |  above the minimum chemical similarity score (argument minSimScore, default = 0.8).
61 | }
62 | \examples{
63 | library(compMS2Miner)
64 | compMS2Example <- metID(compMS2Example, 'chemSim', minSimScore=0.8, 
65 |                         autoPossId=TRUE)
66 | }
67 | 


--------------------------------------------------------------------------------
/man/metID.compMS2toMsp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.compMS2ToMsp.R
 3 | \name{metID.compMS2toMsp}
 4 | \alias{metID.compMS2toMsp}
 5 | \title{msp file from compMS2 class object}
 6 | \usage{
 7 | metID.compMS2toMsp(object, studyName = NULL, existingMsp = NULL,
 8 |   outputDir = getwd(), onlyCommented = FALSE, ...)
 9 | }
10 | \arguments{
11 | \item{object}{a "compMS2" class object}
12 | 
13 | \item{studyName}{character a study name tag for your dataset.}
14 | 
15 | \item{existingMsp}{character a full path or web address of an existing msp database
16 | file to concatenate to. The new entries will appear underneath the last entry 
17 | of the existing msp data file. If this argument is supplied then this file will be
18 | overwritten.}
19 | 
20 | \item{outputDir}{character full path to a directory to write the msp file to.
21 | Default is to take the current working directory obtained from \code{\link{getwd}}.}
22 | 
23 | \item{onlyCommented}{logical (default = FALSE) if TRUE only metabolites recorded
24 | in the comments table are included, if FALSE all of the composite spectra are included.}
25 | }
26 | \value{
27 | the msp file will be written to a file tagged with the study name and date in the output directory.
28 | }
29 | \description{
30 | msp file from compMS2 class object
31 | }
32 | \details{
33 | this function converts a compMS2 class object to an msp database
34 | file inserting experimental details and taking annotation data from the 
35 | comments table and best annotations. This creates a means to database successive
36 | experiments and potentially concatenate to a laboratory's existing spectral 
37 | database. In this way previously annotated spectra from other experiments 
38 | can be matched to new datasets. A unique splash code is also generated for
39 | each composite spectrum and added to each database entry in the msp file.
40 | install splash R from GitHub using devtools:
41 |  devtools::install_github("berlinguyinca/spectra-hash", subdir="splashR")
42 | }
43 | 


--------------------------------------------------------------------------------
/man/metID.corrNetwork.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.corrNetwork.R
 3 | \name{metID.corrNetwork}
 4 | \alias{metID.corrNetwork}
 5 | \title{CompMS2 correlation network generation}
 6 | \usage{
 7 | metID.corrNetwork(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{A "compMS2" class object.}
11 | 
12 | \item{peakTable}{a data.frame in the form observation (samples) in columns and
13 | variables (Mass spectral signals) in rows. The first 3 columns must consist of:
14 | \enumerate{
15 |  \item EIC number or unique peak identifier.
16 |  \item mass-to-charge ratio of peak group.
17 |  \item median/ peak apex retention time in seconds. 
18 |  }
19 | These columns are utilized in the final network visualization.}
20 | 
21 | \item{obsNames}{character vector of observation (i.e. sample/ QC/ Blank) names to identify appropriate observation (sample) columns.}
22 | 
23 | \item{corrThresh}{correlation coefficient threshold to group features within
24 | a retention time cluster. If no value is supplied the default is to estimate
25 | an optimal cut-off value based on network attributes from a series of correlation cut-off values. This is based upon the relationship between the 
26 | number of nodes, edges, number of clusters and the network density (i.e. n actual edges/n potential edges) at each correlation cut-off value. A plot showing
27 | the result of this estimation will be generated.}
28 | 
29 | \item{corrMethod}{character correlation method see \code{\link{cor}} for details. default "spearman".}
30 | 
31 | \item{delta}{numeric maximum p-value (following multiple testing correction) below which the null hypothesis (no correlation) is rejected.}
32 | 
33 | \item{MTC}{character Multiple Testing Correction default is "none", see \code{\link{p.adjust.methods}} for
34 | details of options. ("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none").
35 | Any p-values after multiple testing correction above the value of delta will have their
36 | corresponding correlation coefficients replaced with zero.}
37 | 
38 | \item{maxNodes}{numeric above the maximum nodes the function will use the large graphing algorithm of \code{\link{igraph}}. See \code{\link[igraph]{with_lgl}} else
39 | the function uses the Fruchterman-Reingold layout algorithm. See \code{\link[igraph]{with_fr}}}
40 | 
41 | \item{MS2only}{numeric 3 options (1-3) if 1 all EICs above corrThresh are returned, if 2 only non-MS2 matched EICs which are first neighbours of at least one MS2-matched EIC are returned, if 3 only MS2-matched EICs are returned (default = 1).}
42 | 
43 | \item{minClustSize}{numeric minimum number of connected nodes i.e. cluster size (default = 3). If a cluster
44 | of nodes is less than this number then it will be removed.}
45 | }
46 | \value{
47 | "compMS2" class object with an additional network graph of any peakTable features above the correlation threshold.
48 | }
49 | \description{
50 | Uses the MS1features matched to MS2 data to generate a correlation network to view in compMS2Explorer
51 | }
52 | \references{
53 | Koh Aoki, Yoshiyuki Ogata, and Daisuke Shibata
54 | Approaches for Extracting Practical Information from Gene Co-expression Networks in Plant Biology
55 | Plant Cell Physiol (2007) 48 (3): 381-390 first published online January 23, 2007 doi:10.1093/pcp/pcm013
56 | }
57 | 


--------------------------------------------------------------------------------
/man/metID.dbAnnotate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.dbAnnotate.R
 3 | \name{metID.dbAnnotate}
 4 | \alias{metID.dbAnnotate}
 5 | \title{Annotate unknown features in a compMS2 class object to database 
 6 | entries based on monoisotopic mass}
 7 | \usage{
 8 | metID.dbAnnotate(object, ...)
 9 | }
10 | \arguments{
11 | \item{object}{a compMS2 class object.}
12 | 
13 | \item{ppm}{numeric mass accuracy error (ppm) to match MS1 features to data 
14 | base entries.}
15 | 
16 | \item{esiAdducts}{data.frame or character vector of custom electrospray 
17 | adducts can be supplied as a character vector of electrospray adducts or as
18 | a data.frame output from the function \link{adduct2mass}. e.g. 
19 | c("[M-H-NH3-CO-COCH2-C4H6O]-", "[4M-H+Cl]2-", "[2M-H]-", "[3M-3H+Fe2+]-", 
20 | "[M-H-CH2O]-", "[3M-2H]2-", "[M-H-CO2-C3H6]-", "[M-H+CH3COOH]-", "[3M-H]-", 
21 | "[M-2H]2-")}
22 | 
23 | \item{metDB}{a metabolite data base (see default ?HMDB for table format).
24 | Other currently available data bases include LMSD, DrugBank, T3DB and ReSpect 
25 | databases. Matching using multiple databases is also possible.}
26 | 
27 | \item{SubStrs}{named numeric vector of substructure mass shifts to consider. 
28 | If argument is not supplied then no substructure mass shifts will be considered.
29 | If the character "All" is supplied then all the substructure mass shifts 
30 | contained in the internal table will be considered 
31 | (see default \link{Substructure_masses} for more information).}
32 | 
33 | \item{featureSubSet}{character vector of composite spectra names (e.g. CC_1, CC_2 etc.) otherwise the default is to perform database annotation on all composite spectra.}
34 | 
35 | \item{includeElements}{character vector of element symbols (case-sensitive) 
36 | to include in the metDB argument. Any structure containing an element which 
37 | is not in this inclusion list will not be considered. see ?exactMassEle for 
38 | the internal element table.}
39 | 
40 | \item{mixtures}{logical (default = FALSE), should mixtures be considered. If FALSE any
41 | SMILES codes consisting of multiple non-covalently linked structures will not
42 | be considered (e.g. salts).}
43 | 
44 | \item{MS1adducts}{logical (default = FALSE), should the adducts included in
45 | the 4th column of the MS1feature table be used to reduce false positive 
46 | assignments. The adducts identified will be used to guide the annotation process,
47 | only the unique adducts/fragments will be used to calculate the expected
48 | mass shift values. If a spectrum has no adduct identified then all of the
49 | unique adducts identified in the dataset will be used.}
50 | }
51 | \value{
52 | a compMS2 class object containing potential metabolite annotations.
53 | }
54 | \description{
55 | unknown metabolite identification. MS1 features within a compMS2
56 | object are matched against a metabolite database based on a mass accuracy
57 | tolerance (ppm).
58 | Possible electrospray adducts/ in-source fragments and substructure mass 
59 | shifts are calculated for all data base entries. Substructure mass shifts 
60 | are supplied as a named numeric vector or default is all the mass shifts in 
61 | the internal substructure masses table (see default Substructure_masses).
62 | Custom ESI adduct/ in-source fragments can be supplied as a character string
63 | of names or the output of the function \link{adduct2mass}. See examples.
64 | }
65 | \examples{
66 | SubStrMassShift <- c(42.010565, 119.004101, 176.03209, 255.988909, 305.068159, 
67 |                      57.021464, 161.014666, 79.956817)
68 | names(SubStrMassShift) <- c("acetyl", "cysteine", "glucuronide", 
69 |                             "glucuronide sulfate", "glutathione", "glycine",
70 |                             "mercapturate", "sulfate")
71 | # custom ESI adducts (default is to consider all ESI adducts/ in-source 
72 | # fragments from supplementary material Beyond Profiling manuscript 
73 | # see references for details)
74 | # The function adduct2mass can interpret ESI adduct names and generate a 
75 | # data.frame of expected mass shifts
76 | customEsiAdducts <- c("[M-H-NH3-CO-COCH2-C4H6O]-", "[4M-H+Cl]2-", "[2M-H]-", 
77 |                       "[3M-3H+Fe2+]-", "[M-H-CH2O]-", "[3M-2H]2-", 
78 |                       "[M-H-CO2-C3H6]-", "[M-H+CH3COOH]-", "[3M-H]-", 
79 |                       "[M-2H]2-")
80 | 
81 | compMS2Example <- metID(compMS2Example, "dbAnnotate", SubStrs=SubStrMassShift, 
82 |                         esiAdducts=customEsiAdducts)
83 |                                       
84 | }
85 | \references{
86 | \enumerate{
87 | \item Stanstrup, J., Gerlich, M., Dragsted, L.O. et al. 
88 | Anal Bioanal Chem (2013) 405: 5037. doi:10.1007/s00216-013-6954-6
89 | }
90 | }
91 | \seealso{
92 | \link{adduct2mass}.
93 | }
94 | 


--------------------------------------------------------------------------------
/man/metID.dbProb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.dbProb.R
 3 | \name{metID.dbProb}
 4 | \alias{metID.dbProb}
 5 | \title{selects best annotations based on substructure annotations identified
 6 | by \code{\link{subStructure.Annotate}}}
 7 | \usage{
 8 | metID.dbProb(object, ...)
 9 | }
10 | \arguments{
11 | \item{object}{a compMS2 class object}
12 | 
13 | \item{nameFeat}{character unique name of a single composite spectrum of
14 | interest. If not supplied (default) the most probable annotations are decided automatically
15 | for all composite spectra. Previous most probable annotations will not
16 | be overwritten if the function is run more than once.}
17 | 
18 | \item{DBids}{unique database identifier for a specific composite spectrum, in
19 | combination with nameFeat argument.}
20 | 
21 | \item{minTimesId}{numeric (default = 2) the minimum number of times a particular
22 | substructure type must be identified for it to be considered. This helps to
23 | limit consideration of neutral losses/fragments that have been identified
24 | once for example by chance.}
25 | 
26 | \item{verbose}{logical if TRUE display progress bars.}
27 | }
28 | \value{
29 | a compMS2 class object with most probable annotation(s)
30 | }
31 | \description{
32 | Most probable database annotations either automatically 
33 | decided based on substructure type detected by the 
34 | \code{\link{subStructure.Annotate}} or user supplied most probable annotations 
35 | one composite spectrum at a time. Additionally any substructure either 
36 | neutral loss or product ion with an available SMILES code will be matched
37 | against all of the available annotations. The function utilizes the 
38 | \code{\link{cmp.similarity}} function set to mode 2 (using the size of the 
39 | descriptor intersection over the size of the smaller descriptor, to deal
40 | with compounds that vary a lot in size) of the ChemmineR package to calculate
41 | the similarity of the substructure to the annotated structure. The average 
42 | score between 0-1 of all of the substructures annotated by the 
43 | \code{\link{subStructure.Annotate}}
44 | function is returned in a new column in the
45 | "Best Annotations" panel in the \code{\link{compMS2Explorer}} and the annotations ranked accordingly. Additionally, if 
46 | either a database annotation corresponds to a substructure type annotated 
47 | (e.g. glucuronide) or the name of the database entry contains the substructure
48 | name (case-insensitive) then this will be given a maximum top score of 1.
49 | }
50 | \examples{
51 | compMS2Example <- metID(compMS2Example, 'dbProb')
52 | }
53 | \seealso{
54 | \code{\link{subStructure.Annotate}}, \code{\link{metID.dbAnnotate}},
55 | \code{\link{cmp.similarity}}.
56 | }
57 | 


--------------------------------------------------------------------------------
/man/metID.matchSpectralDB.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.matchSpectralDB.R
 3 | \name{metID.matchSpectralDB}
 4 | \alias{metID.matchSpectralDB}
 5 | \title{match spectra to spectral databases in the .msp file format (NIST)}
 6 | \usage{
 7 | metID.matchSpectralDB(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{A "compMS2" class object.}
11 | 
12 | \item{mspFile}{character string to an online or local .msp spectrum database file.
13 | (default = NULL, dependendent on polarity either a positive or negative mode
14 | version of massbank hosted on github will be downloaded) See \url{https://raw.githubusercontent.com/WMBEdmands/mspFiles/master/MassBank_MSMS_Pos_Rev173_vs1.msp} and \url{https://raw.githubusercontent.com/WMBEdmands/mspFiles/master/MassBank_MSMS_Neg_Rev173_vs1.msp}. Additional .msp files can be downloaded
15 | from the massbank of North America website \url{http://mona.fiehnlab.ucdavis.edu/downloads}.}
16 | 
17 | \item{minDBDotProdThresh}{minimum dot product spectral similarity score (default = 0.65).}
18 | 
19 | \item{ppmMS1}{numeric minimum mass accuracy (ppm) between database (.msp)
20 | precursor masses and the CompMS2 composite spectrum MS1 m/z. 
21 | Dot products will only be calculated for database entries within this mass 
22 | accuracy threshold (default=10ppm).}
23 | 
24 | \item{binSizeMS2}{numeric bin size for matching between database (.msp) fragment masses and the CompMS2 composite spectrum fragment ions (default = 0.1).}
25 | 
26 | \item{autoPossId}{logical if TRUE and if the .msp file database entries contain
27 | at the very least a SMILES code (for downstream metabolite identification purposes) 
28 | all msp file database entries will be added to the best annotations table
29 | and the function will also automatically add the name of the best annotation 
30 | (highest dot product score above the user defined threshold) 
31 | to the comments table and a note ("metID.matchSpectralDB") added to the 
32 | comments column. This represents a form of automated metabolite identification
33 | which can still be modified in the comments table upon visualization with
34 | compMS2Explorer.}
35 | 
36 | \item{verbose}{logical if TRUE display progress bars.}
37 | }
38 | \value{
39 | "compMS2" class object with any database matches above the minimum dot product score.
40 | }
41 | \description{
42 | match spectra to spectral databases in the .msp file format (NIST)
43 | }
44 | \details{
45 | If the msp file contains smiles codes then any matches above the min
46 | dot prod or minimum proportion of the spectrum explained will be added to the 
47 | best annotations table.
48 | }
49 | 


--------------------------------------------------------------------------------
/man/metID.metFrag.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.metFrag.R
 3 | \name{metID.metFrag}
 4 | \alias{metID.metFrag}
 5 | \title{metFrag \emph{in silico} fragmentation query wrapper function}
 6 | \source{
 7 | \url{http://c-ruttkies.github.io/MetFrag/projects/metfragcl/} developed based on the command line .jar file (MetFrag2.3-CL.jar) downloaded on 2016/07/12
 8 | \enumerate{
 9 | \item MetFrag relaunched: incorporating strategies beyond in silico fragmentation: C Ruttkies, E L Schymanski, S Wolf, J Hollender, S Neumann Journal of Cheminformatics 2016 8:3
10 | \item In silico fragmentation for computer assisted identification of metabolite mass spectra: S Wolf, S Schmidt, M Müller-Hannemann, S Neumann BMC bioinformatics 11 (1), 148
11 | }
12 | }
13 | \usage{
14 | metID.metFrag(object, ...)
15 | }
16 | \arguments{
17 | \item{object}{a "compMS2" class object.}
18 | 
19 | \item{featureSubSet}{character vector of composite spectra names (e.g. CC_1, CC_2 etc.) otherwise the default is to perform metFrag queries on all composite spectra.}
20 | 
21 | \item{adductTable}{data.frame containing adduct names, MetFrag number codes
22 | and ionization mode. A default data.frame is internal to the package but this
23 | table is fully customizable if future adduct types are added. See View(metFragAdducts)
24 | to see the format of this table. N.B. The metFrag adduct names must perfectly match
25 | those supplied to the \code{\link{adduct2mass}} function during database
26 | annotation using \code{\link{metID.dbAnnotate}}.}
27 | 
28 | \item{keepTempFiles}{logical default = FALSE, txt, sdf and csv results files will
29 | be created as temporary files otherwise if TRUE files will be retained in named subdirectories (see details).}
30 | 
31 | \item{maxTreeDepth}{numeric maximum tree depth (default = 2). If 2 then
32 | fragments of fragments are also considered but will increase computation time.}
33 | 
34 | \item{frag_mzabs}{numeric delta predicted-observed fragment mass accuracy for matching.}
35 | 
36 | \item{minMetFragScore}{numeric minimum mean total ion current explained and metFrag score (default = 0.9)
37 | the candidate with the highest score above this minimum will be automatically added to the Comments
38 | table. The argument autoPossId must also be set to TRUE.}
39 | 
40 | \item{autoPossId}{logical if TRUE the function will automatically add the name
41 | of the top annotation based on mean total ion current explained and metFrag score
42 |  (default = FALSE). Caution if TRUE
43 | this will overwrite any existing possible_identities in the "metID comments"
44 | table. This functionality is intended as an automatic annotation identification tool prior to thorough examination of the data in \code{\link{compMS2Explorer}}.
45 | The intention is that automatic annotations can be used in the metID.rtPred
46 | retention prediction function as part of a seamless first-pass workflow.}
47 | 
48 | \item{possContam}{numeric how many times does a possible annotation have
49 | to appear in the automatically generated possible annotations for it to be
50 | considered a contaminant and therefore not added to the "metID comment" table (default = 3, i.e. if a database name appears more than 3 times in the 
51 | automatic annotation table it will be removed).}
52 | 
53 | \item{verbose}{logical if TRUE display progress bars.}
54 | }
55 | \value{
56 | a compMS2 class object containing metFrag \emph{in silico} fragmentation 
57 | results which can be visualized in \code{\link{compMS2Explorer}}.
58 | }
59 | \description{
60 | performs metFrag (msbi.ipb-halle.de/MetFrag/) \emph{in silico} combinatorial
61 | fragmentation. Local chemical structure data files (.sdf) are created from 
62 | most probable annotation canonical SMILES codes. Temporary local sdf files 
63 | and metfrag parameter files (.txt) are  created on a composite spectrum by 
64 | composite spectrum basis and
65 | \emph{in silico} fragmentation performed. Results are read back into R and stored in the compMS2 class object as results tables. In addition the temporary sdf,
66 | .txt and .csv metfrag results files may also optionally be kept (keepTempFiles = TRUE) and are saved in a subdirectory structure (see \emph{details}). The function will automatically run as a parallel computation if the compMS2 object was created in parallel. The adduct types and codes which the MetFrag command line tool 
67 | will work with are included in a default internal table. See data(metFragAdducts)
68 |  the following electrospray adducts are contained within:
69 | positive mode: '[M+H]+', '[M+NH4]+', '[M+Na]+', '[M+K]+'
70 | negative mode: '[M-H]-', '[M+Cl]-', '[M-H+HCOOH]-', '[M-H+CH3COOH]-' 
71 | All other database annotations of other electrospray adducts will be discarded.
72 | If the format of the metFragAdducts table is correctly followed then additional
73 | MetFrag adduct types can be added see View(metFragAdducts). This ensures that
74 | the adduct types are customizable and can be modified to incorporate future
75 | availability. See argument adductTable below.
76 | }
77 | \details{
78 | If keepTempFiles = TRUE, results directories are generated in the current working directory: for each MS1 feature matched to MS2 data a results 
79 |                      directory is created named ("MetFrag_results"). Subdirectories are then created within the results directory named after each composite spectrum name.
80 |                      Assuming MetFrag returned any results the following files should appear within the subdirectories:
81 | \enumerate{
82 | \item MetFrag parameters files : MetFrag parameter files (.txt) are saved in each result directory. See \url{http://c-ruttkies.github.io/MetFrag/projects/metfragcl/} for details.
83 | \item localSdf file : the local sdf (chemical structure data files) are saved in 
84 |                     each result directory. This file is used as a local database for MetFrag \emph{in silico} fragmentation.
85 | \item result .csv file: the metFrag results are returned as a comma separated values text file (.csv).
86 | }
87 | }
88 | 


--------------------------------------------------------------------------------
/man/metID.optimConsensus.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.optimConsensus.R
 3 | \name{metID.optimConsensus}
 4 | \alias{metID.optimConsensus}
 5 | \title{differential evolution build consensus weight optimization}
 6 | \usage{
 7 | metID.optimConsensus(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a "compMS2" class object.}
11 | 
12 | \item{...}{additional arguments to \code{\link{DEoptim.control}}.}
13 | 
14 | \item{include}{character vector of 7 options to build consensus combinatorial
15 | metabolite identification see Details below for a description of each. If
16 | specific options are not supplied as a character vector then the default 
17 | is to consider all 7. i.e. 
18 | c('massAccuracy', 'spectralDB', 'inSilico', 'rtPred', 'chemSim', 'pubMed', 'substructure').}
19 | 
20 | \item{autoPossId}{logical if TRUE the function will automatically add the names
21 | of the top annotation based on mean consensus annotation score to the 
22 | "metID comments" table (default = FALSE). Caution if TRUE this will overwrite 
23 | any existing possible_identities in the "metID comments" table. 
24 | This functionality is intended as an automatic metabolite annotation 
25 | identification tool prior to thorough examination of the data in 
26 | \code{\link{compMS2Explorer}} as part of an objective and seamless 
27 | first-pass annotation workflow. The mean build consensus score can consist
28 | of many orthogonal measurements of metabolite identification and a means
29 | to rapidly rank metabolite annotations.}
30 | 
31 | \item{minMeanBCscore}{numeric minimum mean consensus score (values between 0-1),
32 | if argument autoPossId is TRUE any metabolite annotations above this value
33 | will be automatically added to the "metID comments" table. (if argument not
34 | supplied the default is the upper interquartile range of the mean BC score).}
35 | 
36 | \item{possContam}{numeric how many times does a possible annotation have
37 | to appear in the automatically generated possible annotations for it to be
38 | considered a contaminant and therefore not added to the "metID comment" table (default = 3, i.e. if a database name appears more than 3 times in the 
39 |  automatic annotation table it will be removed).}
40 | 
41 | \item{specDbOnly}{logical if TRUE then only spectra matched to a spectral
42 | database will be considered. These annotations are identified by the flag
43 | "metID.matchSpectralDB" in the comments table. The default FALSE means that
44 | all metabolites in the metID comments table will be considered.}
45 | 
46 | \item{popSize}{numeric number of population members (see the NP argument in \code{\link{DEoptim.control}}). The default is 10 * length(include).}
47 | 
48 | \item{itermax}{numeric the maximum number of iterations (population generation)
49 | allowed. (default = 100). See \code{\link{DEoptim.control}} for further details.}
50 | 
51 | \item{plotInterval}{the number of iterations before plotting the algorithm's progress (default=20). Smaller values may slightly slow the process.}
52 | }
53 | \description{
54 | differential evolution build consensus weight optimization
55 | }
56 | \details{
57 | uses the package \code{\link{DEoptim}} to calculate the optimum
58 | weighting of all included consensus scores to accurately rank the known
59 | annotations (taken from the "metID comments" table in \code{\link{compMS2Explorer}}). These global parameters can then be used to rank the unknown annotations of other
60 | unannotated composite spectra based on the optimum weighted mean consensus score.
61 | The annotations above a certain score (minMeanBCscore) can also be automatically
62 | added to the "metID comments" table. The ongoing differential evolution process will appear in a plot window with a loess fit line in red highlighting any reduction
63 | in the mean rank of the training set annotations (from "metID comments" table) as
64 | the genetic process evolves. This metaheuristic global optimization process
65 | can help to maximise the parameters for accurate metabolite annotation and ranking.
66 | }
67 | \examples{
68 | compMS2Example <- metID(compMS2Example, 'optimConsensus')
69 | }
70 | \references{
71 | \enumerate{
72 | \item David Ardia, Katharine M. Mullen, Brian G. Peterson, Joshua Ulrich (2015).
73 | 'DEoptim': Differential Evolution in 'R'. version 2.2-3.
74 | 
75 | \item Katharine Mullen, David Ardia, David Gil, Donald Windover, James Cline
76 | (2011). 'DEoptim': An R Package for Global Optimization by Differential
77 | Evolution. Journal of Statistical Software, 40(6), 1-26. URL
78 | http://www.jstatsoft.org/v40/i06/.
79 | 
80 | \item Ardia, D., Boudt, K., Carl, P., Mullen, K.M., Peterson, B.G. (2010).
81 | Differential Evolution with 'DEoptim': An Application to Non-Convex Portfolio
82 | Optimization. The R Journal, 3(1), 27-34. URL
83 | http://journal.r-project.org/archive/2011-1/2011-1_index.html.
84 | 
85 | \item Ardia, D., Ospina Arango, N., Giraldo Gomez, N. (2010). Jump-Diffusion
86 | Calibration using Differential Evolution. Wilmott Magazine, Issue 55
87 | (September), 76-79. URL http://www.wilmott.com/.
88 | 
89 |  \item Kenneth V. Price, Rainer M. Storn and Jouni A. Lampinen (2006). 
90 |  Differential Evolution - A Practical Approach to Global Optimization. Berlin Heidelberg:
91 |   Springer-Verlag. ISBN 3540209506. 
92 |   }
93 | }
94 | 


--------------------------------------------------------------------------------
/man/metID.predSMILES.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.predSMILES.R
 3 | \name{metID.predSMILES}
 4 | \alias{metID.predSMILES}
 5 | \title{Phase II metabolite prediction}
 6 | \usage{
 7 | metID.predSMILES(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a compMS2 object (only possible when probable annotations
11 | i.e. metID.dbProb has been already performed).}
12 | }
13 | \value{
14 | a compMS2 class object containing predicted Phase II metabolites from
15 | most probable database annotations.
16 | }
17 | \description{
18 | calculates possible Phase II metabolite structures from
19 | canonical SMILES codes of most probable metabolite annotations. Currently
20 | the algorithm predicts only certain possible Phase II metabolites and no
21 | Phase I metabolism. The simple cases of acyl-, hydroxyl- and amine- sulfates
22 | and glucuronides and glycine conjugates are predicted based on the presence
23 | of these functional groups within the SMILES code.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/metID.rtPred.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.rtPred.R
 3 | \name{metID.rtPred}
 4 | \alias{metID.rtPred}
 5 | \title{Quantitative Structure-Retention Relationship modelling (QSRR) using molecular descriptors and randomForest modelling}
 6 | \source{
 7 | Predicting retention time in hydrophilic interaction liquid chromatography mass spectrometry and its use for peak annotation in metabolomics. Cao \emph{et al.} Metabolomics 2015 \url{http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4419193/}
 8 | }
 9 | \usage{
10 | metID.rtPred(object, ...)
11 | }
12 | \arguments{
13 | \item{object}{A "compMS2" class object.}
14 | 
15 | \item{...}{additional arguments to \link{nearZeroVar}.}
16 | 
17 | \item{standardsTable}{data.frame of standard compounds. The standard compounds should have been acquired using the same chromatographic method as the metabolomic dataset. If this argument is supplied then this table will be used to calculate the \link{randomForest} retention time prediction model rather than the possible_identity annotations from the "met ID comments" table. The table must contain at minimum the following 3 column names and an error will be returned if this is not the case (will ignore case e.g. both the column names SMILES or smiles are acceptable): 
18 | \enumerate{
19 | \item compound "character" type of compound names.
20 | \item smiles "character" type of SMILES codes.
21 | \item RT "numeric" type of retention time values (in seconds)
22 | }
23 | N.B. The data.frame may also contain additional columns}
24 | 
25 | \item{descriptors}{character vector of molecular descriptor class names from 
26 | \link{get.desc.names}. If NULL then all molecular descriptors will be considered.}
27 | 
28 | \item{removeOut}{logical (default = TRUE). If true outliers identified by
29 | Tukey's method that is a retention time deviation of any of the training
30 | set compounds greater than 1.5 * the interquartile range will be removed and
31 | the QSRR model will be recalculated.}
32 | 
33 | \item{propMissing}{numeric maximum proportion of missing values to include a
34 | molecular descriptor (values 0-1, default=0.1 i.e. maximum 10\% missing values).}
35 | 
36 | \item{propZero}{numeric maximum proportion of zero values to include a molecular
37 | descriptor (values 0-1, default=0.2 i.e. maximum 20\% zero values).}
38 | 
39 | \item{corrPairWise}{numeric minimum pair-wise Pearson Product moment 
40 | correlation value (values 0-1, default = 0.9),  if any molecular descriptors 
41 | have high pair-wise correlation then the variables with the largest mean 
42 | absolute correlation of each group are removed.}
43 | 
44 | \item{verbose}{logical if TRUE display progress bars.}
45 | }
46 | \description{
47 | Quantitative Structure-Retention Relationship modelling (QSRR) using molecular descriptors and randomForest modelling
48 | }
49 | \details{
50 | Based on the method described in Cao \emph{et al.} \url{http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4419193/} and uses the caret package (see tutorial: \url{http://topepo.github.io/caret/rfe.html}) for the recursive feature selection (randomForest method utilized).
51 | Calculates a quantitative structure-retention relationship model.
52 | The default is to use the putative annotations included in the "metID comments" table of \code{\link{compMS2Explorer}}. The putative annotations in the possible_identity column of the metID comments interactive table must match perfectly the database entry names found in the "best annotations" table (e.g. ensure correct matching by copying and pasting the possible compound identity into the possible_identity column of the "metID comments" table). The metID.rtPred function calculates molecular descriptors for all database entries in the "Best annotations" panel using the rcdk package. 
53 | 
54 | The molecular descriptors are then cleaned in the following sequence:
55 | \enumerate{
56 |  \item removing any molecular descriptors with greater than 10\% missing values.
57 |  \item removing any molecular descriptors with near zero variance using the function \code{\link{nearZeroVar}} from the caret package.
58 |  \item a correlation matrix of remaining molecular descriptors is calculated and
59 | molecular descriptors with a zero standard deviation are removed.
60 | \item finally any molecular descriptors with a high pair-wise correlation (>= 0.9 Pearson product moment) are identified and the molecular descriptors with the largest mean absolute correlation of each group are removed (see function \link{findCorrelation} from the caret package).
61 | }
62 | 
63 | The calculation of molecular descriptors for a large number of database entries is a potentially time-consuming process and therefore only needs to be conducted once, with the results of the process saved in the \linkS4class{compMS2} object.
64 | 
65 | The caret package function \link{rfe} is then used to identify the optimum set of remaining molecular descriptors to predict retention time. A plot should appear showing the correlation between the actual and predicted retention times of the training set.  
66 | 
67 | A possible workflow sequence would consist of initial examination of the results in \code{\link{compMS2Explorer}} with putative annotation of metabolites followed by use of the \code{\link{metID.rtPred}} function. After the first time the \code{\link{metID.rtPred}} function has run a new plot will appear in the \code{\link{compMS2Explorer}} gui where the "Best Annotations" closest
68 | to the randomForest model predicted retention times can be easily visualized.
69 | After more identifications have been made and additional putative annotations 
70 | have been included in the "metID comments" table the \code{\link{metID.rtPred}}
71 | function can be run a second time. It should be much faster than the first 
72 | as molecular descriptors have already been calculated and cleaned for all entries.
73 | }
74 | \examples{
75 | compMS2Example <- metID(compMS2Example, 'rtPred')
76 | }
77 | \seealso{
78 | \link{nearZeroVar}, \link{rfe}, \link{randomForest}.
79 | }
80 | 


--------------------------------------------------------------------------------
/man/metID.specSimNetwork.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metID.specSimNetwork.R
 3 | \name{metID.specSimNetwork}
 4 | \alias{metID.specSimNetwork}
 5 | \title{CompMS2 spectral similarity network generation}
 6 | \usage{
 7 | metID.specSimNetwork(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{A "compMS2" class object.}
11 | 
12 | \item{minDotProdThresh}{minimum dot product spectral similarity score. 
13 | If no value is supplied the default is to estimate
14 | an optimal cut-off value based on network attributes from a series of spectral similarity cut-off values. This is based upon the relationship between the 
15 | number of nodes, edges, number of clusters and the network density (i.e. n actual edges/n potential edges) at each spectral similarity cut-off value. A plot showing
16 | the result of this estimation will be generated.}
17 | 
18 | \item{binSizeMS2}{numeric MS2 bin size for spectral similarity matching (default = 0.1)}
19 | 
20 | \item{minClustSize}{numeric minimum number of connected nodes, i.e. the minimum cluster size (default = 3). If a cluster
21 | contains fewer nodes than this number then it will be removed.}
22 | }
23 | \value{
24 | "compMS2" class object with an additional network graph of any peakTable features above the dot product spectral similarity threshold.
25 | }
26 | \description{
27 | generates a dot product spectral similarity network from the MS/MS fragmentation data. The resulting spectral similarity network can then be viewed in compMS2Explorer. Utilizes the \code{\link{graph}} function of the \code{\link{igraph}} package.
28 | }
29 | \references{
30 | Koh Aoki, Yoshiyuki Ogata, and Daisuke Shibata
31 | Approaches for Extracting Practical Information from Gene Co-expression Networks in Plant Biology
32 | Plant Cell Physiol (2007) 48 (3): 381-390 first published online January 23, 2007 doi:10.1093/pcp/pcm013
33 | }
34 | 


--------------------------------------------------------------------------------
/man/mfSearchPUG.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mfSearchPUG.R
 3 | \name{mfSearchPUG}
 4 | \alias{mfSearchPUG}
 5 | \title{Search PubChem Compound for molecular formula using the PubChem power user gateway (PUG)}
 6 | \usage{
 7 | mfSearchPUG(mf = "C10H21N")
 8 | }
 9 | \arguments{
10 | \item{mf}{molecular formula character vector of length one e.g. 'C10H21N'.}
11 | }
12 | \value{
13 | returns a character vector of PubChem Compound CIDs matching the molecular
14 | formula
15 | }
16 | \description{
17 | Search PubChem Compound for molecular formula using the PubChem power user gateway (PUG)
18 | }
19 | 


--------------------------------------------------------------------------------
/man/monoMassMatch.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/monoMassMatch.R
 3 | \name{monoMassMatch}
 4 | \alias{monoMassMatch}
 5 | \title{Monoisotopic mass matching unknown to data base entry}
 6 | \usage{
 7 | monoMassMatch(unknowns = NULL, metMasses.df = NULL, esiAdducts = NULL,
 8 |   subStrMasses = NULL, mode = "pos", ppm = 10, nCores = NULL)
 9 | }
10 | \description{
11 | Monoisotopic mass matching unknown to data base entry
12 | }
13 | 


--------------------------------------------------------------------------------
/man/optimCutOff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/optimCutOff.R
 3 | \name{optimCutOff}
 4 | \alias{optimCutOff}
 5 | \title{optimum cutoff for a correlation or spectral similarity matrix}
 6 | \usage{
 7 | optimCutOff(x = NULL, cutOffSeq = seq(0.01, 1, 0.01), diffConsecVals = 1.5
 8 |   * 10^-3, maxCutOff = 0.95)
 9 | }
10 | \arguments{
11 | \item{x}{matrix of correlation coefficients or spectral similarity values
12 | column and row names must match}
13 | 
14 | \item{cutOffSeq}{numeric a vector of cut-off values to test. (default=seq(0.01, 1, 0.01) a numeric vector of length 100)}
15 | 
16 | \item{diffConsecVals}{numeric the scaled difference between consecutive values
17 | to identify the plateau in the network density (default=1.5*10^-3 or 0.0015).}
18 | }
19 | \value{
20 | a list containing two named elements "estCutOff" a numeric estimated optimal cut-off value and "testData" a matrix
21 | of the test results at each cut-off value. The function also plots the result.
22 | }
23 | \description{
24 | optimum cutoff for a correlation or spectral similarity matrix
25 | }
26 | \references{
27 | Koh Aoki, Yoshiyuki Ogata, and Daisuke Shibata
28 | Approaches for Extracting Practical Information from Gene Co-expression Networks in Plant Biology
29 | Plant Cell Physiol (2007) 48 (3): 381-390 first published online January 23, 2007 doi:10.1093/pcp/pcm013
30 | }
31 | 


--------------------------------------------------------------------------------
/man/pubMedSearch.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pubMedSearch.R
 3 | \name{pubMedSearch}
 4 | \alias{pubMedSearch}
 5 | \title{return cleaned abstracts from pubmed from searched key words}
 6 | \usage{
 7 | pubMedSearch(keys = NULL, n = 500, maxChar = 50, ...)
 8 | }
 9 | \arguments{
10 | \item{keys}{character vector of compound names to search pubmed with}
11 | 
12 | \item{n}{numeric maximum number of results to return. The maximum and default is 500.}
13 | 
14 | \item{maxChar}{numeric maximum number of characters in cleaned abstract words to return.}
15 | 
16 | \item{...}{further arguments to the \code{\link{cleanAbstracts}} function.}
17 | }
18 | \value{
19 | a list containing 3 named elements:
20 | 1. titles character vector of Abstract title(s)
21 | 2. abs character vector of abstract text(s).
22 | 3. clAbs clean abstract word frequency data.frame with column names 'word' and 'freq'.
23 | }
24 | \description{
25 | return cleaned abstracts from pubmed from searched key words
26 | }
27 | \seealso{
28 | PubMedWordcloud, \code{\link{getAbstracts}}, \code{\link{cleanAbstracts}}.
29 | }
30 | 


--------------------------------------------------------------------------------
/man/publishApp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/publishApp.R
 3 | \name{publishApp}
 4 | \alias{publishApp}
 5 | \title{Publish your compMS2Explorer app on shinyapps.io}
 6 | \usage{
 7 | publishApp(object, appName = NULL, writeDir = NULL, addFiles = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a compMS2 class object}
11 | 
12 | \item{appName}{character name for your new app (e.g. 'compMS2Example').}
13 | 
14 | \item{writeDir}{character full path to a directory to save the compMS2 results
15 | and the current shiny app zip file to. This zip file can then be shared with others. If this argument is not supplied the results will be deployed on shinyapps.io.}
16 | 
17 | \item{addFiles}{character vector of full paths to files which will be included
18 | in the zip file or bundle to shinyapps.io. For example code used to generate 
19 | compMS2 results. The default is to include at minimum a text file containing 
20 | the output of \link{sessionInfo}; this is intended to maintain reproducibility
21 | of published results.}
22 | 
23 | \item{...}{further arguments to the \link{deployApp} function}
24 | }
25 | \description{
26 | You must hold a shinyapps.io account and have a token.
27 | Use this command from the package rsconnect to set up your token:
28 | \code{setAccountInfo(name='your_shinyapps.io_user_name', token='your_token', secret='your_secret')}
29 | Following this the app can be deployed on your account.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/runGitHubApp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/runGitHubApp.R
 3 | \name{runGitHubApp}
 4 | \alias{runGitHubApp}
 5 | \title{run github shiny modified from shiny and devtools}
 6 | \usage{
 7 | runGitHubApp(repo = NULL, subdir = NULL, dirPath = NULL,
 8 |   auth_token = NULL, browserLaunch = TRUE)
 9 | }
10 | \arguments{
11 | \item{repo}{character github username and repository name. in the form "username/repositoryName"}
12 | 
13 | \item{subdir}{character sub-directory of the repo containing the shiny and data.}
14 | 
15 | \item{dirPath}{character full-path to a directory in which to save the contents of the zip file. If unsupplied shiny app will be opened from a temporary directory.}
16 | 
17 | \item{auth_token}{character private repo authorization token.}
18 | 
19 | \item{browserLaunch}{logical launch app in web browser (default = TRUE).}
20 | }
21 | \description{
22 | run github shiny modified from shiny and devtools
23 | }
24 | 


--------------------------------------------------------------------------------
/man/signalGrouping.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/signalGrouping.R
 3 | \name{signalGrouping}
 4 | \alias{signalGrouping}
 5 | \title{Signal grouping}
 6 | \usage{
 7 | signalGrouping(spectrum.df = NULL, mzError = 0.001, minPeaks = 5)
 8 | }
 9 | \arguments{
10 | \item{spectrum.df}{a dataframe or matrix with two or more columns:
11 | 1. Mass/ Mass-to-charge ratio
12 | 2. Intensity}
13 | 
14 | \item{mzError}{interpeak absolute m/z error for signal grouping 
15 | (Default = 0.001)}
16 | }
17 | \value{
18 | dataframe of m/z grouped signals, the m/z values of the input 
19 | dataframe/ matrix peak groups are averaged and the signal intensities summed.
20 | }
21 | \description{
22 | Euclidean distances between m/z signals are hierarchically clustered using 
23 | the average method and the composite spectrum groups determined by an absolute
24 | error cutoff
25 | }
26 | 


--------------------------------------------------------------------------------
/man/smiles2Form.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/smiles2Form.R
 3 | \name{smiles2Form}
 4 | \alias{smiles2Form}
 5 | \title{Convert SMILES code to atomic formula}
 6 | \usage{
 7 | smiles2Form(SMILES = NULL)
 8 | }
 9 | \arguments{
10 | \item{SMILES}{character vector of SMILES codes to convert}
11 | }
12 | \value{
13 | a character vector of the formula(e).
14 | }
15 | \description{
16 | Convert SMILES code to atomic formula
17 | }
18 | 


--------------------------------------------------------------------------------
/man/smiles2MonoMassForm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/smiles2MonoMassForm.R
 3 | \name{smiles2MonoMassForm}
 4 | \alias{smiles2MonoMassForm}
 5 | \title{Convert SMILES code to monoisotopic mass and formula}
 6 | \usage{
 7 | smiles2MonoMassForm(SMILES = NULL)
 8 | }
 9 | \arguments{
10 | \item{SMILES}{character vector of SMILES codes to convert}
11 | }
12 | \value{
13 | a named numeric vector of same length as the SMILES input containing the
14 | monoisotopic mass(es) and named using the formula(e).
15 | }
16 | \description{
17 | Convert SMILES code to monoisotopic mass and formula
18 | }
19 | 


--------------------------------------------------------------------------------
/man/subFormulae.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/subFormulae.R
 3 | \name{subFormulae}
 4 | \alias{subFormulae}
 5 | \title{subtract atomic formula y from atomic formula x}
 6 | \usage{
 7 | subFormulae(x = NULL, y = NULL)
 8 | }
 9 | \arguments{
10 | \item{x}{character vector of atomic formulae (must be same length as y).}
11 | 
12 | \item{y}{character vector of atomic formulae (must be same length as x).}
13 | }
14 | \description{
15 | subtract atomic formula y from atomic formula x
16 | }
17 | 


--------------------------------------------------------------------------------
/man/subStructure.Annotate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/subStructure.Annotate.R
 3 | \name{subStructure.Annotate}
 4 | \alias{subStructure.Annotate}
 5 | \title{composite spectra substructure annotation}
 6 | \usage{
 7 | subStructure.Annotate(object, ...)
 8 | }
 9 | \arguments{
10 | \item{Frag_mzabs}{Absolute mass accuracy difference to identify neutral losses 
11 | and fragments in composite spectra (default = 0.01).}
12 | 
13 | \item{SubStrs}{substructure data frame (default = Substructure_masses)
14 | see ?Substructure_masses for details of the mandatory table fields/ format}
15 | 
16 | \item{minRelInt}{minimum relative intensity to consider a spectral signal
17 | for substructure annotation (default = 5 i.e. 5\% rel. int.).}
18 | }
19 | \description{
20 | composite spectra substructure annotation
21 | }
22 | 


--------------------------------------------------------------------------------
/man/subStructure.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/subStructure.R
 3 | \name{subStructure}
 4 | \alias{subStructure}
 5 | \title{Identify substructures within composite spectra}
 6 | \usage{
 7 | subStructure(object, ...)
 8 | }
 9 | \arguments{
10 | \item{...}{optional arguments to be passed along.}
11 | 
12 | \item{object.}{a compMS2 class object obtained from the function CompMSset}
13 | 
14 | \item{method.}{"Annotate" annotation of possible substructure neutral losses/
15 | fragments in composite spectra, "prob" identify most probable substructure
16 | identification for a composite spectra and "probSummary" summary of probable
17 | substructure annotations for each composite spectrum.}
18 | }
19 | \value{
20 | A compMS2 object with substructure annotated composite spectra.
21 | }
22 | \description{
23 | Identify substructures within composite spectra
24 | }
25 | \seealso{
26 | \link{subStructure.Annotate}, \link{subStructure.prob}, \link{subStructure.probSummary}
27 | }
28 | 


--------------------------------------------------------------------------------
/man/subStructure.prob.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/subStructure.prob.R
 3 | \name{subStructure.prob}
 4 | \alias{subStructure.prob}
 5 | \title{Identifies probable substructure type}
 6 | \usage{
 7 | subStructure.prob(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{compMS2 object}
11 | 
12 | \item{minSumRelInt}{numeric (default = 70) minimum summed relative intensity to consider a probable
13 | substructure type identification. If above this minimum summed relative intensity
14 | then the most probable substructure type will be added to the compound_class 
15 | column of the Comments table in the compMS2 object with a note stating it was
16 | identified using this function e.g. sulfate (subStructure.prob). This provides
17 | a means to automatically annotate the Comments table in a first-pass 
18 | metabolite identification workflow.}
19 | }
20 | \value{
21 | a data.frame of probable substructure annotations for each composite
22 | spectrum, ranked by the sum of the relative intensities for that substructure
23 | type
24 | }
25 | \description{
26 | Identifies probable substructure type based on the summed
27 | relative intensities and therefore proportion of total composite spectrum 
28 | intensity explained.
29 | }
30 | 


--------------------------------------------------------------------------------
/man/subStructure.probSummary.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/subStructure.probSummary.R
 3 | \name{subStructure.probSummary}
 4 | \alias{subStructure.probSummary}
 5 | \title{summarizes most probable substructure type within all composite spectra}
 6 | \usage{
 7 | subStructure.probSummary(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a compMS2 class object}
11 | 
12 | \item{n}{number of top substructure types to print.}
13 | 
14 | \item{minSumRelInt}{numeric (default = 30) minimum summed relative intensity to consider a probable
15 | substructure type identification.}
16 | }
17 | \value{
18 | a named numeric vector of frequency of most probable substructure types
19 | identified. The most highly ranked probable substructure type for each
20 | composite spectra is based on the largest summed relative intensity explained
21 | by the characteristic substructure neutral losses and fragments.
22 | }
23 | \description{
24 | summarizes most probable substructure type within all composite spectra
25 | }
26 | 


--------------------------------------------------------------------------------
/man/subsetCompMS2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/subsetCompMS2.R
 3 | \name{subsetCompMS2}
 4 | \alias{subsetCompMS2}
 5 | \title{subset compMS2 class object using a vector of spectra names}
 6 | \usage{
 7 | subsetCompMS2(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a "compMS2" class object.}
11 | 
12 | \item{specNames}{character vector of composite spectrum names.}
13 | 
14 | \item{corrNetworkNodes}{logical should all the members of correlation network
15 | cluster be returned? 
16 | (default = FALSE).}
17 | 
18 | \item{rtGroups}{logical should all the nodes within the same retention time
19 | group also be returned? (default = FALSE).}
20 | 
21 | \item{onlyAdductId}{logical should only ESI adduct/in-source fragments which
22 | have been identified and are connected to the specNames argument be returned?
23 | (default = FALSE). If a feature of the specNames argument has no adducts
24 | identified then it will be returned in isolation.}
25 | 
26 | \item{nodeOrder}{neighbourhood order (see \link{ego} function from igraph package
27 | for details). If argument "onlyAdductId" is TRUE then only nodes less
28 | than order away from each of the specNames argument will be returned. This
29 | occurs after onlyAdductId subset features have been identified. This is performed
30 | to reduce complexity of very dense network clusters (default = 2).}
31 | }
32 | \value{
33 | a "compMS2" class object with the composite spectra and all metID 
34 | information removed.
35 | }
36 | \description{
37 | subset compMS2 class object using a vector of spectra names
38 | }
39 | 


--------------------------------------------------------------------------------
/man/trueFalseSum.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/trueFalseSum.R
 3 | \name{trueFalseSum}
 4 | \alias{trueFalseSum}
 5 | \title{summary of true positive and false negative assignments for each score metric}
 6 | \usage{
 7 | trueFalseSum(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a "compMS2" class object}
11 | 
12 | \item{n}{integer the number of top annotations to consider (default = 5).
13 | i.e. if the correct structure is found in the top 5 structures then it is
14 | considered a true positive.}
15 | 
16 | \item{minSimScore}{numeric chemical similarity score (values between 0 and 1).
17 | This score is used to identify the compound contained in the metID comments
18 | table (default = 1). This is performed rather than by name as there can
19 | be multiple hits of the same structure in the top annotations. To check
20 | if the metric is in roughly the right ballpark highly similar structures to 
21 | the correct annotations can be considered by setting the argument to a lower 
22 | Tanimoto chemical similarity score (e.g. 0.9).}
23 | 
24 | \item{specDbOnly}{logical if TRUE (default) then only the spectral database
25 | annotations are used to calculate the true positive and false negative rates. If FALSE
26 | then all possible_identities in the metID comments table are used.}
27 | 
28 | \item{verbose}{logical if TRUE display progress bars.}
29 | }
30 | \value{
31 | a list summarizing the true positive and false negative outputs, a 
32 | summary plot of the results and also weighted average rank for each score metric.
33 | }
34 | \description{
35 | summary of true positive and false negative assignments for each score metric
36 | }
37 | \details{
38 | The weighted average takes in to account the number of possible candidates
39 | by weighting the mean correct rank by the number of possible candidates. e.g. In this
40 | way a rank of 1 out of 10 possible structures will be ranked lower than a rank of 3 out
41 | of 1,000 possible structures. The latter is a more important/impressive reflection of 
42 | ranking ability than the former. If a more extensive chemical database is
43 | utilized such as PubChem Compound for example then a far larger number of
44 | candidates may be considered than a much smaller curated database such as HMDB.
45 | }
46 | 


--------------------------------------------------------------------------------
/vignettes/compMS2Miner_Workflow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WMBEdmands/compMS2Miner/ee20d3d632b11729d6bbb5b5b93cd468b097251d/vignettes/compMS2Miner_Workflow.pdf


--------------------------------------------------------------------------------