├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── C_function.R ├── Global_setting.R ├── MSn_processing.R ├── OptiLCMS_Class.R ├── Optimize_params.R ├── Other_Utils.R ├── Params_db.R ├── Perform_functions.R ├── ROI_Extraction.R ├── RcppExports.R ├── Resume_functions.R ├── Spectra_Utils.R ├── Spectra_import.R ├── Spectra_visulization.R └── autoconfig.R ├── README.md ├── data └── mSet.rda ├── inst ├── CITATION ├── hmdb │ └── hmdb_all.rds ├── lists │ ├── ions.csv │ ├── neutraladdition.csv │ └── neutralloss.csv ├── rules │ ├── extended_adducts_neg.csv │ ├── extended_adducts_pos.csv │ ├── primary_adducts_neg.csv │ └── primary_adducts_pos.csv └── script │ └── Example_Running_Script.R ├── man ├── CentroidCheck.Rd ├── CentroidMSData.Rd ├── ExecutePlan.Rd ├── Export.Annotation.Rd ├── Export.PeakSummary.Rd ├── Export.PeakTable.Rd ├── FormatMSnAnnotation.Rd ├── FormatPeakList.Rd ├── GaussModel.Rd ├── ImportRawMSData.Rd ├── InitDataObjects.Rd ├── InitializaPlan.Rd ├── OptiLCMS.Rd ├── PerformDBSearchingBatch.Rd ├── PerformDDADeconvolution.Rd ├── PerformDIADeconvolution.Rd ├── PerformDataInspect.Rd ├── PerformDataTrimming.Rd ├── PerformMSnImport.Rd ├── PerformMirrorPlotting.Rd ├── PerformParamsOptimization.Rd ├── PerformPeakAlignment.Rd ├── PerformPeakAnnotation.Rd ├── PerformPeakFiling.Rd ├── PerformPeakPicking.Rd ├── PerformPeakProfiling.Rd ├── PerformROIExtraction.Rd ├── PerformResultsExport.Rd ├── PerformSpectrumConsenus.Rd ├── PlotSpectraBPIadj.Rd ├── PlotSpectraInsensityStistics.Rd ├── PlotSpectraPCA.Rd ├── PlotSpectraRTadj.Rd ├── PlotXIC.Rd ├── SetAnnotationParam.Rd ├── SetGlobalParallel.Rd ├── SetPeakParam.Rd ├── SetPlotParam.Rd ├── UpdateRawfiles.Rd ├── calculate_entropy_similarity.Rd ├── calculate_unweighted_entropy_similarity.Rd ├── mSet.Rd ├── plotBPIs.Rd ├── plotMSfeature.Rd ├── plotSingleTIC.Rd ├── plotTICs.Rd ├── running.plan.Rd └── updateRawSpectraPath.Rd ├── src ├── CleanSpectrum.c ├── CleanSpectrum.h ├── 
DecoSpectra.cpp ├── DecoSpectra.h ├── DetectPeaks.cpp ├── DetectPeaks.h ├── Makevars ├── Makevars.win ├── PerformDDAProcess.cpp ├── PerformDDAProcess.h ├── PerformDIAProcess.cpp ├── PerformDIAProcess.h ├── RcppExports.cpp ├── SWATH_DIA_Deconv.cpp ├── SpectralEntropy.c ├── SpectralEntropy.h ├── binarySearch.h ├── dda_utilities.cpp ├── dda_utilities.h ├── decorana.f ├── entropy.cpp ├── entropy.h ├── export_interfece.cpp ├── fastMatch.c ├── fastMatch.h ├── fastcluster.h ├── hclust_ultrafast.cpp ├── hclust_ultrafast.h ├── init.c ├── linear_regression.cpp ├── linear_regression.h ├── lowess.cpp ├── lowess.h ├── massifquant │ ├── DataKeeper.cpp │ ├── DataKeeper.h │ ├── OpOverload.cpp │ ├── OpOverload.h │ ├── SegProc.cpp │ ├── SegProc.h │ ├── TrMgr.cpp │ ├── TrMgr.h │ ├── Tracker.cpp │ ├── Tracker.h │ ├── dpq.h │ ├── nmath.h │ └── xcms_massifquant.cpp ├── mzROI.c ├── obiwarp │ ├── lmat.h │ ├── mat.cpp │ ├── mat.h │ ├── vec.cpp │ ├── vec.h │ ├── xcms_dynprog.cpp │ ├── xcms_dynprog.h │ └── xcms_lmat.cpp ├── optim_src.h ├── optim_ultra.cpp ├── optim_ultra.h ├── pense │ ├── alias.hpp │ ├── autoconfig.hpp │ ├── cd_pense.hpp │ ├── constants.hpp │ ├── container_utility.hpp │ ├── enpy_initest.cc │ ├── enpy_initest.hpp │ ├── enpy_psc.cc │ ├── enpy_psc.hpp │ ├── enpy_types.hpp │ ├── m_loss.hpp │ ├── nsoptim.hpp │ ├── nsoptim │ │ ├── armadillo.hpp │ │ ├── armadillo_forward.hpp │ │ ├── config.hpp │ │ ├── container.hpp │ │ ├── container │ │ │ ├── data.hpp │ │ │ ├── forward.hpp │ │ │ ├── metrics.hpp │ │ │ └── regression_coefficients.hpp │ │ ├── objective.hpp │ │ ├── objective │ │ │ ├── adaptive_en_penalty.hpp │ │ │ ├── convex.hpp │ │ │ ├── en_penalty.hpp │ │ │ ├── forward.hpp │ │ │ ├── loss.hpp │ │ │ ├── ls_regression_loss.hpp │ │ │ └── penalty.hpp │ │ ├── optimizer.hpp │ │ ├── optimizer │ │ │ ├── admm.hpp │ │ │ ├── auglars.hpp │ │ │ ├── coordinate_descent.hpp │ │ │ ├── dal.hpp │ │ │ ├── dal_helper.hpp │ │ │ ├── linear_algebra_utilities.hpp │ │ │ ├── mm.hpp │ │ │ ├── optimizer_base.hpp 
│ │ │ ├── optimum.hpp │ │ │ └── soft_threshold.hpp │ │ ├── rcpp_integration.hpp │ │ ├── traits │ │ │ ├── can_evaluate.hpp │ │ │ ├── can_optimize.hpp │ │ │ ├── has_convex_surrogate.hpp │ │ │ ├── has_difference_op.hpp │ │ │ ├── is_adaptive.hpp │ │ │ ├── is_differentiable.hpp │ │ │ ├── is_en_penalty.hpp │ │ │ ├── is_iterative_algorithm.hpp │ │ │ ├── is_loss_function.hpp │ │ │ ├── is_ls_regression_loss.hpp │ │ │ ├── is_penalty_function.hpp │ │ │ ├── is_weighted.hpp │ │ │ ├── sfinae_types.hpp │ │ │ └── traits.hpp │ │ └── utilities.hpp │ ├── nsoptim_forward.hpp │ ├── omp_utils.hpp │ ├── r_en_regression.cc │ ├── r_en_regression.hpp │ ├── r_enpy.cc │ ├── r_enpy.hpp │ ├── r_interface.cc │ ├── r_interface_utils.cc │ ├── r_interface_utils.hpp │ ├── r_mesten_regression.cc │ ├── r_mesten_regression.hpp │ ├── r_pense_regression.cc │ ├── r_pense_regression.hpp │ ├── r_robust_utils.cc │ ├── r_robust_utils.hpp │ ├── r_utilities.cc │ ├── r_utilities.hpp │ ├── rcpp_integration.hpp │ ├── rcpp_parse_config.cc │ ├── rcpp_parse_config.hpp │ ├── rcpp_utils.hpp │ ├── rcpp_utils_forward.hpp │ ├── regularization_path.hpp │ ├── regularization_path_new.hpp │ ├── rho.cc │ ├── rho.hpp │ ├── robust_scale_location.cc │ ├── robust_scale_location.hpp │ └── s_loss.hpp ├── rules.h ├── spectra_searching.cpp ├── spectrumConsensus.cpp ├── sqlite │ ├── shell.c │ ├── sqlite3.c │ ├── sqlite3.h │ └── sqlite3ext.h ├── sqlite_utilities.cpp ├── sqlite_utilities.h ├── util.cpp ├── utilities.cpp ├── utilities.h ├── xcms_binners.c ├── xcms_binners.h └── xcms_obiwarp.cpp └── vignettes └── OptiLCMS_MS1.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | .git 5 | README.md 6 | ^\.github$ 7 | .DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | \#*\# 3 | *.Rproj 4 | 
OptiLCMS.Rproj 5 | .Rproj.user 6 | .Rhistory 7 | .RData 8 | .Ruserdata 9 | .Rbuildignore 10 | *.o 11 | src/*.so 12 | src/*.o 13 | NEWS.md 14 | *.dll 15 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: OptiLCMS 2 | Title: Optimized LC-MS Spectra Processing 3 | Version: 1.2.0 4 | Authors@R: c( 5 | person(given = "Zhiqiang", 6 | family = "Pang", 7 | role = c("aut", "cre"), 8 | email = "zhiqiang.pang@mail.mcgill.ca", 9 | comment = c(ORCID = "0000-0003-1654-7556")), 10 | person(given = "Jianguo", 11 | family = "Xia", 12 | role = c("aut"), 13 | email = "jeff.xia@mcgill.ca", 14 | comment = c(ORCID = "0000-0003-2040-2624")) 15 | ) 16 | Description: A pipeline for users to do the LC-MS (Liquid chromatography–mass spectrometry) raw data processing with automatically optimized parameters. An automated 17 | pipeline is implemented inside this package so that users can avoid manually optimizing the parameters. More functions for MS/MS data processing are on the way. 
18 | License: MIT + file LICENSE 19 | Depends: R (>= 4.0.0), methods, Biobase, BiocParallel (>= 1.8.0), stats, utils, MSnbase 20 | Imports: mzR (>= 2.22.0), lattice, RColorBrewer, plyr, RJSONIO, parallel, tools, grDevices, grid, graphics, 21 | RBGL, S4Vectors, ggplot2, graph, progress, rsm, Cairo, Hmisc, entropy, cluster, data.table, 22 | scales, ggrepel, Rcpp (>= 1.0.9) 23 | Suggests: knitr (>= 1.1.0), MALDIquant, MassSpecWavelet, mtbls2, rmarkdown, BiocStyle 24 | Enhances: XML 25 | Maintainer: Zhiqiang Pang 26 | VignetteBuilder: knitr 27 | biocViews: Software, MassSpectrometry, Metabolomics, DataImport 28 | BugReports: https://github.com/xia-lab/OptiLCMS/issues/new 29 | URL: https://github.com/xia-lab/OptiLCMS 30 | Encoding: UTF-8 31 | Roxygen: list(markdown = TRUE) 32 | RoxygenNote: 7.2.3 33 | LinkingTo: Rcpp, RcppArmadillo 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2020 2 | COPYRIGHT HOLDER: Jeff Xia 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(CentroidCheck) 4 | export(CentroidMSData) 5 | export(ExecutePlan) 6 | export(Export.Annotation) 7 | export(Export.PeakSummary) 8 | export(Export.PeakTable) 9 | export(FormatMSnAnnotation) 10 | export(FormatPeakList) 11 | export(GaussModel) 12 | export(ImportRawMSData) 13 | export(InitDataObjects) 14 | export(InitializaPlan) 15 | export(PerformBlankSubstraction) 16 | export(PerformDBSearchingBatch) 17 | export(PerformDDADeconvolution) 18 | export(PerformDIADeconvolution) 19 | export(PerformDataInspect) 20 | export(PerformDataTrimming) 21 | export(PerformMSnImport) 22 | export(PerformMirrorPlotting) 23 | export(PerformParamsOptimization) 24 | export(PerformPeakAlignment) 25 | 
export(PerformPeakAnnotation) 26 | export(PerformPeakFiling) 27 | export(PerformPeakPicking) 28 | export(PerformPeakProfiling) 29 | export(PerformROIExtraction) 30 | export(PerformResultsExport) 31 | export(PerformSpectrumConsenus) 32 | export(PlotSpectraBPIadj) 33 | export(PlotSpectraInsensityStistics) 34 | export(PlotSpectraPCA) 35 | export(PlotSpectraRTadj) 36 | export(PlotXIC) 37 | export(SetAnnotationParam) 38 | export(SetGlobalParallel) 39 | export(SetPeakParam) 40 | export(SetPlotParam) 41 | export(UpdateRawfiles) 42 | export(plotBPIs) 43 | export(plotMSfeature) 44 | export(plotSingleTIC) 45 | export(plotTICs) 46 | export(running.plan) 47 | export(updateRawSpectraPath) 48 | import(Biobase) 49 | import(BiocParallel) 50 | import(MSnbase) 51 | import(RColorBrewer) 52 | import(ggplot2) 53 | import(methods) 54 | import(parallel) 55 | import(progress) 56 | import(scales) 57 | import(tools) 58 | import(utils) 59 | importFrom(Cairo,Cairo) 60 | importFrom(Cairo,CairoFonts) 61 | importFrom(Hmisc,rcorr) 62 | importFrom(MSnbase,filterMz) 63 | importFrom(RBGL,highlyConnSG) 64 | importFrom(RJSONIO,toJSON) 65 | importFrom(cluster,clusGap) 66 | importFrom(data.table,fwrite) 67 | importFrom(entropy,KL.empirical) 68 | importFrom(ggrepel,geom_text_repel) 69 | importFrom(grDevices,boxplot.stats) 70 | importFrom(grDevices,col2rgb) 71 | importFrom(grDevices,colorRampPalette) 72 | importFrom(grDevices,dev.off) 73 | importFrom(graph,ftM2graphNEL) 74 | importFrom(graphics,boxplot) 75 | importFrom(graphics,grid) 76 | importFrom(graphics,legend) 77 | importFrom(graphics,par) 78 | importFrom(graphics,points) 79 | importFrom(grid,viewport) 80 | importFrom(lattice,cloud) 81 | importFrom(parallel,clusterExport) 82 | importFrom(rsm,ccd) 83 | importFrom(rsm,decode.data) 84 | importFrom(rsm,rsm) 85 | importFrom(stats,approx) 86 | importFrom(stats,approxfun) 87 | importFrom(stats,as.formula) 88 | importFrom(stats,convolve) 89 | importFrom(stats,cor) 90 | importFrom(stats,cor.test) 91 | 
importFrom(stats,cutree) 92 | importFrom(stats,density) 93 | importFrom(stats,deriv3) 94 | importFrom(stats,dist) 95 | importFrom(stats,dnorm) 96 | importFrom(stats,fft) 97 | importFrom(stats,fitted) 98 | importFrom(stats,getInitial) 99 | importFrom(stats,hclust) 100 | importFrom(stats,kmeans) 101 | importFrom(stats,lm) 102 | importFrom(stats,loess) 103 | importFrom(stats,lsfit) 104 | importFrom(stats,median) 105 | importFrom(stats,model.weights) 106 | importFrom(stats,na.omit) 107 | importFrom(stats,nextn) 108 | importFrom(stats,nls) 109 | importFrom(stats,prcomp) 110 | importFrom(stats,predict) 111 | importFrom(stats,quantile) 112 | importFrom(stats,sd) 113 | importFrom(stats,setNames) 114 | importFrom(stats,smooth.spline) 115 | importFrom(stats,stepfun) 116 | importFrom(stats,var) 117 | importFrom(stats,weighted.mean) 118 | importFrom(tools,file_ext) 119 | importFrom(tools,file_path_as_absolute) 120 | importFrom(utils,head) 121 | importFrom(utils,tail) 122 | useDynLib(OptiLCMS, .registration=TRUE) 123 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | Changes in version 0.99.0 (2021-07-15) 2 | + Initialized and Submitted to Bioconductor 3 | -------------------------------------------------------------------------------- /R/autoconfig.R: -------------------------------------------------------------------------------- 1 | ## Utility functions interfacing with configuration determined by autoconfig 2 | 3 | .k_multithreading_support <- TRUE 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OptiLCMS 2 | 3 | ### Introduction 4 | 5 | **OptiLCMS** is an R package providing an optimized LC-MS raw data processing workflow. 
The source code of the "MS Spectral Processing" Module in [MetaboAnalyst](https://dev.metaboanalyst.ca/MetaboAnalyst/upload/SpectraUpload.xhtml) is contained here. OptiLCMS can be used to reproduce locally the results generated by MetaboAnalyst, and is also an optional dependency of MetaboAnalystR for raw MS data processing. 6 | 7 | 8 | ### Installation 9 | 10 | There are two typical approaches for users to install this package, as described below. 11 | 12 | #### (a.) Install development (beta) version 13 | 14 | ```R 15 | # The latest features can only be obtained with this approach; make sure 'devtools' is installed first 16 | devtools::install_github("xia-lab/OptiLCMS", build = TRUE, build_vignettes = FALSE, build_manual =TRUE) 17 | 18 | ``` 19 | 20 | #### (b.) Install from the local R tar package 21 | 22 | Download the latest version from the following options: 23 | 24 | [v1.1.0](https://drive.google.com/file/d/1jdy3WQvNNr8qapWuJ6ZHwcoyf7fX4ywe/view?usp=share_link); 25 | 26 | 27 | ```R 28 | # Remember to replace PATH_TO_TAR with the actual path of your downloaded package (OptiLCMS_1.1.X.tar.gz). 29 | install.packages(PATH_TO_TAR, repos = NULL, type="source") 30 | 31 | ``` 32 | 33 | 34 | ### Tutorial 35 | 36 | A step-by-step tutorial will be prepared for users to start their analysis by following it (available soon). 37 | 38 | ### Citation 39 | 40 | The OptiLCMS package has been developed by the [XiaLab](https://www.xialab.ca/) at McGill University. If you use the R package, please cite us: 41 | 42 | * Pang, Z., Chong, J., Li, S. and Xia, J. (2020) MetaboAnalystR 3.0: Toward an Optimized Workflow for Global Metabolomics. Metabolites 10(5) 186 [link](https://doi.org/10.3390/metabo10050186) 43 | 44 | *From within R:* 45 | 46 | ```R 47 | citation("OptiLCMS") 48 | ``` 49 | 50 | ### New features requests & Bugs reports 51 | 52 | To inform us of any bugs or requests, please open a new issue (and @ Zhiqiang-PANG !!) or send an email to zhiqiang.pang@mail.mcgill.ca. 
53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /data/mSet.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xia-lab/OptiLCMS/c38e6b6dcd248d7a970a1acd3ba711ca0fbfd742/data/mSet.rda -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite OptiLCMS in publications use:") 2 | 3 | citEntry( 4 | entry = "Article", 5 | title = "MetaboAnalystR 3.0: Toward an Optimized Workflow for Global Metabolomics", 6 | author = "Zhiqiang Pang et al.", 7 | journal = "Metabolites", 8 | year = "2020", 9 | volume = "10", 10 | number = "5", 11 | pages = "186", 12 | url = "https://doi.org/10.3390/metabo10050186", 13 | textVersion = paste( 14 | 15 | ) 16 | ) 17 | -------------------------------------------------------------------------------- /inst/hmdb/hmdb_all.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xia-lab/OptiLCMS/c38e6b6dcd248d7a970a1acd3ba711ca0fbfd742/inst/hmdb/hmdb_all.rds -------------------------------------------------------------------------------- /inst/lists/ions.csv: -------------------------------------------------------------------------------- 1 | name, charge, molecular_mass 2 | Na, +1, 22.989218 3 | Cl, -1, 34.969402 4 | K, +1, 38.963158 5 | -------------------------------------------------------------------------------- /inst/lists/neutraladdition.csv: -------------------------------------------------------------------------------- 1 | name, molecular mass 2 | NaCOOH, 67.98744 3 | HCOOH, 46.00547 4 | CF3COOH, 133.993 5 | NH3, 17.02655 6 | -------------------------------------------------------------------------------- /inst/lists/neutralloss.csv: 
-------------------------------------------------------------------------------- 1 | name, molecular mass 2 | CH2, 14.01565 3 | CH3, 15.02348 4 | NH3, 17.02655 5 | H20, 18.0153 6 | CH4, 16.0313 7 | CO, 27.9949 8 | C2H4, 28.0313 9 | COCH2, 42.01057 10 | CO2, 43.9898 11 | HCOOH, 46.00547 12 | C4H8, 56.0626 13 | C3H2O3, 86.00039 14 | C5H8O4, 132.04226 15 | C6H10O4, 146.05791 16 | C6H10O5, 162.05282 17 | C6H8O6, 176.03209 -------------------------------------------------------------------------------- /inst/rules/extended_adducts_neg.csv: -------------------------------------------------------------------------------- 1 | "name","nmol","charge","massdiff","oidscore","quasi","ips" 2 | "[M-H]-",1,-1,-1.007276,1,1,1 3 | "[M-2H]2-",1,-2,-2.014552,2,0,1 4 | "[M-3H]3-",1,-3,-3.021828,3,0,1 5 | "[M-2H+Na]-",1,-1,20.974666,4,0,0.75 6 | "[M-H+Cl]2-",1,-2,33.962126,5,0,1 7 | "[M-2H+K]-",1,-1,36.948606,6,0,0.5 8 | "[M-2H+NH4]-",1,-1,16.019271,7,0,0.25 9 | "[M+Cl]-",1,-1,34.969402,8,1,1 10 | "[M+2Cl]2-",1,-2,69.938804,9,0,1 11 | "[2M-H]-",2,-1,-1.007276,1,0,0.5 12 | "[2M-2H]2-",2,-2,-2.014552,2,0,0.5 13 | "[2M-3H]3-",2,-3,-3.021828,3,0,0.5 14 | "[2M-2H+Na]-",2,-1,20.974666,4,0,0.25 15 | "[2M-H+Cl]2-",2,-2,33.962126,5,0,0.5 16 | "[2M-2H+K]-",2,-1,36.948606,6,0,0.25 17 | "[2M-2H+NH4]-",2,-1,16.019271,7,0,0.25 18 | "[2M+Cl]-",2,-1,34.969402,8,0,0.5 19 | "[2M+2Cl]2-",2,-2,69.938804,9,0,0.5 20 | "[3M-H]-",3,-1,-1.007276,1,0,0.5 21 | "[3M-2H]2-",3,-2,-2.014552,2,0,0.5 22 | "[3M-3H]3-",3,-3,-3.021828,3,0,0.5 23 | "[3M-2H+Na]-",3,-1,20.974666,4,0,0.25 24 | "[3M-H+Cl]2-",3,-2,33.962126,5,0,0.5 25 | "[3M-2H+K]-",3,-1,36.948606,6,0,0.25 26 | "[3M-2H+NH4]-",3,-1,16.019271,7,0,0.25 27 | "[3M+Cl]-",3,-1,34.969402,8,0,0.5 28 | "[3M+2Cl]2-",3,-2,69.938804,9,0,0.5 29 | "[M+Cl+NaCOOH]-",1,-1,102.956842,10,0,0.5 30 | "[M-H+NaCOOH]-",1,-1,66.980164,11,0,0.5 31 | "[M+Cl+HCOOH]-",1,-1,80.974872,12,0,0.5 32 | "[M-H+HCOOH]-",1,-1,44.998194,13,0,0.5 33 | "[M+Cl+CF3COOH]-",1,-1,168.962402,14,0,0.5 34 | 
"[M-H+CF3COOH]-",1,-1,132.985724,15,0,0.5 35 | -------------------------------------------------------------------------------- /inst/rules/extended_adducts_pos.csv: -------------------------------------------------------------------------------- 1 | "name" "nmol" "charge" "massdiff" "oidscore" "quasi" "ips" 2 | "[M+H]+" 1 1 1.007276 1 1 1 3 | "[M+2H]2+" 1 2 2.014552 2 0 0.75 4 | "[M+3H]3+" 1 3 3.021828 3 0 0.75 5 | "[M+H+Na]2+" 1 2 23.996494 4 0 0.5 6 | "[M+H+K]2+" 1 2 39.970434 6 0 0.5 7 | "[M+H+NH4]2+" 1 2 19.041099 7 0 0.5 8 | "[M+Na]+" 1 1 22.989218 8 1 1 9 | "[M+2Na]2+" 1 2 45.978436 9 0 0.5 10 | "[M+K]+" 1 1 38.963158 10 1 1 11 | "[M+Na+K]2+" 1 2 61.952376 11 0 0.5 12 | "[M+2K]2+" 1 2 77.926316 13 0 0.5 13 | "[M+NH4]+" 1 1 18.033823 16 1 1 14 | "[M+2Na-H]+" 1 1 44.97116 34 0 0.5 15 | "[M+2K-H]+" 1 1 76.91904 60 0 0.5 16 | "[2M+H]+" 2 1 1.007276 1 0 0.5 17 | "[2M+2H]2+" 2 2 2.014552 2 0 0.5 18 | "[2M+3H]3+" 2 3 3.021828 3 0 0.5 19 | "[2M+H+Na]2+" 2 2 23.996494 4 0 0.5 20 | "[2M+H+K]2+" 2 2 39.970434 6 0 0.5 21 | "[2M+H+NH4]2+" 2 2 19.041099 7 0 0.5 22 | "[2M+Na]+" 2 1 22.989218 8 0 0.5 23 | "[2M+2Na]2+" 2 2 45.978436 9 0 0.5 24 | "[2M+K]+" 2 1 38.963158 10 0 0.5 25 | "[2M+Na+K]2+" 2 2 61.952376 11 0 0.5 26 | "[2M+2K]2+" 2 2 77.926316 13 0 0.5 27 | "[2M+NH4]+" 2 1 18.033823 16 0 0.5 28 | "[2M+2Na-H]+" 2 1 44.97116 34 0 0.25 29 | "[2M+2K-H]+" 2 1 76.91904 60 0 0.25 30 | "[3M+H]+" 3 1 1.007276 1 0 0.25 31 | "[3M+2H]2+" 3 2 2.014552 2 0 0.25 32 | "[3M+3H]3+" 3 3 3.021828 3 0 0.25 33 | "[3M+H+Na]2+" 3 2 23.996494 4 0 0.25 34 | "[3M+H+K]2+" 3 2 39.970434 6 0 0.25 35 | "[3M+H+NH4]2+" 3 2 19.041099 7 0 0.25 36 | "[3M+Na]+" 3 1 22.989218 8 0 0.25 37 | "[3M+2Na]2+" 3 2 45.978436 9 0 0.25 38 | "[3M+K]+" 3 1 38.963158 10 0 0.25 39 | "[3M+Na+K]2+" 3 2 61.952376 11 0 0.25 40 | "[3M+2K]2+" 3 2 77.926316 13 0 0.25 41 | "[3M+NH4]+" 3 1 18.033823 16 0 0.25 42 | "[3M+2Na-H]+" 3 1 44.97116 34 0 0.25 43 | "[3M+2K-H]+" 3 1 76.91904 60 0 0.25 44 | "[M+Na+HCOOH]+" 1 1 68.994688 87 0 
0.25 45 | "[M+K+HCOOH]+" 1 1 84.968628 88 0 0.25 46 | "[M+NH4+HCOOH]+" 1 1 64.039293 89 0 0.25 47 | "[M+H+HCOOH]+" 1 1 47.012746 90 0 0.25 48 | "[M+Na+CF3COOH]+" 1 1 156.982218 91 0 0.25 49 | "[M+K+CF3COOH]+" 1 1 172.956158 92 0 0.25 50 | "[M+NH4+CF3COOH]+" 1 1 152.026823 93 0 0.25 51 | "[M+H+CF3COOH]+" 1 1 135.000276 94 0 0.25 52 | -------------------------------------------------------------------------------- /inst/rules/primary_adducts_neg.csv: -------------------------------------------------------------------------------- 1 | "name","nmol","charge","massdiff","oidscore","quasi","ips" 2 | "[M-H]-",1,-1,-1.007276,1,1,1 3 | "[M-2H+Na]-",1,-1,20.974666,4,0,0.5 4 | "[M-2H+K]-",1,-1,36.948606,6,0,0.5 5 | "[M+Cl]-",1,-1,34.969402,8,1,1 6 | -------------------------------------------------------------------------------- /inst/rules/primary_adducts_pos.csv: -------------------------------------------------------------------------------- 1 | "name","nmol","charge","massdiff","oidscore","quasi","ips" 2 | "[M+H]+",1,1,1.007276,1,1,1 3 | "[M+Na]+",1,1,22.989218,8,1,1 4 | "[M+K]+",1,1,38.963158,10,1,1 5 | "[M+NH4]+",1,1,18.033823,16,1,1 6 | -------------------------------------------------------------------------------- /man/CentroidCheck.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_import.R 3 | \name{CentroidCheck} 4 | \alias{CentroidCheck} 5 | \title{CentroidCheck} 6 | \usage{ 7 | CentroidCheck(filename) 8 | } 9 | \arguments{ 10 | \item{filename}{single file name, should contain the absolute path} 11 | } 12 | \value{ 13 | will output a logical value to indicate centroid (TRUE) or not (FALSE) 14 | } 15 | \description{ 16 | Verify the data is centroid or not 17 | } 18 | \examples{ 19 | DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE, 20 | recursive = TRUE)[c(10:11)] 21 | # sapply(DataFiles, CentroidCheck) 22 | 
23 | } 24 | \author{ 25 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} and Jeff Xia \email{jeff.xia@mcgill.ca} 26 | McGill University, Canada 27 | License: GNU GPL (>= 2) 28 | } 29 | -------------------------------------------------------------------------------- /man/CentroidMSData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_import.R 3 | \name{CentroidMSData} 4 | \alias{CentroidMSData} 5 | \title{CentroidMSData} 6 | \usage{ 7 | CentroidMSData(InFolder, OutFolder = tempdir(), ncore = 1) 8 | } 9 | \arguments{ 10 | \item{InFolder}{single file/folder name} 11 | 12 | \item{OutFolder}{output folder name; if it does not exist, it will be created} 13 | 14 | \item{ncore}{the core number for parallel processing, default is 1} 15 | } 16 | \value{ 17 | will output a centroid mzML file into the input path 18 | } 19 | \description{ 20 | Convert the MS data to centroid mode 21 | } 22 | \examples{ 23 | InFolder <- system.file("mzData", package = "mtbls2") 24 | # CentroidMSData(InFolder) # remove the leading # before testing 25 | } 26 | \author{ 27 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} and Jeff Xia \email{jeff.xia@mcgill.ca} 28 | McGill University, Canada 29 | License: GNU GPL (>= 2) 30 | } 31 | -------------------------------------------------------------------------------- /man/Export.Annotation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Perform_functions.R 3 | \name{Export.Annotation} 4 | \alias{Export.Annotation} 5 | \title{Export.Annotation} 6 | \usage{ 7 | Export.Annotation(mSet = NULL, path = getwd()) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet object, processed by FormatPeakList.} 11 | 12 | \item{path}{character, used to specify the path for result rds and csv file. 
Default is the working directory.} 13 | } 14 | \value{ 15 | will save annotated_peaklist.rds and annotated_peaklist.csv into working path 16 | } 17 | \description{ 18 | Export.Annotation is used to export the result of annotation 19 | } 20 | \examples{ 21 | data(mSet) 22 | Export.Annotation(mSet, path = tempdir()) 23 | # delete the exported files from the tempdir with unlink 24 | unlink(paste0(tempdir(),"/annotated_peaklist.csv"), recursive = TRUE, force = TRUE); 25 | unlink(paste0(tempdir(),"/annotated_peaklist.rds"), recursive = TRUE, force = TRUE) 26 | } 27 | \seealso{ 28 | \code{\link{ExecutePlan}} and \code{\link{PerformPeakProfiling}} for the whole pipeline. 29 | } 30 | \author{ 31 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, Jeff Xia \email{jeff.xia@mcgill.ca} 32 | } 33 | -------------------------------------------------------------------------------- /man/Export.PeakSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Perform_functions.R 3 | \name{Export.PeakSummary} 4 | \alias{Export.PeakSummary} 5 | \title{Export.PeakSummary} 6 | \usage{ 7 | Export.PeakSummary(mSet = NULL, path = getwd()) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet object, processed by FormatPeakList.} 11 | 12 | \item{path}{character, used to specify the path for result rds and csv file. Default is the working directory.} 13 | } 14 | \value{ 15 | will save peak_result_summary.txt into working path 16 | } 17 | \description{ 18 | Export.PeakSummary is used to export the peak summary result 19 | } 20 | \examples{ 21 | data(mSet); 22 | Export.PeakSummary(mSet, path = tempdir()); 23 | # delete the exported files from the tempdir with unlink 24 | unlink(paste0(tempdir(),"/peak_result_summary.txt"), recursive = TRUE, force = TRUE) 25 | } 26 | \seealso{ 27 | \code{\link{ExecutePlan}} and \code{\link{PerformPeakProfiling}} for the whole pipeline. 
28 | } 29 | \author{ 30 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, Jeff Xia \email{jeff.xia@mcgill.ca} 31 | } 32 | -------------------------------------------------------------------------------- /man/Export.PeakTable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Perform_functions.R 3 | \name{Export.PeakTable} 4 | \alias{Export.PeakTable} 5 | \title{Export.PeakTable} 6 | \usage{ 7 | Export.PeakTable(mSet = NULL, path = getwd()) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet object, processed by FormatPeakList.} 11 | 12 | \item{path}{character, used to specify the path for result rds and csv file. Default is the working directory.} 13 | } 14 | \value{ 15 | will save metaboanalyst_input.csv into working path 16 | } 17 | \description{ 18 | Export.PeakTable is used to export the peak table 19 | } 20 | \examples{ 21 | data(mSet); 22 | Export.PeakTable(mSet, path = tempdir()); 23 | # delete the exported files from the tempdir with unlink 24 | unlink(paste0(tempdir(),"/metaboanalyst_input.csv"), recursive = TRUE, force = TRUE) 25 | } 26 | \seealso{ 27 | \code{\link{ExecutePlan}} and \code{\link{PerformPeakProfiling}} for the whole pipeline. 
28 | } 29 | \author{ 30 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, Jeff Xia \email{jeff.xia@mcgill.ca} 31 | } 32 | -------------------------------------------------------------------------------- /man/FormatMSnAnnotation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MSn_processing.R 3 | \name{FormatMSnAnnotation} 4 | \alias{FormatMSnAnnotation} 5 | \title{FormatMSnAnnotation} 6 | \usage{ 7 | FormatMSnAnnotation(mSet = NULL, topN = 5L, isLipidomics = FALSE) 8 | } 9 | \arguments{ 10 | \item{mSet}{} 11 | 12 | \item{topN}{} 13 | 14 | \item{isLipidomics}{} 15 | } 16 | \value{ 17 | mSet Object 18 | } 19 | \description{ 20 | FormatMSnAnnotation 21 | } 22 | \examples{ 23 | # example to be added 24 | } 25 | -------------------------------------------------------------------------------- /man/FormatPeakList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Perform_functions.R 3 | \name{FormatPeakList} 4 | \alias{FormatPeakList} 5 | \title{Format Peak List} 6 | \usage{ 7 | FormatPeakList( 8 | mSet, 9 | annParams, 10 | filtIso = TRUE, 11 | filtAdducts = FALSE, 12 | missPercent = 0.75 13 | ) 14 | } 15 | \arguments{ 16 | \item{mSet}{The mSet object generated by the PerformPeakAnnotation function.} 17 | 18 | \item{annParams}{The object created using the SetAnnotationParam function, 19 | containing user's specified or default parameters for downstream 20 | raw MS data pre-processing.} 21 | 22 | \item{filtIso}{Logical, filter out all isotopes except for \verb{[M]}+ for 23 | positive ion mode and \verb{[M]}- for negative ion mode. By default it is 24 | set to true.} 25 | 26 | \item{filtAdducts}{Logical, filter out all adducts except \verb{[M+H]}+ for 27 | positive ion mode and \verb{[M-H]}- for negative ion mode. 
By default it is set to false.} 28 | 29 | \item{missPercent}{Numeric, specify the threshold to remove features 30 | missing in X\\\% of samples. For instance, 0.5 specifies to remove features 31 | that are missing from 50\\\% of all samples per group. Method is only valid 32 | when there are two groups.} 33 | } 34 | \value{ 35 | will return an mSet object with all result tables formatted 36 | } 37 | \description{ 38 | This function formats the CAMERA output to a usable format for OptiLCMS. 39 | } 40 | \examples{ 41 | data(mSet); 42 | newPath <- dir(system.file("mzData", package = "mtbls2"), 43 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 44 | mSet <- updateRawSpectraPath(mSet, newPath); 45 | annParams <- SetAnnotationParam(polarity = 'positive', 46 | mz_abs_add = 0.035); 47 | 48 | ## Perform peak annotation with newly defined annParams 49 | # mSet <- PerformPeakAnnotation(mSet = mSet, 50 | # annotaParam = annParams, 51 | # ncore =1) 52 | ## Format the PeakList 53 | mSet <- FormatPeakList(mSet = mSet, 54 | annParams, 55 | filtIso =FALSE, 56 | filtAdducts = FALSE, 57 | missPercent = 1) 58 | } 59 | \seealso{ 60 | \code{\link{ExecutePlan}} and \code{\link{PerformPeakProfiling}} for the whole pipeline. 
61 | } 62 | \author{ 63 | Jasmine Chong \email{jasmine.chong@mail.mcgill.ca}, and Jeff Xia \email{jeff.xia@mcgill.ca} 64 | McGill University, Canada 65 | License: GNU GPL (>= 2) 66 | } 67 | -------------------------------------------------------------------------------- /man/GaussModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_Utils.R 3 | \name{GaussModel} 4 | \alias{GaussModel} 5 | \title{GaussModel} 6 | \usage{ 7 | GaussModel(x, mu, sigma, h) 8 | } 9 | \arguments{ 10 | \item{x}{a numeric vector of values at which to evaluate the model} 11 | 12 | \item{mu}{mean of the distribution function} 13 | 14 | \item{sigma}{standard deviation of the distribution function} 15 | 16 | \item{h}{height of the distribution function} 17 | } 18 | \value{ 19 | return result of selfstart 20 | } 21 | \description{ 22 | GaussModel 23 | } 24 | \examples{ 25 | ints<- c(c(1:5,5:1)) 26 | ##nls(y ~ GaussModel(x, mu, sigma, h), 27 | ## data.frame(x = 1:length(ints), y = ints)) 28 | } 29 | \references{ 30 | Smith, C.A., Want, E.J., O'Maille, G., Abagyan, R., Siuzdak, G. (2006). 31 | "XCMS: Processing mass spectrometry data for metabolite profiling using nonlinear 32 | peak alignment, matching and identification." Analytical Chemistry, 78, 779-787. 
33 | } 34 | -------------------------------------------------------------------------------- /man/ImportRawMSData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_import.R 3 | \name{ImportRawMSData} 4 | \alias{ImportRawMSData} 5 | \title{Import raw MS data} 6 | \usage{ 7 | ImportRawMSData( 8 | mSet = NULL, 9 | path = getwd(), 10 | metadata = NULL, 11 | mode = "onDisk", 12 | plotSettings = SetPlotParam(), 13 | running.controller = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{mSet}{mSet Object, can be optional. Usually generated by InitDataObjects("spec", "raw", FALSE) before the data import.} 18 | 19 | \item{path}{Character, input the path to the folder containing 20 | the raw MS spectra to be processed. Or a character vector containing all raw files absolute paths.} 21 | 22 | \item{metadata}{Data.frame or character. A phenotype data frame or an absolute path of the metadata file (.txt) for all samples, optional. 23 | In the option, first column should be the sample name, while second column is the corresponding group name. If omitted, all samples in the same sub-folder will be 24 | considered as one group.} 25 | 26 | \item{mode}{Character, the data input mode. Default is "onDisk" to avoid memory crash. "inMemory" will 27 | read data into memory.} 28 | 29 | \item{plotSettings}{List, plotting parameters produced by SetPlotParam Function. "plot.opts" can be added through this 30 | function for samples numbers for plotting. Default is "default", "all" will apply all samples for plotting and may cause 31 | memory crash, especially for large sample dataset.} 32 | 33 | \item{running.controller}{The resuming pipeline running controller. Optional. Don't need to define by hand.} 34 | } 35 | \value{ 36 | will return an mSet object with raw data read inside.
37 | } 38 | \description{ 39 | This function handles the reading in of 40 | raw MS data (.mzML, .CDF and .mzXML). Users must set 41 | their working directory to the folder containing their raw 42 | data, divided into two subfolders named with their desired group labels. The 43 | function will output two chromatograms into the user's working directory, a 44 | base peak intensity chromatogram (BPIC) and a total ion 45 | chromatogram (TIC). Further, this function sets the number of cores 46 | to be used for parallel processing. It first determines the number of cores 47 | within a user's computer and then sets it to that number/2. 48 | } 49 | \examples{ 50 | ##' Get raw spectra files 51 | DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE, 52 | recursive = TRUE)[c(10:12, 14:16)] 53 | ##' Create a phenodata data.frame 54 | pd <- data.frame(sample_name = sub(basename(DataFiles), pattern = ".mzData", 55 | replacement = "", fixed = TRUE), 56 | sample_group = c(rep("col0", 3), rep("cyp79", 3)), 57 | stringsAsFactors = FALSE) 58 | ##' Import raw spectra 59 | mSet <- ImportRawMSData(path = DataFiles, metadata = pd); 60 | } 61 | \author{ 62 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, Jasmine Chong \email{jasmine.chong@mail.mcgill.ca}, 63 | Mai Yamamoto \email{yamamoto.mai@mail.mcgill.ca}, and Jeff Xia \email{jeff.xia@mcgill.ca} 64 | McGill University, Canada 65 | License: GNU GPL (>= 2) 66 | } 67 | -------------------------------------------------------------------------------- /man/InitDataObjects.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_import.R 3 | \name{InitDataObjects} 4 | \alias{InitDataObjects} 5 | \title{InitDataObjects} 6 | \usage{ 7 | InitDataObjects(data.type, anal.type, paired=FALSE) 8 | } 9 | \arguments{ 10 | \item{data.type}{The type of data, either list (Compound lists), conc (Compound
concentration data), 11 | specbin (Binned spectra data), pktable (Peak intensity table), nmrpeak (NMR peak lists), mspeak (MS peak lists), 12 | or msspec (MS spectra data)} 13 | 14 | \item{anal.type}{Indicate the analysis module to be performed: stat, pathora, pathqea, msetora, msetssp, msetqea, ts, 15 | cmpdmap, smpmap, or pathinteg} 16 | 17 | \item{paired}{Indicate if the data is paired or not. Logical, default set to FALSE} 18 | } 19 | \value{ 20 | will initialize an mSet object 21 | } 22 | \description{ 23 | This function handles the construction of an mSetObj object for storing data for further processing and analysis. 24 | It is necessary to utilize this function to specify to MetaboAnalystR the type of data and the type of analysis you will perform. 25 | } 26 | \examples{ 27 | mSet<-InitDataObjects("spec", "raw", FALSE) 28 | } 29 | \author{ 30 | Jeff Xia \email{jeff.xia@mcgill.ca} 31 | McGill University, Canada 32 | License: GNU GPL (>= 2) 33 | } 34 | -------------------------------------------------------------------------------- /man/InitializaPlan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Resume_functions.R 3 | \name{InitializaPlan} 4 | \alias{InitializaPlan} 5 | \title{Initializing running plan} 6 | \usage{ 7 | InitializaPlan(type = "raw_ms") 8 | } 9 | \arguments{ 10 | \item{type}{Character, Initialized plan type for a resumable running mode. Can be "raw_opt" for 11 | automated optimization option, or "raw_ms" for customized pipeline.} 12 | } 13 | \value{ 14 | will return an initial plan for execution 15 | } 16 | \description{ 17 | Initialize a running plan 18 | } 19 | \examples{ 20 | plan <- InitializaPlan("raw_opt") 21 | } 22 | \seealso{ 23 | \code{\link{ExecutePlan}} for this resumable running pipeline.
24 | } 25 | \author{ 26 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca} 27 | McGill University 28 | License: GNU GPL (>= 2) 29 | } 30 | -------------------------------------------------------------------------------- /man/OptiLCMS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/C_function.R 3 | \docType{package} 4 | \name{OptiLCMS} 5 | \alias{OptiLCMS} 6 | \title{OptiLCMS: A package providing a pipeline for metabolomics processing.} 7 | \description{ 8 | The OptiLCMS package provides a pipeline for metabolomics processing. 9 | } 10 | \section{OptiLCMS functions}{ 11 | 12 | The OptiLCMS functions ... 13 | } 14 | 15 | -------------------------------------------------------------------------------- /man/PerformDBSearchingBatch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MSn_processing.R 3 | \name{PerformDBSearchingBatch} 4 | \alias{PerformDBSearchingBatch} 5 | \title{PerformDBSearchingBatch} 6 | \usage{ 7 | PerformDBSearchingBatch( 8 | mSet = NULL, 9 | ppm1 = 5, 10 | ppm2 = 15, 11 | rt_tol = 0, 12 | database_path = "", 13 | use_rt = FALSE, 14 | enableNL = FALSE, 15 | NLdatabase_path = NULL, 16 | ncores = 1, 17 | useEntropy = FALSE 18 | ) 19 | } 20 | \arguments{ 21 | \item{mSet}{mSet Object contains raw spectral data after results consensus from \emph{PerformSpectrumConsenus}} 22 | 23 | \item{ppm1}{numeric, ppm value of m/z for precursors;} 24 | 25 | \item{ppm2}{numeric, ppm value of m/z for ms/ms fragments matching;} 26 | 27 | \item{rt_tol}{numeric, retention time tolerance, in seconds.
Only effective when use_rt is TRUE;} 28 | 29 | \item{database_path}{character, specify the path of database (.sqlite format);} 30 | 31 | \item{use_rt}{logical, to use retention time if TRUE;} 32 | 33 | \item{enableNL}{logical, to enable use Neutral Loss matching for unmatched features if TRUE;} 34 | 35 | \item{NLdatabase_path}{path of neutral loss database. Must be specified to a valid neutral loss database when enableNL is TRUE.} 36 | 37 | \item{ncores}{} 38 | } 39 | \value{ 40 | mSet Object 41 | } 42 | \description{ 43 | PerformDBSearchingBatch 44 | } 45 | -------------------------------------------------------------------------------- /man/PerformDDADeconvolution.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MSn_processing.R 3 | \name{PerformDDADeconvolution} 4 | \alias{PerformDDADeconvolution} 5 | \title{PerformDDADeconvolution} 6 | \usage{ 7 | PerformDDADeconvolution( 8 | mSet = NULL, 9 | ppm1 = 5, 10 | ppm2 = 15, 11 | sn = 12, 12 | filtering = 2000, 13 | window_size = 1, 14 | intensity_thresh = 1000, 15 | database_path = "", 16 | ncores = 1L, 17 | decoOn = TRUE, 18 | useEntropy = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{mSet}{mSet} 23 | 24 | \item{ppm1}{ppm1} 25 | 26 | \item{ppm2}{ppm2} 27 | 28 | \item{sn}{sn} 29 | 30 | \item{filtering}{filtering} 31 | 32 | \item{window_size}{window_size} 33 | 34 | \item{intensity_thresh}{intensity_thresh} 35 | 36 | \item{database_path}{database_path} 37 | 38 | \item{ncores}{ncores} 39 | 40 | \item{decoOn}{decoOn} 41 | 42 | \item{useEntropy}{useEntropy} 43 | } 44 | \value{ 45 | mSet Object 46 | } 47 | \description{ 48 | PerformDDADeconvolution 49 | } 50 | \examples{ 51 | to add 52 | } 53 | -------------------------------------------------------------------------------- /man/PerformDIADeconvolution.Rd: -------------------------------------------------------------------------------- 1 | % Generated 
by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MSn_processing.R 3 | \name{PerformDIADeconvolution} 4 | \alias{PerformDIADeconvolution} 5 | \title{PerformDIADeconvolution} 6 | \usage{ 7 | PerformDIADeconvolution( 8 | mSet = NULL, 9 | min_width = 5, 10 | ppm2, 11 | sn = 12, 12 | span = 0.3, 13 | filtering = 2000, 14 | ncores = 1L 15 | ) 16 | } 17 | \arguments{ 18 | \item{mSet}{mSet Object contains raw spectral data from \emph{PerformMSnImport}} 19 | 20 | \item{min_width}{minimum peak width value, in seconds} 21 | 22 | \item{ppm2}{} 23 | 24 | \item{sn}{} 25 | 26 | \item{span}{} 27 | 28 | \item{filtering}{} 29 | 30 | \item{ncores}{} 31 | } 32 | \value{ 33 | mSet Object 34 | } 35 | \description{ 36 | PerformDIADeconvolution 37 | } 38 | \examples{ 39 | to add 40 | } 41 | -------------------------------------------------------------------------------- /man/PerformDataInspect.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{PerformDataInspect} 4 | \alias{PerformDataInspect} 5 | \title{PerformDataInspect} 6 | \usage{ 7 | PerformDataInspect( 8 | datapath = NULL, 9 | rt.range = c(0, 0), 10 | mz.range = c(0, 0), 11 | dimension = "3D", 12 | res = 100 13 | ) 14 | } 15 | \arguments{ 16 | \item{datapath}{Character, the path of the raw MS data files (.mzXML, .CDF and .mzML) 17 | for the visual and intuitive data inspection or the file folder (if only a folder path provided, the first file will 18 | be inspected).} 19 | 20 | \item{rt.range}{Numerics, a vector of two values to define the lower and upper RT range (seconds) for 21 | users to inspect. This is an optional parameter, if absent, will display the MS of the whole RT range.} 22 | 23 | \item{mz.range}{Numerics, a vector of two values to define the lower and upper mz range for 24 | users to inspect.
This is an optional parameter, if absent, will display the MS of the whole mz range.} 25 | 26 | \item{dimension}{Character, the dimension for sample to display, including '2D' or '3D'. The default is '3D'.} 27 | 28 | \item{res}{Numeric, the resolution for data inspection. The larger the value, the higher the resolution. 29 | The default value is 100. This value is usually clear enough and also gives consideration to the speed.} 30 | } 31 | \value{ 32 | will output a figure for viewing the data structure 33 | } 34 | \description{ 35 | This function provides a way for users to visually inspect their raw data before the data 36 | trimming so as to remove the dirty or significantly uneluted peaks. 37 | } 38 | \examples{ 39 | ## Get raw spectra files 40 | DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE, 41 | recursive = TRUE)[c(10:12, 14:16)] 42 | PerformDataInspect(DataFiles[1]) 43 | } 44 | \author{ 45 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca} 46 | McGill University 47 | License: GNU GPL (>= 2) 48 | } 49 | -------------------------------------------------------------------------------- /man/PerformDataTrimming.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ROI_Extraction.R 3 | \name{PerformDataTrimming} 4 | \alias{PerformDataTrimming} 5 | \title{Perform ROI Extraction from raw MS data (PerformDataTrimming)} 6 | \usage{ 7 | PerformDataTrimming( 8 | datapath, 9 | mode = "ssm", 10 | write = FALSE, 11 | mz, 12 | mzdiff, 13 | rt, 14 | rtdiff, 15 | rt.idx = 1/15, 16 | rmConts = TRUE, 17 | plot = TRUE, 18 | running.controller = NULL 19 | ) 20 | } 21 | \arguments{ 22 | \item{datapath}{Character, the path of the raw MS data files or folder (.mzXML, .CDF and .mzML) 23 | for parameters training.} 24 | 25 | \item{mode}{Character, mode for data trimming to select the
characteristic peaks. 26 | Default is 'ssm'. Users could select random trimming according to mz value (mz_random) or 27 | RT value (rt_random). Besides, specific peaks at certain mz (mz_specific) or 28 | RT (rt_specific) could also be extracted. 'none' will not trim the data.} 29 | 30 | \item{write}{Logical, if true, will write the trimmed data to the directory 'trimmed' folder 31 | in the datapath. The data in memory will be kept.} 32 | 33 | \item{mz}{Numeric, mz value(s) for specific selection. Positive values mean including (the values 34 | indicated) and negative values mean excluding/removing.} 35 | 36 | \item{mzdiff}{Numeric, the deviation (ppm) of mz value(s).} 37 | 38 | \item{rt}{Numeric, rt value for specific selection. Positive values mean including 39 | and negative values mean excluding.} 40 | 41 | \item{rtdiff}{Numeric, the deviation (seconds) of rt value(s).} 42 | 43 | \item{rt.idx}{Numeric, the relative rt (retention time) range, from 0 to 1. 1 means all retention time 44 | will be retained, while 0 means none. Default is 1/15. If default rt.idx produce too few peaks, 45 | please consider increasing this value.} 46 | 47 | \item{rmConts}{Logical, whether to exclude/remove the potential contamination for parameters optimization. Default is TRUE.} 48 | 49 | \item{plot}{Logical, if TRUE, will plot the chromatogram of the trimmed data.} 50 | 51 | \item{running.controller}{The resuming pipeline running controller. Optional. Don't need to define by hand.} 52 | } 53 | \value{ 54 | will return an mSet object with extracted ROI 55 | } 56 | \description{ 57 | This function performs the raw data trimming. This function will output 58 | a trimmed MSnExp file to memory or hard disk according to the user's choice. Users must 59 | provide the data path for 'datapath', and optionally provide other corresponding parameters.
60 | } 61 | \examples{ 62 | DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE, recursive = TRUE) 63 | # mSet <- PerformDataTrimming(datapath = DataFiles[1],rt.idx = 0.025, rmConts = FALSE); 64 | } 65 | \seealso{ 66 | \code{\link{PerformROIExtraction}} for the new version of this function. 67 | } 68 | \author{ 69 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca} 70 | Mcgill University 71 | License: GNU GPL (>= 2) 72 | } 73 | -------------------------------------------------------------------------------- /man/PerformMSnImport.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MSn_processing.R 3 | \name{PerformMSnImport} 4 | \alias{PerformMSnImport} 5 | \title{PerformMSnImport} 6 | \usage{ 7 | PerformMSnImport( 8 | mSet = NULL, 9 | filesPath = NULL, 10 | targetFeatures = NULL, 11 | acquisitionMode = "DDA", 12 | SWATH_file = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{mSet}{mSet} 17 | 18 | \item{filesPath}{filesPath} 19 | 20 | \item{targetFeatures}{targetFeatures} 21 | 22 | \item{acquisitionMode}{acquisitionMode} 23 | 24 | \item{SWATH_file}{SWATH_file} 25 | } 26 | \value{ 27 | mSet Object 28 | } 29 | \description{ 30 | PerformMSnImport 31 | } 32 | \examples{ 33 | to add 34 | } 35 | -------------------------------------------------------------------------------- /man/PerformMirrorPlotting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MSn_processing.R 3 | \name{PerformMirrorPlotting} 4 | \alias{PerformMirrorPlotting} 5 | \title{PerformMirrorPlotting} 6 | \usage{ 7 | PerformMirrorPlotting( 8 | mSet = NULL, 9 | cutoff_relative = 5, 10 | ppm = 25, 11 | display_plot = FALSE, 12 | width = 8, 13 | height = 6, 14 | dpi = 300, 15 | format = "png", 16 | interactive = 
FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{mSet}{mSet} 21 | 22 | \item{cutoff_relative}{cutoff value for relative intensity} 23 | 24 | \item{ppm}{ppm of mz of msms fragment} 25 | 26 | \item{display_plot}{display mirror plot or not, TRUE or FALSE, default is FALSE} 27 | 28 | \item{width}{width of the image, default is 8} 29 | 30 | \item{height}{height of the image, default is 6} 31 | 32 | \item{dpi}{dpi value, default is 300} 33 | 34 | \item{interactive}{to make figure interactive, default is FALSE. TRUE or FALSE.} 35 | } 36 | \description{ 37 | PerformMirrorPlotting 38 | } 39 | -------------------------------------------------------------------------------- /man/PerformParamsOptimization.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Optimize_params.R 3 | \name{PerformParamsOptimization} 4 | \alias{PerformParamsOptimization} 5 | \title{Perform Parameters Optimization} 6 | \usage{ 7 | PerformParamsOptimization( 8 | mSet, 9 | param = NULL, 10 | method = "DoE", 11 | ncore = 4, 12 | running.controller = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{mSet}{mSet object, usually generated by 'PerformROIExtraction' 17 | or 'PerformDataTrimming' here.} 18 | 19 | \item{param}{List, Parameters defined by 'SetPeakParam' function.} 20 | 21 | \item{method}{Character, method of parameters optimization, including 22 | "DoE" only. Default is "DoE". Other methods 23 | are under development.} 24 | 25 | \item{ncore}{Numeric, CPU threads number used to perform the parallel 26 | based optimization. If there is a memory issue, 27 | please reduce the 'ncore' used here. By default, 2/3 CPU threads of 28 | total will be used.} 29 | 30 | \item{running.controller}{The resuming pipeline running controller. Optional.
31 | Don't need to define by hand.} 32 | } 33 | \value{ 34 | will return a parameter object that can be used for the following processing 35 | } 36 | \description{ 37 | This function is used to optimize the critical 38 | parameters of peak picking and alignment for 39 | the following data processing. It utilizes the trimmed data and 40 | the internal instrument-specific parameters. 41 | Parallel computing will be performed. The number of cores the user 42 | wants to use can be specified. 43 | } 44 | \examples{ 45 | 46 | DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE, 47 | recursive = TRUE)[c(10:12, 14:16)] 48 | # remove the # before running the following command lines 49 | # mSet <- PerformROIExtraction(datapath = DataFiles[c(1:2)],rt.idx = 0.25, 50 | # rmConts = FALSE);#' 51 | # best_params <- PerformParamsOptimization(mSet, param = SetPeakParam(), ncore = 4); 52 | } 53 | \author{ 54 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 55 | Jeff Xia \email{jeff.xia@mcgill.ca} 56 | McGill University 57 | License: GNU GPL (>= 2) 58 | } 59 | -------------------------------------------------------------------------------- /man/PerformPeakAlignment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_Utils.R 3 | \name{PerformPeakAlignment} 4 | \alias{PerformPeakAlignment} 5 | \title{PerformPeakAlignment} 6 | \usage{ 7 | PerformPeakAlignment(mSet) 8 | } 9 | \arguments{ 10 | \item{mSet}{the mSet object generated by PerformPeakPicking function.} 11 | } 12 | \value{ 13 | will return an mSet object with peak alignment done 14 | } 15 | \description{ 16 | PerformPeakAlignment 17 | } 18 | \examples{ 19 | data(mSet); 20 | newPath <- dir(system.file("mzData", package = "mtbls2"), 21 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 22 | mSet <- updateRawSpectraPath(mSet, newPath); 23 | mSet <- PerformPeakAlignment(mSet); 24 | } 25 |
\references{ 26 | Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787 27 | } 28 | \author{ 29 | Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca} 30 | McGill University, Canada 31 | License: GNU GPL (>= 2) 32 | } 33 | -------------------------------------------------------------------------------- /man/PerformPeakAnnotation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Perform_functions.R 3 | \name{PerformPeakAnnotation} 4 | \alias{PerformPeakAnnotation} 5 | \title{Perform peak annotation} 6 | \usage{ 7 | PerformPeakAnnotation(mSet, annotaParam, ncore = 1, running.controller = NULL) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet object, usually generated by 'PerformPeakProfiling' here.} 11 | 12 | \item{annotaParam}{The object created using the SetAnnotationParam function, 13 | containing user's specified or default parameters for downstream 14 | raw MS data pre-processing.} 15 | 16 | \item{ncore}{annotation running core. Default is 1. Parallel running will be supported soon.} 17 | 18 | \item{running.controller}{The resuming pipeline running controller. Optional. Don't need to define by hand.} 19 | } 20 | \value{ 21 | will return an mSet object with annotation finished 22 | } 23 | \description{ 24 | This function performs peak annotation on 25 | the xset object created using the PerformPeakPicking function.
26 | } 27 | \examples{ 28 | data(mSet); 29 | newPath <- dir(system.file("mzData", package = "mtbls2"), 30 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 31 | mSet <- updateRawSpectraPath(mSet, newPath); 32 | annParams <- SetAnnotationParam(polarity = 'positive', 33 | mz_abs_add = 0.035); 34 | 35 | ## Perform peak annotation with newly defined annParams 36 | # mSet <- PerformPeakAnnotation(mSet = mSet, 37 | # annotaParam = annParams, 38 | # ncore =1) 39 | } 40 | \references{ 41 | Kuhl C, Tautenhahn R, Boettcher C, Larson TR, Neumann S (2012). 42 | "CAMERA: an integrated strategy for compound spectra extraction and annotation of 43 | liquid chromatography/mass spectrometry data sets." Analytical Chemistry, 84, 283-289. 44 | http://pubs.acs.org/doi/abs/10.1021/ac202450g. 45 | } 46 | \seealso{ 47 | \code{\link{ExecutePlan}} and \code{\link{PerformPeakProfiling}} for the whole pipeline. 48 | } 49 | \author{ 50 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, Jasmine Chong \email{jasmine.chong@mail.mcgill.ca}, 51 | and Jeff Xia \email{jeff.xia@mcgill.ca} 52 | McGill University, Canada 53 | License: GNU GPL (>= 2) 54 | } 55 | -------------------------------------------------------------------------------- /man/PerformPeakFiling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_Utils.R 3 | \name{PerformPeakFiling} 4 | \alias{PerformPeakFiling} 5 | \title{PerformPeakFiling} 6 | \usage{ 7 | PerformPeakFiling(mSet, BPPARAM = bpparam()) 8 | } 9 | \arguments{ 10 | \item{mSet}{the mSet object generated by PerformPeakPicking function.} 11 | 12 | \item{BPPARAM}{parallel method used for data processing.
Default is bpparam().} 13 | } 14 | \value{ 15 | will return an mSet object with peak gaps filled 16 | } 17 | \description{ 18 | PerformPeakFiling 19 | } 20 | \examples{ 21 | data(mSet); 22 | newPath <- dir(system.file("mzData", package = "mtbls2"), 23 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 24 | mSet <- updateRawSpectraPath(mSet, newPath); 25 | SetGlobalParallel(1); 26 | mSet <- PerformPeakFiling(mSet); 27 | register(bpstop()); 28 | } 29 | \references{ 30 | Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787 31 | } 32 | \author{ 33 | Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca} 34 | McGill University, Canada 35 | License: GNU GPL (>= 2) 36 | } 37 | -------------------------------------------------------------------------------- /man/PerformPeakPicking.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_Utils.R 3 | \name{PerformPeakPicking} 4 | \alias{PerformPeakPicking} 5 | \title{PerformPeakPicking} 6 | \usage{ 7 | PerformPeakPicking(mSet, BPPARAM = bpparam()) 8 | } 9 | \arguments{ 10 | \item{mSet}{the raw data object read by ImportRawMSData function.} 11 | 12 | \item{BPPARAM}{parallel method used for data processing. Default is bpparam(). Optional.} 13 | } 14 | \value{ 15 | will return an mSet object with peaks picked 16 | } 17 | \description{ 18 | This function is used to perform peak picking on an object generated by ImportRawMSData 19 | } 20 | \examples{ 21 | data(mSet); 22 | newPath <- dir(system.file("mzData", package = "mtbls2"), 23 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 24 | mSet <- updateRawSpectraPath(mSet, newPath); 25 | # mSet <- PerformPeakPicking(mSet); 26 | } 27 | \references{ 28 | Smith, C.A. et al. 2006.
{Analytical Chemistry}, 78, 779-787 29 | } 30 | \author{ 31 | Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca} 32 | McGill University, Canada 33 | License: GNU GPL (>= 2) 34 | } 35 | -------------------------------------------------------------------------------- /man/PerformPeakProfiling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Perform_functions.R 3 | \name{PerformPeakProfiling} 4 | \alias{PerformPeakProfiling} 5 | \title{Perform peak profiling} 6 | \usage{ 7 | PerformPeakProfiling( 8 | mSet, 9 | Params = NULL, 10 | plotSettings, 11 | ncore, 12 | running.controller = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{mSet}{The object created using the ImportRawMSData function, 17 | containing the raw MS data.} 18 | 19 | \item{Params}{The object created using the SetPeakParam function, 20 | containing user's specified or default parameters for downstream 21 | raw MS data pre-processing.} 22 | 23 | \item{plotSettings}{List, plotting parameters produced by SetPlotParam Function. 24 | Default is set to true.} 25 | 26 | \item{ncore}{Numeric, used to define the cores' number for Peak Profiling.} 27 | 28 | \item{running.controller}{The resuming pipeline running controller. Optional. Don't need to define by hand.} 29 | } 30 | \value{ 31 | will return a complete mSet object with the whole processes finished 32 | } 33 | \description{ 34 | This function performs feature extraction of user's raw MS data using 35 | the rawData object created using the ImportRawMSData function.
36 | } 37 | \examples{ 38 | ##' Get raw spectra files 39 | DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE, 40 | recursive = TRUE)[c(10:12, 14:16)] 41 | ##' Create a phenodata data.frame 42 | pd <- data.frame(sample_name = sub(basename(DataFiles), pattern = ".mzData", 43 | replacement = "", fixed = TRUE), 44 | sample_group = c(rep("col0", 3), rep("cyp79", 3)), 45 | stringsAsFactors = FALSE) 46 | ##' Import raw spectra 47 | mSet <- ImportRawMSData(path = DataFiles, metadata = pd); 48 | 49 | ##' Perform spectra profiling 50 | mSet <- PerformPeakProfiling(mSet, Params = SetPeakParam(ppm = 15, 51 | bw = 10, 52 | mzdiff = 0.001, 53 | max_peakwidth = 15, 54 | min_peakwidth = 10), 55 | ncore = 2, 56 | plotSettings = SetPlotParam(Plot = TRUE)) 57 | 58 | ##' Set peak annotation parameters 59 | annParams <- SetAnnotationParam(polarity = 'positive', 60 | mz_abs_add = 0.035); 61 | 62 | ##' Perform peak annotation 63 | mSet <- PerformPeakAnnotation(mSet = mSet, 64 | annotaParam = annParams, 65 | ncore =1) 66 | 67 | ##' Format the PeakList 68 | mSet <- FormatPeakList(mSet = mSet, 69 | annParams, 70 | filtIso =FALSE, 71 | filtAdducts = FALSE, 72 | missPercent = 1) 73 | 74 | ##' Export the annotation result 75 | Export.Annotation(mSet, path = tempdir()); 76 | 77 | ##' Export the Peak Table 78 | Export.PeakTable(mSet, path = tempdir()); 79 | 80 | ##' Export the Peak summary 81 | Export.PeakSummary(mSet, path = tempdir()) 82 | } 83 | \author{ 84 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, Jasmine Chong \email{jasmine.chong@mail.mcgill.ca}, 85 | and Jeff Xia \email{jeff.xia@mcgill.ca} 86 | McGill University, Canada 87 | License: GNU GPL (>= 2) 88 | } 89 | -------------------------------------------------------------------------------- /man/PerformROIExtraction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ROI_Extraction.R 
3 | \name{PerformROIExtraction} 4 | \alias{PerformROIExtraction} 5 | \title{Perform ROI Extraction from raw MS data} 6 | \usage{ 7 | PerformROIExtraction( 8 | datapath, 9 | mode = "ssm", 10 | write = FALSE, 11 | mz, 12 | mzdiff, 13 | rt, 14 | rtdiff, 15 | rt.idx = 1/15, 16 | rmConts = TRUE, 17 | plot = TRUE, 18 | running.controller = NULL 19 | ) 20 | } 21 | \arguments{ 22 | \item{datapath}{Character, the path of the raw MS data files or folder (.mzXML, .CDF and .mzML) 23 | for parameters training.} 24 | 25 | \item{mode}{Character, mode for data trimming to select the characteristic peaks. 26 | Default is 'ssm'. Users could select random trimming according to mz value (mz_random) or 27 | RT value (rt_random). Besides, specific peaks at certain mz (mz_specific) or 28 | RT (rt_specific) could also be extracted. 'none' will not trim the data.} 29 | 30 | \item{write}{Logical, if true, will write the trimmed data to the directory 'trimmed' folder 31 | in the datapath. The data in memory will be kept.} 32 | 33 | \item{mz}{Numeric, mz value(s) for specific selection. Positive values mean including (the values 34 | indicated) and negative values mean excluding/removing.} 35 | 36 | \item{mzdiff}{Numeric, the deviation (ppm) of mz value(s).} 37 | 38 | \item{rt}{Numeric, rt value for specific selection. Positive values mean including 39 | and negative values mean excluding.} 40 | 41 | \item{rtdiff}{Numeric, the deviation (seconds) of rt value(s).} 42 | 43 | \item{rt.idx}{Numeric, the relative rt (retention time) range, from 0 to 1. 1 means all retention time 44 | will be retained, while 0 means none. Default is 1/15. If default rt.idx produce too few peaks, 45 | please consider increasing this value.} 46 | 47 | \item{rmConts}{Logical, whether to exclude/remove the potential contamination for parameters optimization.
Default is TRUE.} 48 | 49 | \item{plot}{Logical, if TRUE, will plot the chromatogram of the trimmed data.} 50 | 51 | \item{running.controller}{The resuming pipeline running controller. Optional. Don't need to define by hand.} 52 | } 53 | \value{ 54 | will return an mSet object with extracted ROI 55 | } 56 | \description{ 57 | This function performs the raw data trimming. This function will output 58 | a trimmed MSnExp file to memory or hard disk according to the user's choice. Users must 59 | provide the data path for 'datapath', and optionally provide other corresponding parameters. 60 | } 61 | \examples{ 62 | DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE, recursive = TRUE) 63 | # mSet <- PerformROIExtraction(datapath = DataFiles[1],rt.idx = 0.025, rmConts = FALSE); 64 | } 65 | \seealso{ 66 | \code{\link{PerformDataTrimming}} for the old version of this function. 67 | } 68 | \author{ 69 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca} 70 | McGill University 71 | License: GNU GPL (>= 2) 72 | } 73 | -------------------------------------------------------------------------------- /man/PerformResultsExport.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MSn_processing.R 3 | \name{PerformResultsExport} 4 | \alias{PerformResultsExport} 5 | \title{PerformResultsExport} 6 | \usage{ 7 | PerformResultsExport( 8 | mSet = NULL, 9 | type = 0L, 10 | topN = 10L, 11 | ncores = 1L, 12 | lipids = F 13 | ) 14 | } 15 | \arguments{ 16 | \item{mSet}{} 17 | 18 | \item{type}{} 19 | 20 | \item{topN}{} 21 | 22 | \item{ncores}{} 23 | 24 | \item{lipids}{} 25 | } 26 | \value{ 27 | mSet Object 28 | } 29 | \description{ 30 | PerformResultsExport 31 | } 32 | \examples{ 33 | to add 34 | } 35 | -------------------------------------------------------------------------------- /man/PerformSpectrumConsenus.Rd:
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MSn_processing.R 3 | \name{PerformSpectrumConsenus} 4 | \alias{PerformSpectrumConsenus} 5 | \title{PerformSpectrumConsenus} 6 | \usage{ 7 | PerformSpectrumConsenus( 8 | mSet = NULL, 9 | ppm2, 10 | concensus_fraction = 0.5, 11 | database_path = "", 12 | use_rt = FALSE, 13 | user_dbCorrection = TRUE, 14 | useEntropy = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{mSet}{} 19 | 20 | \item{ppm2}{} 21 | 22 | \item{concensus_fraction}{} 23 | 24 | \item{database_path}{} 25 | 26 | \item{use_rt}{} 27 | 28 | \item{user_dbCorrection}{} 29 | } 30 | \value{ 31 | mSet Object 32 | } 33 | \description{ 34 | PerformSpectrumConsenus 35 | } 36 | \examples{ 37 | to add 38 | } 39 | -------------------------------------------------------------------------------- /man/PlotSpectraBPIadj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{PlotSpectraBPIadj} 4 | \alias{PlotSpectraBPIadj} 5 | \title{PlotSpectraBPIadj} 6 | \usage{ 7 | PlotSpectraBPIadj(mSet = NULL, imgName, format = "png", dpi = 72, width = NA) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet object, usually generated after the peakannotaion finished here.} 11 | 12 | \item{imgName}{Character, to give the name of BPI figures ploted.} 13 | 14 | \item{format}{Character, to give the format of BPI figures ploted. Can be "jpeg", "png", "pdf", "svg", 15 | "tiff" or "ps". Default is "png".} 16 | 17 | \item{dpi}{Numeric, to define the dpi of the figures. Default is 72.} 18 | 19 | \item{width}{Numeric, to define the width of the figure. 
Height = width * 0.618.} 20 | } 21 | \value{ 22 | will return a figure of adjusted BPIs 23 | } 24 | \description{ 25 | This function is used to plot the adjust BPI (Base Peak Ion) 26 | } 27 | \examples{ 28 | data(mSet); 29 | newPath <- dir(system.file("mzData", package = "mtbls2"), 30 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 31 | mSet <- updateRawSpectraPath(mSet, newPath); 32 | PlotSpectraBPIadj(mSet); 33 | } 34 | -------------------------------------------------------------------------------- /man/PlotSpectraInsensityStistics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{PlotSpectraInsensityStistics} 4 | \alias{PlotSpectraInsensityStistics} 5 | \title{PlotSpectraInsensityStistics} 6 | \usage{ 7 | PlotSpectraInsensityStistics( 8 | mSet = NULL, 9 | imgName, 10 | format = "png", 11 | dpi = 72, 12 | width = NA 13 | ) 14 | } 15 | \arguments{ 16 | \item{mSet}{mSet object, usually generated after the peakannotaion finished here.} 17 | 18 | \item{imgName}{Character, to give the name of BPI figures ploted.} 19 | 20 | \item{format}{Character, to give the format of BPI figures ploted. Can be "jpeg", "png", "pdf", "svg", 21 | "tiff" or "ps". Default is "png".} 22 | 23 | \item{dpi}{Numeric, to define the dpi of the figures. Default is 72.} 24 | 25 | \item{width}{Numeric, to define the width of the figure. 
Height = width * 0.618.} 26 | } 27 | \value{ 28 | will return a figure of spectral peak intensity 29 | } 30 | \description{ 31 | This function is used to do the statistics on the spectra intensity 32 | } 33 | \examples{ 34 | data(mSet); 35 | newPath <- dir(system.file("mzData", package = "mtbls2"), 36 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 37 | mSet <- updateRawSpectraPath(mSet, newPath); 38 | PlotSpectraInsensityStistics(mSet); 39 | } 40 | -------------------------------------------------------------------------------- /man/PlotSpectraPCA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{PlotSpectraPCA} 4 | \alias{PlotSpectraPCA} 5 | \title{PlotSpectraPCA} 6 | \usage{ 7 | PlotSpectraPCA(mSet = NULL, imgName, format = "png", dpi = 72, width = NA) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet object, usually generated after the peakannotaion finished here.} 11 | 12 | \item{imgName}{Character, to give the name of BPI figures ploted.} 13 | 14 | \item{format}{Character, to give the format of BPI figures ploted. Can be "jpeg", "png", "pdf", "svg", 15 | "tiff" or "ps". Default is "png".} 16 | 17 | \item{dpi}{Numeric, to define the dpi of the figures. Default is 72.} 18 | 19 | \item{width}{Numeric, to define the width of the figure. 
Height = width * 0.618.} 20 | } 21 | \value{ 22 | will return a figure of PCA after log tranformation (log2) 23 | } 24 | \description{ 25 | This function is used to plot the PCA of all spectra 26 | } 27 | \examples{ 28 | data(mSet); 29 | newPath <- dir(system.file("mzData", package = "mtbls2"), 30 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 31 | mSet <- updateRawSpectraPath(mSet, newPath); 32 | PlotSpectraPCA(mSet); 33 | } 34 | -------------------------------------------------------------------------------- /man/PlotSpectraRTadj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{PlotSpectraRTadj} 4 | \alias{PlotSpectraRTadj} 5 | \title{PlotSpectraRTadj} 6 | \usage{ 7 | PlotSpectraRTadj(mSet = NULL, imgName, format = "png", dpi = 72, width = NA) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet object, usually generated after the peakannotaion finished here.} 11 | 12 | \item{imgName}{Character, to give the name of BPI figures ploted.} 13 | 14 | \item{format}{Character, to give the format of BPI figures ploted. Can be "jpeg", "png", "pdf", "svg", 15 | "tiff" or "ps". Default is "png".} 16 | 17 | \item{dpi}{Numeric, to define the dpi of the figures. Default is 72.} 18 | 19 | \item{width}{Numeric, to define the width of the figure. 
Height = width * 0.618.} 20 | } 21 | \value{ 22 | will return a figure of spectral adjustment of retention time 23 | } 24 | \description{ 25 | This function is used to plot the adjustment of retention time of all spectra 26 | } 27 | \examples{ 28 | data(mSet); 29 | newPath <- dir(system.file("mzData", package = "mtbls2"), 30 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 31 | mSet <- updateRawSpectraPath(mSet, newPath); 32 | PlotSpectraRTadj(mSet); 33 | } 34 | -------------------------------------------------------------------------------- /man/PlotXIC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{PlotXIC} 4 | \alias{PlotXIC} 5 | \title{PlotXIC/EIC} 6 | \usage{ 7 | PlotXIC( 8 | mSet = NULL, 9 | featureNum, 10 | sample_labeled, 11 | Group_labeled, 12 | format, 13 | dpi, 14 | width, 15 | height, 16 | sample_filled, 17 | group_filled 18 | ) 19 | } 20 | \arguments{ 21 | \item{mSet}{mSet Object. Should contain the spectra processing result.} 22 | 23 | \item{featureNum}{Numeric, Feature number in the feature table.} 24 | 25 | \item{sample_labeled}{Logical, whether to lable the sample name.} 26 | 27 | \item{Group_labeled}{Logical, whether to lable the group name.} 28 | 29 | \item{format}{Character, to give the format of BPI figures ploted. Can be "jpeg", "png", "pdf", "svg", 30 | "tiff" or "ps". Default is "png".} 31 | 32 | \item{dpi}{Numeric, to define the dpi of the figures. 
Default is 72.} 33 | 34 | \item{width}{Numeric, to define the width of the figure.} 35 | 36 | \item{height}{Numeric, to define the height of the figure.} 37 | 38 | \item{sample_filled}{Logical, to determine whether the EIC/XIC is filled or not for sample EIC} 39 | 40 | \item{group_filled}{Logical, to determine whether the EIC/XIC is filled or not for group EIC} 41 | } 42 | \value{ 43 | will return a figure of EIC/XIC 44 | } 45 | \description{ 46 | This function creates an extracted ion chromatogram (XIC/EIC) for a specific 47 | m/z and retention time. This is used for quality-control of raw MS data. 48 | } 49 | \examples{ 50 | data(mSet); 51 | newPath <- dir(system.file("mzData", package = "mtbls2"), 52 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 53 | mSet <- updateRawSpectraPath(mSet, newPath); 54 | #PlotXIC(mSet, 1, TRUE, TRUE); 55 | } 56 | -------------------------------------------------------------------------------- /man/SetAnnotationParam.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Perform_functions.R 3 | \name{SetAnnotationParam} 4 | \alias{SetAnnotationParam} 5 | \title{Set annotation parameters} 6 | \usage{ 7 | SetAnnotationParam( 8 | polarity = "positive", 9 | perc_fwhm = 0.6, 10 | mz_abs_iso = 0.005, 11 | max_charge = 2, 12 | max_iso = 2, 13 | corr_eic_th = 0.85, 14 | mz_abs_add = 0.001, 15 | adducts = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{polarity}{Character, specify the polarity of the MS instrument. Either 20 | "negative" or "positive".} 21 | 22 | \item{perc_fwhm}{Numeric, set the percentage of the width of the FWHM for peak grouping. 23 | Default is set to 0.6.} 24 | 25 | \item{mz_abs_iso}{Numeric, set the allowed variance for the search (for isotope annotation). 26 | The default is set to 0.005.} 27 | 28 | \item{max_charge}{Numeric, set the maximum number of the isotope charge. 
For example, 29 | the default is 2, therefore the max isotope charge is 2+/-.} 30 | 31 | \item{max_iso}{Numeric, set the maximum number of isotope peaks. For example, the default 32 | is 2, therefore the max number of isotopes per peak is 2.} 33 | 34 | \item{corr_eic_th}{Numeric, set the threshold for intensity correlations across samples. 35 | Default is set to 0.85.} 36 | 37 | \item{mz_abs_add}{Numeric, set the allowed variance for the search (for adduct annotation). 38 | The default is set to 0.001.} 39 | 40 | \item{adducts}{Character, specify the adducts based on your instrument settings.} 41 | } 42 | \value{ 43 | will return an annotation parameter set for the following annotation steps 44 | } 45 | \description{ 46 | This function sets the parameters for peak annotation. 47 | } 48 | \examples{ 49 | ##' Set peak annotation parameters 50 | annParams <- SetAnnotationParam(polarity = 'positive', 51 | mz_abs_add = 0.035); 52 | 53 | ##' Please check the example of PerformPeakProfiling 54 | ##' and ExecutePlan for the whole running pipeline. 55 | } 56 | \seealso{ 57 | \code{\link{ExecutePlan}} and \code{\link{PerformPeakProfiling}} for the whole pipeline. 
58 | } 59 | \author{ 60 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, Jasmine Chong \email{jasmine.chong@mail.mcgill.ca}, 61 | and Jeff Xia \email{jeff.xia@mcgill.ca} 62 | McGill University, Canada 63 | License: GNU GPL (>= 2) 64 | } 65 | -------------------------------------------------------------------------------- /man/SetGlobalParallel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Global_setting.R 3 | \name{SetGlobalParallel} 4 | \alias{SetGlobalParallel} 5 | \title{SetGlobalParallel} 6 | \usage{ 7 | SetGlobalParallel(ncore = 1) 8 | } 9 | \arguments{ 10 | \item{ncore}{Numeric, used to set the global core numbers, default is 1} 11 | } 12 | \value{ 13 | void function, return nothing but will define the global parallel number 14 | } 15 | \description{ 16 | SetGlobalParallel used to set the global core numbers 17 | } 18 | \examples{ 19 | SetGlobalParallel(1); 20 | register(bpstop()); 21 | } 22 | \author{ 23 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, 24 | Jasmine Chong \email{jasmine.chong@mail.mcgill.ca}, 25 | and Jeff Xia \email{jeff.xia@mcgill.ca} 26 | McGill University, Canada 27 | License: GNU GPL (>= 2) 28 | } 29 | -------------------------------------------------------------------------------- /man/SetPlotParam.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{SetPlotParam} 4 | \alias{SetPlotParam} 5 | \title{SetPlotParam} 6 | \usage{ 7 | SetPlotParam( 8 | Plot = FALSE, 9 | labels = TRUE, 10 | format = "png", 11 | dpi = 72, 12 | width = 9, 13 | ... 
14 | ) 15 | } 16 | \arguments{ 17 | \item{Plot}{Logical, if true, the function will plot internal figures for different functions.} 18 | 19 | \item{labels}{Logical, if true, the labels in the plot will be added.} 20 | 21 | \item{format}{Character, input the format of the image to create.} 22 | 23 | \item{dpi}{Numeric, input the dpi of the image to create.} 24 | 25 | \item{width}{Numeric, input the width of the image to create.} 26 | 27 | \item{...}{Other specific parameters for specific function. Please set them according to the corresponding function.} 28 | } 29 | \value{ 30 | will return a plotting parameters set 31 | } 32 | \description{ 33 | This function sets the generic Plotting Parameters for different functions 34 | } 35 | \examples{ 36 | SetPlotParam(Plot = TRUE, dpi = 144, width = 12) 37 | } 38 | \author{ 39 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, and Jeff Xia \email{jeff.xia@mcgill.ca} 40 | McGill University, Canada 41 | License: GNU GPL (>= 2) 42 | } 43 | -------------------------------------------------------------------------------- /man/UpdateRawfiles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_import.R 3 | \name{UpdateRawfiles} 4 | \alias{UpdateRawfiles} 5 | \title{UpdateRawfiles} 6 | \usage{ 7 | UpdateRawfiles(mSet = NULL, filesIncluded = NULL) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet objects generated 11 | with \"mSet <- InitDataObjects(\"spec\", \"raw\", FALSE)\", or omitted.} 12 | 13 | \item{filesIncluded}{filesIncluded is a vector containing the files' 14 | paths for the following processing;} 15 | } 16 | \value{ 17 | will return an mSet object with raw files updated 18 | } 19 | \description{ 20 | Update the Raw spectra included for Processing. 21 | All wrong format and uncentroided files will be filtered. 
22 | NOTE: this function is only effective before data import stage 23 | AND can NOT be used for resuming pipeline. 24 | } 25 | \examples{ 26 | ### Example 1 --- 27 | data(mSet) 28 | newfiles <- dir(system.file("mzData", package = "mtbls2"), 29 | full.names = TRUE, recursive = TRUE)[c(14:16)] 30 | mSet <- UpdateRawfiles(mSet, filesIncluded = newfiles) 31 | 32 | ### Example 2 --- 33 | ## load googledrive package to download example data 34 | # library("googledrive"); 35 | # data_folder_Sample <- "Raw_data_example" 36 | # temp <- tempfile(fileext = ".zip"); 37 | ## Please authorize the package to download the data from web 38 | # dl <- drive_download(as_id("1CjEPed1WZrwd5T3Ovuic1KVF-Uz13NjO"), path = temp, overwrite = TRUE); 39 | # out <- unzip(temp, exdir = data_folder_Sample); 40 | # out; 41 | # mSet<-InitDataObjects("spec", "raw", FALSE); 42 | ## include only two samples CD_SM-77FXR.mzML and CD_SM-6KUCT.mzML for data import. 43 | # mSet<-UpdateRawfiles(mSet, c("Raw_data_example/CD/CD_SM-77FXR.mzML", 44 | # "Raw_data_example/CD/CD_SM-6KUCT.mzML")) 45 | } 46 | \author{ 47 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} and 48 | Jeff Xia \email{jeff.xia@mcgill.ca} 49 | McGill University, Canada 50 | License: GNU GPL (>= 2) 51 | } 52 | -------------------------------------------------------------------------------- /man/calculate_entropy_similarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{calculate_entropy_similarity} 4 | \alias{calculate_entropy_similarity} 5 | \title{Entropy similarity between two spectra} 6 | \usage{ 7 | calculate_entropy_similarity( 8 | peaks_a, 9 | peaks_b, 10 | ms2_tolerance_in_da, 11 | ms2_tolerance_in_ppm, 12 | clean_spectra, 13 | min_mz, 14 | max_mz, 15 | noise_threshold, 16 | max_peak_num 17 | ) 18 | } 19 | \arguments{ 20 | \item{peaks_a}{A matrix of spectral peaks, with two columns: 
mz and intensity} 21 | 22 | \item{peaks_b}{A matrix of spectral peaks, with two columns: mz and intensity} 23 | 24 | \item{ms2_tolerance_in_da}{The MS2 tolerance in Da, set to -1 to disable} 25 | 26 | \item{ms2_tolerance_in_ppm}{The MS2 tolerance in ppm, set to -1 to disable} 27 | 28 | \item{clean_spectra}{Whether to clean the spectra before calculating the entropy similarity, see \code{\link{clean_spectrum}}} 29 | 30 | \item{min_mz}{The minimum mz value to keep, set to -1 to disable} 31 | 32 | \item{max_mz}{The maximum mz value to keep, set to -1 to disable} 33 | 34 | \item{noise_threshold}{The noise threshold, set to -1 to disable, all peaks have intensity < noise_threshold * max_intensity will be removed} 35 | 36 | \item{max_peak_num}{The maximum number of peaks to keep, set to -1 to disable} 37 | } 38 | \value{ 39 | The entropy similarity 40 | } 41 | \description{ 42 | Calculate the entropy similarity between two spectra 43 | } 44 | \examples{ 45 | mz_a <- c(169.071, 186.066, 186.0769) 46 | intensity_a <- c(7.917962, 1.021589, 100.0) 47 | mz_b <- c(120.212, 169.071, 186.066) 48 | intensity_b <- c(37.16, 66.83, 999.0) 49 | peaks_a <- matrix(c(mz_a, intensity_a), ncol = 2, byrow = FALSE) 50 | peaks_b <- matrix(c(mz_b, intensity_b), ncol = 2, byrow = FALSE) 51 | calculate_entropy_similarity(peaks_a, peaks_b, 52 | ms2_tolerance_in_da = 0.02, ms2_tolerance_in_ppm = -1, 53 | clean_spectra = TRUE, min_mz = 0, max_mz = 1000, 54 | noise_threshold = 0.01, 55 | max_peak_num = 100) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /man/calculate_unweighted_entropy_similarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{calculate_unweighted_entropy_similarity} 4 | \alias{calculate_unweighted_entropy_similarity} 5 | \title{Unweighted entropy similarity between two spectra} 6 | 
\usage{ 7 | calculate_unweighted_entropy_similarity( 8 | peaks_a, 9 | peaks_b, 10 | ms2_tolerance_in_da, 11 | ms2_tolerance_in_ppm, 12 | clean_spectra, 13 | min_mz, 14 | max_mz, 15 | noise_threshold, 16 | max_peak_num 17 | ) 18 | } 19 | \arguments{ 20 | \item{peaks_a}{A matrix of spectral peaks, with two columns: mz and intensity} 21 | 22 | \item{peaks_b}{A matrix of spectral peaks, with two columns: mz and intensity} 23 | 24 | \item{ms2_tolerance_in_da}{The MS2 tolerance in Da, set to -1 to disable} 25 | 26 | \item{ms2_tolerance_in_ppm}{The MS2 tolerance in ppm, set to -1 to disable} 27 | 28 | \item{clean_spectra}{Whether to clean the spectra before calculating the entropy similarity, see \code{\link{clean_spectrum}}} 29 | 30 | \item{min_mz}{The minimum mz value to keep, set to -1 to disable} 31 | 32 | \item{max_mz}{The maximum mz value to keep, set to -1 to disable} 33 | 34 | \item{noise_threshold}{The noise threshold, set to -1 to disable, all peaks have intensity < noise_threshold * max_intensity will be removed} 35 | 36 | \item{max_peak_num}{The maximum number of peaks to keep, set to -1 to disable} 37 | } 38 | \value{ 39 | The unweighted entropy similarity 40 | } 41 | \description{ 42 | Calculate the unweighted entropy similarity between two spectra 43 | } 44 | \examples{ 45 | mz_a <- c(169.071, 186.066, 186.0769) 46 | intensity_a <- c(7.917962, 1.021589, 100.0) 47 | mz_b <- c(120.212, 169.071, 186.066) 48 | intensity_b <- c(37.16, 66.83, 999.0) 49 | peaks_a <- matrix(c(mz_a, intensity_a), ncol = 2, byrow = FALSE) 50 | peaks_b <- matrix(c(mz_b, intensity_b), ncol = 2, byrow = FALSE) 51 | calculate_unweighted_entropy_similarity(peaks_a, peaks_b, 52 | ms2_tolerance_in_da = 0.02, ms2_tolerance_in_ppm = -1, 53 | clean_spectra = TRUE, min_mz = 0, max_mz = 1000, 54 | noise_threshold = 0.01, 55 | max_peak_num = 100) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /man/mSet.Rd: 
-------------------------------------------------------------------------------- 1 | \name{mSet} 2 | \alias{mSet} 3 | \docType{data} 4 | \title{ 5 | LC/MS data mSet object example 6 | } 7 | \description{ 8 | Comparative LC/MS-based profiling of silver nitrate-treated Arabidopsis thaliana leaves of wild-type and cyp79B2 cyp79B3 double knockout plants. Böttcher et al. (2004) 9 | } 10 | \usage{data("mSet")} 11 | \references{ 12 | Chambers, C. M, Maclean, Brendan, Burke, Robert, Amodei, Dario, Ruderman, L. D, Neumann, Steffen, Gatto, Laurent, Fischer, Bernd, Pratt, Brian, Egertson, Jarrett, Hoff, Katherine, Kessner, Darren, Tasman, Natalie, Shulman, Nicholas, Frewen, Barbara, Baker, A. T, Brusniak, Mi-Youn, Paulse, Christopher, Creasy, David, Flashner, Lisa, Kani, Kian, Moulding, Chris, Seymour, L. S, Nuwaysir, M. L, Lefebvre, Brent, Kuhlmann, Frank, Roark, Joe, Rainer, Paape, Detlev, Suckau, Hemenway, Tina, Huhmer, Andreas, Langridge, James, Connolly, Brian, Chadick, Trey, Holly, Krisztina, Eckels, Josh, Deutsch, W. E, Moritz, L. R, Katz, E. J, Agus, B. D, MacCoss, Michael, Tabb, L. D, Mallick, Parag (2012). “A cross-platform toolkit for mass spectrometry and proteomics.” Nat Biotech, 30(10), 918–920. doi: 10.1038/nbt.2377, http://dx.doi.org/10.1038/nbt.2377. 13 | 14 | Martens L, Chambers M, Sturm M, Kessner D, Levander F, Shofstahl J, Tang WH, Rompp A, Neumann S, Pizarro AD, Montecchi-Palazzi L, Tasman N, Coleman M, Reisinger F, Souda P, Hermjakob H, Binz P, Deutsch EW (2010). “mzML - a Community Standard for Mass Spectrometry Data.” Mol Cell Proteomics. doi: 10.1074/mcp.R110.000133. 15 | 16 | Pedrioli PGA, Eng JK, Hubley R, Vogelzang M, Deutsch EW, Raught B, Pratt B, Nilsson E, Angeletti RH, Apweiler R, Cheung K, Costello CE, Hermjakob H, Huang S, Julian RK, Kapp E, McComb ME, Oliver SG, Omenn G, Paton NW, Simpson R, Smith R, Taylor CF, Zhu W, Aebersold R (2004). 
“A common open representation of mass spectrometry data and its application to proteomics research.” Nat Biotechnol, 22(11), 1459–1466. doi: 10.1038/nbt1031. 17 | 18 | Keller A, Eng J, Zhang N, Li X, Aebersold R (2005). “A uniform proteomics MS/MS analysis platform utilizing open XML file formats.” Mol Syst Biol. 19 | 20 | Kessner D, Chambers M, Burke R, Agus D, Mallick P (2008). “ProteoWizard: open source software for rapid proteomics tools development.” Bioinformatics, 24(21), 2534–2536. doi: 10.1093/bioinformatics/btn323. 21 | } 22 | \examples{ 23 | data(mSet) 24 | ## maybe str(mSet) ; plot(mSet) ... 25 | } 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/plotBPIs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{plotBPIs} 4 | \alias{plotBPIs} 5 | \title{plotBPIs} 6 | \usage{ 7 | plotBPIs(mSet = NULL, imgName, format = "png", dpi = 72, width = NA) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet Object, should be processed by ImportMSData.} 11 | 12 | \item{imgName}{Character, to give the filename for the TIC plotting.} 13 | 14 | \item{format}{Character, to give the format of BPI figures ploted. Can be "jpeg", "png", "pdf", "svg", 15 | "tiff" or "ps". Default is "png". (only works for web version)} 16 | 17 | \item{dpi}{Numeric, to define the dpi of the figures. Default is 72. 
(only works for web version)} 18 | 19 | \item{width}{Numeric, width of the figure (default is NA, usually set it as 6~12)} 20 | } 21 | \value{ 22 | will return a figure of BPIs 23 | } 24 | \description{ 25 | plotBPIs is used to plot the BPI of all files 26 | } 27 | \examples{ 28 | newPath <- dir(system.file("mzData", package = "mtbls2"), 29 | full.names = TRUE, recursive = TRUE)[c(10:12)] 30 | data(mSet) 31 | mSet <- updateRawSpectraPath(mSet, newPath) 32 | plotBPIs(mSet) 33 | } 34 | -------------------------------------------------------------------------------- /man/plotMSfeature.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{plotMSfeature} 4 | \alias{plotMSfeature} 5 | \title{plotMSfeature} 6 | \usage{ 7 | plotMSfeature(mSet = NULL, FeatureNM = 1, dpi = 72, format = "png", width = NA) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet Object, should be processed aby 'PerformPeakProfiling'.} 11 | 12 | \item{FeatureNM}{Numeric, feature number in the feature table.} 13 | 14 | \item{dpi}{Numeric, to define the dpi of the figures. Default is 72. (only works for web version)} 15 | 16 | \item{format}{Character, to give the format of BPI figures ploted. Can be "jpeg", "png", "pdf", "svg", 17 | "tiff" or "ps". Default is "png". 
(only works for web version)} 18 | 19 | \item{width}{Numeric, width of the figure (default is NA, usually set it as 6~12)} 20 | } 21 | \value{ 22 | will return a figure of ms stats 23 | } 24 | \description{ 25 | plotMSfeature is used to plot the feature intensity of different groups 26 | } 27 | \examples{ 28 | data(mSet); 29 | newPath <- dir(system.file("mzData", package = "mtbls2"), 30 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 31 | mSet <- updateRawSpectraPath(mSet, newPath); 32 | plotMSfeature (mSet, 1); # Here is only one group 33 | } 34 | -------------------------------------------------------------------------------- /man/plotSingleTIC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{plotSingleTIC} 4 | \alias{plotSingleTIC} 5 | \title{plotSingleTIC} 6 | \usage{ 7 | plotSingleTIC( 8 | mSet = NULL, 9 | filename, 10 | imagename, 11 | dpi = 72, 12 | width = 7, 13 | format = "png" 14 | ) 15 | } 16 | \arguments{ 17 | \item{mSet}{mSet Object, should be processed by ImportMSData.} 18 | 19 | \item{filename}{Character, to give the filename for the TIC plotting.} 20 | 21 | \item{imagename}{Character, to give the filename of the TIC plotted. 
(only works for web version)} 22 | 23 | \item{dpi}{Numeric, dpi of the figure (default is 72, usually set it as 72, 144, 360)} 24 | 25 | \item{width}{Numeric, width of the figure (default is 7, usually set it as 6~12)} 26 | 27 | \item{format}{Character, format of the figure (default is 'png', usually can be 'png', 'pdf','tiff','svg','eps','jpg')} 28 | } 29 | \value{ 30 | will return a figure of a single TIC 31 | } 32 | \description{ 33 | plotSingleTIC is used to plot the TIC of a certain spectra 34 | } 35 | \examples{ 36 | data(mSet); 37 | newPath <- dir(system.file("mzData", package = "mtbls2"), 38 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 39 | mSet <- updateRawSpectraPath(mSet, newPath); 40 | plotSingleTIC(mSet, "MSpos-Ex2-Col0-48h-Ag-2_1-A,3_01_9829.mzData", 41 | "MSpos-Ex2-Col0-48h-Ag-2_1-A,3_01_9829.png") 42 | } 43 | -------------------------------------------------------------------------------- /man/plotTICs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_visulization.R 3 | \name{plotTICs} 4 | \alias{plotTICs} 5 | \title{plotTICs} 6 | \usage{ 7 | plotTICs(mSet = NULL, imgName, format = "png", dpi = 72, width = NA) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet Object, should be processed by ImportMSData.} 11 | 12 | \item{imgName}{Character, to name the imgName for the TIC plotting.} 13 | 14 | \item{format}{Character, to give the format of BPI figures ploted. Can be "jpeg", "png", "pdf", "svg", 15 | "tiff" or "ps". Default is "png". (only works for web version)} 16 | 17 | \item{dpi}{Numeric, to define the dpi of the figures. Default is 72. 
(only works for web version)} 18 | 19 | \item{width}{Numeric, width of the figure (default is NA, usually set it as 6~12)} 20 | } 21 | \value{ 22 | will return a figure of TICs 23 | } 24 | \description{ 25 | plotTICs is used to plot the TIC of all files 26 | } 27 | \examples{ 28 | newPath <- dir(system.file("mzData", package = "mtbls2"), 29 | full.names = TRUE, recursive = TRUE)[c(10:12)] 30 | data(mSet) 31 | mSet <- updateRawSpectraPath(mSet, newPath) 32 | plotTICs(mSet) 33 | } 34 | -------------------------------------------------------------------------------- /man/running.plan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Resume_functions.R 3 | \name{running.plan} 4 | \alias{running.plan} 5 | \title{running.plan} 6 | \usage{ 7 | running.plan(plan = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{plan}{ResummingPlan object. The object is generated by 'InitializaPlan' function.} 11 | 12 | \item{...}{Multiple Processing commands can be input here.} 13 | } 14 | \value{ 15 | will return a defined plan 16 | } 17 | \description{ 18 | define a plan for resumalbe running 19 | } 20 | \examples{ 21 | ##' Download the raw spectra data 22 | DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE, 23 | recursive = TRUE)[c(10:12, 14:16)] 24 | ##' Create a phenodata data.frame 25 | pd <- data.frame(sample_name = sub(basename(DataFiles), pattern = ".mzData", 26 | replacement = "", fixed = TRUE), 27 | sample_group = c(rep("col0", 3), rep("cyp79", 3)), 28 | stringsAsFactors = FALSE) 29 | 30 | ##' Initialize your plan 31 | plan <- InitializaPlan("raw_opt") 32 | 33 | ##' Define your plan 34 | plan <- running.plan(plan, 35 | mSet <- PerformROIExtraction(datapath = DataFiles[c(1:2)], rt.idx = 0.025, 36 | plot = FALSE, rmConts = FALSE, 37 | running.controller = rc), 38 | param_initial <- SetPeakParam(), 39 | best_parameters <- 
PerformParamsOptimization(mSet = mSet, param_initial, 40 | ncore = 1, 41 | running.controller = rc), 42 | param <- best_parameters, 43 | plotSettings1 <- SetPlotParam(Plot=TRUE), 44 | plotSettings2 <- SetPlotParam(Plot=TRUE), 45 | mSet <- ImportRawMSData(mSet = mSet, path = DataFiles, 46 | metadata = pd, 47 | plotSettings = plotSettings1, 48 | running.controller = rc), 49 | mSet <- PerformPeakProfiling(mSet = mSet, Params = param, 50 | plotSettings = plotSettings2, ncore = 1, 51 | running.controller = rc), 52 | annParams <- SetAnnotationParam(polarity = 'negative', 53 | mz_abs_add = 0.025), 54 | mSet <- PerformPeakAnnotation(mSet = mSet, 55 | annotaParam = annParams, ncore =1, 56 | running.controller = rc), 57 | mSet <- FormatPeakList(mSet = mSet, annParams, 58 | filtIso =FALSE, filtAdducts = FALSE, 59 | missPercent = 1)); 60 | ##' Run it! 61 | # result <- ExecutePlan(plan); 62 | } 63 | \seealso{ 64 | \code{\link{ExecutePlan}} for this resumable running pipeline. 65 | } 66 | \author{ 67 | Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca} 68 | McGill University 69 | License: GNU GPL (>= 2) 70 | } 71 | -------------------------------------------------------------------------------- /man/updateRawSpectraPath.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Spectra_Utils.R 3 | \name{updateRawSpectraPath} 4 | \alias{updateRawSpectraPath} 5 | \title{updateRawSpectraPath} 6 | \usage{ 7 | updateRawSpectraPath(mSet, newPath) 8 | } 9 | \arguments{ 10 | \item{mSet}{mSet object generated by ImportRawMSData or the following functions.} 11 | 12 | \item{newPath}{Character vector specifying the absolute paths of the new raw spectra files.} 13 | } 14 | \value{ 15 | will return an mSet object with raw files' path updated 16 | } 17 | \description{ 18 | Update the paths of the raw spectra files stored in an mSet object. 
19 | } 20 | \examples{ 21 | 
data(mSet); 22 | newPath <- dir(system.file("mzData", package = "mtbls2"), 23 | full.names = TRUE, recursive = TRUE)[c(10, 11, 12)] 24 | mSet <- updateRawSpectraPath(mSet, newPath); 25 | } 26 | \author{ 27 | Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca} 28 | McGill University, Canada 29 | } 30 | -------------------------------------------------------------------------------- /src/CleanSpectrum.h: -------------------------------------------------------------------------------- 1 | // CleanSpectrum.h : Include file for standard system include files, 2 | // or project specific include files. 3 | 4 | #pragma once 5 | #define __DEBUG__CLEAN_SPECTRUM__ 0 6 | 7 | #include 8 | // #define false 0 9 | // #define true 1 10 | // #define bool int 11 | // typedef int bool; // or #define bool int 12 | #ifdef SPEC_TYPE 13 | #else 14 | typedef float float_spec; 15 | #endif 16 | // static_assert(sizeof(float_spec) == 4); 17 | 18 | /** 19 | * @brief Clean the spectrum. 20 | * 21 | * The function will modify the content in the peaks in place and return the length of the cleaned peaks. 22 | * If you want to keep the original peaks, please copy them before calling this function. 23 | * 24 | * This function will clean the peaks by the following steps: 25 | * 1. Remove empty peaks (m/z <= 0 or intensity <= 0). 26 | * 2. Remove peaks with m/z >= max_mz or m/z < min_mz. 27 | * 3. Centroid the spectrum by merging peaks within min_ms2_difference_in_da or min_ms2_difference_in_ppm. 28 | * 4. Remove peaks with intensity < noise_threshold * max_intensity. 29 | * 5. Keep only the top max_peak_num peaks. 30 | * 6. Normalize the intensity to sum to 1. 31 | * 32 | * Note: Only one of min_ms2_difference_in_da and min_ms2_difference_in_ppm should be positive. 33 | 34 | * @param peaks The peaks to be cleaned. A 2D array. peaks[x][0] is the m/z, peaks[x][1] is the intensity. 35 | * @param peaks_length The length of the peaks. 36 | * @param min_mz The minimum m/z of the peaks. 
If set to -1, this function will not remove peaks with m/z < min_mz. 37 | * @param max_mz The maximum m/z of the peaks. If set to -1, this function will not remove peaks with m/z >= max_mz. 38 | * @param noise_threshold The noise threshold of the peaks. If set to -1, this function will not remove peaks with intensity < noise_threshold * max_intensity. 39 | * @param min_ms2_difference_in_da The minimum difference in m/z to merge peaks. If set to -1, this function will not centroid the peaks. 40 | * @param min_ms2_difference_in_ppm The minimum difference in ppm to merge peaks. If set to -1, this function will not centroid the peaks. 41 | * @param max_peak_num The maximum number of peaks to keep. If set to -1, this function will not remove peaks. 42 | * @param normalize_intensity Whether to normalize the intensity to sum to 1. 43 | * 44 | * @return int The length of the cleaned peaks. 45 | */ 46 | int clean_spectrum(float_spec* peaks, int peaks_length, 47 | float min_mz, float max_mz, 48 | float noise_threshold, 49 | float min_ms2_difference_in_da, float min_ms2_difference_in_ppm, 50 | int max_peak_num, 51 | bool normalize_intensity); 52 | 53 | void print_spectrum(const char* info, float_spec (*spectrum_2d)[2], int spectrum_len); 54 | void swap(float_spec* a, float_spec* b); 55 | void swap_int(int* a, int* b); 56 | void sort_spectrum_by_mz(float_spec (*spectrum_2d)[2], int spectrum_len); 57 | int sort_spectrum_by_mz_and_zero_intensity(float_spec (*spectrum_2d)[2], int spectrum_len); 58 | void calculate_spectrum_argsort(float_spec (*spectrum_2d)[2], int spectrum_len, int* spectrum_argsort); 59 | bool need_centroid(float_spec (*spectrum_2d)[2], int spectrum_len, float min_ms2_difference_in_da, float min_ms2_difference_in_ppm); 60 | // Centroid the spectrum, the content in the spectrum will be modified. 
61 | int centroid_spectrum(float_spec (*spectrum_2d)[2], int peaks_length, float min_ms2_difference_in_da, float min_ms2_difference_in_ppm, int* spectrum_argsort); 62 | -------------------------------------------------------------------------------- /src/DecoSpectra.h: -------------------------------------------------------------------------------- 1 | #ifndef DSPEC_H 2 | #define DSPEC_H 3 | 4 | #include "DetectPeaks.h" 5 | 6 | using namespace Rcpp; 7 | using namespace std; 8 | 9 | List DecoSpectra(int idx_pg, 10 | List spectra_eics, 11 | NumericVector peak_ms1, 12 | int num_scantime, 13 | int idx_apex_eic, 14 | NumericVector info_pk_ms1, 15 | double peakwidth_min, 16 | double snthr, 17 | bool is_dec_smoothed); 18 | 19 | // 20 | // List DecoSpectra(int idx_pg, 21 | // String nm_smp, 22 | // List spectra_eics, 23 | // NumericMatrix peak_ms1, 24 | // int num_scantime, 25 | // int idx_apex_eic, 26 | // NumericVector info_pk_ms1, 27 | // int peakwidth_min, 28 | // int snthr, 29 | // bool isFWHM, 30 | // bool is_dec_all, 31 | // bool is_dec_smoothed); 32 | 33 | #endif -------------------------------------------------------------------------------- /src/DetectPeaks.h: -------------------------------------------------------------------------------- 1 | #ifndef DPEAKS_H 2 | #define DPEAKS_H 3 | 4 | #include "utilities.h" 5 | 6 | using namespace Rcpp; 7 | using namespace std; 8 | 9 | List DetectPeaks(NumericMatrix eic, double peakwidth_min, int num_scantime, int idx_apex_eic, 10 | double snthr, bool is_smooth, int n_skip_max); 11 | 12 | #endif -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX14 2 | 3 | MQOBJECTS=massifquant/xcms_massifquant.o massifquant/TrMgr.o massifquant/Tracker.o massifquant/SegProc.o massifquant/DataKeeper.o massifquant/OpOverload.o 4 | 5 | OBIOBJECTS=obiwarp/mat.o obiwarp/vec.o obiwarp/xcms_dynprog.o 
obiwarp/xcms_lmat.o xcms_obiwarp.o 6 | 7 | XCMSOBJECTS=mzROI.o util.o xcms_binners.o fastMatch.o init.o 8 | 9 | SQLITOBJECTS = sqlite/sqlite3.o 10 | 11 | PENSEOBJECTS = pense/robust_scale_location.o pense/rho.o pense/rcpp_parse_config.o pense/r_utilities.o pense/r_robust_utils.o pense/r_pense_regression.o pense/r_mesten_regression.o pense/r_interface_utils.o pense/r_interface.o pense/r_enpy.o pense/r_en_regression.o pense/enpy_psc.o pense/enpy_initest.o 12 | 13 | OPTMSOBJECTS = utilities.o sqlite_utilities.o optim_ultra.o lowess.o hclust_ultrafast.o dda_utilities.o spectrumConsensus.o SWATH_DIA_Deconv.o RcppExports.o PerformDIAProcess.o PerformDDAProcess.o DetectPeaks.o DecoSpectra.o linear_regression.o spectra_searching.o export_interfece.o 14 | 15 | ENTROBJECTS = entropy.o CleanSpectrum.o SpectralEntropy.o 16 | 17 | OBJECTS= $(MQOBJECTS) $(OBIOBJECTS) $(XCMSOBJECTS) $(BATCHOBJECTS) $(SQLITOBJECTS) $(PENSEOBJECTS) $(OPTMSOBJECTS) $(ENTROBJECTS) 18 | 19 | PKG_CXXFLAGS= -fstrict-aliasing 20 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 21 | 22 | .PHONY: all 23 | 24 | all: $(SHLIB) 25 | 26 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX14 2 | 3 | MQOBJECTS=massifquant/xcms_massifquant.o massifquant/TrMgr.o massifquant/Tracker.o massifquant/SegProc.o massifquant/DataKeeper.o massifquant/OpOverload.o 4 | 5 | OBIOBJECTS=obiwarp/mat.o obiwarp/vec.o obiwarp/xcms_dynprog.o obiwarp/xcms_lmat.o xcms_obiwarp.o 6 | 7 | XCMSOBJECTS=mzROI.o util.o xcms_binners.o fastMatch.o init.o 8 | 9 | SQLITOBJECTS = sqlite/sqlite3.o 10 | 11 | PENSEOBJECTS = pense/robust_scale_location.o pense/rho.o pense/rcpp_parse_config.o pense/r_utilities.o pense/r_robust_utils.o pense/r_pense_regression.o pense/r_mesten_regression.o pense/r_interface_utils.o pense/r_interface.o pense/r_enpy.o pense/r_en_regression.o pense/enpy_psc.o 
pense/enpy_initest.o 12 | 13 | OPTMSOBJECTS = utilities.o sqlite_utilities.o optim_ultra.o lowess.o hclust_ultrafast.o dda_utilities.o spectrumConsensus.o SWATH_DIA_Deconv.o RcppExports.o PerformDIAProcess.o PerformDDAProcess.o DetectPeaks.o DecoSpectra.o linear_regression.o spectra_searching.o export_interfece.o 14 | 15 | ENTROBJECTS = entropy.o CleanSpectrum.o SpectralEntropy.o 16 | 17 | OBJECTS= $(MQOBJECTS) $(OBIOBJECTS) $(XCMSOBJECTS) $(BATCHOBJECTS) $(SQLITOBJECTS) $(PENSEOBJECTS) $(OPTMSOBJECTS) $(ENTROBJECTS) 18 | 19 | PKG_CXXFLAGS= -fstrict-aliasing 20 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 21 | 22 | all: $(SHLIB) 23 | 24 | clean: 25 | rm -f $(OBJECTS) -------------------------------------------------------------------------------- /src/PerformDDAProcess.cpp: -------------------------------------------------------------------------------- 1 | #include "PerformDDAProcess.h" 2 | 3 | // Runs the DDA processing through a ddaprocess object and returns an R list with "Spectra", "Indicator" and "FeatureIdx" (empty entries when there are no results); "Peak_matrix" is added only when pm has at most one column, i.e. the peak matrix was generated by the algorithm itself. 4 | List PerformDDA_main(NumericMatrix pm, 5 | NumericVector scant1, 6 | NumericVector scant2, 7 | List scanms1, 8 | List scanms2, 9 | NumericMatrix prec_mzs, 10 | double win_size, 11 | double ppm1, 12 | double ppm2, 13 | double sn, 14 | double filt, 15 | double intensity_thresh, 16 | int ionmode, 17 | std::string db_path, 18 | bool decoOn, 19 | bool useEntropy, 20 | bool showOutput, 21 | int thread_num, 22 | string file_nm) { 23 | if(showOutput){ 24 | Rcpp::Rcout << "Running into PerformDDA_main <----==== " << endl; // Rcout prints through R's console instead of raw stdout 25 | } 26 | ddaprocess runObj; 27 | runObj.setDDA_Arguments(pm, scant1, scant2, scanms1, scanms2, prec_mzs, 28 | win_size, ppm1, ppm2, sn, filt, showOutput, thread_num, file_nm, useEntropy, 29 | 0, intensity_thresh, ionmode, db_path); 30 | runObj.PerformDDAProcess_core(decoOn, showOutput); 31 | List resList = runObj.getResultsList(); 32 | List res; 33 | 34 | // if no results, return an empty list 35 | 36 | if(resList.size() == 0){ 37 | List rs, ri; 38 | IntegerVector intf; 39 | res = 
List::create(Named("Spectra") = rs, 40 | Named("Indicator") = ri, 41 | Named("FeatureIdx") = intf); 42 | return res; 43 | } 44 | 45 | IntegerVector intFtvec = runObj.getFeatureIdxVec(); 46 | 47 | if(pm.ncol()>1){ 48 | // if peak matrix is from users' input, no need to return it 49 | res = List::create(Named("Spectra") = resList["Spectra"], 50 | Named("Indicator") = resList["Indicator"], 51 | Named("FeatureIdx") = intFtvec); 52 | } else { 53 | // if peak matrix is generated by this algorithm, return it 54 | pm = runObj.get_peak_matrix(); 55 | res = List::create(Named("Spectra") = resList["Spectra"], 56 | Named("Indicator") = resList["Indicator"], 57 | Named("FeatureIdx") = intFtvec, 58 | Named("Peak_matrix") = pm); 59 | } 60 | 61 | return res; 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/PerformDIAProcess.cpp: -------------------------------------------------------------------------------- 1 | #include "DecoSpectra.h" 2 | #include "PerformDIAProcess.h" 3 | // Runs the DIA deconvolution: forwards all arguments to a diaprocess object, runs its core routine and returns its results list. 4 | // [[Rcpp::export]] 5 | List PerformDIA_main(List pm, 6 | NumericMatrix swath, 7 | NumericVector scanrt1, 8 | NumericVector scanrt2, 9 | List scanms1, 10 | List scanms2, 11 | double pkw_min, 12 | double ppm2, 13 | double sn, 14 | double sm_span, 15 | double filt){ 16 | Rcpp::Rcout << "PerformDIAProcess Preparation starting..." << "\n"; 17 | diaprocess runObj; 18 | runObj.setDIA_Arguments(pm, swath, scanrt1, scanrt2, 19 | scanms1, scanms2, 20 | pkw_min, ppm2, sn, sm_span, filt); 21 | runObj.PerformDIAProcess_core(); 22 | List res = runObj.getResults(); 23 | Rcpp::Rcout << "====== DIA data deconvolution done! 
====== \n"; 24 | return res; 25 | } 26 | 27 | // [[Rcpp::export]] 28 | List dia_feature_preparation(NumericMatrix groupPkMtx, NumericMatrix chromPeaks, List peakidx){ 29 | List ft_list(peakidx.size()); 30 | for(int i=0; i this_ft_info(7); 33 | this_ft_info[0] = groupPkMtx(i,0); //mz 34 | this_ft_info[1] = groupPkMtx(i,1); //mz min 35 | this_ft_info[2] = groupPkMtx(i,2); //mz max 36 | 37 | this_ft_info[3] = groupPkMtx(i,3); //rt 38 | this_ft_info[4] = groupPkMtx(i,4); //rt min 39 | this_ft_info[5] = groupPkMtx(i,5); //rt max 40 | 41 | double baseline = 0, maxo, sn, tmpv; 42 | int kc = 0; 43 | for(int j=0; j 2 | 3 | #include "lowess.h" 4 | #include "DetectPeaks.h" 5 | 6 | using namespace Rcpp; 7 | using namespace std; 8 | 9 | 10 | class funcSets { 11 | 12 | public: 13 | void testFUN() { 14 | cout << "RUnning testFUN in Qiangclass" << "\n"; 15 | } 16 | 17 | std::vector lowessCpp(NumericVector x, NumericVector y, double spanVal){ 18 | const std::vector xs = as>(x); 19 | const std::vector ys = as>(y); 20 | std::vector res; 21 | lowess(xs, ys, spanVal, res); 22 | return res; 23 | } 24 | 25 | 26 | }; 27 | 28 | // [[Rcpp::export]] 29 | List SpectraDeconvCore(int idx, 30 | List spectra_eics, 31 | NumericMatrix ms1Peak, int ScanNum, NumericVector ms1PeakInfo, int idx_apex_ms1, 32 | double min_peakwidth, double max_peakwidth, double snthr) { 33 | 34 | /* 35 | * This function is used to speed up --> DecoSpectra 36 | * Arguments Explainations 37 | */ 38 | 39 | // idx : is the index of the ms1 peak 40 | // spectra_eics : contains all ms2 spectra info within the MS1 RT range 41 | // ms1Peak: is the EIC info of the MS1 peak 42 | // ScanNum: is the number of the MS2 scan of all MS2 spectra within the RT range 43 | // ms1PeakInfo: is the ms1 peak summary vector 44 | // idx_apex_ms1: is the index of the apex mz point 45 | // min_peakwidth: is the minimum peak width for ms2 peak deconvolution 46 | // max_peakwidth: is the maximum peak width for ms2 peak deconvolution 47 | // snthr: is 
the signal to noise threshold 48 | 49 | 50 | 51 | Rcpp::List spec_decon(idx); 52 | Rcout << "Running into the FUNCTION ---> SpectraDeconvCore" << endl; 53 | 54 | 55 | return spec_decon; 56 | } 57 | 58 | 59 | 60 | 61 | 62 | std::vector LowessFun(NumericVector x, NumericVector y, double spanVal) { 63 | funcSets testObj; // TODO: to delete when everything is done 64 | return testObj.lowessCpp(x,y,spanVal); 65 | } 66 | 67 | 68 | // You can include R code blocks in C++ files processed with sourceCpp 69 | // (useful for testing and development). The R code will be automatically 70 | // run after the compilation. 71 | // 72 | 73 | /*** R 74 | res <- SpectraDeconvCore(3, ms2.eic.ext, peak.ms1.smooth, length(idx.ms2.ext), 75 | as.numeric(info.pk.ms1), 11, 5, 20, 10) 76 | */ 77 | -------------------------------------------------------------------------------- /src/binarySearch.h: -------------------------------------------------------------------------------- 1 | int lowerBound(double val,double *mzval,int first, int length); 2 | int upperBound(double val,double *mzval,int first, int length); -------------------------------------------------------------------------------- /src/dda_utilities.h: -------------------------------------------------------------------------------- 1 | #ifndef DDAUTILITY_H 2 | #define DDAUTILITY_H 3 | 4 | #include 5 | // [[Rcpp::depends(RcppArmadillo)]] 6 | 7 | using namespace Rcpp; 8 | using namespace std; 9 | 10 | List MSCentroidsGrouping(NumericVector allMZs); 11 | 12 | NumericMatrix row_erase (NumericMatrix& x, IntegerVector& rowID); 13 | 14 | NumericMatrix cosineSimilarity(NumericMatrix Xr); 15 | 16 | float dot(NumericVector a, NumericVector b, bool norm=true); 17 | 18 | double entropy(NumericMatrix peaks_a, NumericMatrix peaks_b); 19 | 20 | NumericMatrix ms2peak_parse(string text); 21 | 22 | double spectrumSimilarity(NumericMatrix mtx1, NumericMatrix mtx2, double ppm_ms2); 23 | 24 | double entropySimilarity(NumericMatrix mtx1, NumericMatrix 
mtx2, double ppm_ms2); 25 | double get_mass_sodium(); 26 | 27 | double get_mass_potassium(); 28 | 29 | vector parse_formula(std::string formula_txt); 30 | 31 | double neutral_loss_similarity(NumericVector exp_mzs, NumericVector exp_ints, 32 | NumericVector ref_mzs, NumericVector ref_ints, 33 | double ppm2); 34 | 35 | NumericVector unique_num(NumericVector x); 36 | 37 | #endif -------------------------------------------------------------------------------- /src/entropy.h: -------------------------------------------------------------------------------- 1 | // Li, Y., Kind, T., Folz, J. et al. Spectral entropy outperforms MS/MS dot product similarity for small-molecule compound identification. Nat Methods 18, 1524–1531 (2021). https://doi.org/10.1038/s41592-021-01331-z 2 | 3 | 4 | #define SPEC_TYPE 5 | typedef double float_spec; 6 | 7 | #include 8 | #include "CleanSpectrum.h" 9 | #include "SpectralEntropy.h" 10 | 11 | using namespace Rcpp; 12 | 13 | 14 | #ifndef SPEC_TYPEX 15 | #define SPEC_TYPEX 16 | 17 | 18 | Rcpp::NumericVector convert_matrix_to_vector(const Rcpp::NumericMatrix peaks); 19 | 20 | Rcpp::NumericMatrix convert_vector_to_matrix(const Rcpp::NumericVector peaks, int nrow); 21 | 22 | 23 | double r_calculate_unweighted_entropy_similarity(const Rcpp::NumericMatrix peaks_a, 24 | const Rcpp::NumericMatrix peaks_b, 25 | float ms2_tolerance_in_da, float ms2_tolerance_in_ppm, 26 | bool clean_spectra, 27 | float min_mz, float max_mz, 28 | float noise_threshold, 29 | int max_peak_num); 30 | 31 | 32 | double r_calculate_entropy_similarity(const Rcpp::NumericMatrix peaks_a, 33 | const Rcpp::NumericMatrix peaks_b, 34 | float ms2_tolerance_in_da, float ms2_tolerance_in_ppm, 35 | bool clean_spectra, 36 | float min_mz, float max_mz, 37 | float noise_threshold, 38 | int max_peak_num); 39 | 40 | #endif -------------------------------------------------------------------------------- /src/export_interfece.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "PerformDDAProcess.h" 3 | #include "PerformDIAProcess.h" 4 | 5 | using namespace Rcpp; 6 | 7 | /* 8 | * This script is designed to export DDA + DIA performing cpp function into R to accept all input parameters 9 | */ 10 | 11 | // [[Rcpp::export]] 12 | List PerformDDADeco(NumericMatrix pm, 13 | NumericVector scant1, 14 | NumericVector scant2, 15 | List scanms1, 16 | List scanms2, 17 | NumericMatrix prec_mzs, 18 | double win_size, 19 | double ppm1, 20 | double ppm2, 21 | double sn, 22 | double filt, 23 | double intensity_thresh, 24 | int ionmode, 25 | std::string db_path, 26 | bool decoOn, 27 | bool useEntropy, 28 | bool show_output, 29 | int thread_num, 30 | std::string file_nm){ 31 | 32 | List res0 = PerformDDA_main(pm, 33 | scant1, scant2, 34 | scanms1, scanms2, 35 | prec_mzs, win_size, 36 | ppm1, ppm2, 37 | sn, filt, 38 | intensity_thresh, 39 | ionmode, 40 | db_path, 41 | decoOn, 42 | useEntropy, 43 | show_output, 44 | thread_num, 45 | file_nm); 46 | List res = res0;//[1]; 47 | 48 | return res; 49 | } 50 | 51 | // [[Rcpp::export]] 52 | List PerformDIADeco(List pm, 53 | NumericMatrix swath, 54 | NumericVector scant1, 55 | NumericVector scant2, 56 | List scanms1, 57 | List scanms2, 58 | double pkw_min, 59 | double ppm2, 60 | double sn, 61 | double span, 62 | double filt){ 63 | 64 | List res0 = PerformDIA_main(pm, 65 | swath, 66 | scant1, 67 | scant2, 68 | scanms1, 69 | scanms2, 70 | pkw_min, 71 | ppm2, 72 | sn, 73 | span, 74 | filt); 75 | List res = res0; 76 | 77 | return res; 78 | } 79 | 80 | 81 | 82 | /*** R 83 | # rm(list = ls()[ls() != "PerformDDADeco"]) 84 | # if(file.exists("~/Github/OptiLCMS2ID/data/dda_input_data.rda")){ 85 | # load("~/Github/OptiLCMS2ID/data/dda_input_data_hilic.rda") 86 | # } else if(file.exists("~/../Data/Github/OptiLCMS2ID/data/dda_input_data.rda")){ 87 | # load("~/../Data/Github/OptiLCMS2ID/data/dda_input_data_hilic.rda") 88 | # } 
89 | # # "dda_input_data_hilic"" is a hilic urine example, positive mode 90 | # # system.time(res <- PerformDDA_main(matrix(), scanrts_ms1, scanrts_ms2, scan_ms1, scan_ms2, 91 | # # prec_mzs, 0.4, 10, 20, 12, 1000)) 92 | # # peak_mtx[,2] <- peak_mtx[,2]-1 93 | # # peak_mtx[,3] <- peak_mtx[,3]+1 94 | # system.time(res <- PerformDDADeco(peak_mtx, scanrts_ms1, scanrts_ms2, scan_ms1, scan_ms2, 95 | # prec_mzs, 1, 10, 20, 12, 1000)) 96 | */ -------------------------------------------------------------------------------- /src/fastMatch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "R.h" 6 | #include "Rdefines.h" 7 | #include "binarySearch.h" 8 | 9 | struct idxStruct 10 | { 11 | int from; 12 | int to; 13 | }; 14 | 15 | SEXP fastMatch(SEXP x, SEXP y, SEXP xidx, SEXP yidx, SEXP xolength, SEXP tol) { 16 | double *px, *py, dtol; 17 | int nx, ny, yi, xi, lb, ub, txi, from, to, *pxidx, *pyidx, xoLength; 18 | int lastlb=0; 19 | SEXP ans, residx; 20 | 21 | px = REAL(x); 22 | py = REAL(y); 23 | pxidx = INTEGER(xidx); 24 | pyidx = INTEGER(yidx); 25 | xoLength = INTEGER(xolength)[0]; 26 | dtol = REAL(tol)[0]; 27 | nx = length(x); 28 | ny = length(y); 29 | 30 | struct idxStruct * pidxS = calloc(nx, sizeof(struct idxStruct)); 31 | if (pidxS == NULL) 32 | error("fastMatch/calloc: memory could not be allocated ! 
(%d bytes)\n", nx * sizeof(struct idxStruct) ); 33 | for (xi=0;xi < nx;xi++) 34 | pidxS[xi].from = ny+1; 35 | 36 | for (yi=0;yi < ny;yi++) { 37 | lb = lowerBound(py[yi] - dtol, px, lastlb, nx-lastlb); 38 | if (lb < nx-1) 39 | lastlb=lb; 40 | 41 | if (lb >= nx-1){ 42 | lb=nx-1; 43 | ub=nx-1; 44 | } else 45 | ub = upperBound(py[yi] + dtol, px, lb, nx-lb); 46 | 47 | if (ub > nx-1) 48 | ub = nx -1; 49 | 50 | // Rprintf("yi %d lb %d ub %d \n",yi, lb,ub); 51 | 52 | for (xi=lb;xi <= ub;xi++) { 53 | if (fabs(py[yi] - px[xi]) <= dtol) { 54 | // Rprintf(" -> Match xi %d \n",xi); 55 | if (yi < pidxS[xi].from) 56 | pidxS[xi].from = yi; 57 | if (yi > pidxS[xi].to) 58 | pidxS[xi].to = yi; 59 | // Rprintf("xi %d from %d to %d \n",xi, pidxS[xi].from, pidxS[xi].to); 60 | } 61 | } 62 | } 63 | 64 | PROTECT(ans = allocVector(VECSXP, xoLength)); 65 | 66 | for (xi=0;xi < nx;xi++) { 67 | // Rprintf("xi %d from %d to %d \n",xi, pidxS[xi].from, pidxS[xi].to); 68 | 69 | // no match 70 | if (pidxS[xi].from == ny +1 && pidxS[xi].to == 0) 71 | continue; 72 | 73 | txi = pxidx[xi] -1; 74 | from = pidxS[xi].from; 75 | to = pidxS[xi].to; 76 | 77 | // single match 78 | if (pidxS[xi].from == ny +1) 79 | from=pidxS[xi].to; 80 | if (pidxS[xi].to == 0) 81 | to=pidxS[xi].from; 82 | 83 | PROTECT(residx = NEW_INTEGER(to-from+1)); 84 | 85 | int p=0; 86 | for (yi=from;yi <= to;yi++) { 87 | INTEGER_POINTER(residx)[p] = pyidx[yi]; 88 | p++; 89 | } 90 | 91 | SET_VECTOR_ELT(ans, txi, residx); 92 | UNPROTECT(1); // residx 93 | } 94 | 95 | UNPROTECT(1); // ans 96 | free(pidxS); 97 | return(ans); 98 | } 99 | 100 | -------------------------------------------------------------------------------- /src/fastMatch.h: -------------------------------------------------------------------------------- 1 | 2 | struct idxStruct 3 | { 4 | int from; 5 | int to; 6 | }; 7 | 8 | 9 | SEXP fastMatch(SEXP x, SEXP y, SEXP xidx, SEXP yidx, SEXP xolength, SEXP tol); 
-------------------------------------------------------------------------------- /src/fastcluster.h: -------------------------------------------------------------------------------- 1 | // 2 | // C++ standalone verion of fastcluster by Daniel Muellner 3 | // 4 | // Copyright: Daniel Muellner, 2011 5 | // Christoph Dalitz, 2020 6 | // License: BSD style license 7 | // (see the file LICENSE for details) 8 | // 9 | 10 | #ifndef fastclustercpp_H 11 | #define fastclustercpp_H 12 | 13 | // 14 | // Assigns cluster labels (0, ..., nclust-1) to the n points such 15 | // that the cluster result is split into nclust clusters. 16 | // 17 | // Input arguments: 18 | // n = number of observables 19 | // merge = clustering result in R format 20 | // nclust = number of clusters 21 | // Output arguments: 22 | // labels = allocated integer array of size n for result 23 | // 24 | void cutree_k(int n, const int* merge, int nclust, int* labels); 25 | 26 | // 27 | // Assigns cluster labels (0, ..., nclust-1) to the n points such 28 | // that the hierarchical clsutering is stopped at cluster distance cdist 29 | // 30 | // Input arguments: 31 | // n = number of observables 32 | // merge = clustering result in R format 33 | // height = cluster distance at each merge step 34 | // cdist = cutoff cluster distance 35 | // Output arguments: 36 | // labels = allocated integer array of size n for result 37 | // 38 | void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels); 39 | 40 | // 41 | // Hierarchical clustering with one of Daniel Muellner's fast algorithms 42 | // 43 | // Input arguments: 44 | // n = number of observables 45 | // distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing 46 | // the upper triangle (without diagonal elements) of the distance 47 | // matrix, e.g. 
for n=4: 48 | // d00 d01 d02 d03 49 | // d10 d11 d12 d13 -> d01 d02 d03 d12 d13 d23 50 | // d20 d21 d22 d23 51 | // d30 d31 d32 d33 52 | // method = cluster metric (see enum hclust_fast_methods) 53 | // Output arguments: 54 | // merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result. 55 | // Result follows R hclust convention: 56 | // - observabe indices start with one 57 | // - merge[i][] contains the merged nodes in step i 58 | // - merge[i][j] is negative when the node is an atom 59 | // height = allocated (n-1) array with distances at each merge step 60 | // Return code: 61 | // 0 = ok 62 | // 1 = invalid method 63 | // 64 | int hclust_fast(int n, double* distmat, int method, int* merge, double* height); 65 | enum hclust_fast_methods { 66 | // single link with the minimum spanning tree algorithm (Rohlf, 1973) 67 | HCLUST_METHOD_SINGLE = 0, 68 | // complete link with the nearest-neighbor-chain algorithm (Murtagh, 1984) 69 | HCLUST_METHOD_COMPLETE = 1, 70 | // unweighted average link with the nearest-neighbor-chain algorithm (Murtagh, 1984) 71 | HCLUST_METHOD_AVERAGE = 2, 72 | // median link with the generic algorithm (Müllner, 2011) 73 | // requires euclidean distances as distance data 74 | HCLUST_METHOD_MEDIAN = 3, 75 | 76 | HCLUST_METHOD_CENTROID = 4 77 | }; 78 | 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /src/hclust_ultrafast.h: -------------------------------------------------------------------------------- 1 | #ifndef ultrafastcluste_H 2 | #define ultrafastcluste_H 3 | 4 | #include 5 | #include // for std::pow, std::sqrt 6 | #include // for std::ptrdiff_t 7 | #include // for std::numeric_limits<...>::infinity() 8 | #include // for std::fill_n 9 | #include // for std::runtime_error 10 | #include // for std::string 11 | #include // also for DBL_MAX, DBL_MIN 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace Rcpp; 19 | 20 | 21 | // Older versions of 
Microsoft Visual Studio do not have the fenv header. 22 | #ifdef _MSC_VER 23 | #if (_MSC_VER == 1500 || _MSC_VER == 1600) 24 | #define NO_INCLUDE_FENV 25 | #endif 26 | #endif 27 | // NaN detection via fenv might not work on systems with software 28 | // floating-point emulation (bug report for Debian armel). 29 | #ifdef __SOFTFP__ 30 | #define NO_INCLUDE_FENV 31 | #endif 32 | #ifdef NO_INCLUDE_FENV 33 | #pragma message("Do not use fenv header.") 34 | #else 35 | #include 36 | #endif 37 | 38 | 39 | #ifndef DBL_MANT_DIG 40 | #error The constant DBL_MANT_DIG could not be defined. 41 | #endif 42 | #define T_FLOAT_MANT_DIG DBL_MANT_DIG 43 | 44 | #ifndef LONG_MAX 45 | #include 46 | #endif 47 | #ifndef LONG_MAX 48 | #error The constant LONG_MAX could not be defined. 49 | #endif 50 | #ifndef INT_MAX 51 | #error The constant INT_MAX could not be defined. 52 | #endif 53 | 54 | #ifndef INT32_MAX 55 | #ifdef _MSC_VER 56 | #if _MSC_VER >= 1600 57 | #define __STDC_LIMIT_MACROS 58 | #include 59 | #else 60 | typedef __int32 int_fast32_t; 61 | typedef __int64 int64_t; 62 | #endif 63 | #else 64 | #define __STDC_LIMIT_MACROS 65 | #include 66 | #endif 67 | #endif 68 | 69 | #define FILL_N std::fill_n 70 | #ifdef _MSC_VER 71 | #if _MSC_VER < 1600 72 | #undef FILL_N 73 | #define FILL_N stdext::unchecked_fill_n 74 | #endif 75 | #endif 76 | 77 | // Suppress warnings about (potentially) uninitialized variables. 78 | #ifdef _MSC_VER 79 | #pragma warning (disable:4700) 80 | #endif 81 | 82 | #ifndef HAVE_DIAGNOSTIC 83 | #if __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 6)) 84 | #define HAVE_DIAGNOSTIC 1 85 | #endif 86 | #endif 87 | 88 | #ifndef HAVE_VISIBILITY 89 | #if __GNUC__ >= 4 90 | #define HAVE_VISIBILITY 1 91 | #endif 92 | #endif 93 | 94 | /* Since the public interface is given by the Python respectively R interface, 95 | * we do not want other symbols than the interface initalization routines to be 96 | * visible in the shared object file. 
The "visibility" switch is a GCC concept. 97 | * Hiding symbols keeps the relocation table small and decreases startup time. 98 | * See http://gcc.gnu.org/wiki/Visibility 99 | */ 100 | #if HAVE_VISIBILITY 101 | #pragma GCC visibility push(hidden) 102 | #endif 103 | 104 | typedef int_fast32_t t_index; 105 | #ifndef INT32_MAX 106 | #define MAX_INDEX 0x7fffffffL 107 | #else 108 | #define MAX_INDEX INT32_MAX 109 | #endif 110 | #if (LONG_MAX < MAX_INDEX) 111 | #error The integer format "t_index" must not have a greater range than "long int". 112 | #endif 113 | #if (INT_MAX > MAX_INDEX) 114 | #error The integer format "int" must not have a greater range than "t_index". 115 | #endif 116 | typedef double t_float; 117 | 118 | NumericVector auto_hclust(NumericVector x0); 119 | 120 | NumericVector auto_hclust_median(NumericVector x0); 121 | 122 | NumericVector ultra_hclust(NumericVector x0, int n_clusts); 123 | 124 | NumericVector matrix_hclust(NumericMatrix data_mtx); 125 | 126 | #endif 127 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // for NULL 4 | #include 5 | 6 | /* .C calls */ 7 | // extern void continuousPtsAboveThreshold(void *, void *, void *, void *, void *, void *); 8 | // extern void continuousPtsAboveThresholdIdx(void *, void *, void *, void *, void *, void *); 9 | // extern void DescendMin(void *, void *, void *, void *, void *); 10 | // extern void FindEqualGreaterM(void *, void *, void *, void *, void *); 11 | // extern void RectUnique(void *, void *, void *, void *, void *, void *, void *); 12 | // extern void WhichColMax(void *, void *, void *, void *); 13 | // extern void DescendZero(void *, void *, void *, void *, void *); 14 | // extern void ColMax(void *, void *, void *, void *); 15 | // 16 | // 17 | // static const R_CMethodDef CEntries[] = { 18 | // {"continuousPtsAboveThreshold", 
(DL_FUNC) &continuousPtsAboveThreshold, 6}, 19 | // {"continuousPtsAboveThresholdIdx", (DL_FUNC) &continuousPtsAboveThresholdIdx, 6}, 20 | // {"DescendMin", (DL_FUNC) &DescendMin, 5}, 21 | // {"FindEqualGreaterM", (DL_FUNC) &FindEqualGreaterM, 5}, 22 | // {"RectUnique", (DL_FUNC) &RectUnique, 7}, 23 | // {"WhichColMax", (DL_FUNC) &WhichColMax, 4}, 24 | // {"DescendZero", (DL_FUNC) &DescendZero, 5}, 25 | // {"ColMax", (DL_FUNC) &ColMax, 4}, 26 | // {NULL, NULL, 0} 27 | // }; 28 | 29 | 30 | // void R_init_OptiLCMS(DllInfo *info) { 31 | // R_RegisterCCallable("OptiLCMS", "continuousPtsAboveThreshold", (DL_FUNC) &continuousPtsAboveThreshold); 32 | // R_RegisterCCallable("OptiLCMS", "DescendZero", (DL_FUNC) &DescendZero); 33 | // } 34 | 35 | //void R_init_OptiLCMS(DllInfo *info) { 36 | // R_RegisterCCallable("OptiLCMS", "add", (DL_FUNC) &CEntries); 37 | //} -------------------------------------------------------------------------------- /src/linear_regression.h: -------------------------------------------------------------------------------- 1 | #ifndef LINEAR_REGRESSION_HPP_ 2 | #define LINEAR_REGRESSION_HPP_ 3 | 4 | #include 5 | #include 6 | #include "pense/nsoptim_forward.hpp" 7 | #include "pense/r_en_regression.hpp" 8 | 9 | using namespace Rcpp; 10 | using namespace arma; 11 | using namespace pense::r_interface; 12 | 13 | NumericVector PerformLinearRegress(NumericMatrix X, NumericVector Y, NumericVector penalty_loadings); 14 | 15 | 16 | #endif // LINEAR_REGRESSION_HPP_ 17 | -------------------------------------------------------------------------------- /src/lowess.h: -------------------------------------------------------------------------------- 1 | #ifndef LOWESS_H 2 | #define LOWESS_H 3 | 4 | using namespace std; 5 | 6 | void lowess_core(const vector &x, const vector &y, double f, long nsteps, double delta, vector &ys, vector &rw, vector &res); 7 | 8 | void lowess(const vector &x, const vector &y, double f, vector &ys); 9 | 10 | void lowest(const vector &x, 
const vector &y, double xs, double &ys, long nleft, long nright, vector &w,bool userw, vector &rw, bool &ok); 11 | 12 | #endif -------------------------------------------------------------------------------- /src/massifquant/DataKeeper.h: -------------------------------------------------------------------------------- 1 | #ifndef DK_h 2 | #define DK_h 3 | 4 | #include 5 | 6 | #ifndef WIN32 7 | //------------Unix------------ 8 | #include 9 | #define MKDIR(path,mask) mkdir(path,mask) 10 | #define GETCWD(buf,len) getcwd(buf,len) 11 | #define OPEN(buf,mode,access) open(buf,mode,access) 12 | #endif 13 | 14 | //outside libraries 15 | #include 16 | #include 17 | 18 | /* Not able to find these header files */ 19 | #include 20 | #include 21 | 22 | 23 | const int MZITR = 0; 24 | const int IITR = 1; 25 | const int FILECHARMAX = 300; 26 | const int RANGEMAXNUM = 6; 27 | class DataKeeper { 28 | 29 | private: 30 | uint32_t num_scans; 31 | std::vector scan_idx; 32 | std::vector rt; //a single big array 33 | std::vector mz; //an 34 | std::vector intensity; 35 | 36 | struct scanBuf * scbuf; 37 | double * pmz; 38 | double * pinten; 39 | int * pscanindex; 40 | int nmz; 41 | int lastScan; 42 | double * pscantime; 43 | char filename[FILECHARMAX]; 44 | 45 | double initMZS2; 46 | double initIS2; //var 47 | double initIS; //sd 48 | 49 | void printVec(const std::vector & myvec); 50 | void printList(const std::list & mylist); 51 | void printList(const std::list & mylist); 52 | 53 | /*not working on windows build of bioconductor*/ 54 | //void assign_values(float64* data, uint32_t data_len, std::vector & vec, int vec_len); 55 | 56 | std::vector privGetMZScan(int s); 57 | std::vector privGetIScan(int s); 58 | 59 | void privGetScanXcms(int scan, std::vector & mzScan, 60 | std::vector & intenScan); 61 | 62 | //want to be able to change data 63 | std::vector transformIntensity(std::vector & A); 64 | void transformIntensityR(); 65 | 66 | public: 67 | 68 | DataKeeper(SEXP mz, SEXP inten, SEXP 
scanindex, SEXP ls, SEXP scantime); 69 | DataKeeper(const char* dotplms1); 70 | ~DataKeeper(); 71 | 72 | uint32_t getTotalScanNumbers(); 73 | int getTotalCentroidCount(); 74 | double getInitMZS2(); 75 | double getInitIS2(); 76 | double getInitIS(); 77 | 78 | std::vector getMZScan(int s); 79 | std::vector getIScan(int s); 80 | 81 | void getScanMQ(int s, std::vector & mzScan, std::vector & intenScan); 82 | void getScanXcms(int scan, int nmz, int lastScan, std::vector & mzScan, std::vector & intenScan); 83 | 84 | double getScanTime(int s); 85 | 86 | void ghostScan(); 87 | 88 | void ghostScanR(); 89 | 90 | }; 91 | #endif 92 | -------------------------------------------------------------------------------- /src/massifquant/OpOverload.h: -------------------------------------------------------------------------------- 1 | #ifndef OP_h 2 | #define OP_h 3 | 4 | #include 5 | #include 6 | 7 | /* 8 | Operator Overloading 9 | */ 10 | 11 | /*matrix multiplication of 2 X 2 matrices*/ 12 | std::vector operator * (const std::vector & A, 13 | const std::vector & B); 14 | 15 | /*vector or matrix addition for type double*/ 16 | std::vector operator+ (const std::vector & A, 17 | const std::vector & B); 18 | /*vector or matrix addition for type int*/ 19 | std::vector operator + (const std::vector & A, 20 | const std::vector & B); 21 | 22 | /*vector or matrix subtract off scalar for type double*/ 23 | std::vector operator - (const std::vector & A, 24 | const double & b); 25 | 26 | /*vector or matrix division by scalar for type double*/ 27 | std::vector operator / (const std::vector & A, 28 | const double & b); 29 | 30 | /*logical indexing for type int*/ 31 | std::vector operator >= (const std::vector & A, 32 | const int & b); 33 | std::vector operator <= (const std::vector & A, 34 | const int & b); 35 | std::vector operator == (const std::vector & A, 36 | const int & b); 37 | 38 | std::vector operator == (const std::list & A, 39 | const int & b); 40 | 41 | /*made for init data of 
trackers (should be the same size) */ 42 | std::list operator == (const std::list & A, 43 | const std::list & B); 44 | 45 | std::vector operator != (const std::list & A, 46 | const int & b); 47 | 48 | std::list operator != (const std::vector & A, 49 | const int & b); 50 | 51 | /*logical indexing for type double*/ 52 | 53 | std::vector operator > (const std::vector & A, 54 | const double & b); 55 | 56 | std::vector operator >= (const std::vector & A, 57 | const double & b); 58 | std::vector operator <= (const std::vector & A, 59 | const double & b); 60 | 61 | /*Perform this specific operation: Ab = x;*/ 62 | std::vector multiplyMatVec(const std::vector & A, 63 | const std::vector & b); 64 | 65 | /*element-wise multiplication of vectors*/ 66 | std::vector dottimes (const std::vector & A, 67 | const std::vector & B); 68 | 69 | /*element-wise addition of vectors*/ 70 | std::vector dotadd (const std::vector & A, 71 | const std::vector & B); 72 | 73 | std::vector copySubIdx(const std::vector & A, 74 | const std::vector & subidx); 75 | 76 | std::vector copySubIdx(const std::vector & A, 77 | const std::vector & subidx); 78 | 79 | std::vector createSequence(const int start, 80 | const int stop, const int spacing); 81 | 82 | double computeAnyXbar(const std::list & x); 83 | 84 | double computeAnySampVar(const std::list & x); 85 | 86 | void printvector(const std::vector & myvec); 87 | 88 | void printvector(const std::vector & myvec); 89 | 90 | void printList(const std::list & mylist); 91 | void printList(const std::list & mylist); 92 | bool myuniqcomp(int i, int j); 93 | int lowerBound(double val, std::vector mzvals, int first, int length); 94 | int upperBound(double val, std::vector mzvals, int first, int length); 95 | 96 | struct scanBuf { 97 | std::vector mz; 98 | std::vector intensity; 99 | }; 100 | 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /src/massifquant/SegProc.h: 
-------------------------------------------------------------------------------- 1 | #ifndef SPROC_h 2 | #define SPROC_h 3 | 4 | #include "nmath.h" 5 | #include "dpq.h" 6 | 7 | #include 8 | #include 9 | 10 | const int MAXGAP = 5; 11 | const double TROBUST1 = 0.5; 12 | const double TROBUST2 = 2; 13 | const double ALPHA = 0.001; 14 | /* Groups tracker segments and solders them together (see groupSegments / solderSegs); t, v and p hold the t-test statistic, its degrees of freedom and the p-value. NOTE(review): TrMgr is used below but no TrMgr.h include is visible in this header - presumably the including .cpp supplies it first; confirm. */ 15 | class SegProc { 16 | 17 | private: 18 | 19 | double origTrNum; //num of trackers before soldering of trackers 20 | double t; //the test statistic from a student t distribution 21 | double v; //degrees of freedom for student t distribution 22 | double p; //the probability of obtaining a test statistic at least 23 | //as extreme as the one that was actually observed, assuming 24 | //that the null hypothesis is true (wiki) 25 | 26 | std::vector segClusters; 27 | std::vector segIdx;//the list of tracker indices to be soldered together 28 | std::vector unionIdx; //the delimitation of unions of soldered trks 29 | 30 | //after cleaned subsets 31 | std::vector cleanSegIdx;//the list of tracker indices to be soldered together 32 | std::vector cleanUnionIdx; //the delimitation of unions of soldered trks 33 | 34 | 35 | 36 | void compareMeans(TrMgr & busybody, const int seed, const std::list edges, const int & segCounts); 37 | 38 | //equal variances 39 | void ttestEq(double xbar1, double xbar2, double n1, double n2, double s12, double s22); 40 | 41 | //unequal variance - Welch-Satterthwaite Approx. 
42 | void ttestWelch(double xbar1, double xbar2, double n1, double n2, double s12, double s22); 43 | 44 | double pt(double x, double n, int lower_tail, int log_p); 45 | 46 | public: 47 | 48 | SegProc(int otn); 49 | 50 | ~SegProc(); 51 | 52 | void groupSegments(TrMgr & busybody); 53 | 54 | void collapseSubsets(); 55 | 56 | void solderSegs(TrMgr & busybody); 57 | 58 | void segsToFile(TrMgr & busybody); 59 | }; 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /src/massifquant/TrMgr.h: -------------------------------------------------------------------------------- 1 | #ifndef TRMGR_h 2 | #define TRMGR_h 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "DataKeeper.h" 10 | #include "OpOverload.h" 11 | #include "Tracker.h" 12 | 13 | //const int MAXTRKS = 1e6; 14 | const double CLAIMEDPT = -1; 15 | 16 | class TrMgr { 17 | 18 | private: 19 | int currScanIdx; 20 | double minIntensity; 21 | int minTrLen; 22 | int currMissedMax; 23 | double ppm; 24 | double criticalT; 25 | int scanBack; 26 | 27 | std::vector iData; //intensity for a given scan 28 | std::vector mData; //mz 29 | 30 | std::vector trks; //old -> trks[MAXTRKS]; 31 | int initCounts; 32 | std::vector actIdx; 33 | std::vector picIdx; 34 | std::map startMap; //first is startScanIdx, 35 | //second is subidx of picIdx. 36 | int picCounts; 37 | int actCounts; 38 | //prediction info 39 | std::list predDatIdx; //store data points corresponding to claimed tr 40 | std::list initDatIdx; //mark points for new trackers to be init. 
41 | std::vector predDist; //store distance from claimed tr pred 42 | std::list foundActIdx; //active index of trs that found 43 | std::list missActIdx; //active index of trs that missed 44 | 45 | 46 | std::list excludeMisses(const std::list & A); 47 | 48 | int findMinIdx(const std::vector & d, 49 | const std::vector & idx); 50 | 51 | void judgeTracker(const int & i); 52 | 53 | std::list diff(const std::list vec); 54 | 55 | bool hasMzDeviation(int i); 56 | 57 | /* bool isSeizmo(int i); */ 58 | 59 | public: 60 | 61 | 62 | bool customSort(int i, int j); 63 | 64 | TrMgr(int sidx, const double mi, 65 | const int ml, const double cmm, 66 | const double mass_acc, const double ct, const int sB); 67 | 68 | ~TrMgr(); 69 | 70 | void setDataScan(const std::vector & mdat, 71 | const std::vector & idat); 72 | void setCurrScanIdx(const int sidx); 73 | void setPredDatIdx(const std::list & pdi); 74 | 75 | void setFoundActIdx(const std::list & fai); 76 | void setMissActIdx(const std::list & mai); 77 | void setPredDist(const std::vector & dist); 78 | void setActIdx(const std::vector & ai); 79 | 80 | int getPicCounts(); 81 | 82 | int getActiveCounts(); 83 | 84 | Tracker* getTracker(int i); 85 | 86 | std::vector getPicIdx(); 87 | 88 | double getPpm(); 89 | 90 | std::vector iterOverFeatures(int i, double * scanTime); 91 | 92 | void predictScan(const std::vector & mzScan, const std::vector & intenScan); 93 | 94 | void competeAct(); 95 | 96 | void manageMissed(); 97 | 98 | void manageTracked(); 99 | 100 | void initTrackers(const double & q_int, const double & q_mz, 101 | const double & r_int, const double & r_mz, 102 | const int & sidx); 103 | 104 | void removeOvertimers(); 105 | 106 | void displayTracked(); 107 | 108 | void writePICsToFile(); 109 | 110 | void sortPicIdx(); 111 | 112 | void erasePicElements(const std::vector & eIdx); 113 | 114 | void shiftUpIndices(const int i); 115 | 116 | }; 117 | 118 | 119 | #endif 120 | 
-------------------------------------------------------------------------------- /src/obiwarp/lmat.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _LMAT_H 3 | #define _LMAT_H 4 | 5 | #include "vec.h" 6 | #include "mat.h" 7 | 8 | extern "C"{ 9 | using namespace VEC; 10 | /* Intensity matrix (_mat) together with its m/z axis (_mz) and time axis (_tm); set_from_xcms builds _mat as _tm_vals rows by _mz_vals columns. NOTE(review): LMat(const char*), set_mz_from_ascii, set_tm_from_ascii, set_mat_from_ascii, write and print are declared here but have no definition in xcms_lmat.cpp - confirm they are unused. */ 11 | class LMat { 12 | private: 13 | #define LEN_LARGEST_NUM (30) 14 | #define LARGEST_NUM_MZ_VALS (40000) 15 | #define LARGEST_NUM_TIME_VALS (40000) 16 | public: 17 | int _mz_vals; 18 | int _tm_vals; 19 | 20 | // All constructors call new! 21 | // All swaps of these MUST delete their memory before swapping! 22 | MatF *_mat; 23 | VecF *_mz; 24 | VecF *_tm; 25 | 26 | LMat(); 27 | // Takes a binary lmat file as input 28 | LMat(const char *file); 29 | 30 | ~LMat(); 31 | int mzlen() { return _mz_vals; } 32 | int tmlen() { return _tm_vals; } 33 | int num_mz() { return _mz_vals; } 34 | int num_tm() { return _tm_vals; } 35 | MatF * mat() { return _mat; } 36 | VecF * mz() { return _mz; } 37 | VecF * tm() { return _tm; } 38 | /* NOTE(review): hi_mz / hi_tm read element count-1, which is index -1 while the axis is empty. */ 39 | float hi_mz() { return (*_mz)[_mz_vals-1]; } 40 | float lo_mz() { return (*_mz)[0]; } 41 | float hi_tm() { return (*_tm)[_tm_vals-1]; } 42 | float lo_tm() { return (*_tm)[0]; } 43 | void mz_axis_vals(VecI &mzCoords, VecF &mzVals); 44 | void tm_axis_vals(VecI &tmCoords, VecF &tmVals); 45 | 46 | void set_from_xcms(int valuescantime, double *pscantime, int mzrange, 47 | double *mz, double *intensity); 48 | void print_xcms(); 49 | 50 | // selfTimes and equivTimes are the anchor points for the warping 51 | // function.. 
warps the time values (not the actual data values) 52 | void warp_tm(VecF &selfTimes, VecF &equivTimes); 53 | 54 | // expects one line with the # mz vals and next with the vals 55 | void set_mz_from_ascii(FILE *fpt); 56 | // expects one line with the # tm vals and next with the vals 57 | void set_tm_from_ascii(FILE *fpt); 58 | // expects the matrix in ascii format 59 | void set_mat_from_ascii(FILE *ptr, int rows, int cols); 60 | // writes the lmat in binary to a file (or STDOUT if NULL) 61 | void write(const char *file=NULL); 62 | // writes the lmat in ascii to a file (or STDOUT if NULL) 63 | void print(const char *file=NULL); 64 | 65 | // obviously not the final resting place 66 | void chomp_plus_spaces( char *str); 67 | }; 68 | 69 | } /* end extern "C" - closed before the include guard ends, so a repeated #include is skipped as a whole instead of leaving a stray brace */ 70 | 71 | #endif /* _LMAT_H */ 72 | -------------------------------------------------------------------------------- /src/obiwarp/xcms_lmat.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include "string.h" 4 | #include "stdio.h" 5 | 6 | #include "lmat.h" 7 | #include "assert.h" 8 | #include "math.h" 9 | #include "vec.h" 10 | #include "mat.h" 11 | 12 | #include 13 | 14 | extern "C" { 15 | 16 | bool DEBUG = 0; 17 | using namespace VEC; 18 | /* Default constructor: empty axes and matrix, both counts zero. */ 19 | LMat::LMat() : _mz_vals(0), _tm_vals(0) { 20 | _mz = new VecF(); 21 | _tm = new VecF(); 22 | _mat = new MatF(); 23 | } 24 | 25 | LMat::~LMat() { 26 | delete _mz; 27 | delete _tm; 28 | delete _mat; 29 | } 30 | /* Replaces the stored axes and matrix with float copies of the supplied double arrays; the previous objects are deleted first. intensity is read as _tm_vals * _mz_vals consecutive values. */ 31 | void LMat::set_from_xcms(int valuescantime, double *pscantime, int mzrange, double *mz, double *intensity) { 32 | delete _mz; 33 | delete _tm; 34 | delete _mat; 35 | 36 | // Get the time values: 37 | _tm_vals = valuescantime; 38 | 39 | float *tm_tmp = new float[_tm_vals]; 40 | for(int i=0; i < _tm_vals; i++) { 41 | tm_tmp[i] = pscantime[i]; 42 | } 43 | 44 | _tm = new VecF(_tm_vals, tm_tmp); 45 | 46 | // Get the mz values: 47 | _mz_vals = mzrange; 48 | float *mz_tmp = new float[_mz_vals]; 49 | for(int i=0; i < _mz_vals; i++) { 50 | 
mz_tmp[i] = mz[i]; 51 | } 52 | _mz = new VecF(_mz_vals, mz_tmp); 53 | 54 | // Read the matrix: 55 | int rows_by_cols = _tm_vals * _mz_vals; 56 | float *mat_tmp = new float[rows_by_cols]; 57 | 58 | for(int i=0; i < rows_by_cols; i++) { 59 | mat_tmp[i] = intensity[i]; 60 | } 61 | 62 | _mat = new MatF(_tm_vals, _mz_vals, mat_tmp); 63 | } 64 | 65 | void LMat::print_xcms() { 66 | float *mztmp = (float*)(*_mz); 67 | float *tmtmp = (float*)(*_tm); 68 | float *mattmp = (float*)(*_mat); 69 | int i; 70 | 71 | // The TIME vals: 72 | Rprintf("%d\n", _tm_vals); // num of vals 73 | for (i = 0; i < _tm_vals - 1; ++i) { 74 | Rprintf("%f ", tmtmp[i]); 75 | } 76 | Rprintf("%f\n", tmtmp[i]); // the last one 77 | 78 | // The M/Z vals: 79 | Rprintf("%d\n", _mz_vals); // num of vals 80 | for (i = 0; i < _mz_vals - 1; ++i) { 81 | Rprintf("%f ", mztmp[i]); 82 | } 83 | Rprintf("%f\n", mztmp[i]); // the last one 84 | for (int m = 0; m < _tm_vals; ++m) { 85 | int n; 86 | for (n = 0; n < _mz_vals - 1; ++n) { 87 | Rprintf("%f ", mattmp[m*_mz_vals+n]); 88 | } 89 | Rprintf("%f\n", mattmp[m*_mz_vals+n]); 90 | } 91 | 92 | } 93 | 94 | 95 | void LMat::mz_axis_vals(VecI &mzCoords, VecF &mzVals) { 96 | VecF tmp(mzCoords.length()); 97 | for (int i = 0; i < mzCoords.length(); ++i) { 98 | if (mzCoords[i] < _mz_vals) { 99 | tmp[i] = (*_mz)[mzCoords[i]]; 100 | } 101 | else { 102 | Rprintf("asking for mz value at index: %d (length: %d)\n", mzCoords[i], _mz_vals); 103 | R_ShowMessage("Serious error in obiwarp."); 104 | } 105 | } 106 | mzVals.take(tmp); 107 | } 108 | 109 | void LMat::tm_axis_vals(VecI &tmCoords, VecF &tmVals) { 110 | // Rprintf("tmCoords"); tmCoords.print(); 111 | VecF tmp(tmCoords.length()); 112 | //printf("tm_vals %d \n", _tm_vals); 113 | for (int i = 0; i < tmCoords.length(); ++i) { 114 | if (tmCoords[i] < _tm_vals) { 115 | tmp[i] = (*_tm)[tmCoords[i]]; 116 | //printf("tmCoords[i] %d val out %f\n", tmCoords[i], tmp[i]); 117 | } 118 | else { 119 | Rprintf("asking for time value at index: %d 
(length: %d)\n", tmCoords[i], _tm_vals); 120 | R_ShowMessage("Serious error in obiwarp."); 121 | } 122 | } 123 | tmVals.take(tmp); 124 | } 125 | 126 | /* Strips trailing CR/LF characters and then trailing spaces from str in place; a null or empty str is left alone. NOTE(review): both loops are while(--len), so str[0] is never examined or cleared - confirm that is intended. */ 127 | void LMat::chomp_plus_spaces( char *str ) { 128 | if( str ) { 129 | int len = strlen( str ); 130 | if ( len <= 0 ) return; 131 | while ( --len ) { 132 | if ( str[len]=='\r' || str[len]=='\n' ) { 133 | str[len] = 0; 134 | } 135 | else break; 136 | } 137 | // At this point len == strlen(str) - 1 138 | len = len+1; 139 | while ( --len ) { 140 | if ( str[len] != ' ' ) { 141 | break; 142 | } 143 | else { 144 | str[len] = 0; 145 | } 146 | } 147 | } 148 | } 149 | 150 | /* Replaces the time axis with the output of VecF::chfe evaluated at the current time values, using selfTimes / equivTimes as anchor points. */ 151 | void LMat::warp_tm(VecF &selfTimes, VecF &equivTimes) { 152 | VecF out; 153 | VecF::chfe(selfTimes, equivTimes, *_tm, out, 1); // run with sort option 154 | _tm->take(out); 155 | } 156 | 157 | } 158 | -------------------------------------------------------------------------------- /src/optim_ultra.cpp: -------------------------------------------------------------------------------- 1 | #include "optim_ultra.h" 2 | 3 | /* Objective functor: operator() returns the sum over rows i of (eicVec(i) - sum_j x(j) * mpkMatrix(i, j))^2, i.e. the squared residual of eicVec against mpkMatrix * x. */ 4 | class OptimU : public Functor { 5 | public: 6 | arma::mat mpkMatrix; 7 | arma::vec eicVec; 8 | 9 | double operator()(const arma::vec &x) { 10 | double res = 0; 11 | int Sr = mpkMatrix.n_rows; 12 | int Sc = mpkMatrix.n_cols; 13 | arma::vec m = arma::ones(Sr) * 0; 14 | for(int i = 0; i < Sr; i++){ 15 | for(int j =0; j < Sc; j++){ 16 | m(i) = m(i) + x(j) * mpkMatrix(i, j); 17 | } 18 | } 19 | 20 | for(int i = 0; i < Sr; i++){ 21 | res = res+ pow(eicVec(i) - m(i), 2); 22 | } 23 | return res; 24 | } 25 | }; 26 | 27 | /* Minimises OptimU with the L-BFGS-B method, starting from an all-zero vector with an all-zero lower bound, and returns element main_idx of the fitted parameter vector. NOTE(review): main_idx is used as a zero-based index without a range check - confirm callers always pass a value in [0, ncol). */ 28 | double optim_ultra(NumericMatrix mpkmtx, NumericVector vec_eic, int main_idx) { 29 | 30 | arma::vec eic = as(wrap(vec_eic)); 31 | arma::mat mpk_mtx = as(wrap(mpkmtx)); 32 | 33 | // arma::vec eic = arma::ones(vec_eic.size()) * 0; 34 | // arma::mat mpk_mtx = arma::zeros(mpkmtx.nrow(), mpkmtx.ncol()); 35 | // //arma::mat mpk_mtx; 36 | // for(int i = 0; i < vec_eic.size(); i++){ 37 | // eic[i] = vec_eic[i]; 38 | // 
for(int j = 0; j < mpk_mtx.n_cols; j++){ 39 | // mpk_mtx(i,j) = mpkmtx(i,j); 40 | // } 41 | // } 42 | 43 | OptimU f; 44 | f.eicVec = eic; 45 | f.mpkMatrix = mpk_mtx; 46 | arma::vec lower = arma::ones(mpk_mtx.n_cols) * 0; 47 | Roptim opt("L-BFGS-B"); 48 | opt.set_lower(lower); 49 | opt.control.trace = 0; 50 | arma::vec x = arma::ones(mpk_mtx.n_cols) * 0; 51 | opt.minimize(f, x); 52 | 53 | arma::vec res = opt.par(); 54 | return res[main_idx]; 55 | //Rcpp::Rcout << "------res --> " << res << std::endl; 56 | } -------------------------------------------------------------------------------- /src/optim_ultra.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTIM_H 2 | #define OPTIM_H 3 | 4 | #include "optim_src.h" 5 | #include 6 | // [[Rcpp::depends(RcppArmadillo)]] 7 | 8 | 9 | using namespace Rcpp; 10 | using namespace roptim; 11 | 12 | double optim_ultra(NumericMatrix mpkmtx, NumericVector vec_eic, int main_idx); 13 | 14 | 15 | #endif -------------------------------------------------------------------------------- /src/pense/alias.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // alias.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-01-30. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef ALIAS_HPP_ 10 | #define ALIAS_HPP_ 11 | 12 | #include 13 | #include 14 | #include "nsoptim_forward.hpp" 15 | 16 | namespace pense { 17 | namespace alias { 18 | //! Alias for pointers to regression data. 19 | using RegressionDataPtr = std::shared_ptr; 20 | using ConstRegressionDataPtr = std::shared_ptr; 21 | 22 | //! Alias for std::forward_list used throughout the codebase 23 | template 24 | using FwdList = std::forward_list; 25 | 26 | //! Alias for a list of optima. 
27 | template 28 | using Optima = FwdList; 29 | 30 | } // namespace alias 31 | } // namespace pense 32 | 33 | #endif // ALIAS_HPP_ 34 | -------------------------------------------------------------------------------- /src/pense/autoconfig.hpp: -------------------------------------------------------------------------------- 1 | /* src/autoconfig.hpp. Generated from autoconfig.hpp.in by configure. */ 2 | // 3 | // autoconfig.hpp 4 | // pense 5 | // 6 | // Created by David Kepplinger on 2019-04-03. 7 | // Copyright © 2019 David Kepplinger. All rights reserved. 8 | // 9 | 10 | #ifndef AUTOCONFIG_HPP_ 11 | #define AUTOCONFIG_HPP_ 12 | 13 | #define PENSE_ENABLE_OPENMP 1 14 | /* #undef PENSE_DISABLE_OPENMP */ 15 | /* #undef PENSE_OPENMP_ADD_CONST_SHARED */ 16 | #define NSOPTIM_METRICS_DISABLED 1 17 | /* #undef NSOPTIM_METRICS_ENABLED */ 18 | /* #undef NSOPTIM_METRICS_DETAILED */ 19 | 20 | #endif // AUTOCONFIG_HPP_ 21 | -------------------------------------------------------------------------------- /src/pense/constants.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // constants.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-01-30. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef CONSTANTS_HPP_ 10 | #define CONSTANTS_HPP_ 11 | 12 | namespace pense { 13 | 14 | //! Default (fallback) convergence tolerance for numeric algorithms. 15 | constexpr double kDefaultConvergenceTolerance = 1e-6; 16 | 17 | //! The threshold for any numeric value to be considered 0. 18 | constexpr double kNumericZero = 1e-12; 19 | 20 | //! Integer IDs for the supported rho-functions 21 | enum class RhoFunctionType { 22 | kRhoBisquare = 1, 23 | kRhoHuber = 2 24 | }; 25 | 26 | //! Integer IDs for supported EN algorithms 27 | enum class EnAlgorithm { 28 | kLinearizedAdmm = 1, 29 | kVarStepAdmm = 2, 30 | kDal = 3, 31 | kRidge = 4, 32 | kLars = 5, 33 | kCoordinateDescent = 6 34 | }; 35 | 36 | //! 
Integer IDs for supported PENSE algorithms 37 | enum class PenseAlgorithm { 38 | kMm = 1, 39 | kAdmm = 2, 40 | kCoordinateDescent = 3 41 | }; 42 | 43 | //! Integer IDs for supported MestEn algorithms 44 | enum class MestEnAlgorithm { 45 | kMm = 1 46 | }; 47 | 48 | //! Default tuning constant for the Huber rho function for location estimates. 49 | constexpr double kDefaultHuberLocationCc = 1.345; 50 | //! Default tuning constant for the Bisquare rho function for location estimates. 51 | constexpr double kDefaultBisquareLocationCc = 4.685061; 52 | //! Default tuning constant for the Bisquare rho function for M-scale estimates. 53 | constexpr double kDefaultBisquareMscaleCc = 2.937015; 54 | //! Default breakdown point for the M-scale equation. 55 | constexpr double kDefaultMscaleDelta = 0.25; 56 | 57 | constexpr EnAlgorithm kDefaultEnAlgorithm = EnAlgorithm::kLars; 58 | constexpr PenseAlgorithm kDefaultPenseAlgorithm = PenseAlgorithm::kMm; 59 | constexpr MestEnAlgorithm kDefaultMestAlgorithm = MestEnAlgorithm::kMm; 60 | constexpr bool kDefaultUseSparse = false; 61 | 62 | } // namespace pense 63 | 64 | #endif // CONSTANTS_HPP_ 65 | -------------------------------------------------------------------------------- /src/pense/container_utility.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // container_utility.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-11-04. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef CONTAINER_UTILITY_HPP_ 10 | #define CONTAINER_UTILITY_HPP_ 11 | 12 | #include 13 | #include 14 | 15 | namespace pense { 16 | namespace utility { 17 | //! A std::forward_list with items ordered according to the value of another element. 18 | template> 19 | class OrderedList { 20 | using ListType = std::forward_list; 21 | 22 | public: 23 | //! Create an empty ordered list. 24 | OrderedList() noexcept {} 25 | 26 | //! 
Create an empty ordered list using *comp* for comparisons. 27 | explicit OrderedList(const Comparator& comp) noexcept : comp_(comp) {} 28 | 29 | //! Insert an item at the position given by *order_item*. 30 | //! The key list and the item list are walked in lockstep; the scan stops at the first stored key for which comp_(key, order_item) is false, and the new key and item are inserted at that same position in both lists. 31 | //! @return Iterator pointing to the inserted element. 32 | typename ListType::iterator insert(const T1& order_item, const T2& item) { 33 | auto order_it = order_items_.begin(); 34 | auto insert_order_it = order_items_.before_begin(); 35 | const auto order_end = order_items_.end(); 36 | auto insert_item_it = items_.before_begin(); 37 | 38 | while (order_it != order_end && comp_(*order_it, order_item)) { 39 | ++insert_item_it; 40 | ++insert_order_it; 41 | ++order_it; 42 | } 43 | order_items_.insert_after(insert_order_it, order_item); 44 | return items_.insert_after(insert_item_it, item); 45 | } 46 | 47 | //! Emplace an item at the position given by *order_item*. 48 | //! Uses the same lockstep scan as insert(); the item is constructed in place from *args*. 49 | //! @return Iterator pointing to the inserted element. 50 | template 51 | typename ListType::iterator emplace(const T1& order_item, Args&&... args) { 52 | auto order_it = order_items_.begin(); 53 | auto insert_order_it = order_items_.before_begin(); 54 | const auto order_end = order_items_.end(); 55 | auto insert_item_it = items_.before_begin(); 56 | 57 | while (order_it != order_end && comp_(*order_it, order_item)) { 58 | ++insert_item_it; 59 | ++insert_order_it; 60 | ++order_it; 61 | } 62 | order_items_.insert_after(insert_order_it, order_item); 63 | return items_.emplace_after(insert_item_it, std::forward(args)...); 64 | } 65 | 66 | //! Get the list of actual items (const overload). 67 | const std::forward_list& items() const noexcept { 68 | return items_; 69 | } 70 | 71 | //! Get the list of actual items (non-const overload). 
72 | std::forward_list& items() noexcept { 73 | return items_; 74 | } 75 | 76 | typename ListType::iterator begin() noexcept { 77 | return items_.begin(); 78 | } 79 | 80 | typename ListType::iterator end() noexcept { 81 | return items_.end(); 82 | } 83 | 84 | typename ListType::const_iterator begin() const noexcept { 85 | return items_.begin(); 86 | } 87 | 88 | typename ListType::const_iterator end() const noexcept { 89 | return items_.end(); 90 | } 91 | 92 | typename ListType::const_iterator cbegin() const noexcept { 93 | return items_.cbegin(); 94 | } 95 | 96 | typename ListType::const_iterator cend() const noexcept { 97 | return items_.cend(); 98 | } 99 | 100 | private: 101 | Comparator comp_; 102 | std::forward_list order_items_; 103 | std::forward_list items_; 104 | }; 105 | } // namespace utility 106 | } // namespace pense 107 | 108 | #endif // CONTAINER_UTILITY_HPP_ 109 | -------------------------------------------------------------------------------- /src/pense/enpy_types.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // enpy_types.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-01-30. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef ENPY_TYPES_HPP_ 10 | #define ENPY_TYPES_HPP_ 11 | 12 | #include "nsoptim.hpp" 13 | #include "alias.hpp" 14 | 15 | namespace pense { 16 | //! PY Result Structure 17 | //! Contains a list of initial estimates and the associated metrics. 
18 | template 19 | struct PyResult { 20 | PyResult() noexcept : metrics("enpy_initest") {} 21 | explicit PyResult(nsoptim::Metrics&& _metrics) noexcept : metrics(std::move(_metrics)) {} 22 | 23 | nsoptim::Metrics metrics; 24 | alias::Optima initial_estimates; 25 | }; 26 | 27 | } // namespace pense 28 | 29 | #endif // ENPY_TYPES_HPP_ 30 | -------------------------------------------------------------------------------- /src/pense/nsoptim.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // nsoptim.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_HPP_ 10 | #define NSOPTIM_HPP_ 11 | 12 | #if defined(Rcpp_hpp) 13 | #error "The file 'Rcpp.h' should not be included. Please correct to include only 'nsoptim.hpp'." 14 | #endif 15 | #if defined(RcppArmadillo__RcppArmadillo__h) 16 | #error "The file 'RcppArmadillo.h' should not be included. Please correct to include only 'nsoptim.hpp'." 17 | #endif 18 | 19 | #include "autoconfig.hpp" 20 | #include "nsoptim_forward.hpp" 21 | 22 | #include "nsoptim/armadillo.hpp" 23 | #include "nsoptim/rcpp_integration.hpp" 24 | #include "nsoptim/objective.hpp" 25 | #include "nsoptim/container.hpp" 26 | #include "nsoptim/optimizer.hpp" 27 | 28 | #endif // NSOPTIM_HPP_ 29 | -------------------------------------------------------------------------------- /src/pense/nsoptim/armadillo.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // armadillo.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 
7 | // 8 | 9 | #ifndef NSOPTIM_ARMADILLO_HPP_ 10 | #define NSOPTIM_ARMADILLO_HPP_ 11 | 12 | #ifndef ARMA_USE_CXX11 13 | # define ARMA_USE_CXX11 1 14 | #endif 15 | 16 | #ifndef ARMA_DONT_USE_OPENMP 17 | # define ARMA_DONT_USE_OPENMP 1 18 | #endif 19 | 20 | #ifdef __clang__ 21 | # pragma clang diagnostic push 22 | # pragma clang diagnostic ignored "-Weverything" 23 | #endif 24 | 25 | # include 26 | 27 | 28 | #ifdef __clang__ 29 | # pragma clang diagnostic pop 30 | #endif 31 | 32 | #endif // NSOPTIM_ARMADILLO_HPP_ 33 | -------------------------------------------------------------------------------- /src/pense/nsoptim/armadillo_forward.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // armadillo_forward.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_ARMADILLO_FORWARD_HPP_ 10 | #define NSOPTIM_ARMADILLO_FORWARD_HPP_ 11 | 12 | #define ARMA_DONT_USE_OPENMP 1 13 | 14 | #ifdef __clang__ 15 | # pragma clang diagnostic push 16 | # pragma clang diagnostic ignored "-Weverything" 17 | #endif 18 | 19 | # include 20 | 21 | #ifdef __clang__ 22 | # pragma clang diagnostic pop 23 | #endif 24 | 25 | #endif // NSOPTIM_ARMADILLO_FORWARD_HPP_ 26 | -------------------------------------------------------------------------------- /src/pense/nsoptim/config.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // config.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 
7 | // 8 | 9 | #ifndef NSOPTIM_CONFIG_HPP_ 10 | #define NSOPTIM_CONFIG_HPP_ 11 | 12 | #define NSOPTIM_METRICS_LEVEL 1 13 | 14 | #ifdef NSOPTIM_METRICS_DETAILED 15 | # undef NSOPTIM_METRICS_LEVEL 16 | # define NSOPTIM_METRICS_LEVEL 2 17 | #endif 18 | 19 | #ifdef NSOPTIM_METRICS_ENABLED 20 | # undef NSOPTIM_METRICS_LEVEL 21 | # define NSOPTIM_METRICS_LEVEL 1 22 | #endif 23 | 24 | #ifdef NSOPTIM_METRICS_DISABLED 25 | # undef NSOPTIM_METRICS_LEVEL 26 | # define NSOPTIM_METRICS_LEVEL 0 27 | #endif 28 | 29 | #endif // NSOPTIM_CONFIG_HPP_ 30 | -------------------------------------------------------------------------------- /src/pense/nsoptim/container.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // container.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_CONTAINER_HPP_ 10 | #define NSOPTIM_CONTAINER_HPP_ 11 | 12 | #include "container/data.hpp" 13 | #include "container/metrics.hpp" 14 | #include "container/regression_coefficients.hpp" 15 | 16 | #endif // NSOPTIM_CONTAINER_HPP_ 17 | -------------------------------------------------------------------------------- /src/pense/nsoptim/container/forward.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // forward.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_CONTAINER_FORWARD_HPP_ 10 | #define NSOPTIM_CONTAINER_FORWARD_HPP_ 11 | 12 | #include "../config.hpp" 13 | #include "../armadillo_forward.hpp" 14 | 15 | namespace nsoptim { 16 | //! Full definition at nsoptim/container/regression_coefficients.hpp 17 | template class RegressionCoefficients; 18 | 19 | //! Full definition at nsoptim/container/regression_coefficients.hpp 20 | template class RegressionCoefficients; 21 | 22 | //! 
Full definition at nsoptim/container/data.hpp 23 | class PredictorResponseData; 24 | 25 | namespace _metrics_internal { 26 | //! Full definition at nsoptim/container/metrics.hpp 27 | template class Metrics; 28 | } // namespace _metrics_internal 29 | 30 | //! Export the correct Metrics collection based on the configuration. 31 | using Metrics = _metrics_internal::Metrics; 32 | } // namespace nsoptim 33 | 34 | #endif // NSOPTIM_CONTAINER_FORWARD_HPP_ 35 | -------------------------------------------------------------------------------- /src/pense/nsoptim/objective.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // objective.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_OBJECTIVE_HPP_ 10 | #define NSOPTIM_OBJECTIVE_HPP_ 11 | 12 | // Loss functions 13 | #include "objective/ls_regression_loss.hpp" 14 | 15 | // Penalty functions 16 | #include "objective/en_penalty.hpp" 17 | #include "objective/adaptive_en_penalty.hpp" 18 | 19 | #endif // NSOPTIM_OBJECTIVE_HPP_ 20 | -------------------------------------------------------------------------------- /src/pense/nsoptim/objective/convex.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // convex.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_OBJECTIVE_CONVEX_HPP_ 10 | #define NSOPTIM_OBJECTIVE_CONVEX_HPP_ 11 | 12 | namespace nsoptim { 13 | 14 | //! CRTP helper class for convex functions which returns the object itself as convex surrogate. 
15 | template 16 | class ConvexFunction { 17 | public: 18 | using ConvexSurrogateType = Function; 19 | 20 | template 21 | Function& GetConvexSurrogate(const T&) { 22 | return static_cast(*this); 23 | } 24 | }; 25 | 26 | } // namespace nsoptim 27 | 28 | #endif // NSOPTIM_OBJECTIVE_CONVEX_HPP_ 29 | -------------------------------------------------------------------------------- /src/pense/nsoptim/objective/forward.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // forward.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_OBJECTIVE_FORWARD_HPP_ 10 | #define NSOPTIM_OBJECTIVE_FORWARD_HPP_ 11 | 12 | #include "../config.hpp" 13 | #include "../armadillo_forward.hpp" 14 | 15 | namespace nsoptim { 16 | 17 | //! Full definition at nsoptim/objective/ls_regression_loss.hpp 18 | class WeightedLsRegressionLoss; 19 | class LsRegressionLoss; 20 | 21 | //! Full definition at nsoptim/objective/en_penalty.hpp 22 | class EnPenalty; 23 | class LassoPenalty; 24 | class RidgePenalty; 25 | 26 | //! Full definition at nsoptim/objective/adaptive_en_penalty.hpp 27 | class AdaptiveEnPenalty; 28 | class AdaptiveLassoPenalty; 29 | 30 | } // namespace nsoptim 31 | 32 | #endif // NSOPTIM_OBJECTIVE_FORWARD_HPP_ 33 | -------------------------------------------------------------------------------- /src/pense/nsoptim/objective/loss.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // loss.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_OBJECTIVE_LOSS_HPP_ 10 | #define NSOPTIM_OBJECTIVE_LOSS_HPP_ 11 | 12 | namespace nsoptim { 13 | 14 | //! Boilerplate base class for all loss functions. 15 | //! 16 | //! Loss functions must at least implement the following methods: 17 | //! 
`data()` to give read access to the internal data, and 18 | //! `operator(where)` to evaluate the loss at the given coefficients values. 19 | //! `ZeroCoefficients()` to obtain the 0-coefficient value. 20 | //! 21 | //! Loss functions can optionally also implement the following methods: 22 | //! `Difference(a, b)` to evaluate the difference of two coefficient values. 23 | //! 24 | //! Loss functions should be easy and quick to copy and move. The main purpose is not to provide functionality but 25 | //! context. 26 | template 27 | class LossFunction { 28 | public: 29 | using DataType = Data; 30 | 31 | //! Access the data the loss operates on. 32 | //! 33 | //! @return the data the loss operates on. 34 | //! const Data& data() const; 35 | 36 | //! Evaluate the loss function. 37 | //! 38 | //! @param where where to evaluate the loss function. 39 | //! @return the loss evaluated at the given coefficients. 40 | //! double operator()(const Coefficients& where) const; 41 | //! Get the zero coefficients for this loss type. 42 | //! 43 | //! @return zero coefficients. 44 | // Coefficients ZeroCoefficients() const; 45 | 46 | //! Get the difference between two sets of coefficients. 47 | //! 48 | //! @param x a set of regression coefficients. 49 | //! @param y the other set of regression coefficients. 50 | //! @return the relative difference between `x` and `y`. 51 | // double Difference(const Coefficients& x, const Coefficients& y) const; 52 | }; 53 | } // namespace nsoptim 54 | 55 | #endif // NSOPTIM_OBJECTIVE_LOSS_HPP_ 56 | -------------------------------------------------------------------------------- /src/pense/nsoptim/objective/penalty.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // penalty.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 
7 | // 8 | 9 | #ifndef NSOPTIM_OBJECTIVE_PENALTY_HPP_ 10 | #define NSOPTIM_OBJECTIVE_PENALTY_HPP_ 11 | 12 | namespace nsoptim { 13 | 14 | //! Boilerplate base class for all penalty functions. 15 | //! 16 | //! Penalty functions must at least implement the following method: 17 | //! `operator(where)` to evaluate the penalty at the given coefficients values. 18 | //! 19 | //! Penalty functions can optionally also implement the following methods: 20 | //! `Difference(a, b)` to evaluate the difference of two coefficient values. 21 | class PenaltyFunction { 22 | public: 23 | //! Evaluate the penalty function. 24 | //! 25 | //! @param where where to evaluate the penalty function. 26 | //! @return the penalty evaluated at the given coefficients. 27 | //! double operator()(const Coefficients& where) const; 28 | 29 | //! Get the difference between two sets of coefficients. 30 | //! 31 | //! @param x a set of regression coefficients. 32 | //! @param y the other set of regression coefficients. 33 | //! @return the relative difference between `x` and `y`. 34 | // double Difference(const Coefficients& x, const Coefficients& y) const; 35 | }; 36 | } // namespace nsoptim 37 | 38 | #endif // NSOPTIM_OBJECTIVE_PENALTY_HPP_ 39 | -------------------------------------------------------------------------------- /src/pense/nsoptim/optimizer.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // optimizer.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 
7 | // 8 | 9 | #ifndef NSOPTIM_OPTIMIZER_HPP_ 10 | #define NSOPTIM_OPTIMIZER_HPP_ 11 | 12 | #include 13 | #include 14 | 15 | #include "optimizer/optimum.hpp" 16 | #include "optimizer/auglars.hpp" 17 | #include "optimizer/mm.hpp" 18 | #include "optimizer/dal.hpp" 19 | #include "optimizer/admm.hpp" 20 | #include "optimizer/coordinate_descent.hpp" 21 | 22 | #endif // NSOPTIM_OPTIMIZER_HPP_ 23 | -------------------------------------------------------------------------------- /src/pense/nsoptim/optimizer/optimizer_base.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // optimizer_base.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2019-01-02. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_OPTIMIZER_OPTIMIZER_BASE_HPP_ 10 | #define NSOPTIM_OPTIMIZER_OPTIMIZER_BASE_HPP_ 11 | 12 | #include "optimum.hpp" 13 | #include "../traits/traits.hpp" 14 | 15 | namespace nsoptim { 16 | //! Base class for all optimizer using loss function type `T`, penalty function type `U` and coefficient type `V`. 17 | //! This class checks whether `T` is a valid loss function for coefficient type `V` as well as if `U` is a valid 18 | //! penalty function for coefficient type `V`. 
19 | template 20 | class Optimizer { 21 | public: 22 | using LossFunction = T; //< Loss function type 23 | using PenaltyFunction = U; //< Penalty function type 24 | using Coefficients = V; //< Coefficients type 25 | using Optimum = nsoptim::Optimum; 26 | 27 | static_assert(traits::is_loss_function::value, 28 | "LossFunction does not implement the loss function interface"); 29 | static_assert(traits::is_penalty_function::value, 30 | "PenaltyFunction does not implement the penalty function interface"); 31 | static_assert(traits::loss_supports_evaluation::value, 32 | "LossFunction does not support evaluation of the coefficients."); 33 | static_assert(traits::can_evaluate::value, 34 | "PenaltyFunction does not support evaluation of the coefficients."); 35 | }; 36 | } // nsoptim 37 | 38 | #endif // NSOPTIM_OPTIMIZER_OPTIMIZER_BASE_HPP_ 39 | -------------------------------------------------------------------------------- /src/pense/nsoptim/rcpp_integration.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // rcpp_integration.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_RCPP_INTEGRATION_HPP_ 10 | #define NSOPTIM_RCPP_INTEGRATION_HPP_ 11 | 12 | #include "armadillo.hpp" 13 | 14 | namespace Rcpp { 15 | //! Wrap a sparse vector into a Matrix::sparseVector object 16 | template 17 | SEXP wrap(const arma::SpCol& svec) { 18 | const int RTYPE = Rcpp::traits::r_sexptype_traits::rtype; 19 | 20 | // important: update internal state of SpMat object 21 | svec.sync(); 22 | IntegerVector length = IntegerVector::create(svec.n_elem); 23 | 24 | // copy the data into R objects 25 | const Vector values(svec.values, svec.values + svec.n_nonzero); 26 | IntegerVector rowind(svec.row_indices, svec.row_indices + svec.n_nonzero); 27 | 28 | // the sparseVector uses 1-based row indices. 
29 | for (arma::uword i = 0; i < svec.n_nonzero; ++i) { 30 | rowind[i] += 1; 31 | } 32 | 33 | S4 r_sparse_vector("dsparseVector"); 34 | r_sparse_vector.slot("length") = length; 35 | r_sparse_vector.slot("i") = rowind; 36 | r_sparse_vector.slot("x") = values; 37 | return r_sparse_vector; 38 | } 39 | 40 | //! Specialize Rcpp::wrap for nsoptim::RegressionCoefficients 41 | template 42 | SEXP wrap(const nsoptim::RegressionCoefficients& coefs) { 43 | return List::create(Named("intercept") = coefs.intercept, 44 | Named("beta") = coefs.beta); 45 | } 46 | 47 | namespace traits { 48 | //! Specialize Rcpp::as for nsoptim::RegressionCoefficients 49 | template 50 | class Exporter> { 51 | public: 52 | explicit Exporter(SEXP robj) { 53 | const List coef_list(robj); 54 | coefs_.intercept = Rcpp::as(coef_list["intercept"]); 55 | coefs_.beta = Rcpp::as(coef_list["beta"]); 56 | } 57 | 58 | nsoptim::RegressionCoefficients get() const { 59 | return coefs_; 60 | } 61 | 62 | private: 63 | nsoptim::RegressionCoefficients coefs_; 64 | }; 65 | 66 | //! 
Specialize Rcpp::as for arma::SpCol 67 | template 68 | class Exporter> { 69 | public: 70 | explicit Exporter(SEXP r_obj) { 71 | // Assume that the given R object is of type S4 (dsparseVector) 72 | S4 r_svec(r_obj); 73 | if (r_svec.is("dsparseVector")) { 74 | const auto nrows = as(r_svec.slot("length")); 75 | const auto rowind = as(r_svec.slot("i")); 76 | SEXP val_slot = r_svec.slot("x"); 77 | const arma::vec values(REAL(val_slot), Rf_length(val_slot), false, true); 78 | const arma::uvec colptr {0, rowind.n_elem}; 79 | obj_ = arma::SpMat(rowind - 1, colptr, values, nrows, 1).col(0); 80 | } 81 | } 82 | 83 | arma::SpMat get() const { 84 | return obj_; 85 | } 86 | private: 87 | arma::SpMat obj_; 88 | }; 89 | } // namespace traits 90 | } // namespace Rcpp 91 | 92 | #endif // NSOPTIM_RCPP_INTEGRATION_HPP_ 93 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/can_evaluate.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // can_evaluate.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_CAN_EVALUATE_FUNCTION_HPP_ 10 | #define NSOPTIM_TRAITS_CAN_EVALUATE_FUNCTION_HPP_ 11 | 12 | #include 13 | #include 14 | #include "sfinae_types.hpp" 15 | 16 | namespace nsoptim { 17 | namespace traits { 18 | namespace internal { 19 | //! Test if the type T can be evaluated with the coefficient type U 20 | template 21 | static auto test_can_evaluate(double) -> std::false_type; 22 | 23 | //! Test if the type T can be evaluated with the coefficient type U 24 | template 25 | static auto test_can_evaluate(int) -> sfinae_method_type()(std::declval())), double>; 26 | 27 | } // namespace internal 28 | 29 | //! Type trait if the type T supports evaluation of the coefficient type `U`. 
30 | template 31 | struct can_evaluate : decltype(internal::test_can_evaluate(0)) {}; 32 | 33 | } // namespace traits 34 | } // namespace nsoptim 35 | 36 | #endif // NSOPTIM_TRAITS_CAN_EVALUATE_FUNCTION_HPP_ 37 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/can_optimize.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // can_optimize.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_CAN_OPTIMIZE_HPP_ 10 | #define NSOPTIM_TRAITS_CAN_OPTIMIZE_HPP_ 11 | 12 | #include 13 | #include "sfinae_types.hpp" 14 | 15 | namespace nsoptim { 16 | namespace traits { 17 | namespace internal { 18 | template 19 | static auto test_can_optimize_empty(double) -> std::false_type; 20 | 21 | template 22 | static auto test_can_optimize_empty(int) -> sfinae_method_type().Optimize()), 23 | typename T::Optimum>; 24 | 25 | 26 | template 27 | static auto test_can_optimize_start(double) -> std::false_type; 28 | 29 | template 30 | static auto test_can_optimize_start(int) -> sfinae_method_type< 31 | decltype(std::declval().Optimize(std::declval())), typename T::Optimum>; 32 | } // namespace internal 33 | 34 | //! Type trait for optimizers. 35 | //! Checks whether the optimizer T can optimize for coefficients U, i.e., both tested forms of `Optimize` (without an argument and with one argument) are available and return `T::Optimum`. 36 | template 37 | struct can_optimize : internal::tf_switch(0))::value && 38 | decltype(internal::test_can_optimize_start(0))::value> {}; 39 | } // namespace traits 40 | } // namespace nsoptim 41 | 42 | #endif // NSOPTIM_TRAITS_CAN_OPTIMIZE_HPP_ 43 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/has_convex_surrogate.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // has_convex_surrogate.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26.
6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_HAS_CONVEX_SURROGATE_HPP_ 10 | #define NSOPTIM_TRAITS_HAS_CONVEX_SURROGATE_HPP_ 11 | 12 | #include 13 | #include "sfinae_types.hpp" 14 | 15 | namespace nsoptim { 16 | namespace traits { 17 | namespace internal { 18 | 19 | template 20 | static auto test_has_convex_surrogate(double) -> std::false_type; 21 | 22 | template 23 | static auto test_has_convex_surrogate(int) -> sfinae_method_type< 24 | decltype(std::declval().GetConvexSurrogate(std::declval())), typename T::ConvexSurrogateType>; 25 | 26 | } // namespace internal 27 | 28 | //! Type trait for loss & penalty functions that have a convex surrogate. 29 | //! A loss/penalty function which has a convex surrogate must have a member function `GetConvexSurrogate` returning the type's `ConvexSurrogateType`. 30 | template 31 | struct has_convex_surrogate : decltype(internal::test_has_convex_surrogate(0)) {}; 32 | } // namespace traits 33 | } // namespace nsoptim 34 | 35 | #endif // NSOPTIM_TRAITS_HAS_CONVEX_SURROGATE_HPP_ 36 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/has_difference_op.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // has_difference_op.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_HAS_DIFFERENCE_OP_HPP_ 10 | #define NSOPTIM_TRAITS_HAS_DIFFERENCE_OP_HPP_ 11 | 12 | #include 13 | #include "sfinae_types.hpp" 14 | 15 | namespace nsoptim { 16 | namespace traits { 17 | namespace internal { 18 | 19 | template 20 | static auto test_has_difference_op(double) -> std::false_type; 21 | 22 | template 23 | static auto test_has_difference_op(int) -> sfinae_method_type< 24 | decltype(std::declval().Difference(std::declval(), std::declval())), double>; 25 | 26 | } // namespace internal 27 | 28 | //! 
Type trait for loss & penalty functions that support a `Difference` operation. 29 | //! Such a loss/penalty function must have a member function `Difference` which takes two arguments and returns a `double`. 30 | template 31 | struct has_difference_op : decltype(internal::test_has_difference_op(0)) {}; 32 | } // namespace traits 33 | } // namespace nsoptim 34 | 35 | #endif // NSOPTIM_TRAITS_HAS_DIFFERENCE_OP_HPP_ 36 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/is_adaptive.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // is_adaptive.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_IS_ADAPTIVE_HPP_ 10 | #define NSOPTIM_TRAITS_IS_ADAPTIVE_HPP_ 11 | 12 | #include 13 | #include "../armadillo.hpp" 14 | #include "sfinae_types.hpp" 15 | 16 | namespace nsoptim { 17 | namespace traits { 18 | namespace internal { 19 | template 20 | static auto test_has_loadings(int) -> sfinae_method_type().loadings()), arma::vec>; 21 | 22 | template 23 | static auto test_has_loadings(double) -> std::false_type; 24 | } // namespace internal 25 | 26 | //! Type trait for adaptive penalty functions. 27 | //! Adaptive penalty functions have a member function `loadings` to access the penalty loadings. 28 | template 29 | struct is_adaptive : decltype(internal::test_has_loadings(0)) {}; 30 | } // namespace traits 31 | } // namespace nsoptim 32 | 33 | #endif // NSOPTIM_TRAITS_IS_ADAPTIVE_HPP_ 34 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/is_differentiable.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // is_differentiable.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved.
7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_IS_DIFFERENTIABLE_HPP_ 10 | #define NSOPTIM_TRAITS_IS_DIFFERENTIABLE_HPP_ 11 | 12 | #include 13 | #include "sfinae_types.hpp" 14 | 15 | namespace nsoptim { 16 | namespace traits { 17 | namespace internal { 18 | template 19 | static auto test_is_differentiable(double) -> std::false_type; 20 | 21 | template 22 | static auto test_is_differentiable(int) -> sfinae_method_type().Gradient(std::declval())), 23 | typename T::template GradientType>; 24 | 25 | } // namespace internal 26 | 27 | //! Type trait for differentiable loss & penalty functions. 28 | //! A differentiable loss/penalty function supports computing of the gradient. 29 | template 30 | struct is_differentiable : decltype(internal::test_is_differentiable(0)) {}; 31 | } // namespace traits 32 | } // namespace nsoptim 33 | 34 | #endif // NSOPTIM_TRAITS_IS_DIFFERENTIABLE_HPP_ 35 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/is_en_penalty.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // is_en_penalty.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_IS_EN_PENALTY_HPP_ 10 | #define NSOPTIM_TRAITS_IS_EN_PENALTY_HPP_ 11 | 12 | #include 13 | #include "sfinae_types.hpp" 14 | 15 | namespace nsoptim { 16 | namespace traits { 17 | namespace internal { 18 | template 19 | static auto test_is_en_penalty(sfinae_type_wrapper*) -> std::true_type; 20 | 21 | template 22 | static auto test_is_en_penalty(...) -> std::false_type; 23 | 24 | } // namespace internal 25 | 26 | //! Type trait to identify a penalty function as "elastic net"-like. 
27 | template 28 | struct is_en_penalty : decltype(internal::test_is_en_penalty(0)) {}; 29 | } // namespace traits 30 | } // namespace nsoptim 31 | 32 | #endif // NSOPTIM_TRAITS_IS_EN_PENALTY_HPP_ 33 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/is_iterative_algorithm.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // is_iterative_algorithm.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_IS_ITERATIVE_ALGORITHM_HPP_ 10 | #define NSOPTIM_TRAITS_IS_ITERATIVE_ALGORITHM_HPP_ 11 | 12 | #include 13 | #include "sfinae_types.hpp" 14 | 15 | namespace nsoptim { 16 | namespace traits { 17 | namespace internal { 18 | template 19 | static auto test_is_iterative(int) -> sfinae_method_any().convergence_tolerance(1.0))>; 20 | 21 | template 22 | static auto test_is_iterative(double) -> std::false_type; 23 | } // namespace internal 24 | 25 | //! Type trait for iterative algorithms. 26 | //! Iterative algorithms support changes to the convergence threshold and calling Optimize with a maximum number 27 | //! of iterations. Note that only the presence of a member function `convergence_tolerance` callable with a `double` is actually tested. 28 | template 29 | struct is_iterative_algorithm : decltype(internal::test_is_iterative(0)) {}; 30 | } // namespace traits 31 | } // namespace nsoptim 32 | #endif // NSOPTIM_TRAITS_IS_ITERATIVE_ALGORITHM_HPP_ 33 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/is_loss_function.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // is_loss_function.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved.
7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_IS_LOSS_FUNCTION_HPP_ 10 | #define NSOPTIM_TRAITS_IS_LOSS_FUNCTION_HPP_ 11 | 12 | #include 13 | #include 14 | 15 | #include "sfinae_types.hpp" 16 | #include "can_evaluate.hpp" 17 | #include "../objective/loss.hpp" 18 | 19 | namespace nsoptim { 20 | namespace traits { 21 | namespace internal { 22 | //! Test if the loss function T uses data type U 23 | template 24 | static auto test_loss_supports_data(double) -> std::false_type; 25 | 26 | //! Test if the loss function T uses data type U 27 | template 28 | static auto test_loss_supports_data(int) -> sfinae_method_type().data()), U>; 29 | 30 | //! Test if the loss function T can create a "zero" coefficient object of type U. 31 | template 32 | static auto test_loss_supports_zero_coefs(double) -> std::false_type; 33 | 34 | //! Test if the loss function T can create a "zero" coefficient object of type U. 35 | template 36 | static auto test_loss_supports_zero_coefs(int) -> sfinae_method_type< 37 | decltype(std::declval().template ZeroCoefficients()), U>; 38 | } // namespace internal 39 | 40 | //! Type trait for loss functions which expose their data. 41 | //! Tests if the given type has a member function `data()` which returns the given data type. 42 | template 43 | struct has_data_member : decltype(internal::test_loss_supports_data(0))::type {}; 44 | 45 | //! Type trait if a loss function supports evaluation of the coefficient type `U`, i.e., it can create "zero" coefficients of that type via `ZeroCoefficients` and satisfies `can_evaluate`. 46 | template 47 | struct loss_supports_evaluation : internal::tf_switch< 48 | decltype(internal::test_loss_supports_zero_coefs(0))::value && 49 | can_evaluate::value> {}; 50 | 51 | //! 
52 | template struct is_loss_function : internal::tf_switch< 53 | std::is_copy_constructible::value && std::is_base_of, T>::value> {}; 54 | 55 | } // namespace traits 56 | } // namespace nsoptim 57 | 58 | #endif // NSOPTIM_TRAITS_IS_LOSS_FUNCTION_HPP_ 59 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/is_ls_regression_loss.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // is_ls_regression_loss.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_IS_LS_REGRESSION_LOSS_HPP_ 10 | #define NSOPTIM_TRAITS_IS_LS_REGRESSION_LOSS_HPP_ 11 | 12 | #include 13 | #include "sfinae_types.hpp" 14 | #include "is_loss_function.hpp" 15 | 16 | namespace nsoptim { 17 | namespace traits { 18 | namespace internal { 19 | template 20 | static auto test_is_ls_regression_loss(sfinae_type_wrapper*) -> std::true_type; 21 | 22 | template 23 | static auto test_is_ls_regression_loss(...) -> std::false_type; 24 | 25 | } // namespace internal 26 | 27 | //! Type trait to identify a loss function as an LS regression loss; the type must also satisfy `is_loss_function`. 28 | // template 29 | // struct is_ls_regression_loss : decltype(internal::test_is_ls_regression_loss(0)) {}; 30 | 31 | template 32 | struct is_ls_regression_loss : internal::tf_switch(0))::value && 33 | is_loss_function::value>::type {}; 34 | 35 | } // namespace traits 36 | } // namespace nsoptim 37 | 38 | #endif // NSOPTIM_TRAITS_IS_LS_REGRESSION_LOSS_HPP_ 39 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/is_penalty_function.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // is_penalty_function.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved.
7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_IS_PENALTY_FUNCTION_HPP_ 10 | #define NSOPTIM_TRAITS_IS_PENALTY_FUNCTION_HPP_ 11 | 12 | #include 13 | #include 14 | 15 | #include "sfinae_types.hpp" 16 | #include "can_evaluate.hpp" 17 | #include "../objective/penalty.hpp" 18 | 19 | namespace nsoptim { 20 | namespace traits { 21 | //! Type trait if a type implements the PenaltyFunction interface (the type must also be copy-constructible). 22 | // template struct is_penalty_function : std::false_type {}; 23 | template struct is_penalty_function : internal::tf_switch::value && 24 | std::is_copy_constructible::value> {}; 25 | } // namespace traits 26 | } // namespace nsoptim 27 | 28 | #endif // NSOPTIM_TRAITS_IS_PENALTY_FUNCTION_HPP_ 29 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/is_weighted.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // is_weighted.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_IS_WEIGHTED_HPP_ 10 | #define NSOPTIM_TRAITS_IS_WEIGHTED_HPP_ 11 | 12 | #include 13 | #include "../armadillo.hpp" 14 | #include "sfinae_types.hpp" 15 | 16 | namespace nsoptim { 17 | namespace traits { 18 | namespace internal { 19 | template 20 | static auto test_is_weighted(int) -> sfinae_method_type().weights()), arma::vec>; 21 | 22 | template 23 | static auto test_is_weighted(double) -> std::false_type; 24 | } // namespace internal 25 | 26 | //! Type trait for weighted loss functions. 27 | //! Weighted loss functions must have a member function `weights` to access the weights.
28 | template 29 | struct is_weighted : decltype(internal::test_is_weighted(0)) {}; 30 | } // namespace traits 31 | } // namespace nsoptim 32 | 33 | #endif // NSOPTIM_TRAITS_IS_WEIGHTED_HPP_ 34 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/sfinae_types.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // sfinae_types.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-01-26. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_SFINAE_TYPES_HPP_ 10 | #define NSOPTIM_TRAITS_SFINAE_TYPES_HPP_ 11 | 12 | #include 13 | 14 | #include "../armadillo.hpp" 15 | 16 | namespace nsoptim { 17 | namespace traits { 18 | namespace internal { 19 | template::type, U>::value, void>::type> 21 | struct sfinae_method_type : std::true_type {}; 22 | 23 | template class Tag> 24 | struct sfinae_method_tagged : Tag::type>::type {}; 25 | 26 | template 27 | struct sfinae_method_any : std::true_type {}; 28 | 29 | template struct sfinae_type_wrapper {}; 30 | 31 | template 32 | using tf_switch = typename std::conditional::type; 33 | 34 | } // namespace internal 35 | } // namespace traits 36 | } // namespace nsoptim 37 | 38 | #endif // NSOPTIM_TRAITS_SFINAE_TYPES_HPP_ 39 | -------------------------------------------------------------------------------- /src/pense/nsoptim/traits/traits.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // traits.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 
7 | // 8 | 9 | #ifndef NSOPTIM_TRAITS_TRAITS_HPP_ 10 | #define NSOPTIM_TRAITS_TRAITS_HPP_ 11 | 12 | #include "can_evaluate.hpp" 13 | #include "can_optimize.hpp" 14 | #include "has_convex_surrogate.hpp" 15 | #include "is_adaptive.hpp" 16 | #include "is_differentiable.hpp" 17 | #include "is_en_penalty.hpp" 18 | #include "is_iterative_algorithm.hpp" 19 | #include "is_loss_function.hpp" 20 | #include "is_ls_regression_loss.hpp" 21 | #include "is_penalty_function.hpp" 22 | #include "is_weighted.hpp" 23 | #include "has_difference_op.hpp" 24 | 25 | #endif // NSOPTIM_TRAITS_TRAITS_HPP_ 26 | -------------------------------------------------------------------------------- /src/pense/nsoptim/utilities.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // utilities.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2019-01-02. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_UTILITIES_HPP_ 10 | #define NSOPTIM_UTILITIES_HPP_ 11 | 12 | #include 13 | #include 14 | 15 | namespace nsoptim { 16 | //! Globally unique ID for all objects used in nsoptim. 17 | class ObjectId { 18 | public: 19 | ObjectId() noexcept : id_(ObjectId::NextId()) {} 20 | //! Copying is allowed and will preserve the ID! 21 | ObjectId(const ObjectId&) = default; 22 | ObjectId& operator=(const ObjectId&) = default; 23 | //! Moving is allowed and will preserve the ID! 24 | ObjectId(ObjectId&&) = default; 25 | ObjectId& operator=(ObjectId&&) = default; 26 | 27 | //! Compare two IDs. 28 | //! 29 | //! @param other the other ID. 30 | //! @return true if the IDs are not equal. 31 | bool operator!=(const ObjectId& other) const noexcept { 32 | return id_ != other.id_; 33 | } 34 | 35 | //! Compare two IDs. 36 | //! 37 | //! @param other the other ID. 38 | //! @return true if the IDs are equal. 
39 | bool operator==(const ObjectId& other) const noexcept { 40 | return id_ == other.id_; 41 | } 42 | 43 | friend std::ostream& operator<< (std::ostream& stream, const ObjectId& id) { 44 | stream << "0x" << std::hex << id.id_ << std::dec; 45 | return stream; 46 | } 47 | 48 | static ObjectId null() noexcept { 49 | return ObjectId(kNullId); 50 | } 51 | private: 52 | static constexpr std::size_t kNullId = 0; 53 | std::size_t id_; 54 | 55 | ObjectId(const std::size_t id) noexcept : id_(id) {} 56 | 57 | static std::size_t NextId() noexcept { 58 | static std::size_t obj_counter = kNullId; 59 | 60 | std::size_t next_id = kNullId; 61 | #pragma omp atomic capture 62 | next_id = ++obj_counter; 63 | 64 | return next_id; 65 | } 66 | }; 67 | } // namespace nsoptim 68 | 69 | #endif // NSOPTIM_UTILITIES_HPP_ -------------------------------------------------------------------------------- /src/pense/nsoptim_forward.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // nsoptim_forward.hpp 3 | // nsoptim 4 | // 5 | // Created by David Kepplinger on 2018-11-30. 6 | // Copyright © 2018 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef NSOPTIM_FORWARD_HPP_ 10 | #define NSOPTIM_FORWARD_HPP_ 11 | 12 | #include "autoconfig.hpp" 13 | #include "nsoptim/config.hpp" 14 | #include "nsoptim/armadillo_forward.hpp" 15 | #include "nsoptim/container/forward.hpp" 16 | #include "nsoptim/objective/forward.hpp" 17 | 18 | namespace Rcpp { 19 | // Specialize Rcpp::wrap for RegressionCoefficients from 20 | template 21 | SEXP wrap(const nsoptim::RegressionCoefficients&); 22 | 23 | //! Specialize Rcpp::wrap for armadillo sparse vectors 24 | template 25 | SEXP wrap(const arma::SpCol&); 26 | 27 | namespace traits { 28 | //! 
// Make the two switches mutually exclusive.
#ifdef PENSE_DISABLE_OPENMP
# undef PENSE_ENABLE_OPENMP
#endif

#ifdef PENSE_ENABLE_OPENMP
# undef PENSE_DISABLE_OPENMP
#endif

//! `const_shared(list)` expands to nothing by default. It is redefined below to `shared(list)` only if
//! OpenMP is enabled and `PENSE_OPENMP_ADD_CONST_SHARED` is defined.
#define const_shared(list)

#ifdef PENSE_ENABLE_OPENMP

#include <omp.h>

namespace pense {
namespace omp {

#ifdef PENSE_OPENMP_ADD_CONST_SHARED
# undef const_shared
# define const_shared(list) shared(list)
#endif

//! Returns ``true`` if more than one thread is requested.
//!
//! @param nr_threads the number of threads requested.
//! @return ``true`` if `nr_threads` is greater than 1.
inline bool Enabled(const int nr_threads) noexcept {
  return nr_threads > 1;
}

//! A conditional lock.
//!
//! The underlying OpenMP lock is only initialized, set, unset and destroyed if the object was constructed
//! with `enabled = true`; otherwise every operation does nothing.
class Lock {
 public:
  //! Create the lock.
  //!
  //! @param enabled if ``true``, an OpenMP lock is initialized and used by `Acquire()` / `Release()`.
  inline explicit Lock(const bool enabled = true) noexcept : enabled_(enabled) {
    if (enabled_) {
      omp_init_lock(&lock_);
    }
  }

  //! A lock can not be copied, moved, or assigned to!
  Lock(const Lock&) = delete;
  Lock(Lock&&) = delete;
  Lock& operator=(const Lock&) = delete;
  Lock& operator=(Lock&&) = delete;

  //! Destroy the OpenMP lock if it was initialized.
  virtual ~Lock() {
    if (enabled_) {
      omp_destroy_lock(&lock_);
    }
  }

  //! Acquire the lock (no-op for a disabled lock).
  inline void Acquire() noexcept {
    if (enabled_) {
      omp_set_lock(&lock_);
    }
  }

  //! Release the lock (no-op for a disabled lock).
  inline void Release() noexcept {
    if (enabled_) {
      omp_unset_lock(&lock_);
    }
  }

 private:
  const bool enabled_;
  omp_lock_t lock_;
};

//! An implicit guard which acquires the given lock on construction and releases it when the guard goes
//! out of scope.
class Guard {
 public:
  //! Acquire `lock`.
  //!
  //! @param lock the lock to hold for the lifetime of the guard; it is dereferenced unconditionally.
  explicit Guard(Lock* lock) noexcept : lock_(lock) {
    lock_->Acquire();
  }

  //! A guard can not be copied, moved, or assigned to: every copy would release the same lock again
  //! in its destructor.
  Guard(const Guard&) = delete;
  Guard(Guard&&) = delete;
  Guard& operator=(const Guard&) = delete;
  Guard& operator=(Guard&&) = delete;

  //! Release the lock acquired in the constructor.
  virtual ~Guard() noexcept {
    lock_->Release();
  }

 private:
  Lock* lock_;
};

}  // namespace omp
}  // namespace pense

#else

namespace pense {
namespace omp {

//! Returns ``true`` if OpenMP is enabled; without OpenMP support this is always ``false``.
inline constexpr bool Enabled(const int) noexcept {
  return false;
}

//! A lock object.
//! If OpenMP support is disabled, this is just a dummy which does not do anything.
class Lock {
 public:
  explicit Lock(const bool = true) noexcept {}
  void Acquire() const noexcept {}
  void Release() const noexcept {}
};

//! Dummy guard used when OpenMP support is disabled; it does not touch the lock.
//! It is non-copyable like its OpenMP counterpart so that code behaves the same in both configurations.
class Guard {
 public:
  explicit Guard(Lock*) noexcept {}

  Guard(const Guard&) = delete;
  Guard(Guard&&) = delete;
  Guard& operator=(const Guard&) = delete;
  Guard& operator=(Guard&&) = delete;
};

}  // namespace omp
}  // namespace pense

#endif
7 | // 8 | 9 | #ifndef R_EN_REGRESSION_HPP_ 10 | #define R_EN_REGRESSION_HPP_ 11 | 12 | #include "nsoptim_forward.hpp" 13 | 14 | namespace pense { 15 | namespace r_interface { 16 | //! Compute the EN Regularization Path. 17 | //! 18 | //! @param x numeric predictor matrix with `n` rows and `p` columns. 19 | //! @param y numeric response vector with `n` elements. 20 | //! @param penalties a list of EN penalties with decreasing values of the lambda hyper-parameter. 21 | //! @param include_intercept include an intercept in the loss function? 22 | //! @param optional_args a list containing the following named items: 23 | //! `en_options` ... control options for the EN algorithm 24 | //! `obs_weights` ... optional vector of length `n` with non-negative observation weights. 25 | //! `pen_loadings` ... optional vector of length `p` with non-negative penalty loadings 26 | SEXP LsEnRegression(SEXP x, SEXP y, SEXP penalties, SEXP include_intercept, SEXP optional_args) noexcept; 27 | } // namespace r_interface 28 | } // namespace pense 29 | 30 | #endif // R_EN_REGRESSION_HPP_ 31 | -------------------------------------------------------------------------------- /src/pense/r_enpy.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // r_enpy.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-04-03. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef R_ENPY_HPP_ 10 | #define R_ENPY_HPP_ 11 | 12 | #include "nsoptim_forward.hpp" 13 | 14 | namespace pense { 15 | namespace r_interface { 16 | //! Compute the (Adaptive) penalized PY Initial Estimators. 17 | //! 18 | //! @param x numeric predictor matrix with `n` rows and `p` columns. 19 | //! @param y numeric response vector with `n` elements. 20 | //! @param penalties a list of EN penalties with decreasing values of the lambda hyper-parameter. 21 | //! @param sloss_params parameters for the M-scale. 22 | //! 
@param enpy_opts a list of options for the ENPY algorithm. 23 | //! @param optional_args a list containing the following named items: 24 | //! `pen_loadings` ... optional vector of length `p` with non-negative penalty loadings. 25 | SEXP PenPyInitialEstimator(SEXP x, SEXP y, SEXP penalties, SEXP sloss_params, SEXP enpy_opts, 26 | SEXP optional_args) noexcept; 27 | 28 | //! Compute the Principal Sensitivity Components. 29 | //! 30 | //! @param x numeric predictor matrix with `n` rows and `p` columns. 31 | //! @param y numeric response vector with `n` elements. 32 | //! @param penalties a list of EN penalties with decreasing values of the lambda hyper-parameter. 33 | //! @param en_options options for the EN algorithm. 34 | //! @param optional_args a list containing the following named items: 35 | //! `intercept` ... boolean determining if an intercept should be included. 36 | //! `num_threads` ... number of threads. 37 | //! `pen_loadings` ... optional vector of length `p` with non-negative penalty loadings. 38 | SEXP PrincipalSensitivityComponents(SEXP x, SEXP y, SEXP penalties, SEXP en_options, SEXP optional_args) noexcept; 39 | 40 | } // namespace r_interface 41 | } // namespace pense 42 | 43 | #endif // R_ENPY_HPP_ 44 | -------------------------------------------------------------------------------- /src/pense/r_interface.cc: -------------------------------------------------------------------------------- 1 | // 2 | // r_interface.cc 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-04-03. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifdef HAVE_RCPP 10 | #include 11 | 12 | #include "rcpp_integration.hpp" 13 | #include "r_en_regression.hpp" 14 | #include "r_pense_regression.hpp" 15 | #include "r_mesten_regression.hpp" 16 | #include "r_robust_utils.hpp" 17 | #include "r_enpy.hpp" 18 | #include "r_utilities.hpp" 19 | 20 | extern "C" SEXP run_testthat_tests() noexcept; 21 | 22 | //! 
R initializing function (must be in the global namespace). 23 | extern "C" void R_init_pense(DllInfo *dll) noexcept; 24 | 25 | using namespace pense::r_interface; 26 | 27 | namespace { 28 | //! Exported methods 29 | 30 | } // namespace 31 | 32 | 33 | 34 | #endif // HAVE_RCPP 35 | -------------------------------------------------------------------------------- /src/pense/r_mesten_regression.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // r_mesten_regression.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2020-06-08 6 | // Copyright © 2020 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef R_MESTEN_REGRESSION_HPP_ 10 | #define R_MESTEN_REGRESSION_HPP_ 11 | 12 | #include "nsoptim_forward.hpp" 13 | 14 | namespace pense { 15 | namespace r_interface { 16 | //! Compute the (Adaptive) M-EN Regularization Path. 17 | //! 18 | //! @param x numeric predictor matrix with `n` rows and `p` columns. 19 | //! @param y numeric response vector with `n` elements. 20 | //! @param scale scalar, numeric, auxiliary scale. 21 | //! @param penalties a list of EN penalties with decreasing values of the lambda hyper-parameter. 22 | //! @param mest_opts a list of options for the M-estimation algorithm. 23 | //! @param optional_args a list containing the following named items: 24 | //! `shared_starts` ... optional list of coefficients to start at every penalty. 25 | //! `individual_starts` ... optional list the same length as `penalties` with a list of coefficients 26 | //! to start at the corresponding penalty. 27 | //! `pen_loadings` ... optional vector of length `p` with non-negative penalty loadings. 28 | SEXP MestEnRegression(SEXP x, SEXP y, SEXP scale, SEXP penalties, SEXP mest_opts, SEXP optional_args) noexcept; 29 | 30 | //! Get the smallest lambda such that the (Adaptive) M-EN-estimate gives the empty model. 31 | //! 32 | //! @param x numeric predictor matrix with `n` rows and `p` columns. 33 | //! 
@param y numeric response vector with `n` elements. 34 | //! @param scale scalar, numeric, auxiliary scale. 35 | //! @param mest_opts a list of options for the M-estimation algorithm. 36 | //! @param optional_args a list containing the following named items: 37 | //! `pen_loadings` ... optional vector of length `p` with non-negative penalty loadings. 38 | SEXP MestEnMaxLambda(SEXP x, SEXP y, SEXP scale, SEXP mest_opts, SEXP optional_args) noexcept; 39 | 40 | } // namespace r_interface 41 | } // namespace pense 42 | 43 | #endif // R_MESTEN_REGRESSION_HPP_ 44 | -------------------------------------------------------------------------------- /src/pense/r_pense_regression.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // r_pense_regression.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-04-03. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef R_PENSE_REGRESSION_HPP_ 10 | #define R_PENSE_REGRESSION_HPP_ 11 | 12 | #include "nsoptim_forward.hpp" 13 | 14 | namespace pense { 15 | namespace r_interface { 16 | //! Compute the (Adaptive) PENSE Regularization Path. 17 | //! 18 | //! @param x numeric predictor matrix with `n` rows and `p` columns. 19 | //! @param y numeric response vector with `n` elements. 20 | //! @param penalties a list of EN penalties with decreasing values of the lambda hyper-parameter. 21 | //! @param enpy_inds a vector of 1-based indices for the `penalties` list, at which initial ENPY estimates should be 22 | //! computed. 23 | //! @param pense_opts a list of options for the PENSE algorithm. 24 | //! @param enpy_opts a list of options for the ENPY algorithm. 25 | //! @param optional_args a list containing the following named items: 26 | //! `shared_starts` ... optional list of coefficients to start at every penalty. 27 | //! `individual_starts` ... optional list the same length as `penalties` with a list coefficients 28 | //! 
to start at the corresponding penalty. 29 | //! `pen_loadings` ... optional vector of length `p` with non-negative penalty loadings. 30 | SEXP PenseEnRegression(SEXP x, SEXP y, SEXP penalties, SEXP enpy_inds, SEXP pense_opts, SEXP enpy_opts, 31 | SEXP optional_args) noexcept; 32 | 33 | //! Get the smallest lambda such that the (Adaptive) PENSE estimate gives the empty model. 34 | //! 35 | //! @param x numeric predictor matrix with `n` rows and `p` columns. 36 | //! @param y numeric response vector with `n` elements. 37 | //! @param pense_opts a list of options for the PENSE algorithm. 38 | //! @param optional_args a list containing the following named items: 39 | //! `pen_loadings` ... optional vector of length `p` with non-negative penalty loadings. 40 | SEXP PenseMaxLambda(SEXP x, SEXP y, SEXP pense_opts, SEXP optional_args) noexcept; 41 | 42 | } // namespace r_interface 43 | } // namespace pense 44 | 45 | #endif // R_PENSE_REGRESSION_HPP_ 46 | -------------------------------------------------------------------------------- /src/pense/r_robust_utils.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // r_robust_utils.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-04-03. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef R_ROBUST_UTILS_HPP_ 10 | #define R_ROBUST_UTILS_HPP_ 11 | 12 | #include "nsoptim_forward.hpp" 13 | 14 | namespace pense { 15 | namespace r_interface { 16 | //! Compute the tau-Scale of Centered Values 17 | //! 18 | //! @param x numeric values. 19 | //! @return the tau-scale of the centered values. 20 | SEXP TauSize(SEXP x) noexcept; 21 | 22 | //! Compute the M-scale of Centered Values 23 | //! 24 | //! @param x numeric values. 25 | //! @param mscale_opts a list of options for the M-scale equation. 26 | //! @return the M-scale of the centered values. 27 | SEXP MScale(SEXP x, SEXP mscale_opts) noexcept; 28 | 29 | //! 
Compute the derivative of the M-scale function with respect to each coordinate. 30 | //! 31 | //! @param x numeric values. 32 | //! @param mscale_opts a list of options for the M-scale equation. 33 | //! @param order the order of the derivative to compute (1 or 2) 34 | //! @return the derivative of the M-scale function of the requested order. 35 | SEXP MScaleDerivative(SEXP x, SEXP mscale_opts, SEXP order) noexcept; 36 | 37 | //! Compute the maximum derivative of the M-scale function over a grid of values 38 | //! 39 | //! @param r_x original numeric values. 40 | //! @param r_grid grid of values to look for maximal derivative. 41 | //! @param r_change number of elements in `r_x` to change. 42 | //! @param r_mscale_opts a list of options for the M-scale equation. 43 | //! @return the maximum derivative of the M-scale function. 44 | SEXP MaxMScaleDerivative(SEXP r_x, SEXP r_grid, SEXP r_change, SEXP r_mscale_opts) noexcept; 45 | 46 | //! Compute the maximum entry in the gradient and Hessian of the M-scale 47 | //! function over a grid of values 48 | //! 49 | //! @param r_x original numeric values. 50 | //! @param r_grid grid of values to look for maximal derivative. 51 | //! @param r_change number of elements in `r_x` to change. 52 | //! @param r_mscale_opts a list of options for the M-scale equation. 53 | //! @return a vector with 2 elements: the maximum gradient and the maximum 54 | //! Hessian of the M-scale function. 55 | SEXP MaxMScaleGradientHessian(SEXP r_x, SEXP r_grid, SEXP r_change, 56 | SEXP r_mscale_opts) noexcept; 57 | 58 | //! Compute the M-location 59 | //! 60 | //! @param x numeric values. 61 | //! @param scale the scale of the values. 62 | //! @param opts a list of options for the M-estimating equation. 63 | //! @return the M-estimate of location. 64 | SEXP MLocation(SEXP x, SEXP scale, SEXP opts) noexcept; 65 | 66 | //! Compute the M-estimate of the Location and Scale 67 | //! 68 | //! @param x numeric values. 69 | //! @param mscale_opts a list of options for the M-estimating equation. 70 | //! 
@param location_opts a list of options for the location rho-function 71 | //! @return a vector with 2 elements: the location and the scale estimate. 72 | SEXP MLocationScale(SEXP x, SEXP mscale_opts, SEXP location_opts) noexcept; 73 | } // namespace r_interface 74 | } // namespace pense 75 | 76 | #endif // R_ROBUST_UTILS_HPP_ 77 | -------------------------------------------------------------------------------- /src/pense/r_utilities.cc: -------------------------------------------------------------------------------- 1 | // 2 | // r_utilities.cc 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-05-12. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #include "r_utilities.hpp" 10 | 11 | #include 12 | 13 | namespace pense { 14 | namespace r_interface { 15 | //! Approximate value matching. 16 | //! 17 | //! Returns a vector of 1-based positions of the (first) matches of `r_x` in `r_table`. 18 | //! 19 | //! @param r_x numeric vector of values to look up. 20 | //! @param r_table numeric vector of values to match against; two values match if their absolute difference is less than `r_eps`. 21 | //! @return a vector the same length as `r_x` with integers giving the position in `r_table` of the first match 22 | //! if there is a match, or `NA_integer_` otherwise. 
23 | SEXP ApproximateMatch(SEXP r_x, SEXP r_table, SEXP r_eps) noexcept { 24 | const R_xlen_t len_x = Rf_xlength(r_x); 25 | const int len_table = Rf_length(r_table); 26 | SEXP r_matches = PROTECT(Rf_allocVector(INTSXP, len_x)); 27 | int* matches = INTEGER(r_matches); 28 | double const * x = REAL(r_x); 29 | double const * table = REAL(r_table); 30 | const double eps = *REAL(r_eps); 31 | 32 | for (R_xlen_t i = 0; i < len_x; ++i) { 33 | matches[i] = NA_INTEGER; 34 | for (int j = 0; j < len_table; ++j) { 35 | if (std::abs(x[i] - table[j]) < eps) { 36 | matches[i] = j + 1; 37 | break; 38 | } 39 | } 40 | } 41 | 42 | UNPROTECT(1); 43 | return r_matches; 44 | } 45 | 46 | } // namespace r_interface 47 | } // namespace pense 48 | -------------------------------------------------------------------------------- /src/pense/r_utilities.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // r_utilities.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-05-12. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef R_UTILITIES_HPP_ 10 | #define R_UTILITIES_HPP_ 11 | 12 | #include "nsoptim_forward.hpp" 13 | 14 | namespace pense { 15 | namespace r_interface { 16 | //! Approximate value matching. 17 | //! 18 | //! Returns a vector of 1-based positions of the (first) matches of `x` in `table`. 19 | //! 20 | //! @param x numeric predictor matrix with `n` rows and `p` columns. 21 | //! @param y numeric response vector with `n` elements. 22 | //! @return a vector the same lenght of `x` with integers giving the position in `table` of the first match 23 | //! if there is a match, or `NA_integer_` otherwise. 
24 | SEXP ApproximateMatch(SEXP x, SEXP table, SEXP eps) noexcept; 25 | 26 | } // namespace r_interface 27 | } // namespace pense 28 | 29 | #endif // R_UTILITIES_HPP_ 30 | -------------------------------------------------------------------------------- /src/pense/rcpp_integration.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // rcpp_integration.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-01-30. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef RCPP_INTEGRATION_HPP_ 10 | #define RCPP_INTEGRATION_HPP_ 11 | 12 | #include "rcpp_utils_forward.hpp" 13 | 14 | #include "nsoptim.hpp" 15 | 16 | #include "rcpp_utils.hpp" 17 | #include "rcpp_parse_config.hpp" 18 | 19 | 20 | #endif // RCPP_INTEGRATION_HPP_ 21 | -------------------------------------------------------------------------------- /src/pense/rcpp_parse_config.cc: -------------------------------------------------------------------------------- 1 | // 2 | // rcpp_parse_config.cc 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-05-01. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 
7 | // 8 | #include "nsoptim.hpp" 9 | 10 | #include "rcpp_parse_config.hpp" 11 | #include "rcpp_utils.hpp" 12 | #include "cd_pense.hpp" 13 | 14 | #include "nsoptim.hpp" 15 | 16 | namespace { 17 | constexpr int kAdmmMaxIt = 1000; 18 | constexpr double kAdmmAcceleration = 1; 19 | 20 | constexpr int kCDLsMaxIt = 1000; 21 | constexpr int kCDLsResetIt = 8; 22 | 23 | constexpr int kCDPenseMaxIt = 1000; 24 | constexpr int kCDPenseResetIt = 8; 25 | constexpr double kCDPenseLinesearchMult = 0.; 26 | constexpr int kCDPenseLinesearchSteps = 10; 27 | 28 | constexpr int kDalMaxIt = 100; 29 | constexpr int kDalMaxInnerIt = 100; 30 | constexpr double kDalEtaMult = 2; 31 | constexpr double kDalEtaStartNumeratorCons = 0.01; 32 | constexpr double kDalEtaStartNumeratorAggr = 1; 33 | constexpr double kDalLambdaRelChangeAggr = 0.25; 34 | 35 | constexpr int kMmMaxIt = 500; 36 | constexpr nsoptim::MMConfiguration::TighteningType kMmTightening = nsoptim::MMConfiguration::TighteningType::kAdaptive; 37 | constexpr int kMmTighteningSteps = 10; 38 | } // namespace 39 | 40 | namespace Rcpp { 41 | namespace traits { 42 | 43 | nsoptim::AdmmLinearConfiguration Exporter::get() const { 44 | const Rcpp::List config_list = as(r_obj_); 45 | nsoptim::AdmmLinearConfiguration tmp = { 46 | pense::GetFallback(config_list, "max_it", kAdmmMaxIt), 47 | pense::GetFallback(config_list, "accelerate", kAdmmAcceleration) 48 | }; 49 | return tmp; 50 | } 51 | 52 | nsoptim::DalEnConfiguration Exporter::get() const { 53 | const Rcpp::List config_list = as(r_obj_); 54 | nsoptim::DalEnConfiguration tmp = { 55 | pense::GetFallback(config_list, "max_it", kDalMaxIt), 56 | pense::GetFallback(config_list, "max_inner_it", kDalMaxInnerIt), 57 | pense::GetFallback(config_list, "eta_start_numerator_conservative", kDalEtaStartNumeratorCons), 58 | pense::GetFallback(config_list, "eta_start_numerator_aggressive", kDalEtaStartNumeratorAggr), 59 | pense::GetFallback(config_list, "lambda_relchange_aggressive", 
kDalLambdaRelChangeAggr), 60 | pense::GetFallback(config_list, "eta_multiplier", kDalEtaMult) 61 | }; 62 | return tmp; 63 | } 64 | 65 | pense::CDPenseConfiguration Exporter::get() const { 66 | const Rcpp::List config_list = as(r_obj_); 67 | pense::CDPenseConfiguration tmp = { 68 | pense::GetFallback(config_list, "max_it", kCDPenseMaxIt), 69 | pense::GetFallback(config_list, "linesearch_mult", kCDPenseLinesearchMult), 70 | pense::GetFallback(config_list, "linesearch_steps", kCDPenseLinesearchSteps), 71 | pense::GetFallback(config_list, "reset_it", kCDPenseResetIt) 72 | }; 73 | return tmp; 74 | } 75 | 76 | nsoptim::CDConfiguration Exporter::get() const { 77 | const Rcpp::List config_list = as(r_obj_); 78 | nsoptim::CDConfiguration tmp = { 79 | pense::GetFallback(config_list, "max_it", kCDLsMaxIt), 80 | pense::GetFallback(config_list, "reset_it", kCDLsResetIt) 81 | }; 82 | return tmp; 83 | } 84 | 85 | nsoptim::MMConfiguration Exporter::get() const { 86 | const Rcpp::List config_list = as(r_obj_); 87 | nsoptim::MMConfiguration tmp = { 88 | pense::GetFallback(config_list, "max_it", kMmMaxIt), 89 | pense::GetFallback(config_list, "tightening", kMmTightening), 90 | pense::GetFallback(config_list, "tightening_steps", kMmTighteningSteps) 91 | }; 92 | return tmp; 93 | } 94 | 95 | } // namespace traits 96 | } // namespace Rcpp 97 | -------------------------------------------------------------------------------- /src/pense/rcpp_parse_config.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // rcpp_parse_config.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-01-30. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #ifndef RCPP_PARSE_CONFIG_HPP_ 10 | #define RCPP_PARSE_CONFIG_HPP_ 11 | 12 | #include "nsoptim_forward.hpp" 13 | #include "cd_pense.hpp" 14 | 15 | namespace Rcpp { 16 | namespace traits { 17 | //! 
Converter for an R-list to configuration options for the linearized ADMM algorithm. 18 | template<> class Exporter< nsoptim::AdmmLinearConfiguration > { 19 | public: 20 | explicit Exporter(SEXP r_obj) noexcept : r_obj_(r_obj) {} 21 | nsoptim::AdmmLinearConfiguration get() const; 22 | private: 23 | SEXP r_obj_; 24 | }; 25 | 26 | //! Converter for an R-list to configuration options for the DAL algorithm. 27 | template<> class Exporter< nsoptim::DalEnConfiguration > { 28 | public: 29 | explicit Exporter(SEXP r_obj) noexcept : r_obj_(r_obj) {} 30 | nsoptim::DalEnConfiguration get() const; 31 | private: 32 | SEXP r_obj_; 33 | }; 34 | 35 | //! Converter for an R-list to configuration options for the CD-Pense algorithm. 36 | template<> class Exporter< pense::CDPenseConfiguration > { 37 | public: 38 | explicit Exporter(SEXP r_obj) noexcept : r_obj_(r_obj) {} 39 | pense::CDPenseConfiguration get() const; 40 | private: 41 | SEXP r_obj_; 42 | }; 43 | 44 | //! Converter for an R-list to configuration options for the CD-LS algorithm. 45 | template<> class Exporter< nsoptim::CDConfiguration > { 46 | public: 47 | explicit Exporter(SEXP r_obj) noexcept : r_obj_(r_obj) {} 48 | nsoptim::CDConfiguration get() const; 49 | private: 50 | SEXP r_obj_; 51 | }; 52 | 53 | //! Converter for an R-list to configuration options for the MM algorithm. 54 | template<> class Exporter< nsoptim::MMConfiguration > { 55 | public: 56 | explicit Exporter(SEXP r_obj) noexcept : r_obj_(r_obj) {} 57 | nsoptim::MMConfiguration get() const; 58 | private: 59 | SEXP r_obj_; 60 | }; 61 | 62 | } // namespace traits 63 | } // namespace Rcpp 64 | 65 | #endif // RCPP_PARSE_CONFIG_HPP_ 66 | -------------------------------------------------------------------------------- /src/pense/rcpp_utils_forward.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // rcpp_utils_forward.hpp 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-01-30. 
6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | #ifndef RCPP_UTILS_FORWARD_HPP_ 9 | #define RCPP_UTILS_FORWARD_HPP_ 10 | 11 | #include 12 | 13 | #include "nsoptim_forward.hpp" 14 | #include "alias.hpp" 15 | 16 | namespace pense { 17 | //! PY Result Structure 18 | //! Contains a list of initial estimates and the associated metrics. 19 | template struct PyResult; 20 | } // namespace pense 21 | 22 | namespace Rcpp { 23 | //! Wrap a pense::alias::FwdList (aka std::forward_list) into an R list. 24 | //! 25 | //! @param list forward list. 26 | //! @return an R list. 27 | template SEXP wrap(const pense::alias::FwdList& list); 28 | 29 | //! Wrap an nsoptim::Metrics object into an R list. 30 | //! 31 | //! @param metrics the metrics object. 32 | //! @return an R list. 33 | template<> SEXP wrap(const nsoptim::Metrics& metrics); 34 | 35 | //! Wrap a PyResult into an R list. 36 | //! 37 | //! @param py_result PyResult structure. 38 | //! @return an R list. 39 | template SEXP wrap(const pense::PyResult& py_result); 40 | 41 | namespace traits { 42 | //! Create an exporter for any `std::forward_list` where the elements are supported by Rcpp::as. 43 | //! This does not use the same functionality as in `Rcpp/internal/Exporter.h` because forward_lists are better 44 | //! created sequentially. 45 | template 46 | class Exporter< std::forward_list >; 47 | 48 | //! Converter for an R-list to an EN penalty. 49 | template<> class Exporter< nsoptim::EnPenalty >; 50 | //! Converter for an R-list to a LASSO penalty. 51 | template<> class Exporter< nsoptim::LassoPenalty >; 52 | //! Converter for an R-list to a Ridge penalty. 
53 | template<> class Exporter< nsoptim::RidgePenalty >; 54 | 55 | } // namespace traits 56 | } // namespace Rcpp 57 | 58 | #endif // RCPP_UTILS_FORWARD_HPP_ 59 | -------------------------------------------------------------------------------- /src/pense/robust_scale_location.cc: -------------------------------------------------------------------------------- 1 | // 2 | // robust_scale_location.cc 3 | // pense 4 | // 5 | // Created by David Kepplinger on 2019-01-30. 6 | // Copyright © 2019 David Kepplinger. All rights reserved. 7 | // 8 | 9 | #include 10 | #include "nsoptim.hpp" 11 | #include "rcpp_utils.hpp" 12 | #include "robust_scale_location.hpp" 13 | #include "constants.hpp" 14 | 15 | using arma::vec; 16 | using arma::median; 17 | using arma::abs; 18 | using arma::uword; 19 | 20 | namespace { 21 | constexpr double kTauSizeC2Squared = 9.; 22 | constexpr double kTauSizeConsistencyConstant = 1. / 0.961; 23 | 24 | constexpr double kMadScaleConsistencyConstant = 1.4826; 25 | } // namespace 26 | 27 | namespace pense { 28 | double TauSize(const vec& values) noexcept { 29 | const vec abs_values(abs(values)); 30 | const double sigma_0 = median(abs_values); 31 | 32 | if (sigma_0 < kNumericZero) { 33 | return 0.; 34 | } 35 | 36 | const double tau_size = arma::mean(arma::clamp(arma::square(abs_values / sigma_0), 37 | 0, kTauSizeC2Squared)); 38 | return sigma_0 * kTauSizeConsistencyConstant * sqrt(tau_size); 39 | } 40 | 41 | namespace robust_scale_location { 42 | double InitialScaleEstimate(const vec& values, const double delta, const double eps) { 43 | // Try the MAD of the uncentered values. 
44 | const double mad = kMadScaleConsistencyConstant * median(abs(values)); 45 | if (mad > eps) { 46 | return mad; 47 | } else if (static_cast((1 - delta) * values.n_elem) > values.n_elem / 2) { 48 | // If the MAD is also (almost) 0, but the M-scale takes into account more observations than the MAD, 49 | // compute the variance of the additional elements (i.e., the variance without considering the smallest 50 | // 50% of the observations) 51 | const uword lower_index = values.n_elem / 2; 52 | const uword upper_index = static_cast((1 - delta) * values.n_elem); 53 | const vec ordered_values = arma::sort(abs(values)); 54 | const double scale = arma::var(ordered_values.rows(lower_index, upper_index)); 55 | if (scale > eps) { 56 | return scale; 57 | } 58 | } 59 | return 0.; 60 | } 61 | 62 | } // namespace robust_scale_location 63 | 64 | } // namespace pense 65 | -------------------------------------------------------------------------------- /src/sqlite_utilities.h: -------------------------------------------------------------------------------- 1 | #ifndef SQLITEU_H 2 | #define SQLITEU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "sqlite/sqlite3.h" 8 | 9 | using namespace Rcpp; 10 | using namespace std; 11 | 12 | class SqliteDriver { 13 | 14 | private: 15 | string database; 16 | string db_table; 17 | sqlite3* db; 18 | sqlite3_stmt* stmt; 19 | const unsigned char* MS2peaks; 20 | string MS2Peak_str; 21 | 22 | // These vectors are used to store the information of MS/MS records [they are paired] 23 | vector IDs_vec; 24 | vector FMs_vec; 25 | vector MS2Peaks_vec; 26 | vector cmpds_vec; 27 | vector smiles_vec; 28 | vector inchikeys_vec; 29 | vector precmz_vec; 30 | vector rt_vec; 31 | 32 | vector all_DB; 33 | vector all_expDB; 34 | 35 | public: 36 | 37 | SqliteDriver(String dbase, string db_tb, int ion_mode); 38 | 39 | int setDatabase (string db_path); 40 | 41 | int setDB_table (string db_tb); 42 | 43 | int setEntireDatabase (int ion_mode); 44 | 45 | int 
create_connection(String database_Path); 46 | 47 | int db_Executor_core(const char* Query_statement); 48 | 49 | int disconnectDB(); 50 | 51 | int extractMS2_with_ID(int ID); 52 | 53 | int extractMS2s_with_IDs(int IDs[]); 54 | 55 | vector extractIDs_with_mzRange(double min_mz, double max_mz); 56 | 57 | int extractIDMS2_with_mzRange(double min_mz, double max_mz); 58 | 59 | int extractIDMS2_with_mzrtRange(double min_mz, double max_mz, double min_rt, double max_rt); 60 | 61 | vector extractIDs_with_mzRange_entireDB(double min_mz, double max_mz); 62 | 63 | int extractIDMS2_with_mzRange_entireDB(double min_mz, double max_mz); 64 | 65 | int extractIDMS2_with_mzRange_expDB(double min_mz, double max_mz); 66 | 67 | int extractFMMS2_with_mzRange_entireDB(double min_mz, double max_mz); 68 | 69 | int extractALLMS2_with_mzRange(double min_mz, double max_mz); 70 | 71 | CharacterVector convertID2InChiKeys(IntegerVector IDs); 72 | 73 | CharacterVector convertID2CMPDNMs(IntegerVector IDs); 74 | 75 | CharacterVector convertID2Formulas(IntegerVector IDs); 76 | 77 | vector convertID2alls(IntegerVector IDs); 78 | 79 | CharacterVector convertID2MS2Peaks(IntegerVector IDs); 80 | 81 | vector extractClasses(IntegerVector IDs); 82 | 83 | bool clsTableExsiting(); 84 | 85 | string getMS2Peaks(); 86 | 87 | vector getIDsVec(); 88 | 89 | vector getMS2PeaksVec(); 90 | 91 | vector getFMs(); 92 | 93 | vector getPrecMZVec(); 94 | 95 | vector getRTVec(); 96 | 97 | vector getCMPDsVec(); 98 | 99 | vector getSimlesVec(); 100 | 101 | vector getInchikeysVec(); 102 | 103 | }; 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /src/utilities.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILITY_H 2 | #define UTILITY_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "lowess.h" 8 | 9 | using namespace Rcpp; 10 | using namespace std; 11 | 12 | NumericVector SmoothLoess(NumericMatrix &eic, double 
span); 13 | 14 | vector lowessCpp(IntegerVector x, NumericVector y, double spanVal); 15 | 16 | bool checkContinuousPtsAboveThr(NumericVector v, int iStart, double num, double thr, int nSkipMax); 17 | 18 | NumericVector getContinuousPtsAboveThrIdx(NumericVector v, int iStart, int num, double thr, int nSkipMax); 19 | 20 | IntegerVector whichTrue(LogicalVector vecValues); 21 | 22 | int whichTrue1(LogicalVector vecValues); 23 | 24 | IntegerVector GetRoi(NumericVector is_roi, int idx_apex_eic); 25 | 26 | double EstimateChromNoise(NumericVector &x, double trim, int min_pts); 27 | 28 | NumericVector GetLocalNoiseEstimate(NumericVector d, IntegerVector idx_fr_roi, double noiserange_min, double noiserange_max, 29 | int Nscantime, double threshold, int num); 30 | 31 | NumericVector CalculateBL(NumericVector d, IntegerVector drange, double threshold, int num, int n_skip_max, double noiserange_min); 32 | 33 | IntegerVector FindLocalMax(NumericVector x, int m, double v); 34 | 35 | IntegerVector FindLocalMin(NumericVector x, int m); 36 | 37 | NumericMatrix mergeEIC(NumericMatrix x, NumericMatrix y); 38 | 39 | double Gauss(int x, int h, int mu, int sigma); 40 | 41 | double cor_fast (NumericVector x, NumericVector y); 42 | 43 | double GetDistantP(NumericMatrix peak1, NumericMatrix peak2); 44 | 45 | // double funOptimc(NumericVector x, NumericVector M, NumericMatrix S); 46 | // 47 | // NumericVector optim_real(NumericMatrix mpk_mtx, NumericVector eic); 48 | // 49 | // NumericVector optim_ultra(NumericMatrix mpk_mtx, NumericVector eic); 50 | // 51 | 52 | // List extractEIC(List specExp, NumericMatrix mzRange, NumericVector mz); 53 | 54 | #endif --------------------------------------------------------------------------------