├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── R-CMD-check.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── GenerateMassTraceList.R ├── GetFWXM.R ├── GetSharpness.R ├── GetZigzagIDX.R ├── Get_MZ_list.R ├── Get_peak_vars.R ├── Helper_functions.R ├── LazyPeakIntegration_peaks.R ├── Limit_Target_list.R ├── Skyline_functions.R ├── alignment_error_plot.R ├── assemble_peaks.R ├── assess_alignment.R ├── callmzRAPP.R ├── check_IR_biases.R ├── check_missing_peaks.R ├── clean_peak_assignments.R ├── comp_classify_false_negative.R ├── comp_count_max_error.R ├── comp_feature_compare.R ├── comp_find_best_feature_feature.R ├── comp_find_r_s_error.R ├── comp_functions.R ├── comp_funktionen_fuer_alignment.R ├── comp_generate_options.R ├── comp_generate_results_text.R ├── comp_import_Metaboanalyst.R ├── comp_import_benchmark.R ├── comp_import_compd.R ├── comp_import_elmaven.R ├── comp_import_msdial.R ├── comp_import_mzMine.R ├── comp_import_openms.R ├── comp_import_slaw.R ├── comp_import_xcms.R ├── comp_peak_compare.R ├── comp_pick_algo.R ├── comp_pick_main_feature.R ├── comp_pick_main_peak.R ├── cutout_peak.R ├── detect_double_peaks.R ├── getEICfromROI_par.R ├── get_EIC_table.R ├── get_pot_peak_ranges.R ├── globals.R ├── match_NPPpeaks_to_NPPfeatures.R ├── match_features_to_benchmark.R ├── match_peaks_to_benchmark.R ├── match_peaks_to_benchmark_split.R ├── metrics_per_molecule.R ├── plot_Peak.R ├── plot_bench_histo.R ├── plot_bench_overview.R ├── plot_bench_peak_overview.R ├── plot_comp_dist_of_found_peaks.R ├── plot_comp_iso_pred_error.R ├── plot_comp_missing_value_hm.R ├── plot_comp_peak_overview.R ├── plot_comp_scatter_plot.R ├── predict_Iso.R └── sunburst_plots.R ├── README.Rmd ├── README.md ├── inst ├── CITATION └── md │ ├── .gitignore │ ├── Alignment_error_graphic.png │ ├── IR_tolerance.PNG │ ├── Matching.png │ ├── Missing_value_graphic.PNG │ ├── Peak_subsets.PNG │ ├── README.Rmd │ ├── README.html │ ├── README.log │ ├── 
Vignette_mzRAPP_Example_workflow.html │ ├── mzRAPP_0.2.0.tar.gz │ └── mzRAPP_report_template.Rmd ├── man ├── .gitignore ├── Alignment_error_plot.Rd ├── File_con_test.Rd ├── GetFWXM.Rd ├── GetSharpness.Rd ├── GetZigzagIDX.Rd ├── Get_MZ_list.Rd ├── Get_peak_vars.Rd ├── Limit_Target_list.Rd ├── SkylinePeakBoundaries.Rd ├── SkylineTransitionList.Rd ├── as.sunburstDF.Rd ├── assemble_peaks.Rd ├── assess_alignment.Rd ├── best_feature_per_comparison.Rd ├── callmzRAPP.Rd ├── check_IR_biases.Rd ├── check_benchmark_input.Rd ├── check_missing_peaks.Rd ├── check_nonTargeted_input.Rd ├── classify_false_negative.Rd ├── clean_peak_assignments.Rd ├── compare_peaks.Rd ├── count_alignment_errors.Rd ├── count_errors_max.Rd ├── cutout_peaks.Rd ├── derive_performance_metrics.Rd ├── detect_double_peaks2.Rd ├── feature_compare.Rd ├── find_bench_peaks.Rd ├── find_best_feature_feature.Rd ├── find_r_s_error.Rd ├── generate_options.Rd ├── getXIC.Rd ├── get_EIMatches_BM_NPPpeaks.Rd ├── get_ROIs.Rd ├── get_avg_noise.Rd ├── get_main_UT_groups.Rd ├── get_mz_table.Rd ├── get_pot_peak_ranges2.Rd ├── import_grouped_Metaboanalyst.Rd ├── import_grouped_elmaven.Rd ├── import_grouped_msdial.Rd ├── import_grouped_mzmine.Rd ├── import_grouped_openms.Rd ├── import_grouped_slaw.Rd ├── import_grouped_xcms.Rd ├── import_options.Rd ├── import_ungrouped_Metaboanalyst.Rd ├── import_ungrouped_cd.Rd ├── import_ungrouped_elmaven.Rd ├── import_ungrouped_msdial.Rd ├── import_ungrouped_mzmine.Rd ├── import_ungrouped_openms.Rd ├── import_ungrouped_slaw.Rd ├── import_ungrouped_xcms.Rd ├── match_NPPpeaks_to_NPPfeatures.Rd ├── match_features_to_benchmark.Rd ├── match_peaks_to_benchmark.Rd ├── match_peaks_to_benchmark_split.Rd ├── metrics_per_molecule.Rd ├── pick_main_feature.Rd ├── pick_main_feature_sd.Rd ├── pick_main_peak.Rd ├── pick_main_peak_sd.Rd ├── plot_IR_peaks.Rd ├── plot_Peak.Rd ├── plot_Peak_per_mol.Rd ├── plot_Peak_with_predicted_peak.Rd ├── plot_bench_histo.Rd ├── plot_bench_overview.Rd ├── 
plot_bench_peak_overview.Rd ├── plot_comp_dist_of_found_peaks.Rd ├── plot_comp_iso_pred_error.Rd ├── plot_comp_missing_value_hm.Rd ├── plot_comp_peak_overview.Rd ├── plot_comp_scatter_plot.Rd ├── plot_sunburst_alignment.Rd ├── plot_sunburst_peakQuality.Rd ├── plot_sunburst_peaks.Rd ├── plotly_click_wo_warnings.Rd ├── predict_Iso.Rd ├── reIndexFeatures.Rd ├── remove_identical_peaks.Rd ├── rename_columns_from_options.Rd ├── round_woe.Rd └── top_to_x.Rd ├── mzRAPP.Rproj └── vignettes ├── .gitignore └── Vignette_mzRAPP_Example_workflow.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.Rmd$ 4 | ^doc$ 5 | ^Meta$ 6 | ^README.Rmd 7 | ^\.Rproj$ 8 | ^\.github$ 9 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: windows-latest, r: 'release'} 26 | # - {os: macOS-latest, r: 'release'} 27 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 28 | # - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 29 | 30 | env: 31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 32 | R_COMPILE_AND_INSTALL_PACKAGES: "always" 33 | RSPM: ${{ matrix.config.rspm }} 34 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 35 | RGL_USE_NULL: true 36 | 37 | steps: 38 | - uses: actions/checkout@v2 39 | 40 | - uses: r-lib/actions/setup-r@v1 41 | with: 42 | r-version: ${{ matrix.config.r }} 43 | 44 | - uses: r-lib/actions/setup-pandoc@v1 45 | 46 | - name: Query dependencies 47 | run: | 48 | install.packages('remotes') 49 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 50 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 51 | shell: Rscript {0} 52 | 53 | - name: Restore R package cache 54 | if: runner.os != 'Windows' 55 | uses: actions/cache@v2 56 | with: 57 | path: ${{ env.R_LIBS_USER }} 58 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 59 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 60 | 61 | - name: Install system dependencies 62 | if: runner.os == 'Linux' 63 | run: | 64 | while read -r cmd 65 | do 66 | eval sudo $cmd 67 | done < <(Rscript -e 
'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 68 | 69 | - name: Install dependencies 70 | run: | 71 | remotes::install_deps(dependencies = TRUE) 72 | remotes::install_cran("rcmdcheck") 73 | shell: Rscript {0} 74 | 75 | - name: Check 76 | env: 77 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 78 | run: | 79 | options(crayon.enabled = TRUE) 80 | rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") 81 | shell: Rscript {0} 82 | 83 | - name: Upload check results 84 | if: failure() 85 | uses: actions/upload-artifact@main 86 | with: 87 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 88 | path: check 89 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | *.csv 6 | *.Rmd 7 | RObject.rds 8 | inst/doc 9 | doc 10 | Meta 11 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mzRAPP 2 | Type: Package 3 | Title: Benchmark Dataset Generation and Non-Targeted Data Pre-Processing Assessment 4 | Version: 1.2.1 5 | Authors@R: c(person("Yasin", "El Abiead", 6 | email = "yasin.el.abiead@univie.ac.at", 7 | role = c("aut", "cre"), 8 | comment = c(ORCID = "0000-0003-4392-7706")), 9 | person("Maximilian", "Milford", 10 | email = "max@mmilford.com", 11 | role = "aut", 12 | comment = c(ORCID = "0000-0003-3616-8334")) 13 | ) 14 | Description: Generates and evaluates liquid chromatography-high resolution mass spectrometry benchmark peak-lists, which can be used for the assessment of non-targeted data pre-processing outputs from 'XCMS', 'XCMS3', 'XCMS-online', 'MZmine 2', 'MS-DIAL', 'El-MAVEN', 'OpenMS', etc.. 
15 | License: GPL-3 + file LICENSE 16 | Encoding: UTF-8 17 | LazyData: true 18 | RoxygenNote: 7.2.3 19 | biocViews: 20 | Imports: 21 | ncdf4, 22 | lazyeval, 23 | doFuture, 24 | data.table, 25 | dplyr, 26 | shinyjs, 27 | tcltk, 28 | hutils, 29 | retistruct, 30 | enviPat, 31 | DescTools, 32 | signal, 33 | S4Vectors, 34 | future, 35 | shiny, 36 | kableExtra, 37 | MSnbase, 38 | xcms (>= 3.14.0), 39 | ggplot2, 40 | shinyFiles, 41 | plotly 42 | Remotes: 43 | KelseyChetnik/MetaClean, 44 | souravc83/fastAdaboost 45 | Suggests: 46 | foreach, 47 | tibble, 48 | utf8, 49 | stats, 50 | knitr, 51 | bit64, 52 | boot, 53 | shinybusy, 54 | shinydashboard, 55 | shinyWidgets, 56 | shinycssloaders, 57 | rmarkdown 58 | VignetteBuilder: 59 | knitr 60 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(Alignment_error_plot) 4 | export(SkylinePeakBoundaries) 5 | export(SkylineTransitionList) 6 | export(callmzRAPP) 7 | export(check_benchmark_input) 8 | export(check_nonTargeted_input) 9 | export(compare_peaks) 10 | export(derive_performance_metrics) 11 | export(find_bench_peaks) 12 | export(find_r_s_error) 13 | export(get_ROIs) 14 | export(get_mz_table) 15 | export(plot_Peak) 16 | export(plot_Peak_per_mol) 17 | export(plot_Peak_with_predicted_peak) 18 | export(plot_bench_histo) 19 | export(plot_bench_overview) 20 | export(plot_bench_peak_overview) 21 | export(plot_comp_dist_of_found_peaks) 22 | export(plot_comp_iso_pred_error) 23 | export(plot_comp_missing_value_hm) 24 | export(plot_comp_peak_overview) 25 | export(plot_comp_scatter_plot) 26 | export(plot_sunburst_alignment) 27 | export(plot_sunburst_peakQuality) 28 | export(plot_sunburst_peaks) 29 | import(enviPat) 30 | import(plotly) 31 | import(shinyjs) 32 | importFrom(data.table,as.data.table) 33 | importFrom(data.table,data.table) 34 | 
importFrom(data.table,is.data.table) 35 | importFrom(data.table,melt.data.table) 36 | importFrom(data.table,setDT) 37 | importFrom(data.table,setkey) 38 | importFrom(dplyr,count) 39 | importFrom(dplyr,summarize) 40 | importFrom(dplyr,sym) 41 | importFrom(ggplot2,aes) 42 | importFrom(ggplot2,annotate) 43 | importFrom(ggplot2,coord_equal) 44 | importFrom(ggplot2,element_blank) 45 | importFrom(ggplot2,element_text) 46 | importFrom(ggplot2,facet_wrap) 47 | importFrom(ggplot2,geom_bar) 48 | importFrom(ggplot2,geom_col) 49 | importFrom(ggplot2,geom_histogram) 50 | importFrom(ggplot2,geom_line) 51 | importFrom(ggplot2,geom_point) 52 | importFrom(ggplot2,geom_tile) 53 | importFrom(ggplot2,geom_vline) 54 | importFrom(ggplot2,ggplot) 55 | importFrom(ggplot2,ggtitle) 56 | importFrom(ggplot2,labs) 57 | importFrom(ggplot2,position_dodge) 58 | importFrom(ggplot2,scale_color_manual) 59 | importFrom(ggplot2,scale_colour_manual) 60 | importFrom(ggplot2,scale_fill_manual) 61 | importFrom(ggplot2,theme) 62 | importFrom(ggplot2,theme_classic) 63 | importFrom(ggplot2,xlab) 64 | importFrom(ggplot2,ylab) 65 | importFrom(plotly,plotlyOutput) 66 | importFrom(retistruct,line.line.intersection) 67 | importFrom(shiny,downloadHandler) 68 | importFrom(shiny,incProgress) 69 | importFrom(shiny,isolate) 70 | importFrom(shiny,modalDialog) 71 | importFrom(shiny,observe) 72 | importFrom(shiny,observeEvent) 73 | importFrom(shiny,reactive) 74 | importFrom(shiny,reactiveVal) 75 | importFrom(shiny,renderTable) 76 | importFrom(shiny,renderText) 77 | importFrom(shiny,renderUI) 78 | importFrom(shiny,req) 79 | importFrom(shiny,showModal) 80 | importFrom(shiny,updateSelectInput) 81 | importFrom(shiny,updateTabsetPanel) 82 | importFrom(shiny,withProgress) 83 | importFrom(shinyjs,disable) 84 | importFrom(shinyjs,enable) 85 | importFrom(shinyjs,useShinyjs) 86 | importFrom(stats,median) 87 | importFrom(xcms,filterMz) 88 | importFrom(xcms,filterRt) 89 | 
# --------------------------------------------------------------------------------
# /R/GetFWXM.R:
# --------------------------------------------------------------------------------
#' @title GetFWXM
#'
#' @description Returns the width of a peak at Full Width at X Maximum, with X being the factor by which the maximum of the peak is multiplied.
#'
#' @param RT_vect vector with retention times
#' @param Int_vect vector with intensities
#' @param baseL height of baseline
#' @param X peak height at which width should be measured (e.g. 0.5 means width at half maximum)
#' @param peak_borders TRUE/FALSE; if TRUE the trace does not have to start and end below the
#'   threshold. A side that starts/ends above the threshold is reported as min/max of \code{RT_vect},
#'   and (when the trace has more than 4 runs) the crossing is searched at the run of more than
#'   2 below-threshold points closest to the main peak.
#' @param return_diff TRUE/FALSE; should the output be a difference of end point and start point, or both points as a vector
#'
#' @return If \code{return_diff = TRUE} the width of the peak (numeric(1)); otherwise a numeric
#'   vector \code{c(start, end)}. \code{NA_real_} is returned when no run of at least 4 consecutive
#'   points lies above the threshold, or when the trace borders are above the threshold and
#'   \code{peak_borders = FALSE}.
#'
#' @importFrom retistruct line.line.intersection
#'
#' @keywords internal
GetFWXM <- function(RT_vect, Int_vect, baseL, X, peak_borders = FALSE, return_diff = FALSE)
{

  # Intensity threshold: fraction X of the apex height above the baseline.
  gw <- baseL + (max(Int_vect) - baseL) * X

  # Run-length encode "above threshold" so every run becomes one table row.
  tl <- rle(Int_vect > gw)
  l <- tl[["lengths"]]
  v <- tl[["values"]]

  # seq_along() instead of seq(1:length(l)) so a zero-length input yields zero rows.
  dt <- data.table::data.table(idx = seq_along(l),
                               l = l,
                               v = v)

  # Require at least one run of >= 4 consecutive points above the threshold.
  if(nrow(dt[v == TRUE]) == 0 || max(dt[v == TRUE]$l) < 4){return(NA_real_)}

  # Index of the (first) longest above-threshold run.
  main_peak <- dt[v == TRUE & l == max(dt[v == TRUE]$l)]$idx[1]
  IntSec1 <- NA
  IntSec2 <- NA

  borders_below_threshold <- v[1] == FALSE && v[length(v)] == FALSE && length(v) > 1

  if(!(borders_below_threshold || peak_borders == TRUE)){
    # Consistent numeric NA (the original fell through to a logical NA here).
    return(NA_real_)
  }

  ## ---- left intersection ---------------------------------------------------
  if(peak_borders == FALSE || (peak_borders == TRUE && v[1] == FALSE)){

    fs <- 1
    if(length(l) > 4 && peak_borders == TRUE){

      dt_before <- dt[idx < main_peak]
      left_gaps <- dt_before[v == FALSE & l > 2]$idx

      # last below-threshold run (longer than 2 points) in front of the main peak
      if(length(left_gaps) > 0){
        fs <- left_gaps[length(left_gaps)]
      }

    }

    # Convert run index into the position of the last point of that run.
    fs <- sum(l[1:fs])

    P1 <- c(RT_vect[fs], Int_vect[fs])
    P2 <- c(RT_vect[fs + 1], Int_vect[fs + 1])
    P3 <- c(RT_vect[fs], gw)
    P4 <- c(RT_vect[fs + 1], gw)

    IntSec1 <- retistruct::line.line.intersection(P1, P2, P3, P4, interior.only = TRUE)
  } else {IntSec1 <- c(min(RT_vect))}

  ## ---- right intersection --------------------------------------------------
  if(peak_borders == FALSE || (peak_borders == TRUE && v[length(v)] == FALSE)){

    fs <- length(l)
    if(length(l) > 4 && peak_borders == TRUE){

      dt_after <- dt[idx > main_peak]
      right_gaps <- dt_after[v == FALSE & l > 2]$idx

      # first below-threshold run (longer than 2 points) behind the main peak
      if(length(right_gaps) > 0){
        fs <- min(right_gaps)
      }

    }

    # Position of the first point of run `fs`.
    u <- sum(l) - sum(l[fs:length(l)]) + 1

    P1 <- c(RT_vect[u], Int_vect[u])
    P2 <- c(RT_vect[u - 1], Int_vect[u - 1])
    P3 <- c(RT_vect[u], gw)
    P4 <- c(RT_vect[u - 1], gw)

    IntSec2 <- retistruct::line.line.intersection(P1, P2, P3, P4, interior.only = TRUE)
  } else {IntSec2 <- c(max(RT_vect))}

  if(return_diff == FALSE){
    return(as.double(c(IntSec1[1], IntSec2[1])))
  }

  if(is.na(IntSec2[1]) || is.na(IntSec1[1])) return(NA_real_)
  return(as.double(IntSec2[1] - IntSec1[1]))

}
# --------------------------------------------------------------------------------
# /R/GetSharpness.R:
# --------------------------------------------------------------------------------
#' GetSharpness
#'
#' As described in:
#' Zhang,W. and Zhao,P.X. (2014) Quality evaluation of extracted ion chromatograms and chromatographic peaks in
#' liquid chromatography/mass spectrometry-based metabolomics data. BMC Bioinformatics, 15, 1–13.
#'
#' @param int numeric vector of intensities; only values > 0 are used
#'
#' @return numeric(1) sharpness value; \code{NA_real_} if fewer than 3 positive points remain
#'
#' @keywords internal
GetSharpness <- function(int){

  int <- int[int > 0]

  # The loop needs at least one interior point; 2:(length(int) - 1) would
  # otherwise count backwards and index outside the vector.
  if(length(int) < 3) return(NA_real_)

  befAp <- 0
  aftAp <- 0
  ApPos <- which.max(int)
  for(n in 2:(length(int) - 1)){
    # relative rise towards the apex
    if(n <= ApPos){
      befAp <- befAp + (int[n] - int[n - 1]) / int[n - 1]
    }

    # relative fall after the apex
    if(n >= ApPos){
      aftAp <- aftAp + (int[n] - int[n + 1]) / int[n + 1]
    }
  }

  sharpness <- aftAp + befAp

  return(sharpness)

}
# --------------------------------------------------------------------------------
# /R/GetZigzagIDX.R:
# --------------------------------------------------------------------------------
#' GetZigzagIDX
#'
#'
#' As described in:
#' Zhang,W. and Zhao,P.X. (2014) Quality evaluation of extracted ion chromatograms and chromatographic peaks in
#' liquid chromatography/mass spectrometry-based metabolomics data. BMC Bioinformatics, 15, 1–13.
#'
#'
#' @param int numeric vector of intensities
#' @param height height used for normalisation (enters as \code{height^2})
#'
#' @return numeric(1) zigzag index; \code{NA_real_} if \code{int} has fewer than 3 points
#'
#' @keywords internal
GetZigzagIDX <- function(int, height){

  # No interior point -> the second difference is undefined.
  if(length(int) < 3) return(NA_real_)

  Zaeler_zzi <- 0
  for(n in 2:(length(int) - 1)){
    # squared second difference at point n
    Zaeler_zzi <- Zaeler_zzi + (2 * int[n] - int[n - 1] - int[n + 1])^2
  }

  zigzag_idx <- Zaeler_zzi / (length(int) * height^2)

  return(zigzag_idx)

}
# --------------------------------------------------------------------------------
# /R/Get_MZ_list.R:
# --------------------------------------------------------------------------------
#' Get_MZ_list
#'
#' @param l.peaks data.table of peaks; the columns \code{idx}, \code{StartTime} and \code{EndTime} are used
#' @param raw_data raw data object passed to \code{xcms::filterRt}/\code{xcms::filterMz}
#'   (presumably an MSnbase/xcms experiment object - confirm against caller)
#' @param CompCol_xic table providing \code{eic_mzmin} and \code{eic_mzmax}
#'   (assumed to describe a single EIC - TODO confirm)
#' @param EIC.dt data.table with the columns \code{rt}, \code{int} and \code{int_wo_spikes}
#' @param max.mz.diff_ppm ppm value; four times this value is used to widen the extracted m/z window
#'
#' @return \code{NULL} if no peak has at least 5 usable points, otherwise a list with one element per
#'   remaining peak, each holding \code{mz}, \code{int} (one value per scan: the most intense signal
#'   inside the EIC m/z window) and \code{EXTvsORIG} (mean summed intensity of the widened window
#'   and of the selected signals).
#'
#' @importFrom xcms filterRt filterMz
#'
#' @keywords internal
Get_MZ_list <- function(l.peaks, raw_data, CompCol_xic, EIC.dt, max.mz.diff_ppm){

  # keep peaks with at least 5 non-spike, positive points inside their borders
  l.peaks <-
    l.peaks[l.peaks[, .(pnts = length(EIC.dt[!is.na(int_wo_spikes) & rt >= StartTime & rt <= EndTime & int > 0]$int)), by = .(idx)]$pnts >= 5]

  if(nrow(l.peaks) < 1){return(NULL)}

  # NOTE(review): the upper m/z bound scales the ppm tolerance with eic_mzmin,
  # not eic_mzmax - left unchanged, confirm whether that is intended.
  suppressWarnings(
    raw_data_lim <- raw_data %>%
      xcms::filterRt(rt = c(min(l.peaks$StartTime), max(l.peaks$EndTime))) %>%
      xcms::filterMz(mz = c(CompCol_xic$eic_mzmin - max.mz.diff_ppm * 4 * 1e-6 * CompCol_xic$eic_mzmin,
                            CompCol_xic$eic_mzmax + max.mz.diff_ppm * 4 * 1e-6 * CompCol_xic$eic_mzmin))
  )

  l.peaks.mz_list <- vector("list", nrow(l.peaks))

  # per-scan summed intensity (NULL scans are skipped by unlist())
  sum_or_null <- function(x){if(is.null(x)){NULL} else sum(x)}

  mz_lower <- CompCol_xic$eic_mzmin - 0.001
  mz_upper <- CompCol_xic$eic_mzmax + 0.001

  for(nc in seq_len(nrow(l.peaks))){

    suppressWarnings(
      raw_data_lim1 <- raw_data_lim %>%
        xcms::filterRt(rt = unlist(unname(l.peaks[nc, c("StartTime", "EndTime")])))
    )

    suppressWarnings(
      l.peaks.mz_list[[nc]] <- list(mz = xcms::mz(raw_data_lim1),
                                    int = xcms::intensity(raw_data_lim1))
    )

    # get area of extended mz
    mean_int <- mean(unlist(lapply(l.peaks.mz_list[[nc]][["int"]], sum_or_null)))

    if(length(l.peaks.mz_list[[nc]][["mz"]]) > 1){

      # Work on the scans of the CURRENT peak. The original indexed element
      # [[1]] here, which overwrote every later peak with data from peak 1.
      scan_mz <- l.peaks.mz_list[[nc]][["mz"]]
      scan_int <- l.peaks.mz_list[[nc]][["int"]]

      # get original peak: signals inside the (slightly padded) EIC m/z window
      orig_idx <- lapply(scan_mz, function(x){
        which(data.table::between(x, mz_lower, mz_upper) == TRUE)
      })

      window_int <- lapply(seq_along(orig_idx), function(i) scan_int[[i]][orig_idx[[i]]])
      window_mz <- lapply(seq_along(orig_idx), function(i) scan_mz[[i]][orig_idx[[i]]])

      # take highest mass peak of every scan
      highstInt_idx <- lapply(window_int, which.max)

      l.peaks.mz_list[[nc]][["int"]] <-
        lapply(seq_along(highstInt_idx), function(i) window_int[[i]][highstInt_idx[[i]]])

      l.peaks.mz_list[[nc]][["mz"]] <-
        lapply(seq_along(highstInt_idx), function(i) window_mz[[i]][highstInt_idx[[i]]])

    }

    # get area of original mz
    mean_orig_int <- mean(unlist(lapply(l.peaks.mz_list[[nc]][["int"]], sum_or_null)))

    l.peaks.mz_list[[nc]][["EXTvsORIG"]] <- c(mean_int, mean_orig_int)

  }

  return(l.peaks.mz_list)
}
# --------------------------------------------------------------------------------
# /R/Helper_functions.R:
# --------------------------------------------------------------------------------
#' plotly_click_wo_warnings
#'
#' Reads the "plotly_click" event of a plotly source with warnings switched off.
#' The caller's \code{warn} option is restored when the function exits.
#'
#' @param sc plotly source id passed to \code{plotly::event_data}
#'
#' @return dataframe; plotly event
#'
#' @import plotly
#'
#' @keywords internal
plotly_click_wo_warnings <- function(sc){

  storeWarn <- getOption("warn")
  # restore the previous setting even if event_data() fails
  on.exit(options(warn = storeWarn), add = TRUE)
  options(warn = -1)

  event <- plotly::event_data("plotly_click", source = sc, priority = "event")

  return(event)

}



#' getXIC
#'
#' @param PC data.table with the columns \code{IDX}, \code{RT.v} and \code{Intensities.v}
#'   (comma separated values stored as strings)
#' @param IndexNumber value of \code{IDX} selecting the row(s) to extract
#'
#' @return data.table with the numeric columns \code{rt} and \code{int}
#'
#' @keywords internal
getXIC <- function(PC, IndexNumber){

  plot.table <- data.table::data.table(rt = as.numeric(unlist(strsplit(PC[IDX == IndexNumber, RT.v], split = ","))),
                                       int = as.numeric(unlist(strsplit(PC[IDX == IndexNumber, Intensities.v], split = ","))))

  return(plot.table)

}



#' reIndexFeatures
#'
#' Replaces every value by "F<rank>", where rank is the position of the value
#' when all distinct values are ordered by decreasing frequency.
#'
#' @param vct vector of feature identifiers
#'
#' @return character vector (one label per element of \code{vct})
#'
#' @keywords internal
reIndexFeatures <- function(vct){

  # Build the frequency ranking once; the original rebuilt it for every element.
  tab <- names(sort(table(vct), decreasing = TRUE))

  apply(as.matrix(vct), 1, function(x){

    idx <- which(tab == x)

    paste0("F", idx)
  })

}



#' round_woe
#'
#' Rounds a single value; \code{NA} is returned for \code{NA}, \code{NULL} and zero-length input.
#'
#' @param x numeric(1)
#' @param stellen number of decimal places
#'
#' @return rounded value or \code{NA}
#'
#' @keywords internal
round_woe <- function(x, stellen){

  # NULL / zero-length input has to be caught first: is.na(NULL) is logical(0),
  # which is not a valid `if` condition.
  if(is.null(x) || length(x) == 0L) return(NA)

  if(is.na(x)) return(NA)

  return(round(x, stellen))

}




#' top_to_x
#'
#' Replaces negative numbers by 0; everything else is returned unchanged.
#'
#' @param number numeric(1)
#' @param x roof to this number (NOTE(review): currently not used by the implementation)
#'
#' @return numeric
#' @keywords internal
#'
top_to_x <- function(number, x = 0){

  if(!is.na(number) && is.numeric(number) && number < 0){return(0)} else {return(number)}

}
# --------------------------------------------------------------------------------
# /R/Limit_Target_list.R:
# --------------------------------------------------------------------------------
#' Limit_Target_list
#'
#' Restricts the target table to the entries relevant for one file and one
#' isotopologue/adduct run. Except for the most abundant isotopologue of the main adduct,
#' retention time limits (\code{rtmin}/\code{rtmax}) are taken from peaks in \code{CondPeaks}
#' with more than \code{Min.PointsperPeak} points.
#'
#' @param CompCol CompCol
#' @param CondPeaks CondPeaks
#' @param iso.run "MAiso" (entries with \code{isoab == 100}) or "LAisos" (entries with \code{isoab < 100})
#' @param file path of the processed file; its base name without extension is matched against \code{FileName}
#' @param adduct.run "main_adduct" or "screen_adducts"
#' @param Min.PointsperPeak PpP
#'
#'
#' @keywords internal
#'
Limit_Target_list <- function(CompCol, CondPeaks, iso.run, adduct.run, file, Min.PointsperPeak){

  if (iso.run == "MAiso") {
    if (adduct.run == "main_adduct") {
      CompCol_xic <- CompCol[isoab == 100 &
                               FileName == sub(pattern = "(.*)\\..*$",
                                               replacement = "\\1",
                                               basename(file)) &
                               adduct == main_adduct]
    } else if (adduct.run == "screen_adducts") {
      CompCol_xic <- CompCol[isoab == 100 &
                               FileName == sub(pattern = "(.*)\\..*$",
                                               replacement = "\\1",
                                               basename(file)) &
                               adduct != main_adduct, !c("rtmin", "rtmax")]

      # take the retention time limits from the peaks found for the same molecule and file
      CompCol_xic <-
        stats::na.omit(CompCol_xic[unique(CondPeaks[peaks.PpP > Min.PointsperPeak &
                                                      !is.na(peaks.PpP), c("molecule", "FileName", "peaks.StartTime", "peaks.EndTime")]),
                                   on = .(molecule, FileName),
                                   allow.cartesian = TRUE], cols = c("eic_mzmin", "eic_mzmax", "StartTime.EIC", "EndTime.EIC"))

      data.table::setnames(CompCol_xic, "peaks.StartTime", "rtmin")
      data.table::setnames(CompCol_xic, "peaks.EndTime", "rtmax")
    } else {
      # previously surfaced as "object 'CompCol_xic' not found"
      stop("Unsupported adduct.run '", adduct.run, "' for iso.run 'MAiso'; expected 'main_adduct' or 'screen_adducts'.")
    }

  } else if (iso.run == "LAisos") {
    if (adduct.run == "main_adduct") {
      CompCol_xic <- stats::na.omit(CompCol[isoab < 100 &
                                              FileName == sub(pattern = "(.*)\\..*$",
                                                              replacement = "\\1",
                                                              basename(file)) &
                                              adduct == main_adduct, !c("rtmin", "rtmax")], cols = c("eic_mzmin", "eic_mzmax", "StartTime.EIC", "EndTime.EIC"))
    } else {
      CompCol_xic <- CompCol[isoab < 100 &
                               FileName == sub(pattern = "(.*)\\..*$",
                                               replacement = "\\1",
                                               basename(file)) &
                               adduct != main_adduct, !c("rtmin", "rtmax")]
    }

    # take the retention time limits from the peaks found for the same molecule, adduct and file
    CompCol_xic <-
      stats::na.omit(CompCol_xic[unique(CondPeaks[peaks.PpP > Min.PointsperPeak &
                                                    !is.na(peaks.PpP), c("molecule", "adduct", "FileName", "peaks.StartTime", "peaks.EndTime")]),
                                 on = .(molecule, adduct, FileName),
                                 allow.cartesian = TRUE], cols = c("eic_mzmin", "eic_mzmax", "StartTime.EIC", "EndTime.EIC"))

    data.table::setnames(CompCol_xic, "peaks.StartTime", "rtmin")
    data.table::setnames(CompCol_xic, "peaks.EndTime", "rtmax")
  } else {
    # previously surfaced as "object 'CompCol_xic' not found"
    stop("Unsupported iso.run '", iso.run, "'; expected 'MAiso' or 'LAisos'.")
  }

  return(CompCol_xic)

}
# --------------------------------------------------------------------------------
# /R/Skyline_functions.R:
# --------------------------------------------------------------------------------
#' SkylineTransitionList
#'
#' @description Takes the output of \code{\link{find_bench_peaks}} and generates a Skyline Transition list (automatically exported to working directory) which can then be imported to Skyline via
#' Skyline -> File -> Import ->
#' Transition List
#'
#' @param BM output of \code{\link{find_bench_peaks}}
#' @param export_to_csv export output automatically to working directory
#'
#'
#' @return Skyline Transition List
#' @export
#'
#'
SkylineTransitionList <- function(BM, export_to_csv = TRUE){

  # Mass accuracy suggested to the user: half of the widest m/z span (ppm).
  MassPrec <- round(max(BM$peaks.mz_span_ppm) / 2, 1)

  BM <- BM[, c("molecule", "adduct", "isoab", "peaks.mz_accurate", "charge")]
  BM$molecule <- as.character(BM$molecule)
  BM[, "Precursor Name" := paste0(molecule, "_", adduct, "_", round(isoab, 2))]

  # mean accurate m/z per precursor (used for precursor and product alike)
  mz_per_precursor <- BM[, .(`Molecule List Name` = molecule,
                             `Precursor m/z` = mean(peaks.mz_accurate),
                             `Product m/z` = mean(peaks.mz_accurate)),
                         by = .(`Precursor Name`)]

  # attach the charge of every precursor
  charge_per_precursor <- unique(BM[, c("Precursor Name", "charge")])
  BM <- charge_per_precursor[mz_per_precursor, on = .(`Precursor Name`)]

  BM <- BM[, c("Precursor charge", "Product charge") := .(charge, charge)][, !"charge"]

  transition_list <- unique(BM)

  if(export_to_csv == TRUE){
    data.table::fwrite(transition_list, file = "Skyline_Transition_List.csv", row.names = FALSE)
    saved_msg <- paste0("Transition List has been saved to your working directory as ", getwd(), "/Skyline_Transition_List.csv")
    message(saved_msg)
  }

  howto_msg <- paste0("Please go to 'Skyline -> Settings -> Transition Settings -> Full-Scan -> Mass Accuracy' and set 'Precursor mass analyzer' to 'Centroided' and ",
                      "Mass Accuracy to about ", MassPrec, " ppm. You can then load this Transition list into Skyline via 'Skyline -> File -> Import -> Transition List...'.")
  message(howto_msg)

  return(transition_list)

}


#' SkylinePeakBoundaries
#' @description Takes the output of \code{\link{find_bench_peaks}} and generates a Skyline peak-boundaries file (automatically exported to working directory) which can then be imported to Skyline via
#' Skyline -> File -> Import -> Peak Boundaries... (after the required mzML files have been imported into Skyline using Skyline -> Import -> Results...)
#'
#' @param BM output of \code{\link{find_bench_peaks}}
#' @param export_to_csv export output automatically to working directory
#'
#' @return Skyline peak boundaries
#' @export
#'
#'
SkylinePeakBoundaries <-
  function(BM, export_to_csv = TRUE){

    BM <- BM[, c("molecule", "adduct", "isoab", "FileName", "peaks.StartTime", "peaks.EndTime")]
    # the exported boundaries are the input times divided by 60
    BM[, peaks.StartTime := peaks.StartTime/60]
    BM[, peaks.EndTime := peaks.EndTime/60]
    BM[, molecule := as.character(molecule)]

    # The original evaluated `BM[order(BM$isoab, decreasing = TRUE),]` here and
    # discarded the result; the statement had no effect and was removed.

    BM[, "Peptide Modified Sequence" := paste0(molecule, "_", adduct, "_", round(isoab, 2))]

    files <- data.table::data.table("FileName" = sort(unique(BM$FileName)),
                                    "i" = seq(length(unique(BM$FileName))))

    EICs <- data.table::data.table("Peptide Modified Sequence" = sort(unique(BM$`Peptide Modified Sequence`)),
                                   "i" = rep(1, length(unique(BM$`Peptide Modified Sequence`))))

    # EICs$i is always 1 and files$i starts at 1, so `i <= i` pairs every
    # sequence with every file (cross join).
    Peak_Boundaries_Skyline <- EICs[files, on=.(i<=i), allow.cartesian = TRUE][, !"i"]

    Peak_Boundaries_Skyline <- BM[, c("FileName" ,"Peptide Modified Sequence", "peaks.StartTime", "peaks.EndTime")][
      Peak_Boundaries_Skyline,
      on = .(`Peptide Modified Sequence`, FileName),
      nomatch = NA
    ]

    # combinations without a benchmark peak get 0/0 as boundaries
    Peak_Boundaries_Skyline[is.na(peaks.StartTime), peaks.StartTime := 0]
    Peak_Boundaries_Skyline[is.na(peaks.EndTime), peaks.EndTime := 0]

    colnames(Peak_Boundaries_Skyline) <- c("File Name", "Peptide Modified Sequence", "Min Start Time", "Max End Time")

    if(export_to_csv == TRUE){
      data.table::fwrite(Peak_Boundaries_Skyline, file = "Skyline_Peak_Boundaries.csv", row.names = FALSE)
      message(paste0("Peak Boundaries have been saved to your working directory as ", getwd(), "/Skyline_Peak_Boundaries.csv"))
    }

    message("After Transition List and mzML files have been loaded into Skyline you can apply these Peak Boundaries via 'Skyline -> File -> Import -> Peak Boundaries...'.")

    return(Peak_Boundaries_Skyline)
  }

# --------------------------------------------------------------------------------
# /R/alignment_error_plot.R:
# --------------------------------------------------------------------------------
#' Alignment_error_plot
#'
#' @param mol Name of molecule
#' @param add Name of adduct
#' @param comparison_data output of compare_peaks
#'
#'
#'
#' @import plotly
#' @importFrom ggplot2 ggplot aes ggtitle coord_equal labs theme element_text geom_tile
#' @return plotly object; an empty plot titled "Missing arguments" or "No peaks" is returned when
#'   arguments are missing/empty or no peak matches \code{mol} and \code{add}
#' @export
#'
Alignment_error_plot <- function(comparison_data, mol, add){

  peak_area_rounded_ug <- NULL

  # `||` short-circuits: with `|` the length() calls were evaluated even for
  # missing arguments, which raised an error instead of returning this plot.
  if(missing(mol) || missing(add) || missing(comparison_data) || length(mol) < 1 || length(add) < 1) return(plotly::ggplotly(ggplot() +
                                                                                                                                 ggtitle("Missing arguments")))

  dt <- data.table::rbindlist(list(comparison_data$Matches_BM_NPPpeaks[main_peak == TRUE], comparison_data$Unmatched_BM_NPPpeaks), fill = TRUE)

  if('peak_area_rounded_ug' %in% colnames(dt)){
    dt <- dt[, 'peak_area_ug' := peak_area_rounded_ug]
  }
  if(nrow(dt[(main_peak == "TRUE" | is.na(main_peak)) &
             molecule_b == mol &
             adduct_b == add]) == 0) return(plotly::ggplotly(ggplot() +
                                                               ggtitle("No peaks")))

  dt <- dt[(main_peak == "TRUE" | is.na(main_peak)) &
             molecule_b == mol &
             adduct_b == add, c('sample_id_b',
                                'main_peak',
                                'isoab_b',
                                'feature_id_g',
                                'molecule_b',
                                'adduct_b',
                                'peak_area_g',
                                'peak_area_ug',
                                'sample_name_b')]

  # no aligned and no unaligned area -> "Lost_b.PP"; unaligned area only -> "Lost_b.A";
  # otherwise the feature id
  dt <- dt[, peak_status := ifelse(is.na(peak_area_g) & is.na(peak_area_ug), "Lost_b.PP",
                                   ifelse(is.na(peak_area_g) & !is.na(peak_area_ug), 'Lost_b.A',
                                          ifelse(!is.na(peak_area_g) & !is.na(peak_area_ug) & peak_area_g != peak_area_ug, feature_id_g, feature_id_g)))] #repl -3

  dt_for_error_count <- data.table::dcast(dt, sample_id_b ~ isoab_b, value.var='peak_status', fun.aggregate = function(x) paste(x, collapse = ""))

  error_count <- count_alignment_errors(dt_for_error_count, get_main_UT_groups(dt_for_error_count))[1]

  p <- ggplot(dt, aes(x = as.character(sample_name_b),
                      y = as.factor(round(isoab_b, 2)),
                      peak_area_ug = peak_area_ug)) +
    geom_tile(aes(fill = as.character(peak_status), width = 0.5, height = 0.5), color = "white") +
    coord_equal() +
    ggtitle(paste0("Alignment of ", mol, " ", add, " | Min. errors: ",ifelse(is.na(error_count), 0, error_count))) +
    labs(x = "Samples", y = "Isotopologues", fill = "NPP feature ID") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  return(plotly::ggplotly(p))

}
# --------------------------------------------------------------------------------
# /R/assemble_peaks.R:
# --------------------------------------------------------------------------------
#' assemble_peaks
#'
#' Builds the table of peak ranges for one peak group. Without break points the
#' single range \code{s}-\code{e} is returned (as integers); otherwise the range
#' is split at every break point into consecutive sub-ranges.
#'
#' @param peak.grp peak.grp
#' @param s start
#' @param e end
#' @param breakP break point(s); any \code{NA} means "do not split"
#'
#' @return data.frame with the columns \code{peak.grp}, \code{s} and \code{e}
#'
#' @keywords internal
#'

assemble_peaks <- function(peak.grp, s, e, breakP) {

  if (anyNA(breakP)) {
    return(data.frame(peak.grp = peak.grp,
                      s = as.integer(s),
                      e = as.integer(e),
                      stringsAsFactors = FALSE))
  }

  borders <- c(s, breakP, e)

  # one row per pair of neighbouring borders
  segments <- lapply(seq_len(length(borders) - 1L), function(i) {
    data.frame(s = borders[i],
               e = borders[i + 1L],
               stringsAsFactors = FALSE)
  })

  output <- data.frame(peak.grp = peak.grp, do.call(rbind, segments, quote = TRUE))

  return(output)
}
# --------------------------------------------------------------------------------
# /R/assess_alignment.R:
#' assess_alignment
#'
#' Counts alignment errors per molecule and adduct by applying
#' count_errors_max() to the combined matched and unmatched benchmark peaks.
#'
#' @param Matches_BM_NPPpeaks Matches_BM_NPPpeaks
#' @param Unmatched_BM_NPPpeaks Unmatched_BM_NPPpeaks
#' @param g_table g_table; when it has no rows an empty result table is returned
#'
#' @return data.table with columns Molecule, Adduct, Min.errors, Lost_b.A and
#'   BM.div
#'
#' @keywords internal
#'
#'

assess_alignment <- function(Matches_BM_NPPpeaks,
                             Unmatched_BM_NPPpeaks,
                             g_table){

  # Nothing was aligned: hand back an empty table with the expected columns.
  if (!(nrow(g_table) > 0)) {
    empty_errors <- data.table(matrix(nrow = 0, ncol = 5))
    return(stats::setNames(empty_errors,
                           c("Molecule", "Adduct", "Min.errors", "Lost_b.A", "BM.div")))
  }

  bm_peaks <- data.table::rbindlist(list(Matches_BM_NPPpeaks, Unmatched_BM_NPPpeaks),
                                    fill = TRUE)

  # A rounded unaligned area column, when present, replaces peak_area_ug.
  if ('peak_area_rounded_ug' %in% colnames(bm_peaks)) {
    bm_peaks <- bm_peaks[, 'peak_area_ug' := peak_area_rounded_ug]
  }

  counted_cols <- c('molecule_b',
                    'adduct_b',
                    'main_peak',
                    'sample_id_b',
                    'isoab_b',
                    'feature_id_g',
                    'peak_area_g',
                    'peak_area_ug')

  errors_per_compound <- bm_peaks[, as.list(count_errors_max(.SD)),
                                  .SDcols = counted_cols,
                                  by = .(molecule_b, adduct_b)]

  # Rename to the column names used for reporting.
  errors_per_compound <- data.table::setnames(
    errors_per_compound,
    c('errors', 'Lost_b.A', 'diff_BM', 'molecule_b', 'adduct_b'),
    c('Min.errors', 'Lost_b.A', 'BM.div', 'Molecule', 'Adduct')
  )

  return(errors_per_compound)
}
#' check_IR_biases
#'
#' Computes, for every benchmark isotopologue other than the one with
#' \code{isoab_b == 100}, the relative deviation (in percent) of its area from
#' the area expected from the \code{isoab_b == 100} peak of the same sample,
#' molecule and adduct. The deviation is computed for the benchmark areas, the
#' unaligned (peak picking) areas and the aligned (feature) areas.
#'
#' @param Matches_BM_NPPpeaks Matches_BM_NPPpeaks; only used when
#'   \code{g_table} has no rows
#' @param Matches_BM_NPPpeaks_NPPfeatures Matches_BM_NPPpeaks_NPPfeatures; used
#'   when \code{g_table} has rows
#' @param g_table g_table
#' @param b_table b_table; supplies the columns \code{peaks.rt_neighbors_b} and
#'   \code{peaks.mz_neighbors_b}
#'
#' @return data.table with the added columns \code{benchmark},
#'   \code{NPP_peak picking}, \code{NPP_features}, \code{RT_neighbors},
#'   \code{mz_neighbors}, \code{diffH20PP_pp} and \code{diffH20PP_ft}; rows with
#'   missing \code{peak_area_b} are dropped.
#'
#' @keywords internal
#'

check_IR_biases <- function(Matches_BM_NPPpeaks,
                            Matches_BM_NPPpeaks_NPPfeatures,
                            g_table,
                            b_table){

  if(nrow(g_table) > 0){

    IT_ratio_biases <- Matches_BM_NPPpeaks_NPPfeatures

    # Attach the neighbor columns from the benchmark; all.x and all.y are both
    # TRUE, so this is a full outer join on the four key columns.
    IT_ratio_biases <-
      data.table::merge.data.table(IT_ratio_biases[!is.na(peak_area_b)],
                                   b_table[, c("molecule_b",
                                               "adduct_b",
                                               "isoab_b",
                                               "sample_name_b",
                                               "peaks.rt_neighbors_b",
                                               "peaks.mz_neighbors_b")],
                                   allow.cartesian = TRUE,
                                   by = c("molecule_b", "adduct_b", "isoab_b", "sample_name_b"),
                                   all.x = TRUE,
                                   all.y = TRUE)

    # IT_ratio_biases <- IT_ratio_biases[b_table[, c("molecule_b",
    #                                                "adduct_b",
    #                                                "isoab_b",
    #                                                "sample_name_b",
    #                                                "peaks.rt_neighbors_b",
    #                                                "peaks.mz_neighbors_b")], on = .(molecule_b, adduct_b, isoab_b, sample_name_b)]

  } else {
    # No aligned features: fall back to the peak-level matches and add an empty
    # area_g column so the ratio computation below still finds it.
    # NOTE(review): `:=` updates by reference, so these two statements also
    # alter the table object passed in by the caller - confirm this is intended.
    Matches_BM_NPPpeaks[, sample_id_b := as.factor(sample_id_b)]
    Matches_BM_NPPpeaks[, area_g := as.numeric(NA)]
    IT_ratio_biases <- Matches_BM_NPPpeaks
  }

  # Join every row with isoab_b != 100 to the isoab_b == 100 row(s) of the same
  # sample, molecule and adduct. Columns prefixed with `i.` come from the
  # isoab_b == 100 rows. Each bias is
  #   (observed area / (area of the 100 peak * isoab_b / 100) - 1) * 100.
  # The neighbor columns are pasted as "<value of 100 peak> | <value of this peak>".
  IT_ratio_biases <- IT_ratio_biases[isoab_b != 100][IT_ratio_biases[isoab_b == 100,
                                                                     c("sample_id_b", "sample_name_b", "molecule_b", "adduct_b", "area_g", "peak_area_b", "peak_area_ug", "peaks.rt_neighbors_b", "peaks.mz_neighbors_b")],
                                                     on=.(sample_name_b, molecule_b, adduct_b),
                                                     nomatch = NA, allow.cartesian=TRUE][,c("benchmark",
                                                                                            "NPP_peak picking",
                                                                                            "NPP_features",
                                                                                            "RT_neighbors",
                                                                                            "mz_neighbors") := .((peak_area_b / ((i.peak_area_b * isoab_b) / 100) - 1) * 100,
                                                                                                                (peak_area_ug / ((i.peak_area_ug * isoab_b) / 100) - 1) * 100,
                                                                                                                (area_g / ((i.area_g * isoab_b) / 100) - 1) * 100,
                                                                                                                paste0(paste0(i.peaks.rt_neighbors_b, " | "), peaks.rt_neighbors_b),
                                                                                                                paste0(paste0(i.peaks.mz_neighbors_b, " | "), peaks.mz_neighbors_b))]

  # Flag a peak-picking bias only when all three conditions hold: the absolute
  # biases differ by more than 10, the signed biases differ by more than 20,
  # and the non-targeted bias itself exceeds 30 in absolute value.
  IT_ratio_biases[, diffH20PP_pp := as.character(
    abs(abs(benchmark) - abs(`NPP_peak picking`)) > 10 &
      abs(`NPP_peak picking` - benchmark) > 20 &
      abs(`NPP_peak picking`) > 30)]

  # Same three conditions for the aligned (feature) areas.
  IT_ratio_biases[, diffH20PP_ft := as.character(abs(abs(benchmark) - abs(NPP_features)) > 10 &
                                                   abs(NPP_features - benchmark) > 20 &
                                                   abs(NPP_features) > 30)]

  # Replace the "TRUE"/"FALSE" strings by the labels used downstream; rows
  # where the flag is NA keep NA.
  IT_ratio_biases[diffH20PP_pp == "TRUE"]$diffH20PP_pp <- "Inc. > 20%p"
  IT_ratio_biases[diffH20PP_pp == "FALSE"]$diffH20PP_pp <- "Inc. < 20%p"

  IT_ratio_biases[diffH20PP_ft == "TRUE"]$diffH20PP_ft <- "Inc. > 20%p"
  IT_ratio_biases[diffH20PP_ft == "FALSE"]$diffH20PP_ft <- "Inc. < 20%p"

  # Drop rows without a benchmark area (they can appear through nomatch = NA).
  IT_ratio_biases <- IT_ratio_biases[!is.na(peak_area_b)]

  return(IT_ratio_biases)

}
!is.na(peak_area_b), 32 | ..join_vct])) 33 | 34 | 35 | MissingPeak_classification <- MissingPeak_classification[join_on_dt, on = .(molecule_b, adduct_b, isoab_b, sample_name_b)] 36 | 37 | MissingPeak_classification <- Matches_BM_NPPpeaks_NPPfeatures[main_feature == TRUE & 38 | !is.na(peak_area_b), c("molecule_b", 39 | "adduct_b", 40 | "isoab_b", 41 | "sample_name_b", 42 | "area_g")][MissingPeak_classification, 43 | on = .(molecule_b, adduct_b, isoab_b, sample_name_b)] 44 | 45 | 46 | MissingPeak_classification[, peak_area_g := area_g] 47 | 48 | MissingPeak_classification <- MissingPeak_classification[!is.na(peak_area_b)] 49 | MissingPeak_classification <- MissingPeak_classification[order(feature_id_g)] 50 | 51 | MissingPeak_classification <- 52 | MissingPeak_classification[, Connected := File_con_test( 53 | sample_name_b, 54 | feature_id_g), 55 | by = .(molecule_b, adduct_b)] 56 | 57 | colnames(MissingPeak_classification) <- replace(colnames(MissingPeak_classification), colnames(MissingPeak_classification) == "area_g", "peak_area_g") 58 | 59 | } else { 60 | 61 | MissingPeak_classification[, Connected := TRUE] 62 | } 63 | 64 | MissingPeak_classification <- 65 | MissingPeak_classification[, c("missing_peaks_ug", "missing_peaks_g") := .(find_r_s_error( 66 | peak_area_b, 67 | peak_area_ug, 68 | peak_height_b, 69 | Connected), 70 | find_r_s_error( 71 | peak_area_b, 72 | peak_area_g, 73 | peak_height_b, 74 | Connected) 75 | ), by = .(molecule_b, adduct_b, isoab_b)] 76 | 77 | 78 | 79 | MissingPeak_classification <- MissingPeak_classification[!is.na(peak_area_b)] 80 | 81 | return(MissingPeak_classification) 82 | 83 | } 84 | -------------------------------------------------------------------------------- /R/clean_peak_assignments.R: -------------------------------------------------------------------------------- 1 | #' clean_peak_assignments 2 | #' 3 | #' @param Input_table unfinished benchmark 4 | #' 5 | #' @return unfinished benchmark 6 | #' 7 | #' @keywords internal 8 | 
#' clean_peak_assignments
#'
#' Resolves peaks that were assigned more than once, i.e. rows sharing
#' \code{peaks.rt_raw}, \code{peaks.height} and \code{FileName}. If a
#' \code{user.rt} column exists, only the rows whose \code{user.rt} is closest
#' to \code{peaks.rt_raw} are kept within each duplicate group; remaining
#' duplicates are then reduced to the rows with the smallest
#' \code{peaks.mz_accuracy_ppm}. Ties are kept.
#'
#' @param Input_table unfinished benchmark
#'
#' @return unfinished benchmark with an additional row index column \code{IDX};
#'   an input without rows is returned unchanged
#'
#' @keywords internal
#'
clean_peak_assignments <- function(Input_table){

  if (nrow(Input_table) < 1) {
    return(Input_table)
  }

  # Columns that define "the same peak". The per-group minima below have to use
  # the same key: grouping without FileName would let a peak from another file
  # with identical height and RT decide which rows of this file survive, and
  # could remove a whole duplicate group.
  dup_keys <- c("peaks.rt_raw", "peaks.height", "FileName")

  Depl_table <- Input_table
  Depl_table$IDX <- seq.int(nrow(Depl_table))
  dpl_peaks <- hutils::duplicated_rows(Depl_table, by = dup_keys)

  # Step 1: prefer the assignment whose expected RT is closest to the peak RT.
  if ("user.rt" %in% colnames(Depl_table) && nrow(dpl_peaks) > 0) {

    dpl_peaks$rt_diff <- abs(dpl_peaks$user.rt - dpl_peaks$peaks.rt_raw)
    dpl_peaks[, rt_diff_min := min(rt_diff), by = dup_keys]
    dpl_peaks$keep_rt <- dpl_peaks$rt_diff == dpl_peaks$rt_diff_min
    kick_list <- dpl_peaks[keep_rt == FALSE]$IDX
    Depl_table <- Depl_table[!(IDX %in% kick_list)]

    # Re-detect duplicates that survived the RT criterion (ties).
    dpl_peaks <- hutils::duplicated_rows(Depl_table, by = dup_keys)

  }

  # Step 2: among remaining duplicates keep the smallest m/z accuracy value.
  if (nrow(dpl_peaks) > 0) {

    dpl_peaks[, mz_diff_min := min(peaks.mz_accuracy_ppm), by = dup_keys]
    dpl_peaks$keep_mz <- dpl_peaks$peaks.mz_accuracy_ppm == dpl_peaks$mz_diff_min
    kick_list <- dpl_peaks[keep_mz == FALSE]$IDX
    Depl_table <- Depl_table[!(IDX %in% kick_list)]

  }

  return(Depl_table)

}
#' count_errors_max
#'
#' Builds the sample-by-isotopologue status table for one molecule/adduct group
#' and passes it to count_alignment_errors().
#'
#' @param dt data.table holding the columns main_peak, sample_id_b, isoab_b,
#'   feature_id_g, molecule_b, adduct_b, peak_area_g and peak_area_ug
#'
#' @return the value returned by count_alignment_errors()
#'
#' @keywords internal
count_errors_max <- function(dt){

  status_cols <- c('sample_id_b', 'isoab_b', 'feature_id_g', 'molecule_b',
                   'adduct_b', 'peak_area_g', 'peak_area_ug')

  # Main peaks plus unmatched rows (main_peak is NA for those).
  peaks <- dt[is.na(main_peak) | main_peak == 'TRUE', status_cols, with = FALSE]

  # Peak status per row:
  #   "Lost_b.PP" - no area after peak picking and none after alignment
  #   "Lost_b.A"  - area after peak picking but none after alignment
  #   otherwise   - the feature ID the peak was aligned into
  # The last ifelse() yields feature_id_g on both branches; the original marks
  # that spot as a placeholder for a separate status.
  peaks <- peaks[, peak_status := {
    no_g  <- is.na(peak_area_g)
    no_ug <- is.na(peak_area_ug)
    ifelse(no_g & no_ug, "Lost_b.PP",
           ifelse(no_g & !no_ug, 'Lost_b.A',
                  ifelse(!no_g & !no_ug & (peak_area_g != peak_area_ug), feature_id_g, feature_id_g)))
  }]

  # Everything is compared as text further down.
  for (text_col in c('sample_id_b', 'isoab_b', 'peak_status')) {
    peaks[, (text_col) := as.character(get(text_col))]
  }

  # One row per sample, one column per isotopologue.
  status_wide <- data.table::dcast(peaks, sample_id_b ~ isoab_b,
                                   value.var = 'peak_status',
                                   fun.aggregate = function(x) paste(x, collapse = ""))

  return(count_alignment_errors(status_wide, get_main_UT_groups(status_wide)))
}
min_rt_start] 35 | b_table[, max_rt_end_temp := max_rt_end] 36 | 37 | 38 | #Calculate total area of g feature 39 | g_table <- g_table[, ':=' (total_area_g= sum(peak_area_g), 40 | present_samples_g = paste(.SD$sample_id_g, collapse = ','), 41 | sample_id_g_suf = paste0('sample_',sample_id_g, '_g')), by=c('feature_id_g')] 42 | 43 | #Bring g_table into wide format 44 | g_table <- data.table::dcast(g_table, feature_id_g + total_area_g + rt_g + mz_g + present_samples_g ~ sample_id_g_suf, value.var = c('peak_area_g')) 45 | 46 | 47 | if(length(areaMatch_table) > 1){ 48 | areaMatch_table <- g_table[areaMatch_table, on = .(feature_id_g)][!is.na(feature_id_g) & !is.na(feature_id_b)] 49 | } 50 | 51 | 52 | #Merge 53 | cf_table <- b_table[g_table, on=.(min_mz_start_temp <= mz_g, 54 | max_mz_end_temp >= mz_g, 55 | min_rt_start_temp <= rt_g, 56 | max_rt_end_temp >= rt_g), allow.cartesian=TRUE, nomatch=NULL, mult='all'] 57 | 58 | cf_table <- cf_table[, !c("min_mz_start_temp", "max_mz_end_temp", "min_rt_start_temp", "max_rt_end_temp")] 59 | 60 | 61 | if(length(areaMatch_table) > 1){ 62 | cf_table <- data.table::rbindlist(list(cf_table, areaMatch_table[, !c("rt_g", "mz_g")]), use.names = TRUE) 63 | cf_table <- unique(cf_table) 64 | } 65 | 66 | 67 | if(nrow(cf_table) == 1){ 68 | 69 | cf_table$samples_to_compare <- paste0(apply(cf_table,1,function(x){paste(intersect(unlist(strsplit(x['present_samples_g'], ',')), unlist(strsplit(x['present_samples_b'], ','))))}), collapse = ",") 70 | 71 | }else{ 72 | 73 | cf_table$samples_to_compare <- apply(cf_table,1,function(x){paste(intersect(unlist(strsplit(x['present_samples_g'], ',')), unlist(strsplit(x['present_samples_b'], ','))), collapse = ",")}) 74 | 75 | } 76 | 77 | 78 | return(cf_table) 79 | } 80 | -------------------------------------------------------------------------------- /R/comp_find_best_feature_feature.R: -------------------------------------------------------------------------------- 1 | #' find_best_feature_feature 2 | #' 3 | 
#' @param dt dt 4 | #' @param bys bys 5 | #' 6 | #' @return best feature 7 | #' 8 | #' @keywords internal 9 | find_best_feature_feature <- function(dt, bys) { 10 | 11 | dt <- data.table::copy(dt) 12 | dt <- dt[, 'cross_join_key' := 1] 13 | 14 | all_iso <- sort(unique(dt$isoab_b), decreasing=TRUE) 15 | #Stop if each iso occures axactly once, mark all as main feature 16 | if (length(all_iso) == nrow(dt)){ 17 | return_dt <- data.table::setDT(list(feature_id_g = unique(dt$feature_id_g))) 18 | return_dt[, 'main_feature' := TRUE] 19 | } else if (length(all_iso) < nrow(dt)){ 20 | 21 | return_dt <- data.table::setDT(list(feature_id_g = unique(dt$feature_id_g))) 22 | 23 | exp_ratio = all_iso[2]/all_iso[1] 24 | 25 | #Figure out first (100) iso main feature 26 | merged_dt <- merge(dt[isoab_b == all_iso[1]], dt[isoab_b == all_iso[2]], by='cross_join_key', allow.cartesian = TRUE) 27 | merged_dt[, area_ratio := mean_area_g.y/mean_area_g.x] 28 | merged_dt[, main_feature := ifelse(abs(area_ratio-exp_ratio) == min(abs(area_ratio-exp_ratio)), TRUE, FALSE)] 29 | 30 | #Debug check - exactly one main feature should be present and no na 31 | if (nrow(merged_dt[main_feature == TRUE]) > 1){ 32 | stop('error in main feature') 33 | } 34 | 35 | highest_main_feature <- merged_dt[main_feature == TRUE]$feature_id_g.x 36 | main_features <- list(highest_main_feature) 37 | 38 | #for Debug 39 | second_h_feature <- merged_dt[main_feature == TRUE]$feature_id_g.y 40 | 41 | rm(merged_dt) 42 | 43 | for (i in 2:length(all_iso)) { 44 | exp_ratio = all_iso[i]/all_iso[1] 45 | merged_dt <- merge(dt[feature_id_g == highest_main_feature], dt[isoab_b == all_iso[i]], by='cross_join_key', allow.cartesian = TRUE) 46 | merged_dt[, area_ratio := mean_area_g.y/mean_area_g.x] 47 | merged_dt[, main_feature := ifelse(abs(area_ratio-exp_ratio) == min(abs(area_ratio-exp_ratio)), TRUE, FALSE)] 48 | 49 | #Debug check - exactly one main feature should be present and no na 50 | if (nrow(merged_dt[main_feature == TRUE]) > 1){ 
#' find_r_s_error
#'
#' Classifies every benchmark peak of one group as found or missing. The
#' reference is the benchmark area/height of the row with the smallest
#' non-targeted area. Codes returned:
#' \itemize{
#'   \item 'L'  - no reference available (no non-targeted area in the group, or
#'                the reference benchmark area is NA)
#'   \item 'R'  - missing although benchmark area and height both exceed 1.5
#'                times the reference
#'   \item 'S'  - missing and not exceeding that threshold
#'   \item 'F'  - every other row once a reference exists
#'   \item 'NC' - any non-'F' row with \code{Connected == FALSE}
#' }
#'
#' @param peak_area_b peak_area_b
#' @param peak_area non-targeted peak area (NA where the peak is missing)
#' @param peak_height_b peak_height_b
#' @param Connected logical, as produced by File_con_test()
#'
#' @return character vector with one code per input row
#'
#' @export
#'
#' @importFrom data.table data.table
#' @keywords internal
#'
find_r_s_error <- function(peak_area_b, peak_area, peak_height_b, Connected){

  peaks <- data.table(peak_area_b, peak_area, peak_height_b, Connected)
  peaks[, r_s_error := NA_character_]

  # Reference values stay NA unless at least one non-targeted area exists.
  ref_area_b <- NA
  ref_height_b <- NA
  if (!all(is.na(peaks$peak_area))) {
    smallest_found <- which.min(peaks$peak_area)
    ref_area_b <- peaks$peak_area_b[smallest_found]
    ref_height_b <- peaks$peak_height_b[smallest_found]
  }

  if (is.na(ref_area_b)) {
    # No usable reference: everything counts as lost.
    peaks[, r_s_error := 'L']
  } else {
    # Missing peaks: threshold is 1.5 times the reference area AND height.
    peaks[is.na(peak_area),
          r_s_error := as.character(ifelse(peak_area_b > ref_area_b * 1.5 &
                                             peak_height_b > ref_height_b * 1.5,
                                           'R',
                                           'S'))]
    # Whatever is still unclassified is treated as found.
    peaks[is.na(r_s_error), r_s_error := 'F']
  }

  peaks[Connected == FALSE & r_s_error != 'F', r_s_error := 'NC']

  return(peaks$r_s_error)
}
#' import_options
#'
#' Reads an options table from a csv file; empty strings are read as NA.
#'
#' @param file_path path to the options csv file
#'
#' @return the options as a data.table
#'
#' @keywords internal
import_options <- function (file_path) {

  if (is.null(file_path)) stop('No options file selected')

  extension <- tools::file_ext(file_path)
  if (extension != 'csv') stop('options file is not a valid csv file')

  options_table <- data.table::fread(file_path, na.strings = c(""))

  if (data.table::is.data.table(options_table)) {
    return(options_table)
  }
  stop('Options is not type DataTable')
}
#' remove_identical_peaks
#'
#' Drops rows that repeat an earlier row in all identifying columns and reports
#' how many rows were removed via message().
#'
#' @param dt data.table of non-targeted peaks
#' @param grouped FALSE (default) compares peak_area, peak_height, mz, mz_start,
#'   mz_end, rt, rt_start and rt_end; otherwise only peak_area, mz and rt
#'
#' @return dt without the duplicated rows
#'
#' @keywords internal
remove_identical_peaks <- function(dt, grouped = FALSE){

  n_before <- nrow(dt)

  identity_cols <- if (grouped == FALSE) {
    c('peak_area', 'peak_height', 'mz', 'mz_start', 'mz_end', 'rt', 'rt_start', 'rt_end')
  } else {
    c('peak_area', 'mz', 'rt')
  }

  is_repeat <- duplicated(dt, by = identity_cols)
  dt <- dt[!is_repeat]

  n_removed <- n_before - nrow(dt)
  message('Removed ', n_removed, ' identical peaks from non-targeted output')

  return(dt)
}
#' @title count_alignment_errors
#'
#' @description count alignment errors occuring within a compound
#' @param DT wide table with one row per sample (column \code{sample_id_b}) and
#'   one column per isotopologue; cells hold a feature ID, "Lost_b.PP",
#'   "Lost_b.A", "" or NA
#' @param main_UTgroups output of get_main_UT_groups() for \code{DT}: a list of
#'   (round indices, isotopologue column per round, feature ID per round), or NA
#' @param method "self-critical", "trustfull" or "both" (default)
#'
#' @return named integer vector \code{c(errors, Lost_b.A, diff_BM)}.
#'   \code{errors} is NA for method "trustfull", \code{diff_BM} is NA for
#'   method "self-critical". Any other \code{method} value yields NULL.
#'
#' @keywords internal

count_alignment_errors <- function(DT, main_UTgroups, method = "both"){

  DT[] <- lapply(DT, function(x) as.character(x))

  # Number of "Lost_b.A" cells in the whole table. This is needed by every
  # method, so it is computed up front; previously it was only defined inside
  # the self-critical branch and method = "trustfull" failed with
  # "object 'lba_e' not found".
  lba <- data.table::as.data.table(table(unlist(DT)))
  if(nrow(lba[V1 == "Lost_b.A"]) == 1) {
    lba_e <- as.integer(lba[V1 == "Lost_b.A"]$N)
  } else {lba_e <- 0L}

  if(method == "self-critical" || method == "both"){

    # Fewer than two isotopologue columns or no usable main groups: nothing can
    # be cross-checked.
    if(length(DT) < 3 || is.na(main_UTgroups[[1]][1])){return(c(errors = 0L, Lost_b.A = lba_e, diff_BM = 0L))}

    #going through isotopologues which are necessary to cover all samples!

    error_list <- lapply(main_UTgroups[[1]], function(x) {
      entrustedGrp <- main_UTgroups[[2]][x] #isotopologue for this round
      DTsub <- DT[DT[[entrustedGrp]] == main_UTgroups[[3]][x]] #samples for which this isotopologues can be used
      if(x>1){
        # Exclude samples that were already covered in an earlier round.
        for(i in seq(x-1)){
          already_testedGrp <- main_UTgroups[[2]][i]
          DTsub <- DTsub[DTsub[[already_testedGrp]] != main_UTgroups[[3]][i]]
        }
      }

      # Local shadow of DT: all samples sharing this round's trusted feature.
      DT <- DT[DT[[entrustedGrp]] == main_UTgroups[[3]][x]]
      isos_to_test <- list(seq(length(DTsub[, !c("sample_id_b", ..entrustedGrp)])), #all isotopologues which should now be checked via the one used in this round
                           colnames(DTsub[, !c("sample_id_b", ..entrustedGrp)]))


      errors <- lapply(isos_to_test[[1]], function(y){

        iso_to_test <- isos_to_test[[2]][y] #iso checked in this round

        yDTsub <- unname(unlist(DTsub[, ..iso_to_test]))
        yDTall <- unname(unlist(DT[, ..iso_to_test]))
        # Most frequent real feature ID of this isotopologue (NULL if none).
        best_UTgrp <- names(which.max(table(yDTall[!yDTall %in% c("Lost_b.PP", "Lost_b.A", NA, "")])))

        if(!is.null(best_UTgrp)){

          alignment_splits_vector.all <- !yDTall %in% best_UTgrp
          alignment_splits_vector.sub <- !yDTsub %in% best_UTgrp

          # Only count when the column contains both cells equal to and cells
          # different from the most frequent feature ID.
          if(any(!alignment_splits_vector.all) && any(alignment_splits_vector.all)){

            problematic_joins <- yDTsub[alignment_splits_vector.sub][!yDTsub[alignment_splits_vector.sub] %in% c("Lost_b.A", "Lost_b.PP", NA, "")]
            return(length(problematic_joins))
          }
        }
      })

      errors <- unlist(errors)
    })

    if(method == "self-critical"){
      return(c(errors = as.integer(sum(unlist(error_list))), Lost_b.A = as.integer(sum(lba_e)), diff_BM = as.integer(NA)))
    }
  }



  if(method == "trustfull" || method == "both"){

    # Per isotopologue column: number of cells holding a feature ID other than
    # the most frequent one.
    diff_BM_list <- apply(DT[, !c("sample_id_b")], 2, function(x) {

      if(length(x[!x %in% c("Lost_b.PP", "Lost_b.A", NA)]) > 0) {
        best_UTgrp <- names(which.max(table(x[!x %in% c("Lost_b.PP", "Lost_b.A", NA, "")])))
      } else best_UTgrp <- NULL

      diff_BM_c <- length(x[!x %in% c(best_UTgrp, "Lost_b.PP", NA, "", "Lost_b.A")])
      return(diff_BM_c)


    } )

    if(method == "trustfull"){
      return(c(errors = as.integer(NA), Lost_b.A = as.integer(sum(lba_e)), diff_BM = as.integer(sum(unlist(diff_BM_list)))))
    }

    return(c(errors = as.integer(sum(unlist(error_list))), Lost_b.A = as.integer(sum(lba_e)), diff_BM = as.integer(sum(unlist(diff_BM_list)))))
  }
}
#' import_grouped_Metaboanalyst
#'
#' Imports the aligned (grouped) Metaboanalyst output and reshapes it to one
#' row per feature and sample. The \code{Sample} column is split at "@" into
#' \code{mz} and \code{rt}, file extensions are stripped from the column names,
#' and all column names of the result carry the suffix "_g".
#'
#' @param file path to the aligned csv file; NULL means no aligned output was
#'   supplied
#' @param options_dt options_dt; its \code{ug_samples} column is updated by
#'   reference
#'
#' @return NULL when \code{file} is NULL, otherwise a list with elements
#'   \code{g_table} and \code{options_dt}
#'
#' @keywords internal
import_grouped_Metaboanalyst <- function (file, options_dt) {

  message('Starting Metaboanalyst aligned import')

  # A missing aligned file yields NULL. The stop() that used to follow the
  # return() was unreachable and has been removed.
  if(is.null(file)){
    return(NULL)
  }

  #Check if filetype is csv
  if(tools::file_ext(file) != 'csv'){
    stop('grouped dataset is not a valid csv file')
  }

  #Import csv file
  g_table <- data.table::fread(file)

  if(!"Sample" %in% colnames(g_table)){
    stop('Column "Sample" is missing in aligned output.')
  }

  # Drop the label row, then split "<mz>@<rt>" into two leading columns.
  g_table <- g_table[Sample != "Label"]
  g_table <- g_table[, c("mz", "rt") := data.table::tstrsplit(Sample, "@", fixed=TRUE)]
  data.table::setcolorder(g_table, c("mz", "rt", "Sample", colnames(g_table)[! colnames(g_table) %in% c("mz", "rt", "Sample")]))

  #Check if all columns defined in optionsframe are present
  g_req_cols <- stats::na.omit(options_dt$g_columns)
  colnames(g_table) <- tools::file_path_sans_ext(colnames(g_table))
  if(!all(g_req_cols %in% colnames(g_table))){
    cols_not_found <- setdiff(g_req_cols, colnames(g_table))
    # paste0(x, sep = " - ") treats `sep` as one more string to append; use
    # collapse to join the names with a separator instead.
    stop('Columns defined in options but not present in aligned Metaboanalyst output: ', paste(cols_not_found, collapse = " - "))
  }

  #Compare order of present samples in options_dt to grouped output and update options_dt
  nt_samples <- colnames(g_table)
  options_dt[!is.na(g_samples), ug_samples := match(stats::na.omit(options_dt$g_samples), nt_samples) - length(g_table)] #columns not containing samples have to be substracted in ug_import

  #Add feature_id for each row
  g_table$feature_id <- seq.int(nrow(g_table))

  #Transforming table from wide to long format, creating 1 peak-per-row format
  id_vars <- append(stats::na.omit(options_dt[['g_columns']]), 'feature_id')
  measure_vars <- stats::na.omit(options_dt[!is.na(ug_samples), g_samples])
  g_table <- data.table::melt(g_table, id.vars = id_vars, measure.vars = measure_vars, variable.name = 'sample_name', value.name = 'peak_area')

  #rename all columns for internal use according to options frame
  g_table <- rename_columns_from_options(g_table, options_dt, 'g_columns', 'internal_columns')

  #Add a sample_id column based on the sample_names in options_dt
  g_table <- g_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'g_samples')]

  #Convert to numeric and keep only peaks with an area above 0
  g_table[, peak_area := as.numeric(peak_area)]
  g_table[, rt := as.numeric(rt)]
  g_table[, mz := as.numeric(mz)]
  g_table <- g_table[peak_area > 0]

  #Remove identical peaks
  #g_table <- remove_identical_peaks(g_table, grouped = TRUE)

  #Add comp_id for each peak
  g_table$comp_id <- seq.int(nrow(g_table))

  #Add "_g" as suffix to each column name
  colnames(g_table) <- paste(colnames(g_table), 'g', sep = '_')

  message(paste0('Successful Metaboanalyst aligned import. No. of peaks imported: ', nrow(g_table)))

  return(list(g_table = g_table, options_dt = options_dt))
}
#' @export
#'
check_benchmark_input <- function (file, options_path = "generate", from_csv = TRUE, algo) {

  # Load the benchmark either from a csv path or from an in-memory data.table
  if(from_csv){
    if(is.null(file)){
      stop('No benchmark file selected')
    }
    #Make sure file points to a csv file
    if(tools::file_ext(file) != 'csv'){
      stop('benchmark is not a valid csv file')
    }

    #Import csv file
    b_table <- data.table::fread(file)
  } else {
    if (!data.table::is.data.table(file)){
      stop('Generated benchmark is not a datatable')
    }
    # Work on a copy so the := calls below do not modify the caller's table
    b_table <- data.table::copy(file)
  }

  if(options_path == 'generate'){
    options_table <- generate_options(b_table, algo)
  } else {
    options_table <- import_options(options_path)
  }

  #Make sure options_table is valid
  if (!data.table::is.data.table(options_table)){
    stop('Options is not type DataTable')
  }

  #Check if all columns defined in optionsframe are present
  b_req_cols <- stats::na.omit(options_table$b_columns)
  if(!all(b_req_cols %in% colnames(b_table))){
    cols_not_found <- setdiff(b_req_cols, colnames(b_table))
    stop('Columns defined in options but not present in raw benchmark dataset: ',
         paste(cols_not_found, collapse = " - "))
  }

  #rename all columns for internal use according to options frame
  b_table <- rename_columns_from_options(b_table, options_table, 'b_columns', 'internal_columns')

  #Keep only peaks with strictly positive area and height
  b_table <- b_table[peak_area > 0 & peak_height > 0]

  #Add a sample_id column based on the sample_names in options_table
  b_table <- b_table[options_table, ':=' (sample_id = i.sample_id), on=c(sample_name = 'b_samples')]

  #Duplicate peaks should not be present; remove them with a warning if they are.
  #Detection and removal use the same key (area + rt) so the warning is only
  #raised when rows are actually dropped.
  dup_peaks <- duplicated(b_table, by = c('peak_area', 'rt'))
  if (any(dup_peaks)){
    b_table <- b_table[!dup_peaks]
    warning('Duplicate peaks removed from raw benchmark file')
  }

  #Generate feature ID to quickly detect features later
  b_table <- b_table[, feature_id := .GRP, by = c('molecule', 'adduct', 'isoab')]

  #Remove Features if only observed in one sample (if more than one sample is present in benchmark)
  if(length(unique(b_table$sample_name)) > 1){
    b_table[, ft_count := .N, by = .(feature_id)]
    b_table <- b_table[ft_count > 1, !c("ft_count")]
  }

  #Remove molecules for which only one isotopologue is present
  b_table[, iso_count2 := .N, by =.(molecule, adduct, sample_name)]
  b_table <- b_table[iso_count2 > 1, !c("iso_count2")]

  #Remove molecules if most abundant isotopologue is not present
  b_table[, maIso := any(isoab == "100"), by =.(molecule, adduct, sample_name)]
  b_table <- b_table[maIso == TRUE, !c("maIso")]

  #Generate id for each peak
  b_table$comp_id <- seq.int(nrow(b_table))

  #Add "_b" as suffix to each column name
  colnames(b_table) <- paste(colnames(b_table), 'b', sep = '_')

  return(list('b_table' = b_table, 'options_table' = options_table))
}

--------------------------------------------------------------------------------
/R/comp_import_compd.R:
--------------------------------------------------------------------------------
#' import_ungrouped_cd
#'
#' Reads the unaligned Compound Discoverer peak table from a csv file, renames
#' its columns to the internal names given in \code{options_table}, attaches
#' \code{sample_id}, drops peaks with non-positive area or height and
#' multiplies all retention-time columns by 60.
#'
#' @param file_path path to the unaligned Compound Discoverer csv export
#' @param options_table options data.table providing the columns
#'   \code{ug_columns}, \code{internal_columns}, \code{ug_samples} and \code{sample_id}
#'
#' @return data.table with one row per peak; every column name carries the suffix "_ug"
#'
#' @keywords internal
import_ungrouped_cd <- function(file_path, options_table){

  message('Start import unaligned compound discoverer')

  if(is.null(file_path)){
    stop('No ungrouped file selected')
  }
  #Check if filetype is csv
  if(tools::file_ext(file_path) != 'csv'){
    stop('ungrouped dataset is not a valid csv file')
  }

  #Import csv file
  ug_table <- data.table::fread(file_path)

  #Make sure options_table is valid
  if (!data.table::is.data.table(options_table)){
    stop('Options is not type DataTable')
  }

  #Check if all columns defined in optionsframe are present
  ug_req_cols <- stats::na.omit(options_table$ug_columns)
  if(!all(ug_req_cols %in% colnames(ug_table))){
    cols_not_found <- setdiff(ug_req_cols, colnames(ug_table))
    stop('Columns defined in options but not present in Compound Discoverer ungrouped dataset: ',
         paste(cols_not_found, collapse = " - "))
  }

  #rename all columns for internal use according to options frame
  ug_table <- rename_columns_from_options(ug_table, options_table, 'ug_columns', 'internal_columns')

  #Add a sample_id column based on the sample_names in options_table
  ug_table <- ug_table[options_table, ':=' (sample_id = i.sample_id), on=c(sample_name = 'ug_samples')]

  #Keep only peaks with strictly positive area and height
  ug_table <- ug_table[peak_area > 0 & peak_height > 0]

  #Generate comp_id for each peak
  ug_table$comp_id <- seq.int(nrow(ug_table))

  #Multiply rt by 60 to convert min to seconds ##MAKE OPTIONAL LATER
  ug_table[, ':=' (rt = rt*60, rt_start = rt_start*60, rt_end = rt_end*60)]

  #Add "_ug" as suffix to each column name
  colnames(ug_table) <- paste(colnames(ug_table), 'ug', sep = '_')

  message(paste0('Successful import of unaligned Compound Discoverer output. No. of peaks imported: ', nrow(ug_table)))

  return(ug_table)
}



--------------------------------------------------------------------------------
/R/comp_import_elmaven.R:
--------------------------------------------------------------------------------
#' import_ungrouped_elmaven
#'
#' Reads the unaligned El-MAVEN peak table from a single csv file, renames its
#' columns to the internal names given in \code{options_dt}, attaches
#' \code{sample_id}, drops peaks with non-positive area or height and
#' multiplies all retention-time columns by 60.
#'
#' @param file path to exactly one csv file
#' @param options_dt options data.table providing the columns
#'   \code{ug_columns}, \code{internal_columns}, \code{ug_samples} and \code{sample_id}
#'
#' @return data.table with one row per peak; every column name carries the suffix "_ug"
#'
#' @keywords internal
import_ungrouped_elmaven <- function(file, options_dt){

  message('start elmaven unaligned import')

  if(is.null(file)){
    stop('No ungrouped file selected')
  }

  #The length check has to come first: if() below needs a scalar condition
  if(length(file) != 1){
    stop('There should only be 1 file for the unaligned El-MAVEN output!')
  }

  #Check if filetype is csv
  if(tools::file_ext(file) != 'csv'){
    stop('ungrouped dataset is not a valid csv file')
  }

  #Import csv file
  ug_table <- data.table::fread(file)

  #Check if all columns defined in optionsframe are present
  ug_req_cols <- stats::na.omit(options_dt$ug_columns)
  if(!all(ug_req_cols %in% colnames(ug_table))){
    cols_not_found <- setdiff(ug_req_cols, colnames(ug_table))
    stop('Columns defined in options but not present in unaligned El-MAVEN output: ',
         paste(cols_not_found, collapse = " - "))
  }

  #rename all columns for internal use according to options frame
  ug_table <- rename_columns_from_options(ug_table, options_dt, 'ug_columns', 'internal_columns')

  #Add a sample_id column based on the sample_names in options_dt
  ug_table <- ug_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'ug_samples')]

  #Keep only peaks with strictly positive area and height
  ug_table <- ug_table[peak_area > 0 & peak_height > 0]

  #Generate comp_id for each peak
  ug_table$comp_id <- seq.int(nrow(ug_table))

  #set units to seconds
  ug_table[, rt := as.numeric(rt) * 60]
  ug_table[, rt_start := as.numeric(rt_start) * 60]
  ug_table[, rt_end := as.numeric(rt_end) * 60]

  #Add "_ug" as suffix to each column name
  colnames(ug_table) <- paste(colnames(ug_table), 'ug', sep = '_')

  message(paste0('Successful El-MAVEN unaligned import. No. of peaks imported: ', nrow(ug_table)))

  return(ug_table)
}


#' import_grouped_elmaven
#'
#' Reads the aligned El-MAVEN feature table from a csv file, melts it into a
#' one-peak-per-row table (one row per feature and sample), renames the
#' columns to the internal names given in \code{options_dt} and drops entries
#' with non-positive area.
#'
#' @param file path to the aligned csv file, or NULL if no aligned output is available
#' @param options_dt options data.table providing the columns
#'   \code{g_columns}, \code{internal_columns}, \code{g_samples} and \code{sample_id}
#'
#' @return NULL if \code{file} is NULL, otherwise a data.table whose column
#'   names carry the suffix "_g"
#'
#' @keywords internal
import_grouped_elmaven <- function (file, options_dt) {

  message('Starting El-MAVEN aligned import')

  #A missing aligned file is not an error: the caller receives NULL
  if(is.null(file)){
    return(NULL)
  }

  #Check if filetype is csv
  if(tools::file_ext(file) != 'csv'){
    stop('grouped dataset is not a valid csv file')
  }
  #Import csv file
  g_table <- data.table::fread(file)

  #Check if all columns defined in optionsframe are present
  g_req_cols <- stats::na.omit(options_dt$g_columns)
  if(!all(g_req_cols %in% colnames(g_table))){
    cols_not_found <- setdiff(g_req_cols, colnames(g_table))
    stop('Columns defined in options but not present in aligned El-MAVEN output: ',
         paste(cols_not_found, collapse = " - "))
  }

  #Add feature_id for each row
  g_table$feature_id <- seq.int(nrow(g_table))

  #Transforming table from wide to long format, creating 1 peak-per-row format
  id_vars <- append(stats::na.omit(options_dt[['g_columns']]), 'feature_id')
  measure_vars <- stats::na.omit(options_dt[, g_samples])
  g_table <- data.table::melt(g_table, id.vars = id_vars, measure.vars = measure_vars,
                              variable.name = 'sample_name', value.name = 'peak_area')

  #rename all columns for internal use according to options frame
  g_table <- rename_columns_from_options(g_table, options_dt, 'g_columns', 'internal_columns')

  #set units to seconds
  g_table[, rt := as.numeric(rt) * 60]

  #Add a sample_id column based on the sample_names in options_dt
  g_table <- g_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'g_samples')]

  #Keep only peaks with strictly positive area
  g_table <- g_table[peak_area > 0]

  #Remove identical peaks
  #g_table <- remove_identical_peaks(g_table, grouped = TRUE)

  #Add comp_id for each peak
  g_table$comp_id <- seq.int(nrow(g_table))

  #Add "_g" as suffix to each column name
  colnames(g_table) <- paste(colnames(g_table), 'g', sep = '_')

  message(paste0('Successful El-MAVEN aligned import. No. of peaks imported: ', nrow(g_table)))

  return(g_table)
}

--------------------------------------------------------------------------------
/R/comp_import_msdial.R:
--------------------------------------------------------------------------------
#' import_ungrouped_msdial
#'
#' Reads one unaligned MS-DIAL peak table per sample, stacks them into a
#' single table (the file name without extension becomes \code{sample_name}),
#' renames the columns to the internal names given in \code{options_dt},
#' attaches \code{sample_id}, drops peaks with non-positive area or height and
#' multiplies all retention-time columns by 60.
#'
#' @param file_list paths to the per-sample MS-DIAL peak tables (at least two)
#' @param options_dt options data.table providing the columns
#'   \code{ug_columns}, \code{internal_columns}, \code{ug_samples} and \code{sample_id}
#'
#' @return data.table with one row per peak; every column name carries the suffix "_ug"
#'
#' @keywords internal
import_ungrouped_msdial <- function(file_list, options_dt){

  message('Starting MS-DIAL unaligned import')

  if(is.null(file_list)){
    stop('No ungrouped files selected')
  }

  if(length(file_list) < 2){
    stop('There should be multiple files for the unaligned MS-DIAL output!')
  }

  #Read every file into its own table and bind them once at the end.
  #(The previous exists("ug_table") test also searched enclosing environments,
  #so a stray global ug_table would have been bound into the result.)
  per_file_tables <- lapply(file_list, function(file_path){
    file_name <- tools::file_path_sans_ext(basename(file_path))
    single_table <- data.table::fread(file_path, integer64 = "numeric")
    if(!("Area" %in% names(single_table))) {
      stop(paste("There are MS-DIAL specific columns missing in " , file_path))
    }
    single_table[, sample_name := file_name]
    single_table
  })
  ug_table <- data.table::rbindlist(per_file_tables, use.names = TRUE)

  #Check if all columns defined in optionsframe are present
  ug_req_cols <- stats::na.omit(options_dt$ug_columns)
  if(!all(ug_req_cols %in% colnames(ug_table))){
    cols_not_found <- setdiff(ug_req_cols, colnames(ug_table))
    stop('Columns defined in options but not present in unaligned MS-DIAL output: ',
         paste(cols_not_found, collapse = " - "))
  }

  #rename all columns for internal use according to option frame
  ug_table <- rename_columns_from_options(ug_table, options_dt, 'ug_columns', 'internal_columns')

  #Add a sample_id based on the sample_names in options_dt
  ug_table <- ug_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'ug_samples')]

  #Keep only peaks with strictly positive area and height
  ug_table <- ug_table[peak_area > 0 & peak_height > 0]

  #make sure area is not bit64
  ug_table <- ug_table[, 'peak_area' := as.double(peak_area)]

  #Multiply rt by 60 to convert min to seconds ##MAKE OPTIONAL LATER
  ug_table[, ':=' (rt = rt*60, rt_start = rt_start*60, rt_end = rt_end*60)]

  #Generate comp_id for each peak
  ug_table$comp_id <- seq.int(nrow(ug_table))

  # Add Rounding Column for later merge
  ug_table <- ug_table[, 'peak_area_rounded' := round(peak_area, 0)]

  #Add "_ug" as suffix to each column name
  colnames(ug_table) <- paste(colnames(ug_table), 'ug', sep = '_')

  message(paste0('Successful MS-DIAL unaligned import. No. of peaks imported: ', nrow(ug_table)))

  return(ug_table)
}

#' import_grouped_msdial
#'
#' Reads the aligned MS-DIAL table (.txt or .msdial, the first 4 lines are
#' skipped), melts it into a one-peak-per-row table, renames the columns to
#' the internal names given in \code{options_dt}, drops entries with
#' non-positive area and multiplies \code{rt} by 60.
#'
#' @param file_path path to exactly one aligned MS-DIAL file, or NULL if no
#'   aligned output is available
#' @param options_dt options data.table providing the columns
#'   \code{g_columns}, \code{internal_columns}, \code{g_samples} and \code{sample_id}
#'
#' @return NULL if \code{file_path} is NULL, otherwise a data.table whose
#'   column names carry the suffix "_g"
#'
#' @keywords internal
import_grouped_msdial <- function(file_path, options_dt){

  message('Starting MS-DIAL aligned import')

  #A missing aligned file is not an error: the caller receives NULL
  if(is.null(file_path)){
    return(NULL)
  }

  #Exactly one file is expected. This used to test length(file), i.e. the
  #length of base::file, and therefore could never trigger.
  if(length(file_path) != 1){
    stop('There should only be only one file for the aligned MS-DIAL output!')
  }

  #Check if filetype is text
  if(!(tools::file_ext(file_path) %in% c('txt', 'msdial'))){
    stop('grouped dataset is not a valid .txt or .msdial file')
  }

  #Import text file
  #Make skip variable
  g_table <- data.table::fread(file_path, skip=4, integer64 = "double", verbose = FALSE)

  #Check if all columns defined in optionsframe are present
  g_req_cols <- stats::na.omit(options_dt$g_columns)
  if(!all(g_req_cols %in% colnames(g_table))){
    cols_not_found <- setdiff(g_req_cols, colnames(g_table))
    stop('Columns defined in options but not present in aligned MS-DIAL output: ',
         paste(cols_not_found, collapse = " - "))
  }

  #Add feature id for each row
  g_table$feature_id <- seq.int(nrow(g_table))

  #Transforming table from wide to long format, creating 1 peak-per-row format
  id_vars <- append(stats::na.omit(options_dt[['g_columns']]), 'feature_id')
  measure_vars <- stats::na.omit(options_dt[, g_samples])
  g_table <- data.table::melt(g_table, id.vars = id_vars, measure.vars = measure_vars,
                              variable.name = 'sample_name', value.name = 'peak_area')

  #rename all columns for internal use according to options frame
  g_table <- rename_columns_from_options(g_table, options_dt, 'g_columns', 'internal_columns')

  #make sure area is not bit64
  g_table <- g_table[, 'peak_area' := as.double(peak_area)]

  #Add a sample_id column based on the sample_names in options_dt
  g_table <- g_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'g_samples')]

  #Remove peaks where area is below or equal 0
  g_table <- g_table[peak_area > 0]

  #Multiply rt by 60 to convert min to seconds MAKE OPTIONAL LATER
  g_table[, rt := rt*60]

  #Add comp_id for each peak
  g_table$comp_id <- seq.int(nrow(g_table))

  #Add "_g" as suffix to each column name
  colnames(g_table) <- paste(colnames(g_table), 'g', sep = '_')

  message(paste0('Successful MS-DIAL aligned import. No. of peaks imported: ', nrow(g_table)))

  return(g_table)
}

--------------------------------------------------------------------------------
/R/comp_import_slaw.R:
--------------------------------------------------------------------------------
#' import_ungrouped_slaw
#'
#' Reads one unaligned SLAW peak table per sample, stacks them into a single
#' table (the file name without extension becomes \code{sample_name}), renames
#' the columns to the internal names given in \code{options_dt}, attaches
#' \code{sample_id}, drops peaks with non-positive area or height and
#' multiplies all retention-time columns by 60.
#'
#' @param file_list paths to the per-sample SLAW peak tables (at least two)
#' @param options_dt options data.table providing the columns
#'   \code{ug_columns}, \code{internal_columns}, \code{ug_samples} and \code{sample_id}
#'
#' @return data.table with one row per peak; every column name carries the suffix "_ug"
#'
#' @keywords internal
import_ungrouped_slaw <- function(file_list, options_dt){

  message('Starting SLAW unaligned import')

  if(is.null(file_list)){
    stop('No ungrouped files selected')
  }

  if(length(file_list) < 2){
    stop('There should be multiple files for the unaligned SLAW output!')
  }

  ug_req_cols <- stats::na.omit(options_dt$ug_columns)

  #Read every file into its own table and bind them once at the end.
  #(The previous exists("ug_table") test also searched enclosing environments,
  #so a stray global ug_table would have been bound into the result.)
  #Every file is checked for the required columns, so the combined table is
  #guaranteed to contain them and no second check is needed after binding.
  per_file_tables <- lapply(file_list, function(file_path){
    file_name <- tools::file_path_sans_ext(basename(file_path))
    single_table <- data.table::fread(file_path)
    if(!all(ug_req_cols %in% colnames(single_table))) {
      stop(paste("There are SLAW specific columns missing in " , file_path))
    }
    single_table[, sample_name := file_name]
    single_table
  })
  ug_table <- data.table::rbindlist(per_file_tables, use.names = TRUE)

  #rename all columns for internal use according to options frame
  ug_table <- rename_columns_from_options(ug_table, options_dt, 'ug_columns', 'internal_columns')

  #Add a sample_id based on the sample_names in options_dt
  ug_table <- ug_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'ug_samples')]

  #Keep only peaks with strictly positive area and height
  ug_table <- ug_table[peak_area > 0 & peak_height > 0]

  #Multiply rt by 60 to convert min to seconds ##MAKE OPTIONAL LATER
  ug_table[, ':=' (rt = rt*60, rt_start = rt_start*60, rt_end = rt_end*60)]

  #Generate comp_id for each peak
  ug_table$comp_id <- seq.int(nrow(ug_table))

  #Add "_ug" as suffix to each column name
  colnames(ug_table) <- paste(colnames(ug_table), 'ug', sep = '_')

  message(paste0('Successful SLAW unaligned import. No. of peaks imported: ', nrow(ug_table)))

  return(ug_table)
}


#' import_grouped_slaw
#'
#' Reads the aligned SLAW table from a csv file, strips the leading tag and
#' the file extension from the per-sample column names, melts the table into a
#' one-peak-per-row table, renames the columns to the internal names given in
#' \code{options_dt}, drops entries with non-positive area and multiplies
#' \code{rt} by 60.
#'
#' @param file path to the aligned csv file, or NULL if no aligned output is available
#' @param options_dt options data.table providing the columns \code{g_columns},
#'   \code{internal_columns}, \code{g_samples}, \code{ug_samples} and \code{sample_id}
#'
#' @return NULL if \code{file} is NULL, otherwise a data.table whose column
#'   names carry the suffix "_g"
#'
#' @keywords internal
import_grouped_slaw <- function (file, options_dt) {

  message('Starting SLAW aligned import')

  #A missing aligned file is not an error: the caller receives NULL
  if(is.null(file)){
    return(NULL)
  }

  #Check if filetype is csv
  if(tools::file_ext(file) != 'csv'){
    stop('grouped dataset is not a valid csv file')
  }

  #Import csv file
  g_table <- data.table::fread(file)

  #Rename raw files by removing the leading tag (everything up to and including
  #the first "_") and the file extension. fixed = TRUE matches a literal
  #".csv"; sub() leaves names without any "_" untouched instead of turning
  #them into "NA_<name>".
  is_sample_col <- grepl(".csv", colnames(g_table), fixed = TRUE)
  sample_cols <- colnames(g_table)[is_sample_col]
  colnames(g_table)[is_sample_col] <- tools::file_path_sans_ext(sub("^[^_]*_", "", sample_cols))

  #Check if all columns defined in optionsframe are present
  g_req_cols <- stats::na.omit(options_dt$g_columns)
  if(!all(g_req_cols %in% colnames(g_table))){
    cols_not_found <- setdiff(g_req_cols, colnames(g_table))
    stop('Columns defined in options but not present in aligned SLAW output: ',
         paste(cols_not_found, collapse = " - "))
  }

  #Add feature_id for each row
  g_table$feature_id <- seq.int(nrow(g_table))

  #Transforming table from wide to long format, creating 1 peak-per-row format
  id_vars <- append(stats::na.omit(options_dt[['g_columns']]), 'feature_id')
  measure_vars <- stats::na.omit(options_dt[!is.na(ug_samples), g_samples])
  g_table <- data.table::melt(g_table, id.vars = id_vars, measure.vars = measure_vars,
                              variable.name = 'sample_name', value.name = 'peak_area')

  #rename all columns for internal use according to options frame
  g_table <- rename_columns_from_options(g_table, options_dt, 'g_columns', 'internal_columns')

  #Add a sample_id column based on the sample_names in options_dt
  g_table <- g_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'g_samples')]

  #Keep only peaks with strictly positive area
  g_table <- g_table[peak_area > 0]

  #Multiply rt by 60 to convert min to seconds MAKE OPTIONAL LATER
  g_table[, rt := rt*60]

  #Remove identical peaks
  #g_table <- remove_identical_peaks(g_table, grouped = TRUE)

  #Add comp_id for each peak
  g_table$comp_id <- seq.int(nrow(g_table))

  #Add "_g" as suffix to each column name
  colnames(g_table) <- paste(colnames(g_table), 'g', sep = '_')

  message(paste0('Successful SLAW aligned import. No. of peaks imported: ', nrow(g_table)))

  return(g_table)
}

--------------------------------------------------------------------------------
/R/comp_import_xcms.R:
--------------------------------------------------------------------------------
#' import_ungrouped_xcms
#'
#' Reads the unaligned XCMS peak table from a csv file or from the first
#' object stored in an Rda file (converted via \code{xcms::peaks}), renames
#' the columns to the internal names given in \code{options_dt}, attaches
#' \code{sample_id} and drops peaks with non-positive area or height.
#'
#' Side effect: \code{ug_samples} in \code{options_dt} is shifted with
#' \code{:=}, i.e. by reference, so the table passed in by the caller is
#' changed as well.
#'
#' @param file path to exactly one csv or Rda file
#' @param options_dt options data.table providing the columns
#'   \code{ug_columns}, \code{internal_columns}, \code{ug_samples} and \code{sample_id}
#'
#' @return data.table with one row per peak; every column name carries the suffix "_ug"
#'
#' @keywords internal
import_ungrouped_xcms <- function(file, options_dt){

  message('Starting xcms unaligned import')

  if(is.null(file)){
    stop('No ungrouped file selected')
  }

  #The length check has to come first: the if() conditions below need a scalar
  if(length(file) != 1){
    stop('There should only be 1 file for the unaligned XCMS output!')
  }

  #Check if filetype is csv or Rda
  file_type <- tools::file_ext(file)
  if(!(file_type %in% c('csv', 'Rda'))){
    stop('ungrouped dataset is not a valid csv (/Rda) file')
  }

  if(file_type == "Rda"){
    #Load into this function's environment and use the first object in the file
    rda_file_v <- load(file = file, envir = environment())
    rda_file <- get(rda_file_v[1])
    ug_table <- data.table::as.data.table(xcms::peaks(rda_file))
  } else {
    #Import csv file
    ug_table <- data.table::fread(file)
  }

  #Check if all columns defined in optionsframe are present
  ug_req_cols <- stats::na.omit(options_dt$ug_columns)
  if(!all(ug_req_cols %in% colnames(ug_table))){
    cols_not_found <- setdiff(ug_req_cols, colnames(ug_table))
    stop('Columns defined in options but not present in unaligned XCMS output: ',
         paste(cols_not_found, collapse = " - "))
  }

  #rename all columns for internal use according to options frame
  ug_table <- rename_columns_from_options(ug_table, options_dt, 'ug_columns', 'internal_columns')

  #correct ug_sample_IDs
  #NOTE(review): presumably undoes the "- length(g_table)" offset stored by
  #import_grouped_xcms; this modifies the caller's options_dt in place - confirm
  #that downstream code relies on it before changing it to a copy.
  options_dt[, ug_samples := ug_samples + max(ug_table$sample_name, na.rm = TRUE)]

  #Add a sample_id column based on the sample_names in options_dt
  ug_table <- ug_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'ug_samples')]

  #Keep only peaks with strictly positive area and height
  ug_table <- ug_table[peak_area > 0 & peak_height > 0]

  #Generate comp_id for each peak
  ug_table$comp_id <- seq.int(nrow(ug_table))

  #Add "_ug" as suffix to each column name
  colnames(ug_table) <- paste(colnames(ug_table), 'ug', sep = '_')

  message(paste0('Successful xcms unaligned import. No. of peaks imported: ', nrow(ug_table)))

  return(ug_table)
}


#' import_grouped_xcms
#'
#' Reads the aligned XCMS table from a csv file or from the first object
#' stored in an Rda file (converted via \code{xcms::peakTable}), melts it into
#' a one-peak-per-row table, renames the columns to the internal names given
#' in \code{options_dt} and drops entries with non-positive area.
#'
#' Side effect: \code{ug_samples} in \code{options_dt} is overwritten with
#' \code{:=}, i.e. by reference, so the table passed in by the caller is
#' changed as well.
#'
#' @param file path to the aligned csv or Rda file, or NULL if no aligned
#'   output is available
#' @param options_dt options data.table providing the columns
#'   \code{g_columns}, \code{internal_columns}, \code{g_samples} and \code{sample_id}
#'
#' @return NULL if \code{file} is NULL, otherwise a list with \code{g_table}
#'   (column names carry the suffix "_g") and the updated \code{options_dt}
#'
#' @keywords internal
import_grouped_xcms <- function (file, options_dt) {

  message('Starting xcms aligned import')

  #A missing aligned file is not an error: the caller receives NULL
  if(is.null(file)){
    return(NULL)
  }

  #Check if filetype is csv or Rda
  file_type <- tools::file_ext(file)
  if(!(file_type %in% c('csv', 'Rda'))){
    stop('grouped dataset is not a valid csv (/Rda) file')
  }
  if(file_type == "Rda"){
    #Load into this function's environment and use the first object in the file
    rda_file_v <- load(file = file, envir = environment())
    rda_file <- get(rda_file_v[1])
    g_table <- data.table::as.data.table(xcms::peakTable(rda_file))
  } else {
    #Import csv file
    g_table <- data.table::fread(file)
  }

  #Check if all columns defined in optionsframe are present
  g_req_cols <- stats::na.omit(options_dt$g_columns)
  #Sample columns may be named after the raw files; drop the file extension
  colnames(g_table) <- tools::file_path_sans_ext(colnames(g_table))
  if(!all(g_req_cols %in% colnames(g_table))){
    cols_not_found <- setdiff(g_req_cols, colnames(g_table))
    stop('Columns defined in options but not present in aligned XCMS output: ',
         paste(cols_not_found, collapse = " - "))
  }

  #Compare order of present samples in options_dt to grouped output and update options_dt
  nt_samples <- colnames(g_table)
  #nt_ids <- match(na.omit(options_dt$g_samples), nt_samples)
  #columns not containing samples have to be subtracted in ug_import
  options_dt[!is.na(g_samples), ug_samples := match(stats::na.omit(options_dt$g_samples), nt_samples) - length(g_table)]

  #Add feature_id for each row
  g_table$feature_id <- seq.int(nrow(g_table))

  #Transforming table from wide to long format, creating 1 peak-per-row format
  id_vars <- append(stats::na.omit(options_dt[['g_columns']]), 'feature_id')
  measure_vars <- stats::na.omit(options_dt[!is.na(ug_samples), g_samples])
  g_table <- data.table::melt(g_table, id.vars = id_vars, measure.vars = measure_vars,
                              variable.name = 'sample_name', value.name = 'peak_area')

  #rename all columns for internal use according to options frame
  g_table <- rename_columns_from_options(g_table, options_dt, 'g_columns', 'internal_columns')

  #Add a sample_id column based on the sample_names in options_dt
  g_table <- g_table[options_dt, ':=' (sample_id = i.sample_id), on=c(sample_name = 'g_samples')]

  #Keep only peaks with strictly positive area
  g_table <- g_table[peak_area > 0]

  #Remove identical peaks
  #g_table <- remove_identical_peaks(g_table, grouped = TRUE)

  #Add comp_id for each peak
  g_table$comp_id <- seq.int(nrow(g_table))

  #Add "_g" as suffix to each column name
  colnames(g_table) <- paste(colnames(g_table), 'g', sep = '_')

  message(paste0('Successful xcms aligned import. No. of peaks imported: ', nrow(g_table)))

  return(list(g_table = g_table, options_dt = options_dt))
}

--------------------------------------------------------------------------------
/R/comp_pick_algo.R:
--------------------------------------------------------------------------------
#' check_nonTargeted_input
#'
#' @param ug_table_path path to unaligned table(s)
#' @param g_table_path path to aligned table
#' @param options_table output from \code{\link{check_benchmark_input}}
#' @param algo tool output format of ug_table and g_table. can be XCMS, XCMS3, Metaboanalyst, SLAW, El-Maven, OpenMS, MS-DIAL, CompoundDiscoverer, MZmine 2. or MZmine 3. Outputs from different tools can also be used as long as they are reformatted to one of those types.
#'
#' @return returns unaligned and aligned outputs from non-targeted tool in a format readable via \code{\link{compare_peaks}}
#' @export
#'
#'
check_nonTargeted_input <- function(ug_table_path, g_table_path, options_table = 'generate', algo){

  #Accept the list returned by check_benchmark_input() as well as the bare
  #options data.table: unwrap the list if necessary.
  #NOTE(review): the default 'generate' is not handled anywhere in this
  #function; it is passed on to the importers unchanged.
  if(is.list(options_table) == TRUE && data.table::is.data.table(options_table) == FALSE){

    options_table <- options_table$options_table

  }

  #Dispatch to the tool-specific importers. An empty arm ('X' = ,) falls
  #through to the next non-empty arm.
  switch(algo,
         #For XCMS and Metaboanalyst the grouped import runs first because it
         #returns an updated options table that the ungrouped import needs
         'XCMS' = ,
         'XCMS3' = {
           g_table <- import_grouped_xcms(g_table_path, options_table)
           options_table <- g_table[["options_dt"]]
           g_table <- g_table[["g_table"]]
           ug_table <- import_ungrouped_xcms(ug_table_path, options_table)
         },
         'Metaboanalyst' = {
           g_table <- import_grouped_Metaboanalyst(g_table_path, options_table)
           options_table <- g_table[["options_dt"]]
           g_table <- g_table[["g_table"]]
           ug_table <- import_ungrouped_Metaboanalyst(ug_table_path, options_table)
         },
         'SLAW' = {
           ug_table <- import_ungrouped_slaw(ug_table_path, options_table)
           g_table <- import_grouped_slaw(g_table_path, options_table)
         },
         #The documentation of algo spells this tool "El-Maven"; accept that
         #spelling too instead of silently returning NULL
         'El-Maven' = ,
         'El-MAVEN' = {
           ug_table <- import_ungrouped_elmaven(ug_table_path, options_table)
           g_table <- import_grouped_elmaven(g_table_path, options_table)
         },
         'OpenMS' = {
           ug_table <- import_ungrouped_openms(ug_table_path, options_table)
           g_table <- import_grouped_openms(g_table_path, options_table)
         },
         'MS-DIAL' = {
           ug_table <- import_ungrouped_msdial(ug_table_path, options_table)
           g_table <- import_grouped_msdial(g_table_path, options_table)
         },
         'CompoundDiscoverer' = {
           #Only an unaligned import exists for Compound Discoverer
           ug_table <- import_ungrouped_cd(ug_table_path, options_table)
           g_table <- NULL
         },
         'MZmine 2' = ,
         'MZmine 3' = {
           ug_table <- import_ungrouped_mzmine(ug_table_path, options_table)
           g_table <- import_grouped_mzmine(g_table_path, options_table)
         },
         #Unknown algo: NULL is returned without any message
         {return (NULL)})
  return(list('ug_table' = ug_table, 'g_table' = g_table))
}

--------------------------------------------------------------------------------
/R/comp_pick_main_peak.R:
--------------------------------------------------------------------------------
#' pick_main_peak
#'
#' Runs \code{\link{pick_main_peak_sd}} for every combination of
#' \code{molecule_b}, \code{adduct_b} and \code{sample_id_b} and merges the
#' resulting \code{main_peak} flag back onto \code{dt} by \code{comp_id_b} and
#' \code{comp_id_ug}. Rows that have a \code{peak_area_ug} but were not
#' selected get \code{main_peak = FALSE}; rows without \code{peak_area_ug}
#' keep \code{NA}.
#'
#' @param dt data.table containing at least the columns \code{molecule_b},
#'   \code{adduct_b}, \code{sample_id_b}, \code{comp_id_b}, \code{comp_id_ug},
#'   \code{isoab_b}, \code{rt_start_ug}, \code{rt_end_ug} and \code{peak_area_ug}
#'
#' @return \code{dt} with the additional logical column \code{main_peak}
#'
#' @keywords internal
pick_main_peak <- function(dt){

  main_peak_dt <- dt[, pick_main_peak_sd(.SD),
                     by=c('molecule_b', 'adduct_b', 'sample_id_b'),
                     .SDcols=c('molecule_b', 'adduct_b', 'sample_id_b', 'comp_id_b', 'comp_id_ug',
                               'isoab_b', 'rt_start_ug', 'rt_end_ug', 'peak_area_ug')]
  dt <- merge(dt, main_peak_dt[,c('comp_id_b', 'comp_id_ug', 'main_peak')],
              by=c('comp_id_b', 'comp_id_ug'), all.x=TRUE)
  dt[is.na(main_peak) & !is.na(peak_area_ug), main_peak := FALSE]
  return(dt)

}


#' pick_main_peak_sd
#'
#' Selects the main peak(s) within one group handed over by
#' \code{\link{pick_main_peak}}:
#' \itemize{
#'   \item as many rows as distinct \code{isoab_b} values: every row is a main peak;
#'   \item only one distinct \code{isoab_b}: the peak with the smallest
#'     \code{rt_end_ug - rt_start_ug}; ties are resolved in favour of the
#'     largest \code{peak_area_ug} among the tied peaks;
#'   \item otherwise: peaks are compared pairwise and the candidates whose area
#'     ratio deviates least from the ratio of their \code{isoab_b} values are chosen.
#' }
#'
#' @param dt data.table with the columns \code{comp_id_b}, \code{comp_id_ug},
#'   \code{isoab_b}, \code{rt_start_ug}, \code{rt_end_ug} and \code{peak_area_ug}
#'
#' @return data.table with the columns \code{comp_id_b}, \code{comp_id_ug} and
#'   \code{main_peak}, containing only the rows flagged TRUE
#'
#' @keywords internal
pick_main_peak_sd <- function(dt){
  #Work on a copy: the := calls below must not touch the caller's table
  dt <- data.table::copy(dt)
  all_iso_abbs <- sort(unique(dt[,isoab_b]), decreasing = TRUE)
  n_iso_abbs <- length(all_iso_abbs)

  if(nrow(dt) == n_iso_abbs){
    #If number of rows is equal to isoabs mark all as main peak
    dt[, 'main_peak' := TRUE]
    return(dt[main_peak == TRUE, c('comp_id_b', 'comp_id_ug', 'main_peak')])
  } else if(n_iso_abbs == 1){
    #If only one isoab is present pick peak with shortest length
    dt[, 'peak_length' := rt_end_ug - rt_start_ug]
    dt[, 'main_peak' := ifelse(peak_length == min(peak_length), TRUE, FALSE)]
    if(nrow(dt[main_peak == TRUE]) > 1){
      #Several peaks share the shortest length: keep the largest of THOSE.
      #Comparing against the maximum area of all rows (as before) could drop
      #every candidate when a longer peak has the largest area.
      tied_max_area <- dt[main_peak == TRUE, max(peak_area_ug)]
      dt[main_peak == TRUE & peak_area_ug < tied_max_area, main_peak := FALSE]
    }
    return(dt[main_peak == TRUE, c('comp_id_b', 'comp_id_ug', 'main_peak')])
  } else {
    #Iso_abb Comparison
    dt[, 'merge_key' := 1]
    dt[, 'peak_length' := rt_end_ug - rt_start_ug]

    #Build comparison DT (every row against every other row; keep the pairs
    #where x has the higher isoab)
    comp_dt <- merge(dt, dt, by=c('merge_key'), allow.cartesian = TRUE)
    comp_dt <- comp_dt[isoab_b.x > isoab_b.y]

    #Calculate absolute difference between observed area ratio and expected isoab ratio
    comp_dt <- comp_dt[,'ratio_diff' := abs((peak_area_ug.y/peak_area_ug.x)-(isoab_b.y/isoab_b.x))]

    #Calc best peak pair per comparison (= pair of isoab values)
    comp_dt <- comp_dt[, 'min_ratio_diff' := ifelse(ratio_diff == min(ratio_diff), TRUE, FALSE), by=c('isoab_b.x', 'isoab_b.y')]

    #Collect both members of every best pair into one candidate list
    x_dt <- data.table::setnames(comp_dt[min_ratio_diff == TRUE, c('comp_id_b.x', 'comp_id_ug.x', 'ratio_diff')], c('comp_id_b.x', 'comp_id_ug.x'), c('comp_id_b', 'comp_id_ug'))
    y_dt <- data.table::setnames(comp_dt[min_ratio_diff == TRUE, c('comp_id_b.y', 'comp_id_ug.y', 'ratio_diff')], c('comp_id_b.y', 'comp_id_ug.y'), c('comp_id_b', 'comp_id_ug'))
    main_peaks_dt <- data.table::rbindlist(list(x_dt, y_dt), use.names = TRUE)
    main_peaks_dt <- main_peaks_dt[!duplicated(main_peaks_dt, by=c('comp_id_b', 'comp_id_ug'))]
    main_peaks_dt <- main_peaks_dt[, 'main_peak' := ifelse(ratio_diff == min(ratio_diff), TRUE, FALSE), by=c('comp_id_b')]
    if(nrow(main_peaks_dt[main_peak == TRUE]) != length(unique(dt$comp_id_b))){
      #If several possible peaks have the same ratio pick the smallest peak, if still multiple present pick first one
      #NOTE(review): this re-flags by shortest peak_length among ALL candidates
      #of a comp_id_b, not only the ratio-tied ones - confirm this is intended.
      main_peaks_dt <- merge(main_peaks_dt, dt[, c('comp_id_ug', 'peak_length')], by=c('comp_id_ug'), all.x=TRUE)
      main_peaks_dt <- main_peaks_dt[, 'main_peak' := ifelse(peak_length == min(peak_length), TRUE, FALSE), by=c('comp_id_b')]
      main_peaks_dt <- main_peaks_dt[!duplicated(main_peaks_dt, by=c('comp_id_b', 'main_peak'))]
    }
    return(main_peaks_dt[main_peak == TRUE, c('comp_id_b', 'comp_id_ug', 'main_peak')])
  }
}

--------------------------------------------------------------------------------
/R/cutout_peak.R:
--------------------------------------------------------------------------------
#' cutout_peaks
#'
#' @description detects peaks
#'
#' @param int int
#' @param rt rt
#' @param Min.PpP Min.PpP
#' @param peak.spotting.factor. peak.spotting.factor.
#' @param Integration_baseL_factor. Integration_baseL_factor.
#' @param l l
#' @param r r
#' @param M0.grp M0.grp
#' @param main_adduct.grp main_adduct.grp
#' @param Min.Res. Min.Res.
#'
#' @keywords internal
#'
cutout_peaks <-
  function(int,
           rt,
           Min.PpP = 10,
           peak.spotting.factor. = 0.01,
           Integration_baseL_factor. = 0.1,
           l = 1,
           r = length(int),
           M0.grp = NA,
           main_adduct.grp = NA,
           Min.Res. = 70) {


    ##################################
    #limit peak-detection to specific rt-region
    ##################################
    vl <- length(int)
    int <- int[l:r]
    rt <- rt[l:r]
    if (sum(int) == 0) {
      return(NULL)
    }


    ##################################
    #pickup potential peaks by counting consectuive points above the base line
    ##################################
    pot.peak.ranges <- get_pot_peak_ranges2(int,
                                            Min.PpP = Min.PpP,
                                            peak.spotting.factor = peak.spotting.factor.)
48 | if (is.null(pot.peak.ranges)) { 49 | return(NULL) 50 | } 51 | l.peaks <- 52 | pot.peak.ranges[, c("idx", "s", "e")][, c("unres.s", "unres.e", "peak.grp") := .(rep(FALSE, nrow(pot.peak.ranges)), 53 | rep(FALSE, nrow(pot.peak.ranges)), 54 | idx)][] 55 | 56 | 57 | ################################## 58 | #check each potential peak on whether multiple peaks can be resolved from it 59 | ################################## 60 | # double.peak.borders <- mapply( 61 | # detect_double_peaks2, 62 | # l = pot.peak.ranges$s, 63 | # r = pot.peak.ranges$e, 64 | # MoreArgs = list( 65 | # pot.doubleP.v = as.numeric(int), 66 | # Min.PpP = Min.PpP, 67 | # Min.Res = Min.Res. 68 | # ), 69 | # SIMPLIFY = FALSE 70 | # ) 71 | # double.peak.borders <- 72 | # rbindlist(double.peak.borders, use.names = TRUE) 73 | # if (nrow(double.peak.borders) > 0) { 74 | # ################################## 75 | # #insert double peak borders into potential peak ranges 76 | # ################################## 77 | # l.peaks <- double.peak.borders[pot.peak.ranges, 78 | # assemble_peaks(idx, s, e, x.breakP), 79 | # on = .(breakP > s, breakP < e), 80 | # by = .EACHI][,-(1:2)][, c("unres.s", "unres.e", "idx") := .(!is.na(match(s, double.peak.borders$breakP)), 81 | # !is.na(match(e, double.peak.borders$breakP)), 82 | # seq(1:length(s)))][] 83 | # } 84 | # 85 | 86 | ################################## 87 | #add different variables per peak 88 | ################################## 89 | l.peaks$idx <- seq_len(nrow(l.peaks)) 90 | l.peaks <- 91 | l.peaks[l.peaks[, .( 92 | res.s = as.double(ifelse(unres.s == TRUE, 100 * int[s] / max(int[s:e]), NA)), 93 | res.e = as.double(ifelse(unres.e == TRUE, 100 * int[e] / max(int[s:e]), NA)), 94 | rtmin = rt[s], 95 | rtmax = rt[e], 96 | baseL = min(int[s:e]) + (max(int[s:e]) - min(int[s:e])) * Integration_baseL_factor. 
97 | ), by = .(idx)], 98 | on = .(idx)] 99 | l.peaks$s <- l + l.peaks$s - 1 100 | l.peaks$e <- l + l.peaks$e - 1 101 | 102 | 103 | ################################## 104 | #add indicators for lower abundant isotopologues and screened adducts 105 | ################################## 106 | if (!is.na(M0.grp)) { 107 | suppressWarnings(l.peaks[, M0.grp := rep(M0.grp, nrow(l.peaks))][]) 108 | } else 109 | l.peaks[, M0.grp := idx][] 110 | if (!is.na(main_adduct.grp)) { 111 | suppressWarnings(l.peaks[, main_adduct.grp := rep(main_adduct.grp, nrow(l.peaks))][]) 112 | } else 113 | l.peaks[, main_adduct.grp := idx][] 114 | 115 | ifelse(nrow(l.peaks) > 0, return(l.peaks[, !c("s", "e", "main_adduct.grp", "baseL", "res.s", "res.e", "unres.s", "unres.e", "peak.grp")]),return(NULL)) 116 | } 117 | 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /R/get_EIC_table.R: -------------------------------------------------------------------------------- 1 | #' get_EIMatches_BM_NPPpeaks 2 | #' 3 | #' @param rt rt 4 | #' @param int int 5 | #' @param Min.PpP Min.PpP 6 | #' 7 | #' 8 | #' 9 | #' @keywords internal 10 | get_EIMatches_BM_NPPpeaks <- function(rt, int, Min.PpP) { 11 | EIC.dt <- data.table::data.table( 12 | rt = rt[!duplicated(rt)], 13 | int = int[!duplicated(rt)], 14 | val = as.vector(S4Vectors::Rle(int[!duplicated(rt)] > 0)), 15 | len = unlist(lapply(S4Vectors::Rle(int[!duplicated(rt)] > 0)@lengths, function(x) { 16 | rep(x, x) 17 | })), 18 | int_wo_spikes = int[!duplicated(rt)] 19 | ) 20 | 21 | # EIC.dt[val == TRUE & 22 | # len <= max(Min.PpP / 3, 2)]$int_wo_spikes <- 0 23 | # 24 | # EIC.dt$val <- as.vector(S4Vectors::Rle(EIC.dt$int_wo_spikes > 0)) 25 | # EIC.dt$len <- 26 | # unlist(lapply(S4Vectors::Rle(EIC.dt$int_wo_spikes > 0)@lengths, function(x) { 27 | # rep(x, x) 28 | # })) 29 | 30 | EIC.dt <- EIC.dt[val == TRUE & len <= max(Min.PpP / 3, 2), int_wo_spikes := NA] 31 | 32 | if(sum(EIC.dt$int_wo_spikes, na.rm 
= TRUE) == 0 | length(EIC.dt[int_wo_spikes > 0]$int_wo_spikes) < 5) { 33 | EIC.dt <- EIC.dt[, int_smooth := int_wo_spikes][] 34 | return(EIC.dt) 35 | } 36 | 37 | EIC.dt_tmp <- 38 | EIC.dt[!is.na(int_wo_spikes)][, "rt"][, int_smooth := sapply(signal::sgolayfilt( 39 | EIC.dt[!is.na(int_wo_spikes)]$int_wo_spikes, 40 | p = 41 | 3, 42 | n = max(5, ifelse( 43 | DescTools::IsOdd(round(0.05 * nrow(EIC.dt[int > 0.1 * max(int)]))) == TRUE, 44 | round(0.05 * nrow(EIC.dt[int > 45 | 0.1 * max(int)])), 46 | round(0.05 * nrow(EIC.dt[int > 47 | 0.1 * max(int)])) + 1 48 | )) 49 | ), 50 | function(ele) {if (ele < 0) ele = 0 else ele = ele})] 51 | 52 | 53 | EIC.dt <- EIC.dt_tmp[EIC.dt, on = .(rt)] 54 | EIC.dt[, sc.i := seq(nrow(EIC.dt))][] 55 | 56 | return(EIC.dt) 57 | 58 | } 59 | -------------------------------------------------------------------------------- /R/get_pot_peak_ranges.R: -------------------------------------------------------------------------------- 1 | #' get_pot_peak_ranges2 2 | #' 3 | #' @param int int 4 | #' @param Min.PpP Min.PpP 5 | #' @param peak.spotting.factor peak.spotting.factor 6 | #' 7 | #' 8 | #' @keywords internal 9 | get_pot_peak_ranges2 <- function(int, Min.PpP = 10, peak.spotting.factor = 0.01){ 10 | 11 | 12 | 13 | ################################## 14 | #generate table with more than Min.PpP consecutive points above baseline 15 | ################################## 16 | baseL <- min(int) + (max(int) - min(int)) * peak.spotting.factor 17 | rle.obj <- S4Vectors::Rle(int > baseL) 18 | rle.dt <- data.table::data.table( 19 | v = rle.obj@values, 20 | l = rle.obj@lengths, 21 | s = S4Vectors::start(rle.obj), 22 | e = S4Vectors::end(rle.obj) 23 | ) 24 | consec.points <- rle.dt[v==TRUE & l >= Min.PpP] 25 | 26 | 27 | 28 | ################################## 29 | #extend potential peak ranges by one scan into each both directs (if they are not already reaching until the first/last scan) 30 | ################################## 31 | if(nrow(consec.points) == 0) { 
return(NULL) } 32 | consec.points[s>1]$s <- as.integer(consec.points[s>1]$s - 1) 33 | consec.points[e 0.03 * max(consec.points$height)] 37 | 38 | 39 | return(consec.points[,!c("v", "l", "height")]) 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | 2 | utils::globalVariables(c("adduct_b", "isoab_b", "idx", "int_wo_spikes", "rt", "StartTime", 3 | "EndTime", "int", "isoab", "FileName", "adduct", "main_adduct", 4 | "peaks.PpP", "molecule", "i", "charge", "root", "..currentCols", 5 | "values", ".N", ".SD", "parents", "ids", "resolution_list", "adducts", 6 | "js", "Lost_b.A", "BM.div", "Molecule", "Adduct", ".GRP", "maIso", 7 | "IDX", "rt_add_temp", "rt_end_b", "rt_b", "rt_start_b", "sample_id_ug", 8 | "rt_start_ug", "rt_end_ug", "rt_ug", "mz_ug", "sample_id_b", 9 | "peak_core_rt_range_start_b", "peak_core_rt_range_end_b", "mz_start_b", "mz_end_b", "rt_end_ug_temp", 10 | "mz_start_b_temp", "mz_ug_temp", "mz_end_b_temp", "rt_ug_temp", 11 | "main_peak", "peak_area_g", "sample_id_g", "peak_area_ug", "N_fid", 12 | "molecule_b", "feature_id_g", "id_b_ug", "comp_id_b", "comp_id_ug", 13 | "main_feature", "dpl", "area_b", "sample_name_b", "..join_vct", 14 | "peak_area_b", "area_g", "Connected", "peak_height_b", "i.peak_area_b", 15 | "i.peak_area_ug", "i.area_g", "diffH20PP_pp", "benchmark", "diffH20PP_ft", 16 | "NPP_features", "missing_peaks_g", "Min.errors", "V1", "..iso_to_test", 17 | "peak_status", "unres.s", "s", "e", "unres.e", "feature_id_b", 18 | "min_mz_start", "max_mz_end", "min_rt_start", "max_rt_end", "mz_g", 19 | "rt_g", "sample_name", "eic_mzmin", "eic_mzmax", "MinMz", "MaxMz", 20 | "isoab_ol", "peaks.idx", "peaks.M0.grp", "Iso_count", "sample_group", 21 | "Grp", "iso_id", "mz_ex", "feature_id", "ft_count", "iso_count2", 22 | "rt_diff", "peaks.rt_raw", "user.rt", "min_rt_diff", "rt_raw_span", 23 | "r_s_error", 
"sample_id", "b_samples", "RT.v", "Intensities.v", "rt_measure", 24 | "val", "len", "user.rtmin", "user.rtmax", "fileIdx", "roi_id", 25 | "scmin", "scmax", "mz", "mzmin", "mzmax", "..bestIso", "adduct_c", 26 | "SumForm2_c", "value", "abundance", "formula", "m/z", "v", "l", 27 | "height", "V.1", "vars", "id", "g_samples", "peak_area", "rt_start", 28 | "rt_end", "peak_length", "isoab_b.x", "isoab_b.y", "ratio_diff", 29 | "RT.v_b", "Intensities.v_b", "peaks.StartTime", "peaks.EndTime", 30 | "start", "end", "start.ut", "end.ut", "peak_found", "nr", "..vct", 31 | "colors", "i.peaks.area", "peaks.area", "i.peaks.height", "peaks.height", 32 | "ExpectedArea", "ErrorRel_A", "ErrorAbs_A", "ExpectedHeight", 33 | "ErrorRel_H", "ErrorAbs_H", "peaks.mz_accurate", "Precursor Name", 34 | "min_ratio_diff", "StartTime.EIC", "EndTime.EIC", "Split_peak", 35 | "present_in_found", "NPP_status", "peak_height", "i.sample_id", 36 | "false_negative", "feature_id_b_temp", "false_negative_type", 37 | "rt_diff_min", "keep_rt", "mz_diff_min", "peaks.mz_accuracy_ppm", 38 | "keep_mz", "sample_id_b_temp", "sample_id_ug_temp", "peak_core_rt_range_start_b_temp", 39 | "rt_start_ug_temp", "peak_core_rt_range_end_b_temp", "peak_area_ug_temp", 40 | "peak_area_rounded_ug", "peak_area_g_temp", "sample_id_g_temp", 41 | "i.peaks.rt_neighbors_b", "peaks.rt_neighbors_b", "i.peaks.mz_neighbors_b", 42 | "peaks.mz_neighbors_b", "missing_peaks_ug", "valley_diff", "min_mz_start_temp", 43 | "max_mz_end_temp", "min_rt_start_temp", "max_rt_end_temp", "M0_int", 44 | "rtmin", "rtmax", "peaks.cor_w_main_add", "peaks.cor_w_M0", "area_ratio", 45 | "mean_area_g.y", "mean_area_g.x", "first_found_area_temp", "int_smooth", 46 | "sc.i", "mzlowerBD", "mzupperBD", "filename", "average_area", 47 | "merge_key", "no_of_samples", "peak_area_ug.y", "peak_area_ug.x", 48 | "f_nf_col", "..var", "var_r", "N", "n", "diffH20PP", 49 | "grp_col", "data_type", "Pred_error", "RT_neighbors", "mz_neighbors", 50 | "missing_peaks", "plot_group", 
"ord", "ErrorRel_A_b", "order_temp", 51 | ".I", "area_diff", "..entrustedGrp", "Peptide", "NPP_peak", "Peptide Modified Sequence", ".", 52 | ":=", "NPP_peak", "NPP_peak picking", "focus", "ug_samples", "i.sample_name_b", "mz_b", 53 | "rt_start_b_temp", "rt_end_b_temp", "match_tables_with_alignment_recovery_info", "Sample", "raw_files", "raw_files_stspl", 54 | "Extra_feature_matches_ft", "dupl")) 55 | -------------------------------------------------------------------------------- /R/match_NPPpeaks_to_NPPfeatures.R: -------------------------------------------------------------------------------- 1 | #' match_NPPpeaks_to_NPPfeatures 2 | #' 3 | #' @param Matches_BM_NPPpeaks Matches_BM_NPPpeaks 4 | #' @param SplittedMatches_BM_NPPpeaks SplittedMatches_BM_NPPpeaks 5 | #' @param g_table g_table 6 | #' 7 | #' @keywords internal 8 | 9 | match_NPPpeaks_to_NPPfeatures <- function(Matches_BM_NPPpeaks, 10 | SplittedMatches_BM_NPPpeaks, 11 | g_table){ 12 | 13 | 14 | match_tables_with_alignment_recovery_info <- list() 15 | length(match_tables_with_alignment_recovery_info) <- 2 16 | names(match_tables_with_alignment_recovery_info) <- c("Matches_BM_NPPpeaks", "SplittedMatches_BM_NPPpeaks") 17 | g_table[, peak_area_g := as.numeric(peak_area_g)] 18 | 19 | #Creating temp columns to prevent over-writing by join 20 | #If statement is solution for msdial 21 | 22 | if ('peak_area_rounded_ug' %in% colnames(Matches_BM_NPPpeaks)){ 23 | Matches_BM_NPPpeaks[, peak_area_ug_temp := peak_area_rounded_ug] 24 | Matches_BM_NPPpeaks[, sample_id_b_temp := sample_id_b] 25 | SplittedMatches_BM_NPPpeaks[, peak_area_ug_temp := peak_area_rounded_ug] 26 | g_table[, peak_area_g_temp := peak_area_g] 27 | g_table[, sample_id_g_temp := sample_id_g] 28 | } else { 29 | Matches_BM_NPPpeaks[, peak_area_ug_temp := round(peak_area_ug,2)] 30 | Matches_BM_NPPpeaks[, sample_id_b_temp := sample_id_b] 31 | SplittedMatches_BM_NPPpeaks[, peak_area_ug_temp := round(peak_area_ug,2)] 32 | g_table[, peak_area_g_temp := 
round(peak_area_g,2)] 33 | g_table[, sample_id_g_temp := sample_id_g] 34 | } 35 | 36 | #Join 37 | #Matches_BM_NPPpeaks <- g_table[Matches_BM_NPPpeaks, on=.(peak_area_g_temp == peak_area_ug_temp, sample_id_g_temp == sample_id_b_temp), 38 | # allow.cartesian = TRUE, nomatch=NA, mult='all'] 39 | 40 | 41 | Matches_BM_NPPpeaks <- 42 | data.table::merge.data.table(Matches_BM_NPPpeaks, 43 | g_table, 44 | allow.cartesian = TRUE, 45 | #by = c("molecule_b", "adduct_b", "isoab_b", "sample_name_b"), 46 | by.x = c("peak_area_ug_temp", "sample_id_b_temp"), 47 | by.y = c("peak_area_g_temp", "sample_id_g_temp"), 48 | all.x = TRUE, 49 | all.y = FALSE) 50 | 51 | Matches_BM_NPPpeaks[, sample_id_g_temp := sample_id_b_temp] 52 | Matches_BM_NPPpeaks[, peak_area_g_temp := peak_area_ug_temp] 53 | 54 | 55 | #In case of duplicate area matches during ug - g match take the g-match which occurred most often for other peaks in the same BM feature 56 | Matches_BM_NPPpeaks[, N_fid := .N, by = .(molecule_b, adduct_b, isoab_b, feature_id_g)] 57 | Matches_BM_NPPpeaks <- Matches_BM_NPPpeaks[order(-rank(N_fid))][, !"N_fid"] 58 | Matches_BM_NPPpeaks <- unique(Matches_BM_NPPpeaks, by = c("molecule_b", "adduct_b", "isoab_b", "peak_area_g_temp", "sample_id_b")) 59 | 60 | #Replace 0 in peak_area_g with NA (no idea why they appear in the first place)(maybe int64?) 
61 | 62 | SplittedMatches_BM_NPPpeaks <- g_table[SplittedMatches_BM_NPPpeaks, on=.(peak_area_g_temp == peak_area_ug_temp), 63 | allow.cartesian = TRUE, nomatch=NA, mult='all'] 64 | 65 | #Remove _temp Columns 66 | suppressWarnings(Matches_BM_NPPpeaks[,grep('_temp$', colnames(Matches_BM_NPPpeaks)):=NULL]) 67 | 68 | 69 | Matches_BM_NPPpeaks[, id_b_ug := paste(comp_id_b, comp_id_ug, sep='_')] 70 | SplittedMatches_BM_NPPpeaks[, id_b_ug := paste(comp_id_b, comp_id_ug, sep='_')] 71 | 72 | SplittedMatches_BM_NPPpeaks[, present_in_found := ifelse(id_b_ug %in% Matches_BM_NPPpeaks$id_b_ug, 'TRUE', 'FALSE')] 73 | 74 | 75 | match_tables_with_alignment_recovery_info[["Matches_BM_NPPpeaks"]] <- Matches_BM_NPPpeaks 76 | match_tables_with_alignment_recovery_info[["SplittedMatches_BM_NPPpeaks"]] <- SplittedMatches_BM_NPPpeaks 77 | 78 | 79 | #Make sure main peaks only occur once 80 | if (any(duplicated(Matches_BM_NPPpeaks[main_peak==TRUE]))){ 81 | stop('Duplicate Peaks still present after analysis') 82 | } 83 | 84 | 85 | return(match_tables_with_alignment_recovery_info) 86 | 87 | } 88 | -------------------------------------------------------------------------------- /R/match_features_to_benchmark.R: -------------------------------------------------------------------------------- 1 | 2 | #' match_features_to_benchmark 3 | #' 4 | #' @param g_table g_table 5 | #' @param b_table b_table 6 | #' @param Matches_BM_NPPpeaks Matches_BM_NPPpeaks 7 | #' @param Unmatched_BM_NPPpeaks Unmatched_BM_NPPpeaks 8 | #' 9 | #' @keywords internal 10 | 11 | match_features_to_benchmark <- function(g_table, 12 | b_table, 13 | Matches_BM_NPPpeaks, 14 | Unmatched_BM_NPPpeaks){ 15 | 16 | if(nrow(g_table) > 0){ 17 | 18 | ff_table_dt <- pick_main_feature(feature_compare(b_table, g_table, Matches_BM_NPPpeaks[, c("feature_id_b", "feature_id_g")])) 19 | 20 | dt <- ff_table_dt#[main_feature == TRUE] 21 | #dt[, dpl := duplicated(dt, by = c("feature_id_b"))] 22 | #dt <- dt[dpl != TRUE][, !"dpl"] 23 | 24 | id.cols <- 
c("feature_id_b", "feature_id_g", "molecule_b", "isoab_b", "adduct_b", 25 | "total_area_b", "min_mz_start", "max_mz_end", "min_rt_start", 26 | "max_rt_end", "main_feature") 27 | 28 | dt_melt_b <- data.table::melt(dt, 29 | id.vars = id.cols, 30 | measure.vars = colnames(dt)[grepl(utils::glob2rx("sample_*_b"), colnames(dt))], 31 | value.name = "area_b", 32 | variable.name = "sample_id_b", 33 | variable.factor = FALSE) 34 | 35 | dt_melt_b[, sample_id_b := as.factor(substr(dt_melt_b$sample_id_b, 8, nchar(dt_melt_b$sample_id_b) - 2))] 36 | 37 | dt_melt_g <- data.table::melt(dt, 38 | id.vars = id.cols, 39 | measure.vars = colnames(dt)[grepl(utils::glob2rx("sample_*_g"), colnames(dt))], 40 | value.name = "area_g", 41 | variable.name = "sample_id_b", 42 | variable.factor = FALSE) 43 | 44 | dt_melt_g[, sample_id_b := as.factor(substr(dt_melt_g$sample_id_b, 8, nchar(dt_melt_g$sample_id_b) - 2))] 45 | 46 | 47 | dt_n <- dt_melt_g[dt_melt_b, on = colnames(dt_melt_b)[-length(dt_melt_b)]] 48 | 49 | tmp <- unique(data.table(sample_id_b = as.factor(Matches_BM_NPPpeaks[["sample_id_b"]]), 50 | sample_name_b = Matches_BM_NPPpeaks[["sample_name_b"]])) 51 | 52 | Matches_BM_NPPpeaks_NPPfeatures <- dt_n[tmp, on = .(sample_id_b)] 53 | 54 | ug_info <- data.table::rbindlist(list(Matches_BM_NPPpeaks, Unmatched_BM_NPPpeaks), fill = TRUE, use.names = TRUE) 55 | 56 | 57 | # Matches_BM_NPPpeaks_NPPfeatures <- 58 | # Matches_BM_NPPpeaks_NPPfeatures[!is.na(area_b)][ug_info[, c("molecule_b", 59 | # "adduct_b", 60 | # "isoab_b", 61 | # "sample_name_b", 62 | # "peak_area_b", 63 | # "peak_area_ug")], 64 | # on = .(molecule_b, adduct_b, isoab_b, sample_name_b), 65 | # ] 66 | 67 | 68 | Matches_BM_NPPpeaks_NPPfeatures <- 69 | data.table::merge.data.table(Matches_BM_NPPpeaks_NPPfeatures[!is.na(area_b)], 70 | ug_info[, c("molecule_b", 71 | "adduct_b", 72 | "isoab_b", 73 | "sample_name_b", 74 | "peak_area_b", 75 | "peak_area_ug")], 76 | allow.cartesian = TRUE, 77 | by = c("molecule_b", "adduct_b", "isoab_b", 
"sample_name_b"), 78 | all.x = TRUE, 79 | all.y = TRUE) 80 | 81 | } else { 82 | 83 | Matches_BM_NPPpeaks_NPPfeatures <- stats::setNames(data.table(matrix(nrow = 0, ncol = 15)), c("feature_id_b", "feature_id_g", "molecule_b", "isoab_b", "adduct_b", 84 | "total_area_b", "min_mz_start", "max_mz_end", "min_rt_start", 85 | "max_rt_end", "main_feature", "sample_id_b", "area_g", "area_b", 86 | "sample_name_b")) 87 | } 88 | 89 | return(Matches_BM_NPPpeaks_NPPfeatures) 90 | 91 | } 92 | -------------------------------------------------------------------------------- /R/match_peaks_to_benchmark.R: -------------------------------------------------------------------------------- 1 | #' match_peaks_to_benchmark 2 | #' 3 | #' @param b_table b_table 4 | #' @param ug_table ug_table 5 | #' 6 | #' @keywords internal 7 | 8 | match_peaks_to_benchmark <- function(b_table, 9 | ug_table){ 10 | 11 | ############## 12 | #Generating minimum peak boundaries in benchmark 13 | #Untargeted rt range must completely envelope these boundaries 14 | #Defined as taking the shorter of rt_start_b to rt_b or rt_end_b to rt_b, 15 | #taking 50% of this distance, adding and subtracting it from rt_b 16 | ############## 17 | 18 | #Creating temp columns to prevent over-writing by join 19 | ug_table[, ':=' (sample_id_ug_temp = sample_id_ug, 20 | rt_start_ug_temp = rt_start_ug, 21 | rt_end_ug_temp = rt_end_ug, 22 | rt_ug_temp = rt_ug, 23 | mz_ug_temp = mz_ug)] 24 | 25 | 26 | b_table[, ':=' (sample_id_b_temp = sample_id_b, 27 | rt_start_b_temp = rt_start_b, 28 | rt_end_b_temp = rt_end_b, 29 | peak_core_rt_range_start_b_temp = peak_core_rt_range_start_b, 30 | peak_core_rt_range_end_b_temp = peak_core_rt_range_end_b, 31 | mz_start_b_temp = mz_start_b - 0.0002, 32 | mz_end_b_temp = mz_end_b + 0.0002)] 33 | 34 | 35 | ############## 36 | #Conducting non-equi join. 
37 | #rt range must be larger on both sides than calculated peak limits, 38 | #mz must fall within mz start and end of benchmark 39 | ############## 40 | Matches_BM_NPPpeaks <- b_table[ug_table, on=.(sample_id_b_temp == sample_id_ug_temp, 41 | peak_core_rt_range_start_b_temp >= rt_start_ug_temp, 42 | peak_core_rt_range_end_b_temp <= rt_end_ug_temp, 43 | mz_start_b_temp <= mz_ug_temp, 44 | mz_end_b_temp >= mz_ug_temp, 45 | rt_start_b_temp < rt_ug_temp, 46 | rt_end_b_temp > rt_ug_temp), 47 | allow.cartesian=TRUE, nomatch=NULL, mult='all'] 48 | 49 | 50 | Matches_BM_NPPpeaks <- pick_main_peak(Matches_BM_NPPpeaks) 51 | 52 | #Matches_BM_NPPpeaks <- Matches_BM_NPPpeaks[main_peak == TRUE] 53 | suppressWarnings(b_table[,grep('_temp$', colnames(b_table)):=NULL]) 54 | suppressWarnings(ug_table[,grep('_temp$', colnames(ug_table)):=NULL]) 55 | suppressWarnings(Matches_BM_NPPpeaks[,grep('_temp$', colnames(Matches_BM_NPPpeaks)):=NULL]) 56 | 57 | return(Matches_BM_NPPpeaks) 58 | 59 | } 60 | -------------------------------------------------------------------------------- /R/match_peaks_to_benchmark_split.R: -------------------------------------------------------------------------------- 1 | #' match_peaks_to_benchmark_split 2 | #' 3 | #' @param b_table b_table 4 | #' @param ug_table ug_table 5 | #' 6 | #' @keywords internal 7 | 8 | match_peaks_to_benchmark_split <- function(b_table, 9 | ug_table){ 10 | 11 | ug_table[, ':=' (sample_id_ug_temp = sample_id_ug, 12 | rt_start_ug_temp = rt_start_ug, 13 | rt_end_ug_temp = rt_end_ug, 14 | rt_ug_temp = rt_ug, 15 | mz_ug_temp = mz_ug)] 16 | 17 | 18 | b_table[, ':=' (sample_id_b_temp = sample_id_b, 19 | rt_start_b_temp = rt_start_b, 20 | rt_end_b_temp = rt_end_b, 21 | peak_core_rt_range_start_b_temp = peak_core_rt_range_start_b, 22 | peak_core_rt_range_end_b_temp = peak_core_rt_range_end_b, 23 | mz_start_b_temp = mz_start_b - 0.0002, 24 | mz_end_b_temp = mz_end_b + 0.0002)] 25 | 26 | #Find Peaks to the left of benchmark boundaries 27 | 
split_left_table <- b_table[ug_table, on=.(sample_id_b_temp == sample_id_ug_temp, 28 | peak_core_rt_range_start_b_temp >= rt_start_ug_temp, 29 | peak_core_rt_range_start_b_temp <= rt_end_ug_temp, 30 | peak_core_rt_range_end_b_temp >= rt_end_ug_temp, 31 | mz_start_b_temp <= mz_ug_temp, 32 | mz_end_b_temp >= mz_ug_temp), 33 | allow.cartesian=TRUE, nomatch=NULL, mult='all'] 34 | 35 | 36 | #Find Peaks to the right of benchmark boundaries 37 | split_right_table <- b_table[ug_table, on=.(sample_id_b_temp == sample_id_ug_temp, 38 | peak_core_rt_range_start_b_temp <= rt_start_ug_temp, 39 | peak_core_rt_range_end_b_temp >= rt_start_ug_temp, 40 | peak_core_rt_range_end_b_temp <= rt_end_ug_temp, 41 | mz_start_b_temp <= mz_ug_temp, 42 | mz_end_b_temp >= mz_ug_temp), 43 | allow.cartesian=TRUE, nomatch=NULL, mult='all'] 44 | 45 | #Find Peaks inside of benchmark boundaries 46 | split_middle_table <- b_table[ug_table, on=.(sample_id_b_temp == sample_id_ug_temp, 47 | peak_core_rt_range_start_b_temp <= rt_start_ug_temp, 48 | peak_core_rt_range_end_b_temp >= rt_end_ug_temp, 49 | mz_start_b_temp <= mz_ug_temp, 50 | mz_end_b_temp >= mz_ug_temp), 51 | allow.cartesian=TRUE, nomatch=NULL, mult='all'] 52 | 53 | #Combine the split peak tables 54 | SplittedMatches_BM_NPPpeaks <- data.table::rbindlist(list('split_left_table' = split_left_table, 'split_right_table' = split_right_table, 'split_middle_table' = split_middle_table), fill=TRUE, use.names = TRUE, idcol='file') 55 | 56 | suppressWarnings(b_table[,grep('_temp$', colnames(b_table)):=NULL]) 57 | suppressWarnings(ug_table[,grep('_temp$', colnames(ug_table)):=NULL]) 58 | suppressWarnings(SplittedMatches_BM_NPPpeaks[,grep('_temp$', colnames(SplittedMatches_BM_NPPpeaks)):=NULL]) 59 | 60 | return(SplittedMatches_BM_NPPpeaks) 61 | 62 | } 63 | -------------------------------------------------------------------------------- /R/metrics_per_molecule.R: -------------------------------------------------------------------------------- 1 | #' 
#' metrics_per_molecule
#'
#' Summarises the different comparison results into one row per benchmark molecule.
#'
#' @param Matches_BM_NPPpeaks Matches_BM_NPPpeaks
#' @param Unmatched_BM_NPPpeaks Unmatched_BM_NPPpeaks
#' @param Matches_BM_NPPpeaks_NPPfeatures Matches_BM_NPPpeaks_NPPfeatures
#' @param IT_ratio_biases IT_ratio_biases
#' @param SplittedMatches_BM_NPPpeaks SplittedMatches_BM_NPPpeaks
#' @param MissingPeak_classification MissingPeak_classification
#' @param AlignmentErrors_per_moleculeAndAdduct AlignmentErrors_per_moleculeAndAdduct
#'
#' @return data.table with one row per molecule_b and one column per metric; metrics without a value are set to 0
#'
#' @keywords internal
#'

metrics_per_molecule <- function(Matches_BM_NPPpeaks,
                                 Unmatched_BM_NPPpeaks,
                                 Matches_BM_NPPpeaks_NPPfeatures,
                                 IT_ratio_biases,
                                 SplittedMatches_BM_NPPpeaks,
                                 MissingPeak_classification,
                                 AlignmentErrors_per_moleculeAndAdduct){


  extra_pks_pp <- data.table::copy(Matches_BM_NPPpeaks[main_peak == FALSE])
  bm_tab <- data.table::rbindlist(list(Matches_BM_NPPpeaks[main_peak == TRUE], Unmatched_BM_NPPpeaks), fill = TRUE, use.names = TRUE)
  # Rows stemming from the unmatched table have no main_peak value and count as not found
  bm_tab[is.na(main_peak), main_peak := FALSE]


  extra_peaks_pp <- extra_pks_pp[,.(Extra_peak_matches_pp = sum(main_peak == FALSE)), by = .(molecule_b)]

  peaks_pp <- bm_tab[,.(Found_peaks_pp = sum(main_peak),
                        Not_Found_peaks_pp = length(main_peak) - sum(main_peak)), by = .(molecule_b)]


  extra_peaks_ft <- data.table::copy(Matches_BM_NPPpeaks_NPPfeatures[!is.na(peak_area_b) & main_feature == FALSE, .(Extra_feature_matches_ft = length(unique(feature_id_g[!is.na(area_g)]))),
                                                                     by = .(molecule_b, adduct_b, isoab_b)])


  extra_peaks_ft <- extra_peaks_ft[, .(Extra_feature_matches_ft = sum(Extra_feature_matches_ft)), by = .(molecule_b)]


  peaks_ft <- Matches_BM_NPPpeaks_NPPfeatures[!is.na(peak_area_b) & (is.na(area_g) | main_feature == TRUE), .(Found_peaks_ft = sum(!is.na(area_g)),
                                                                                                              Not_Found_peaks_ft = sum(is.na(area_g))),
                                              by = .(molecule_b)]

  IRb <- IT_ratio_biases[, c("molecule_b", "diffH20PP_pp", "diffH20PP_ft")][, .(IRb_ok_pp = sum(diffH20PP_pp == "Inc. < 20%p", na.rm = TRUE),
                                                                                IRb_off_pp = sum(diffH20PP_pp == "Inc. > 20%p", na.rm = TRUE),
                                                                                IRb_ok_ft = sum(diffH20PP_ft == "Inc. < 20%p", na.rm = TRUE),
                                                                                IRb_off_ft = sum(diffH20PP_ft == "Inc. > 20%p", na.rm = TRUE)),
                                                                            by = .(molecule_b)]

  split_pp <- SplittedMatches_BM_NPPpeaks[,c("molecule_b")][,.( Split_peaks = .N), by = .(molecule_b)]

  mw_tab <- MissingPeak_classification[, .(R_pp = sum(missing_peaks_ug == "R", na.rm = TRUE),
                                           S_pp = sum(missing_peaks_ug == "S", na.rm = TRUE),
                                           R_ft = sum(missing_peaks_g == "R", na.rm = TRUE),
                                           S_ft = sum(missing_peaks_g == "S", na.rm = TRUE)),
                                       by = .(molecule_b)]


  ali_tab <- AlignmentErrors_per_moleculeAndAdduct[, .(Min.er = sum(Min.errors, na.rm = TRUE),
                                                       BM.div = sum(BM.div, na.rm = TRUE),
                                                       lost = sum(Lost_b.A, na.rm = TRUE)),
                                                   by = .(Molecule)]

  # Rename the grouping column so it can be joined like all other summary tables
  colnames(ali_tab)[1] <- "molecule_b"

  # Start from the list of molecules and attach each summary table in turn
  sum_tab <- unique(bm_tab[!is.na(molecule_b), "molecule_b"])
  sum_tab <- extra_peaks_pp[sum_tab, on = .(molecule_b)]
  sum_tab <- peaks_pp[sum_tab, on = .(molecule_b)]
  sum_tab <- peaks_ft[sum_tab, on = .(molecule_b)]
  sum_tab <- extra_peaks_ft[sum_tab, on = .(molecule_b)]
  sum_tab <- split_pp[sum_tab, on = .(molecule_b)]
  sum_tab <- IRb[sum_tab, on = .(molecule_b)]
  sum_tab <- mw_tab[sum_tab, on = .(molecule_b)]
  sum_tab <- ali_tab[sum_tab, on = .(molecule_b)]

  # Every column except the first one is filled with 0 where no value was joined
  data.table::setnafill(sum_tab, fill=0, cols = colnames(sum_tab)[-1])


  return(sum_tab)


}

--------------------------------------------------------------------------------
/R/plot_bench_histo.R:
--------------------------------------------------------------------------------

#' plot_bench_histo
#'
#' @param benchmark_data output from \code{\link{find_bench_peaks}}
#' @param var variable name to be plotted
#' @param choice_vector_bench named vector including variable to be plotted as element
#' @param color color of histogram
#' @param post_comp TRUE/FALSE are data from benchmark or comparison with non-targeted
#' @param rm_NF_legend for shiny functionality
#'
#'
#' @importFrom ggplot2 ggplot geom_line aes geom_point geom_vline theme labs annotate
#' scale_fill_manual ggtitle scale_colour_manual theme_classic geom_histogram element_blank xlab geom_bar
#' @return plotly object
#' @export
#'
plot_bench_histo <- function(benchmark_data, var, choice_vector_bench, color = "blue", post_comp = FALSE, rm_NF_legend = FALSE){

  if(post_comp == FALSE){
    if(data.table::is.data.table(benchmark_data) == FALSE){
      benchmark_data <- benchmark_data$PCal
    }
  } else if (post_comp == TRUE){
    dtf <- benchmark_data$Matches_BM_NPPpeaks_NPPfeatures
    benchmark_data <- data.table::rbindlist(list(benchmark_data$Matches_BM_NPPpeaks, benchmark_data$Unmatched_BM_NPPpeaks), fill = TRUE, use.names = TRUE)
    benchmark_data <- benchmark_data[main_peak == TRUE |
                                       is.na(peak_area_ug)]

    benchmark_data <- dtf[!is.na(area_b)][benchmark_data, on = .(molecule_b, adduct_b, isoab_b, sample_name_b), nomatch = NA]
    benchmark_data[, peak_found := FALSE]
    benchmark_data[!is.na(area_g), peak_found := TRUE]
  }

  # Variables shown as a bar chart of their distinct values instead of a histogram
  categorical_vars <- c("molecule", "FileName", "Grp", "adduct", "molecule_b", "sample_name_b", "Grp_b", "adduct_b")

  # Variables plotted on a log10 scale
  log_scaled_vars <- c("peak_height_b", "peak_area_b", "peaks.area", "peaks.height",
                       "ExpectedArea", "ExpectedArea_b", "ExpectedHeight",
                       "ErrorAbs_A", "ErrorAbs_H", "ErrorAbs_A_b", "ErrorAbs_H_b",
                       "ExpectedHeight_b")

  suppressWarnings(
    if(!(var %in% categorical_vars)){

      if(var %in% log_scaled_vars){
        # `:=` changes a data.table in place. Without this copy the caller's table (e.g. the PCal
        # element of the benchmark object) would be overwritten with its log10 values, and every
        # further call would take the logarithm of already transformed data.
        benchmark_data <- data.table::copy(benchmark_data)
        benchmark_data[, (var) := log10(get(var))]
        if(var != "peak_height_b" & var != "peak_area_b"){
          names(choice_vector_bench)[choice_vector_bench == var] <- paste0("log10(",names(choice_vector_bench)[choice_vector_bench == var], ")")
        }
      }

      p <- ggplot(data = benchmark_data[!is.na(get(var))], aes(get(var), fill = if(post_comp == TRUE){as.factor(peak_found)}else{color})) +
        geom_histogram(position = "dodge",
                       #fill = color,
                       bins = 30) +
        theme_classic() +
        ggtitle("Overview - Histogram") +
        xlab(names(choice_vector_bench)[choice_vector_bench == var]) +
        scale_colour_manual(name="benchmark data",
                            values= if(post_comp == TRUE){c("red", color)}else{color},
                            labels= if(post_comp == TRUE){c(names(choice_vector_bench)[choice_vector_bench == var], "2")}else{
                              names(choice_vector_bench)[choice_vector_bench == var]}) +
        scale_fill_manual(name="benchmark data",
                          values= if(post_comp == TRUE){c("red", color)}else{color},
                          labels= if(post_comp == TRUE){c(names(choice_vector_bench)[choice_vector_bench == var], "2")}else{
                            names(choice_vector_bench)[choice_vector_bench == var]}) +
        theme(legend.title = element_blank())


    } else{
      p <- ggplot() +
        geom_bar(data = benchmark_data[!is.na(get(var))], aes(as.character(get(var))), fill = color) +
        ggtitle("Overview - Histogram") +
        xlab(names(choice_vector_bench)[choice_vector_bench == var])

    }
  )

  p <- plotly::ggplotly(p, dynamicTicks = TRUE)

  # Rename the traces of the plotly object for the legend
  if(post_comp == TRUE){
    p$x$data[[2]]$name <- names(choice_vector_bench)[choice_vector_bench == var]
    p$x$data[[1]]$name <- "Peak not Found"
    p$x$data[[2]]$legendgroup <- var
    p$x$data[[1]]$legendgroup <- "Peak not Found"
  } else {
    p$x$data[[1]]$name <- names(choice_vector_bench)[choice_vector_bench == var]
  }

  if(rm_NF_legend == TRUE){
    p$x$data[[1]]$showlegend <- FALSE
  }



  return(p)
}

--------------------------------------------------------------------------------
/R/plot_bench_overview.R:
--------------------------------------------------------------------------------
#' plot_bench_overview
#'
#' Interactive scatter plot of two benchmark peak variables, colored by a third one.
#'
#' @param benchmark_data output from \code{\link{find_bench_peaks}}
#' @param x variable (column name) to be plotted on x axis
#' @param y variable (column name) to be plotted on y axis
#' @param colb variable (column name) to color by
#' @param choice_vector_bench named vector including all variables used as elements
#'
#'
#' @importFrom ggplot2 ggplot aes geom_point theme labs ggtitle theme_classic geom_histogram element_blank xlab
#' @return plotly object
#' @export
#'
plot_bench_overview <- function(benchmark_data, x, y, colb, choice_vector_bench){
  peak_table <- benchmark_data$PCal

  # The display label of a column is the name of its entry in choice_vector_bench.
  label_for <- function(column) names(choice_vector_bench)[choice_vector_bench == column]

  # Only rows with a value on both axes are drawn.
  complete_rows <- peak_table[!is.na(get(x)) & !is.na(get(y))]

  scatter <- suppressWarnings(
    ggplot() +
      # molecule/adduct/isoab/sample_name/key are not ggplot2 aesthetics; they
      # are mapped so that plotly can pick them up (tooltip below).
      geom_point(data = complete_rows,
                 aes(x = get(x),
                     y = get(y),
                     color = get(colb),
                     molecule = molecule,
                     adduct = adduct,
                     isoab = isoab,
                     sample_name = FileName,
                     key = IDX)) +
      theme_classic() +
      labs(x = label_for(x),
           y = label_for(y),
           color = label_for(colb)) +
      ggtitle("Overview - Peaks")
  )

  plotly::ggplotly(scatter,
                   tooltip = c("molecule", "adduct", "isoab", "sample_name"),
                   dynamicTicks = TRUE,
                   source = "bench_scatter")
}

--------------------------------------------------------------------------------
/R/plot_bench_peak_overview.R:
--------------------------------------------------------------------------------
#' plot_bench_peak_overview
#'
#' @param benchmark_data output from \code{\link{find_bench_peaks}}
#' @param molecule molecule
#' @param adduct adduct
#' @param ia isotopic abundance rounded to 2 digits
#'
#' @return plotly object, or NULL if no benchmark peak matches molecule, adduct and ia
#' @export
#'
plot_bench_peak_overview <- function (benchmark_data, molecule, adduct, ia){
  benchmark_data <- benchmark_data$PCal

  # Inside data.table's `i`, column names take precedence over function
  # arguments, so `molecule == molecule` would compare the column with itself.
  # The arguments are therefore copied to names that are not columns.
  sel_molecule <- molecule
  sel_adduct <- adduct
  sel_ia <- ia

  n_hits <- nrow(benchmark_data[molecule == sel_molecule &
                                  adduct == sel_adduct &
                                  round(isoab, 2) == sel_ia])

  if(n_hits > 0){
    p <- suppressWarnings(
      plot_Peak_per_mol(benchmark_data,
                        mol = molecule,
                        add = adduct,
                        ia = ia
      )
    )
    return(p)
  } else {return(NULL)}
}

--------------------------------------------------------------------------------
/R/plot_comp_iso_pred_error.R:
--------------------------------------------------------------------------------
#' plot_comp_iso_pred_error
#'
#' Line plot of the isotopologue ratio bias per benchmark peak across the data
#' types benchmark, non-targeted peak picking and non-targeted features.
#'
#' @param comparison_data output from \code{\link{compare_peaks}}
#' @param post_alignment TRUE/FALSE should NT data from before or after alignment be plotted
#' @param BMvsPPvsAl TRUE/FALSE should argument post_alignment be ignored in order to plot both in one plot.
#' Is set to FALSE internally if column NPP_features holds no values.
#'
#' @importFrom ggplot2 ggplot geom_line aes geom_point geom_vline theme labs annotate
#' scale_fill_manual ggtitle scale_colour_manual theme_classic geom_histogram element_blank xlab scale_color_manual
#' @return plotly object
#' @export
#'
plot_comp_iso_pred_error <- function(comparison_data, post_alignment = FALSE, BMvsPPvsAl = TRUE) {

  IT_ratio_biases <- comparison_data$IT_ratio_biases

  # Without any aligned-feature value the three-column view cannot be drawn.
  if(nrow(IT_ratio_biases[!is.na(NPP_features)]) == 0) {
    BMvsPPvsAl <- FALSE
  }

  if(BMvsPPvsAl == FALSE){

    if(post_alignment == FALSE){
      IT_ratio_biases$diffH20PP <- IT_ratio_biases$diffH20PP_pp
      measure_cols <- c("benchmark", "NPP_peak picking")

    } else if(post_alignment == TRUE) {
      IT_ratio_biases$diffH20PP <- IT_ratio_biases$diffH20PP_ft
      measure_cols <- c("benchmark", "NPP_features")

    } else {stop("Argument post_alignment must be TRUE or FALSE!")}

    # RT_neighbors and mz_neighbors are mapped in aes() below. melt() drops every
    # column that is neither an id nor a measure variable, so they have to be
    # listed here or building the plot fails with "object not found".
    id_cols <- c('molecule_b', 'adduct_b', 'Grp_b', 'isoab_b', 'sample_name_b',
                 'RT_neighbors', 'mz_neighbors', 'diffH20PP')

  }else if(BMvsPPvsAl == TRUE){

    # The subset creates a new table, so the `:=` calls below do not touch
    # comparison_data.
    IT_ratio_biases <- IT_ratio_biases[!is.na(sample_name_b) & !is.na(peak_area_b)]
    IT_ratio_biases[, diffH20PP := diffH20PP_ft]
    IT_ratio_biases[is.na(diffH20PP_ft), diffH20PP := diffH20PP_pp]
    IT_ratio_biases[diffH20PP_pp == "Inc. < 20%p" & (diffH20PP_ft == "Inc. > 20%p"), diffH20PP := "Feature Inc. > 20%p"]

    measure_cols <- c("benchmark", "NPP_peak picking", "NPP_features")
    id_cols <- c('molecule_b', 'adduct_b', 'isoab_b', 'sample_name_b',
                 'RT_neighbors', 'mz_neighbors', 'diffH20PP')
  }

  # Long format: one row per benchmark peak and data type.
  IT_ratio_biases <-
    data.table::melt(
      IT_ratio_biases,
      id.vars = id_cols,
      measure.vars = measure_cols,
      variable.name = 'data_type',
      value.name = 'Pred_error'
    )

  IT_ratio_biases[, grp_col := paste(molecule_b, adduct_b, isoab_b, sample_name_b, sep = "_;_")]
  IT_ratio_biases <- IT_ratio_biases[!is.na(Pred_error) & !is.na(diffH20PP)]

  p <- ggplot(IT_ratio_biases[isoab_b < 100]) +
    suppressWarnings( geom_line(suppressWarnings( aes(x = data_type,
                                                      y = Pred_error,
                                                      group = paste(grp_col, diffH20PP),
                                                      color = diffH20PP,
                                                      molecule = molecule_b,
                                                      adduct = adduct_b,
                                                      isoab = isoab_b,
                                                      sample = sample_name_b,
                                                      RT_neighbors = RT_neighbors,
                                                      mz_neighbors = mz_neighbors,
                                                      diffH20PP = diffH20PP,
                                                      key = grp_col
    )), alpha = 0.3)) +
    theme_classic() +
    scale_color_manual(name = "+ > 20%p", values=c(`Inc. < 20%p` = "#82e0aa", `Inc. > 20%p` = "#ed7467", `Feature Inc. > 20%p` = "goldenrod2")) +
    ggtitle("Relative IT ratio bias") +
    labs(x = "", y = "IT ratio bias [%]") +
    theme(legend.title = element_blank(),
          legend.position = 'bottom')


  # Plain nested call instead of a pipe, so this function does not depend on
  # `%>%` being imported elsewhere in the package.
  p_interactive <- plotly::ggplotly(p, tooltip = c("molecule", "adduct", "isoab", "sample", "RT_neighbors", "mz_neighbors", "Pred_error"),
                                    dynamicTicks = "y",
                                    source = "IRbias")

  return(plotly::layout(p_interactive, legend = list(orientation = "h", x = -0.05, y =-0.1)))

}

--------------------------------------------------------------------------------
/R/plot_comp_missing_value_hm.R:
--------------------------------------------------------------------------------
#' plot_comp_missing_value_hm
#'
#' Heatmap (benchmark features x samples) of the missing-peak classification.
#' Only benchmark features with at least one peak not classified as found are shown.
#'
#' @param comparison_data output from \code{\link{compare_peaks}}
#' @param post_alignment TRUE/FALSE should NT data from before or after alignment be plotted
#' @param disable_plot if TRUE plot is not generated (for shiny app due to long loading times)
#'
#'
#' @importFrom ggplot2 ggplot aes geom_tile theme labs annotate scale_fill_manual ggtitle scale_colour_manual theme_classic geom_histogram element_blank xlab
#' @return plotly object
#' @export
#'
plot_comp_missing_value_hm <- function(comparison_data, post_alignment = FALSE, disable_plot = FALSE) {

  if(missing (comparison_data) || disable_plot == TRUE) return(plotly::ggplotly(ggplot() + ggtitle("Disabeled")))

  # Work on a copy: `:=` below would otherwise add the column missing_peaks to
  # the table stored inside the caller's comparison_data.
  hm_dt <- data.table::copy(comparison_data$MissingPeak_classification)

  if(post_alignment == FALSE){
    hm_dt[, missing_peaks := missing_peaks_ug]

  } else if(post_alignment == TRUE){
    hm_dt[, missing_peaks := missing_peaks_g]

  } else {stop("Argument post_alignment must be TRUE or FALSE!")}

  # Keep only benchmark features that have at least one peak not classified as "F".
  hm_dt <- hm_dt[, if (any(missing_peaks != 'F')) .SD, by = .(molecule_b, adduct_b, isoab_b)]
  if(nrow(hm_dt) == 0) {return(plotly::ggplotly(ggplot() + ggtitle("No missing peaks present")))}
  hm_dt[, plot_group := .GRP, by = .(molecule_b, adduct_b, isoab_b)]

  # nr = number of found peaks per feature; used to order the x axis.
  hm_dt <- hm_dt[missing_peaks == "F", .(nr = .N), by = .(plot_group)][hm_dt, on =.(plot_group), nomatch = NA]
  # Features without a single found peak have no count after the join.
  hm_dt[is.na(nr), nr := 0L]


  hm_dt$ord <- as.integer(hm_dt$sample_id_b)
  hm_dt$sample_id_b <- as.integer(hm_dt$sample_id_b)
  hm_dt <- hm_dt[, c("molecule_b", "adduct_b", "isoab_b", "sample_name_b", "plot_group", "sample_id_b", "missing_peaks", "nr", "ord")]

  # Legend labels for the classification codes.
  hm_dt[missing_peaks == "F", NPP_status := "Found"]
  hm_dt[missing_peaks == "R", NPP_status := "High NA"]
  hm_dt[missing_peaks == "S", NPP_status := "Low NA"]
  hm_dt[missing_peaks == "L", NPP_status := "feature missing"]
  hm_dt[missing_peaks == "NC", NPP_status := "not confirmable"]


  plot_r_s <- ggplot(
    hm_dt,
    aes(
      x = stats::reorder(as.factor(plot_group), nr),
      y = stats::reorder(as.factor(sample_name_b), ord),
      fill = NPP_status,
      molecule = molecule_b,
      #mz = mz_acc_b,
      isoab = round(isoab_b, 2),
      adduct = adduct_b,
      FileName = sample_name_b
    )
  ) +
    theme_classic() +
    geom_tile() +
    scale_fill_manual(values=c(`Found` = "#82e0aa", `High NA` = "red", `Low NA` ="goldenrod2", `feature missing` = "lightpink2", `not confirmable` = "grey76")) +
    ggtitle("Missing values") +
    labs(x = "benchmark features", y = "samples") +
    theme(legend.title = element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank())
  return(plotly::ggplotly(plot_r_s,tooltip = c("NPP_status", "molecule", "adduct", "isoab", "FileName")#, "mz")

  ))


}

--------------------------------------------------------------------------------
/R/plot_comp_peak_overview.R:
--------------------------------------------------------------------------------
#' plot_comp_peak_overview
#'
#' @param comparison_data output from \code{\link{compare_peaks}}
#' @param mol_c molecule
#' @param add_c adduct
#' @param ia_c isotopic abundance rounded to 2 digits
#'
#' @return plotly object, or NULL if no benchmark peak matches mol_c, add_c and ia_c
#' @export
#'
plot_comp_peak_overview <- function(comparison_data, mol_c, add_c, ia_c) {

  # Matched benchmark peaks (main peaks only) together with the unmatched ones.
  plot_dt <- data.table::rbindlist(list(comparison_data$Matches_BM_NPPpeaks[main_peak == TRUE], comparison_data$Unmatched_BM_NPPpeaks), fill = TRUE)

  if(nrow(plot_dt[molecule_b == mol_c & adduct_b == add_c & round(isoab_b, 2) == ia_c]) > 0){

    p <- plot_Peak_per_mol(
      plot_dt,
      mol = mol_c,
      add = add_c,
      ia = ia_c
    )

    return(p)
  } else {
    return(NULL)
  }
}

--------------------------------------------------------------------------------
/R/predict_Iso.R:
--------------------------------------------------------------------------------
#' predict_Iso
#'
#' @description Takes the output of \code{\link{find_bench_peaks}} predicts peak areas as well as peak heights for lower abundant isotopologues
#' from the most abundant isotopologue
#'
#' @param DTT output of \code{\link{find_bench_peaks}}. Must contain the columns IDX, peaks.area and peaks.height.
#' The column named by isoab_col is converted to numeric by reference.
#' @param SampleIdentifier_col name of column(s) with file names
#' @param Molecule_Adduct_col name of column(s) with molecule and adduct identifiers
#' @param isoab_col name of column with isotopic abundance information (the most abundant isotopologue has the value 100)
#' @param flag_extremes whether outliers should be flagged in a column isoab_ol. A peak is not flagged if its relative
#' area error, its relative height error and the difference between both are below the thresholds given by the next
#' three arguments, or if it is the most abundant isotopologue.
#' @param max_bias_area maximum absolute relative area error (in \%) tolerated before a peak is flagged
#' @param max_bias_height maximum absolute relative height error (in \%) tolerated before a peak is flagged
#' @param area_height_bias_diff maximum absolute difference between relative height and relative area error
#' (in percentage points) tolerated before a peak is flagged
#'
#' @return DTT with the additional columns ExpectedArea, ErrorRel_A, ErrorAbs_A, ExpectedHeight, ErrorRel_H and
#' ErrorAbs_H (and isoab_ol if flag_extremes is TRUE)
#'
#' @keywords internal
#'
predict_Iso <- function(DTT, SampleIdentifier_col, Molecule_Adduct_col, isoab_col, flag_extremes = FALSE, max_bias_area = 35, max_bias_height = 30, area_height_bias_diff = 30)
{

  DTT <- DTT[, (isoab_col) := as.numeric(get(isoab_col))]


  newcols <- c("ExpectedArea", "ErrorRel_A", "ErrorAbs_A", "ExpectedHeight", "ErrorRel_H", "ErrorAbs_H")


  # Join every lower abundant isotopologue to the most abundant one (isoab == 100)
  # of the same sample/molecule/adduct. Columns prefixed with "i." come from the
  # most abundant isotopologue; get(isoab_col) is the abundance of the lower one.
  DT_tmp <- DTT[get(isoab_col) != 100][DTT[get(isoab_col) == 100],
                                      on=c(SampleIdentifier_col, Molecule_Adduct_col),
                                      nomatch = 0L, allow.cartesian=TRUE][,(newcols) := .((i.peaks.area * get(isoab_col)) / 100,
                                                                                          (peaks.area / ((i.peaks.area * get(isoab_col)) / 100) - 1) * 100,
                                                                                          peaks.area - ((i.peaks.area * get(isoab_col)) / 100),
                                                                                          i.peaks.height * get(isoab_col) / 100,
                                                                                          (peaks.height / ((i.peaks.height * get(isoab_col)) / 100) - 1) * 100,
                                                                                          peaks.height - ((i.peaks.height * get(isoab_col)) / 100))]


  Output <- merge(DTT, DT_tmp[,.(IDX, ExpectedArea, ErrorRel_A, ErrorAbs_A, ExpectedHeight, ErrorRel_H, ErrorAbs_H)], by = 'IDX', all.x = TRUE, allow.cartesian = TRUE)


  if(flag_extremes == TRUE){

    # Everything is an outlier unless it passes all three thresholds or is the
    # most abundant isotopologue. The abundance column is addressed through
    # isoab_col instead of a hard-coded column name.
    Output[, isoab_ol := TRUE]
    Output[(abs(ErrorRel_A) < max_bias_area &
              abs(ErrorRel_H) < max_bias_height &
              abs(ErrorRel_H - ErrorRel_A) < area_height_bias_diff) |
             get(isoab_col) == 100,
           isoab_ol := FALSE]

  }
  Output
}

--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
citHeader("To cite mzRAPP in publications use:")

citEntry(
  entry = "Article",
  title = "mzRAPP: a tool for reliability assessment of data pre-processing in non-targeted metabolomics",
  author = "Yasin El Abiead, Maximilian Milford, Reza M Salek and Gunda Koellensperger",
  journal = "Bioinformatics",
  year = "2021",
  #volume = ,
  #number = ,
  #pages = ,
  url = "https://doi.org/10.1093/bioinformatics/btab231",
  textVersion = paste(
    "Yasin El Abiead, Maximilian Milford, Reza M Salek, Gunda Koellensperger, mzRAPP: a tool for reliability assessment of data pre-processing in non-targeted metabolomics, Bioinformatics, 2021;, btab231, https://doi.org/10.1093/bioinformatics/btab231"
  )
)

--------------------------------------------------------------------------------
/inst/md/.gitignore:
--------------------------------------------------------------------------------
README.Rmd
*.csv
*.RMD
*.Rmd
*.txt
*.Rproj

--------------------------------------------------------------------------------
/inst/md/Alignment_error_graphic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YasinEl/mzRAPP/3abbccdedd2bbf8562d977f5605013ee31142b2b/inst/md/Alignment_error_graphic.png
--------------------------------------------------------------------------------
/inst/md/IR_tolerance.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YasinEl/mzRAPP/3abbccdedd2bbf8562d977f5605013ee31142b2b/inst/md/IR_tolerance.PNG
--------------------------------------------------------------------------------
/inst/md/Matching.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YasinEl/mzRAPP/3abbccdedd2bbf8562d977f5605013ee31142b2b/inst/md/Matching.png
--------------------------------------------------------------------------------
/inst/md/Missing_value_graphic.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YasinEl/mzRAPP/3abbccdedd2bbf8562d977f5605013ee31142b2b/inst/md/Missing_value_graphic.PNG
--------------------------------------------------------------------------------
/inst/md/Peak_subsets.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YasinEl/mzRAPP/3abbccdedd2bbf8562d977f5605013ee31142b2b/inst/md/Peak_subsets.PNG
-------------------------------------------------------------------------------- /inst/md/mzRAPP_0.2.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YasinEl/mzRAPP/3abbccdedd2bbf8562d977f5605013ee31142b2b/inst/md/mzRAPP_0.2.0.tar.gz -------------------------------------------------------------------------------- /man/.gitignore: -------------------------------------------------------------------------------- 1 | choose_main_peak_old.Rd 2 | -------------------------------------------------------------------------------- /man/Alignment_error_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/alignment_error_plot.R 3 | \name{Alignment_error_plot} 4 | \alias{Alignment_error_plot} 5 | \title{Alignment_error_plot} 6 | \usage{ 7 | Alignment_error_plot(comparison_data, mol, add) 8 | } 9 | \arguments{ 10 | \item{comparison_data}{output of compare_peaks} 11 | 12 | \item{mol}{Name of molecule} 13 | 14 | \item{add}{Name of adduct} 15 | } 16 | \value{ 17 | plotly object 18 | } 19 | \description{ 20 | Alignment_error_plot 21 | } 22 | -------------------------------------------------------------------------------- /man/File_con_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_find_r_s_error.R 3 | \name{File_con_test} 4 | \alias{File_con_test} 5 | \title{File_con_test} 6 | \usage{ 7 | File_con_test(FileName, feature_id) 8 | } 9 | \arguments{ 10 | \item{FileName}{FileName} 11 | 12 | \item{feature_id}{feature_id} 13 | } 14 | \value{ 15 | connected file grps 16 | } 17 | \description{ 18 | File_con_test 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/GetFWXM.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetFWXM.R 3 | \name{GetFWXM} 4 | \alias{GetFWXM} 5 | \title{GetFWXM} 6 | \usage{ 7 | GetFWXM(RT_vect, Int_vect, baseL, X, peak_borders = FALSE, return_diff = FALSE) 8 | } 9 | \arguments{ 10 | \item{RT_vect}{vector with retention times} 11 | 12 | \item{Int_vect}{vector with intensities} 13 | 14 | \item{baseL}{height of baseline} 15 | 16 | \item{X}{peak height at which width should be measured (e.g. 0.5 means width at half maximum)} 17 | 18 | \item{return_diff}{TRUE/FALSE; should the output be a difference of end point and start point, or both points as a vector} 19 | } 20 | \value{ 21 | Width of the peak (numeric) 22 | } 23 | \description{ 24 | Returns the width of a peak at Full Width at X Maximum, with X being the factor by which the maximum of the peak is multiplied. 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/GetSharpness.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetSharpness.R 3 | \name{GetSharpness} 4 | \alias{GetSharpness} 5 | \title{GetSharpness} 6 | \usage{ 7 | GetSharpness(int) 8 | } 9 | \arguments{ 10 | \item{int}{int} 11 | } 12 | \description{ 13 | As described in: 14 | Zhang,W. and Zhao,P.X. (2014) Quality evaluation of extracted ion chromatograms and chromatographic peaks in 15 | liquid chromatography/mass spectrometry-based metabolomics data. BMC Bioinformatics, 15, 1–13.
16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/GetZigzagIDX.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetZigzagIDX.R 3 | \name{GetZigzagIDX} 4 | \alias{GetZigzagIDX} 5 | \title{GetZigzagIDX} 6 | \usage{ 7 | GetZigzagIDX(int, height) 8 | } 9 | \arguments{ 10 | \item{int}{int} 11 | 12 | \item{height}{height} 13 | } 14 | \description{ 15 | As described in: 16 | Zhang,W. and Zhao,P.X. (2014) Quality evaluation of extracted ion chromatograms and chromatographic peaks in 17 | liquid chromatography/mass spectrometry-based metabolomics data. BMC Bioinformatics, 15, 1–13. 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/Get_MZ_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Get_MZ_list.R 3 | \name{Get_MZ_list} 4 | \alias{Get_MZ_list} 5 | \title{Get_MZ_list} 6 | \usage{ 7 | Get_MZ_list(l.peaks, raw_data, CompCol_xic, EIC.dt, max.mz.diff_ppm) 8 | } 9 | \arguments{ 10 | \item{l.peaks}{l.peaks} 11 | 12 | \item{raw_data}{raw_data} 13 | 14 | \item{CompCol_xic}{CompCol_xic} 15 | } 16 | \description{ 17 | Get_MZ_list 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/Get_peak_vars.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Get_peak_vars.R 3 | \name{Get_peak_vars} 4 | \alias{Get_peak_vars} 5 | \title{Get_peak_vars} 6 | \usage{ 7 | Get_peak_vars( 8 | l.peaks, 9 | EIC.dt, 10 | CompCol_xic, 11 | l.peaks.mz_list, 12 | iso.run, 13 | adduct.run, 14 | manual_bound 15 | ) 16 | } 17 | \arguments{ 18 
| \item{l.peaks}{l.peaks} 19 | 20 | \item{EIC.dt}{EIC.dt} 21 | 22 | \item{CompCol_xic}{CompCol_xic} 23 | 24 | \item{l.peaks.mz_list}{l.peaks.mz_list} 25 | 26 | \item{iso.run}{iso.run} 27 | 28 | \item{adduct.run}{adduct.run} 29 | 30 | \item{manual_bound}{manual_bound} 31 | } 32 | \description{ 33 | Get_peak_vars 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/Limit_Target_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Limit_Target_list.R 3 | \name{Limit_Target_list} 4 | \alias{Limit_Target_list} 5 | \title{Limit_Target_list} 6 | \usage{ 7 | Limit_Target_list( 8 | CompCol, 9 | CondPeaks, 10 | iso.run, 11 | adduct.run, 12 | file, 13 | Min.PointsperPeak 14 | ) 15 | } 16 | \arguments{ 17 | \item{CompCol}{CompCol} 18 | 19 | \item{CondPeaks}{CondPeaks} 20 | 21 | \item{iso.run}{iso.run} 22 | 23 | \item{adduct.run}{adduct.run} 24 | 25 | \item{file}{file} 26 | 27 | \item{Min.PointsperPeak}{PpP} 28 | } 29 | \description{ 30 | Limit_Target_list 31 | } 32 | \keyword{internal} 33 | -------------------------------------------------------------------------------- /man/SkylinePeakBoundaries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Skyline_functions.R 3 | \name{SkylinePeakBoundaries} 4 | \alias{SkylinePeakBoundaries} 5 | \title{SkylinePeakBoundaries} 6 | \usage{ 7 | SkylinePeakBoundaries(BM, export_to_csv = TRUE) 8 | } 9 | \arguments{ 10 | \item{BM}{output of \code{\link{find_bench_peaks}}} 11 | 12 | \item{export_to_csv}{export output automatically to working directory} 13 | } 14 | \value{ 15 | Skyline peak boundaries 16 | } 17 | \description{ 18 | Takes the output of \code{\link{find_bench_peaks}} and generates a Skyline peak-boundaries file 
(automatically exported to working directory) which can then be imported to Skyline via 19 | Skyline -> File -> Import -> Peak Boundaries... (after the required mzML files have been imported into Skyline using Skyline -> Import -> Results...) 20 | } 21 | -------------------------------------------------------------------------------- /man/SkylineTransitionList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Skyline_functions.R 3 | \name{SkylineTransitionList} 4 | \alias{SkylineTransitionList} 5 | \title{SkylineTransitionList} 6 | \usage{ 7 | SkylineTransitionList(BM, export_to_csv = TRUE) 8 | } 9 | \arguments{ 10 | \item{BM}{output of \code{\link{find_bench_peaks}}} 11 | 12 | \item{export_to_csv}{export output automatically to working directory} 13 | } 14 | \value{ 15 | Skyline Transition List 16 | } 17 | \description{ 18 | Takes the output of \code{\link{find_bench_peaks}} and generates a Skyline Transition list (automatically exported to working directory) which can then be imported to Skyline via 19 | Skyline -> File -> Import -> Transition List 20 | } 21 | -------------------------------------------------------------------------------- /man/as.sunburstDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sunburst_plots.R 3 | \name{as.sunburstDF} 4 | \alias{as.sunburstDF} 5 | \title{as.sunburstDF} 6 | \usage{ 7 | as.sunburstDF(DF, valueCol = NULL) 8 | } 9 | \arguments{ 10 | \item{DF}{DF} 11 | 12 | \item{valueCol}{valueCol} 13 | } 14 | \description{ 15 | This function was written by ismirsehregal on stackoverflow https://stackoverflow.com/questions/57395424/how-to-format-data-for-plotly-sunburst-diagram 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- 
/man/assemble_peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assemble_peaks.R 3 | \name{assemble_peaks} 4 | \alias{assemble_peaks} 5 | \title{assemble_peaks} 6 | \usage{ 7 | assemble_peaks(peak.grp, s, e, breakP) 8 | } 9 | \arguments{ 10 | \item{peak.grp}{peak.grp} 11 | 12 | \item{s}{start} 13 | 14 | \item{e}{end} 15 | 16 | \item{breakP}{break point} 17 | } 18 | \description{ 19 | assemble_peaks 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/assess_alignment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assess_alignment.R 3 | \name{assess_alignment} 4 | \alias{assess_alignment} 5 | \title{assess_alignment} 6 | \usage{ 7 | assess_alignment(Matches_BM_NPPpeaks, Unmatched_BM_NPPpeaks, g_table) 8 | } 9 | \arguments{ 10 | \item{Matches_BM_NPPpeaks}{Matches_BM_NPPpeaks} 11 | 12 | \item{Unmatched_BM_NPPpeaks}{Unmatched_BM_NPPpeaks} 13 | 14 | \item{g_table}{g_table} 15 | } 16 | \description{ 17 | assess_alignment 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/best_feature_per_comparison.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_pick_main_feature.R 3 | \name{best_feature_per_comparison} 4 | \alias{best_feature_per_comparison} 5 | \title{best_feature_per_comparison} 6 | \usage{ 7 | best_feature_per_comparison(dt) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | } 12 | \description{ 13 | best_feature_per_comparison 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/callmzRAPP.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callmzRAPP.R 3 | \name{callmzRAPP} 4 | \alias{callmzRAPP} 5 | \title{callmzRAPP} 6 | \usage{ 7 | callmzRAPP() 8 | } 9 | \value{ 10 | shiny app 11 | } 12 | \description{ 13 | callmzRAPP 14 | } 15 | -------------------------------------------------------------------------------- /man/check_IR_biases.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_IR_biases.R 3 | \name{check_IR_biases} 4 | \alias{check_IR_biases} 5 | \title{check_IR_biases} 6 | \usage{ 7 | check_IR_biases( 8 | Matches_BM_NPPpeaks, 9 | Matches_BM_NPPpeaks_NPPfeatures, 10 | g_table, 11 | b_table 12 | ) 13 | } 14 | \arguments{ 15 | \item{Matches_BM_NPPpeaks}{Matches_BM_NPPpeaks} 16 | 17 | \item{Matches_BM_NPPpeaks_NPPfeatures}{Matches_BM_NPPpeaks_NPPfeatures} 18 | 19 | \item{g_table}{g_table} 20 | 21 | \item{b_table}{b_table} 22 | } 23 | \description{ 24 | check_IR_biases 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/check_benchmark_input.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_benchmark.R 3 | \name{check_benchmark_input} 4 | \alias{check_benchmark_input} 5 | \title{check_benchmark_input} 6 | \usage{ 7 | check_benchmark_input(file, options_path = "generate", from_csv = TRUE, algo) 8 | } 9 | \arguments{ 10 | \item{file}{output of \code{\link{find_bench_peaks}}. Can be path to csv file or a data table object (meaning that is.data.table(file) returns TRUE).} 11 | 12 | \item{options_path}{can be a string "generate" in order to use default column names for chosen algo. 
In the future we might include a possibility to allow the user to choose column names.} 13 | 14 | \item{from_csv}{TRUE or FALSE depending on file being a data.table object or a path to a csv} 15 | 16 | \item{algo}{tool output format to compare the benchmark against. Can be XCMS, XCMS3, Metaboanalyst, SLAW, El-Maven, OpenMS, MS-DIAL, CompoundDiscoverer or MZmine 2. Outputs from different tools can also be used as long as they are reformatted to one of those types.} 17 | } 18 | \value{ 19 | returns a list including the benchmark in a format readable by \code{\link{compare_peaks}}. 20 | } 21 | \description{ 22 | Checks the benchmark dataset and brings it into a format readable by \code{\link{compare_peaks}}. All molecules for which the most abundant isotopologue is not present, 23 | or fewer than 2 isotopologues are present are deleted. Moreover, isotopologues which appear in only one file are deleted. 24 | } 25 | -------------------------------------------------------------------------------- /man/check_missing_peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_missing_peaks.R 3 | \name{check_missing_peaks} 4 | \alias{check_missing_peaks} 5 | \title{check_missing_peaks} 6 | \usage{ 7 | check_missing_peaks( 8 | Matches_BM_NPPpeaks, 9 | Unmatched_BM_NPPpeaks, 10 | Matches_BM_NPPpeaks_NPPfeatures, 11 | g_table 12 | ) 13 | } 14 | \arguments{ 15 | \item{Matches_BM_NPPpeaks}{Matches_BM_NPPpeaks} 16 | 17 | \item{Unmatched_BM_NPPpeaks}{Unmatched_BM_NPPpeaks} 18 | 19 | \item{Matches_BM_NPPpeaks_NPPfeatures}{Matches_BM_NPPpeaks_NPPfeatures} 20 | 21 | \item{g_table}{g_table} 22 | } 23 | \description{ 24 | check_missing_peaks 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/check_nonTargeted_input.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_pick_algo.R 3 | \name{check_nonTargeted_input} 4 | \alias{check_nonTargeted_input} 5 | \title{check_nonTargeted_input} 6 | \usage{ 7 | check_nonTargeted_input( 8 | ug_table_path, 9 | g_table_path, 10 | options_table = "generate", 11 | algo 12 | ) 13 | } 14 | \arguments{ 15 | \item{ug_table_path}{path to unaligned table(s)} 16 | 17 | \item{g_table_path}{path to aligned table} 18 | 19 | \item{options_table}{output from \code{\link{check_benchmark_input}}} 20 | 21 | \item{algo}{tool output format of ug_table and g_table. can be XCMS, XCMS3, Metaboanalyst, SLAW, El-Maven, OpenMS, MS-DIAL, CompoundDiscoverer or MZmine 2. Outputs from different tools can also be used as long as they are reformatted to one of those types.} 22 | } 23 | \value{ 24 | returns unaligned and aligned outputs from non-targeted tool in a format readable via \code{\link{compare_peaks}} 25 | } 26 | \description{ 27 | check_nonTargeted_input 28 | } 29 | -------------------------------------------------------------------------------- /man/classify_false_negative.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_classify_false_negative.R 3 | \name{classify_false_negative} 4 | \alias{classify_false_negative} 5 | \title{classify_false_negative} 6 | \usage{ 7 | classify_false_negative(dt) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | } 12 | \description{ 13 | classify_false_negative 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/clean_peak_assignments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean_peak_assignments.R 3 | \name{clean_peak_assignments} 4 | \alias{clean_peak_assignments} 5 | 
\title{clean_peak_assignments} 6 | \usage{ 7 | clean_peak_assignments(Input_table) 8 | } 9 | \arguments{ 10 | \item{Input_table}{unfinished benchmark} 11 | } 12 | \value{ 13 | unfinished benchmark 14 | } 15 | \description{ 16 | clean_peak_assignments 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/count_alignment_errors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_funktionen_fuer_alignment.R 3 | \name{count_alignment_errors} 4 | \alias{count_alignment_errors} 5 | \title{count_alignment_errors} 6 | \usage{ 7 | count_alignment_errors(DT, main_UTgroups, method = "both") 8 | } 9 | \arguments{ 10 | \item{DT}{DT} 11 | 12 | \item{main_UTgroups}{main_UTgroups} 13 | 14 | \item{method}{method} 15 | } 16 | \description{ 17 | count alignment errors occurring within a compound 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/count_errors_max.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_count_max_error.R 3 | \name{count_errors_max} 4 | \alias{count_errors_max} 5 | \title{count_errors_max} 6 | \usage{ 7 | count_errors_max(dt) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | } 12 | \description{ 13 | count_errors_max 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/cutout_peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cutout_peak.R 3 | \name{cutout_peaks} 4 | \alias{cutout_peaks} 5 | \title{cutout_peaks} 6 | \usage{ 7 | cutout_peaks( 8 | int, 9 | rt, 10 | Min.PpP = 10, 11 |
peak.spotting.factor. = 0.01, 12 | Integration_baseL_factor. = 0.1, 13 | l = 1, 14 | r = length(int), 15 | M0.grp = NA, 16 | main_adduct.grp = NA, 17 | Min.Res. = 70 18 | ) 19 | } 20 | \arguments{ 21 | \item{int}{int} 22 | 23 | \item{rt}{rt} 24 | 25 | \item{Min.PpP}{Min.PpP} 26 | 27 | \item{peak.spotting.factor.}{peak.spotting.factor.} 28 | 29 | \item{Integration_baseL_factor.}{Integration_baseL_factor.} 30 | 31 | \item{l}{l} 32 | 33 | \item{r}{r} 34 | 35 | \item{M0.grp}{M0.grp} 36 | 37 | \item{main_adduct.grp}{main_adduct.grp} 38 | 39 | \item{Min.Res.}{Min.Res.} 40 | } 41 | \description{ 42 | detects peaks 43 | } 44 | \keyword{internal} 45 | -------------------------------------------------------------------------------- /man/derive_performance_metrics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_generate_results_text.R 3 | \name{derive_performance_metrics} 4 | \alias{derive_performance_metrics} 5 | \title{derive_performance_metrics} 6 | \usage{ 7 | derive_performance_metrics(comparison_data) 8 | } 9 | \arguments{ 10 | \item{comparison_data}{output from \code{\link{compare_peaks}}} 11 | } 12 | \value{ 13 | returns a list containing different performance metrics of non-targeted data pre-processing. 14 | } 15 | \description{ 16 | Returns a list with counts, statistics and performance metrics. For each metric a count and for some also a confidence interval (CI) 17 | as estimated via bootstrapping is reported. CIs are reported for percentages which should not only be true for the benchmark but also be an estimate for non-targeted 18 | processing results. For more information please check the details section below. 
Background on the logic behind the different metrics is provided in the mzRAPP Readme 19 | \url{https://github.com/YasinEl/mzRAPP#generation-and-interpretation-of-npp-performance-metrics} 20 | } 21 | \details{ 22 | Bootstrapping is performed on benchmark molecules with R=1000 using \code{\link{boot.ci}} with type="basic" which returns a vector of 23 | length = 5. The last two numbers of this vector correspond to the lower and upper boundaries of the CI. For more information please check \code{\link{boot.ci}}. 24 | 25 | \strong{Benchmark:} information on the used benchmark (BM) 26 | 27 | BM_peaks: number of BM peaks 28 | 29 | Features: number of aligned BM features 30 | 31 | \strong{Before_alignment:} Information on the non-targeted peak picking step. Ideas behind the different metrics are explained in the mzRAPP readme. 32 | 33 | NT_peaks: NA 34 | 35 | Found_peaks$count: Number of BM peaks for which a match was found in unaligned results. 36 | 37 | Found_peaks$CI: CI for percentage of BM peaks for which a match was found. 38 | 39 | Split_peaks$count: Number of split-peaks detected (as defined in mzRAPP readme) 40 | 41 | Split_peaks$CI: CI for percentage of split-peaks from all matches (Found_peaks$count + Split_peaks$count) 42 | 43 | Missing_peaks$Systematic: count of low missing peaks (as defined in mzRAPP readme) 44 | 45 | Missing_peaks$Random$count: count of high missing peaks 46 | 47 | Missing_peaks$Random$CI: CI for percentage of high missing peaks from all classifiable missing peaks (Missing_peaks$Systematic + Missing_peaks$Random$count) 48 | 49 | IR_quality$Error_inc_below20pp: count for isotopologue ratio biases which did not increase by more than 20 %p. (as defined in mzRAPP readme) 50 | 51 | IR_quality$Error_inc_above20pp$count: count for isotopologue ratio biases which did increase by more than 20 %p.
52 | 53 | IR_quality$Error_inc_above20pp$CI: CI for percentage of all isotopologue ratios derivable from matched non-targeted peaks which did increase by more than 20 %p. 54 | 55 | \strong{Alignment:} Information on the non-targeted alignment step. Ideas behind the different metrics are explained in the mzRAPP readme. 56 | 57 | Min.Errors$count: Count of alignment errors 58 | 59 | Min.Errors$CI: CI for percentage of alignment errors from all matched peaks (Found_peaks$count) 60 | 61 | BM_divergences$count: count of divergences in alignment between the BM and the non-targeted output. (Min.Errors$count is a subgroup of this) 62 | 63 | BM_divergences$CI: CI for percentage of benchmark divergences from all matched peaks (Found_peaks$count) 64 | 65 | Lost_b.A$count: count of matched non-targeted peaks (Found_peaks$count) which were present in the unaligned, but not the aligned output. 66 | 67 | Lost_b.A$CI: CI for percentage of not-found matched peaks from all matched peaks (Found_peaks$count) 68 | 69 | \strong{After_alignment:} The same metrics calculated before alignment are also calculated here (with the exception of Split_peaks 70 | which can not be derived from aligned results) 71 | } 72 | -------------------------------------------------------------------------------- /man/detect_double_peaks2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detect_double_peaks.R 3 | \name{detect_double_peaks2} 4 | \alias{detect_double_peaks2} 5 | \title{detect_double_peaks2} 6 | \usage{ 7 | detect_double_peaks2( 8 | pot.doubleP.v, 9 | Min.PpP = 10, 10 | l = 1, 11 | r = length(pot.doubleP.v), 12 | Min.Res = 70 13 | ) 14 | } 15 | \arguments{ 16 | \item{pot.doubleP.v}{pot.doubleP.v} 17 | 18 | \item{Min.PpP}{Min.PpP} 19 | 20 | \item{l}{l} 21 | 22 | \item{r}{r} 23 | 24 | \item{Min.Res}{Min.Res} 25 | } 26 | \description{ 27 | detect_double_peaks2 28 | } 29 | 
\keyword{internal} 30 | -------------------------------------------------------------------------------- /man/feature_compare.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_feature_compare.R 3 | \name{feature_compare} 4 | \alias{feature_compare} 5 | \title{feature_compare} 6 | \usage{ 7 | feature_compare(b_table, g_table, areaMatch_table = NA) 8 | } 9 | \arguments{ 10 | \item{b_table}{b_table} 11 | 12 | \item{g_table}{g_table} 13 | 14 | \item{areaMatch_table}{areaMatch_table} 15 | } 16 | \description{ 17 | feature_compare 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/find_best_feature_feature.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_find_best_feature_feature.R 3 | \name{find_best_feature_feature} 4 | \alias{find_best_feature_feature} 5 | \title{find_best_feature_feature} 6 | \usage{ 7 | find_best_feature_feature(dt, bys) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | 12 | \item{bys}{bys} 13 | } 14 | \value{ 15 | best feature 16 | } 17 | \description{ 18 | find_best_feature_feature 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/find_r_s_error.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_find_r_s_error.R 3 | \name{find_r_s_error} 4 | \alias{find_r_s_error} 5 | \title{find_r_s_error} 6 | \usage{ 7 | find_r_s_error(peak_area_b, peak_area, peak_height_b, Connected) 8 | } 9 | \arguments{ 10 | \item{peak_area_b}{peak_area_b} 11 | 12 | \item{peak_height_b}{peak_height_b} 13 | 14 | \item{peak_area}{peak_area} 15 | } 16 |
\description{ 17 | find_r_s_error 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/generate_options.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_generate_options.R 3 | \name{generate_options} 4 | \alias{generate_options} 5 | \title{generate_options} 6 | \usage{ 7 | generate_options(raw_b_table, algo) 8 | } 9 | \arguments{ 10 | \item{raw_b_table}{raw_b_table} 11 | 12 | \item{algo}{algo} 13 | } 14 | \description{ 15 | generate_options 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/getXIC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Helper_functions.R 3 | \name{getXIC} 4 | \alias{getXIC} 5 | \title{getXIC} 6 | \usage{ 7 | getXIC(PC, IndexNumber) 8 | } 9 | \arguments{ 10 | \item{PC}{PC} 11 | 12 | \item{IndexNumber}{IndexNumber} 13 | } 14 | \description{ 15 | getXIC 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/get_EIMatches_BM_NPPpeaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_EIC_table.R 3 | \name{get_EIMatches_BM_NPPpeaks} 4 | \alias{get_EIMatches_BM_NPPpeaks} 5 | \title{get_EIMatches_BM_NPPpeaks} 6 | \usage{ 7 | get_EIMatches_BM_NPPpeaks(rt, int, Min.PpP) 8 | } 9 | \arguments{ 10 | \item{rt}{rt} 11 | 12 | \item{int}{int} 13 | 14 | \item{Min.PpP}{Min.PpP} 15 | } 16 | \description{ 17 | get_EIMatches_BM_NPPpeaks 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/get_ROIs.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getEICfromROI_par.R 3 | \name{get_ROIs} 4 | \alias{get_ROIs} 5 | \title{get_ROIs} 6 | \usage{ 7 | get_ROIs( 8 | files, 9 | Target.table, 10 | minCentroids = 4, 11 | AccurateMZtol = 5, 12 | PrecisionMZtol = 5, 13 | plan = "multiprocess" 14 | ) 15 | } 16 | \arguments{ 17 | \item{files}{vector containing all mzML file paths} 18 | 19 | \item{Target.table}{output of function \code{\link{get_mz_table}}} 20 | 21 | \item{minCentroids}{minimum number of consecutive scans > 0 for a ROI to be picked up (equiv. to minCentroids argument in xcms:::findmzROI function)} 22 | 23 | \item{AccurateMZtol}{mass accuracy (systematic error tolerance) in +/- ppm; this value is used to recognize detected ROIs as the expected mz values calculated in \code{\link{get_mz_table}}. If multiple ROIs fit the same benchmark peak they are combined.} 24 | 25 | \item{PrecisionMZtol}{mass precision (random error tolerance) in +/- ppm; this value is used for setting the maximum spread of scans within one ROI (equiv. to "dev" argument * 1e-6 in xcms:::findmzROI)} 26 | 27 | \item{plan}{see \code{\link{plan}}} 28 | } 29 | \value{ 30 | data.table object with information on ROIs for each row in Target.table. additional columns from Target.table are retained 31 | } 32 | \description{ 33 | Takes the output of \code{\link{get_mz_table}} and adds columns with information on regions of interest (ROI).
34 | } 35 | \details{ 36 | \strong{eic_mzmin:} lowest mz value detected in respective ROI 37 | 38 | \strong{eic_mzmax:} highest mz value detected in respective ROI 39 | } 40 | -------------------------------------------------------------------------------- /man/get_avg_noise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detect_double_peaks.R 3 | \name{get_avg_noise} 4 | \alias{get_avg_noise} 5 | \title{get_avg_noise} 6 | \usage{ 7 | get_avg_noise(int) 8 | } 9 | \arguments{ 10 | \item{int}{int} 11 | } 12 | \description{ 13 | get_avg_noise 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/get_main_UT_groups.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_funktionen_fuer_alignment.R 3 | \name{get_main_UT_groups} 4 | \alias{get_main_UT_groups} 5 | \title{get_main_UT_groups} 6 | \usage{ 7 | get_main_UT_groups(DT) 8 | } 9 | \arguments{ 10 | \item{DT}{DT} 11 | } 12 | \description{ 13 | Find the most frequently occurring feature ID within matched peaks of a benchmark feature 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/get_mz_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GenerateMassTraceList.R 3 | \name{get_mz_table} 4 | \alias{get_mz_table} 5 | \title{get_mz_table} 6 | \usage{ 7 | get_mz_table( 8 | DT, 9 | instrumentRes, 10 | RelInt_threshold = 0.05, 11 | stick_method = "intensoid", 12 | adducts, 13 | isotopes, 14 | screening_adducts = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{DT}{data.table with columns "molecule", "SumForm_c", "adduct_c".
Additional columns "user.rtmin" and "user.rtmax" (start and end times of peaks in seconds) are not required by this function, but by the next function in the mzRAPP pipeline, so it is recommended to add them already here (see details).} 19 | 20 | \item{instrumentRes}{data frame with mz vs resolution dependence (see \code{\link{resolution_list}}) e.g. "resolution_list$`OTFusion,QExactiveHF_120000@200`"} 21 | 22 | \item{RelInt_threshold}{relative abundance of the lowest isotopologue to be considered as percentage} 23 | 24 | \item{stick_method}{method that should be used to calculate discrete m/z values from calculated profile pattern e.g. "intensoid" (see \code{\link{vdetect}})} 25 | 26 | \item{adducts}{data frame containing adducts (see \code{\link{adducts}})} 27 | 28 | \item{isotopes}{data frame containing isotopes (see \code{\link{isotopes}})} 29 | 30 | \item{screening_adducts}{vector of adduct names to be added for all molecules (if not already in the provided DT)} 31 | } 32 | \value{ 33 | data.table with columns "molecule", "formula", "adduct", "charge", "m/z" and "abundance" 34 | } 35 | \description{ 36 | This is basically a wrapper-function around the enviPat package. It takes a table with columns "molecule", "SumForm_c" and 37 | "main_adduct" and calculates theoretical m/z values and abundances for isotopologues down to a user-defined abundance threshold. 38 | } 39 | \details{ 40 | Make sure that molecular formulas in column "SumForm_c" only contain valid molecular formulas as described in \code{\link{check_chemform}}. Otherwise, the function might never complete. Additional columns in DT will be retained in the output of the function. However, the column names "adduct", "isoab", "formula", "charge" and "mz" are reserved.
41 | } 42 | -------------------------------------------------------------------------------- /man/get_pot_peak_ranges2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_pot_peak_ranges.R 3 | \name{get_pot_peak_ranges2} 4 | \alias{get_pot_peak_ranges2} 5 | \title{get_pot_peak_ranges2} 6 | \usage{ 7 | get_pot_peak_ranges2(int, Min.PpP = 10, peak.spotting.factor = 0.01) 8 | } 9 | \arguments{ 10 | \item{int}{int} 11 | 12 | \item{Min.PpP}{Min.PpP} 13 | 14 | \item{peak.spotting.factor}{peak.spotting.factor} 15 | } 16 | \description{ 17 | get_pot_peak_ranges2 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/import_grouped_Metaboanalyst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_Metaboanalyst.R 3 | \name{import_grouped_Metaboanalyst} 4 | \alias{import_grouped_Metaboanalyst} 5 | \title{import_grouped_Metaboanalyst} 6 | \usage{ 7 | import_grouped_Metaboanalyst(file, options_dt) 8 | } 9 | \arguments{ 10 | \item{file}{file} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_grouped_Metaboanalyst 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_grouped_elmaven.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_elmaven.R 3 | \name{import_grouped_elmaven} 4 | \alias{import_grouped_elmaven} 5 | \title{import_grouped_elmaven} 6 | \usage{ 7 | import_grouped_elmaven(file, options_dt) 8 | } 9 | \arguments{ 10 | \item{file}{file} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | 
import_grouped_elmaven 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_grouped_msdial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_msdial.R 3 | \name{import_grouped_msdial} 4 | \alias{import_grouped_msdial} 5 | \title{import_grouped_msdial} 6 | \usage{ 7 | import_grouped_msdial(file_path, options_dt) 8 | } 9 | \arguments{ 10 | \item{file_path}{file_path} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_grouped_msdial 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_grouped_mzmine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_mzMine.R 3 | \name{import_grouped_mzmine} 4 | \alias{import_grouped_mzmine} 5 | \title{import_grouped_mzmine} 6 | \usage{ 7 | import_grouped_mzmine(file_path, options_table) 8 | } 9 | \arguments{ 10 | \item{file_path}{file_path} 11 | 12 | \item{options_table}{options_table} 13 | } 14 | \value{ 15 | data.table 16 | } 17 | \description{ 18 | import_grouped_mzmine 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/import_grouped_openms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_openms.R 3 | \name{import_grouped_openms} 4 | \alias{import_grouped_openms} 5 | \title{import_grouped_openms} 6 | \usage{ 7 | import_grouped_openms(file_path, options_table) 8 | } 9 | \arguments{ 10 | \item{file_path}{file_path} 11 | 12 | \item{options_table}{options_table} 13 | } 14 | \description{ 15 | 
import_grouped_openms 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_grouped_slaw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_slaw.R 3 | \name{import_grouped_slaw} 4 | \alias{import_grouped_slaw} 5 | \title{import_grouped_slaw} 6 | \usage{ 7 | import_grouped_slaw(file, options_dt) 8 | } 9 | \arguments{ 10 | \item{file}{file} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_grouped_slaw 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_grouped_xcms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_xcms.R 3 | \name{import_grouped_xcms} 4 | \alias{import_grouped_xcms} 5 | \title{import_grouped_xcms} 6 | \usage{ 7 | import_grouped_xcms(file, options_dt) 8 | } 9 | \arguments{ 10 | \item{file}{file} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_grouped_xcms 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_options.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_functions.R 3 | \name{import_options} 4 | \alias{import_options} 5 | \title{import_options} 6 | \usage{ 7 | import_options(file_path) 8 | } 9 | \arguments{ 10 | \item{file_path}{file_path} 11 | } 12 | \description{ 13 | import_options 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/import_ungrouped_Metaboanalyst.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_Metaboanalyst.R 3 | \name{import_ungrouped_Metaboanalyst} 4 | \alias{import_ungrouped_Metaboanalyst} 5 | \title{import_ungrouped_Metaboanalyst} 6 | \usage{ 7 | import_ungrouped_Metaboanalyst(file, options_dt) 8 | } 9 | \arguments{ 10 | \item{file}{file} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_ungrouped_Metaboanalyst 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_ungrouped_cd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_compd.R 3 | \name{import_ungrouped_cd} 4 | \alias{import_ungrouped_cd} 5 | \title{import_ungrouped_cd} 6 | \usage{ 7 | import_ungrouped_cd(file_path, options_table) 8 | } 9 | \arguments{ 10 | \item{file_path}{file_path} 11 | 12 | \item{options_table}{options_table} 13 | } 14 | \description{ 15 | import_ungrouped_cd 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_ungrouped_elmaven.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_elmaven.R 3 | \name{import_ungrouped_elmaven} 4 | \alias{import_ungrouped_elmaven} 5 | \title{import_ungrouped_elmaven} 6 | \usage{ 7 | import_ungrouped_elmaven(file, options_dt) 8 | } 9 | \arguments{ 10 | \item{file}{file} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_ungrouped_elmaven 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_ungrouped_msdial.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_msdial.R 3 | \name{import_ungrouped_msdial} 4 | \alias{import_ungrouped_msdial} 5 | \title{import_ungrouped_msdial} 6 | \usage{ 7 | import_ungrouped_msdial(file_list, options_dt) 8 | } 9 | \arguments{ 10 | \item{file_list}{file_list} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_ungrouped_msdial 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_ungrouped_mzmine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_mzMine.R 3 | \name{import_ungrouped_mzmine} 4 | \alias{import_ungrouped_mzmine} 5 | \title{import_ungrouped_mzmine} 6 | \usage{ 7 | import_ungrouped_mzmine(folder_path, options_table) 8 | } 9 | \arguments{ 10 | \item{folder_path}{folder_path} 11 | 12 | \item{options_table}{options_table} 13 | } 14 | \description{ 15 | import_ungrouped_mzmine 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_ungrouped_openms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_openms.R 3 | \name{import_ungrouped_openms} 4 | \alias{import_ungrouped_openms} 5 | \title{import_ungrouped_openms} 6 | \usage{ 7 | import_ungrouped_openms(file_list, options_dt) 8 | } 9 | \arguments{ 10 | \item{file_list}{file_list} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_ungrouped_openms 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_ungrouped_slaw.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_slaw.R 3 | \name{import_ungrouped_slaw} 4 | \alias{import_ungrouped_slaw} 5 | \title{import_ungrouped_slaw} 6 | \usage{ 7 | import_ungrouped_slaw(file_list, options_dt) 8 | } 9 | \arguments{ 10 | \item{file_list}{file_list} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_ungrouped_slaw 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/import_ungrouped_xcms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_import_xcms.R 3 | \name{import_ungrouped_xcms} 4 | \alias{import_ungrouped_xcms} 5 | \title{import_ungrouped_xcms} 6 | \usage{ 7 | import_ungrouped_xcms(file, options_dt) 8 | } 9 | \arguments{ 10 | \item{file}{file} 11 | 12 | \item{options_dt}{options_dt} 13 | } 14 | \description{ 15 | import_ungrouped_xcms 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/match_NPPpeaks_to_NPPfeatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/match_NPPpeaks_to_NPPfeatures.R 3 | \name{match_NPPpeaks_to_NPPfeatures} 4 | \alias{match_NPPpeaks_to_NPPfeatures} 5 | \title{match_NPPpeaks_to_NPPfeatures} 6 | \usage{ 7 | match_NPPpeaks_to_NPPfeatures( 8 | Matches_BM_NPPpeaks, 9 | SplittedMatches_BM_NPPpeaks, 10 | g_table 11 | ) 12 | } 13 | \arguments{ 14 | \item{Matches_BM_NPPpeaks}{Matches_BM_NPPpeaks} 15 | 16 | \item{SplittedMatches_BM_NPPpeaks}{SplittedMatches_BM_NPPpeaks} 17 | 18 | \item{g_table}{g_table} 19 | } 20 | \description{ 21 | match_NPPpeaks_to_NPPfeatures 22 | } 23 | 
\keyword{internal} 24 | -------------------------------------------------------------------------------- /man/match_features_to_benchmark.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/match_features_to_benchmark.R 3 | \name{match_features_to_benchmark} 4 | \alias{match_features_to_benchmark} 5 | \title{match_features_to_benchmark} 6 | \usage{ 7 | match_features_to_benchmark( 8 | g_table, 9 | b_table, 10 | Matches_BM_NPPpeaks, 11 | Unmatched_BM_NPPpeaks 12 | ) 13 | } 14 | \arguments{ 15 | \item{g_table}{g_table} 16 | 17 | \item{b_table}{b_table} 18 | 19 | \item{Matches_BM_NPPpeaks}{Matches_BM_NPPpeaks} 20 | 21 | \item{Unmatched_BM_NPPpeaks}{Unmatched_BM_NPPpeaks} 22 | } 23 | \description{ 24 | match_features_to_benchmark 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/match_peaks_to_benchmark.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/match_peaks_to_benchmark.R 3 | \name{match_peaks_to_benchmark} 4 | \alias{match_peaks_to_benchmark} 5 | \title{match_peaks_to_benchmark} 6 | \usage{ 7 | match_peaks_to_benchmark(b_table, ug_table) 8 | } 9 | \arguments{ 10 | \item{b_table}{b_table} 11 | 12 | \item{ug_table}{ug_table} 13 | } 14 | \description{ 15 | match_peaks_to_benchmark 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/match_peaks_to_benchmark_split.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/match_peaks_to_benchmark_split.R 3 | \name{match_peaks_to_benchmark_split} 4 | \alias{match_peaks_to_benchmark_split} 5 | \title{match_peaks_to_benchmark_split} 6 
| \usage{ 7 | match_peaks_to_benchmark_split(b_table, ug_table) 8 | } 9 | \arguments{ 10 | \item{b_table}{b_table} 11 | 12 | \item{ug_table}{ug_table} 13 | } 14 | \description{ 15 | match_peaks_to_benchmark_split 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/metrics_per_molecule.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metrics_per_molecule.R 3 | \name{metrics_per_molecule} 4 | \alias{metrics_per_molecule} 5 | \title{metrics_per_molecule} 6 | \usage{ 7 | metrics_per_molecule( 8 | Matches_BM_NPPpeaks, 9 | Unmatched_BM_NPPpeaks, 10 | Matches_BM_NPPpeaks_NPPfeatures, 11 | IT_ratio_biases, 12 | SplittedMatches_BM_NPPpeaks, 13 | MissingPeak_classification, 14 | AlignmentErrors_per_moleculeAndAdduct 15 | ) 16 | } 17 | \arguments{ 18 | \item{Matches_BM_NPPpeaks}{Matches_BM_NPPpeaks} 19 | 20 | \item{Unmatched_BM_NPPpeaks}{Unmatched_BM_NPPpeaks} 21 | 22 | \item{Matches_BM_NPPpeaks_NPPfeatures}{Matches_BM_NPPpeaks_NPPfeatures} 23 | 24 | \item{IT_ratio_biases}{IT_ratio_biases} 25 | 26 | \item{SplittedMatches_BM_NPPpeaks}{SplittedMatches_BM_NPPpeaks} 27 | 28 | \item{MissingPeak_classification}{MissingPeak_classification} 29 | 30 | \item{AlignmentErrors_per_moleculeAndAdduct}{AlignmentErrors_per_moleculeAndAdduct} 31 | } 32 | \description{ 33 | metrics_per_molecule 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/pick_main_feature.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_pick_main_feature.R 3 | \name{pick_main_feature} 4 | \alias{pick_main_feature} 5 | \title{pick_main_feature} 6 | \usage{ 7 | pick_main_feature(dt) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | } 12 | 
\description{ 13 | pick_main_feature 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/pick_main_feature_sd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_pick_main_feature.R 3 | \name{pick_main_feature_sd} 4 | \alias{pick_main_feature_sd} 5 | \title{pick_main_feature_sd} 6 | \usage{ 7 | pick_main_feature_sd(dt) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | } 12 | \description{ 13 | pick_main_feature_sd 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/pick_main_peak.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_pick_main_peak.R 3 | \name{pick_main_peak} 4 | \alias{pick_main_peak} 5 | \title{pick_main_peak} 6 | \usage{ 7 | pick_main_peak(dt) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | } 12 | \description{ 13 | pick_main_peak 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/pick_main_peak_sd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_pick_main_peak.R 3 | \name{pick_main_peak_sd} 4 | \alias{pick_main_peak_sd} 5 | \title{pick_main_peak_sd} 6 | \usage{ 7 | pick_main_peak_sd(dt) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | } 12 | \description{ 13 | pick_main_peak_sd 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/plot_IR_peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/plot_Peak.R 3 | \name{plot_IR_peaks} 4 | \alias{plot_IR_peaks} 5 | \title{plot_IR_peaks} 6 | \usage{ 7 | plot_IR_peaks(PC, plotly_key) 8 | } 9 | \arguments{ 10 | \item{PC}{PC} 11 | 12 | \item{plotly_key}{plotly_key} 13 | } 14 | \description{ 15 | plot_IR_peaks 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/plot_Peak.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_Peak.R 3 | \name{plot_Peak} 4 | \alias{plot_Peak} 5 | \title{plot_Peak} 6 | \usage{ 7 | plot_Peak(PC, IndexNumber) 8 | } 9 | \arguments{ 10 | \item{PC}{output from \code{\link{find_bench_peaks}}} 11 | 12 | \item{IndexNumber}{IDX number of peak to be plotted} 13 | } 14 | \value{ 15 | plotly object 16 | } 17 | \description{ 18 | plot_Peak 19 | } 20 | -------------------------------------------------------------------------------- /man/plot_Peak_per_mol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_Peak.R 3 | \name{plot_Peak_per_mol} 4 | \alias{plot_Peak_per_mol} 5 | \title{plot_Peak_per_mol} 6 | \usage{ 7 | plot_Peak_per_mol(PC_object, mol, ia = 100, add = "M+H") 8 | } 9 | \arguments{ 10 | \item{PC_object}{output from \code{\link{compare_peaks}} or output from \code{\link{find_bench_peaks}}} 11 | 12 | \item{mol}{molecule} 13 | 14 | \item{ia}{isotopic abundance rounded to 2 digits} 15 | 16 | \item{add}{adduct} 17 | } 18 | \value{ 19 | plotly object 20 | } 21 | \description{ 22 | plot_Peak_per_mol 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_Peak_with_predicted_peak.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit 
documentation in R/plot_Peak.R 3 | \name{plot_Peak_with_predicted_peak} 4 | \alias{plot_Peak_with_predicted_peak} 5 | \title{plot_Peak_with_predicted_peak} 6 | \usage{ 7 | plot_Peak_with_predicted_peak(PC_object, IndexNumber, focus = TRUE) 8 | } 9 | \arguments{ 10 | \item{PC_object}{output from \code{\link{find_bench_peaks}}} 11 | 12 | \item{IndexNumber}{IDX number of peak to be plotted} 13 | 14 | \item{focus}{should plotted chromatogram be limited to peak (TRUE/FALSE)} 15 | } 16 | \value{ 17 | plotly object 18 | } 19 | \description{ 20 | plot_Peak_with_predicted_peak 21 | } 22 | -------------------------------------------------------------------------------- /man/plot_bench_histo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_bench_histo.R 3 | \name{plot_bench_histo} 4 | \alias{plot_bench_histo} 5 | \title{plot_bench_histo} 6 | \usage{ 7 | plot_bench_histo( 8 | benchmark_data, 9 | var, 10 | choice_vector_bench, 11 | color = "blue", 12 | post_comp = FALSE, 13 | rm_NF_legend = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{benchmark_data}{output from \code{\link{find_bench_peaks}}} 18 | 19 | \item{var}{variable name to be plotted} 20 | 21 | \item{choice_vector_bench}{named vector including variable to be plotted as element} 22 | 23 | \item{color}{color of histogram} 24 | 25 | \item{post_comp}{TRUE/FALSE are data from benchmark or comparison with non-targeted} 26 | 27 | \item{rm_NF_legend}{for shiny functionality} 28 | } 29 | \value{ 30 | plotly object 31 | } 32 | \description{ 33 | plot_bench_histo 34 | } 35 | -------------------------------------------------------------------------------- /man/plot_bench_overview.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_bench_overview.R 3 | \name{plot_bench_overview} 4 | 
\alias{plot_bench_overview} 5 | \title{plot_bench_overview} 6 | \usage{ 7 | plot_bench_overview(benchmark_data, x, y, colb, choice_vector_bench) 8 | } 9 | \arguments{ 10 | \item{benchmark_data}{output from \code{\link{find_bench_peaks}}} 11 | 12 | \item{x}{variable (column name) to be plotted on x axis} 13 | 14 | \item{y}{variable (column name) to be plotted on y axis} 15 | 16 | \item{colb}{variable (column name) to color by} 17 | 18 | \item{choice_vector_bench}{named vector including all variables used as elements} 19 | } 20 | \value{ 21 | plotly object 22 | } 23 | \description{ 24 | plot_bench_overview 25 | } 26 | -------------------------------------------------------------------------------- /man/plot_bench_peak_overview.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_bench_peak_overview.R 3 | \name{plot_bench_peak_overview} 4 | \alias{plot_bench_peak_overview} 5 | \title{plot_bench_peak_overview} 6 | \usage{ 7 | plot_bench_peak_overview(benchmark_data, molecule, adduct, ia) 8 | } 9 | \arguments{ 10 | \item{benchmark_data}{output from \code{\link{find_bench_peaks}}} 11 | 12 | \item{molecule}{molecule} 13 | 14 | \item{adduct}{adduct} 15 | 16 | \item{ia}{isotopic abundance rounded to 2 digits} 17 | } 18 | \value{ 19 | plotly object 20 | } 21 | \description{ 22 | plot_bench_peak_overview 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_comp_dist_of_found_peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_comp_dist_of_found_peaks.R 3 | \name{plot_comp_dist_of_found_peaks} 4 | \alias{plot_comp_dist_of_found_peaks} 5 | \title{plot_comp_dist_of_found_peaks} 6 | \usage{ 7 | plot_comp_dist_of_found_peaks( 8 | comparison_data, 9 | var, 10 | choice_vector_comp, 11 
| post_alignment = FALSE, 12 | limits = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{comparison_data}{output from \code{\link{compare_peaks}}} 17 | 18 | \item{var}{variable (column name) to be plotted} 19 | 20 | \item{choice_vector_comp}{named vector containing variable to be plotted as element} 21 | 22 | \item{post_alignment}{TRUE/FALSE should data be plotted from before or after alignment.} 23 | 24 | \item{limits}{(optional) numeric vector of length 2 used to filter the values of column var} 25 | } 26 | \value{ 27 | plotly object 28 | } 29 | \description{ 30 | plot_comp_dist_of_found_peaks 31 | } 32 | -------------------------------------------------------------------------------- /man/plot_comp_iso_pred_error.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_comp_iso_pred_error.R 3 | \name{plot_comp_iso_pred_error} 4 | \alias{plot_comp_iso_pred_error} 5 | \title{plot_comp_iso_pred_error} 6 | \usage{ 7 | plot_comp_iso_pred_error( 8 | comparison_data, 9 | post_alignment = FALSE, 10 | BMvsPPvsAl = TRUE 11 | ) 12 | } 13 | \arguments{ 14 | \item{comparison_data}{output from \code{\link{compare_peaks}}} 15 | 16 | \item{post_alignment}{TRUE/FALSE should NT data from before or after alignment be plotted} 17 | 18 | \item{BMvsPPvsAl}{TRUE/FALSE should argument post_alignment be ignored in order to plot both in one plot} 19 | } 20 | \value{ 21 | plotly object 22 | } 23 | \description{ 24 | plot_comp_iso_pred_error 25 | } 26 | -------------------------------------------------------------------------------- /man/plot_comp_missing_value_hm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_comp_missing_value_hm.R 3 | \name{plot_comp_missing_value_hm} 4 | \alias{plot_comp_missing_value_hm} 5 | \title{plot_comp_missing_value_hm}
6 | \usage{ 7 | plot_comp_missing_value_hm( 8 | comparison_data, 9 | post_alignment = FALSE, 10 | disable_plot = FALSE 11 | ) 12 | } 13 | \arguments{ 14 | \item{comparison_data}{output from \code{\link{compare_peaks}}} 15 | 16 | \item{post_alignment}{TRUE/FALSE should NT data from before or after alignment be plotted} 17 | 18 | \item{disable_plot}{if TRUE plot is not generated (for shiny app due to long loading times)} 19 | } 20 | \value{ 21 | plotly object 22 | } 23 | \description{ 24 | plot_comp_missing_value_hm 25 | } 26 | -------------------------------------------------------------------------------- /man/plot_comp_peak_overview.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_comp_peak_overview.R 3 | \name{plot_comp_peak_overview} 4 | \alias{plot_comp_peak_overview} 5 | \title{plot_comp_peak_overview} 6 | \usage{ 7 | plot_comp_peak_overview(comparison_data, mol_c, add_c, ia_c) 8 | } 9 | \arguments{ 10 | \item{comparison_data}{output from \code{\link{compare_peaks}}} 11 | 12 | \item{mol_c}{molecule} 13 | 14 | \item{add_c}{adduct} 15 | 16 | \item{ia_c}{isotopic abundance rounded to 2 digits} 17 | } 18 | \value{ 19 | plotly object 20 | } 21 | \description{ 22 | plot_comp_peak_overview 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_comp_scatter_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_comp_scatter_plot.R 3 | \name{plot_comp_scatter_plot} 4 | \alias{plot_comp_scatter_plot} 5 | \title{plot_comp_scatter_plot} 6 | \usage{ 7 | plot_comp_scatter_plot( 8 | comparison_data, 9 | x, 10 | y, 11 | col, 12 | choice_vector_comp, 13 | post_alignment = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{comparison_data}{output from \code{\link{compare_peaks}}} 18 | 
19 | \item{x}{variable (column name) to be plotted on x axis} 20 | 21 | \item{y}{variable (column name) to be plotted on y axis} 22 | 23 | \item{col}{variable (column name) to be colored by} 24 | 25 | \item{choice_vector_comp}{named vector including all variables used as elements} 26 | 27 | \item{post_alignment}{TRUE/FALSE should NT data from before or after alignment be plotted} 28 | } 29 | \value{ 30 | plotly object 31 | } 32 | \description{ 33 | plot_comp_scatter_plot 34 | } 35 | -------------------------------------------------------------------------------- /man/plot_sunburst_alignment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sunburst_plots.R 3 | \name{plot_sunburst_alignment} 4 | \alias{plot_sunburst_alignment} 5 | \title{plot_sunburst_alignment} 6 | \usage{ 7 | plot_sunburst_alignment(result_txt) 8 | } 9 | \arguments{ 10 | \item{result_txt}{output from \code{\link{derive_performance_metrics}}} 11 | } 12 | \value{ 13 | plotly object 14 | } 15 | \description{ 16 | Generates a sunburst plot visualizing non-targeted data pre-processing alignment errors. 17 | From inside to outside the donuts correspond to peaks found during peak detection, aligned/lost peaks, correct/incorrect alignments and error type. For 18 | more information please check the mzRAPP readme. 
19 | } 20 | -------------------------------------------------------------------------------- /man/plot_sunburst_peakQuality.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sunburst_plots.R 3 | \name{plot_sunburst_peakQuality} 4 | \alias{plot_sunburst_peakQuality} 5 | \title{plot_sunburst_peakQuality} 6 | \usage{ 7 | plot_sunburst_peakQuality(result_txt, comparison_object) 8 | } 9 | \arguments{ 10 | \item{result_txt}{output from \code{\link{derive_performance_metrics}}} 11 | 12 | \item{comparison_object}{output from \code{\link{compare_peaks}}} 13 | } 14 | \value{ 15 | plotly object 16 | } 17 | \description{ 18 | Generates a sunburst plot visualizing the proportions of well recovered isotopologue ratios in order to assess non-targeted data pre-processing. 19 | From inside to outside the donuts correspond to peaks found during peak detection and peaks found after alignment/feature processing. For 20 | more information please check the mzRAPP readme. 21 | } 22 | -------------------------------------------------------------------------------- /man/plot_sunburst_peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sunburst_plots.R 3 | \name{plot_sunburst_peaks} 4 | \alias{plot_sunburst_peaks} 5 | \title{plot_sunburst_peaks} 6 | \usage{ 7 | plot_sunburst_peaks(result_txt, comparison_object) 8 | } 9 | \arguments{ 10 | \item{result_txt}{output from \code{\link{derive_performance_metrics}}} 11 | 12 | \item{comparison_object}{output from \code{\link{compare_peaks}}} 13 | } 14 | \value{ 15 | plotly object 16 | } 17 | \description{ 18 | Generates a sunburst plot visualizing the proportions of found/not found peaks in order to assess non-targeted data pre-processing. 
19 | From inside to outside the donuts correspond to peaks found during peak detection and peaks found after alignment/feature processing. For 20 | more information please check the mzRAPP readme. 21 | } 22 | -------------------------------------------------------------------------------- /man/plotly_click_wo_warnings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Helper_functions.R 3 | \name{plotly_click_wo_warnings} 4 | \alias{plotly_click_wo_warnings} 5 | \title{plotly_click_wo_warnings} 6 | \usage{ 7 | plotly_click_wo_warnings(sc) 8 | } 9 | \arguments{ 10 | \item{sc}{sc} 11 | } 12 | \value{ 13 | dataframe; plotly event 14 | } 15 | \description{ 16 | plotly_click_wo_warnings 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/predict_Iso.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict_Iso.R 3 | \name{predict_Iso} 4 | \alias{predict_Iso} 5 | \title{predict_Iso} 6 | \usage{ 7 | predict_Iso( 8 | DTT, 9 | SampleIdentifier_col, 10 | Molecule_Adduct_col, 11 | isoab_col, 12 | flag_extremes = FALSE, 13 | max_bias_area = 35, 14 | max_bias_height = 30, 15 | area_height_bias_diff = 30 16 | ) 17 | } 18 | \arguments{ 19 | \item{DTT}{output of \code{\link{find_bench_peaks}}} 20 | 21 | \item{SampleIdentifier_col}{name of column(s) with file names} 22 | 23 | \item{Molecule_Adduct_col}{name of column(s) with molecule and adduct identifiers} 24 | 25 | \item{isoab_col}{name of column with isotopic abundance information} 26 | 27 | \item{flag_extremes}{whether outliers should be flagged (more than 30\% of in area or more than 30\% of in area but with area and height being in agreement within 30\%)} 28 | } 29 | \description{ 30 | Takes the output of 
\code{\link{find_bench_peaks}} and predicts peak areas as well as peak heights for lower abundant isotopologues 31 | from the most abundant isotopologue 32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/reIndexFeatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Helper_functions.R 3 | \name{reIndexFeatures} 4 | \alias{reIndexFeatures} 5 | \title{reIndexFeatures} 6 | \usage{ 7 | reIndexFeatures(vct) 8 | } 9 | \arguments{ 10 | \item{vct}{vct} 11 | } 12 | \value{ 13 | list 14 | } 15 | \description{ 16 | reIndexFeatures 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/remove_identical_peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_functions.R 3 | \name{remove_identical_peaks} 4 | \alias{remove_identical_peaks} 5 | \title{remove_identical_peaks} 6 | \usage{ 7 | remove_identical_peaks(dt, grouped = FALSE) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | 12 | \item{grouped}{grouped} 13 | } 14 | \value{ 15 | dpl 16 | } 17 | \description{ 18 | remove_identical_peaks 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/rename_columns_from_options.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comp_functions.R 3 | \name{rename_columns_from_options} 4 | \alias{rename_columns_from_options} 5 | \title{rename_columns_from_options} 6 | \usage{ 7 | rename_columns_from_options(dt, options_dt, old_columns, new_columns) 8 | } 9 | \arguments{ 10 | \item{dt}{dt} 11 | 12 |
\item{new_columns}{new_columns} 13 | 14 | \item{options_dt}{options_dt} 15 | 16 | \item{old_columns}{old_columns} 17 | } 18 | \description{ 19 | Renames the columns of dt by replacing all names defined in vector old_columns by vector new_columns 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/round_woe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Helper_functions.R 3 | \name{round_woe} 4 | \alias{round_woe} 5 | \title{round_woe} 6 | \usage{ 7 | round_woe(x, stellen) 8 | } 9 | \arguments{ 10 | \item{x}{x} 11 | 12 | \item{stellen}{stellen} 13 | } 14 | \description{ 15 | round_woe 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/top_to_x.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Helper_functions.R 3 | \name{top_to_x} 4 | \alias{top_to_x} 5 | \title{top_to_x} 6 | \usage{ 7 | top_to_x(number, x = 0) 8 | } 9 | \arguments{ 10 | \item{number}{numeric(1)} 11 | 12 | \item{x}{roof to this number} 13 | } 14 | \value{ 15 | numeric 16 | } 17 | \description{ 18 | top_to_x 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /mzRAPP.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs:
--no-multiarch --with-keep.source 21 | PackageRoxygenize: collate 22 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | --------------------------------------------------------------------------------