├── tests └── .Rapp.history ├── R ├── simplerspec.R ├── utils-pipe.R ├── data.R ├── clustering-outl-spc.R ├── select-spc.R ├── tbl-lcol-map-funprog.R ├── predict-spc.R ├── join-chem-spectra.R ├── average-spc.R ├── utils-unsupervised.R ├── select-ref-spectra.R ├── slice-spc.R ├── plot_spc.R ├── remove-outl-spectra.R ├── utils-stats.R ├── resample-spc.R ├── read-asd.R ├── preprocess-spc.R ├── pls-vip.R ├── gather-spc.R └── plot-spc-extended.R ├── man ├── .gitignore ├── figures │ └── simplerspec-logo.png ├── pipe.Rd ├── join_chem_spec.Rd ├── join_spc_chem.Rd ├── split_df2l.Rd ├── soilspec_yamsys.Rd ├── extract_lcols2dts.Rd ├── create_vip_rects.Rd ├── predict_from_spc.Rd ├── extract_pls_vip.Rd ├── select_spc_vars.Rd ├── select_ref_spc.Rd ├── evaluate_model.Rd ├── read_asd.Rd ├── bind_lcols_dts.Rd ├── assess_multimodels.Rd ├── read_asd_bin.Rd ├── read_opus_bin_univ.Rd ├── remove_outliers.Rd ├── slice_xvalues.Rd ├── average_spc.Rd ├── merge_dts.Rd ├── preprocess_spc.Rd ├── read_opus_univ.Rd ├── merge_dts_l.Rd ├── plot_pls_vip.Rd ├── plot_spc.Rd ├── fit_rf.Rd ├── plot_spc_ext.Rd ├── resample_spc.Rd ├── fit_pls.Rd └── gather_spc.Rd ├── .gitignore ├── _pkgdown.yml ├── data └── soilspec_yamsys.rda ├── .Rbuildignore ├── tic.R ├── simplerspec.Rproj ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── .github └── workflows │ └── tic.yml └── README.md /tests/.Rapp.history: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R/simplerspec.R: -------------------------------------------------------------------------------- 1 | #' @import rlang 2 | #' @import dplyr 3 | -------------------------------------------------------------------------------- /man/.gitignore: -------------------------------------------------------------------------------- 1 | .unison.tune_model_loocv_q.Rd.f89c7b744be0454bad4399a2fe4b87d9.unison.tmp 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | ._* 7 | docs/ 8 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://philipp-baumann.github.io/simplerspec 2 | 3 | development: 4 | mode: auto 5 | -------------------------------------------------------------------------------- /data/soilspec_yamsys.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philipp-baumann/simplerspec/HEAD/data/soilspec_yamsys.rda -------------------------------------------------------------------------------- /man/figures/simplerspec-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philipp-baumann/simplerspec/HEAD/man/figures/simplerspec-logo.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | LICENSE\.md 5 | ^appveyor\.yml$ 6 | ^tic\.R$ 7 | ^\.ccache$ 8 | ^\.github$ 9 | _pkgdown.yml 10 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | NULL 12 | -------------------------------------------------------------------------------- /tic.R: -------------------------------------------------------------------------------- 1 | # installs dependencies, runs R CMD check, runs covr::codecov() 2 | do_package_checks() 3 | 4 | if (ci_on_ghactions() && ci_has_env("BUILD_PKGDOWN")) { 5 | # creates pkgdown site and pushes to gh-pages branch 6 | # only for the runner with the "BUILD_PKGDOWN" env var set 7 | do_pkgdown() 8 | } 9 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Soil spectra and laboratory reference data from Baumann et al. (2021) 2 | #' 3 | #' Data from "Estimation of soil properties with mid-infrared soil spectroscopy 4 | #' across yam production landscapes in West Africa". 5 | #' 6 | #' @format ## `soilspec_yamsys` 7 | #' A tibble data frame with 284 rows and 40 columns. The spectra are in the 8 | #' `spc` list-column. 9 | #' @source 10 | "soilspec_yamsys" 11 | -------------------------------------------------------------------------------- /simplerspec.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace,vignette 22 | -------------------------------------------------------------------------------- /man/join_chem_spec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/join-chem-spectra.R 3 | \name{join_chem_spec} 4 | \alias{join_chem_spec} 5 | \title{Join chemical and spectral data frames} 6 | \usage{ 7 | join_chem_spec(dat_chem, dat_spec, by = "sample_ID") 8 | } 9 | \arguments{ 10 | \item{dat_chem}{data.frame that contains chemical values of 11 | the sample} 12 | 13 | \item{dat_spec}{List that contains spectral data} 14 | 15 | \item{by}{character of column name that defines sample_ID} 16 | } 17 | \value{ 18 | List: xxx 19 | } 20 | \description{ 21 | Combines spectral data (data.frame) and chemical 22 | data (data.frame). 23 | } 24 | -------------------------------------------------------------------------------- /man/join_spc_chem.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/join-chem-spectra.R 3 | \name{join_spc_chem} 4 | \alias{join_spc_chem} 5 | \title{Join spectra data and chemical data tibbles} 6 | \usage{ 7 | join_spc_chem(spc_tbl, chem_tbl, by = "sample_id") 8 | } 9 | \arguments{ 10 | \item{spc_tbl}{Tibble that contains spectral data} 11 | 12 | \item{chem_tbl}{Tibble that contains chemical reference values of 13 | the samples} 14 | 15 | \item{by}{character of column name that defines sample_ID} 16 | } 17 | \value{ 18 | Tibble joined by sample_id 19 | } 20 | \description{ 21 | Combines spectral data (tibble class) and chemical 22 | data (tibble class). 23 | } 24 | -------------------------------------------------------------------------------- /man/split_df2l.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-lcol-map-funprog.R 3 | \name{split_df2l} 4 | \alias{split_df2l} 5 | \title{Split a tibble data frame into a list of tibbles by a group column} 6 | \usage{ 7 | split_df2l(tbl_df, group) 8 | } 9 | \arguments{ 10 | \item{tbl_df}{Tibble data frame} 11 | 12 | \item{group}{Character vector with name of column based on which tibble 13 | is split into a list of tibbles} 14 | } 15 | \value{ 16 | List of tibbles. Each tibble contains data split by 17 | a group column within \code{tbl_df}. 18 | } 19 | \description{ 20 | Helper function that calls \code{split} on a tibble using a 21 | grouping column within tibble. 22 | } 23 | -------------------------------------------------------------------------------- /man/soilspec_yamsys.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{soilspec_yamsys} 5 | \alias{soilspec_yamsys} 6 | \title{Soil spectra and laboratory reference data from Baumann et al. (2021)} 7 | \format{ 8 | \subsection{\code{soilspec_yamsys}}{ 9 | 10 | A tibble data frame with 284 rows and 40 columns. The spectra are in the 11 | \code{spc} list-column. 12 | } 13 | } 14 | \source{ 15 | \url{https://soil.copernicus.org/articles/7/717/2021/} 16 | } 17 | \usage{ 18 | soilspec_yamsys 19 | } 20 | \description{ 21 | Data from "Estimation of soil properties with mid-infrared soil spectroscopy 22 | across yam production landscapes in West Africa". 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/extract_lcols2dts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-lcol-map-funprog.R 3 | \name{extract_lcols2dts} 4 | \alias{extract_lcols2dts} 5 | \title{Extract multiple tibble list-columns and return data as list of 6 | data.tables} 7 | \usage{ 8 | extract_lcols2dts(spc_tbl, lcols) 9 | } 10 | \arguments{ 11 | \item{spc_tbl}{Spectral tibble (data frame) with spectral data contained 12 | in list-columns} 13 | 14 | \item{lcols}{Character vector containing names of list-columns to be 15 | extracted into a list of data.tables} 16 | } 17 | \value{ 18 | List of data.tables. Each element is a data.table derivied from a 19 | list-column specified in the \code{lcols} argument. 20 | } 21 | \description{ 22 | Extract multiple tibble list columns, row bind them separately 23 | into single data tables and return a list of data.tables. 24 | } 25 | -------------------------------------------------------------------------------- /man/create_vip_rects.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pls-vip.R 3 | \name{create_vip_rects} 4 | \alias{create_vip_rects} 5 | \title{Create a data frame containing start and end positions (wavenumbers) 6 | where variable importance in projection (VIP) is > 1} 7 | \usage{ 8 | create_vip_rects(df_vip) 9 | } 10 | \arguments{ 11 | \item{df_vip}{Data frame containing \code{wavenumber} and \code{vip} columns 12 | (numeric)} 13 | } 14 | \value{ 15 | Data.frame containing vectors \code{start} (numeric; wavenumber), 16 | \code{end} (numeric; wavenumber) and group (integer; values are 17 | \code{1:length(start))}. 18 | } 19 | \description{ 20 | Given a data frame with VIP outputs (wavenumber and vip 21 | columns), start and end values denoting spectral regions where VIP > 1 22 | are returned as data frame. The functions can be used as helper 23 | function for plotting VIP. 24 | } 25 | -------------------------------------------------------------------------------- /man/predict_from_spc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict-spc.R 3 | \name{predict_from_spc} 4 | \alias{predict_from_spc} 5 | \title{Predict soil properties of new spectra based on a list of calibration models} 6 | \usage{ 7 | predict_from_spc(model_list, spc_tbl, slice = TRUE) 8 | } 9 | \arguments{ 10 | \item{model_list}{List of model output generated from calibration step 11 | (\code{pls_ken_stone()}} 12 | 13 | \item{spc_tbl}{Tibble of spectra after preprocessing 14 | (\code{preprocess_spc()})} 15 | 16 | \item{slice}{Logical expression wheather only one row per sample_id returned.} 17 | } 18 | \value{ 19 | tibble with new columns \code{model}, and predicted values with 20 | column names of model list. 21 | } 22 | \description{ 23 | Append predictions for a set of responses specified by a list 24 | of calibration models and a tibble containing preprocessed spectra as 25 | list-columns. 26 | } 27 | -------------------------------------------------------------------------------- /man/extract_pls_vip.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pls-vip.R 3 | \name{extract_pls_vip} 4 | \alias{extract_pls_vip} 5 | \title{Extract VIPs (variable importance in the projection) for a PLS 6 | regression model output returned from model fitting with 7 | \code{simplerspec::fit_pls()}} 8 | \usage{ 9 | extract_pls_vip(mout) 10 | } 11 | \arguments{ 12 | \item{mout}{Model output list returned from \code{simplerspec::fit_pls()}.} 13 | } 14 | \value{ 15 | A tibble data frame with columns \code{wavenumber} and correponding 16 | VIP values in the column \code{vip} for the finally chosen PLS regression 17 | model at the final number of PLS components. 18 | } 19 | \description{ 20 | VIPs are extracted based on the \code{finalModel} sublist 21 | in the \code{caret::train} output contained in the \code{model} element 22 | of the \code{simplerspec::fit_pls()} model output list. The VIPs for 23 | derived number of PLS components in the \code{finalModel} are computed. 24 | } 25 | -------------------------------------------------------------------------------- /man/select_spc_vars.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/select-spc.R 3 | \name{select_spc_vars} 4 | \alias{select_spc_vars} 5 | \title{Select every n-th spectral variable for all spectra and x-values in spectral 6 | tibble (\code{spc_tbl})} 7 | \usage{ 8 | select_spc_vars( 9 | spc_tbl, 10 | lcol_spc = "spc_pre", 11 | lcol_xvalues = "xvalues_pre", 12 | every = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{spc_tbl}{Tibble data.frame containing spectra in list-column} 17 | 18 | \item{lcol_spc}{List-column containing spectra, specified with column 19 | name as symbols or 1L character vector.} 20 | 21 | \item{lcol_xvalues}{List-column containing x-values, specified with 22 | column name as symbols or 1L character vector.} 23 | 24 | \item{every}{Every n-th spectral positions to keep as 1L integer vector.} 25 | } 26 | \value{ 27 | a spectral tibble 28 | } 29 | \description{ 30 | Select every n-th spectral variable for all spectra and x-values in spectral 31 | tibble (\code{spc_tbl}) 32 | } 33 | -------------------------------------------------------------------------------- /man/select_ref_spc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/select-ref-spectra.R 3 | \name{select_ref_spc} 4 | \alias{select_ref_spc} 5 | \title{Select a set of reference spectra to be measured by reference analysis 6 | methods} 7 | \usage{ 8 | select_ref_spc(spc_tbl, ratio_ref, pc, print = TRUE) 9 | } 10 | \arguments{ 11 | \item{spc_tbl}{Spectra as tibble objects that contain preprocessed spectra} 12 | 13 | \item{ratio_ref}{Ratio of desired reference samples to total sample number} 14 | 15 | \item{pc}{Number of principal components (numeric). If pc < 1, the number 16 | of principal components kept corresponds to the number of components 17 | explaining at least (pc * 100) percent of the total variance.} 18 | 19 | \item{print}{logical expression whether a plot (ggplot2) of sample selection 20 | for reference analysis is shown in PCA space 21 | (\code{TRUE} or \code{FALSE}).} 22 | } 23 | \description{ 24 | Select a set of calibration spectra to develop spectral models. 25 | Samples in this list will be analyzed using laboratory reference methods. 26 | } 27 | -------------------------------------------------------------------------------- /R/clustering-outl-spc.R: -------------------------------------------------------------------------------- 1 | ## Robust multivariate outlier detection method based on semi-robust principal 2 | ## components ================================================================== 3 | 4 | # Apply pcout for a nested list of matrices at a certain depth 5 | pcout_depth <- function(x, depth = 2) { 6 | purrr::modify_depth(.x = x, .depth = depth, 7 | .f = ~ mvoutlier::pcout(x = ., makeplot = FALSE)) 8 | } 9 | 10 | # Extracts mvoutlier::pcout $wscat elements in nested list (default at depth 2) 11 | wscat_depth <- function(x, depth = 2) { 12 | purrr::modify_depth(.x = x, .depth = 2, .f = "wscat") 13 | } 14 | 15 | # Returns nested list containing logicals from test which scattering weights 16 | # are zero 17 | which_wscat0_depth <- function(x) { 18 | purrr::modify_depth(wscat_depth(x), 2, ~ . == 0) 19 | } 20 | 21 | # Extract $wfinal01 elements 22 | wfinal01_depth <- function(x) { 23 | purrr::modify_depth(x, 2, "wfinal01") 24 | } 25 | 26 | # Returns nested list containing logicals from test which final 0/1 weights 27 | # are zero 28 | which_wfinal0_depth <- function(x) { 29 | purrr::modify_depth(wfinal01_depth(x), 2, ~ . == 0) 30 | } 31 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: simplerspec 3 | Version: 0.2.1 4 | Date: 2023-09-27 5 | Title: Soil and plant spectroscopic model building and prediction 6 | Description: Functions that cover 7 | reading of spectral data, outlier removal, 8 | spectral preprocessing, calibration sampling, PLS regression 9 | using caret, and model diagnostic statistics and plots. 10 | Authors@R: person("Philipp", "Baumann", 11 | email = "baumann-philipp@protonmail.com", role = c("aut", "cre")) 12 | URL: https://github.com/philipp-baumann/simplerspec 13 | BugReports: https://github.com/philipp-baumann/simplerspec 14 | Depends: R (>= 3.0), 15 | foreach 16 | Imports: 17 | caret, 18 | cowplot, 19 | data.table, 20 | dplyr (>= 0.7.0), 21 | e1071, 22 | ggplot2 (>= 2.0.0), 23 | hexView, 24 | magrittr, 25 | mvoutlier, 26 | modelr, 27 | pls, 28 | plyr, 29 | prospectr, 30 | purrr, 31 | reshape2, 32 | rlang (>= 0.2.0), 33 | stringr, 34 | tibble, 35 | tidyselect, 36 | tidyr, 37 | utils, 38 | glue, 39 | broom 40 | License: GPL-2 41 | LazyData: true 42 | RoxygenNote: 7.2.3 43 | Roxygen: list(markdown = TRUE) 44 | Encoding: UTF-8 45 | -------------------------------------------------------------------------------- /man/evaluate_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-stats.R 3 | \name{evaluate_model} 4 | \alias{evaluate_model} 5 | \alias{summary_df} 6 | \title{Calculate model evaluation metrics} 7 | \usage{ 8 | evaluate_model(data, obs, pred) 9 | 10 | summary_df(df, x, y) 11 | } 12 | \arguments{ 13 | \item{data}{\code{data.frame} with predicted and observed data in columns.} 14 | 15 | \item{obs}{Column that contains observed values, \code{symbol}/\code{name} or 16 | \code{character} (wrapped in "").} 17 | 18 | \item{pred}{Column that contains predicted values, \code{symbol}/\code{name} or 19 | \code{character} (wrapped in "").} 20 | 21 | \item{df}{\code{data.frame} with predicted and observed data in columns.} 22 | 23 | \item{x}{Column that contains observed values, \code{symbol}/\code{name} or 24 | \code{character} (wrapped in "").} 25 | 26 | \item{y}{Column that contains predicted values, \code{symbol}/\code{name} or 27 | \code{character} (wrapped in "").} 28 | } 29 | \description{ 30 | Calculate summary statistics of observed values and model 31 | evaluation statistics for assessing agreement between observed (\code{obs}) and 32 | predicted (\code{pred}) values. 33 | } 34 | -------------------------------------------------------------------------------- /man/read_asd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read-asd.R 3 | \name{read_asd} 4 | \alias{read_asd} 5 | \title{Read ASD fieldspec spectrometer data export into into simplerspec 6 | spectra tibble.} 7 | \usage{ 8 | read_asd(file) 9 | } 10 | \arguments{ 11 | \item{file}{Tab delmited file from ASD software export where the first 12 | column called \code{Wavelength} contais wavelengths in nanometer and the 13 | remaining columns are sample spectra referred by an ID name provided in the 14 | first row of these columns.} 15 | } 16 | \value{ 17 | Spectra data in tibble data frame (class \code{tbl_df}) that contains 18 | columns \code{sample_id} (derived from 2nd and following column names of 19 | tab delimited ASD exported text file), 20 | \code{spc} (list-column of spectral matrices) 21 | and \code{wavelengths} (list-column containing wavelength vectors). 22 | } 23 | \description{ 24 | Read tab delimited text (.txt) files exported from ASD field 25 | spectrometer into simplerspec spectra tibble. 26 | ASD Fieldspec data files are expected in .txt tab delimited file format. 27 | The first row should contain 28 | the name 'Wavelength' for the first column and the file names for the 29 | remaining columns. 30 | } 31 | -------------------------------------------------------------------------------- /man/bind_lcols_dts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot-spc-extended.R 3 | \name{bind_lcols_dts} 4 | \alias{bind_lcols_dts} 5 | \title{Bind list-columns within a tibble into a list of data.tables} 6 | \usage{ 7 | bind_lcols_dts(spc_tbl, lcols, spc_id = "unique_id", group_id = "sample_id") 8 | } 9 | \arguments{ 10 | \item{spc_tbl}{Spectral data in a tibble data frame (classes "tibble_df", 11 | "tbl" and "data.frame").} 12 | 13 | \item{lcols}{Character vector of column names of list-columns to be bound 14 | into a list of data.tables} 15 | 16 | \item{spc_id}{Character vector denoting column name for a unique spectrum ID. 17 | Default is \code{"unique_id"}.} 18 | 19 | \item{group_id}{Character vector denoting column name for the spectrum group 20 | ID. Default is \code{"sample_id"}. The group ID can later be used for 21 | plotting spectra by group (e.g. by using different colors or panels).} 22 | } 23 | \value{ 24 | A list of data.tables. Elements contain data from list-columns 25 | specified in \code{lcols} argument as data.tables. All data.tables contain in 26 | addition \code{spc_id} and \code{group_id} columns. 27 | } 28 | \description{ 29 | Bind one to many list-columns in spectral tibble into a list 30 | of data.tables. 31 | } 32 | -------------------------------------------------------------------------------- /man/assess_multimodels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-stats.R 3 | \name{assess_multimodels} 4 | \alias{assess_multimodels} 5 | \title{Assess multiple pairs of measured and predicted values} 6 | \usage{ 7 | assess_multimodels( 8 | data, 9 | ..., 10 | .metrics = c("simplerspec", "yardstick"), 11 | .model_name = "model" 12 | ) 13 | } 14 | \arguments{ 15 | \item{data}{Data frame with all measured (observed) and predicted variables.} 16 | 17 | \item{...}{Multiple arguments with observed (measured)-predicted pairs, 18 | specified with \code{dplyr::vars(o = , p = )}. 19 | Column names can strings or symbols. The arguments in \code{...} need to be named.} 20 | 21 | \item{.metrics}{Character vector with package used for metrics calculation. 22 | Default is \code{"simplerspec"}, which uses 23 | \code{simplerspec::evaluate_model()}.} 24 | 25 | \item{.model_name}{String with name for the new column that specifies the 26 | model or the outcome variable. Default is \code{"model"}.} 27 | } 28 | \value{ 29 | Data frame with with summary statistics for measured values and 30 | performance metrics for the pairs of measured and predicted values. 31 | } 32 | \description{ 33 | Return performance metrics for test set predictions and 34 | measured values, e.g. for different model outcome variables. 35 | } 36 | -------------------------------------------------------------------------------- /man/read_asd_bin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read-asd.R 3 | \name{read_asd_bin} 4 | \alias{read_asd_bin} 5 | \title{Read ASD binary files and gather spectra and metadata in tibble data 6 | frame.} 7 | \usage{ 8 | read_asd_bin(fnames) 9 | } 10 | \arguments{ 11 | \item{fnames}{Character vector containing full paths of ASD binary files 12 | to be read} 13 | } 14 | \value{ 15 | A spectral tibble (data frame) containing the follwing columns: 16 | \item{unique_id}{Character vector. Unique identifier containing file name 17 | pasted with date and time.} 18 | \item{file_id}{Character vector containing file names and exension} 19 | \item{sample_id}{Character vector containing files names without extension} 20 | \item{metadata}{List-column. List of data frames containing spectral 21 | metadata} 22 | \item{wavelengths}{List-column. List of wavelengths vectors (numeric).} 23 | \item{spc_radiance}{List-column. List of data.tables containing 24 | radiance sample spectra.} 25 | \item{spc_reference}{List-column. List of data.tables containing 26 | reference reflectance spectra.} 27 | \item{spc}{List-column. List of data.tables containing final reflectance 28 | spectra.} 29 | } 30 | \description{ 31 | Read multiple ASD binary files and gather spectra and metadata 32 | into a simplerspec spectral tibble (data frame). The resulting spectral 33 | tibble is compatible with the simplerspec spectra processing and modeling 34 | framework. 35 | } 36 | -------------------------------------------------------------------------------- /man/read_opus_bin_univ.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read-opus-universal.R 3 | \name{read_opus_bin_univ} 4 | \alias{read_opus_bin_univ} 5 | \title{Read a Bruker OPUS spectrum binary file} 6 | \usage{ 7 | read_opus_bin_univ(file_path, extract = c("spc"), 8 | print_progress = TRUE, atm_comp_minus4offset = FALSE) 9 | } 10 | \arguments{ 11 | \item{file_path}{Character vector with path to file} 12 | 13 | \item{extract}{Character vector of spectra types to extract from OPUS binary 14 | file. Default is \code{c("spc")}, which will extract the final spectra, e.g. 15 | expressed in absorbance (named \code{AB} in Bruker OPUS programs). Possible 16 | additional values for the character vector supplied to extract are 17 | \code{"ScSm"} (single channel spectrum of the sample measurement), \ 18 | code{"ScRf"} (single channel spectrum of the reference measurment), 19 | \code{"IgSm"} (interferogram of the sample measurment) and \code{"IgRf"} 20 | (interferogram of the reference measurement).} 21 | 22 | \item{print_progress}{Logical (default \code{TRUE}) whether a message is 23 | printed when an OPUS binary file is parsed into an R list entry.} 24 | 25 | \item{atm_comp_minus4offset}{Logical whether spectra after atmospheric 26 | compensation are read with an offset of \code{-4} bites from Bruker OPUS 27 | files. Default is \code{FALSE}.} 28 | } 29 | \description{ 30 | Read single binary file acquired with an 31 | Bruker Vertex FTIR Instrument 32 | } 33 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>%") 4 | export(assess_multimodels) 5 | export(average_spc) 6 | export(bind_lcols_dts) 7 | export(create_vip_rects) 8 | export(evaluate_model) 9 | export(extract_lcols2dts) 10 | export(extract_pls_vip) 11 | export(fit_pls) 12 | export(fit_rf) 13 | export(gather_spc) 14 | export(join_chem_spec) 15 | export(join_spc_chem) 16 | export(merge_dts) 17 | export(merge_dts_l) 18 | export(plot_pls_vip) 19 | export(plot_spc) 20 | export(plot_spc_ext) 21 | export(pls_ken_stone) 22 | export(predict_from_spc) 23 | export(preprocess_spc) 24 | export(read_asd) 25 | export(read_asd_bin) 26 | export(read_opus_bin_univ) 27 | export(read_opus_univ) 28 | export(remove_outliers) 29 | export(resample_spc) 30 | export(select_ref_spc) 31 | export(select_spc_vars) 32 | export(slice_xvalues) 33 | export(split_df2l) 34 | export(summary_df) 35 | import(purrr) 36 | import(stats) 37 | importFrom(data.table,":=") 38 | importFrom(data.table,.SD) 39 | importFrom(data.table,data.table) 40 | importFrom(data.table,fread) 41 | importFrom(data.table,rbindlist) 42 | importFrom(data.table,setDT) 43 | importFrom(data.table,setkey) 44 | importFrom(dplyr,pull) 45 | importFrom(e1071,kurtosis) 46 | importFrom(foreach,"%do%") 47 | importFrom(foreach,"%dopar%") 48 | importFrom(magrittr,"%>%") 49 | importFrom(purrr,imap) 50 | importFrom(purrr,modify_depth) 51 | importFrom(rlang,ensym) 52 | importFrom(rlang,quo_name) 53 | importFrom(rlang,set_names) 54 | importFrom(stringr,str_replace) 55 | importFrom(tidyselect,one_of) 56 | -------------------------------------------------------------------------------- /man/remove_outliers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove-outl-spectra.R 3 | \name{remove_outliers} 4 | \alias{remove_outliers} 5 | \title{Remove outlier spectra} 6 | \usage{ 7 | remove_outliers(list_spectra, remove = TRUE) 8 | } 9 | \arguments{ 10 | \item{list_spectra}{List that contains averaged 11 | spectral information 12 | in list element \code{MIR_mean} (data.frame) and metadata in 13 | \code{data_meta} (data.frame).} 14 | 15 | \item{remove}{logical expression (\code{TRUE} or \code{FALSE}) 16 | that specifies weather spectra shall be removed. 17 | If \code{rm = FALSE}, there will be no outlier removal} 18 | } 19 | \value{ 20 | Returns list \code{spectra_out} that contains: 21 | \itemize{ 22 | \item \code{MIR_mean}: Outlier removed MIR spectra as 23 | data.frame object. If \code{remove = FALSE}, 24 | the function will 25 | return almost identical list identical to \code{list_spectra}, 26 | except that the first \code{indices} column of the spectral 27 | data frame \code{MIR_mean} is removed 28 | (This is done for both options 29 | \code{remove = TRUE} and \code{remove = FALSE}). 30 | \item \code{data_meta}: metadata data.frame, identical 31 | as in the \code{list_spectra} input list. 32 | \item \code{plot_out}: (optional) ggplot2 graph 33 | that shows all spectra (absorbance on x-axis and wavenumber 34 | on y-axis) with outlier marked, if 35 | \code{remove = TRUE}. 36 | } 37 | } 38 | \description{ 39 | Remove outlier spectra based on the 40 | \code{pcout()} function of the \code{mvoutlier} package. 41 | } 42 | \details{ 43 | This is an optional function if one wants to remove 44 | outliers. 45 | } 46 | -------------------------------------------------------------------------------- /R/select-spc.R: -------------------------------------------------------------------------------- 1 | #' Select every n-th spectral variable for all spectra and x-values in spectral 2 | #' tibble (`spc_tbl`) 3 | #' 4 | #' @param spc_tbl Tibble data.frame containing spectra in list-column 5 | #' @param lcol_spc List-column containing spectra, specified with column 6 | #' name as symbols or 1L character vector. 7 | #' @param lcol_xvalues List-column containing x-values, specified with 8 | #' column name as symbols or 1L character vector. 9 | #' @param every Every n-th spectral positions to keep as 1L integer vector. 10 | #' @return a spectral tibble 11 | #' @export 12 | select_spc_vars <- function(spc_tbl, 13 | lcol_spc = "spc_pre", 14 | lcol_xvalues = "xvalues_pre", 15 | every = NULL) { 16 | lcol_spc <- rlang::enquo(lcol_spc) 17 | lcol_spc_nm <- rlang::quo_name(lcol_spc) 18 | lcol_xvalues <- rlang::enquo(lcol_xvalues) 19 | lcol_xvalues_nm <- rlang::quo_name(lcol_xvalues) 20 | 21 | stopifnot(tibble::is_tibble(spc_tbl)) 22 | 23 | if (is.null(every)) {return(spc_tbl); 24 | message("Returning `spc_tbl` and keep all variables.")} 25 | 26 | spc_lst <- dplyr::pull(spc_tbl, !!lcol_spc) 27 | spc <- data.table::rbindlist(spc_lst) 28 | 29 | pos_sel <- seq(1L, ncol(spc), every) 30 | 31 | spc_sel <- spc[, pos_sel, with = FALSE] 32 | 33 | spc_lst_out <- stats::setNames( 34 | map(purrr::transpose(spc_sel), data.table::as.data.table), 35 | names(spc_lst)) 36 | 37 | xvalues <- dplyr::pull(spc_tbl, !!lcol_xvalues) 38 | xvalues_sel <- map(xvalues, ~ .x[pos_sel]) 39 | 40 | dplyr::mutate(spc_tbl %>% dplyr::ungroup(), 41 | !!lcol_spc_nm := spc_lst_out, !!lcol_xvalues_nm := xvalues_sel) 42 | } 43 | -------------------------------------------------------------------------------- /man/slice_xvalues.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/slice-spc.R 3 | \name{slice_xvalues} 4 | \alias{slice_xvalues} 5 | \title{Slice spectra into defined x-axis ranges} 6 | \usage{ 7 | slice_xvalues( 8 | spc_tbl, 9 | xunit_lcol = "wavenumbers", 10 | spc_lcol = "spc", 11 | xvalues_cut = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{spc_tbl}{Spectral data in a tibble object (classes "tibble_df", "tbl" 16 | and "data.frame"). The spectra tibble is expected to contain at least 17 | the column \code{spc} (list-column with spectral matrices stored in a list) 18 | and \code{wavenumbers} or \code{wavelengths} (list-column that contains list 19 | of x-axis values).} 20 | 21 | \item{xunit_lcol}{Character vector that specifies column name where x-axis 22 | axis units are stored within \code{spc_tbl}. Default is \code{"wavenumber"}.} 23 | 24 | \item{spc_lcol}{Character vector that specifies which column (list-column) 25 | contains spectra to be sliced. Default is \code{"spc"}.} 26 | 27 | \item{xvalues_cut}{List of numeric vectors that contains upper and lower bounds of respective regions to keep in spectra. The spectral regions outside 28 | the \code{xvalues_cut} intervals will be cut out in the output spectra.} 29 | } 30 | \value{ 31 | Spectral tibble (data frame with list-columns) with sliced x-axis 32 | column and spectral column. Both the x-axis list-column and the spectral 33 | tibble list-column only contain data specified within the \code{xvalues_cut} 34 | argument (list of numeric vectors). 35 | } 36 | \description{ 37 | Slice spectra contained in list-column of spectral tibble 38 | (data frame). A list of x-axis value ranges can be specified. Spectra are 39 | cut based on these ranges. 40 | } 41 | -------------------------------------------------------------------------------- /man/average_spc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/average-spc.R 3 | \name{average_spc} 4 | \alias{average_spc} 5 | \title{Average spectra in list-column by entries in grouping column} 6 | \usage{ 7 | average_spc(spc_tbl, by = "sample_id", column_in = "spc_rs") 8 | } 9 | \arguments{ 10 | \item{spc_tbl}{Tibble data frame containing at least the grouping column 11 | given in argument \code{by} and input spectra given in list-column \code{column_in}.} 12 | 13 | \item{by}{Character vector of length 1L or name/symbol that specifies the 14 | column by which groups of spectra are averaged. Default is \code{"sample_id"}.} 15 | 16 | \item{column_in}{Character vector of length 1L or or name/symbol that 17 | specifies the list-column that contains the inputs spectra to be averaged. 18 | Default is \code{"spc_rs"}, which are resampled spectra (i.e., resulting after 19 | preceding \code{resample_spc()} step).} 20 | } 21 | \value{ 22 | Spectra tibble data frame (class \code{"tbl_df"}, \code{"tbl"}, \code{"data.frame"}) 23 | with a new list-column of column name \code{"spc_mean"} at the last position, 24 | containing mean spectra with identical row replicates within the same 25 | \code{by}-group. 26 | } 27 | \description{ 28 | Average spectra in list-column of spectra tibble (\code{spc_tbl}) by 29 | groups given in group column. 30 | } 31 | \details{ 32 | For memory efficiency and subsequent modeling, consider slicing the 33 | extra row copies of \code{spc_mean} resulting from \code{average_spc()} for example by 34 | \itemize{ 35 | \item \verb{split(x = spc_tbl, f = spc_tbl$) \%>\% lapply(., function(x) x x[1, ]) \%>\% do.call(., rbind)} 36 | \item \verb{dplyr::group_by(spc_tbl, ) \%>\% dplyr::slice(1L)} 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /man/merge_dts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot-spc-extended.R 3 | \name{merge_dts} 4 | \alias{merge_dts} 5 | \title{Merge list-columns of spectra, x-axis values, metadata and additional 6 | measured variables into a single long form data.table} 7 | \usage{ 8 | merge_dts( 9 | spc_tbl, 10 | lcols_spc = c("spc", "spc_pre"), 11 | lcol_measure = NULL, 12 | spc_id = "unique_id", 13 | group_id = "sample_id" 14 | ) 15 | } 16 | \arguments{ 17 | \item{spc_tbl}{Tibble data frame containing spectra, x-axis values, metadata 18 | and eventual measured variables as list-columns.} 19 | 20 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted. 21 | Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).} 22 | 23 | \item{lcol_measure}{Character vector of length 1 denoting the column name 24 | of the measure columns. This argument is optional. Default is \code{NULL}, 25 | which does not extract an additional measure column.} 26 | 27 | \item{spc_id}{Character vector of column that contains a unique spectral 28 | identifier for all spectra. Default is \code{"unique_id"}.} 29 | 30 | \item{group_id}{Character vector of columns that is used assigning spectra 31 | into groups. Default is \code{"sample_id"}. The \code{group_id} can be 32 | used for later plotting and thereby visually separating spectral groups into 33 | using different colors or panels.} 34 | } 35 | \value{ 36 | A single data.table containing long form aggregated data of 37 | spectra, x-axis values, metadata and an additionally measured variable. 38 | } 39 | \description{ 40 | Helper function that merges all spectra and related data into 41 | a single long form data.table than can subsequently be used for plotting. 42 | } 43 | -------------------------------------------------------------------------------- /man/preprocess_spc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/preprocess-spc.R 3 | \name{preprocess_spc} 4 | \alias{preprocess_spc} 5 | \title{Preprocess spectra} 6 | \usage{ 7 | preprocess_spc(spc_tbl, select, column_in = "spc_mean", custom_function = NULL) 8 | } 9 | \arguments{ 10 | \item{spc_tbl}{Tibble that contains spectra to be preprocessed within 11 | a list-column.} 12 | 13 | \item{select}{Character vector of predefined preprocessing options to be 14 | applied to the spectra list-column specified in \code{column_in}. 15 | Common prefined values are stated as abbreviated preprocessing methods and 16 | options such as \code{"sg_1_w21"}, where \code{"sg"} stands for 17 | Savitzky-Golay and \code{1} for first derivative and \code{"w21"} 18 | for a window size of 21 points.} 19 | 20 | \item{column_in}{Character vector of single list-column in \code{spc_tbl} that 21 | contain list of spectra (1 row matrix) to be processed by function supplied 22 | in \code{select}.} 23 | 24 | \item{custom_function}{A character string of a custom processing function 25 | that is later parsed (produces expression in a list) and evaluated within 26 | the function \code{preprocess_spc}. 27 | The character vector argument of \code{custom_function} 28 | needs to contain \code{"spc_raw"}, which is the single data table of spectra 29 | that results from binding a list of data.tables (spectra to preprocess) 30 | from the spectra list-column specified in \code{column_in}. 31 | An example for a value is 32 | \code{"prospectr::savitzkyGolay(X = spc_raw, m = 0, p = 3, w = 9)"}. 33 | Optional argument. Default is \code{NULL}.} 34 | } 35 | \description{ 36 | Preprocesses spectra in tibble column by sample_id after 37 | averaging spectra by \code{simplerspec::average_spc()}. 38 | } 39 | -------------------------------------------------------------------------------- /man/read_opus_univ.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read-opus-universal.R 3 | \name{read_opus_univ} 4 | \alias{read_opus_univ} 5 | \title{Read a list of Bruker OPUS spectrum binary files.} 6 | \usage{ 7 | read_opus_univ(fnames, extract = c("spc"), parallel = FALSE, 8 | atm_comp_minus4offset = FALSE) 9 | } 10 | \arguments{ 11 | \item{fnames}{List of character vectors containing full path names of spectra} 12 | 13 | \item{extract}{Character vector of spectra types to extract from file. 14 | Possible values are: "spc" (AB block in Bruker Opus software), "spc_nocomp" 15 | (Spectra before final atmospheric compensation; only present if background 16 | correction has been set in Opus), "ScSm" (Single channel spectrum of the 17 | sample), "ScRf" (Single channel spectrum of the sample), "IgSm" (Interferogram 18 | of the sample), "IgRf" (Interferogram of the reference). Default is 19 | \code{extract = c("spc")}.} 20 | 21 | \item{parallel}{Logical (\code{TRUE} or \code{FALSE} indicating whether 22 | files are read in parallel (multiple processors or multiple cores)). 23 | Default is \code{parallel = FALSE}. If \code{TRUE} a parallel backend needs 24 | to be registered, e.g. by using the \code{doParallel} package.} 25 | 26 | \item{atm_comp_minus4offset}{Logical whether spectra after atmospheric 27 | compensation are read with an offset of \code{-4} bites from Bruker OPUS 28 | files. Default is \code{FALSE}.} 29 | } 30 | \value{ 31 | out List spectra and metadata (parameters) extracted from 32 | Bruker OPUS spectrometer files. List names are the names of the OPUS 33 | files whose spectral data were extracted. 34 | } 35 | \description{ 36 | Read multiple spectral files measured with a Bruker FTIR Instrument. Files 37 | containing spectra are in OPUS binary format. 38 | \code{read_opus_univ} is a wrapper for \code{read_opus_bin_univ()}) 39 | } 40 | -------------------------------------------------------------------------------- /man/merge_dts_l.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot-spc-extended.R 3 | \name{merge_dts_l} 4 | \alias{merge_dts_l} 5 | \title{Wrapper function around \code{merge_dts()} for list of tibbles to 6 | aggregate data for plotting.} 7 | \usage{ 8 | merge_dts_l( 9 | spc_tbl_l, 10 | lcols_spc = c("spc", "spc_pre"), 11 | lcol_measure = NULL, 12 | spc_id = "unique_id", 13 | group_id = "sample_id" 14 | ) 15 | } 16 | \arguments{ 17 | \item{spc_tbl_l}{List of spectral tibbles (data frames).} 18 | 19 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted. 20 | Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).} 21 | 22 | \item{lcol_measure}{Character vector of length 1 denoting the column name 23 | of the measure columns. This argument is optional. Default is \code{NULL}, 24 | which does not extract an additional measure column.} 25 | 26 | \item{spc_id}{Character vector of column that contains a unique spectral 27 | identifier for all spectra. Default is \code{"unique_id"}.} 28 | 29 | \item{group_id}{Character vector of columns that is used assigning spectra 30 | into groups. Default is \code{"sample_id"}. The \code{group_id} can be 31 | used for later plotting and thereby visually separating spectral groups into 32 | using different colors or panels.} 33 | } 34 | \value{ 35 | A single data.table containing long form aggregated data of 36 | spectra, x-axis values, metadata and an additionally measured variable. 37 | An additional column called \code{group_id_tbl} is appended. It denotes 38 | the name of the spectral tibble supplied with the list \code{spc_tbl_l}. 39 | } 40 | \description{ 41 | Instead of a single spectral tibble (data frame) multiple 42 | spectral tibbles can be merged into a long-form data.table for plotting 43 | spectra and related data. For details, see 44 | \code{\link{merge_dts}}. 45 | } 46 | -------------------------------------------------------------------------------- /R/tbl-lcol-map-funprog.R: -------------------------------------------------------------------------------- 1 | # Split a tibble data frame into a list of tibbles by a group column 2 | #' @title Split a tibble data frame into a list of tibbles by a group column 3 | #' @description Helper function that calls \code{split} on a tibble using a 4 | #' grouping column within tibble. 5 | #' @param tbl_df Tibble data frame 6 | #' @param group Character vector with name of column based on which tibble 7 | #' is split into a list of tibbles 8 | #' @return List of tibbles. Each tibble contains data split by 9 | #' a group column within \code{tbl_df}. 10 | #' @export 11 | split_df2l <- function(tbl_df, group) { 12 | split(tbl_df, tbl_df[, group]) 13 | } 14 | 15 | # Extract multiple tibble list columns, row bind them separately into 16 | # single data tables and return a list of data.tables 17 | #' @title Extract multiple tibble list-columns and return data as list of 18 | #' data.tables 19 | #' @description Extract multiple tibble list columns, row bind them separately 20 | #' into single data tables and return a list of data.tables. 21 | #' @param spc_tbl Spectral tibble (data frame) with spectral data contained 22 | #' in list-columns 23 | #' @param lcols Character vector containing names of list-columns to be 24 | #' extracted into a list of data.tables 25 | #' @return List of data.tables. Each element is a data.table derivied from a 26 | #' list-column specified in the \code{lcols} argument. 27 | #' @import purrr 28 | #' @export 29 | extract_lcols2dts <- function(spc_tbl, lcols) { 30 | # Below code is first part of simplerspec::bind_lcols_dts 31 | # todo: add warning for lcols not present in spc_tbl 32 | which_bind <- colnames(spc_tbl) %in% lcols 33 | lcols_to_bind <- colnames(spc_tbl)[which_bind] 34 | names(lcols_to_bind) <- lcols_to_bind 35 | dts <- map(lcols_to_bind, 36 | function(y) { 37 | if (is.list(spc_tbl[[y]])) { 38 | # todo: Test if number of columns is equal in each data.frame or matrix 39 | # of the list-(column); if not, return a comprehensible error 40 | data.table::data.table(do.call(rbind, spc_tbl[[y]])) 41 | } else if (is.atomic((spc_tbl[[y]]))) { 42 | data.table::data.table(spc_tbl[, y]) 43 | } 44 | } 45 | ) 46 | } 47 | -------------------------------------------------------------------------------- /R/predict-spc.R: -------------------------------------------------------------------------------- 1 | #' @title Predict soil properties of new spectra based on a list of calibration models 2 | #' @description Append predictions for a set of responses specified by a list 3 | #' of calibration models and a tibble containing preprocessed spectra as 4 | #' list-columns. 5 | #' @param model_list List of model output generated from calibration step 6 | #' (\code{pls_ken_stone()} 7 | #' @param spc_tbl Tibble of spectra after preprocessing 8 | #' (\code{preprocess_spc()}) 9 | #' @param slice Logical expression wheather only one row per sample_id returned. 10 | #' @usage predict_from_spc(model_list, spc_tbl, slice = TRUE) 11 | #' @return tibble with new columns \code{model}, and predicted values with 12 | #' column names of model list. 13 | #' @export 14 | predict_from_spc <- function(model_list, spc_tbl, slice = TRUE) { 15 | 16 | if (all(sapply(model_list, class) == "train")) { 17 | # If model_list is a list of elements of class "train", model_list 18 | # can be directly handed over to caret::extractPrediction 19 | models <- model_list 20 | } else { 21 | # Extract pls_model elements (outputs from caret) for a list of models 22 | models <- lapply(model_list, function(x) x[["model"]]) 23 | stopifnot(all(sapply(models, class) == "train")) 24 | } 25 | 26 | # Group by spectra tibble by sample_id and keep one row per sample_id 27 | if (slice == TRUE) { 28 | spc_tbl <- spc_tbl %>% dplyr::group_by(sample_id) %>% 29 | dplyr::slice(1L) %>% dplyr::ungroup() 30 | } 31 | 32 | # Collect preprocessed spectra in one data.table 33 | spc <- data.table::rbindlist(spc_tbl$spc_pre) 34 | pred_caret <- caret::extractPrediction( 35 | models, 36 | unkX = spc 37 | ) 38 | 39 | # Number of caret model objects used to predict 40 | n <- length(unique(pred_caret$object)) 41 | # Add sample_id from metadata of spectra to predicted values 42 | sample_id <- spc_tbl$sample_id 43 | # id column to long form data frame 44 | id <- rep(sample_id, n) 45 | pred_id <- cbind(pred_caret, sample_id = id) 46 | # Get data into wide form 47 | pred_wide <- tidyr::spread( 48 | data = pred_id, key = "object", value = "pred" 49 | ) 50 | 51 | # Join predictions with tibble 52 | dplyr::inner_join(spc_tbl, pred_wide, by = "sample_id") 53 | } 54 | -------------------------------------------------------------------------------- /R/join-chem-spectra.R: -------------------------------------------------------------------------------- 1 | ## Join chemical and spectral data ============================== 2 | #' @title Join chemical and spectral data frames 3 | #' @description Combines spectral data (data.frame) and chemical 4 | #' data (data.frame). 5 | #' @param dat_chem data.frame that contains chemical values of 6 | #' the sample 7 | #' @param dat_spec List that contains spectral data 8 | #' @return List: xxx 9 | #' @param by character of column name that defines sample_ID 10 | #' @export 11 | join_chem_spec <- function( 12 | dat_chem, dat_spec, by = "sample_ID") { 13 | 14 | # Alternative when "no visible binding for global variable": 15 | data_meta <- MIR <- MIR_pre <- ori <- MIR_mean <- NULL 16 | # http://stackoverflow.com/questions/23475309/in-r-is-it-possible-to-suppress-note-no-visible-binding-for-global-variable 17 | # Replace sample_ID by ID 18 | if(!is.data.frame(dat_chem)) { 19 | stop(dat_chem, "needs to be a data.frame", call. = FALSE) 20 | } else { 21 | colnames(dat_chem)[colnames(dat_chem) == by] <- "ID" 22 | dat_chem$ID <- as.factor(dat_chem$ID) 23 | # Select only chemical data that have no outlier spectra 24 | dat_chem <- dat_chem[dat_spec$data_meta$ID, ] 25 | ID <- as.factor(dat_spec$data_meta$ID) 26 | # Join ref analyses 27 | MIRdata <- data.frame(ID = ID) 28 | MIRdata$MIR <- dat_spec$MIR_pre 29 | MIRdata$ori <- dat_spec$MIR_mean 30 | # Joining by ID, type = "inner" 31 | MIRdata_chem <- plyr::join(dat_chem, MIRdata, type = "inner") 32 | # before dplyr::inner_join(dat_chem, MIRdata) 33 | MIRdata_chem 34 | } 35 | } 36 | 37 | ## Join spectra and chemical tibbles =========================================== 38 | 39 | #' @title Join spectra data and chemical data tibbles 40 | #' @description Combines spectral data (tibble class) and chemical 41 | #' data (tibble class). 42 | #' @param spc_tbl Tibble that contains spectral data 43 | #' @param chem_tbl Tibble that contains chemical reference values of 44 | #' the samples 45 | #' @param by character of column name that defines sample_ID 46 | #' @return Tibble joined by sample_id 47 | #' @export 48 | join_spc_chem <- function(spc_tbl, chem_tbl, by = "sample_id") { 49 | 50 | if(!tibble::is_tibble(spc_tbl)) { 51 | stop(spc_tbl, "needs to be a Tibble", call. = FALSE) 52 | } else { 53 | # Rename column sample_ID to sample_id if sample_ID exists 54 | if("sample_ID" %in% colnames(chem_tbl)) { 55 | # Create a list with new column name; use standard evaluation 56 | rename_list <- list(sample_id = "sample_ID") 57 | chem_tbl <- dplyr::rename_(chem_tbl, .dots = rename_list) 58 | } 59 | spc_tbl <- dplyr::inner_join(spc_tbl, chem_tbl) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /man/plot_pls_vip.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pls-vip.R 3 | \name{plot_pls_vip} 4 | \alias{plot_pls_vip} 5 | \title{Plot stacked ggplot2 graphs with the Variable Importance for the 6 | Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, 7 | and the preprocessed spectra.} 8 | \usage{ 9 | plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre", 10 | by = "sample_id", 11 | xlab = expression(paste("Wavenumber [", cm^-1, "]")), 12 | ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.", 13 | alpha = 0.2) 14 | } 15 | \arguments{ 16 | \item{mout}{Model output list that is returned from 17 | \code{simplerspec::fit_pls()}. This object contains a nested list with 18 | the \code{caret::train()} object (class \code{train}), based on which 19 | VIPs at finally selected number of PLS components are computed.} 20 | 21 | \item{y1}{Character vector of list-column name in 22 | \code{mout$data$calibration}, where spectra for bottom graph are extracted. 23 | Default is \code{"spc_mean"}, which plots the mean calibration spectra after 24 | resampling.} 25 | 26 | \item{y2}{Character string of list-column name in 27 | \code{mout$data$calibration}, where spectra for bottom graph are extracted. 28 | Default is \code{"spc_pre"}, which plots the preprocessed calibration 29 | spectra after resampling.} 30 | 31 | \item{by}{Character string that is used to assign spectra to the same group 32 | and therefore ensures that all spectra are plotted with the same colour. 33 | Default is \code{"sample_id"}} 34 | 35 | \item{xlab}{Character string of X axis title for shared x axis of stacked 36 | graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}} 37 | 38 | \item{ylab1}{Y axis title of bottom spectrum. Default is \code{"Absorbance"}.} 39 | 40 | \item{ylab2}{Y axis title of bottom spectrum. Default is 41 | \code{"Preprocessed Abs."}.} 42 | 43 | \item{alpha}{Double between 0 and 1 that defines transparency of spectra 44 | lines in returned graph (ggplot plot object).} 45 | } 46 | \description{ 47 | Plot stacked ggplot2 graphs of VIP for the final 48 | PLS regression model output of the calibration (training) data set for the 49 | final number of components, raw (replicate mean) spectra, and preprocessed 50 | spectra. Regions with VIP > 1 are highlighted across the stacked graphs 51 | in beige colour rectangles. VIP calculation is implemented as described in 52 | Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection 53 | methods when multicollinearity is present. Chemometrics and Intelligent 54 | Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011 55 | } 56 | -------------------------------------------------------------------------------- /man/plot_spc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_spc.R 3 | \name{plot_spc} 4 | \alias{plot_spc} 5 | \title{Plot tibble spectra} 6 | \usage{ 7 | plot_spc(spc_tbl, spc_tbl_2 = NULL, 8 | x_unit = "wavenumber", 9 | y = "spc", by = "unique_id", 10 | graph_id_1 = "Set 1", graph_id_2 = "Set 2", 11 | graph_id_1_col = "black", graph_id_2_col = "red", 12 | xlab = expression(paste("Wavenumber [", cm^-1, "]")), 13 | ylab = "Absorbance", 14 | alpha = 0.2, legend = TRUE) 15 | } 16 | \arguments{ 17 | \item{spc_tbl}{Tibble that contains the first set of spectra to plot as 18 | list-column} 19 | 20 | \item{spc_tbl_2}{Tibble that contains the second set of spectra (optional) 21 | to plot as list-column.} 22 | 23 | \item{x_unit}{Character string describing the x axis unit. Default is 24 | \code{"wavenumber"}, which will produce a graph with wavenumbers on the 25 | x axis with reversed number. If \code{x_unit = "wavelength"}, the axis 26 | will be in regular order (lower wavelengths in nm on the left and higher 27 | on the right side of the axis).} 28 | 29 | \item{y}{Character string of list-column name in tibble where spectra of 30 | desired type are extracted to plot.} 31 | 32 | \item{by}{Character string of column that is used to group the spectra. 33 | Default is \code{"unique_id"}. If replica spectra are present in the file 34 | and processed spectra resulting after averaging need to be plotted, 35 | it is recommend to use \code{"sample_id"} as argument to group according 36 | the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl} 37 | and \code{spc_tbl_2}).} 38 | 39 | \item{graph_id_1}{Character string used for grouping the first spectra set 40 | (\code{spc_tbl}) and producing 41 | the label text accordingly. Default is \code{"Set 1"}.} 42 | 43 | \item{graph_id_2}{Character string used for grouping the second spectra set 44 | (\code{spc_tbl_2}) and producing the label text accordingly. Default is 45 | \code{"Set 2"}} 46 | 47 | \item{graph_id_1_col}{Character string for the colour of the first spectra 48 | set. Default is \code{"black"}.} 49 | 50 | \item{graph_id_2_col}{Character string for the colour of the first spectra 51 | set. Default is \code{"red"}.} 52 | 53 | \item{xlab}{Character string or mathematical expression 54 | (use \code{expression}) for the x axis title. Default is 55 | \code{expression(paste("Wavenumber [", cm^-1, "]"))}.} 56 | 57 | \item{ylab}{Character string or mathematical expression 58 | (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.} 59 | 60 | \item{alpha}{Double in between 0 and 1. Sets the transparency for the plotted 61 | spectra lines.} 62 | 63 | \item{legend}{Logical whether to plot a legend for the spectra describing 64 | its name selected in arguments \code{graph_id_1} and \code{graph_id_2}. 65 | Default is \code{TRUE}.} 66 | } 67 | \description{ 68 | Plot spectra from tibble spectra objects. 69 | } 70 | -------------------------------------------------------------------------------- /R/average-spc.R: -------------------------------------------------------------------------------- 1 | #' @title Average spectra in list-column by entries in grouping column 2 | #' @description Average spectra in list-column of spectra tibble (`spc_tbl`) by 3 | #' groups given in group column. 4 | #' @param spc_tbl Tibble data frame containing at least the grouping column 5 | #' given in argument `by` and input spectra given in list-column `column_in`. 6 | #' @param by Character vector of length 1L or name/symbol that specifies the 7 | #' column by which groups of spectra are averaged. Default is `"sample_id"`. 8 | #' @param column_in Character vector of length 1L or or name/symbol that 9 | #' specifies the list-column that contains the inputs spectra to be averaged. 10 | #' Default is `"spc_rs"`, which are resampled spectra (i.e., resulting after 11 | #' preceding `resample_spc()` step). 12 | #' @return Spectra tibble data frame (class `"tbl_df"`, `"tbl"`, `"data.frame"`) 13 | #' with a new list-column of column name `"spc_mean"` at the last position, 14 | #' containing mean spectra with identical row replicates within the same 15 | #' `by`-group. 16 | #' @details For memory efficiency and subsequent modeling, consider slicing the 17 | #' extra row copies of `spc_mean` resulting from `average_spc()` for example by 18 | #' * `split(x = spc_tbl, f = spc_tbl$) %>% lapply(., function(x) x x[1, ]) %>% do.call(., rbind)` 19 | #' * `dplyr::group_by(spc_tbl, ) %>% dplyr::slice(1L)` 20 | #' @import stats 21 | #' @importFrom data.table data.table rbindlist setkey setDT := .SD 22 | #' @importFrom rlang ensym quo_name 23 | #' @export 24 | average_spc <- function(spc_tbl, by = "sample_id", column_in = "spc_rs") { 25 | 26 | # Avoid R CMD check note: `"...no visible binding for global variable..."` 27 | spc_rs <- sample_id <- id <- NULL 28 | 29 | # Quote the symbol or the string supplied by the second and third argument 30 | column_in <- rlang::enquo(column_in) 31 | by <- rlang::enquo(by) 32 | 33 | # Combine rows of all resampled spectra in one data.table 34 | spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!column_in)) 35 | 36 | # Add `id` group column to input spectra 37 | spc[, id := dplyr::pull(spc_tbl, !!by)] # spc_tbl[, by][[by]] 38 | 39 | # Average spectra, use `id` column as index for grouping 40 | data.table::setkey(spc, id) 41 | spc_mean <- spc[, lapply(.SD, mean), by = id] 42 | 43 | # Create new vector of group ID values from column `id` 44 | group_id_mean <- spc_mean[, id] 45 | # Delete sample_id column in data.table 46 | spc_mean_noid <- spc_mean[, id := NULL] 47 | 48 | # Create list of averaged spectra, one spectrum is one data.table 49 | # Use best performing alternative: 50 | # https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md 51 | spc_mean_list <- stats::setNames( 52 | map(purrr::transpose(spc_mean_noid), data.table::as.data.table), 53 | group_id_mean 54 | ) 55 | 56 | # Convert averaged spectra and sample_id to tibble 57 | spc_mean_tbl <- tibble::tibble( 58 | !!by := group_id_mean, spc_mean = spc_mean_list 59 | ) 60 | # Join mean spectra tibble spc_tbl_mean to spc_tbl 61 | spc_tbl_out <- dplyr::left_join(spc_tbl, spc_mean_tbl, 62 | by = rlang::quo_name(by)) 63 | 64 | return(spc_tbl_out) 65 | } 66 | -------------------------------------------------------------------------------- /R/utils-unsupervised.R: -------------------------------------------------------------------------------- 1 | # Helper functions to compute principal component analysis on spectral 2 | # data and to append scores and importance ------------------------------------- 3 | 4 | pca_append_scores <- function(spc_tbl, y = "spc_pre", 5 | slice = TRUE, slice_by = sample_id, 6 | select_comps = 1:2, 7 | scale = TRUE, center = TRUE, 8 | .unnest = NULL) { 9 | var <- rlang::enquo(y) 10 | var_nm_pca <- paste0(rlang::quo_name(var), "_pca_scores") 11 | slice_by <- rlang::enquo(slice_by) 12 | 13 | if (slice) { 14 | spc_tbl <- spc_tbl %>% dplyr::group_by(!!slice_by) %>% dplyr::slice(1L) 15 | } 16 | 17 | sample_id <- spc_tbl %>% dplyr::pull(!!slice_by) 18 | 19 | # Pull the list of preprocessed spectra data.tables into one data.table 20 | spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!var)) 21 | 22 | # Perform a principal component analysis 23 | spc_pca <- stats::prcomp(spc, scale = scale, center = center) 24 | 25 | # Extract PCA scores for selected principal components 26 | spc_pca_scores <- dplyr::as_data_frame(spc_pca$x[, select_comps]) 27 | # Convert data frame of scores to list column 28 | spc_pca_scores <- stats::setNames( 29 | split(spc_pca_scores, seq_len(nrow(spc_pca_scores))), 30 | sample_id) 31 | ncomp <- ncol(spc_pca$rotation) 32 | 33 | # Add list-column with PCA scores to the spectral tibble object 34 | spc_tbl_pca <- tibble::add_column(spc_tbl, !!var_nm_pca := spc_pca_scores) 35 | if (!is.null(.unnest)) { 36 | # Convert variable name of new column to symbol 37 | var_sym_pca <- rlang::sym(var_nm_pca) 38 | spc_tbl_pca <- spc_tbl_pca %>% tidyr::unnest(!!var_sym_pca) 39 | } 40 | spc_tbl_pca 41 | } 42 | 43 | #' @noRd 44 | pca_append_importance <- function(spc_tbl, y = "spc_pre", 45 | slice = TRUE, slice_by = "sample_id", 46 | select_comps = 1:2, 47 | scale = TRUE, center = TRUE) { 48 | var <- rlang::enquo(y) 49 | slice_by <- rlang::enquo(slice_by) 50 | 51 | if (slice) { 52 | spc_tbl <- spc_tbl %>% dplyr::group_by(!!slice_by) %>% dplyr::slice(1L) 53 | } 54 | 55 | # Pull the list of preprocessed spectra data.tables into one data.table 56 | spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!var)) 57 | 58 | # Perform a principal component analysis 59 | spc_pca <- stats::prcomp(spc, scale = scale, center = center) 60 | ncomp <- ncol(spc_pca$rotation) 61 | 62 | # // pb 20180509: broom::tidy(spc_pca) returns error; this is a bug in 63 | # broom:::tidy.prcomp(); does not work if data table without row names 64 | # Extract variance explained 65 | importance_measures <- c("sd", "var_prop", "var_cum") 66 | # "sd" := "Standard deviation"; "var_prop" := "Proportion of Variance"; 67 | # "var_cum" := "Cumulative Proportion" 68 | spc_pca_varexpl <- broom::fix_data_frame( 69 | t(summary(spc_pca)$importance), 70 | newnames = importance_measures, newcol = "PC") %>% 71 | tibble::add_column(ncomp = 1:ncomp, .before = 1) %>% 72 | tibble::as_tibble() 73 | 74 | # Return list of spectral tibble with pc scores attached, and 75 | # the PCA importance measures in a tidy data frame 76 | spc_pca_varexpl 77 | } 78 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # simplerspec 0.2.1 4 | 5 | - `fit_pls()`: Fixed the join of predicted vs. measured values when choosing 6 | evaluation with cross-validation; now predictions and model evaluation 7 | statistics are reported at best `ncomp`. Instead of an `dplyr::inner_join()`, 8 | all cross-validated predictions were done with `dplyr::anti_join()`, 9 | specifically when using `evaluation_method == "resampling"` together with 10 | `tuning_method == "resampling"`. This resulted in predictions incorrectly 11 | being aggregated for all tested but not best ncomp (calculated in caret). 12 | Thus, also the cross-validation metrics were not correctly reported what 13 | should have been the case at optimal `ncomp` derived based on resampling and 14 | model tuning. The fix now correctly does an inner join, so that only the 15 | values at best `ncomp` are extracted and used for the evaluation statistics. 16 | This is also shown on the resulting plot outputs. 17 | 18 | # simplerspec 0.2.0 19 | 20 | - Add new example data set `soilspec_yamsys`. 21 | - `gather_spc()`: Consolidate the documentation with details on how data are matched from list and gathered into a spectra tibble. 22 | 23 | # simplerspec 0.1.0.9001 24 | 25 | * `resample_spc()` now supports flexible spectra and x-axis types as inputs. Its 26 | interface has been carefully augmented without breaking previous 27 | functionality ([#9](https://github.com/philipp-baumann/simplerspec/issues/9) 28 | * New argument `column_in` specifies the string or name (unquoting support) 29 | of the input column that contains the list of spectra. The following 30 | spectrum types, which are automatically matched against the list-column that 31 | contains the corresponding x-unit value vectors, are currently supported: 32 | `spc` (raw or unprocessed spectra), `spc_rs` (resampled spectra), 33 | `spc_mean` (mean spectra), `spc_nocomp` (spectra prior atmospheric 34 | compensation), `sc_sm` (single channel sample spectra), `sc_rf` (single 35 | channel reference spectra), `spc_pre` (preprocessed spectra). 36 | * New argument `interpol_method` specifying the interpolation method is 37 | introduced. Default is `"linear"` to achieve identical results with both 38 | prospectr v0.1.0 and v0.2.0. The current CRAN prospectr v0.2.0 has changed 39 | the default of `interpol` to `"spline"`. The previous `resample_spc()` 40 | unfortunatelty did not explicitly state the method internally, and relied 41 | on the default instead. The measures taken ensure downward compatibility of 42 | `resample_spc()` with previous versions of prospectr and simplerspec. 43 | * The arguments gain more defensive checks inside the function (supplied types 44 | and presence of objects in spectra). 45 | * The function components and the help are updated accordingly. Clearer 46 | vocabulary to describe the functionality and more consistent terminology for 47 | physical quantities and R objects are used. 48 | 49 | * Add UTF-8 support to DESCRIPTION because roxygen2 version 7.1.0 requires it. 50 | 51 | 52 | # simplerspec 0.1.0.9000 53 | 54 | * Start using Kirill's `{fledge}` for tracking and communicating the simplerspec 55 | development process in `NEWS`. 56 | 57 | 58 | # simplerspec 0.1.0 59 | 60 | * Added a `NEWS.md` file to track changes to the package 61 | 62 | # simplerspec 0.1.0.1 63 | 64 | * `read_opus_bin_univ()`: Add support for Bruker files that have undefined `PLF` value (`:= NULL`) 65 | -------------------------------------------------------------------------------- /R/select-ref-spectra.R: -------------------------------------------------------------------------------- 1 | # Quick fix implementation of select_ref_samples using the tibble framework ---- 2 | 3 | # Perform sampling for selection of reference samples based on spectral PCA ---- 4 | #' @title Select a set of reference spectra to be measured by reference analysis 5 | #' methods 6 | #' @description Select a set of calibration spectra to develop spectral models. 7 | #' Samples in this list will be analyzed using laboratory reference methods. 8 | #' @param spc_tbl Spectra as tibble objects that contain preprocessed spectra 9 | #' @param ratio_ref Ratio of desired reference samples to total sample number 10 | #' @param pc Number of principal components (numeric). If pc < 1, the number 11 | #' of principal components kept corresponds to the number of components 12 | #' explaining at least (pc * 100) percent of the total variance. 13 | #' @param print logical expression whether a plot (ggplot2) of sample selection 14 | #' for reference analysis is shown in PCA space 15 | #' (\code{TRUE} or \code{FALSE}). 16 | #' @usage select_ref_spc(spc_tbl, ratio_ref, pc, print = TRUE) 17 | #' @export 18 | select_ref_spc <- function(spc_tbl, ratio_ref = 0.15, pc = 2, 19 | print = TRUE) { 20 | 21 | # Avoid `R CMD check` NOTE: `no visible binding for global variable ...` 22 | PC1 <- PC2 <- type <- NULL 23 | 24 | if (tibble::is_tibble(spc_tbl)) { 25 | # Slice based on sample_id if spectral data is in tibble class 26 | spc_tbl <- dplyr::group_by(spc_tbl, !!rlang::sym("sample_id")) %>% 27 | dplyr::slice(1L) 28 | # Bind list of data.tables in list-column spc_pre to one data table 29 | # containing spectral data 30 | spc_pre <- as.matrix(data.table::rbindlist(spc_tbl$spc_pre)) 31 | } 32 | # Perform Kennard-Stone calibration sampling --------------------------------- 33 | sel <- prospectr::kenStone(X = spc_pre, 34 | k = round(ratio_ref * nrow(spc_pre)), pc = pc) 35 | # Select spectra tibble of reference samples based on row indices 36 | spc_ref <- spc_tbl[sel$model, ] 37 | # Select spectra tibble of prediction samples based on row indices 38 | spc_pred <- spc_tbl[-sel$model, ] 39 | 40 | # Prepare data for ggplot graphs of reference and prediction sample PC score 41 | # plots (PC1 and PC2) -------------------------------------------------------- 42 | sel_df_ref <- data.frame(sel$pc[sel$model, 1:2]) 43 | sel_df_ref$type <- as.factor( 44 | rep("reference analysis", nrow(sel_df_ref)) 45 | ) 46 | sel_df_pred <- data.frame(sel$pc[-sel$model, 1:2]) 47 | # Create type column for visually differentiate reference and prediction 48 | # samples 49 | sel_df_pred$type <- as.factor( 50 | rep("model prediction", nrow(sel_df_pred))) 51 | # Bind rows of reference and prediction PC scores data frames 52 | sel_df <- rbind(sel_df_ref, sel_df_pred) 53 | # Compute ratio needed to make the figure square 54 | ratio <- with(sel_df, diff(range(PC1)) / diff(range(PC2))) 55 | # Create spectra PC score plots ---------------------------------------------- 56 | p_pca <- ggplot2::ggplot(data = sel_df) + 57 | ggplot2::geom_point( 58 | ggplot2::aes(x = PC1, y = PC2, shape = type), size = 4) + 59 | ggplot2::coord_fixed(ratio = 1) + 60 | ggplot2::scale_shape_manual(values=c(19, 1)) + 61 | ggplot2::scale_colour_manual(values=c("black", "red")) + 62 | ggplot2::theme_bw() + 63 | ggplot2::theme(legend.title = ggplot2::element_blank()) 64 | # Print reference and prediction samples in PC1 and PC2 65 | if (print == TRUE) { 66 | p_pca 67 | } 68 | # Return spectral tibbles for reference spectra (spc_ref), 69 | # prediction spectra (spc_pr) and ggplot object of score plots (p_pca) 70 | list( 71 | spc_ref = spc_ref, 72 | spc_pred = spc_pred, 73 | p_pca = p_pca, 74 | pc_scores = sel$pc 75 | ) 76 | } 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /R/slice-spc.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Functions to slice spectra into a set of x axis ranges. A subset of the 3 | ## spectra stored in a list-column of a spectral tibble will be selected and 4 | ## the x unit axis vector will be reduced accordingly 5 | ################################################################################ 6 | 7 | # Define a set of helper functions --------------------------------------------- 8 | 9 | # Helper function to find indices of minimum distances between two vectors 10 | get_idx <- function(x, x_cut) { 11 | sapply(x, function(x) which.min(abs(x_cut - x))) 12 | } 13 | 14 | # Returns a a list of wavenumber position index sequences (type integer) from 15 | # a vector or list of upper and lower slicing boundary values in spectra x 16 | # units (xvalues_cut) 17 | slice_xvalues_idxseq <- function(spc_l, xvalues, xvalues_cut) { 18 | idx_lim <- lapply(seq_along(xvalues_cut), 19 | function(i) get_idx(x = xvalues, x_cut = xvalues_cut[i])) 20 | Map(function(from, to) seq(from, to), idx_lim[[1]], idx_lim[[2]]) 21 | } 22 | 23 | # Use helper functions in final spectrum x unit slicing function --------------- 24 | 25 | #' @title Slice spectra into defined x-axis ranges 26 | #' @description Slice spectra contained in list-column of spectral tibble 27 | #' (data frame). A list of x-axis value ranges can be specified. Spectra are 28 | #' cut based on these ranges. 29 | #' @param spc_tbl Spectral data in a tibble object (classes "tibble_df", "tbl" 30 | #' and "data.frame"). The spectra tibble is expected to contain at least 31 | #' the column \code{spc} (list-column with spectral matrices stored in a list) 32 | #' and \code{wavenumbers} or \code{wavelengths} (list-column that contains list 33 | #' of x-axis values). 34 | #' @param xunit_lcol Character vector that specifies column name where x-axis 35 | #' axis units are stored within \code{spc_tbl}. Default is \code{"wavenumber"}. 36 | #' @param spc_lcol Character vector that specifies which column (list-column) 37 | #' contains spectra to be sliced. Default is \code{"spc"}. 38 | #' @param xvalues_cut List of numeric vectors that contains upper and lower bounds of respective regions to keep in spectra. The spectral regions outside 39 | #' the \code{xvalues_cut} intervals will be cut out in the output spectra. 40 | #' @return Spectral tibble (data frame with list-columns) with sliced x-axis 41 | #' column and spectral column. Both the x-axis list-column and the spectral 42 | #' tibble list-column only contain data specified within the \code{xvalues_cut} 43 | #' argument (list of numeric vectors). 44 | #' @export 45 | slice_xvalues <- function(spc_tbl, xunit_lcol = "wavenumbers", spc_lcol = "spc", 46 | xvalues_cut = NULL) { 47 | if (is.atomic(xvalues_cut)) xvalues_cut <- list(xvalues_cut) 48 | if (!is.null(xvalues_cut)) { 49 | spc_l <- spc_tbl[[spc_lcol]] 50 | xvalues <- spc_tbl[[xunit_lcol]] 51 | # Match spectral indices for columns based on xvalue ranges 52 | idxseq <- lapply(seq_along(xvalues_cut), 53 | function(i) slice_xvalues_idxseq( 54 | spc_l = spc_l, xvalues = xvalues, xvalues_cut = xvalues_cut[[i]])) 55 | idxseq_c <- Reduce(function(x,y) Map(c, x, y), idxseq) 56 | if (any(sapply(spc_l, data.table::is.data.table))) { 57 | spc_tbl[[spc_lcol]] <- Map(function(x, idx) x[, idx, with = FALSE], 58 | spc_l, idxseq_c) # idx is not a column name of any data.table 59 | } else { 60 | spc_tbl[[spc_lcol]] <- Map(function(x, idx) x[, idx], spc_l, idxseq_c) 61 | } 62 | spc_tbl[[xunit_lcol]] <- Map(function(x, idx) x[idx], xvalues, idxseq_c) 63 | } 64 | spc_tbl 65 | } 66 | 67 | # Test spectra slicing function: 68 | # tbl_sliced <- slice_xvalues(spc_tbl = spc_tbl, 69 | # xvalues_cut = list(c(1500, 1024), c(1004, 998))) 70 | -------------------------------------------------------------------------------- /.github/workflows/tic.yml: -------------------------------------------------------------------------------- 1 | ## tic GitHub Actions template: linux-macos-windows-deploy 2 | ## revision date: 2022-11-23 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | - master 9 | - cran-* 10 | pull_request: 11 | branches: 12 | - main 13 | - master 14 | # for now, CRON jobs only run on the default branch of the repo (i.e. usually on master) 15 | schedule: 16 | # * is a special character in YAML so you have to quote this string 17 | - cron: "0 4 * * *" 18 | 19 | name: tic 20 | 21 | jobs: 22 | all: 23 | runs-on: ${{ matrix.config.os }} 24 | 25 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 26 | 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | config: 31 | # use a different tic template type if you do not want to build on all listed platforms 32 | - { os: windows-latest, r: "release" } 33 | - { os: macOS-latest, r: "release", pkgdown: "true", latex: "true" } 34 | - { os: ubuntu-latest, r: "devel" } 35 | - { os: ubuntu-latest, r: "release" } 36 | 37 | env: 38 | # make sure to run `tic::use_ghactions_deploy()` to set up deployment 39 | TIC_DEPLOY_KEY: ${{ secrets.TIC_DEPLOY_KEY }} 40 | # prevent rgl issues because no X11 display is available 41 | RGL_USE_NULL: true 42 | # if you use bookdown or blogdown, replace "PKGDOWN" by the respective 43 | # capitalized term. This also might need to be done in tic.R 44 | BUILD_PKGDOWN: ${{ matrix.config.pkgdown }} 45 | # use GITHUB_TOKEN from GitHub to workaround rate limits in {remotes} 46 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 47 | 48 | steps: 49 | - uses: actions/checkout@v3 50 | 51 | - uses: r-lib/actions/setup-r@v2 52 | with: 53 | r-version: ${{ matrix.config.r }} 54 | Ncpus: 4 55 | 56 | - uses: r-lib/actions/setup-tinytex@v2 57 | if: matrix.config.latex == 'true' 58 | 59 | - uses: r-lib/actions/setup-pandoc@v2 60 | 61 | # set date/week for use in cache creation 62 | # https://github.community/t5/GitHub-Actions/How-to-set-and-access-a-Workflow-variable/m-p/42970 63 | # - cache R packages daily 64 | - name: "[Cache] Prepare daily timestamp for cache" 65 | if: runner.os != 'Windows' 66 | id: date 67 | run: echo "date=$(date '+%d-%m')" >> $GITHUB_OUTPUT 68 | 69 | - name: "[Cache] Cache R packages" 70 | if: runner.os != 'Windows' 71 | uses: pat-s/always-upload-cache@v3 72 | with: 73 | path: ${{ env.R_LIBS_USER }} 74 | key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}} 75 | restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}} 76 | 77 | - name: "[Stage] Install pak" 78 | run: Rscript -e "install.packages('pak', repos = 'https://r-lib.github.io/p/pak/stable')" 79 | 80 | - name: "[Stage] Install" 81 | run: Rscript -e "if (grepl('Ubuntu', Sys.info()[['version']]) && !grepl('Under development', R.version[['status']])) {options(repos = c(CRAN = sprintf('https://packagemanager.rstudio.com/all/__linux__/%s/latest', system('lsb_release -cs', intern = TRUE))))}else{options(repos = c(CRAN = 'cloud.r-project.org'))}; pak::pkg_install('ropensci/tic')" -e "print(tic::dsl_load())" -e "tic::prepare_all_stages()" -e "tic::before_install()" -e "tic::install()" 82 | 83 | - name: "[Stage] Script" 84 | run: Rscript -e 'tic::script()' 85 | 86 | - name: "[Stage] After Success" 87 | run: Rscript -e "tic::after_success()" 88 | 89 | - name: "[Stage] Upload R CMD check artifacts" 90 | if: failure() 91 | uses: actions/upload-artifact@v2 92 | with: 93 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 94 | path: check 95 | - name: "[Stage] Before Deploy" 96 | run: | 97 | Rscript -e "tic::before_deploy()" 98 | 99 | - name: "[Stage] Deploy" 100 | run: Rscript -e "tic::deploy()" 101 | 102 | - name: "[Stage] After Deploy" 103 | run: Rscript -e "tic::after_deploy()" 104 | -------------------------------------------------------------------------------- /man/fit_rf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pls-modeling.R 3 | \name{fit_rf} 4 | \alias{fit_rf} 5 | \title{Calibration sampling, and random forest model tuning and evaluation} 6 | \usage{ 7 | fit_rf( 8 | spec_chem, 9 | response, 10 | variable = NULL, 11 | evaluation_method = "test_set", 12 | validation = NULL, 13 | split_method = "ken_stone", 14 | ratio_val, 15 | ken_sto_pc = 2, 16 | pc = NULL, 17 | invert = TRUE, 18 | tuning_method = "resampling", 19 | resampling_seed = 123, 20 | cv = NULL, 21 | ntree_max = 500, 22 | print = TRUE, 23 | env = parent.frame() 24 | ) 25 | } 26 | \arguments{ 27 | \item{spec_chem}{Tibble that contains spectra, metadata and chemical 28 | reference as list-columns. The tibble to be supplied to \code{spec_chem} can 29 | be generated by the \verb{join_chem_spc() function}} 30 | 31 | \item{response}{Response variable as symbol or name 32 | (without quotes, no character string). The provided response symbol needs to be 33 | a column name in the \code{spec_chem} tibble.} 34 | 35 | \item{variable}{Depreciated and replaced by \code{response}} 36 | 37 | \item{evaluation_method}{Character string stating evaluation method. 38 | Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"} 39 | will split the data into a calibration (training) and validation (test) set, 40 | and evaluate the final model by predicting on the validation set. 41 | If \code{"resampling"}, the finally selected model will be evaluated based 42 | on the cross-validation hold-out predictions.} 43 | 44 | \item{validation}{Depreciated and replaced by \code{evaluation_method}. 45 | Default is \code{TRUE}.} 46 | 47 | \item{split_method}{Method how to to split the data into a independent test 48 | set. Default is \code{"ken_sto"}, which will select samples for calibration 49 | based on Kennard-Stone sampling algorithm of preprocessed spectra. The 50 | proportion of validation to the total number of samples can be specified 51 | in the argument \code{ratio_val}. 52 | \code{split_method = "random"} will create a single random split.} 53 | 54 | \item{ratio_val}{Ratio of validation (test) samples to 55 | total number of samples (calibration (training) and validation (test)).} 56 | 57 | \item{ken_sto_pc}{Number of component used 58 | for calculating mahalanobsis distance on PCA scores for computing 59 | Kennard-Stone algorithm. 60 | Default is \code{ken_sto_pc = 2}, which will use the first two PCA 61 | components.} 62 | 63 | \item{pc}{Depreciated; renamed argument is \code{ken_sto_pc}.} 64 | 65 | \item{invert}{Logical} 66 | 67 | \item{tuning_method}{Character specifying tuning method. Tuning method 68 | affects how caret selects a final tuning value set from a list of candidate 69 | values. Possible values are \code{"resampling"}, which will use a 70 | specified resampling method such as repeated k-fold cross-validation (see 71 | argument \code{resampling_method}) and the generated performance profile 72 | based on the hold-out predictions to decide on the final tuning values 73 | that lead to optimal model performance. The value \code{"none"} will force 74 | caret to compute a final model for a predefined canditate PLS tuning 75 | parameter number of PLS components. In this case, the value 76 | supplied by \code{ncomp_fixed}` is used to set model complexity at 77 | a fixed number of components.} 78 | 79 | \item{resampling_seed}{Random seed (integer) that will be used for generating 80 | resampling indices, which will be supplied to \code{caret::trainControl}. 81 | This makes sure that modeling results are constant when re-fitting. 82 | Default is \code{resampling_seed = 123}.} 83 | 84 | \item{cv}{Depreciated. Use \code{resampling_method} instead.} 85 | 86 | \item{ntree_max}{Maximum random forest trees 87 | by caret::train. Caret will aggregate a performance profile using resampling 88 | for an integer sequence from 1 to \code{ntree_max} trees.} 89 | 90 | \item{print}{Logical expression whether model evaluation graphs shall be 91 | printed} 92 | 93 | \item{env}{Environment where function is evaluated. Default is 94 | \code{parent.frame}.} 95 | } 96 | \description{ 97 | Perform calibration sampling and use selected 98 | calibration set for model tuning 99 | } 100 | -------------------------------------------------------------------------------- /man/plot_spc_ext.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot-spc-extended.R 3 | \name{plot_spc_ext} 4 | \alias{plot_spc_ext} 5 | \title{ggplot2 wrapper for extended spectra plotting} 6 | \usage{ 7 | plot_spc_ext( 8 | spc_tbl, 9 | spc_tbl_l = NULL, 10 | lcols_spc = "spc", 11 | lcol_measure = NULL, 12 | lcol_measure_col_palette = "Spectral", 13 | lcol_measure_col_direction = -1, 14 | spc_id = "unique_id", 15 | group_id = "sample_id", 16 | group_id_order = TRUE, 17 | group_color = TRUE, 18 | group_color_palette = NULL, 19 | group_panel = TRUE, 20 | group_legend = FALSE, 21 | ncol = NULL, 22 | relabel_spc = TRUE, 23 | ylab = "Spectrum value", 24 | alpha = 0.5, 25 | line_width = 0.2, 26 | ... 27 | ) 28 | } 29 | \arguments{ 30 | \item{spc_tbl}{Tibble data frame containing spectra, x-axis values, metadata 31 | and eventual measured variables as list-columns.} 32 | 33 | \item{spc_tbl_l}{List of spectral tibbles (data frames). Default is 34 | \code{NULL} (argument is not used).} 35 | 36 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted. 37 | Default is \code{"spc"} (raw spectra).} 38 | 39 | \item{lcol_measure}{Character vector of length 1 denoting the column name 40 | of the measure columns. This argument is optional. Default is \code{NULL}, 41 | which does not extract an additional measure column.} 42 | 43 | \item{lcol_measure_col_palette}{Palette value supplied to 44 | \code{ggplot::scale_colour_brewer()}. Default is \code{"Spectral"}, but you can set 45 | it to the default argument \code{1} (will use 46 | \code{scale_colour_brewer(..., palette = 1)}).} 47 | 48 | \item{lcol_measure_col_direction}{Sets the the order of colours in the scale 49 | that is based on a measure column. Default is \code{-1} which reverses the 50 | scale. Argument is passed on to the function \code{ggplot2::sclae_colour_brewer()} 51 | as argument \code{direction}.} 52 | 53 | \item{spc_id}{Character vector denoting column name for a unique spectrum ID. 54 | Default is \code{"unique_id"}.} 55 | 56 | \item{group_id}{Character vector denoting column name for the spectrum group 57 | ID. Default is \code{"sample_id"}. The group ID is used for 58 | plotting spectra by group (e.g. by using different colors or panels).} 59 | 60 | \item{group_id_order}{Logical that specifies whether the panel names 61 | derived from a numeric \code{group_id} column are reordered using ascending 62 | numbers. Default is \code{TRUE}.} 63 | 64 | \item{group_color}{Logical defining whether spectra are colored by the column 65 | specified by \code{group_id}.} 66 | 67 | \item{group_color_palette}{Character (1L) defining the diverging colour 68 | scales from colorbrewer.org; see \code{?scale_colour_brewer} for supported 69 | diverging colur types (\code{palette} argument).} 70 | 71 | \item{group_panel}{Logical defining whether spectra are arranged into panels 72 | by groups specified in \code{group_id}. Default is \code{TRUE}.} 73 | 74 | \item{group_legend}{Logical defining whether a legend for the \code{group_id} 75 | is plotted. Default is \code{FALSE}.} 76 | 77 | \item{ncol}{Integer vector of length 1. Defines number of columns when 78 | plotting panels (facets). Default is \code{NULL} (argument not used).} 79 | 80 | \item{relabel_spc}{Logical defining whether panels are relabeled with custom 81 | names for spectra types. Default is TRUE. When \code{TRUE}, arguments 82 | from \code{relabel_spc_types} can be passed to \code{plot_spc_ext} 83 | (supported via the \code{...} (ellipsis) argument)} 84 | 85 | \item{ylab}{Character vector or vector of type \code{"expression"} created by 86 | mathematical expression created by \code{expression}. Custom annotation for 87 | y-axis of spectra} 88 | 89 | \item{alpha}{Integer of length 1, from 0 to 1. Defines transparency of 90 | spectral lines. Default is \code{0.5} (0 is completely transparent and 91 | 1 is no transparency).} 92 | 93 | \item{line_width}{Numeric vector of length 1 specifying the width of the 94 | spectral lines. Default is \code{0.2}.} 95 | 96 | \item{...}{Further arguments to be passed to \code{plot_spc_ext}. Currently, 97 | arguments of \code{relabel_spc_types} are supported.} 98 | } 99 | \value{ 100 | Object of class \code{"ggplot"} (ggplot2 graph). 101 | } 102 | \description{ 103 | \code{plot_spc_ext} is a custom plotting function developed 104 | within the simplerspec framework. Returns plots based on ggplot2 105 | (class "ggplot"). Different spectra types such as raw or preprocessed spectra 106 | and groups can be differentiated by different colors or by using panels 107 | (so called facets). Additionally, spectra can be colored based on an 108 | additional measure variable, e.g. determined by chemical reference analysis. 109 | } 110 | -------------------------------------------------------------------------------- /man/resample_spc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/resample-spc.R 3 | \name{resample_spc} 4 | \alias{resample_spc} 5 | \title{Resample spectra in list-column to new x-axis interval} 6 | \usage{ 7 | resample_spc( 8 | spc_tbl, 9 | column_in = "spc", 10 | x_unit = c("wavenumber", "wavelength"), 11 | wn_lower = 500, 12 | wn_upper = 4000, 13 | wn_interval = 2, 14 | wl_lower = 350, 15 | wl_upper = 2500, 16 | wl_interval = 1, 17 | interpol_method = c("linear", "spline") 18 | ) 19 | } 20 | \arguments{ 21 | \item{spc_tbl}{Spectra data embedded in a tibble object (classes 22 | \verb{"tbl_df", "tbl", "data.frame"}). The spectra tibble needs to contain at 23 | least of one of the the spectra columns \code{spc}, \code{spc_rs}, \code{spc_mean}, 24 | \code{spc_nocomp}, \code{sc_sm}, \code{sc_rf}, or \code{spc_pre} (list-columns with spectral 25 | \code{data.table}s), and \code{wavenumbers} or \code{wavelengths} (list-column with vectors 26 | of x-axis values corresponding to each spectrum). The help section \emph{"Matching 27 | spectrum type and corresponding x-axis type"} describes the spectra types 28 | and corresponding x-axis types.} 29 | 30 | \item{column_in}{Character vector of length 1L or symbol/name 31 | specifying the name of list-column that contains the spectra to be resampled.} 32 | 33 | \item{x_unit}{Character vector of length 1L specifying the measurement unit 34 | of the x-axis values (list-column) of the input spectra in \code{spc_tbl}. 35 | Possible values are \code{"wavenumber"} (default) or \code{"wavelength"}. Wavenumber 36 | is a convenient unit of frequency in the mid-infrared spectral range, 37 | where wavelength is often used as spatial period for the visible and 38 | near-infrared range.} 39 | 40 | \item{wn_lower}{Numeric value of lowest wavenumber. This argument will only 41 | be used if \code{x_unit = "wavenumber"}. The value serves as starting value for 42 | the new wavenumber sequence that the spectra will be resampled upon. Default 43 | value is 500 (i.e., in reciprocal centimeters).} 44 | 45 | \item{wn_upper}{Numeric value of highest wavenumber. This argument will only 46 | be used if \verb{x_unit = "wavenumber}. The value will be used as last value of 47 | the new wavenumber sequence that the spectra will be resampled upon. Default 48 | value is 4000 (i.e., in reciprocal centimeters).} 49 | 50 | \item{wn_interval}{Numeric value of the wavenumber increment for the new 51 | wavenumber sequence that the spectra will be resampled upon. Default value 52 | is 2 (i.e., in reciprocal centimeters).} 53 | 54 | \item{wl_lower}{Numeric value of lowest wavelength. This argument will only 55 | be used if \code{x_unit = "wavelength"}. The value serves as starting value of 56 | the new wavenumber sequence that the spectra will be resampled upon. 57 | Default value is 350 (i.e. in nanometers).} 58 | 59 | \item{wl_upper}{Numeric value of highest wavelength. This argument will only 60 | be used if \code{x_unit = "wavelength"}. The value will be used as last value of 61 | the new wavenumber sequence that the spectra will be resampled upon. Default 62 | value is 2500 (i.e., in nanometers).} 63 | 64 | \item{wl_interval}{Numeric value of the wavelength increment for the new 65 | wavenumber sequence that the spectra will be resampled upon. This argument 66 | will only be used if \code{x_unit = "wavelength"}. Default value is 1 (i.e., in 67 | nanometers).} 68 | 69 | \item{interpol_method}{Character of \code{"linear"} (default) or \code{"spline"} with 70 | the interpolation method. \code{"spline"} uses a cubic spline to interpolate the 71 | input spectra at given x-axis values to new equispaced x-axis intervals.} 72 | } 73 | \value{ 74 | A spectra tibble (\code{spc_tbl}) containing two added list-columns: 75 | \itemize{ 76 | \item \verb{spc_rs:} Resampled spectra as list of \code{data.table}s 77 | \item \code{wavenumbers_rs} or \code{wavelengths_rs}: Resampled x-axis values as list of 78 | numeric vectors 79 | } 80 | } 81 | \description{ 82 | Resamples (interpolates) different spectra types with 83 | corresponding x-axis values that are both stored in list-columns of a spectra 84 | tibble. A spectra tibble hosts spectra, x-axis vectors, metadata, and 85 | further linked data with standardized naming conventions. Data input for 86 | resampling can for example be generated with \code{simplerspec::gather_spc()}. 87 | Resampling is a key harmonizing step to process and later model spectra 88 | measured at different resolutions and spectral ranges (i.e., different 89 | spectrometer devices and/or measurement settings). 90 | } 91 | \section{Matching spectrum type and corresponding x-axis type}{ 92 | 93 | The combinations of input spectrum types (\code{column_in}) and 94 | corresponding x-axis types are generated from a simple lookup list. The 95 | following key-value(s) pairs can be matched at given key, which is the column 96 | name from \code{column_in} containing the spectra. 97 | \itemize{ 98 | \item \code{"spc"} : \code{"wavenumbers"} or \code{"wavelengths"} (raw spectra) 99 | \item \code{"spc_rs"} : \code{"wavenumbers_rs"} or \code{"wavelengths_rs"}) (resampled spectra) 100 | \item \code{"spc_mean"} : \code{"wavenumbers_rs"} or \code{"wavelengths_rs"} (mean spectra) 101 | \item \code{"spc_nocomp"} \code{"wavenumbers"} or \code{"wavelengths"} (spectra prior 102 | atmospheric compensation) 103 | \item \code{"sc_sm" : c("wavenumbers_sc_sm", "wavelengths_sc_sm")} (single channel 104 | sample spectra) 105 | \item \code{"sc_rf" : c("wavenumbers_sc_rf", "wavelengths_sc_rf")} (single channel 106 | reference spectra) 107 | \item \code{"spc_pre" : "xvalues_pre"} (preprocessed spectra) 108 | } 109 | } 110 | 111 | -------------------------------------------------------------------------------- /man/fit_pls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pls-modeling.R 3 | \name{fit_pls} 4 | \alias{fit_pls} 5 | \alias{pls_ken_stone} 6 | \title{Calibration sampling, model tuning, and PLS regression} 7 | \usage{ 8 | fit_pls( 9 | spec_chem, 10 | response, 11 | variable = NULL, 12 | center = TRUE, 13 | scale = TRUE, 14 | evaluation_method = "test_set", 15 | validation = TRUE, 16 | split_method = "ken_stone", 17 | ratio_val = 1/3, 18 | ken_sto_pc = 2, 19 | pc, 20 | invert = TRUE, 21 | tuning_method = "resampling", 22 | resampling_method = "kfold_cv", 23 | cv = NULL, 24 | resampling_seed = 123, 25 | pls_ncomp_max = 20, 26 | ncomp_fixed = 5, 27 | print = TRUE, 28 | env = parent.frame() 29 | ) 30 | 31 | pls_ken_stone( 32 | spec_chem, 33 | response, 34 | variable = NULL, 35 | center = TRUE, 36 | scale = TRUE, 37 | evaluation_method = "test_set", 38 | validation = TRUE, 39 | split_method = "ken_stone", 40 | ratio_val = 1/3, 41 | ken_sto_pc = 2, 42 | pc, 43 | invert = TRUE, 44 | tuning_method = "resampling", 45 | resampling_method = "kfold_cv", 46 | cv = NULL, 47 | resampling_seed = 123, 48 | pls_ncomp_max = 20, 49 | ncomp_fixed = 5, 50 | print = TRUE, 51 | env = parent.frame() 52 | ) 53 | } 54 | \arguments{ 55 | \item{spec_chem}{Tibble that contains spectra, metadata and chemical 56 | reference as list-columns. The tibble to be supplied to \code{spec_chem} can 57 | be generated by the \verb{join_chem_spc() function}} 58 | 59 | \item{response}{Response variable as symbol or name 60 | (without quotes, no character string). The provided response symbol needs to be 61 | a column name in the \code{spec_chem} tibble.} 62 | 63 | \item{variable}{Depreciated and replaced by \code{response}} 64 | 65 | \item{center}{Logical whether to perform mean centering of each spectrum column 66 | (e.g. wavenumber or wavelength) after common spectrum preprocessing. Default is 67 | \code{center = TRUE}} 68 | 69 | \item{scale}{Logical whether to perform standard deviation scaling 70 | of each spectrum column (e.g. wavenumber or wavelength) after common 71 | spectrum preprocessing. Default is \code{scale = TRUE}} 72 | 73 | \item{evaluation_method}{Character string stating evaluation method. 74 | Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"} 75 | will split the data into a calibration (training) and validation (test) set, 76 | and evaluate the final model by predicting on the validation set. 77 | If \code{"resampling"}, the finally selected model will be evaluated based 78 | on the cross-validation hold-out predictions.} 79 | 80 | \item{validation}{Depreciated and replaced by \code{evaluation_method}. 81 | Default is \code{TRUE}.} 82 | 83 | \item{split_method}{Method how to to split the data into a independent test 84 | set. Default is \code{"ken_sto"}, which will select samples for calibration 85 | based on Kennard-Stone sampling algorithm of preprocessed spectra. The 86 | proportion of validation to the total number of samples can be specified 87 | in the argument \code{ratio_val}. 88 | \code{split_method = "random"} will create a single random split.} 89 | 90 | \item{ratio_val}{Ratio of validation (test) samples to 91 | total number of samples (calibration (training) and validation (test)).} 92 | 93 | \item{ken_sto_pc}{Number of component used 94 | for calculating mahalanobsis distance on PCA scores for computing 95 | Kennard-Stone algorithm. 96 | Default is \code{ken_sto_pc = 2}, which will use the first two PCA 97 | components.} 98 | 99 | \item{pc}{Depreciated; renamed argument is \code{ken_sto_pc}.} 100 | 101 | \item{invert}{Logical} 102 | 103 | \item{tuning_method}{Character specifying tuning method. Tuning method 104 | affects how caret selects a final tuning value set from a list of candidate 105 | values. Possible values are \code{"resampling"}, which will use a 106 | specified resampling method such as repeated k-fold cross-validation (see 107 | argument \code{resampling_method}) and the generated performance profile 108 | based on the hold-out predictions to decide on the final tuning values 109 | that lead to optimal model performance. The value \code{"none"} will force 110 | caret to compute a final model for a predefined canditate PLS tuning 111 | parameter number of PLS components. In this case, the value 112 | supplied by \code{ncomp_fixed}` is used to set model complexity at 113 | a fixed number of components.} 114 | 115 | \item{resampling_method}{Character specifying resampling method. Currently, 116 | \code{"kfold_cv"} (default, performs 10-fold cross-validation), 117 | \code{"rep_kfold_cv"} (performs 5-times repeated 10-fold cross-validation), 118 | \code{"loocv"} (performs leave-one-out cross-validation), and \code{"none"} 119 | (if \code{resampling_method = "none"}) are supported.} 120 | 121 | \item{cv}{Depreciated. Use \code{resampling_method} instead.} 122 | 123 | \item{resampling_seed}{Random seed (integer) that will be used for generating 124 | resampling indices, which will be supplied to \code{caret::trainControl}. 125 | This makes sure that modeling results are constant when re-fitting. 126 | Default is \code{resampling_seed = 123}.} 127 | 128 | \item{pls_ncomp_max}{Maximum number of PLS components that are evaluated 129 | by caret::train. Caret will aggregate a performance profile using resampling 130 | for an integer sequence from 1 to \code{pls_ncomp_max}} 131 | 132 | \item{ncomp_fixed}{Integer of fixed number of PLS components. Will only be 133 | used when \code{tuning_method = "none"} and \code{resampling_method = "none"} 134 | are used.} 135 | 136 | \item{print}{Logical expression whether model evaluation graphs shall be 137 | printed} 138 | 139 | \item{env}{Environment where function is evaluated. Default is 140 | \code{parent.frame}.} 141 | } 142 | \description{ 143 | Perform calibration sampling and use selected 144 | calibration set for model tuning 145 | } 146 | -------------------------------------------------------------------------------- /man/gather_spc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gather-spc.R 3 | \name{gather_spc} 4 | \alias{gather_spc} 5 | \title{Gather measurements of different spectra types, corresponding 6 | x-axis values and metadata from nested list.} 7 | \usage{ 8 | gather_spc(data, spc_types = "spc") 9 | } 10 | \arguments{ 11 | \item{data}{Recursive list named with filename (\code{file_id}) at first level 12 | entries, where each element containing a sample measurement has nested 13 | metadata (\code{"metadata"}), spectra types (see \code{spc_types}), corresponding 14 | x-axis values (see section \emph{"Details on spectra data checks and matching"}). 15 | The \code{data} list is a structural convention to organize spectra and their 16 | metadata. It follows for example the list structure returned from the Bruker 17 | OPUS binary reader \code{simplerspec::read_opus_univ()}.} 18 | 19 | \item{spc_types}{Character vector with the spectra types to be extracted 20 | from \code{data} list and gathered into list-columns. The spectra type names need 21 | to exactly follow the naming conventions, and the element names and contents 22 | need to be present at the second list hierarchy of \code{data}. These values are 23 | allowed: 24 | \itemize{ 25 | \item \code{"spc"} (default): final raw spectra after atmospheric compensation, if 26 | performed (named \code{AB} in Bruker OPUS software; results from referencing 27 | sample to reference single channel reflectance and transforming to 28 | absorbance). 29 | \item \code{"spc_nocomp"}: raw spectra without atmospheric correction 30 | \item \code{"sc_sm"}: Single channel reflectance spectra of the samples 31 | \item \code{"sc_rf"}: Single channel reflectance spectra of the reference (background 32 | spectra) 33 | \item \code{"ig_sm"}: Interferograms of the sample spectra (currently only spectra 34 | without x-axis list-columns are matched and returned) 35 | \item \code{"ig_rf"}: Interferograms of the reference spectra (currently only spectra 36 | without x-axis list-columns are matched and returned) 37 | }} 38 | } 39 | \value{ 40 | Spectra tibble (\code{spc_tbl} with classes \code{"tbl_df"}, \code{"tbl"}, and 41 | \code{"data.frame"}) with the following (list-)columns: 42 | \itemize{ 43 | \item \code{"unique_id"}: Character vector with unique measurement identifier, likely 44 | a string with file names in combination with date and time (extracted from 45 | each \code{"metadata"} data frame column). 46 | \item \code{"file_id"} : Character vector with file name including the extension 47 | (extracted from each \code{"metadata"} data frame column). 48 | \item \code{"sample_id"}: Character vector with sample identifier. For Bruker OPUS 49 | binary files, this corresponds to the file name without the file extension 50 | in integer increments of sample replicate measurements. 51 | \item One or multiple of \code{"spc"}, \code{"spc_nocomp"}, \code{"sc_sm"}, or \code{"sc_rf"}: 52 | List(s) of data.table's containing spectra type(s). 53 | \item One or multiple of \code{"wavenumbers"}, \code{"wavelengths"}, \code{"x_values"}, 54 | \code{"wavenumbers_sc_sm"}, \code{"wavelengths_sc_sm"}, \code{"x_values_sc_sm"}, 55 | \code{"wavenumbers_sc_rf"}, \code{"wavelengths_sc_rf"}, or \code{"x_values_sc_rf"}: 56 | List(s) of numeric vectors with matched x-axis values (see \emph{"Details on 57 | spectra data checks and matching"} below). 58 | } 59 | } 60 | \description{ 61 | Gather spectra, corresponding x-axis values, and device and 62 | measurement metadata from a nested list into a spectra tibble, so that one 63 | row represents one spectral measurement. Spectra, x-axis values and metadata 64 | are mapped from the individual list elements (named after file name including 65 | the extension) and transformed into (list-)columns of a spectra tibble, 66 | which is an extended data frame. For each measurement, spectral data and 67 | metadata are combined into one row of the tidy data frame. In addition, the ID 68 | columns \code{unique_id}, \code{file_id}, and \code{sample_id} are extracted from 69 | \code{"metadata"} (data frame) list entries and returned as identifier columns of 70 | the spectra tibble. List-columns facilitate keeping related data together in 71 | a rectangular data structure. They can be manipulated easily during 72 | subsequent transformations, for example using the standardized functions of 73 | the simplerspec data processing pipeline. 74 | } 75 | \section{Details on spectra data checks and matching}{ 76 | 77 | \code{gather_spc()} checks whether these conditions are met for each measurement 78 | in the list \code{data}: 79 | \enumerate{ 80 | \item Make sure that the first level \code{data} elements are named (assumed to be 81 | the file name the data originate from), and remove missing measurements with 82 | an informative message. 83 | \item Remove any duplicated file names and raise a message if there are 84 | name duplicates at first level. 85 | \item Check whether \code{spc_types} inputs are supported (see argument \code{spc_types}) 86 | and present at the second level of the \code{data} list. If not, remove 87 | all data elements for incomplete spectral measurements. 88 | \item Match spectra types and possible corresponding x-axis types from 89 | a lookup list. For each selected spectrum type (left), at least one of 90 | the element names of the x-axis type (right) needs to be present for each 91 | measurement in the list \code{data}: 92 | \itemize{ 93 | \item \code{"spc"} : \code{"wavenumbers"}, \code{"wavelengths"}, or \code{"x_values"} 94 | \item \code{"spc_nocomp"} : \code{"wavenumbers"}, \code{"wavelengths"}, or \code{"x_values"} 95 | \item \code{"sc_sm"} : \code{"wavenumbers_sc_sm"}, \code{"wavelengths_sc_sm"}, or 96 | \code{"x_values_sc_sm"} 97 | \item \code{"sc_rf"} : \code{"wavenumbers_sc_rf"}, \code{"wavelengths_sc_rf"}, or 98 | \code{"x_values_sc_rf"} 99 | } 100 | \item Check if \code{"metadata"} elements are present and remove data elements for 101 | measurements with missing or incorrectly named metadata elements 102 | (message). 103 | } 104 | } 105 | 106 | -------------------------------------------------------------------------------- /R/plot_spc.R: -------------------------------------------------------------------------------- 1 | #' @title Plot tibble spectra 2 | #' @description Plot spectra from tibble spectra objects. 3 | #' @param spc_tbl Tibble that contains the first set of spectra to plot as 4 | #' list-column 5 | #' @param spc_tbl_2 Tibble that contains the second set of spectra (optional) 6 | #' to plot as list-column. 7 | #' @param x_unit Character string describing the x axis unit. Default is 8 | #' \code{"wavenumber"}, which will produce a graph with wavenumbers on the 9 | #' x axis with reversed number. If \code{x_unit = "wavelength"}, the axis 10 | #' will be in regular order (lower wavelengths in nm on the left and higher 11 | #' on the right side of the axis). 12 | #' @param y Character string of list-column name in tibble where spectra of 13 | #' desired type are extracted to plot. 14 | #' @param by Character string of column that is used to group the spectra. 15 | #' Default is \code{"unique_id"}. If replica spectra are present in the file 16 | #' and processed spectra resulting after averaging need to be plotted, 17 | #' it is recommend to use \code{"sample_id"} as argument to group according 18 | #' the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl} 19 | #' and \code{spc_tbl_2}). 20 | #' @param graph_id_1 Character string used for grouping the first spectra set 21 | #' (\code{spc_tbl}) and producing 22 | #' the label text accordingly. Default is \code{"Set 1"}. 23 | #' @param graph_id_2 Character string used for grouping the second spectra set 24 | #' (\code{spc_tbl_2}) and producing the label text accordingly. Default is 25 | #' \code{"Set 2"} 26 | #' @param graph_id_1_col Character string for the colour of the first spectra 27 | #' set. Default is \code{"black"}. 28 | #' @param graph_id_2_col Character string for the colour of the first spectra 29 | #' set. Default is \code{"red"}. 30 | #' @param xlab Character string or mathematical expression 31 | #' (use \code{expression}) for the x axis title. Default is 32 | #' \code{expression(paste("Wavenumber [", cm^-1, "]"))}. 33 | #' @param ylab Character string or mathematical expression 34 | #' (use \code{expression}) for the y axis title. Default is \code{"absorbance"}. 35 | #' @param alpha Double in between 0 and 1. Sets the transparency for the plotted 36 | #' spectra lines. 37 | #' @param legend Logical whether to plot a legend for the spectra describing 38 | #' its name selected in arguments \code{graph_id_1} and \code{graph_id_2}. 39 | #' Default is \code{TRUE}. 40 | #' @usage plot_spc(spc_tbl, spc_tbl_2 = NULL, 41 | #' x_unit = "wavenumber", 42 | #' y = "spc", by = "unique_id", 43 | #' graph_id_1 = "Set 1", graph_id_2 = "Set 2", 44 | #' graph_id_1_col = "black", graph_id_2_col = "red", 45 | #' xlab = expression(paste("Wavenumber [", cm^-1, "]")), 46 | #' ylab = "Absorbance", 47 | #' alpha = 0.2, legend = TRUE) 48 | #' @export 49 | plot_spc <- function(spc_tbl, spc_tbl_2 = NULL, 50 | x_unit = "wavenumber", 51 | y = "spc", by = "unique_id", 52 | graph_id_1 = "Set 1", graph_id_2 = "Set 2", 53 | graph_id_1_col = "black", graph_id_2_col = "red", 54 | xlab = expression(paste("Wavenumber [", cm^-1, "]")), 55 | ylab = "Absorbance", alpha = 0.2, 56 | legend = TRUE) { 57 | 58 | # Fix `R CMD check NOTE`: "no visible binding for global variable ‘...‘" 59 | graph_id <- id <- variable <- value <- NULL 60 | 61 | # (1) Gather spectra into one data.table 62 | if (!is.null(spc_tbl_2)) { 63 | if (y == "spc") { 64 | # raw spectra are not yet data.tables and extraction is done alternatively 65 | # via do.call(rbind, list) -> a little bit slower 66 | dt_1 <- data.table::data.table(do.call(rbind, spc_tbl[, y][[y]])) 67 | dt_2 <- data.table::data.table(do.call(rbind, spc_tbl_2[, y][[y]])) 68 | } else { 69 | dt_1 <- data.table::rbindlist(spc_tbl[, y][[y]]) 70 | dt_2 <- data.table::rbindlist(spc_tbl_2[, y][[y]]) 71 | } 72 | } else { 73 | if (y == "spc") { 74 | # raw spectra are not yet data.tables and extraction is done alternatively 75 | # via do.call(rbind, list) -> a little bit slower 76 | dt_1 <- data.table::data.table(do.call(rbind, spc_tbl[, y][[y]])) 77 | } else { 78 | dt_1 <- data.table::rbindlist(spc_tbl[, y][[y]]) 79 | } 80 | } 81 | 82 | # (2) Extract ID variable and append it to the data.table 83 | id_1 <- spc_tbl[, by][[by]] 84 | # Add a graph identity column to distiguish graphical layers for spectra 85 | # tibble comparisons 86 | graph_id_1 <- as.factor(rep(graph_id_1, nrow(spc_tbl))) 87 | dt_1[, graph_id := graph_id_1] 88 | dt_1[, id := id_1] 89 | 90 | # Only if spc_tbl_2 exists 91 | if (!is.null(spc_tbl_2)) { 92 | # (2) Extract ID variable and append it to the data.table 93 | id_2 <- spc_tbl_2[, by][[by]] 94 | # Add a graph identity column to distiguish graphical layers for spectra 95 | # tibble comparisons 96 | graph_id_2 <- as.factor(rep(graph_id_2, nrow(spc_tbl_2))) 97 | dt_2[, graph_id := graph_id_2] 98 | dt_2[, id := id_2] 99 | dt_list <- list( 100 | dt_1 = dt_1, 101 | dt_2 = dt_2 102 | ) 103 | dt <- data.table::rbindlist(dt_list) 104 | } else { 105 | dt <- dt_1 106 | } 107 | # (3) Convert data.table from wide to long form 108 | dt_long <- data.table::melt( 109 | dt, 110 | measure = names(dt)[!names(dt) %in% c("id", "graph_id")] 111 | ) 112 | # Convert variable column from factor to numeric 113 | dt_long[, variable := as.numeric(as.character(variable))] 114 | # (4) Plot spectra 115 | # Define nice breaks for x axis 116 | brk <- pretty(as.numeric(names(dt)[!names(dt) %in% c("id", "graph_id")]), n = 10) 117 | p <- ggplot2::ggplot(dt_long, ggplot2::aes(variable, value)) + 118 | ggplot2::labs(x = xlab, y = ylab) + 119 | ggplot2::theme_bw() + 120 | ggplot2::scale_x_reverse(breaks = brk) + 121 | # Bring graph_id_2 spectra to front 122 | # http://stackoverflow.com/questions/21120088/ggplot2-bring-one-line-to-the-front-but-save-the-colors 123 | ggplot2::geom_line(ggplot2::aes(colour = graph_id, group = id), 124 | alpha = alpha, size = 0.2 125 | ) + 126 | # scale_color_manual(values = rep("black", nrow(dt))) 127 | ggplot2::scale_color_manual(values = c(graph_id_1_col, graph_id_2_col)) 128 | 129 | if ("wavelengths_rs" %in% names(spc_tbl) && x_unit == "wavelength") { 130 | p <- p + 131 | ggplot2::scale_x_continuous(breaks = brk) + 132 | ggplot2::xlab("Wavelength [nm]") + 133 | ggplot2::ylab("Reflectance") 134 | } 135 | 136 | if (legend == FALSE) { 137 | p <- p + 138 | # Remove legend 139 | ggplot2::guides(colour = FALSE) 140 | } 141 | p 142 | } 143 | -------------------------------------------------------------------------------- /R/remove-outl-spectra.R: -------------------------------------------------------------------------------- 1 | #' @title Remove outlier spectra 2 | #' @description Remove outlier spectra based on the 3 | #' \code{pcout()} function of the \code{mvoutlier} package. 4 | #' @usage remove_outliers(list_spectra, remove = TRUE) 5 | #' @param list_spectra List that contains averaged 6 | #' spectral information 7 | #' in list element \code{MIR_mean} (data.frame) and metadata in 8 | #' \code{data_meta} (data.frame). 9 | #' @param remove logical expression (\code{TRUE} or \code{FALSE}) 10 | #' that specifies weather spectra shall be removed. 11 | #' If \code{rm = FALSE}, there will be no outlier removal 12 | #' @return Returns list \code{spectra_out} that contains: 13 | #' \itemize{ 14 | #' \item \code{MIR_mean}: Outlier removed MIR spectra as 15 | #' data.frame object. If \code{remove = FALSE}, 16 | #' the function will 17 | #' return almost identical list identical to \code{list_spectra}, 18 | #' except that the first \code{indices} column of the spectral 19 | #' data frame \code{MIR_mean} is removed 20 | #' (This is done for both options 21 | #' \code{remove = TRUE} and \code{remove = FALSE}). 22 | #' \item \code{data_meta}: metadata data.frame, identical 23 | #' as in the \code{list_spectra} input list. 24 | #' \item \code{plot_out}: (optional) ggplot2 graph 25 | #' that shows all spectra (absorbance on x-axis and wavenumber 26 | #' on y-axis) with outlier marked, if 27 | #' \code{remove = TRUE}. 28 | #' } 29 | #' @details This is an optional function if one wants to remove 30 | #' outliers. 31 | #' @export 32 | remove_outliers <- function(list_spectra, remove = TRUE) { 33 | # Outlier detection 34 | # Use the mvoutlier package and pcout function to identify 35 | # multivariate outliers 36 | wfinal01 <- ID <- NULL 37 | if (remove == TRUE) { 38 | # Remove the 'indices' column 39 | list_spectra$MIR_mean <- list_spectra$MIR_mean[, -1] 40 | out <- mvoutlier::pcout(list_spectra$MIR_mean, makeplot = T, 41 | outbound = 0.05) # parameters should be adapted 42 | # Plot outlying spectra 43 | plot_out <- plotMIR( 44 | list_spectra$MIR_mean[ 45 | order(out$wfinal01, decreasing = T), ], 46 | col = as.factor(out$wfinal01[order(out$wfinal01, 47 | decreasing = T)])) + 48 | ggplot2::scale_colour_brewer("outlier", palette = "Set1") 49 | out_id <- as.character( 50 | list_spectra$data_meta$ID[!as.logical(out$wfinal01)] 51 | ) 52 | # Remove outliers 53 | MIR_mean <- list_spectra$MIR_mean[ 54 | ! list_spectra$data_meta$ID %in% out_id, ] 55 | # rep ID and country name 56 | data_meta <- list_spectra$data_meta[ 57 | ! list_spectra$data_meta$ID %in% out_id, ] 58 | spectra_out <- list(MIR_mean = MIR_mean, 59 | data_meta = data_meta, 60 | plot_out = plot_out) 61 | } else { 62 | # Remove the 'indices' column 63 | list_spectra$MIR_mean <- list_spectra$MIR_mean[, -1] 64 | spectra_out <- list(MIR_mean = list_spectra$MIR_mean, 65 | data_meta = list_spectra$data_meta) 66 | } 67 | spectra_out 68 | } 69 | 70 | ## plotMIR function of Antoine Stevens; don't export this 71 | ## function to the NAMESPACE 72 | plotMIR <- function(spc, group = NULL, col = NULL, 73 | linetype = NULL, wr = NULL, brk = NULL, 74 | ylab = "Absorbance", xlab = "Wavenumber /cm-1", 75 | by = NULL, by.wrap = T, ...){ 76 | # Function to plot spectra, based on the ggplot2 package 77 | # spc = spectral matrix, with colnames = wavelengths 78 | # group = grouping variable, usually the id's of the sample 79 | # wr = wavelength range to plot 80 | # brk = breaks of the x-axis 81 | # by = factor variable for which the mean and sd of 82 | # each level will be computed and plotted (optional) 83 | # Requires packages ggplot2; data.table; reshape2 84 | # Workaround to pass R CMD check: 85 | # http://stackoverflow.com/questions/9439256/how-can-i-handle-r-cmd-check-no-visible-binding-for-global-variable-notes-when 86 | # Setting the variables to NULL first 87 | variable <- value <- colour <- NULL 88 | spc <- as.data.frame(spc) 89 | if (!is.null(wr)) 90 | spc <- spc[, as.numeric(colnames(spc)) >= min(wr) & 91 | as.numeric(colnames(spc)) <= max(wr)] 92 | if (is.null(brk)) 93 | brk <- pretty(as.numeric(colnames(spc)), n = 10) 94 | if (!is.null(by)) { 95 | spc$by <- by 96 | spc <- data.table::data.table(spc, check.names = F) 97 | mean.spc <- reshape2::melt( 98 | spc[, lapply(data.table::.SD, mean), by = by], 99 | id.vars = "by" 100 | ) 101 | sd.spc <- reshape2::melt( 102 | spc[, lapply(data.table::.SD, sd), by = by], 103 | id.vars = "by" 104 | ) 105 | mean.spc$min <- mean.spc$value - sd.spc$value 106 | mean.spc$max <- mean.spc$value + sd.spc$value 107 | mean.spc$variable <- as.numeric( 108 | as.character(mean.spc$variable) 109 | ) 110 | if (by.wrap) { 111 | p <- ggplot2::ggplot(data = mean.spc) + 112 | ggplot2::geom_ribbon( 113 | ggplot2::aes(x = variable, ymin = min, ymax = max), 114 | fill = "grey", col = "black", size = 0.15) + 115 | ggplot2::theme_bw() 116 | p <- p + ggplot2::geom_line( 117 | ggplot2::aes(x = variable, y = value), 118 | size = 0.25) + 119 | ggplot2::facet_wrap(~ by) + 120 | ggplot2::labs(x = xlab, y = ylab) + 121 | ggplot2::scale_x_reverse(breaks = brk) 122 | } else { 123 | p <- ggplot2::ggplot(data = mean.spc, 124 | ggplot2::aes(x = variable, y = value, group = by, col = by)) + 125 | ggplot2::geom_line(size = 0.25) + 126 | ggplot2::labs(x = xlab, y = ylab) + 127 | ggplot2::scale_x_reverse(breaks = brk) + 128 | ggplot2::theme_bw() 129 | } 130 | return(p) 131 | } else { 132 | if (is.null(group)) 133 | group <- as.character(1:nrow(spc)) 134 | spc$group <- group 135 | spc$colour <- col 136 | spc$linetype <- linetype 137 | id.var <- colnames(spc)[ 138 | grep("group|colour|linetype",colnames(spc))] 139 | tmp <- reshape2::melt(spc, id.var = id.var) 140 | tmp$variable <- as.numeric(as.character(tmp$variable)) 141 | p <- ggplot2::ggplot(tmp, 142 | ggplot2::aes(variable, value, group = group)) + 143 | ggplot2::labs(x = xlab, y = ylab) + 144 | ggplot2::theme_bw() + 145 | ggplot2::scale_x_reverse(breaks = brk) 146 | if (is.null(col) & is.null(linetype)) 147 | p <- p + ggplot2::geom_line( 148 | ggplot2::aes(colour = group)) 149 | else if (!is.null(col) & is.null(linetype)) 150 | p <- p + ggplot2::geom_line( 151 | ggplot2::aes(colour = colour)) 152 | else if (is.null(col) & !is.null(linetype)) 153 | p <- p + ggplot2::geom_line( 154 | ggplot2::aes(colour = group, 155 | linetype = linetype)) 156 | else p <- p + ggplot2::geom_line( 157 | ggplot2::aes(colour = colour, 158 | linetype = linetype)) 159 | return(p) 160 | } 161 | } 162 | 163 | -------------------------------------------------------------------------------- /R/utils-stats.R: -------------------------------------------------------------------------------- 1 | #' @title Assess multiple pairs of measured and predicted values 2 | #' @description Return performance metrics for test set predictions and 3 | #' measured values, e.g. for different model outcome variables. 4 | #' @param data Data frame with all measured (observed) and predicted variables. 5 | #' @param ... Multiple arguments with observed (measured)-predicted pairs, 6 | #' specified with \code{dplyr::vars(o = , p = )}. 7 | #' Column names can strings or symbols. The arguments in `...` need to be named. 8 | #' @param .metrics Character vector with package used for metrics calculation. 9 | #' Default is \code{"simplerspec"}, which uses 10 | #' \code{simplerspec::evaluate_model()}. 11 | #' @param .model_name String with name for the new column that specifies the 12 | #' model or the outcome variable. Default is \code{"model"}. 13 | #' 14 | #' @return Data frame with with summary statistics for measured values and 15 | #' performance metrics for the pairs of measured and predicted values. 16 | #' @importFrom purrr modify_depth imap 17 | #' @export 18 | assess_multimodels <- function(data, 19 | ..., 20 | .metrics = c("simplerspec", "yardstick"), 21 | .model_name = "model") { 22 | args <- rlang::enquos(...) 23 | args_tidy <- map(args, rlang::eval_tidy) 24 | stopifnot( 25 | all(map_int(names(args), nchar)), 26 | all(map_lgl(args_tidy, is.list)) 27 | ) 28 | 29 | vars_names <- modify_depth(args_tidy, .depth = 1, names) 30 | vars_names_ok <- map_lgl(vars_names, ~ all(c("o", "p") %in% .x)) 31 | if (!all(vars_names_ok)) { 32 | stop("Assessment variables supplied in `vars()` need to be named with 33 | 'o' (observed) and 'p' (predicted)")} 34 | 35 | metrics <- match.arg(.metrics) 36 | # pb 2018-05-09: todo: support yardstick metrics 37 | assessment <- switch (metrics, 38 | "simplerspec" = map( 39 | .x = args_tidy, 40 | ~ evaluate_model(data = data, 41 | obs = !!.x[["o"]], pred = !!.x[["p"]])) 42 | ) 43 | assessment_models <- imap(assessment, 44 | ~ tibble::add_column(.x, !!.model_name := .y, .before = 1)) 45 | 46 | dplyr::bind_rows(assessment_models) 47 | } 48 | 49 | 50 | #' @title Calculate model evaluation metrics 51 | #' @description Calculate summary statistics of observed values and model 52 | #' evaluation statistics for assessing agreement between observed (`obs`) and 53 | #' predicted (`pred`) values. 54 | #' @param data `data.frame` with predicted and observed data in columns. 55 | #' @param obs Column that contains observed values, `symbol`/`name` or 56 | #' `character` (wrapped in ""). 57 | #' @param pred Column that contains predicted values, `symbol`/`name` or 58 | #' `character` (wrapped in ""). 59 | #' @importFrom e1071 kurtosis 60 | #' @export 61 | 62 | # Note that coefficient of determination (r2) derived from a linear regression 63 | # of observed values on the prediction 64 | # solely describes what proportion of variance in the measured data is 65 | # simulated by the model. A linear regression line between observed (x) and 66 | # predicted (y) does not provide a measure of model error! 67 | 68 | # Mean squared error (MSE) can be decomposed into squared bias/deviation (SE^2) 69 | # and mean squared variation (MSV) (:= SDE^2 := "squared standard deviation of 70 | # the error"; see e.g. Kobayashi and Salam (2000) 71 | # Gauch et al. (2003) propose a more sophisticated additive partitioning of the 72 | # MSE that is more informative about the sources of error and link to the 73 | # regression parameters; namely, these are squared bias (SB), 74 | # non-unity slope (NU) and lack of correlation (LC) 75 | 76 | evaluate_model <- function(data, obs, pred) { 77 | # Implement quasiquotation to first quote the `obs` and `pred` arguments, and 78 | # unquote/evaluate in the context of the `data` data.frame; 79 | # obs` and `pred` can be # both symbols or character; 80 | # `obs = obs` or `obs = "obs"` when the column that contains observed values 81 | # is named "obs" 82 | obs <- rlang::enquo(obs) 83 | pred <- rlang::enquo(pred) 84 | obs <- dplyr::pull(data, !!obs) 85 | pred <- dplyr::pull(data, !!pred) 86 | 87 | tibble::tibble( 88 | ## Compute descriptive statistics of the observations/measurements 89 | n = length(obs), 90 | min = min(obs, ra.rm = TRUE), 91 | max = max(obs, na.rm = TRUE), 92 | mean = mean(obs, na.rm = TRUE), 93 | median = median(obs, na.rm = TRUE), 94 | sdev = sd(obs, na.rm = TRUE), 95 | cv = sd(obs, na.rm = TRUE) / mean(obs, na.rm = TRUE), 96 | skewness_b1 = e1071::skewness(obs, na.rm = TRUE, type = 3), 97 | kurtosis = e1071::kurtosis(obs, na.rm = TRUE), 98 | 99 | ## Compute model evaluation measures to address different aspects 100 | ## of how well predictions correspond to observations/measurements 101 | # Root mean squared error 102 | rmse = mean((obs - pred)^2, na.rm = TRUE)^.5, 103 | # Mean squared error; mse^2 = me^2 + msv = me^2 + sde^2 104 | mse = mean((obs - pred)^2, na.rm = TRUE), 105 | # Terms mean error (ME) and bias are equivalent 106 | me = mean(obs - pred, na.rm = TRUE), 107 | bias = mean(obs - pred, na.rm = TRUE), 108 | # Mean squared variation (of the error); difference between the simulation 109 | # and the measurement with respect to the deviation from the means 110 | msv = mean(((mean(obs, na.rm = TRUE) - obs) 111 | - (mean(pred, na.rm = TRUE) - pred))^2), 112 | # Standard deviation of the error := SDE = MSV^0.5 113 | sde = mean(((mean(obs, na.rm = TRUE) - obs) 114 | - (mean(pred, na.rm = TRUE) - pred))^2)^0.5, 115 | # Mean absolute error 116 | mae = mean(abs(obs - pred), na.rm = TRUE), 117 | r2 = cor(obs, pred, use = "pairwise.complete.obs")^2, 118 | b = lm(obs ~ pred)$coefficients[2], 119 | # Ratio of performance to deviation 120 | rpd = sd(obs, na.rm = TRUE) / mean((obs - pred)^2, na.rm = TRUE)^.5, 121 | # Ratio of performance to interquartile range 122 | rpiq = (quantile(obs, .75, na.rm = TRUE) - quantile(obs, .25, na.rm = TRUE)) 123 | / mean((obs - pred)^2, na.rm = TRUE)^.5, 124 | # See Gauch et. al., 2003) for MSD decomposition into SB, NU and LC; 125 | # Squared bias 126 | SB = (mean(obs - pred, na.rm = TRUE))^2, 127 | # Non-unity slope 128 | NU = mean((pred - mean(pred))^2) * (1 - lm(obs ~ pred)$coefficients[2])^2, 129 | # Lack of correlation 130 | LC = mean((obs - mean(obs))^2) 131 | * (1 - cor(obs, pred, use = "pairwise.complete.obs")^2), 132 | # Proportional contributions of SB, NU and LC to MSE in percent 133 | SB_prop = round((mean(obs - pred, na.rm = TRUE))^2 134 | / mean((pred - obs)^2) * 100, 0), 135 | NU_prop = round(mean((pred - mean(pred))^2) 136 | * (1 - lm(obs ~ pred)$coefficients[2])^2 / mean((pred - obs)^2) * 100, 0), 137 | LC_prop = round(mean((obs - mean(obs))^2) 138 | * (1 - cor(obs, pred, use = "pairwise.complete.obs")^2) 139 | / mean((pred - obs)^2) * 100, 0) 140 | ) 141 | } 142 | 143 | # Wrapper function to ensure compatibility with old summary_df() function 144 | # !!! note that y are predicted values and x are observed values; 145 | # this is only for backward compatibility; 146 | # This deviates from the principles that observed (O) should be denoted in the 147 | # in the y-axis vs. predicted (P) in the x-axis ((OP) regressions), according 148 | # Pineiro et al. (2008) 149 | #' @rdname evaluate_model 150 | #' @param df `data.frame` with predicted and observed data in columns. 151 | #' @param x Column that contains observed values, `symbol`/`name` or 152 | #' `character` (wrapped in ""). 153 | #' @param y Column that contains predicted values, `symbol`/`name` or 154 | #' `character` (wrapped in ""). 155 | #' @export 156 | summary_df <- function(df, x, y) { 157 | evaluate_model(data = df, obs = x, pred = y) 158 | } 159 | 160 | # Function to calculate standard error of the mean 161 | sem_ci <- function(x) { 162 | qt(0.975, df = length(na.omit(x)) - 1) * 163 | sqrt(var(x, na.rm = TRUE) / length(na.omit(x))) 164 | } 165 | 166 | # Calculate standard error 167 | se <- function(x) { 168 | sqrt(var(x, na.rm = TRUE) / length(na.omit(x))) 169 | } 170 | -------------------------------------------------------------------------------- /R/resample-spc.R: -------------------------------------------------------------------------------- 1 | #' @title Resample spectra in list-column to new x-axis interval 2 | #' @description Resamples (interpolates) different spectra types with 3 | #' corresponding x-axis values that are both stored in list-columns of a spectra 4 | #' tibble. A spectra tibble hosts spectra, x-axis vectors, metadata, and 5 | #' further linked data with standardized naming conventions. Data input for 6 | #' resampling can for example be generated with `simplerspec::gather_spc()`. 7 | #' Resampling is a key harmonizing step to process and later model spectra 8 | #' measured at different resolutions and spectral ranges (i.e., different 9 | #' spectrometer devices and/or measurement settings). 10 | #' @param spc_tbl Spectra data embedded in a tibble object (classes 11 | #' `"tbl_df", "tbl", "data.frame"`). The spectra tibble needs to contain at 12 | #' least of one of the the spectra columns `spc`, `spc_rs`, `spc_mean`, 13 | #' `spc_nocomp`, `sc_sm`, `sc_rf`, or `spc_pre` (list-columns with spectral 14 | #' `data.table`s), and `wavenumbers` or `wavelengths` (list-column with vectors 15 | #' of x-axis values corresponding to each spectrum). The help section *"Matching 16 | #' spectrum type and corresponding x-axis type"* describes the spectra types 17 | #' and corresponding x-axis types. 18 | #' @param column_in Character vector of length 1L or symbol/name 19 | #' specifying the name of list-column that contains the spectra to be resampled. 20 | #' @param x_unit Character vector of length 1L specifying the measurement unit 21 | #' of the x-axis values (list-column) of the input spectra in `spc_tbl`. 22 | #' Possible values are `"wavenumber"` (default) or `"wavelength"`. Wavenumber 23 | #' is a convenient unit of frequency in the mid-infrared spectral range, 24 | #' where wavelength is often used as spatial period for the visible and 25 | #' near-infrared range. 26 | #' @param wn_lower Numeric value of lowest wavenumber. This argument will only 27 | #' be used if `x_unit = "wavenumber"`. The value serves as starting value for 28 | #' the new wavenumber sequence that the spectra will be resampled upon. Default 29 | #' value is 500 (i.e., in reciprocal centimeters). 30 | #' @param wn_upper Numeric value of highest wavenumber. This argument will only 31 | #' be used if `x_unit = "wavenumber`. The value will be used as last value of 32 | #' the new wavenumber sequence that the spectra will be resampled upon. Default 33 | #' value is 4000 (i.e., in reciprocal centimeters). 34 | #' @param wn_interval Numeric value of the wavenumber increment for the new 35 | #' wavenumber sequence that the spectra will be resampled upon. Default value 36 | #' is 2 (i.e., in reciprocal centimeters). 37 | #' @param wl_lower Numeric value of lowest wavelength. This argument will only 38 | #' be used if `x_unit = "wavelength"`. The value serves as starting value of 39 | #' the new wavenumber sequence that the spectra will be resampled upon. 40 | #' Default value is 350 (i.e. in nanometers). 41 | #' @param wl_upper Numeric value of highest wavelength. This argument will only 42 | #' be used if `x_unit = "wavelength"`. The value will be used as last value of 43 | #' the new wavenumber sequence that the spectra will be resampled upon. Default 44 | #' value is 2500 (i.e., in nanometers). 45 | #' @param wl_interval Numeric value of the wavelength increment for the new 46 | #' wavenumber sequence that the spectra will be resampled upon. This argument 47 | #' will only be used if `x_unit = "wavelength"`. Default value is 1 (i.e., in 48 | #' nanometers). 49 | #' @param interpol_method Character of `"linear"` (default) or `"spline"` with 50 | #' the interpolation method. `"spline"` uses a cubic spline to interpolate the 51 | #' input spectra at given x-axis values to new equispaced x-axis intervals. 52 | #' @return A spectra tibble (`spc_tbl`) containing two added list-columns: 53 | #' * `spc_rs:` Resampled spectra as list of `data.table`s 54 | #' * `wavenumbers_rs` or `wavelengths_rs`: Resampled x-axis values as list of 55 | #' numeric vectors 56 | #' @section Matching spectrum type and corresponding x-axis type: 57 | #' The combinations of input spectrum types (`column_in`) and 58 | #' corresponding x-axis types are generated from a simple lookup list. The 59 | #' following key-value(s) pairs can be matched at given key, which is the column 60 | #' name from `column_in` containing the spectra. 61 | #' * `"spc"` : `"wavenumbers"` or `"wavelengths"` (raw spectra) 62 | #' * `"spc_rs"` : `"wavenumbers_rs"` or `"wavelengths_rs"`) (resampled spectra) 63 | #' * `"spc_mean"` : `"wavenumbers_rs"` or `"wavelengths_rs"` (mean spectra) 64 | #' * `"spc_nocomp"` `"wavenumbers"` or `"wavelengths"` (spectra prior 65 | #' atmospheric compensation) 66 | #' * `"sc_sm" : c("wavenumbers_sc_sm", "wavelengths_sc_sm")` (single channel 67 | #' sample spectra) 68 | #' * `"sc_rf" : c("wavenumbers_sc_rf", "wavelengths_sc_rf")` (single channel 69 | #' reference spectra) 70 | #' * `"spc_pre" : "xvalues_pre"` (preprocessed spectra) 71 | #' @export 72 | resample_spc <- function(spc_tbl, 73 | column_in = "spc", 74 | x_unit = c("wavenumber", "wavelength"), 75 | wn_lower = 500, wn_upper = 4000, wn_interval = 2, 76 | wl_lower = 350, wl_upper = 2500, wl_interval = 1, 77 | interpol_method = c("linear", "spline")) { 78 | # Capture user input as expressions (can be both of type character or symbol), 79 | # also called quoting; convert quosures to characters for later arg matching 80 | column_in <- rlang::enquo(column_in) 81 | column_in_chr <- rlang::quo_name(column_in) 82 | 83 | stopifnot( 84 | is.character(x_unit) && length(x_unit) > 0, 85 | is.numeric(wn_lower), is.numeric(wn_upper), is.numeric(wn_interval), 86 | is.numeric(wl_lower), is.numeric(wl_upper), is.numeric(wl_interval) 87 | ) 88 | 89 | # Lookup list to match spectrum types and corresponding x-axis types 90 | spc_xaxis_types <- list( 91 | "spc" = c("wavenumbers", "wavelengths"), # raw/unprocessed 92 | "spc_rs" = c("wavenumbers_rs", "wavelengths_rs"), # resampled 93 | "spc_mean" = c("wavenumbers_rs", "wavelengths_rs"), # mean 94 | "spc_nocomp" = c("wavenumbers", "wavelengths"), # no atm. compensation 95 | "sc_sm" = c("wavenumbers_sc_sm", "wavelengths_sc_sm"), # single channel sample 96 | "sc_rf" = c("wavenumbers_sc_rf", "wavelengths_sc_rf"), # single channel reference 97 | "spc_pre" = rep("xvalues_pre", 2) # preprocessed 98 | ) 99 | spctypes <- names(spc_xaxis_types) 100 | column_spc <- match.arg(column_in_chr, spctypes) 101 | 102 | x_unit <- match.arg(x_unit) 103 | switch(x_unit, 104 | wavenumber = {x_unit_int <- 1L}, 105 | wavelength = {x_unit_int <- 2L}) 106 | 107 | interpol_method <- match.arg(interpol_method) 108 | 109 | # Final selection of `x_unit` column name string from user input and lookup 110 | x_unit_sel <- spc_xaxis_types[[column_spc]][x_unit_int] 111 | 112 | # Both columns with X-values and input spectra need to be present in `spc_tbl` 113 | colnm <- colnames(spc_tbl) 114 | stopifnot(x_unit_sel %in% colnm, column_spc %in% colnm) 115 | 116 | # Extract list-column containing spectra 117 | spc_in_list <- dplyr::pull(spc_tbl, !!column_in) 118 | 119 | # Extract list-column containing x-axis values 120 | xvalues_in_list <- dplyr::pull(spc_tbl, !!x_unit_sel) 121 | 122 | # Automatically check the arrangement of the input x-Unit values; 123 | # often, it is convenient to have have a descending ordner of spectral columns 124 | # if the physical quantity of the x-axis is wavenumbers 125 | xvalue_order_chr <- purrr::map_chr(xvalues_in_list, seq_order) 126 | 127 | if (length(unique(xvalue_order_chr)) > 1L) { 128 | stop( 129 | glue::glue( 130 | "The column `{x_unit_sel}` which contains the list of X-values 131 | has both elements of ascending and descending order. 132 | * To resolve, you can split `spc_tbl` in a list of `spc_tbl`s 133 | with identical X-value vectors based on `group_by_col_hash()`, 134 | and apply `resample_spc()` separately to each list element. 135 | * Alternatively, you could fix the order of x-axis values 136 | for all input spectra and X-value vectors to all ascending or 137 | descending"), 138 | call. = FALSE) 139 | } 140 | xvalue_order <- xvalue_order_chr[1L] 141 | 142 | # Generate sequence of new x-axis values 143 | switch(x_unit_int, 144 | `1L` = { 145 | xvalues_out <- seq(from = wn_lower, to = wn_upper, by = wn_interval) 146 | x_unit_type_rs <- "wavenumbers_rs" 147 | }, 148 | `2L` = { 149 | xvalues_out <- seq(from = wl_lower, to = wl_upper, by = wl_interval) 150 | x_unit_type_rs <- "wavelengths_rs" 151 | }) 152 | 153 | if (xvalue_order == "descending") xvalues_out <- rev(xvalues_out) 154 | 155 | # Repeat sequence of new (resampled) x-axis values in list (for every obs.) 156 | xvalues_out_list <- rep(list(xvalues_out), nrow(spc_tbl)) 157 | names(xvalues_out_list) <- names(spc_in_list) 158 | 159 | # Resample all spectra extracted from list-column `column_in` using prospectr 160 | spc_rs <- lapply( 161 | seq_along(spc_in_list), 162 | function(i) { 163 | data.table::data.table( 164 | prospectr::resample( 165 | X = spc_in_list[[i]], # spectral data.table to resample 166 | wav = xvalues_in_list[[i]], # old x-values vector 167 | new.wav = xvalues_out_list[[i]], # new x-values vector 168 | interpol = interpol_method 169 | ) 170 | ) 171 | } 172 | ) 173 | names(spc_rs) <- names(spc_in_list) 174 | 175 | spc_tbl_out <- 176 | spc_tbl %>% 177 | tibble::add_column( 178 | spc_rs = spc_rs, 179 | !!x_unit_type_rs := xvalues_out_list 180 | ) 181 | return(spc_tbl_out) 182 | } 183 | 184 | # Helper 185 | seq_order <- function(x) ifelse(x[1L] < x[length(x)], "ascending", "descending") 186 | -------------------------------------------------------------------------------- /R/read-asd.R: -------------------------------------------------------------------------------- 1 | #' @title Read ASD fieldspec spectrometer data export into into simplerspec 2 | #' spectra tibble. 3 | #' @description Read tab delimited text (.txt) files exported from ASD field 4 | #' spectrometer into simplerspec spectra tibble. 5 | #' ASD Fieldspec data files are expected in .txt tab delimited file format. 6 | #' The first row should contain 7 | #' the name 'Wavelength' for the first column and the file names for the 8 | #' remaining columns. 9 | #' @param file Tab delmited file from ASD software export where the first 10 | #' column called \code{Wavelength} contais wavelengths in nanometer and the 11 | #' remaining columns are sample spectra referred by an ID name provided in the 12 | #' first row of these columns. 13 | #' @return Spectra data in tibble data frame (class `tbl_df`) that contains 14 | #' columns \code{sample_id} (derived from 2nd and following column names of 15 | #' tab delimited ASD exported text file), 16 | #' \code{spc} (list-column of spectral matrices) 17 | #' and \code{wavelengths} (list-column containing wavelength vectors). 18 | #' @importFrom tidyselect one_of 19 | #' @importFrom data.table fread 20 | #' @export 21 | read_asd <- function(file) { 22 | 23 | # Read fixed with file into a tibble 24 | asd_tbl <- tibble::as_tibble(data.table::fread(file = file)) 25 | # Transpose tibble and add Wavelengths as column names 26 | asd_tbl_t <- tibble::as_tibble( 27 | t(dplyr::select(asd_tbl, - tidyselect::one_of("Wavelength"))) 28 | ) 29 | colnames(asd_tbl_t) <- asd_tbl[["Wavelength"]] 30 | 31 | # Split matrix by each row into list of matrices 32 | asd_m <- as.matrix(asd_tbl_t) 33 | asd_listofv <- split(asd_m, row(asd_m)) # List of numerical vectors 34 | # Convert list of vectors into list of matrices 35 | asd_listofm <- lapply(seq_along(asd_listofv), 36 | function(i) matrix(asd_listofv[[i]], nrow = 1, byrow = FALSE)) 37 | # Assign file names as names for list of matrices 38 | names(asd_listofm) <- colnames(asd_tbl)[-1] # Remove "Wavelength" 39 | 40 | # Assign columnes for all matrices in list 41 | asd_listofm <- lapply(asd_listofm, 42 | function(x) {colnames(x) <- asd_tbl[["Wavelength"]]; x}) 43 | 44 | # Create list of wavelengths and assign sample names 45 | wavelengths_list <- rep(list(asd_tbl[["Wavelength"]]), length(asd_listofm)) 46 | names(wavelengths_list) <- names(asd_listofm) 47 | 48 | # Return spectra as tibble 49 | tibble::tibble( 50 | sample_id = names(asd_listofm), 51 | spc = asd_listofm, 52 | wavelengths = wavelengths_list 53 | ) 54 | 55 | } 56 | 57 | ## Simplespec spectra tibble version of ASD reader based on prospectr::readASD 58 | ## Reads binary ASD data and converts data into list-columns containing spectral 59 | ## data that can be further processed within the simplerspec spectra processing 60 | ## framework =================================================================== 61 | 62 | #' @title Read ASD binary files and gather spectra and metadata in tibble data 63 | #' frame. 64 | #' @description Read multiple ASD binary files and gather spectra and metadata 65 | #' into a simplerspec spectral tibble (data frame). The resulting spectral 66 | #' tibble is compatible with the simplerspec spectra processing and modeling 67 | #' framework. 68 | #' @param fnames Character vector containing full paths of ASD binary files 69 | #' to be read 70 | #' @return A spectral tibble (data frame) containing the follwing columns: 71 | #' \item{unique_id}{Character vector. Unique identifier containing file name 72 | #' pasted with date and time.} 73 | #' \item{file_id}{Character vector containing file names and exension} 74 | #' \item{sample_id}{Character vector containing files names without extension} 75 | #' \item{metadata}{List-column. List of data frames containing spectral 76 | #' metadata} 77 | #' \item{wavelengths}{List-column. List of wavelengths vectors (numeric).} 78 | #' \item{spc_radiance}{List-column. List of data.tables containing 79 | #' radiance sample spectra.} 80 | #' \item{spc_reference}{List-column. List of data.tables containing 81 | #' reference reflectance spectra.} 82 | #' \item{spc}{List-column. List of data.tables containing final reflectance 83 | #' spectra.} 84 | #' @export 85 | read_asd_bin <- function(fnames) { 86 | 87 | data <- prospectr::readASD(fnames = fnames, 88 | in_format = "binary", out_format = "list") 89 | gps <- purrr::map(data, c("header", "GPS")) 90 | header <- purrr::map(purrr::map(data, "header"), 91 | function(x) x[- which(names(x) == "GPS")]) 92 | file_id <- purrr::map_chr(data, "name") 93 | sample_id <- sub("(.+)\\.[[:alpha:]]+$", "\\1", file_id) # remove ".asd" 94 | datetime <- purrr::map(data, "datetime") 95 | unique_id <- mapply(function(x, y) paste0(x, "_", y), sample_id, datetime) 96 | metadata <- purrr::map(header, tibble::as_tibble) 97 | # Add GPS to metadata 98 | metadata <- purrr::map2(metadata, gps, dplyr::bind_cols) 99 | spc_l <- purrr::transpose( 100 | purrr::map(data, `[`, c("radiance", "reference", "reflectance"))) 101 | wl_l <- purrr::transpose(purrr::map(data, `[`, "wavelength")) 102 | spc_dt <- purrr::modify_depth(spc_l, 2, 103 | function(x) data.table::data.table(t(x))) 104 | # Change column names of spectral data tables of all spectrum types 105 | # by reference, use character converted wavenlengths 106 | purrr::map(.x = spc_dt, ~ map2(.x = .x, .y = wl_l[["wavelength"]], 107 | ~ data.table::setnames(.x, names(.x), as.character(.y))) 108 | ) 109 | 110 | tibble::tibble( 111 | unique_id = unique_id, 112 | file_id = file_id, 113 | sample_id = sample_id, 114 | metadata = metadata, 115 | wavelengths = wl_l[["wavelength"]], 116 | spc_radiance = spc_dt[["radiance"]], 117 | spc_reference = spc_dt[["reference"]], 118 | spc = spc_dt[["reflectance"]] 119 | ) 120 | } 121 | 122 | # Helper function to remove the ".asd.xxx" (.xxx for example ".ref" or "") 123 | # extension in id column (e.g. sample_id) strings in tibble with metadata or 124 | # reference analysis data ------------------------------------------------------ 125 | 126 | #' @importFrom stringr str_replace 127 | #' @importFrom dplyr pull 128 | remove_id_extension <- function(data, 129 | id_col = "sample_id", 130 | id_new_nm = "sample_id", 131 | extension = "\\.asd.*$") { 132 | id_col <- rlang::enquo(id_col) 133 | id_col_chr <- rlang::quo_name(id_col) 134 | id_col_rm <- rlang::expr(-!!rlang::sym(id_col_chr)) 135 | id_new_nm <- rlang::quo_name(rlang::enquo(id_new_nm)) 136 | 137 | id_new <- gsub(pattern = extension, replacement = "", 138 | x = dplyr::pull(data, !!id_col)) 139 | 140 | # Remove old id column and bind new id column to the remaining columns 141 | rest <- dplyr::select(data, !!id_col_rm) 142 | dplyr::bind_cols(!!id_new_nm := id_new, rest) 143 | } 144 | 145 | 146 | # Helper to orrect the sensor offset for ASD spectra; 147 | # shift between VIS and VNIR1, and VNIR1 and VNIR2 ranges; 148 | # based on subtracting gaps at `Join1Wavelength` and `Join2Wavelength` column 149 | # positions in `metadata` list-column data frames ------------------------------ 150 | 151 | correct_join_offset <- function(spc_tbl, 152 | lcol_spc = spc, 153 | lcol_xvalues = wavelengths, 154 | lcol_metadata = metadata) { 155 | swir2_offset <- swir1_offset <- wavelengths <- NULL 156 | lcol_spc <- rlang::enquo(lcol_spc) 157 | lcol_spc_chr <- rlang::quo_name(lcol_spc) 158 | lcol_spc_rm <- rlang::expr(-!!rlang::sym(lcol_spc_chr)) 159 | lcol_xvalues <- rlang::enquo(lcol_xvalues) 160 | lcol_xvalues_chr <- rlang::quo_name(lcol_xvalues) 161 | lcol_metadata <- rlang::enquo(lcol_metadata) 162 | 163 | spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!lcol_spc)) 164 | xvalues <- dplyr::pull(spc_tbl, !!lcol_xvalues) 165 | if (!all(sapply(xvalues, identical, xvalues[[1]]))) { 166 | stop(paste0("Error: Spectral tibble (`spc_tbl`) contains observations", 167 | " with unequal x unit values (`lcol_xvalues`).")) 168 | } 169 | metadata <- dplyr::pull(spc_tbl, !!lcol_metadata) 170 | 171 | join1_wavelength <- map(metadata, c("Join1Wavelength")) 172 | join2_wavelength <- map(metadata, c("Join2Wavelength")) 173 | 174 | join1_idx <- unique( 175 | purrr::map2_int(.x = xvalues, .y = join1_wavelength, 176 | ~ which.min(abs(.x - .y))) 177 | ) 178 | join2_idx <- unique( 179 | purrr::map2_int(.x = xvalues, .y = join2_wavelength, 180 | ~ which.min(abs(.x - .y))) 181 | ) 182 | xvalues_max_idx <- unique(map_int(xvalues, which.max)) 183 | 184 | join1_col1 <- names(spc)[join1_idx] 185 | join1_col2 <- names(spc)[join1_idx + 1] 186 | join2_col1 <- names(spc)[join2_idx] 187 | join2_col2 <- names(spc)[join2_idx + 1] 188 | swir1_cols <- names(spc)[(join1_idx + 1):join2_idx] 189 | swir2_cols <- names(spc)[(join2_idx + 1):xvalues_max_idx] 190 | 191 | # Calculate the swir1 and swir2 offsets to shift spectral ranges 192 | # https://stackoverflow.com/questions/19276194/data-table-assignment-expressions-with-dynamic-inputs-existing-columns-an 193 | spc[, `:=` ( 194 | swir1_offset = .SD[[join1_col2]] - .SD[[join1_col1]], 195 | swir2_offset = .SD[[join2_col2]] - .SD[[join2_col1]]), 196 | .SDcols = c(join1_col2, join1_col1, join2_col2, join2_col1)] 197 | 198 | # Substract offset(s) for SWIR1 and SWIR2, remove offset columns 199 | spc[, c(swir1_cols) := lapply(.SD, 200 | function(x) x - swir1_offset), .SDcols = swir1_cols] 201 | spc[, c(swir2_cols) := lapply(.SD, 202 | function(x) x - swir1_offset - swir2_offset), .SDcols = swir2_cols] 203 | spc[, `:=` (swir1_offset = NULL, swir2_offset = NULL)] 204 | 205 | # Remove old spectra list-column (`lcol_spc`) and 206 | # add new sensor join offset corrected spectra as list-column 207 | rest <- dplyr::select(spc_tbl, !!lcol_spc_rm) 208 | tibble::add_column(rest, 209 | # Convert `spc` single data.table back to list of data.tables 210 | # much faster than: # data.table:::split.data.table(spc, seq(nrow(spc)) 211 | !!lcol_spc_chr := map(purrr::transpose(spc), data.table::as.data.table), 212 | .after = eval(substitute(lcol_xvalues_chr))) 213 | } 214 | 215 | -------------------------------------------------------------------------------- /R/preprocess-spc.R: -------------------------------------------------------------------------------- 1 | #' @title Preprocess spectra 2 | #' @description Preprocesses spectra in tibble column by sample_id after 3 | #' averaging spectra by \code{simplerspec::average_spc()}. 4 | #' @param spc_tbl Tibble that contains spectra to be preprocessed within 5 | #' a list-column. 6 | #' @param select Character vector of predefined preprocessing options to be 7 | #' applied to the spectra list-column specified in \code{column_in}. 8 | #' Common prefined values are stated as abbreviated preprocessing methods and 9 | #' options such as \code{"sg_1_w21"}, where \code{"sg"} stands for 10 | #' Savitzky-Golay and \code{1} for first derivative and \code{"w21"} 11 | #' for a window size of 21 points. 12 | #' @param column_in Character vector of single list-column in \code{spc_tbl} that 13 | #' contain list of spectra (1 row matrix) to be processed by function supplied 14 | #' in \code{select}. 15 | #' @param custom_function A character string of a custom processing function 16 | #' that is later parsed (produces expression in a list) and evaluated within 17 | #' the function \code{preprocess_spc}. 18 | #' The character vector argument of \code{custom_function} 19 | #' needs to contain \code{"spc_raw"}, which is the single data table of spectra 20 | #' that results from binding a list of data.tables (spectra to preprocess) 21 | #' from the spectra list-column specified in \code{column_in}. 22 | #' An example for a value is 23 | #' \code{"prospectr::savitzkyGolay(X = spc_raw, m = 0, p = 3, w = 9)"}. 24 | #' Optional argument. Default is \code{NULL}. 25 | #' @export 26 | preprocess_spc <- function(spc_tbl, select, column_in = "spc_mean", 27 | custom_function = NULL) { 28 | 29 | # Convert list of spectral data.tables to one data.table 30 | spc_raw <- data.table::rbindlist(spc_tbl[column_in][[column_in]]) 31 | 32 | ## Perform preprocessing ===================================================== 33 | 34 | # Use custom function when supplying option ---------------------------------- 35 | if (!is.null(custom_function) & select == "custom") { 36 | # Create full character string for parsing 37 | custom_fct <- paste0("custom <- ", custom_function) 38 | # parse converts the character string into an expression 39 | # eval evaluates the expression; as a result, custom object is computed 40 | # and saved within the current workspace 41 | eval(parse(text = custom_fct)) 42 | ## x <- spc_raw 43 | ## custom <- eval(substitute(custom_function), envir = parent.frame()) 44 | # -> Error in is.data.frame(X) : object 'x' not found 45 | } 46 | # -> returns error: 47 | # custom_function = prospectr::savitzkyGolay(X = x, m = 0, p = 3, w = 9) 48 | # Error in is.data.frame(X) : object 'x' not found 49 | # -> Maybe solution: http://stackoverflow.com/questions/30563745/non-standard-evaluation-from-another-function-in-r 50 | 51 | # Savitzky-Golay preprocessing 52 | # use different derivatives and window sizes --------------------------------- 53 | 54 | # Zero order Savitzky-Golay (no derivative) -> only smoothing 55 | if (select == "sg_0_w9") { 56 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 57 | m = 0, p = 3, w = 9)} 58 | # First derivative Savitzky-Golay 59 | if (select == "sg_1_w5") { 60 | sg_1_w5 <- prospectr::savitzkyGolay(X = spc_raw, 61 | m = 1, p = 3, w = 5)} 62 | if (select == "sg_1_w9") { 63 | sg_1_w9 <- prospectr::savitzkyGolay(X = spc_raw, 64 | m = 1, p = 3, w = 9)} 65 | if (select == "sg_1_w11") { 66 | sg_1_w11 <- prospectr::savitzkyGolay(X = spc_raw, 67 | m = 1, p = 3, w = 11)} 68 | if (select == "sg_1_w13") { 69 | sg_1_w13 <- prospectr::savitzkyGolay(X = spc_raw, 70 | m = 1, p = 3, w = 13)} 71 | if (select == "sg_1_p2_w13") { 72 | sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw, 73 | m = 1, p = 2, w = 13)} 74 | if (select == "sg_1_w15") { 75 | sg_1_w15 <- prospectr::savitzkyGolay(X = spc_raw, 76 | m = 1, p = 3, w = 15)} 77 | if (select == "sg_1_w17") { 78 | sg_1_w17 <- prospectr::savitzkyGolay(X = spc_raw, 79 | m = 1, p = 3, w = 17)} 80 | if (select == "sg_1_w19") { 81 | sg_1_w19 <- prospectr::savitzkyGolay(X = spc_raw, 82 | m = 1, p = 3, w = 19)} 83 | # Implement window size of 21, corresponds to ICRAF standard; 84 | # see e.g. Terhoeven-Urselmans et al. (2010) 85 | if (select == "sg_1_w21") { 86 | sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw, 87 | m = 1, p = 3, w = 21)} 88 | if (select == "sg_1_w23") { 89 | sg_1_w23 <- prospectr::savitzkyGolay(X = spc_raw, 90 | m = 1, p = 3, w = 23)} 91 | if (select == "sg_1_w25") { 92 | sg_1_w25 <- prospectr::savitzkyGolay(X = spc_raw, 93 | m = 1, p = 3, w = 25)} 94 | if (select == "sg_1_w27") { 95 | sg_1_w27 <- prospectr::savitzkyGolay(X = spc_raw, 96 | m = 1, p = 3, w = 27)} 97 | if (select == "sg_1_w35") { 98 | sg_1_w35 <- prospectr::savitzkyGolay(X = spc_raw, 99 | m = 1, p = 3, w = 35)} 100 | if (select == "sg_1_w41") { 101 | sg_1_w41 <- prospectr::savitzkyGolay(X = spc_raw, 102 | m = 1, p = 3, w = 41)} 103 | if (select == "sg_1_w51") { 104 | sg_1_w51 <- prospectr::savitzkyGolay(X = spc_raw, 105 | m = 1, p = 3, w = 51)} 106 | # Second derivative Savitzky-Golay 107 | if (select == "sg_2_w11") { 108 | sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw, 109 | m = 2, p = 3, w = 11)} 110 | if (select == "sg_2_w21") { 111 | sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw, 112 | m = 2, p = 3, w = 21)} 113 | # Savitzky-Golay (order 0) smoothing and derivative with a window size of 114 | # 21 points 115 | if (select == "sg_0_1_w21") { 116 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 117 | m = 0, p = 3, w = 9) 118 | sg_0_1_w21 <- prospectr::savitzkyGolay(X = sg_0_w9, 119 | m = 1, p = 3, w = 21)} 120 | # Savitzky-Golay second derivative 121 | if (select == "sg_2_w5") { 122 | sg_2_w5 <- prospectr::savitzkyGolay(X = spc_raw, 123 | m = 2, p = 3, w = 5)} 124 | if (select == "sg_2_w11") { 125 | sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw, 126 | m = 2, p = 3, w = 11)} 127 | 128 | # Standard normal variate (SNV) ---------------------------------------------- 129 | 130 | # Calculate standard normal variate (SNV) after Savitzky-Golay smoothing 131 | if (select == "sg_0_snv") { 132 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 133 | m = 0, p = 3, w = 9) 134 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)} 135 | if (select == "sg_1_snv") { 136 | sg_1_snv <- prospectr::standardNormalVariate(sg_1_w5)} 137 | if (select == "sg_1_p2_w13_snv") { 138 | sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw, 139 | m = 1, p = 2, w = 13) 140 | sg_1_p2_w13_snv <- prospectr::standardNormalVariate(sg_1_p2_w13)} 141 | # Standard normal variate (SNV) and first gap-segment derivative 142 | if (select == "snv_gsd_m1_w11_s1") { 143 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 144 | m = 0, p = 3, w = 9) 145 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 146 | snv_gsd_m1_w11_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 11, s = 1)} 147 | if (select == "snv_gsd_m1_w21_s5") { 148 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 149 | m = 0, p = 3, w = 9) 150 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 151 | snv_gsd_m1_w21_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 21, s = 5)} 152 | if (select == "snv_gsd_m1_w31_s1") { 153 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 154 | m = 0, p = 3, w = 9) 155 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 156 | snv_gsd_m1_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)} 157 | if (select == "snv_gsd_m1_w31_s5") { 158 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 159 | m = 0, p = 3, w = 9) 160 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 161 | snv_gsd_m1_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)} 162 | # Standard normal variate (SNV) and second gap-segment derivative 163 | if (select == "snv_gsd_m2_w5_s1") { 164 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 165 | m = 0, p = 3, w = 9) 166 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 167 | snv_gsd_m2_w5_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 5, s = 1)} 168 | if (select == "snv_gsd_m2_w21_s1") { 169 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 170 | m = 0, p = 3, w = 9) 171 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 172 | snv_gsd_m2_w21_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 21, s = 1)} 173 | if (select == "snv_gsd_m2_w31_s1") { 174 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 175 | m = 0, p = 3, w = 9) 176 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 177 | snv_gsd_m2_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 5)} 178 | if (select == "snv_gsd_m2_w31_s5") { 179 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 180 | m = 0, p = 3, w = 9) 181 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 182 | snv_gsd_m2_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 1)} 183 | if (select == "snv_gsd_m2_w51_s1") { 184 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 185 | m = 0, p = 3, w = 9) 186 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 187 | snv_gsd_m2_w51_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 1)} 188 | if (select == "snv_gsd_m2_w51_s5") { 189 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw, 190 | m = 0, p = 3, w = 9) 191 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9) 192 | snv_gsd_m2_w51_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 5)} 193 | # 1rst Gap-segement derivative 194 | if (select == "gsd_m1_w5_s4") { 195 | gsd_m1_w5_s4 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 4)} 196 | if (select == "gsd_m1_w11_s5") { 197 | gsd_m1_w11_s5 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 5)} 198 | if (select == "gsd_m1_w11_s21") { 199 | gsd_m1_w11_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 21)} 200 | if (select == "gsd_m1_w21_s1") { 201 | gsd_m1_w21_s1 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 1)} 202 | if (select == "gsd_m1_w21_s21") { 203 | gsd_m1_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 21)} 204 | if (select == "gsd_m1_w35_s21") { 205 | gsd_m1_w35_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 35, s = 21)} 206 | if (select == "gsd_m1_w5_s21") { 207 | gsd_m1_w5_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 21)} 208 | # 2nd Gap-segment derivative 209 | if (select == "gsd_m2_w21_s21") { 210 | gsd_m2_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 2, w = 21, s = 21)} 211 | # 4th Gap-segment derivative 212 | if (select == "gsd_m4_w21_s21") { 213 | gsd_m4_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 4, w = 21, s = 21)} 214 | 215 | # Savitzky-Golay combined with multiple scatter correction (MSC -------------- 216 | # Savitzky-Golay with 3rd order polynomial, a window size of 21 217 | # and first derivative + MSC 218 | if (select == "sg_1_w21_msc") { 219 | sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw, 220 | m = 1, p = 3, w = 21) 221 | # Use msc function from the pls package; use column means of X as reference 222 | # spectrum 223 | sg_1_w21_msc <- pls::msc(X = sg_1_w21, reference = NULL) 224 | } 225 | # Savitzky-Golay combined with multiple scatter correction (MSC -------------- 226 | # Savitzky-Golay with 4th order polynomial, a window size of 21 227 | # and second derivative + MSC 228 | if (select == "sg_2_w21_msc") { 229 | sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw, 230 | m = 2, p = 4, w = 21) 231 | # Use msc function from the pls package; use column means of X as reference 232 | # spectrum 233 | sg_2_w21_msc <- pls::msc(X = sg_2_w21, reference = NULL) 234 | } 235 | 236 | # Continuum-removal ---------------------------------------------------------- 237 | if (select == "cr") { 238 | cr <- prospectr::continuumRemoval(X = spc_raw, 239 | wav = as.numeric(colnames(spc_raw)), type = "A")} 240 | if (select == "cr_refl") { 241 | cr_refl <- prospectr::continuumRemoval(X = spc_raw, 242 | wav = as.numeric(colnames(spc_raw)), type = "R")} 243 | 244 | # Select final preprocessing based on selection argument and 245 | # save matrix in data.table 246 | pre <- select 247 | spc_pre <- data.table::as.data.table(get(pre)) 248 | 249 | # Convert preprocessed spectra in data.table to list of data.table spectra 250 | # https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md 251 | spc_pre_list <- map(purrr::transpose(spc_pre), data.table::as.data.table) 252 | # Convert x-values of preprocessed spectra in list of vectors 253 | # prospectr only hands over new xunits in matrix colnames of type character 254 | xvalues_pre_list <- lapply(spc_pre_list, 255 | function(x) as.numeric(colnames(x))) 256 | 257 | # Add list of preprocessed spectra and correspoding wavenumbers to tibble 258 | spc_tbl_out <- tibble::add_column(spc_tbl, 259 | spc_pre = spc_pre_list, xvalues_pre = xvalues_pre_list) 260 | return(spc_tbl_out) 261 | } 262 | -------------------------------------------------------------------------------- /R/pls-vip.R: -------------------------------------------------------------------------------- 1 | ### VIP.R: Implementation of VIP (variable importance in projection)(*) for the 2 | ### `pls' package. 3 | ### $Id: VIP.R,v 1.2 2007/07/30 09:17:36 bhm Exp $ 4 | 5 | ### Copyright: 2006,2007 Bjoern-Helge Mevik 6 | ### This program is free software; you can redistribute it and/or modify 7 | ### it under the terms of the GNU General Public License version 2 as 8 | ### published by the Free Software Foundation. 9 | ### 10 | ### This program is distributed in the hope that it will be useful, 11 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | ### GNU General Public License for more details. 14 | 15 | ### A copy of the GPL text is available here: 16 | ### http://www.gnu.org/licenses/gpl-2.0.txt 17 | 18 | ### Contact info: 19 | ### Boejrn-Helge Mevik 20 | ### bhx6@mevik.net 21 | ### Roedtvetvien 20 22 | ### N-0955 Oslo 23 | ### Norway 24 | 25 | ### (*) As described in Chong, Il-Gyo & Jun, Chi-Hyuck, 2005, Performance of 26 | ### some variable selection methods when multicollinearity is present, 27 | ### Chemometrics and Intelligent Laboratory Systems 78, 103--112. 28 | 29 | ## VIP returns all VIP values for all variables and all number of components, 30 | ## as a ncomp x nvars matrix. 31 | VIP <- function(object) { 32 | # pb: added to avoid `R CMD check` note 33 | method <- Yloadings <- scores <- loading.weights <- NULL 34 | if (object$method != "oscorespls") 35 | stop("Only implemented for orthogonal scores algorithm. Refit with 'method = \"oscorespls\"'") 36 | if (nrow(object$Yloadings) > 1) 37 | stop("Only implemented for single-response models") 38 | 39 | SS <- c(object$Yloadings)^2 * colSums(object$scores^2) 40 | Wnorm2 <- colSums(object$loading.weights^2) 41 | SSW <- sweep(object$loading.weights^2, 2, SS / Wnorm2, "*") 42 | sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS)) 43 | } 44 | 45 | 46 | ## VIPjh returns the VIP of variable j with h components 47 | VIPjh <- function(object, j, h) { 48 | # pb: added to avoid `R CMD check` note 49 | method <- Yloadings <- scores <- loading.weights <- NULL 50 | if (object$method != "oscorespls") 51 | stop("Only implemented for orthogonal scores algorithm. Refit with 'method = \"oscorespls\"'") 52 | if (nrow(object$Yloadings) > 1) 53 | stop("Only implemented for single-response models") 54 | 55 | b <- c(object$Yloadings)[1:h] 56 | T <- object$scores[,1:h, drop = FALSE] 57 | SS <- b^2 * colSums(T^2) 58 | W <- object$loading.weights[,1:h, drop = FALSE] 59 | Wnorm2 <- colSums(W^2) 60 | sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS)) 61 | } 62 | 63 | #' @title Extract VIPs (variable importance in the projection) for a PLS 64 | #' regression model output returned from model fitting with 65 | #' \code{simplerspec::fit_pls()} 66 | #' @description VIPs are extracted based on the \code{finalModel} sublist 67 | #' in the \code{caret::train} output contained in the \code{model} element 68 | #' of the \code{simplerspec::fit_pls()} model output list. The VIPs for 69 | #' derived number of PLS components in the \code{finalModel} are computed. 70 | #' @param mout Model output list returned from \code{simplerspec::fit_pls()}. 71 | #' @usage extract_pls_vip(mout) 72 | #' @return A tibble data frame with columns \code{wavenumber} and correponding 73 | #' VIP values in the column \code{vip} for the finally chosen PLS regression 74 | #' model at the final number of PLS components. 75 | #' @export 76 | extract_pls_vip <- function(mout) { 77 | # Compute VIP for all wavenumbers and select only VIPs with ncomp in final 78 | # model 79 | final_model <- mout$model$finalModel 80 | vip <- VIP(object = final_model)[final_model$ncomp, ] 81 | # Collect wavenumbers from preprocessed spectra 82 | wn <- as.numeric(colnames(mout$data$calibration$spc_pre[[1]])) 83 | # Create a data frame with wavenumbers and VIP scores 84 | tibble::tibble(wavenumber = wn, vip = vip) 85 | } 86 | 87 | #' @title Create a data frame containing start and end positions (wavenumbers) 88 | #' where variable importance in projection (VIP) is > 1 89 | #' @description Given a data frame with VIP outputs (wavenumber and vip 90 | #' columns), start and end values denoting spectral regions where VIP > 1 91 | #' are returned as data frame. The functions can be used as helper 92 | #' function for plotting VIP. 93 | #' @param df_vip Data frame containing \code{wavenumber} and \code{vip} columns 94 | #' (numeric) 95 | #' @return Data.frame containing vectors \code{start} (numeric; wavenumber), 96 | #' \code{end} (numeric; wavenumber) and group (integer; values are 97 | #' \code{1:length(start))}. 98 | #' @export 99 | create_vip_rects <- function(df_vip) { 100 | 101 | # Avoid `R CMD check NOTE`: `no visible binding for global variable` 102 | VIP <- wavenumber <- tail <- NULL 103 | 104 | # Highlight region data 105 | # https://stackoverflow.com/questions/32543176/highlight-areas-within-certain-x-range-in-ggplot2 106 | v <- rep(0, nrow(df_vip)) 107 | v[df_vip$vip > 1] <- 1 108 | # Get the start and end points for highlighted regions 109 | inds <- diff(c(0, v)) 110 | start <- df_vip$wavenumber[inds == 1] 111 | end <- df_vip$wavenumber[inds == -1] 112 | if (length(start) > length(end)) { 113 | end <- c(end, tail(df_vip$wavenumber, 1)) 114 | } 115 | # Create data frame for rectangle layer (geom_rects) 116 | data.frame(start = start, end = end, group = seq_along(start)) 117 | } 118 | 119 | #' @title Plot stacked ggplot2 graphs with the Variable Importance for the 120 | #' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, 121 | #'and the preprocessed spectra. 122 | #' @description Plot stacked ggplot2 graphs of VIP for the final 123 | #' PLS regression model output of the calibration (training) data set for the 124 | #' final number of components, raw (replicate mean) spectra, and preprocessed 125 | #' spectra. Regions with VIP > 1 are highlighted across the stacked graphs 126 | #' in beige colour rectangles. VIP calculation is implemented as described in 127 | #' Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection 128 | #' methods when multicollinearity is present. Chemometrics and Intelligent 129 | #' Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011 130 | #' @param mout Model output list that is returned from 131 | #' \code{simplerspec::fit_pls()}. This object contains a nested list with 132 | #' the \code{caret::train()} object (class \code{train}), based on which 133 | #' VIPs at finally selected number of PLS components are computed. 134 | #' @param y1 Character vector of list-column name in 135 | #' \code{mout$data$calibration}, where spectra for bottom graph are extracted. 136 | #' Default is \code{"spc_mean"}, which plots the mean calibration spectra after 137 | #' resampling. 138 | #' @param y2 Character string of list-column name in 139 | #' \code{mout$data$calibration}, where spectra for bottom graph are extracted. 140 | #' Default is \code{"spc_pre"}, which plots the preprocessed calibration 141 | #' spectra after resampling. 142 | #' @param by Character string that is used to assign spectra to the same group 143 | #' and therefore ensures that all spectra are plotted with the same colour. 144 | #' Default is \code{"sample_id"} 145 | #' @param xlab Character string of X axis title for shared x axis of stacked 146 | #' graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))} 147 | #' @param ylab1 Y axis title of bottom spectrum. Default is \code{"Absorbance"}. 148 | #' @param ylab2 Y axis title of bottom spectrum. Default is 149 | #' \code{"Preprocessed Abs."}. 150 | #' @param alpha Double between 0 and 1 that defines transparency of spectra 151 | #' lines in returned graph (ggplot plot object). 152 | #' @usage plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre", 153 | #' by = "sample_id", 154 | #' xlab = expression(paste("Wavenumber [", cm^-1, "]")), 155 | #' ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.", 156 | #' alpha = 0.2) 157 | #' @export 158 | plot_pls_vip <- function(mout, y1 = "spc_mean", y2 = "spc_pre", 159 | by = "sample_id", 160 | xlab = expression(paste("Wavenumber [", cm^-1, "]")), 161 | ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.", 162 | alpha = 0.2) { 163 | 164 | # Avoid `R CMD check` NOTE: no visible binding for global variable `...` 165 | variable <- wavenumber <- value <- group <- vip <- NULL 166 | 167 | # Extract spectra tibble for calibration 168 | spc_tbl <- mout$data$calibration 169 | # Gather spectra in one data.table 170 | 171 | ifelse(y1 == "spc", 172 | {dt1 <- data.table::data.table(do.call(rbind, spc_tbl[, y1][[y1]]))}, 173 | {dt1 <- data.table::rbindlist(spc_tbl[, y1][[y1]])} 174 | ) 175 | 176 | ifelse(y2 == "spc", 177 | {dt2 <- data.table::data.table(do.call(rbind, spc_tbl[, y2][[y2]]))}, 178 | {dt2 <- data.table::rbindlist(spc_tbl[, y2][[y2]])} 179 | ) 180 | 181 | # Extract ID variable and append it to the data.table 182 | id <- spc_tbl[, by][[by]] 183 | dt1[, id := id] 184 | dt2[, id := id] 185 | 186 | # Convert data table to long form for ggplot2 plotting 187 | dt1_long <- data.table::melt( 188 | dt1, measure = names(dt1)[!names(dt1) %in% c("id")] 189 | ) 190 | dt1_long[, variable := as.numeric(as.character(variable))] 191 | data.table::setnames(dt1_long, old = "variable", new = "wavenumber") 192 | 193 | dt2_long <- data.table::melt( 194 | dt2, measure = names(dt2)[!names(dt2) %in% c("id")] 195 | ) 196 | dt2_long[, variable := as.numeric(as.character(variable))] 197 | data.table::setnames(dt2_long, old = "variable", new = "wavenumber") 198 | 199 | # Plot spectra and VIP scores 200 | brk <- pretty(as.numeric(names(dt1)[!names(dt1) %in% c("id")]), 201 | n = 10) 202 | maxmin <- function(x) {c(max(x), min(x))} 203 | x_lim <- maxmin(as.numeric(names(dt1)[!names(dt1) %in% c("id")])) 204 | 205 | # Extract VIP (Variable Importance in Projection) scores for the given 206 | # model 207 | df_vip <- extract_pls_vip(mout) 208 | 209 | # Determine highlighted regions above VIP = 1 210 | rects <- create_vip_rects(df_vip) 211 | 212 | # Plot for resampled and mean replicate spectra 213 | p_spc <- ggplot2::ggplot(dt1_long, ggplot2::aes(x = wavenumber, y = value)) + 214 | ggplot2::geom_rect(data = rects, inherit.aes = FALSE, 215 | ggplot2::aes(xmin = start, xmax = end, ymin = min(dt1_long$value), 216 | ymax = max(dt1_long$value), group = group), color = "transparent", 217 | fill = "orange", alpha = 0.3) + 218 | ggplot2::geom_line(data = dt1_long, inherit.aes = FALSE, 219 | ggplot2::aes(x = wavenumber, y = value, group = id), 220 | alpha = alpha, size = 0.2) + 221 | ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) + 222 | ggplot2::labs(x = xlab, y = ylab1) + 223 | ggplot2::theme_bw() + 224 | ggplot2::theme(plot.margin = ggplot2::unit(c(1, 5, -30, 6), 225 | units = "points"), axis.text.x = ggplot2::element_blank()) 226 | 227 | p_spc_pre <- ggplot2::ggplot(dt2_long, ggplot2::aes(wavenumber, value)) + 228 | ggplot2::geom_rect(data = rects, inherit.aes = FALSE, 229 | ggplot2::aes(xmin = start, xmax = end, ymin = min(dt2_long$value), 230 | ymax = max(dt2_long$value), group = group), color = "transparent", 231 | fill = "orange", alpha = 0.3) + 232 | ggplot2::geom_line(ggplot2::aes(group = id), alpha = alpha, size = 0.2) + 233 | ggplot2::labs(x = xlab, y = ylab2) + 234 | ggplot2::theme_bw() + 235 | ggplot2::theme(plot.margin = ggplot2::unit(c(0, 5, 1, 1), 236 | units = "points")) + 237 | ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) + 238 | ggplot2::theme(plot.margin = ggplot2::unit(c(1, 5, -30, 6), 239 | units = "points"), 240 | axis.title.y = ggplot2::element_text(vjust = 0.25), 241 | axis.text.x = ggplot2::element_blank()) 242 | 243 | # Plot for VIP 244 | # Extract PLS model response variable and number of components 245 | response <- as.character(unique(mout$stats$response)) 246 | ncomp <- mout$model$finalModel$ncomp 247 | p_vip <- ggplot2::ggplot(data = df_vip, 248 | ggplot2::aes(x = wavenumber, y = vip)) + 249 | ggplot2::geom_rect(data = rects, inherit.aes = FALSE, 250 | ggplot2::aes(xmin = start, xmax = end, ymin = min(df_vip$vip), 251 | ymax = max(df_vip$vip), group = group), color = "transparent", 252 | fill = "orange", alpha = 0.3) + 253 | ggplot2::geom_hline(yintercept = 1, colour = "red") + 254 | ggplot2::geom_line() + 255 | ggplot2::xlab(xlab) + 256 | ggplot2::ylab( 257 | bquote(paste(VIP[PLSR], " (", .(response), 258 | ", ", .(ncomp), " comps)"))) + 259 | ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) + 260 | ggplot2::theme_bw() + 261 | ggplot2::theme(plot.margin = ggplot2::unit(c(0, 5, 1, 1), 262 | units = "points"), axis.title.y = ggplot2::element_text(vjust = 0.25)) 263 | 264 | # Arrange plots in two panels without any margins in between 265 | # Hints from 266 | # https://stackoverflow.com/questions/42567045/alignment-of-two-plots-using-grid-arrange 267 | cowplot::plot_grid( 268 | p_spc, p_spc_pre, p_vip, rel_heights = c(0.4, 0.3, 0.3), 269 | ncol = 1, align = "v") 270 | } 271 | 272 | -------------------------------------------------------------------------------- /R/gather-spc.R: -------------------------------------------------------------------------------- 1 | #' @title Gather measurements of different spectra types, corresponding 2 | #' x-axis values and metadata from nested list. 3 | #' @description Gather spectra, corresponding x-axis values, and device and 4 | #' measurement metadata from a nested list into a spectra tibble, so that one 5 | #' row represents one spectral measurement. Spectra, x-axis values and metadata 6 | #' are mapped from the individual list elements (named after file name including 7 | #' the extension) and transformed into (list-)columns of a spectra tibble, 8 | #' which is an extended data frame. For each measurement, spectral data and 9 | #' metadata are combined into one row of the tidy data frame. In addition, the ID 10 | #' columns `unique_id`, `file_id`, and `sample_id` are extracted from 11 | #' `"metadata"` (data frame) list entries and returned as identifier columns of 12 | #' the spectra tibble. List-columns facilitate keeping related data together in 13 | #' a rectangular data structure. They can be manipulated easily during 14 | #' subsequent transformations, for example using the standardized functions of 15 | #' the simplerspec data processing pipeline. 16 | #' @param data Recursive list named with filename (`file_id`) at first level 17 | #' entries, where each element containing a sample measurement has nested 18 | #' metadata (`"metadata"`), spectra types (see `spc_types`), corresponding 19 | #' x-axis values (see section *"Details on spectra data checks and matching"*). 20 | #' The `data` list is a structural convention to organize spectra and their 21 | #' metadata. It follows for example the list structure returned from the Bruker 22 | #' OPUS binary reader `simplerspec::read_opus_univ()`. 23 | #' @param spc_types Character vector with the spectra types to be extracted 24 | #' from `data` list and gathered into list-columns. The spectra type names need 25 | #' to exactly follow the naming conventions, and the element names and contents 26 | #' need to be present at the second list hierarchy of `data`. These values are 27 | #' allowed: 28 | #' * `"spc"` (default): final raw spectra after atmospheric compensation, if 29 | #' performed (named `AB` in Bruker OPUS software; results from referencing 30 | #' sample to reference single channel reflectance and transforming to 31 | #' absorbance). 32 | #' * `"spc_nocomp"`: raw spectra without atmospheric correction 33 | #' * `"sc_sm"`: Single channel reflectance spectra of the samples 34 | #' * `"sc_rf"`: Single channel reflectance spectra of the reference (background 35 | #' spectra) 36 | #' * `"ig_sm"`: Interferograms of the sample spectra (currently only spectra 37 | #' without x-axis list-columns are matched and returned) 38 | #' * `"ig_rf"`: Interferograms of the reference spectra (currently only spectra 39 | #' without x-axis list-columns are matched and returned) 40 | #' @usage gather_spc(data, spc_types = "spc") 41 | #' @section Details on spectra data checks and matching: 42 | #' `gather_spc()` checks whether these conditions are met for each measurement 43 | #' in the list `data`: 44 | #' 1. Make sure that the first level `data` elements are named (assumed to be 45 | #' the file name the data originate from), and remove missing measurements with 46 | #' an informative message. 47 | #' 2. Remove any duplicated file names and raise a message if there are 48 | #' name duplicates at first level. 49 | #' 3. Check whether `spc_types` inputs are supported (see argument `spc_types`) 50 | #' and present at the second level of the `data` list. If not, remove 51 | #' all data elements for incomplete spectral measurements. 52 | #' 4. Match spectra types and possible corresponding x-axis types from 53 | #' a lookup list. For each selected spectrum type (left), at least one of 54 | #' the element names of the x-axis type (right) needs to be present for each 55 | #' measurement in the list `data`: 56 | #' * `"spc"` : `"wavenumbers"`, `"wavelengths"`, or `"x_values"` 57 | #' * `"spc_nocomp"` : `"wavenumbers"`, `"wavelengths"`, or `"x_values"` 58 | #' * `"sc_sm"` : `"wavenumbers_sc_sm"`, `"wavelengths_sc_sm"`, or 59 | #' `"x_values_sc_sm"` 60 | #' * `"sc_rf"` : `"wavenumbers_sc_rf"`, `"wavelengths_sc_rf"`, or 61 | #' `"x_values_sc_rf"` 62 | #' 5. Check if `"metadata"` elements are present and remove data elements for 63 | #' measurements with missing or incorrectly named metadata elements 64 | #' (message). 65 | #' @return Spectra tibble (`spc_tbl` with classes `"tbl_df"`, `"tbl"`, and 66 | #' `"data.frame"`) with the following (list-)columns: 67 | #' * `"unique_id"`: Character vector with unique measurement identifier, likely 68 | #' a string with file names in combination with date and time (extracted from 69 | #' each `"metadata"` data frame column). 70 | #' * `"file_id"` : Character vector with file name including the extension 71 | #' (extracted from each `"metadata"` data frame column). 72 | #' * `"sample_id"`: Character vector with sample identifier. For Bruker OPUS 73 | #' binary files, this corresponds to the file name without the file extension 74 | #' in integer increments of sample replicate measurements. 75 | #' * One or multiple of `"spc"`, `"spc_nocomp"`, `"sc_sm"`, or `"sc_rf"`: 76 | #' List(s) of data.table's containing spectra type(s). 77 | #' * One or multiple of `"wavenumbers"`, `"wavelengths"`, `"x_values"`, 78 | #' `"wavenumbers_sc_sm"`, `"wavelengths_sc_sm"`, `"x_values_sc_sm"`, 79 | #' `"wavenumbers_sc_rf"`, `"wavelengths_sc_rf"`, or `"x_values_sc_rf"`: 80 | #' List(s) of numeric vectors with matched x-axis values (see *"Details on 81 | #' spectra data checks and matching"* below). 82 | #' @importFrom rlang set_names 83 | #' @export 84 | gather_spc <- function(data, 85 | spc_types = "spc") { 86 | spc_types <- map(spc_types, rlang::sym) 87 | spc_types_chr <- map_chr(spc_types, rlang::quo_name) 88 | 89 | # Duplicate original data to refer to full `data` list in messages 90 | data_origin <- data 91 | 92 | ## Make sure first level `data` elements are named 93 | 94 | if (any(names(data) %in% "")) { 95 | which_missing <- which(names(data) %in% "") 96 | idx_nm_missing <- rlang::set_names( 97 | names(data)[which_missing], which_missing) 98 | message(paste0(length(which_missing), " `data` ", 99 | ifelse(length(which_missing) > 1, "elements", "element"), 100 | ifelse(length(which_missing) > 1, " have", " has"), 101 | " missing `file_id` names. ", 102 | ifelse(length(which_missing) > 1, "These", "This"), "`data` list index", 103 | ifelse(length(which_missing) > 1, " positions\n", " position\n"), 104 | ifelse(length(which_missing) > 1, "were", "was"), 105 | " therefore removed:\n\n", 106 | idx_nm_missing %>% 107 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "), 108 | "\n\n")) 109 | 110 | data <- data[-which_missing] 111 | } 112 | 113 | ## Check whether any file names (`file_id`'s) are duplicated; 114 | ## remove and raise warning in case duplicated 115 | 116 | is_duplicated <- duplicated(names(data)) 117 | idx_nm_duplicated <- rlang::set_names(names(data)[is_duplicated], 118 | which(is_duplicated)) 119 | if (any(is_duplicated)) { 120 | message(paste0(sum(is_duplicated), " `file_id` named ", 121 | ifelse(sum(is_duplicated) > 1, "elements", "element"), " of `data` ", 122 | ifelse(sum(is_duplicated) > 1, "are", "is"), 123 | " non-unique/duplicated\nand ", 124 | ifelse(sum(is_duplicated) > 1, "were", "was"), " therefore removed:\n\n", 125 | idx_nm_duplicated %>% 126 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "), 127 | "\n\n")) 128 | 129 | data <- data[-is_duplicated] 130 | } 131 | 132 | ## Check whether `spc_types` are allowed and present at the second level of 133 | ## the nested list `data` 134 | 135 | spc_types_allowed <- c("spc", "spc_nocomp", "sc_sm", "sc_rf", "ig_sm", 136 | "ig_rf") 137 | spc_types_present <- spc_types_chr %in% spc_types_allowed 138 | 139 | if (any(!spc_types_present)) { 140 | options(useFancyQuotes = FALSE) 141 | stop("The following spectrum types specified in `spc_types` are", 142 | " not supported:\n\n", 143 | paste(dQuote(spc_types_chr[!spc_types_present]), collapse = ", ")) } 144 | 145 | ## Collect names of spectrum types nested at second `file_id` `data` list 146 | ## level to check whether all specified `spc_types` are present in `data`; 147 | ## access sub-element names at that list level 148 | 149 | data_types_byfile <- purrr::modify_depth(data, .depth = 1, 150 | ~ purrr::imap_chr(.x, ~ .y)) 151 | spc_types_byfile_in <- map(data_types_byfile, ~ spc_types_chr %in% .x) 152 | 153 | if (any(map_lgl(spc_types_byfile_in, ~ any(!.x)))) { 154 | which_rm <- map_lgl(spc_types_byfile_in, ~ any(!.x)) %>% which() 155 | rm_origin <- names(data_origin[names(which_rm)]) 156 | rm_origin_lgl <- names(data_origin) %in% rm_origin 157 | which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl)) 158 | 159 | message(paste0("Spectrum types (second list level names) specified in", 160 | " `spc_types` were not found\nwithin all first level elements ", 161 | " of list `data` or are NULL (spectra data by `file_id`).\n", 162 | "Therefore, all data elements for the corresponding", 163 | " list indices and `file_id`'s\nwere removed from `data`:\n\n", 164 | which_rm_origin %>% 165 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "), 166 | "\n\n")) 167 | 168 | data <- data[- which_rm] 169 | } 170 | 171 | ## Extract the spectra of different types by plucking all types specified in 172 | ## `spc_types_chr` argment into separate first list level elements 173 | 174 | spc_mapped <- map(rlang::set_names(spc_types_chr), 175 | ~ pluck_depth(data = data, .depth = 1, .string = .x)) 176 | 177 | ## Match and extract the values for the x-axis types 178 | 179 | spc_type_x_axis <- list( 180 | "spc" = c("wavenumbers", "wavelengths", "x_values"), 181 | "spc_nocomp" = c("wavenumbers", "wavelengths", "x_values"), 182 | "sc_sm" = c("wavenumbers_sc_sm", "wavelengths_sc_sm", "x_values_sc_sm"), 183 | "sc_rf" = c("wavenumbers_sc_rf", "wavelengths_sc_rf", "x_values_sc_rf") 184 | ) 185 | 186 | lcols_x_values_matching <- purrr::flatten_chr(spc_type_x_axis[spc_types_chr]) 187 | x_values_matching <- lcols_x_values_matching[lcols_x_values_matching 188 | %in% unique(purrr::flatten_chr(data_types_byfile))] 189 | 190 | # `data` has eventually been updated (removed `data` elements) 191 | data_types_byfile <- purrr::modify_depth(data, .depth = 1, 192 | ~ purrr::imap_chr(.x, ~ .y)) 193 | x_value_types_byfile_in <- map(data_types_byfile, ~ x_values_matching %in% .x) 194 | 195 | if (any(map_lgl(x_value_types_byfile_in, ~ any(!.x)))) { 196 | which_rm <- map_lgl(x_value_types_byfile_in, ~ any(!.x)) %>% which() 197 | rm_origin <- names(data_origin[names(which_rm)]) 198 | rm_origin_lgl <- names(data_origin) %in% rm_origin 199 | which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl)) 200 | 201 | message(paste0("These x-axis types (second list level names)", 202 | " corresponding to \nspecified `spc_types` spectrum types were not", 203 | " found within\nall first level elements or are NULL", 204 | " (spectra data by `file_id`) of list `data`: \n\n", 205 | paste(dQuote(x_values_matching), collapse = ", ")), "\n\n", 206 | "Therefore, all data elements for the corresponding", 207 | " list indices\nand `file_id`'s were removed from `data`:\n\n", 208 | which_rm_origin %>% 209 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "), 210 | "\n\n") 211 | 212 | data <- data[- which_rm] 213 | spc_mapped <- map(spc_mapped, ~ .x[- which_rm]) 214 | } 215 | 216 | x_values_mapped <- map(rlang::set_names(x_values_matching), 217 | ~ pluck_depth(data = data, .depth = 1, .string = .x)) 218 | 219 | ## Combine the mapped spectra and correspoding x-axis types, and order 220 | ## list before returning spectral tibble (`spc_tbl`) 221 | 222 | spc_x_axis_types_order <- c("spc", "wavenumbers", "wavelengths", "x_values", 223 | "spc_nocomp", 224 | "sc_sm", "wavenumbers_sc_sm", "wavelengths_sc_sm", "x_values_sc_sm", 225 | "sc_rf", "wavenumbers_sc_rf", "wavelengths_sc_rf", "x_values_sc_rf", 226 | "ig_sm", "ig_rf") 227 | 228 | spc_x_values_mapped <- c(spc_mapped, x_values_mapped) 229 | spc_x_axis_types_matched <- spc_x_axis_types_order[spc_x_axis_types_order %in% 230 | names(spc_x_values_mapped)] 231 | spc_x_values_mapped <- spc_x_values_mapped[spc_x_axis_types_matched] 232 | 233 | ## Check if `metadata` elements are present for all list first level elements 234 | 235 | # `data` has eventually been updated (removed `data` elements) 236 | data_types_byfile <- purrr::modify_depth(data, .depth = 1, 237 | ~ purrr::imap_chr(.x, ~ .y)) 238 | metadata_byfile_in <- map(data_types_byfile, ~ "metadata" %in% .x) 239 | 240 | if (any(map_lgl(metadata_byfile_in, ~ any(!.x)))) { 241 | which_rm <- map_lgl(metadata_byfile_in, ~ any(!.x)) %>% which() 242 | rm_origin <- names(data_origin[names(which_rm)]) 243 | rm_origin_lgl <- names(data_origin) %in% rm_origin 244 | which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl)) 245 | 246 | message(paste0("`metadata` (second list level names) was not", 247 | " found within\n all first level elements (`metadata` by `file_id`)", 248 | " of list `data` or are NULL.\n\n", 249 | "Therefore, all data elements for the corresponding", 250 | " list indices\nand `file_id`'s were removed from `data`:\n\n", 251 | which_rm_origin %>% 252 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "), 253 | "\n\n")) 254 | 255 | data <- data[- which_rm] 256 | spc_x_values_mapped <- map(spc_x_values_mapped, ~ .x[- which_rm]) 257 | } 258 | 259 | ## Extract metadata list elements and combine into tibble 260 | 261 | metadata_mapped <- map(rlang::set_names(list("metadata")), 262 | ~ pluck_depth(data = data, .depth = 1, .string = .x)) 263 | metadata_df <- purrr::map_df(data, "metadata") 264 | id_tbl <- tibble::as_tibble( 265 | metadata_df[c("unique_id", "file_id", "sample_id")] 266 | ) 267 | 268 | ## Column bind all tibbles of metadata id's, metadata, spectra, and 269 | ## x-axis type values; return on combined spectral tibble (`spc_tbl`) 270 | 271 | spc_tbl <- tibble::as_tibble(spc_x_values_mapped) 272 | metadata_tbl <- tibble::as_tibble(metadata_mapped) 273 | dplyr::bind_cols(list(id_tbl, metadata_tbl, spc_tbl)) 274 | } 275 | 276 | 277 | # Helpers ---------------------------------------------------------------------- 278 | 279 | # Pluck out single elements from a list hierarchy level (`.depth`) 280 | # specified in `.string` based on element names; All elements with the specified 281 | # name will be transposed into a single list named by the element name. 282 | # `pluck_depth` can be combined with `purrr::map` to extract elements of 283 | # different names at all list nodes at the specified level, using e.g. 284 | # map(rlang::set_names(c("spc", "spc_nocomp")), 285 | # ~ pluck_depth(data = data, .depth = 1, .string = .x)) 286 | 287 | pluck_depth <- function(data, .depth = 1, .string = NULL) { 288 | purrr::modify_depth(data, .depth = .depth, ~ purrr::pluck(.x, .string)) 289 | } 290 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # simplerspec 2 | 3 | 4 | [![tic](https://github.com/philipp-baumann/simplerspec/workflows/tic/badge.svg?branch=master)](https://github.com/philipp-baumann/simplerspec/actions) 5 | [![DOI](https://zenodo.org/badge/67121732.svg)](https://zenodo.org/badge/latestdoi/67121732) 6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 7 | [![runiverse-package simplerspec](https://philipp-baumann.r-universe.dev/badges/simplerspec?scale=1&color=pink&style=round)](https://philipp-baumann.r-universe.dev/simplerspec) 8 | 9 | 10 | ## Short description 11 | 12 | The simplerspec package aims to facilitate spectral and additional data handling and model development for spectroscopy applications such as infrared soil spectroscopy. Different helper functions are designed to create a data and modeling workflow. Data inputs and outputs are stored in common S3 `R` objects (`lists` and `data frames`), using in addition [`data.table`](https://rdatatable.gitlab.io/data.table/) and [`tibble`](https://tibble.tidyverse.org/index.html) extensions. The functions are built to work in a pipeline and cover commonly used procedures for spectral model development and application. 13 | 14 | ## Installation 15 | 16 | The newest version of the package is available on this GitHub repository. If you find bugs you are highly welcome to report issues (write me an [email](mailto:info@spectral-cockpit.space) or create an [issue](https://github.com/philipp-baumann/simplerspec/issues)). You can install {simplerspec} from GitHub or directly from the r-universe. 17 | 18 | ```R 19 | # option 1 20 | if (!require("remotes")) install.packages("remotes") 21 | remotes::install_github("philipp-baumann/simplerspec") 22 | ``` 23 | 24 | ```R 25 | # option 2 26 | install.packages("simplerspec", 27 | repos = c("https://philipp-baumann.r-universe.dev", "https://cloud.r-project.org")) 28 | ``` 29 | 30 | ## Key features 31 | 32 | The current version of the package features among others the following functions: 33 | 34 | 1. [`read_opus_univ()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/read-opus-universal.R): Read spectra and metadata from Bruker OPUS binary files into R list 35 | 2. [`gather_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/gather-spc.R): Gather spectra and metadata from list into a tibble object (list-columns) 36 | 4. [`resample_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/resample-spc.R): Resample spectra to new wavenumber intervals 37 | 2. [`average_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/average-spc.R): Average spectra for replicate scans 38 | 5. [`preprocess_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/preprocess-spc.R): Perform pre-processing of spectra 39 | 6. [`select_spc_vars()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/select-spc.R): Select every `n`-th spectral variable and corresponding X-unit values. 40 | 7. [`join_spc_chem()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/join-chem-spectra.R): Join chemical and spectral data sets by `sample_id` 41 | 8. [`plot_spc_ext()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/plot-spc-extended.R): Extended spectral plotting; e.g. group spectra using different 42 | panels or color spectra based on chemical reference values to explore trends. 43 | 9. [`fit_pls()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/pls-modeling.R): Perform model tuning and evaluation based on Partial Least Squares (PLS) regression 44 | 10. [`select_ref_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/select-ref-spectra.R): Select a set of reference samples to measured by 45 | traditional analysis methods when no a priori sample data except spectra are 46 | available (based on Kennard-Stones sampling) 47 | 11. [`predict_from_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/predict-spc.R): Predict multiple chemical properties from a list of calibrated models and new soil spectra 48 | 12. [`assess_multimodels()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/utils-stats.R): Assess model performance given multiple pairs of predicted and measured variables. 49 | 50 | ## Cheatsheet 51 | 52 | 53 | 54 | ## Motivation and key concepts 55 | 56 | Many R packages are available to do tasks in spectral modeling such as pre-processing of spectral data. The motivation to create this package was: 57 | 58 | 1. Avoid repetition of code in model development (common source of errors). 59 | 2. Provide a reproducible data analysis workflow for FT-IR spectroscopy. 60 | 3. R packages are an ideal way to organize and share R code. 61 | 4. Make soil FT-IR spectroscopy modeling accessible to people that have basic R knowledge. 62 | 5. Provide an integrated data-model framework that features tidy data structures designed for both user-friendly printing and efficient data processing. 63 | 64 | This package builds mainly upon functions from the following R packages: 65 | 66 | * [`prospectr`](https://cran.r-project.org/web/packages/prospectr/index.html): Various utilities for pre-processing and sample selection based on spectroscopic data. An introduction to the package with examples can be found [here](https://l-ramirez-lopez.r-universe.dev/articles/prospectr/prospectr.html). 67 | * `plyr` and [`dplyr`](https://dplyr.tidyverse.org): Fast data manipulation tools with an unified interface. 68 | * `ggplot2 `: Alternative plotting system for R, based on the grammar of graphics. See [here](https://ggplot2.tidyverse.org). 69 | * `caret `: Classification and regression training. A set of functions that attempt to streamline the process for creating predictive models. See [here](https://topepo.github.io/caret/) for details. 70 | 71 | Consistent and reproducible data and metadata management is an important prerequisite for spectral model development. Therefore, simplerspec functions are based on storing spectral data and related data in R data structures which keep related data in rows. Every row representing an observation contains data related to a single spectral measurement. Simplerspec functions uses tibble data frames as principal data structures because they allow to store lists within the well-known data frame structures. Lists are flexible data structures and can e.g. contain other lists, vectors, data.frames, or matrices. 72 | 73 | List-columns features provided within the tibble framework are an excellent base to work with functional programming tools in R, which allows to efficiently write code. 74 | Simplerspec internally uses popular functional programming extension tools provided 75 | by the [`purrr`](https://purrr.tidyverse.org/) package for processing and transforming spectra. 76 | For learning more, I would recommend 77 | [this nice purrr list-column tutorial](https://github.com/jennybc/purrr-tutorial/tree/gh-pages) 78 | provided by Jenny Brian. Further, simplerspec well integrates with the 79 | data processing API provided by the dplyr package, which makes spectroscopic 80 | analysis tidy and easy to understand. 81 | 82 | ## Example workflow 83 | 84 | Bruker FTIR spectrometers produce binary files in the OPUS format that can contain different types of spectra and many parameters such as instrument type and settings that were used at the time of data acquisition and internal processing (e.g. Fourier transform operations). Basically, the entire set of setup measurement parameters, selected spectra, supplementary metadata such as the time of measurement are written into OPUS binary files. In contrast to simple text files that contain only plain text with a defined character encoding, binary files can contain any type of data represented as sequences of bytes (a single byte is sequence of 8 bits and 1 bit either represents 0 or 1). 85 | 86 | Simplerspec comes with reader function `read_opus_univ()` that is intended to be a universal Bruker OPUS file reader that extracts spectra and key metadata from files. Usually, one is mostly interested to extract the final absorbance spectra (shown as *AB* in the OPUS viewer software). 87 | 88 | ```R 89 | # Load simplerspec package for spectral model development wrapper functions 90 | library(simplerspec) 91 | # Load tidyverse packages: loads packages frequently used for data manipulation, 92 | # data tidying, import, and plotting 93 | library(tidyverse) 94 | 95 | ################################################################################ 96 | ## Part 1: Read and pre-process spectra, read chemical data, and join 97 | ## spectral and chemical data sets 98 | ################################################################################ 99 | 100 | ## Read spectra in list ======================================================== 101 | 102 | # List of OPUS binary spectra files 103 | lf <- dir("data/spectra/soilspec_eth_bin", full.names = TRUE) 104 | 105 | # Read spectra from files into R list 106 | spc_list <- read_opus_univ(fnames = lf, extract = c("spc")) 107 | # Returns messages: 108 | #> Extracted spectra data from file: 109 | #> Extracted spectra data from file: 110 | #> Extracted spectra data from file: 111 | #> Extracted spectra data from file: 112 | #> ... 113 | ``` 114 | 115 | Pipes can make R code more readable and allows step-wise data processing 116 | when developing spectral models. The pipe operator (`%>%`, called "then") is a new operator in R that was introduced 117 | with the magrittr package. It facilitates readability of code 118 | and avoids to type intermediate objects. The basic behavior of 119 | the pipe operator is 120 | that the object on the left hand side is passed as the first argument 121 | to the function on the right hand side. When loading the tidyverse packages, the 122 | pipe operator is attached to the current R session. 123 | More details can be found [here](https://magrittr.tidyverse.org). 124 | 125 | The model development process can be quickly coded as the example below illustrates: 126 | 127 | ```R 128 | ## Spectral data processing pipe =============================================== 129 | 130 | soilspec_tbl <- spc_list %>% 131 | # Gather list of spectra data into tibble data frame 132 | gather_spc() %>% 133 | # Resample spectra to new wavenumber interval 134 | resample_spc(wn_lower = 500, wn_upper = 3996, wn_interval = 2) %>% 135 | # Average replicate scans per sample_id 136 | average_spc() %>% 137 | # Preprocess spectra using Savitzky-Golay first derivative with a window size 138 | # of 21 points 139 | preprocess_spc(select = "sg_1_w21") 140 | 141 | soilspec_tbl 142 | # A tibble: 284 x 11 143 | #> unique_id file_id sample_id 144 | #> 145 | #> 1 BF_lo_01_soil_cal.0_2015-11-06 14:34:10 BF_lo_01_soil_cal.0 BF_lo_01_soil_cal 146 | #> 2 BF_lo_01_soil_cal.1_2015-11-06 14:38:14 BF_lo_01_soil_cal.1 BF_lo_01_soil_cal 147 | #> 3 BF_lo_01_soil_cal.2_2015-11-06 14:40:55 BF_lo_01_soil_cal.2 BF_lo_01_soil_cal 148 | #> 4 BF_lo_02_soil_cal.0_2015-11-06 17:27:55 BF_lo_02_soil_cal.0 BF_lo_02_soil_cal 149 | #> 5 BF_lo_02_soil_cal.1_2015-11-06 17:30:19 BF_lo_02_soil_cal.1 BF_lo_02_soil_cal 150 | #> 6 BF_lo_02_soil_cal.2_2015-11-06 17:32:47 BF_lo_02_soil_cal.2 BF_lo_02_soil_cal 151 | #> 7 BF_lo_03_soil_cal.0_2015-11-09 11:32:55 BF_lo_03_soil_cal.0 BF_lo_03_soil_cal 152 | #> 8 BF_lo_03_soil_cal.1_2015-11-09 11:35:26 BF_lo_03_soil_cal.1 BF_lo_03_soil_cal 153 | #> 9 BF_lo_03_soil_cal.2_2015-11-09 11:38:08 BF_lo_03_soil_cal.2 BF_lo_03_soil_cal 154 | #> 10 BF_lo_04_soil_cal.0_2015-11-06 10:36:13 BF_lo_04_soil_cal.0 BF_lo_04_soil_cal 155 | #> # ... with 274 more rows, and 8 more variables: spc , wavenumbers , 156 | #> # metadata , spc_rs , wavenumbers_rs , spc_mean , 157 | #> # spc_pre , xvalues_pre 158 | 159 | 160 | ## Read chemical reference data and join with spectral data ==================== 161 | 162 | # Read chemical reference analysis data 163 | soilchem_tbl <- read_csv(file = "data/soilchem/soilchem_yamsys.csv") 164 | #> Parsed with column specification: 165 | #> cols( 166 | #> .default = col_double(), 167 | #> sample_ID = col_character(), 168 | #> country = col_character(), 169 | #> site = col_character(), 170 | #> material = col_character(), 171 | #> site_comb = col_character() 172 | #> ) 173 | #> See spec(...) for full column specifications. 174 | 175 | # Join spectra tibble and chemical reference analysis tibble 176 | spec_chem <- join_spc_chem( 177 | spc_tbl = soilspec_tbl, chem_tbl = soilchem_tbl, by = "sample_id") 178 | #> Joining, by = "sample_id" 179 | 180 | ################################################################################ 181 | ## Part 2: Run PLS regression models for different soil variables 182 | ################################################################################ 183 | 184 | # Example Partial Least Squares (PLS) Regression model for total Carbon (C) 185 | # Use repeated k-fold cross-validation to tune the model (choose optimal 186 | # number of PLS components) and estimate model performance on hold-out 187 | # predictions of the finally chosen model (model assessment). 188 | # This allows to use the entire set for both model building and evaluation; 189 | # recommended for small data sets 190 | pls_C <- fit_pls( 191 | # remove rows with NA in the data 192 | spec_chem = spec_chem[!is.na(spec_chem$C), ], 193 | response = C, 194 | evaluation_method = "resampling", 195 | tuning_method = "resampling", 196 | resampling_method = "rep_kfold_cv", 197 | pls_ncomp_max = 7 # maximum number of PLS components tested during tuning 198 | ) 199 | ``` 200 | 201 | ## Projects using simplerspec 202 | 203 | * [Spectral platform for soil samples of the Democratic Republic of Congo](https://sae-interactive-data.ethz.ch/simplerspec.drc/) 204 | 205 | ## Package help 206 | 207 | After successfully installing simplerspec, you can use the R build-in help 208 | using `?simplerspec::` 209 | 210 | ## Like it? 211 | 212 | [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/specphil) 213 | 214 | ## Credits 215 | 216 | I would like to thank the following people for the inspiration by concepts, code and packages: 217 | 218 | * Antoine Stevens and Leonardo Ramirez-Lopez for their contributions to the [prospectr package](https://l-ramirez-lopez.r-universe.dev/prospectr) and the 219 | *Guide to Diffuse Reflectance Spectroscopy & Multivariate Calibration* 220 | * Andrew Sila, Tomislav Hengl, and Thomas Terhoeven-Urselmans for the [`read.opus()`](https://github.com/cran/soil.spec/blob/master/R/read.opus.R) 221 | function from the [soil.spec](https://cran.r-project.org/src/contrib/Archive/soil.spec/) package developed at ICRAF. 222 | * [Hadley Wickham](https://hadley.nz) for his work and concepts on data science within R 223 | * [Max Kuhn](https://github.com/topepo) for the creation of the caret package and for his excellent teaching 224 | materials on [applied predictive modeling](https://link.springer.com/book/10.1007/978-1-4614-6849-3) 225 | -------------------------------------------------------------------------------- /R/plot-spc-extended.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Helper functions to gather spectra, corresponding x-value vectors, 3 | ## metadata and measure columns (e.g. chemical reference data) from tibble 4 | ## list-columns into a single data.table or a list of data.tables conntaining 5 | ## long form data directly to be used for customized ggplot2 plotting functions 6 | ################################################################################ 7 | 8 | # bind a list-column in a tibble to a list of data.tables ---------------------- 9 | 10 | #' @title Bind list-columns within a tibble into a list of data.tables 11 | #' @description Bind one to many list-columns in spectral tibble into a list 12 | #' of data.tables. 13 | #' @param spc_tbl Spectral data in a tibble data frame (classes "tibble_df", 14 | #' "tbl" and "data.frame"). 15 | #' @param lcols Character vector of column names of list-columns to be bound 16 | #' into a list of data.tables 17 | #' @param spc_id Character vector denoting column name for a unique spectrum ID. 18 | #' Default is \code{"unique_id"}. 19 | #' @param group_id Character vector denoting column name for the spectrum group 20 | #' ID. Default is \code{"sample_id"}. The group ID can later be used for 21 | #' plotting spectra by group (e.g. by using different colors or panels). 22 | #' @return A list of data.tables. Elements contain data from list-columns 23 | #' specified in \code{lcols} argument as data.tables. All data.tables contain in 24 | #' addition \code{spc_id} and \code{group_id} columns. 25 | #' @export 26 | bind_lcols_dts <- function(spc_tbl, lcols, 27 | spc_id = "unique_id", 28 | group_id = "sample_id") { 29 | 30 | # todo: add warning for lcols not present in spc_tbl 31 | which_bind <- colnames(spc_tbl) %in% lcols 32 | lcols_to_bind <- colnames(spc_tbl)[which_bind] 33 | names(lcols_to_bind) <- lcols_to_bind 34 | dts <- purrr::map(lcols_to_bind, 35 | function(y) { 36 | if (is.list(spc_tbl[, y][[y]])) { 37 | # todo: Test if number of columns is equal in each data.frame or matrix 38 | # of the list-(column); if not, return a comprehensible error 39 | data.table::data.table(do.call(rbind, spc_tbl[, y][[y]])) 40 | } else if (is.atomic((spc_tbl[, y][[y]]))) { 41 | data.table::data.table(spc_tbl[, y]) 42 | } 43 | } 44 | ) 45 | # Append IDs to data.tables in list 46 | spc_id <- spc_tbl[, spc_id][[spc_id]] 47 | lcol_types <- purrr::imap(dts, ~ rep(.y, nrow(spc_tbl))) 48 | group_id <- as.character(spc_tbl[, group_id][[group_id]]) 49 | 50 | # Return list of data.tables 51 | purrr::imap(dts, function(dt, nm) { 52 | dt[, `:=` (spc_id = spc_id, group_id = group_id)] 53 | dt[, `:=` (lcol_type = lcol_types[[nm]])]} 54 | ) 55 | } 56 | 57 | 58 | # Convert list of wide form data.tables into long form ------------------------- 59 | 60 | dts_to_long <- function(spc_tbl, lcols, 61 | spc_id = "unique_id", 62 | group_id = "sample_id", 63 | variable_name = "variable", 64 | value_name = "value") { 65 | 66 | dts <- bind_lcols_dts(spc_tbl = spc_tbl, lcols = lcols, 67 | spc_id = spc_id, group_id = group_id) 68 | # Convert list of data.tables into long form 69 | dts_long <- purrr::map(dts, function(x) { 70 | data.table::melt( 71 | x, 72 | id.vars = c("spc_id", "lcol_type", "group_id"), 73 | variable.factor = FALSE, 74 | variable.name = variable_name, 75 | value.name = value_name 76 | )} 77 | ) 78 | # Append unique id (idx lg:= index 'long') for long form 79 | purrr::imap(dts_long, 80 | function(dt_long, nm) { 81 | dt_long[, `:=` (id_long = 1:nrow(dt_long))] 82 | } 83 | ) 84 | } 85 | 86 | 87 | # Match the spectra list columns and corresponding xunit list columns ---------- 88 | 89 | match_lcols <- function(spc_tbl, lcols) { 90 | 91 | # Determine to which spectrum types list-columns belong 92 | lcols_spc_all <- c("spc", "spc_rs", "spc_mean", "spc_nocomp", "sc_sm", 93 | "sc_rf", "spc_pre") 94 | xvalue_lookup <- list( 95 | "spc" = c("wavenumbers", "wavelengths"), 96 | "spc_rs" = c("wavenumbers_rs", "wavelengths_rs"), 97 | "spc_mean" = c("wavenumbers_rs", "wavelengths_rs"), 98 | "spc_nocomp" = c("wavenumbers", "wavelengths"), 99 | "sc_sm" = c("wavenumbers_sc_sm"), 100 | "sc_rf" = c("wavenumbers_sc_rf"), 101 | "spc_pre" = c("xvalues_pre") 102 | ) 103 | # Create character vector of spectra type names 104 | spc_matched <- lcols[lcols %in% lcols_spc_all] 105 | spc_matched <- spc_matched[order(match(spc_matched, lcols_spc_all))] 106 | # Create vector of corresponding xunit types in predefined order 107 | xvalues <- unlist(xvalue_lookup[spc_matched]) 108 | xvalues_matched <- colnames(spc_tbl)[colnames(spc_tbl) %in% xvalues] 109 | xvalues_matched <- xvalues_matched[order(match(xvalues_matched, xvalues))] 110 | # Return all matches as list of character vectors for spectra and x-values 111 | list( 112 | spc_matched = spc_matched, 113 | xvalues_matched = xvalues_matched 114 | ) 115 | } 116 | 117 | 118 | # Create a list of long form data.tables containing spectra and x-values for 119 | # a set of spectral types ------------------------------------------------------ 120 | 121 | tolist_spc_xvalues <- function(spc_tbl, lcols_spc, 122 | spc_id = "unique_id", 123 | group_id = "sample_id", 124 | variable_name = "variable", 125 | value_name = "value") { 126 | 127 | lcols_matched <- match_lcols(spc_tbl = spc_tbl, lcols = lcols_spc) 128 | # Check if length of matched spectra and xunits is equal and if not, 129 | # return an error 130 | spc_types <- lcols_matched[["spc_matched"]] 131 | xunit_types <- lcols_matched[["xvalues_matched"]] 132 | 133 | # Gather different spectra types into list of data.tables 134 | spc_dts <- dts_to_long(spc_tbl = spc_tbl, 135 | lcols = spc_types, spc_id = spc_id, group_id = group_id, 136 | variable_name = "spc_variable", value_name = "spc_value") 137 | # Gather corresponding xunit types into list of data.tables 138 | xvalues_dts <- dts_to_long(spc_tbl = spc_tbl, 139 | lcols = xunit_types, spc_id = spc_id, group_id = group_id, 140 | variable_name = "xvalues_variable", value_name = "xvalues_value") 141 | 142 | # Rename lcol_type to spc_type only for spectra data.tables 143 | spc_dts <- purrr::map(spc_dts, 144 | function(x) data.table::setnames(x, "lcol_type", "spc_type")) 145 | # Return data.tables in nested list 146 | list( 147 | "spc_dts" = spc_dts, 148 | "xvalues_dts" = xvalues_dts 149 | ) 150 | } 151 | 152 | 153 | # Merge data tables of spectra, xunits, metadata and measured variables 154 | # into a single long form data.table ------------------------------------------- 155 | 156 | #' @title Merge list-columns of spectra, x-axis values, metadata and additional 157 | #' measured variables into a single long form data.table 158 | #' @description Helper function that merges all spectra and related data into 159 | #' a single long form data.table than can subsequently be used for plotting. 160 | #' @param spc_tbl Tibble data frame containing spectra, x-axis values, metadata 161 | #' and eventual measured variables as list-columns. 162 | #' @param lcols_spc Character vector of spectral list-columns to be extracted. 163 | #' Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra). 164 | #' @param lcol_measure Character vector of length 1 denoting the column name 165 | #' of the measure columns. This argument is optional. Default is \code{NULL}, 166 | #' which does not extract an additional measure column. 167 | #' @param spc_id Character vector of column that contains a unique spectral 168 | #' identifier for all spectra. Default is \code{"unique_id"}. 169 | #' @param group_id Character vector of columns that is used assigning spectra 170 | #' into groups. Default is \code{"sample_id"}. The \code{group_id} can be 171 | #' used for later plotting and thereby visually separating spectral groups into 172 | #' using different colors or panels. 173 | #' @return A single data.table containing long form aggregated data of 174 | #' spectra, x-axis values, metadata and an additionally measured variable. 175 | #' @export 176 | merge_dts <- function(spc_tbl, 177 | lcols_spc = c("spc", "spc_pre"), lcol_measure = NULL, 178 | spc_id = "unique_id", 179 | group_id = "sample_id") { 180 | 181 | id_long <- NULL 182 | spc_xvalues <- tolist_spc_xvalues(spc_tbl = spc_tbl, 183 | lcols_spc = lcols_spc, spc_id = spc_id, group_id = group_id) 184 | # Set keys for merging list of data.tables for spectra and xunits 185 | purrr::imap( 186 | spc_xvalues, 187 | function(dts, nm) purrr::map(dts[[nm]], 188 | function(x) data.table::setkey(x = x, spc_id, id_long, group_id)) 189 | ) 190 | spc_xvalues <- purrr::map2(spc_xvalues[["spc_dts"]], 191 | spc_xvalues[["xvalues_dts"]], merge) 192 | 193 | # Bind metadata if present, and set keys for merging metadata to spectra 194 | metadata <- bind_lcols_dts(spc_tbl = spc_tbl, 195 | lcols = "metadata", spc_id = spc_id, group_id = group_id) 196 | dts <- list( 197 | "data" = spc_xvalues, 198 | "metadata" = rep(metadata, length(spc_xvalues)) 199 | ) 200 | if (length(metadata) == 0) dts$metadata <- NULL 201 | 202 | # Convert a "measure" tibble column (numeric|character) to list of data.tables 203 | if (!is.null(lcol_measure)) { 204 | measure <- bind_lcols_dts(spc_tbl = spc_tbl, 205 | lcols = lcol_measure, spc_id = spc_id, group_id = group_id) 206 | dts$measure <- rep(measure, length(spc_xvalues)) 207 | } 208 | # Set keys (common columns), merge metadata with spectral data (list of 209 | # data tables) and combine into a single data.table that is returned 210 | purrr::imap(dts, 211 | function(dt, nm) lapply(dts[[nm]], 212 | function(x) data.table::setkey(x = x, spc_id, group_id)) 213 | ) 214 | # Merge multiple data.table by common keys 215 | # https://gist.github.com/reinholdsson/67008ee3e671ff23b568 216 | data.table::rbindlist( 217 | lapply(seq_along(dts[[1]]), 218 | function(i) Reduce(merge, lapply(dts, `[[`, i))) 219 | ) 220 | } 221 | 222 | 223 | # Wrapper function around merge_dts for list of tibbles to aggregate data for 224 | # plotting --------------------------------------------------------------------- 225 | 226 | #' @title Wrapper function around \code{merge_dts()} for list of tibbles to 227 | #' aggregate data for plotting. 228 | #' @description Instead of a single spectral tibble (data frame) multiple 229 | #' spectral tibbles can be merged into a long-form data.table for plotting 230 | #' spectra and related data. For details, see 231 | #' \code{\link{merge_dts}}. 232 | #' @param spc_tbl_l List of spectral tibbles (data frames). 233 | #' @param lcols_spc Character vector of spectral list-columns to be extracted. 234 | #' Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra). 235 | #' @param lcol_measure Character vector of length 1 denoting the column name 236 | #' of the measure columns. This argument is optional. Default is \code{NULL}, 237 | #' which does not extract an additional measure column. 238 | #' @param spc_id Character vector of column that contains a unique spectral 239 | #' identifier for all spectra. Default is \code{"unique_id"}. 240 | #' @param group_id Character vector of columns that is used assigning spectra 241 | #' into groups. Default is \code{"sample_id"}. The \code{group_id} can be 242 | #' used for later plotting and thereby visually separating spectral groups into 243 | #' using different colors or panels. 244 | #' @return A single data.table containing long form aggregated data of 245 | #' spectra, x-axis values, metadata and an additionally measured variable. 246 | #' An additional column called \code{group_id_tbl} is appended. It denotes 247 | #' the name of the spectral tibble supplied with the list \code{spc_tbl_l}. 248 | #' @export 249 | merge_dts_l <- function(spc_tbl_l, 250 | lcols_spc = c("spc", "spc_pre"), 251 | lcol_measure = NULL, 252 | spc_id = "unique_id", 253 | group_id = "sample_id") { 254 | 255 | group_id_tbl <- NULL 256 | 257 | dts <- lapply(seq_along(spc_tbl_l), 258 | function(i) merge_dts(spc_tbl = spc_tbl_l[[i]], 259 | lcols_spc = lcols_spc, lcol_measure = lcol_measure, 260 | spc_id = spc_id, group_id = group_id)) 261 | dts <- lapply(seq_along(dts), 262 | function(i) dts[[i]][, group_id_tbl := names(spc_tbl_l[i])]) 263 | data.table::rbindlist(dts) 264 | } 265 | 266 | 267 | ## Create plotting functions based on complete long data.table ================= 268 | 269 | # Function that reorders factor column in data.table based on ascending numeric 270 | # order when converted to numeric type 271 | # https://stackoverflow.com/questions/15665535/reorder-factors-numerically-in-a-data-frame 272 | # ------------------------------------------------------------------------------ 273 | reorder_factor_num <- function(dt, column = "group_id") { 274 | group_id <- NULL 275 | dt[, group_id := as.factor(group_id)] 276 | sorted_labels <- paste(sort(as.numeric(levels(dt$group_id)))) 277 | dt$group_id <- factor(dt$group_id, levels = sorted_labels) 278 | dt 279 | } 280 | 281 | 282 | # Custom ggplot2 labeller for spectra types ------------------------------------ 283 | 284 | relabel_spc_types <- function(lb_sc_sm = "Reflectance sample ()", 285 | lb_sc_rf = "Reflectance reference ()", 286 | lb_ig_sm = "Interferogram sample ()", 287 | lb_ig_rf = "Interferogram reference ()", 288 | lb_spc_nocomp = "Abs. before atm. comp.", 289 | lb_spc = "Absorbance", 290 | lb_spc_rs = "Resampled Abs.", 291 | lb_spc_mean = "Mean Abs.", 292 | lb_spc_pre = "Preprocessed Abs.") { 293 | ggplot2::as_labeller( 294 | x = c( 295 | "sc_sm" = lb_sc_sm, 296 | "sc_rf" = lb_sc_rf, 297 | "ig_sm" = lb_ig_sm, 298 | "spc_nocomp" = lb_spc_nocomp, 299 | "spc" = lb_spc, 300 | "spc_rs" = lb_spc_rs, 301 | "spc_mean" = lb_spc_mean, 302 | "spc_pre" = lb_spc_pre 303 | ) 304 | ) 305 | } 306 | 307 | 308 | # Main spectra explorative analysis and diagnostics plotting function ---------- 309 | 310 | #' @title ggplot2 wrapper for extended spectra plotting 311 | #' @description \code{plot_spc_ext} is a custom plotting function developed 312 | #' within the simplerspec framework. Returns plots based on ggplot2 313 | #' (class "ggplot"). Different spectra types such as raw or preprocessed spectra 314 | #' and groups can be differentiated by different colors or by using panels 315 | #' (so called facets). Additionally, spectra can be colored based on an 316 | #' additional measure variable, e.g. determined by chemical reference analysis. 317 | #' @param spc_tbl Tibble data frame containing spectra, x-axis values, metadata 318 | #' and eventual measured variables as list-columns. 319 | #' @param spc_tbl_l List of spectral tibbles (data frames). Default is 320 | #' \code{NULL} (argument is not used). 321 | #' @param lcols_spc Character vector of spectral list-columns to be extracted. 322 | #' Default is \code{"spc"} (raw spectra). 323 | #' @param lcol_measure Character vector of length 1 denoting the column name 324 | #' of the measure columns. This argument is optional. Default is \code{NULL}, 325 | #' which does not extract an additional measure column. 326 | #' @param lcol_measure_col_palette Palette value supplied to 327 | #' `ggplot::scale_colour_brewer()`. Default is `"Spectral"`, but you can set 328 | #' it to the default argument `1` (will use 329 | #' `scale_colour_brewer(..., palette = 1)`). 330 | #' @param lcol_measure_col_direction Sets the the order of colours in the scale 331 | #' that is based on a measure column. Default is \code{-1} which reverses the 332 | #' scale. Argument is passed on to the function `ggplot2::sclae_colour_brewer()` 333 | #' as argument `direction`. 334 | #' @param spc_id Character vector denoting column name for a unique spectrum ID. 335 | #' Default is \code{"unique_id"}. 336 | #' @param group_id Character vector denoting column name for the spectrum group 337 | #' ID. Default is \code{"sample_id"}. The group ID is used for 338 | #' plotting spectra by group (e.g. by using different colors or panels). 339 | #' @param group_id_order Logical that specifies whether the panel names 340 | #' derived from a numeric \code{group_id} column are reordered using ascending 341 | #' numbers. Default is \code{TRUE}. 342 | #' @param group_color Logical defining whether spectra are colored by the column 343 | #' specified by \code{group_id}. 344 | #' @param group_color_palette Character (1L) defining the diverging colour 345 | #' scales from colorbrewer.org; see `?scale_colour_brewer` for supported 346 | #' diverging colur types (`palette` argument). 347 | #' @param group_panel Logical defining whether spectra are arranged into panels 348 | #' by groups specified in \code{group_id}. Default is \code{TRUE}. 349 | #' @param group_legend Logical defining whether a legend for the \code{group_id} 350 | #' is plotted. Default is \code{FALSE}. 351 | #' @param ncol Integer vector of length 1. Defines number of columns when 352 | #' plotting panels (facets). Default is \code{NULL} (argument not used). 353 | #' @param relabel_spc Logical defining whether panels are relabeled with custom 354 | #' names for spectra types. Default is TRUE. When \code{TRUE}, arguments 355 | #' from \code{relabel_spc_types} can be passed to \code{plot_spc_ext} 356 | #' (supported via the \code{...} (ellipsis) argument) 357 | #' @param ylab Character vector or vector of type \code{"expression"} created by 358 | #' mathematical expression created by \code{expression}. Custom annotation for 359 | #' y-axis of spectra 360 | #' @param alpha Integer of length 1, from 0 to 1. Defines transparency of 361 | #' spectral lines. Default is \code{0.5} (0 is completely transparent and 362 | #' 1 is no transparency). 363 | #' @param line_width Numeric vector of length 1 specifying the width of the 364 | #' spectral lines. Default is \code{0.2}. 365 | #' @param ... Further arguments to be passed to \code{plot_spc_ext}. Currently, 366 | #' arguments of \code{relabel_spc_types} are supported. 367 | #' @return Object of class \code{"ggplot"} (ggplot2 graph). 368 | #' @export 369 | plot_spc_ext <- function(spc_tbl, spc_tbl_l = NULL, 370 | lcols_spc = "spc", 371 | lcol_measure = NULL, 372 | lcol_measure_col_palette = "Spectral", 373 | lcol_measure_col_direction = -1, 374 | spc_id = "unique_id", 375 | group_id = "sample_id", group_id_order = TRUE, 376 | group_color = TRUE, 377 | group_color_palette = NULL, 378 | group_panel = TRUE, 379 | group_legend = FALSE, 380 | ncol = NULL, 381 | relabel_spc = TRUE, 382 | ylab = "Spectrum value", 383 | alpha = 0.5, line_width = 0.2, 384 | # Further arguments to be passed to functions called 385 | # within this function 386 | ...) { 387 | 388 | # Merge spectral data, additional (measurement data) and metadata into a 389 | # single long-form data.table 390 | if (!is.null(spc_tbl_l)) { 391 | dt <- merge_dts_l(spc_tbl_l = spc_tbl_l, 392 | lcols_spc = lcols_spc, lcol_measure = lcol_measure, 393 | spc_id = spc_id, group_id = group_id) # see merge_dts_l wrapper function 394 | } else { 395 | dt <- merge_dts(spc_tbl = spc_tbl, 396 | lcols_spc = lcols_spc, lcol_measure = lcol_measure, 397 | spc_id = spc_id, group_id = group_id) 398 | } 399 | # Option to order originally numeric group_id factors by group 400 | if (is.null(spc_tbl_l)) { 401 | if (group_id_order && is.numeric(dplyr::pull(spc_tbl, !!group_id))) { 402 | dt <- reorder_factor_num(dt = dt, column = "group_id") 403 | } 404 | } 405 | brk <- pretty(dt[["xvalues_value"]], n = 10) # Pretty x-axis breaks 406 | p <- ggplot2::ggplot(data = dt, 407 | ggplot2::aes_string(x = "xvalues_value", y = "spc_value")) 408 | 409 | if (group_color == TRUE && is.null(lcol_measure)) { 410 | p <- p + 411 | ggplot2::geom_line(ggplot2::aes_string(colour = "group_id", 412 | group = "spc_id"), 413 | alpha = alpha, size = line_width) 414 | if (!is.null(group_color_palette)) { 415 | p <- p + 416 | ggplot2::scale_colour_brewer(type = "div", 417 | palette = group_color_palette, direction = -1) 418 | 419 | } 420 | if (group_legend == FALSE) { 421 | p <- p + ggplot2::guides(colour = FALSE) 422 | } 423 | } else if (group_color == FALSE && is.null(lcol_measure)) { 424 | p <- p + ggplot2::geom_line( 425 | ggplot2::aes_string(group = "spc_id"), 426 | alpha = alpha, size = line_width) 427 | } 428 | 429 | if (!is.null(lcol_measure)) { 430 | p <- p + ggplot2::geom_line( 431 | ggplot2::aes_string(colour = lcol_measure, group = "spc_id", 432 | x = "xvalues_value", y = "spc_value"), 433 | alpha = alpha, size = line_width, inherit.aes = FALSE) + 434 | ggplot2::scale_colour_distiller(palette = lcol_measure_col_palette, 435 | direction = lcol_measure_col_direction) 436 | } 437 | 438 | # Plot different spectral types and group_id in panels 439 | if (group_panel && length(lcols_spc) > 1) { 440 | if (relabel_spc) { 441 | lbl <- relabel_spc_types(...) # see this function for arguments and values 442 | p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free", 443 | labeller = ggplot2::labeller(spc_type = lbl)) 444 | } else { 445 | p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free") 446 | } 447 | } 448 | 449 | if (group_panel && length(lcols_spc) == 1) { 450 | p <- p + ggplot2::facet_wrap(~ group_id, ncol = ncol, scales = "free") 451 | } 452 | # Special case when list of tibbles are supplied 453 | if (group_panel && !is.null(spc_tbl_l)) { 454 | p <- ggplot2::ggplot(data = dt, 455 | ggplot2::aes_string(x = "xvalues_value", y = "spc_value")) + 456 | ggplot2::geom_line(ggplot2::aes_string(colour = "group_id_tbl", 457 | group = "spc_id"), alpha = alpha, size = line_width) 458 | if (relabel_spc == TRUE) { 459 | lbl <- relabel_spc_types(...) 460 | p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free", 461 | labeller = ggplot2::labeller(spc_type = lbl)) 462 | } else if (relabel_spc == FALSE) { 463 | p <- p + ggplot2::facet_wrap(~ group_id, ncol = ncol, scales = "free") 464 | } 465 | } 466 | 467 | p <- p + ggplot2::scale_x_reverse(breaks = brk) + 468 | ggplot2::xlab(expression(paste("Wavenumber [", cm^-1, "]"))) + 469 | ggplot2::ylab(ylab) + 470 | ggplot2::theme_bw() + 471 | ggplot2::theme(legend.position = "right", 472 | axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, vjust = 0.5)) 473 | p 474 | } 475 | 476 | --------------------------------------------------------------------------------