├── tests
    └── .Rapp.history
├── R
    ├── simplerspec.R
    ├── utils-pipe.R
    ├── data.R
    ├── clustering-outl-spc.R
    ├── select-spc.R
    ├── tbl-lcol-map-funprog.R
    ├── predict-spc.R
    ├── join-chem-spectra.R
    ├── average-spc.R
    ├── utils-unsupervised.R
    ├── select-ref-spectra.R
    ├── slice-spc.R
    ├── plot_spc.R
    ├── remove-outl-spectra.R
    ├── utils-stats.R
    ├── resample-spc.R
    ├── read-asd.R
    ├── preprocess-spc.R
    ├── pls-vip.R
    ├── gather-spc.R
    └── plot-spc-extended.R
├── man
    ├── .gitignore
    ├── figures
    │   └── simplerspec-logo.png
    ├── pipe.Rd
    ├── join_chem_spec.Rd
    ├── join_spc_chem.Rd
    ├── split_df2l.Rd
    ├── soilspec_yamsys.Rd
    ├── extract_lcols2dts.Rd
    ├── create_vip_rects.Rd
    ├── predict_from_spc.Rd
    ├── extract_pls_vip.Rd
    ├── select_spc_vars.Rd
    ├── select_ref_spc.Rd
    ├── evaluate_model.Rd
    ├── read_asd.Rd
    ├── bind_lcols_dts.Rd
    ├── assess_multimodels.Rd
    ├── read_asd_bin.Rd
    ├── read_opus_bin_univ.Rd
    ├── remove_outliers.Rd
    ├── slice_xvalues.Rd
    ├── average_spc.Rd
    ├── merge_dts.Rd
    ├── preprocess_spc.Rd
    ├── read_opus_univ.Rd
    ├── merge_dts_l.Rd
    ├── plot_pls_vip.Rd
    ├── plot_spc.Rd
    ├── fit_rf.Rd
    ├── plot_spc_ext.Rd
    ├── resample_spc.Rd
    ├── fit_pls.Rd
    └── gather_spc.Rd
├── .gitignore
├── _pkgdown.yml
├── data
    └── soilspec_yamsys.rda
├── .Rbuildignore
├── tic.R
├── simplerspec.Rproj
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── .github
    └── workflows
    │   └── tic.yml
└── README.md


/tests/.Rapp.history:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R/simplerspec.R:
--------------------------------------------------------------------------------
1 | #' @import rlang
2 | #' @import dplyr
3 | 


--------------------------------------------------------------------------------
/man/.gitignore:
--------------------------------------------------------------------------------
1 | .unison.tune_model_loocv_q.Rd.f89c7b744be0454bad4399a2fe4b87d9.unison.tmp
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .DS_Store
6 | ._*
7 | docs/
8 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://philipp-baumann.github.io/simplerspec
2 | 
3 | development:
4 |   mode: auto
5 | 


--------------------------------------------------------------------------------
/data/soilspec_yamsys.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/philipp-baumann/simplerspec/HEAD/data/soilspec_yamsys.rda


--------------------------------------------------------------------------------
/man/figures/simplerspec-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/philipp-baumann/simplerspec/HEAD/man/figures/simplerspec-logo.png


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^\.travis\.yml$
 4 | LICENSE\.md
 5 | ^appveyor\.yml$
 6 | ^tic\.R$
 7 | ^\.ccache$
 8 | ^\.github$
 9 | _pkgdown.yml
10 | 


--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
 1 | #' Pipe operator
 2 | #'
 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
 4 | #'
 5 | #' @name %>%
 6 | #' @rdname pipe
 7 | #' @keywords internal
 8 | #' @export
 9 | #' @importFrom magrittr %>%
10 | #' @usage lhs \%>\% rhs
11 | NULL
12 | 


--------------------------------------------------------------------------------
/tic.R:
--------------------------------------------------------------------------------
1 | # installs dependencies, runs R CMD check, runs covr::codecov()
2 | do_package_checks()
3 | 
4 | if (ci_on_ghactions() && ci_has_env("BUILD_PKGDOWN")) {
5 |   # creates pkgdown site and pushes to gh-pages branch
6 |   # only for the runner with the "BUILD_PKGDOWN" env var set
7 |   do_pkgdown()
8 | }
9 | 


--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils-pipe.R
 3 | \name{\%>\%}
 4 | \alias{\%>\%}
 5 | \title{Pipe operator}
 6 | \usage{
 7 | lhs \%>\% rhs
 8 | }
 9 | \description{
10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' Soil spectra and laboratory reference data from Baumann et al. (2021)
 2 | #'
 3 | #' Data from "Estimation of soil properties with mid-infrared soil spectroscopy
 4 | #' across yam production landscapes in West Africa".
 5 | #'
 6 | #' @format ## `soilspec_yamsys`
 7 | #' A tibble data frame with 284 rows and 40 columns. The spectra are in the
 8 | #' `spc` list-column.
 9 | #' @source <https://soil.copernicus.org/articles/7/717/2021/>
10 | "soilspec_yamsys"
11 | 


--------------------------------------------------------------------------------
/simplerspec.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/man/join_chem_spec.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/join-chem-spectra.R
 3 | \name{join_chem_spec}
 4 | \alias{join_chem_spec}
 5 | \title{Join chemical and spectral data frames}
 6 | \usage{
 7 | join_chem_spec(dat_chem, dat_spec, by = "sample_ID")
 8 | }
 9 | \arguments{
10 | \item{dat_chem}{data.frame that contains chemical values of
11 | the sample}
12 | 
13 | \item{dat_spec}{List that contains spectral data}
14 | 
15 | \item{by}{character of column name that defines sample_ID}
16 | }
17 | \value{
18 | List: xxx
19 | }
20 | \description{
21 | Combines spectral data (data.frame) and chemical
22 | data (data.frame).
23 | }
24 | 


--------------------------------------------------------------------------------
/man/join_spc_chem.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/join-chem-spectra.R
 3 | \name{join_spc_chem}
 4 | \alias{join_spc_chem}
 5 | \title{Join spectra data and chemical data tibbles}
 6 | \usage{
 7 | join_spc_chem(spc_tbl, chem_tbl, by = "sample_id")
 8 | }
 9 | \arguments{
10 | \item{spc_tbl}{Tibble that contains spectral data}
11 | 
12 | \item{chem_tbl}{Tibble that contains chemical reference values of
13 | the samples}
14 | 
15 | \item{by}{character of column name that defines sample_ID}
16 | }
17 | \value{
18 | Tibble joined by sample_id
19 | }
20 | \description{
21 | Combines spectral data (tibble class) and chemical
22 | data (tibble class).
23 | }
24 | 


--------------------------------------------------------------------------------
/man/split_df2l.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tbl-lcol-map-funprog.R
 3 | \name{split_df2l}
 4 | \alias{split_df2l}
 5 | \title{Split a tibble data frame into a list of tibbles by a group column}
 6 | \usage{
 7 | split_df2l(tbl_df, group)
 8 | }
 9 | \arguments{
10 | \item{tbl_df}{Tibble data frame}
11 | 
12 | \item{group}{Character vector with name of column based on which tibble
13 | is split into a list of tibbles}
14 | }
15 | \value{
16 | List of tibbles. Each tibble contains data split by
17 | a group column within \code{tbl_df}.
18 | }
19 | \description{
20 | Helper function that calls \code{split} on a tibble using a
21 | grouping column within tibble.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/soilspec_yamsys.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{soilspec_yamsys}
 5 | \alias{soilspec_yamsys}
 6 | \title{Soil spectra and laboratory reference data from Baumann et al. (2021)}
 7 | \format{
 8 | \subsection{\code{soilspec_yamsys}}{
 9 | 
10 | A tibble data frame with 284 rows and 40 columns. The spectra are in the
11 | \code{spc} list-column.
12 | }
13 | }
14 | \source{
15 | \url{https://soil.copernicus.org/articles/7/717/2021/}
16 | }
17 | \usage{
18 | soilspec_yamsys
19 | }
20 | \description{
21 | Data from "Estimation of soil properties with mid-infrared soil spectroscopy
22 | across yam production landscapes in West Africa".
23 | }
24 | \keyword{datasets}
25 | 


--------------------------------------------------------------------------------
/man/extract_lcols2dts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tbl-lcol-map-funprog.R
 3 | \name{extract_lcols2dts}
 4 | \alias{extract_lcols2dts}
 5 | \title{Extract multiple tibble list-columns and return data as list of
 6 | data.tables}
 7 | \usage{
 8 | extract_lcols2dts(spc_tbl, lcols)
 9 | }
10 | \arguments{
11 | \item{spc_tbl}{Spectral tibble (data frame) with spectral data contained
12 | in list-columns}
13 | 
14 | \item{lcols}{Character vector containing names of list-columns to be
15 | extracted into a list of data.tables}
16 | }
17 | \value{
18 | List of data.tables. Each element is a data.table derivied from a
19 | list-column specified in the \code{lcols} argument.
20 | }
21 | \description{
22 | Extract multiple tibble list columns, row bind them separately
23 | into single data tables and return a list of data.tables.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/create_vip_rects.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pls-vip.R
 3 | \name{create_vip_rects}
 4 | \alias{create_vip_rects}
 5 | \title{Create a data frame containing start and end positions (wavenumbers)
 6 | where variable importance in projection (VIP) is > 1}
 7 | \usage{
 8 | create_vip_rects(df_vip)
 9 | }
10 | \arguments{
11 | \item{df_vip}{Data frame containing \code{wavenumber} and \code{vip} columns
12 | (numeric)}
13 | }
14 | \value{
15 | Data.frame containing vectors \code{start} (numeric; wavenumber),
16 | \code{end} (numeric; wavenumber) and group (integer; values are
17 | \code{1:length(start))}.
18 | }
19 | \description{
20 | Given a data frame with VIP outputs (wavenumber and vip
21 | columns), start and end values denoting spectral regions where VIP > 1
22 | are returned as data frame. The functions can be used as helper
23 | function for plotting VIP.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/predict_from_spc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/predict-spc.R
 3 | \name{predict_from_spc}
 4 | \alias{predict_from_spc}
 5 | \title{Predict soil properties of new spectra based on a list of calibration models}
 6 | \usage{
 7 | predict_from_spc(model_list, spc_tbl, slice = TRUE)
 8 | }
 9 | \arguments{
10 | \item{model_list}{List of model output generated from calibration step
11 | (\code{pls_ken_stone()}}
12 | 
13 | \item{spc_tbl}{Tibble of spectra after preprocessing
14 | (\code{preprocess_spc()})}
15 | 
16 | \item{slice}{Logical expression wheather only one row per sample_id returned.}
17 | }
18 | \value{
19 | tibble with new columns \code{model}, and predicted values with
20 | column names of model list.
21 | }
22 | \description{
23 | Append predictions for a set of responses specified by a list
24 | of calibration models and a tibble containing preprocessed spectra as
25 | list-columns.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/extract_pls_vip.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pls-vip.R
 3 | \name{extract_pls_vip}
 4 | \alias{extract_pls_vip}
 5 | \title{Extract VIPs (variable importance in the projection) for a PLS
 6 | regression model output returned from model fitting with
 7 | \code{simplerspec::fit_pls()}}
 8 | \usage{
 9 | extract_pls_vip(mout)
10 | }
11 | \arguments{
12 | \item{mout}{Model output list returned from \code{simplerspec::fit_pls()}.}
13 | }
14 | \value{
15 | A tibble data frame with columns \code{wavenumber} and correponding
16 | VIP values in the column \code{vip} for the finally chosen PLS regression
17 | model at the final number of PLS components.
18 | }
19 | \description{
20 | VIPs are extracted based on the \code{finalModel} sublist
21 | in the \code{caret::train} output contained in the \code{model} element
22 | of the \code{simplerspec::fit_pls()} model output list. The VIPs for
23 | derived number of PLS components in the \code{finalModel} are computed.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/select_spc_vars.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/select-spc.R
 3 | \name{select_spc_vars}
 4 | \alias{select_spc_vars}
 5 | \title{Select every n-th spectral variable for all spectra and x-values in spectral
 6 | tibble (\code{spc_tbl})}
 7 | \usage{
 8 | select_spc_vars(
 9 |   spc_tbl,
10 |   lcol_spc = "spc_pre",
11 |   lcol_xvalues = "xvalues_pre",
12 |   every = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{spc_tbl}{Tibble data.frame containing spectra in list-column}
17 | 
18 | \item{lcol_spc}{List-column containing spectra, specified with column
19 | name as symbols or 1L character vector.}
20 | 
21 | \item{lcol_xvalues}{List-column containing x-values, specified with
22 | column name as symbols or 1L character vector.}
23 | 
24 | \item{every}{Every n-th spectral positions to keep as 1L integer vector.}
25 | }
26 | \value{
27 | a spectral tibble
28 | }
29 | \description{
30 | Select every n-th spectral variable for all spectra and x-values in spectral
31 | tibble (\code{spc_tbl})
32 | }
33 | 


--------------------------------------------------------------------------------
/man/select_ref_spc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/select-ref-spectra.R
 3 | \name{select_ref_spc}
 4 | \alias{select_ref_spc}
 5 | \title{Select a set of reference spectra to be measured by reference analysis
 6 | methods}
 7 | \usage{
 8 | select_ref_spc(spc_tbl, ratio_ref, pc, print = TRUE)
 9 | }
10 | \arguments{
11 | \item{spc_tbl}{Spectra as tibble objects that contain preprocessed spectra}
12 | 
13 | \item{ratio_ref}{Ratio of desired reference samples to total sample number}
14 | 
15 | \item{pc}{Number of principal components (numeric). If pc < 1, the number
16 | of principal components kept corresponds to the number of components
17 | explaining at least (pc * 100) percent of the total variance.}
18 | 
19 | \item{print}{logical expression whether a plot (ggplot2) of sample selection
20 | for reference analysis is shown in PCA space
21 | (\code{TRUE} or \code{FALSE}).}
22 | }
23 | \description{
24 | Select a set of calibration spectra to develop spectral models.
25 | Samples in this list will be analyzed using laboratory reference methods.
26 | }
27 | 


--------------------------------------------------------------------------------
/R/clustering-outl-spc.R:
--------------------------------------------------------------------------------
 1 | ## Robust multivariate outlier detection method based on semi-robust principal
 2 | ## components ==================================================================
 3 | 
 4 | # Apply pcout for a nested list of matrices at a certain depth
 5 | pcout_depth <- function(x, depth = 2) {
 6 |   purrr::modify_depth(.x = x, .depth = depth,
 7 |     .f = ~ mvoutlier::pcout(x = ., makeplot = FALSE))
 8 | }
 9 | 
10 | # Extracts mvoutlier::pcout $wscat elements in nested list (default at depth 2)
11 | wscat_depth <- function(x, depth = 2) {
12 |   purrr::modify_depth(.x = x, .depth = 2, .f = "wscat")
13 | }
14 | 
15 | # Returns nested list containing logicals from test which scattering weights
16 | # are zero
17 | which_wscat0_depth <- function(x) {
18 |   purrr::modify_depth(wscat_depth(x), 2, ~ . == 0)
19 | }
20 | 
21 | # Extract $wfinal01 elements
22 | wfinal01_depth <- function(x) {
23 |   purrr::modify_depth(x, 2, "wfinal01")
24 | }
25 | 
26 | # Returns nested list containing logicals from test which final 0/1 weights
27 | # are zero
28 | which_wfinal0_depth <- function(x) {
29 |   purrr::modify_depth(wfinal01_depth(x), 2, ~ . == 0)
30 | }
31 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Type: Package
 2 | Package: simplerspec
 3 | Version: 0.2.1
 4 | Date: 2023-09-27
 5 | Title: Soil and plant spectroscopic model building and prediction
 6 | Description: Functions that cover
 7 |     reading of spectral data, outlier removal,
 8 |     spectral preprocessing, calibration sampling, PLS regression
 9 |     using caret, and model diagnostic statistics and plots.
10 | Authors@R: person("Philipp", "Baumann",
11 |   email = "baumann-philipp@protonmail.com", role = c("aut", "cre"))
12 | URL: https://github.com/philipp-baumann/simplerspec
13 | BugReports: https://github.com/philipp-baumann/simplerspec
14 | Depends: R (>= 3.0),
15 |     foreach
16 | Imports:
17 |     caret,
18 |     cowplot,
19 |     data.table,
20 |     dplyr (>= 0.7.0),
21 |     e1071,
22 |     ggplot2 (>= 2.0.0),
23 |     hexView,
24 |     magrittr,
25 |     mvoutlier,
26 |     modelr,
27 |     pls,
28 |     plyr,
29 |     prospectr,
30 |     purrr,
31 |     reshape2,
32 |     rlang (>= 0.2.0),
33 |     stringr,
34 |     tibble,
35 |     tidyselect,
36 |     tidyr,
37 |     utils,
38 |     glue,
39 |     broom
40 | License: GPL-2
41 | LazyData: true
42 | RoxygenNote: 7.2.3
43 | Roxygen: list(markdown = TRUE)
44 | Encoding: UTF-8
45 | 


--------------------------------------------------------------------------------
/man/evaluate_model.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils-stats.R
 3 | \name{evaluate_model}
 4 | \alias{evaluate_model}
 5 | \alias{summary_df}
 6 | \title{Calculate model evaluation metrics}
 7 | \usage{
 8 | evaluate_model(data, obs, pred)
 9 | 
10 | summary_df(df, x, y)
11 | }
12 | \arguments{
13 | \item{data}{\code{data.frame} with predicted and observed data in columns.}
14 | 
15 | \item{obs}{Column that contains observed values, \code{symbol}/\code{name} or
16 | \code{character} (wrapped in "").}
17 | 
18 | \item{pred}{Column that contains predicted values, \code{symbol}/\code{name} or
19 | \code{character} (wrapped in "").}
20 | 
21 | \item{df}{\code{data.frame} with predicted and observed data in columns.}
22 | 
23 | \item{x}{Column that contains observed values, \code{symbol}/\code{name} or
24 | \code{character} (wrapped in "").}
25 | 
26 | \item{y}{Column that contains predicted values, \code{symbol}/\code{name} or
27 | \code{character} (wrapped in "").}
28 | }
29 | \description{
30 | Calculate summary statistics of observed values and model
31 | evaluation statistics for assessing agreement between observed (\code{obs}) and
32 | predicted (\code{pred}) values.
33 | }
34 | 


--------------------------------------------------------------------------------
/man/read_asd.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read-asd.R
 3 | \name{read_asd}
 4 | \alias{read_asd}
 5 | \title{Read ASD fieldspec spectrometer data export into into simplerspec
 6 | spectra tibble.}
 7 | \usage{
 8 | read_asd(file)
 9 | }
10 | \arguments{
11 | \item{file}{Tab delmited file from ASD software export where the first
12 | column called \code{Wavelength} contais wavelengths in nanometer and the
13 | remaining columns are sample spectra referred by an ID name provided in the
14 | first row of these columns.}
15 | }
16 | \value{
17 | Spectra data in tibble data frame (class \code{tbl_df}) that contains
18 | columns \code{sample_id} (derived from 2nd and following column names of
19 | tab delimited ASD exported text file),
20 | \code{spc} (list-column of spectral matrices)
21 | and \code{wavelengths} (list-column containing wavelength vectors).
22 | }
23 | \description{
24 | Read tab delimited text (.txt) files exported from ASD field
25 | spectrometer into simplerspec spectra tibble.
26 | ASD Fieldspec data files are expected in .txt tab delimited file format.
27 | The first row should contain
28 | the name 'Wavelength' for the first column and the file names for the
29 | remaining columns.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/bind_lcols_dts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot-spc-extended.R
 3 | \name{bind_lcols_dts}
 4 | \alias{bind_lcols_dts}
 5 | \title{Bind list-columns within a tibble into a list of data.tables}
 6 | \usage{
 7 | bind_lcols_dts(spc_tbl, lcols, spc_id = "unique_id", group_id = "sample_id")
 8 | }
 9 | \arguments{
10 | \item{spc_tbl}{Spectral data in a tibble data frame (classes "tibble_df",
11 | "tbl" and "data.frame").}
12 | 
13 | \item{lcols}{Character vector of column names of list-columns to be bound
14 | into a list of data.tables}
15 | 
16 | \item{spc_id}{Character vector denoting column name for a unique spectrum ID.
17 | Default is \code{"unique_id"}.}
18 | 
19 | \item{group_id}{Character vector denoting column name for the spectrum group
20 | ID. Default is \code{"sample_id"}. The group ID can later be used for
21 | plotting spectra by group (e.g. by using different colors or panels).}
22 | }
23 | \value{
24 | A list of data.tables. Elements contain data from list-columns
25 | specified in \code{lcols} argument as data.tables. All data.tables contain in
26 | addition \code{spc_id} and \code{group_id} columns.
27 | }
28 | \description{
29 | Bind one to many list-columns in spectral tibble into a list
30 | of data.tables.
31 | }
32 | 


--------------------------------------------------------------------------------
/man/assess_multimodels.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils-stats.R
 3 | \name{assess_multimodels}
 4 | \alias{assess_multimodels}
 5 | \title{Assess multiple pairs of measured and predicted values}
 6 | \usage{
 7 | assess_multimodels(
 8 |   data,
 9 |   ...,
10 |   .metrics = c("simplerspec", "yardstick"),
11 |   .model_name = "model"
12 | )
13 | }
14 | \arguments{
15 | \item{data}{Data frame with all measured (observed) and predicted variables.}
16 | 
17 | \item{...}{Multiple arguments with observed (measured)-predicted pairs,
18 | specified with \code{dplyr::vars(o = <column_name>, p = <column_name>)}.
19 | Column names can strings or symbols. The arguments in \code{...} need to be named.}
20 | 
21 | \item{.metrics}{Character vector with package used for metrics calculation.
22 | Default is \code{"simplerspec"}, which uses
23 | \code{simplerspec::evaluate_model()}.}
24 | 
25 | \item{.model_name}{String with name for the new column that specifies the
26 | model or the outcome variable. Default is \code{"model"}.}
27 | }
28 | \value{
29 | Data frame with with summary statistics for measured values and
30 | performance metrics for the pairs of measured and predicted values.
31 | }
32 | \description{
33 | Return performance metrics for test set predictions and
34 | measured values, e.g. for different model outcome variables.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/read_asd_bin.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read-asd.R
 3 | \name{read_asd_bin}
 4 | \alias{read_asd_bin}
 5 | \title{Read ASD binary files and gather spectra and metadata in tibble data
 6 | frame.}
 7 | \usage{
 8 | read_asd_bin(fnames)
 9 | }
10 | \arguments{
11 | \item{fnames}{Character vector containing full paths of ASD binary files
12 | to be read}
13 | }
14 | \value{
15 | A spectral tibble (data frame) containing the follwing columns:
16 | \item{unique_id}{Character vector. Unique identifier containing file name
17 | pasted with date and time.}
18 | \item{file_id}{Character vector containing file names and exension}
19 | \item{sample_id}{Character vector containing files names without extension}
20 | \item{metadata}{List-column. List of data frames containing spectral
21 | metadata}
22 | \item{wavelengths}{List-column. List of wavelengths vectors (numeric).}
23 | \item{spc_radiance}{List-column. List of data.tables containing
24 | radiance sample spectra.}
25 | \item{spc_reference}{List-column. List of data.tables containing
26 | reference reflectance spectra.}
27 | \item{spc}{List-column. List of data.tables containing final reflectance
28 | spectra.}
29 | }
30 | \description{
31 | Read multiple ASD binary files and gather spectra and metadata
32 | into a simplerspec spectral tibble (data frame). The resulting spectral
33 | tibble is compatible with the simplerspec spectra processing and modeling
34 | framework.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/read_opus_bin_univ.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read-opus-universal.R
 3 | \name{read_opus_bin_univ}
 4 | \alias{read_opus_bin_univ}
 5 | \title{Read a Bruker OPUS spectrum binary file}
 6 | \usage{
 7 | read_opus_bin_univ(file_path, extract = c("spc"),
 8 | print_progress = TRUE, atm_comp_minus4offset = FALSE)
 9 | }
10 | \arguments{
11 | \item{file_path}{Character vector with path to file}
12 | 
13 | \item{extract}{Character vector of spectra types to extract from OPUS binary
14 | file. Default is \code{c("spc")}, which will extract the final spectra, e.g.
15 | expressed in absorbance (named \code{AB} in Bruker OPUS programs). Possible
16 | additional values for the character vector supplied to extract are
17 | \code{"ScSm"} (single channel spectrum of the sample measurement), \
18 | code{"ScRf"} (single channel spectrum of the reference measurment),
19 | \code{"IgSm"} (interferogram of the sample measurment) and \code{"IgRf"}
20 | (interferogram of the reference measurement).}
21 | 
22 | \item{print_progress}{Logical (default \code{TRUE}) whether a message is
23 | printed when an OPUS binary file is parsed into an R list entry.}
24 | 
25 | \item{atm_comp_minus4offset}{Logical whether spectra after atmospheric
26 | compensation are read with an offset of \code{-4} bites from Bruker OPUS
27 | files. Default is \code{FALSE}.}
28 | }
29 | \description{
30 | Read single binary file acquired with an
31 | Bruker Vertex FTIR Instrument
32 | }
33 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export("%>%")
 4 | export(assess_multimodels)
 5 | export(average_spc)
 6 | export(bind_lcols_dts)
 7 | export(create_vip_rects)
 8 | export(evaluate_model)
 9 | export(extract_lcols2dts)
10 | export(extract_pls_vip)
11 | export(fit_pls)
12 | export(fit_rf)
13 | export(gather_spc)
14 | export(join_chem_spec)
15 | export(join_spc_chem)
16 | export(merge_dts)
17 | export(merge_dts_l)
18 | export(plot_pls_vip)
19 | export(plot_spc)
20 | export(plot_spc_ext)
21 | export(pls_ken_stone)
22 | export(predict_from_spc)
23 | export(preprocess_spc)
24 | export(read_asd)
25 | export(read_asd_bin)
26 | export(read_opus_bin_univ)
27 | export(read_opus_univ)
28 | export(remove_outliers)
29 | export(resample_spc)
30 | export(select_ref_spc)
31 | export(select_spc_vars)
32 | export(slice_xvalues)
33 | export(split_df2l)
34 | export(summary_df)
35 | import(purrr)
36 | import(stats)
37 | importFrom(data.table,":=")
38 | importFrom(data.table,.SD)
39 | importFrom(data.table,data.table)
40 | importFrom(data.table,fread)
41 | importFrom(data.table,rbindlist)
42 | importFrom(data.table,setDT)
43 | importFrom(data.table,setkey)
44 | importFrom(dplyr,pull)
45 | importFrom(e1071,kurtosis)
46 | importFrom(foreach,"%do%")
47 | importFrom(foreach,"%dopar%")
48 | importFrom(magrittr,"%>%")
49 | importFrom(purrr,imap)
50 | importFrom(purrr,modify_depth)
51 | importFrom(rlang,ensym)
52 | importFrom(rlang,quo_name)
53 | importFrom(rlang,set_names)
54 | importFrom(stringr,str_replace)
55 | importFrom(tidyselect,one_of)
56 | 


--------------------------------------------------------------------------------
/man/remove_outliers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/remove-outl-spectra.R
 3 | \name{remove_outliers}
 4 | \alias{remove_outliers}
 5 | \title{Remove outlier spectra}
 6 | \usage{
 7 | remove_outliers(list_spectra, remove = TRUE)
 8 | }
 9 | \arguments{
10 | \item{list_spectra}{List that contains averaged
11 | spectral information
12 | in list element \code{MIR_mean} (data.frame) and metadata in
13 | \code{data_meta} (data.frame).}
14 | 
15 | \item{remove}{logical expression (\code{TRUE} or \code{FALSE})
16 | that specifies weather spectra shall be removed.
17 | If \code{rm = FALSE}, there will be no outlier removal}
18 | }
19 | \value{
20 | Returns list \code{spectra_out} that contains:
21 | \itemize{
22 | \item \code{MIR_mean}: Outlier removed MIR spectra as
23 | data.frame object. If \code{remove = FALSE},
24 | the function will
25 | return almost identical list identical to \code{list_spectra},
26 | except that the first \code{indices} column of the spectral
27 | data frame \code{MIR_mean} is removed
28 | (This is done for both options
29 | \code{remove = TRUE} and \code{remove = FALSE}).
30 | \item \code{data_meta}: metadata data.frame, identical
31 | as in the \code{list_spectra} input list.
32 | \item \code{plot_out}: (optional) ggplot2 graph
33 | that shows all spectra (absorbance on x-axis and wavenumber
34 | on y-axis) with outlier marked, if
35 | \code{remove = TRUE}.
36 | }
37 | }
38 | \description{
39 | Remove outlier spectra based on the
40 | \code{pcout()} function of the \code{mvoutlier} package.
41 | }
42 | \details{
43 | This is an optional function if one wants to remove
44 | outliers.
45 | }
46 | 


--------------------------------------------------------------------------------
/R/select-spc.R:
--------------------------------------------------------------------------------
 1 | #' Select every n-th spectral variable for all spectra and x-values in spectral
 2 | #' tibble (`spc_tbl`)
 3 | #'
 4 | #' @param spc_tbl Tibble data.frame containing spectra in list-column
 5 | #' @param lcol_spc List-column containing spectra, specified with column
 6 | #' name as symbols or 1L character vector.
 7 | #' @param lcol_xvalues List-column containing x-values, specified with
 8 | #' column name as symbols or 1L character vector.
 9 | #' @param every Every n-th spectral positions to keep as 1L integer vector.
10 | #' @return a spectral tibble
11 | #' @export
12 | select_spc_vars <- function(spc_tbl,
13 |                             lcol_spc = "spc_pre",
14 |                             lcol_xvalues = "xvalues_pre",
15 |                             every = NULL) {
16 |   lcol_spc <- rlang::enquo(lcol_spc)
17 |   lcol_spc_nm <- rlang::quo_name(lcol_spc)
18 |   lcol_xvalues <- rlang::enquo(lcol_xvalues)
19 |   lcol_xvalues_nm <- rlang::quo_name(lcol_xvalues)
20 | 
21 |   stopifnot(tibble::is_tibble(spc_tbl))
22 | 
23 |   if (is.null(every)) {return(spc_tbl);
24 |     message("Returning `spc_tbl` and keep all variables.")}
25 | 
26 |   spc_lst <- dplyr::pull(spc_tbl, !!lcol_spc)
27 |   spc <- data.table::rbindlist(spc_lst)
28 | 
29 |   pos_sel <- seq(1L, ncol(spc), every)
30 | 
31 |   spc_sel <- spc[, pos_sel, with = FALSE]
32 | 
33 |   spc_lst_out <- stats::setNames(
34 |     map(purrr::transpose(spc_sel), data.table::as.data.table),
35 |       names(spc_lst))
36 | 
37 |   xvalues <- dplyr::pull(spc_tbl, !!lcol_xvalues)
38 |   xvalues_sel <- map(xvalues, ~ .x[pos_sel])
39 | 
40 |   dplyr::mutate(spc_tbl %>% dplyr::ungroup(),
41 |     !!lcol_spc_nm := spc_lst_out, !!lcol_xvalues_nm := xvalues_sel)
42 | }
43 | 


--------------------------------------------------------------------------------
/man/slice_xvalues.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/slice-spc.R
 3 | \name{slice_xvalues}
 4 | \alias{slice_xvalues}
 5 | \title{Slice spectra into defined x-axis ranges}
 6 | \usage{
 7 | slice_xvalues(
 8 |   spc_tbl,
 9 |   xunit_lcol = "wavenumbers",
10 |   spc_lcol = "spc",
11 |   xvalues_cut = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{spc_tbl}{Spectral data in a tibble object (classes "tibble_df", "tbl"
16 | and "data.frame"). The spectra tibble is expected to contain at least
17 | the column  \code{spc} (list-column with spectral matrices stored in a list)
18 | and \code{wavenumbers} or \code{wavelengths} (list-column that contains list
19 | of x-axis values).}
20 | 
21 | \item{xunit_lcol}{Character vector that specifies column name where x-axis
22 | axis units are stored within \code{spc_tbl}. Default is \code{"wavenumber"}.}
23 | 
24 | \item{spc_lcol}{Character vector that specifies which column (list-column)
25 | contains spectra to be sliced. Default is \code{"spc"}.}
26 | 
27 | \item{xvalues_cut}{List of numeric vectors that contains upper and lower bounds of respective regions to keep in spectra. The spectral regions outside
28 | the \code{xvalues_cut} intervals will be cut out in the output spectra.}
29 | }
30 | \value{
31 | Spectral tibble (data frame with list-columns) with sliced x-axis
32 | column and spectral column. Both the x-axis list-column and the spectral
33 | tibble list-column only contain data specified within the \code{xvalues_cut}
34 | argument (list of numeric vectors).
35 | }
36 | \description{
37 | Slice spectra contained in list-column of spectral tibble
38 | (data frame). A list of x-axis value ranges can be specified. Spectra are
39 | cut based on these ranges.
40 | }
41 | 


--------------------------------------------------------------------------------
/man/average_spc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/average-spc.R
 3 | \name{average_spc}
 4 | \alias{average_spc}
 5 | \title{Average spectra in list-column by entries in grouping column}
 6 | \usage{
 7 | average_spc(spc_tbl, by = "sample_id", column_in = "spc_rs")
 8 | }
 9 | \arguments{
10 | \item{spc_tbl}{Tibble data frame containing at least the grouping column
11 | given in argument \code{by} and input spectra given in list-column \code{column_in}.}
12 | 
13 | \item{by}{Character vector of length 1L or name/symbol that specifies the
14 | column by which groups of spectra are averaged. Default is \code{"sample_id"}.}
15 | 
16 | \item{column_in}{Character vector of length 1L or or name/symbol that
17 | specifies the list-column that contains the inputs spectra to be averaged.
18 | Default is \code{"spc_rs"}, which are resampled spectra (i.e., resulting after
19 | preceding \code{resample_spc()} step).}
20 | }
21 | \value{
22 | Spectra tibble data frame (class \code{"tbl_df"}, \code{"tbl"}, \code{"data.frame"})
23 | with a new list-column of column name \code{"spc_mean"} at the last position,
24 | containing mean spectra with identical row replicates within the same
25 | \code{by}-group.
26 | }
27 | \description{
28 | Average spectra in list-column of spectra tibble (\code{spc_tbl}) by
29 | groups given in group column.
30 | }
31 | \details{
32 | For memory efficiency and subsequent modeling, consider slicing the
33 | extra row copies of \code{spc_mean} resulting from \code{average_spc()} for example by
34 | \itemize{
35 | \item \verb{split(x = spc_tbl, f = spc_tbl$<by>) \%>\% lapply(., function(x) x x[1, ]) \%>\% do.call(., rbind)}
36 | \item \verb{dplyr::group_by(spc_tbl, <by>) \%>\% dplyr::slice(1L)}
37 | }
38 | }
39 | 


--------------------------------------------------------------------------------
/man/merge_dts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot-spc-extended.R
 3 | \name{merge_dts}
 4 | \alias{merge_dts}
 5 | \title{Merge list-columns of spectra, x-axis values, metadata and additional
 6 | measured variables into a single long form data.table}
 7 | \usage{
 8 | merge_dts(
 9 |   spc_tbl,
10 |   lcols_spc = c("spc", "spc_pre"),
11 |   lcol_measure = NULL,
12 |   spc_id = "unique_id",
13 |   group_id = "sample_id"
14 | )
15 | }
16 | \arguments{
17 | \item{spc_tbl}{Tibble data frame containing spectra, x-axis values, metadata
18 | and eventual measured variables as list-columns.}
19 | 
20 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted.
21 | Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).}
22 | 
23 | \item{lcol_measure}{Character vector of length 1 denoting the column name
24 | of the measure columns. This argument is optional. Default is \code{NULL},
25 | which does not extract an additional measure column.}
26 | 
27 | \item{spc_id}{Character vector of column that contains a unique spectral
28 | identifier for all spectra. Default is \code{"unique_id"}.}
29 | 
30 | \item{group_id}{Character vector of columns that is used assigning spectra
31 | into groups. Default is \code{"sample_id"}. The \code{group_id} can be
32 | used for later plotting and thereby visually separating spectral groups into
33 | using different colors or panels.}
34 | }
35 | \value{
36 | A single data.table containing long form aggregated data of
37 | spectra, x-axis values, metadata and an additionally measured variable.
38 | }
39 | \description{
40 | Helper function that merges all spectra and related data into
41 | a single long form data.table than can subsequently be used for plotting.
42 | }
43 | 


--------------------------------------------------------------------------------
/man/preprocess_spc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/preprocess-spc.R
 3 | \name{preprocess_spc}
 4 | \alias{preprocess_spc}
 5 | \title{Preprocess spectra}
 6 | \usage{
 7 | preprocess_spc(spc_tbl, select, column_in = "spc_mean", custom_function = NULL)
 8 | }
 9 | \arguments{
10 | \item{spc_tbl}{Tibble that contains spectra to be preprocessed within
11 | a list-column.}
12 | 
13 | \item{select}{Character vector of predefined preprocessing options to be
14 | applied to the spectra list-column specified in \code{column_in}.
15 | Common prefined values are stated as abbreviated preprocessing methods and
16 | options such as \code{"sg_1_w21"}, where \code{"sg"} stands for
17 | Savitzky-Golay and \code{1} for first derivative and \code{"w21"}
18 | for a window size of 21 points.}
19 | 
20 | \item{column_in}{Character vector of single list-column in \code{spc_tbl} that
21 | contain list of spectra (1 row matrix) to be processed by function supplied
22 | in \code{select}.}
23 | 
24 | \item{custom_function}{A character string of a custom processing function
25 | that is later parsed (produces expression in a list) and evaluated within
26 | the function \code{preprocess_spc}.
27 | The character vector argument of \code{custom_function}
28 | needs to contain \code{"spc_raw"}, which is the single data table of spectra
29 | that results from binding a list of data.tables (spectra to preprocess)
30 | from the spectra list-column specified in \code{column_in}.
31 | An example for a value is
32 | \code{"prospectr::savitzkyGolay(X = spc_raw, m = 0, p = 3, w = 9)"}.
33 | Optional argument. Default is \code{NULL}.}
34 | }
35 | \description{
36 | Preprocesses spectra in tibble column by sample_id after
37 | averaging spectra by \code{simplerspec::average_spc()}.
38 | }
39 | 


--------------------------------------------------------------------------------
/man/read_opus_univ.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read-opus-universal.R
 3 | \name{read_opus_univ}
 4 | \alias{read_opus_univ}
 5 | \title{Read a list of Bruker OPUS spectrum binary files.}
 6 | \usage{
 7 | read_opus_univ(fnames, extract = c("spc"), parallel = FALSE,
 8 | atm_comp_minus4offset = FALSE)
 9 | }
10 | \arguments{
11 | \item{fnames}{List of character vectors containing full path names of spectra}
12 | 
13 | \item{extract}{Character vector of spectra types to extract from file.
14 | Possible values are: "spc" (AB block in Bruker Opus software), "spc_nocomp"
15 | (Spectra before final atmospheric compensation; only present if background
16 | correction has been set in Opus), "ScSm" (Single channel spectrum of the
17 | sample), "ScRf" (Single channel spectrum of the sample), "IgSm" (Interferogram
18 | of the sample), "IgRf" (Interferogram of the reference). Default is
19 | \code{extract = c("spc")}.}
20 | 
21 | \item{parallel}{Logical (\code{TRUE} or \code{FALSE} indicating whether
22 | files are read in parallel (multiple processors or multiple cores)).
23 | Default is \code{parallel = FALSE}. If \code{TRUE} a parallel backend needs
24 | to be registered, e.g. by using the \code{doParallel} package.}
25 | 
26 | \item{atm_comp_minus4offset}{Logical whether spectra after atmospheric
27 | compensation are read with an offset of \code{-4} bites from Bruker OPUS
28 | files. Default is \code{FALSE}.}
29 | }
30 | \value{
31 | out List spectra and metadata (parameters) extracted from
32 | Bruker OPUS spectrometer files. List names are the names of the OPUS
33 | files whose spectral data were extracted.
34 | }
35 | \description{
36 | Read multiple spectral files measured with a Bruker FTIR Instrument. Files
37 | containing spectra are in OPUS binary format.
38 | \code{read_opus_univ} is a wrapper for \code{read_opus_bin_univ()})
39 | }
40 | 


--------------------------------------------------------------------------------
/man/merge_dts_l.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot-spc-extended.R
 3 | \name{merge_dts_l}
 4 | \alias{merge_dts_l}
 5 | \title{Wrapper function around \code{merge_dts()} for list of tibbles to
 6 | aggregate data for plotting.}
 7 | \usage{
 8 | merge_dts_l(
 9 |   spc_tbl_l,
10 |   lcols_spc = c("spc", "spc_pre"),
11 |   lcol_measure = NULL,
12 |   spc_id = "unique_id",
13 |   group_id = "sample_id"
14 | )
15 | }
16 | \arguments{
17 | \item{spc_tbl_l}{List of spectral tibbles (data frames).}
18 | 
19 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted.
20 | Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).}
21 | 
22 | \item{lcol_measure}{Character vector of length 1 denoting the column name
23 | of the measure columns. This argument is optional. Default is \code{NULL},
24 | which does not extract an additional measure column.}
25 | 
26 | \item{spc_id}{Character vector of column that contains a unique spectral
27 | identifier for all spectra. Default is \code{"unique_id"}.}
28 | 
29 | \item{group_id}{Character vector of columns that is used assigning spectra
30 | into groups. Default is \code{"sample_id"}. The \code{group_id} can be
31 | used for later plotting and thereby visually separating spectral groups into
32 | using different colors or panels.}
33 | }
34 | \value{
35 | A single data.table containing long form aggregated data of
36 | spectra, x-axis values, metadata and an additionally measured variable.
37 | An additional column called \code{group_id_tbl} is appended. It denotes
38 | the name of the spectral tibble supplied with the list \code{spc_tbl_l}.
39 | }
40 | \description{
41 | Instead of a single spectral tibble (data frame) multiple
42 | spectral tibbles can be merged into a long-form data.table for plotting
43 | spectra and related data. For details, see
44 | \code{\link{merge_dts}}.
45 | }
46 | 


--------------------------------------------------------------------------------
/R/tbl-lcol-map-funprog.R:
--------------------------------------------------------------------------------
 1 | # Split a tibble data frame into a list of tibbles by a group column
 2 | #' @title Split a tibble data frame into a list of tibbles by a group column
 3 | #' @description Helper function that calls \code{split} on a tibble using a
 4 | #' grouping column within tibble.
 5 | #' @param tbl_df Tibble data frame
 6 | #' @param group Character vector with name of column based on which tibble
 7 | #' is split into a list of tibbles
 8 | #' @return List of tibbles. Each tibble contains data split by
 9 | #' a group column within \code{tbl_df}.
10 | #' @export
11 | split_df2l <- function(tbl_df, group) {
12 |   split(tbl_df, tbl_df[, group])
13 | }
14 | 
15 | # Extract multiple tibble list columns, row bind them separately into
16 | # single data tables and return a list of data.tables
17 | #' @title Extract multiple tibble list-columns and return data as list of
18 | #' data.tables
19 | #' @description Extract multiple tibble list columns, row bind them separately
20 | #' into single data tables and return a list of data.tables.
21 | #' @param spc_tbl Spectral tibble (data frame) with spectral data contained
22 | #' in list-columns
23 | #' @param lcols Character vector containing names of list-columns to be
24 | #' extracted into a list of data.tables
25 | #' @return List of data.tables. Each element is a data.table derivied from a
26 | #' list-column specified in the \code{lcols} argument.
27 | #' @import purrr
28 | #' @export
29 | extract_lcols2dts <- function(spc_tbl, lcols) {
30 |   # Below code is first part of simplerspec::bind_lcols_dts
31 |   # todo: add warning for lcols not present in spc_tbl
32 |   which_bind <- colnames(spc_tbl) %in% lcols
33 |   lcols_to_bind <- colnames(spc_tbl)[which_bind]
34 |   names(lcols_to_bind) <- lcols_to_bind
35 |   dts <- map(lcols_to_bind,
36 |     function(y) {
37 |       if (is.list(spc_tbl[[y]])) {
38 |         # todo: Test if number of columns is equal in each data.frame or matrix
39 |         # of the list-(column); if not, return a comprehensible error
40 |         data.table::data.table(do.call(rbind, spc_tbl[[y]]))
41 |       } else if (is.atomic((spc_tbl[[y]]))) {
42 |         data.table::data.table(spc_tbl[, y])
43 |       }
44 |     }
45 |   )
46 | }
47 | 


--------------------------------------------------------------------------------
/R/predict-spc.R:
--------------------------------------------------------------------------------
 1 | #' @title Predict soil properties of new spectra based on a list of calibration models
 2 | #' @description Append predictions for a set of responses specified by a list
 3 | #' of calibration models and a tibble containing preprocessed spectra as
 4 | #' list-columns.
 5 | #' @param model_list List of model output generated from calibration step
 6 | #' (\code{pls_ken_stone()}
 7 | #' @param spc_tbl Tibble of spectra after preprocessing
 8 | #' (\code{preprocess_spc()})
 9 | #' @param slice Logical expression wheather only one row per sample_id returned.
10 | #' @usage predict_from_spc(model_list, spc_tbl, slice = TRUE)
11 | #' @return tibble with new columns \code{model}, and predicted values with
12 | #' column names of model list.
13 | #' @export
14 | predict_from_spc <- function(model_list, spc_tbl, slice = TRUE) {
15 | 
16 |   if (all(sapply(model_list, class) == "train")) {
17 |     # If model_list is a list of elements of class "train", model_list
18 |     # can be directly handed over to caret::extractPrediction
19 |     models <- model_list
20 |   } else {
21 |     # Extract pls_model elements (outputs from caret) for a list of models
22 |     models <- lapply(model_list, function(x) x[["model"]])
23 |     stopifnot(all(sapply(models, class) == "train"))
24 |   }
25 | 
26 |   # Group by spectra tibble by sample_id and keep one row per sample_id
27 |   if (slice == TRUE) {
28 |     spc_tbl <- spc_tbl %>% dplyr::group_by(sample_id) %>%
29 |       dplyr::slice(1L) %>% dplyr::ungroup()
30 |   }
31 | 
32 |   # Collect preprocessed spectra in one data.table
33 |   spc <- data.table::rbindlist(spc_tbl$spc_pre)
34 |   pred_caret <- caret::extractPrediction(
35 |     models,
36 |     unkX = spc
37 |   )
38 | 
39 |   # Number of caret model objects used to predict
40 |   n <- length(unique(pred_caret$object))
41 |   # Add sample_id from metadata of spectra to predicted values
42 |   sample_id <- spc_tbl$sample_id
43 |   # id column to long form data frame
44 |   id <- rep(sample_id, n)
45 |   pred_id <- cbind(pred_caret, sample_id = id)
46 |   # Get data into wide form
47 |   pred_wide <- tidyr::spread(
48 |     data = pred_id, key = "object", value = "pred"
49 |   )
50 | 
51 |   # Join predictions with tibble
52 |   dplyr::inner_join(spc_tbl, pred_wide, by = "sample_id")
53 | }
54 | 


--------------------------------------------------------------------------------
/R/join-chem-spectra.R:
--------------------------------------------------------------------------------
 1 | ## Join chemical and spectral data ==============================
 2 | #' @title Join chemical and spectral data frames
 3 | #' @description Combines spectral data (data.frame) and chemical
 4 | #' data (data.frame).
 5 | #' @param dat_chem data.frame that contains chemical values of
 6 | #' the sample
 7 | #' @param dat_spec List that contains spectral data
 8 | #' @return List: xxx
 9 | #' @param by character of column name that defines sample_ID
10 | #' @export
11 | join_chem_spec <- function(
12 |   dat_chem, dat_spec, by = "sample_ID") {
13 | 
14 |   # Alternative when "no visible binding for global variable":
15 |   data_meta <- MIR <- MIR_pre <- ori <- MIR_mean <- NULL
16 |   # http://stackoverflow.com/questions/23475309/in-r-is-it-possible-to-suppress-note-no-visible-binding-for-global-variable
17 |   # Replace sample_ID by ID
18 |   if(!is.data.frame(dat_chem)) {
19 |     stop(dat_chem, "needs to be a data.frame", call. = FALSE)
20 |   } else {
21 |   colnames(dat_chem)[colnames(dat_chem) == by] <- "ID"
22 |   dat_chem$ID <- as.factor(dat_chem$ID)
23 |   # Select only chemical data that have no outlier spectra
24 |   dat_chem <- dat_chem[dat_spec$data_meta$ID, ]
25 |   ID <- as.factor(dat_spec$data_meta$ID)
26 |   # Join ref analyses
27 |   MIRdata <- data.frame(ID = ID)
28 |   MIRdata$MIR <- dat_spec$MIR_pre
29 |   MIRdata$ori <- dat_spec$MIR_mean
30 |   # Joining by ID, type = "inner"
31 |   MIRdata_chem <- plyr::join(dat_chem, MIRdata, type = "inner")
32 |   # before dplyr::inner_join(dat_chem, MIRdata)
33 |   MIRdata_chem
34 |   }
35 | }
36 | 
37 | ## Join spectra and chemical tibbles ===========================================
38 | 
39 | #' @title Join spectra data and chemical data tibbles
40 | #' @description Combines spectral data (tibble class) and chemical
41 | #' data (tibble class).
42 | #' @param spc_tbl Tibble that contains spectral data
43 | #' @param chem_tbl Tibble that contains chemical reference values of
44 | #' the samples
45 | #' @param by character of column name that defines sample_ID
46 | #' @return Tibble joined by sample_id
47 | #' @export
48 | join_spc_chem <- function(spc_tbl, chem_tbl, by = "sample_id") {
49 | 
50 |   if(!tibble::is_tibble(spc_tbl)) {
51 |     stop(spc_tbl, "needs to be a Tibble", call. = FALSE)
52 |   } else {
53 |     # Rename column sample_ID to sample_id if sample_ID exists
54 |     if("sample_ID" %in% colnames(chem_tbl)) {
55 |       # Create a list with new column name; use standard evaluation
56 |       rename_list <- list(sample_id = "sample_ID")
57 |       chem_tbl <- dplyr::rename_(chem_tbl, .dots = rename_list)
58 |     }
59 |     spc_tbl <- dplyr::inner_join(spc_tbl, chem_tbl)
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/man/plot_pls_vip.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pls-vip.R
 3 | \name{plot_pls_vip}
 4 | \alias{plot_pls_vip}
 5 | \title{Plot stacked ggplot2 graphs with the Variable Importance for the
 6 | Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id,
 7 | and the preprocessed spectra.}
 8 | \usage{
 9 | plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre",
10 |   by = "sample_id",
11 |   xlab = expression(paste("Wavenumber [", cm^-1, "]")),
12 |   ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
13 |   alpha = 0.2)
14 | }
15 | \arguments{
16 | \item{mout}{Model output list that is returned from
17 | \code{simplerspec::fit_pls()}. This object contains a nested list with
18 | the \code{caret::train()} object (class \code{train}), based on which
19 | VIPs at finally selected number of PLS components are computed.}
20 | 
21 | \item{y1}{Character vector of list-column name in
22 | \code{mout$data$calibration}, where spectra for bottom graph are extracted.
23 | Default is \code{"spc_mean"}, which plots the mean calibration spectra after
24 | resampling.}
25 | 
26 | \item{y2}{Character string of list-column name in
27 | \code{mout$data$calibration}, where spectra for bottom graph are extracted.
28 | Default is \code{"spc_pre"}, which plots the preprocessed calibration
29 | spectra after resampling.}
30 | 
31 | \item{by}{Character string that is used to assign spectra to the same group
32 | and therefore ensures that all spectra are plotted with the same colour.
33 | Default is \code{"sample_id"}}
34 | 
35 | \item{xlab}{Character string of X axis title for shared x axis of stacked
36 | graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}}
37 | 
38 | \item{ylab1}{Y axis title of bottom spectrum. Default is \code{"Absorbance"}.}
39 | 
40 | \item{ylab2}{Y axis title of bottom spectrum. Default is
41 | \code{"Preprocessed Abs."}.}
42 | 
43 | \item{alpha}{Double between 0 and 1 that defines transparency of spectra
44 | lines in returned graph (ggplot plot object).}
45 | }
46 | \description{
47 | Plot stacked ggplot2 graphs of VIP for the final
48 | PLS regression model output of the calibration (training) data set for the
49 | final number of components, raw (replicate mean) spectra, and preprocessed
50 | spectra. Regions with VIP > 1 are highlighted across the stacked graphs
51 | in beige colour rectangles. VIP calculation is implemented as described in
52 | Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection
53 | methods when multicollinearity is present. Chemometrics and Intelligent
54 | Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011
55 | }
56 | 


--------------------------------------------------------------------------------
/man/plot_spc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_spc.R
 3 | \name{plot_spc}
 4 | \alias{plot_spc}
 5 | \title{Plot tibble spectra}
 6 | \usage{
 7 | plot_spc(spc_tbl, spc_tbl_2 = NULL,
 8 |   x_unit = "wavenumber",
 9 |   y = "spc", by = "unique_id",
10 |   graph_id_1 = "Set 1", graph_id_2 = "Set 2",
11 |   graph_id_1_col = "black", graph_id_2_col = "red",
12 |   xlab = expression(paste("Wavenumber [", cm^-1, "]")),
13 |   ylab = "Absorbance",
14 |   alpha = 0.2, legend = TRUE)
15 | }
16 | \arguments{
17 | \item{spc_tbl}{Tibble that contains the first set of spectra to plot as
18 | list-column}
19 | 
20 | \item{spc_tbl_2}{Tibble that contains the second set of spectra (optional)
21 | to plot as list-column.}
22 | 
23 | \item{x_unit}{Character string describing the x axis unit. Default is
24 | \code{"wavenumber"}, which will produce a graph with wavenumbers on the
25 | x axis with reversed number. If \code{x_unit = "wavelength"}, the axis
26 | will be in regular order (lower wavelengths in nm on the left and higher
27 | on the right side of the axis).}
28 | 
29 | \item{y}{Character string of list-column name in tibble where spectra of
30 | desired type are extracted to plot.}
31 | 
32 | \item{by}{Character string of column that is used to group the spectra.
33 | Default is \code{"unique_id"}. If replica spectra are present in the file
34 | and processed spectra resulting after averaging need to be plotted,
35 | it is recommend to use \code{"sample_id"} as argument to group according
36 | the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl}
37 | and \code{spc_tbl_2}).}
38 | 
39 | \item{graph_id_1}{Character string used for grouping the first spectra set
40 | (\code{spc_tbl}) and producing
41 | the label text accordingly. Default is \code{"Set 1"}.}
42 | 
43 | \item{graph_id_2}{Character string used for grouping the second spectra set
44 | (\code{spc_tbl_2}) and producing the label text accordingly. Default is
45 | \code{"Set 2"}}
46 | 
47 | \item{graph_id_1_col}{Character string for the colour of the first spectra
48 | set. Default is \code{"black"}.}
49 | 
50 | \item{graph_id_2_col}{Character string for the colour of the first spectra
51 | set. Default is \code{"red"}.}
52 | 
53 | \item{xlab}{Character string or mathematical expression
54 | (use \code{expression}) for the x axis title. Default is
55 | \code{expression(paste("Wavenumber [", cm^-1, "]"))}.}
56 | 
57 | \item{ylab}{Character string or mathematical expression
58 | (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.}
59 | 
60 | \item{alpha}{Double in between 0 and 1. Sets the transparency for the plotted
61 | spectra lines.}
62 | 
63 | \item{legend}{Logical whether to plot a legend for the spectra describing
64 | its name selected in arguments \code{graph_id_1} and \code{graph_id_2}.
65 | Default is \code{TRUE}.}
66 | }
67 | \description{
68 | Plot spectra from tibble spectra objects.
69 | }
70 | 


--------------------------------------------------------------------------------
/R/average-spc.R:
--------------------------------------------------------------------------------
 1 | #' @title Average spectra in list-column by entries in grouping column
 2 | #' @description Average spectra in list-column of spectra tibble (`spc_tbl`) by
 3 | #' groups given in group column.
 4 | #' @param spc_tbl Tibble data frame containing at least the grouping column
 5 | #' given in argument `by` and input spectra given in list-column `column_in`.
 6 | #' @param by Character vector of length 1L or name/symbol that specifies the
 7 | #' column by which groups of spectra are averaged. Default is `"sample_id"`.
 8 | #' @param column_in Character vector of length 1L or or name/symbol that
 9 | #' specifies the list-column that contains the inputs spectra to be averaged.
10 | #' Default is `"spc_rs"`, which are resampled spectra (i.e., resulting after
11 | #' preceding `resample_spc()` step).
12 | #' @return Spectra tibble data frame (class `"tbl_df"`, `"tbl"`, `"data.frame"`)
13 | #' with a new list-column of column name `"spc_mean"` at the last position,
14 | #' containing mean spectra with identical row replicates within the same
15 | #' `by`-group.
16 | #' @details For memory efficiency and subsequent modeling, consider slicing the
17 | #' extra row copies of `spc_mean` resulting from `average_spc()` for example by
18 | #' * `split(x = spc_tbl, f = spc_tbl$<by>) %>% lapply(., function(x) x x[1, ]) %>% do.call(., rbind)`
19 | #' * `dplyr::group_by(spc_tbl, <by>) %>% dplyr::slice(1L)`
20 | #' @import stats
21 | #' @importFrom data.table data.table rbindlist setkey setDT := .SD
22 | #' @importFrom rlang ensym quo_name
23 | #' @export
24 | average_spc <- function(spc_tbl, by = "sample_id", column_in = "spc_rs") {
25 | 
26 |   # Avoid R CMD check note: `"...no visible binding for global variable..."`
27 |   spc_rs <- sample_id <- id <- NULL
28 | 
29 |   # Quote the symbol or the string supplied by the second and third argument
30 |   column_in <- rlang::enquo(column_in)
31 |   by <- rlang::enquo(by)
32 | 
33 |   # Combine rows of all resampled spectra in one data.table
34 |   spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!column_in))
35 | 
36 |   # Add `id` group column to input spectra
37 |   spc[, id := dplyr::pull(spc_tbl, !!by)] # spc_tbl[, by][[by]]
38 | 
39 |   # Average spectra, use `id` column as index for grouping
40 |   data.table::setkey(spc, id)
41 |   spc_mean <- spc[, lapply(.SD, mean), by = id]
42 | 
43 |   # Create new vector of group ID values from column `id`
44 |   group_id_mean <- spc_mean[, id]
45 |   # Delete sample_id column in data.table
46 |   spc_mean_noid <- spc_mean[, id := NULL]
47 | 
48 |   # Create list of averaged spectra, one spectrum is one data.table
49 |   # Use best performing alternative:
50 |   # https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md
51 |   spc_mean_list <- stats::setNames(
52 |     map(purrr::transpose(spc_mean_noid), data.table::as.data.table),
53 |       group_id_mean
54 |   )
55 | 
56 |   # Convert averaged spectra and sample_id to tibble
57 |   spc_mean_tbl <- tibble::tibble(
58 |     !!by := group_id_mean, spc_mean = spc_mean_list
59 |   )
60 |   # Join mean spectra tibble spc_tbl_mean to spc_tbl
61 |   spc_tbl_out <- dplyr::left_join(spc_tbl, spc_mean_tbl,
62 |     by = rlang::quo_name(by))
63 | 
64 |   return(spc_tbl_out)
65 | }
66 | 


--------------------------------------------------------------------------------
/R/utils-unsupervised.R:
--------------------------------------------------------------------------------
 1 | # Helper functions to compute principal component analysis on spectral
 2 | # data and to append scores and importance -------------------------------------
 3 | 
 4 | pca_append_scores <- function(spc_tbl, y = "spc_pre",
 5 |                               slice = TRUE, slice_by = sample_id,
 6 |                               select_comps = 1:2,
 7 |                               scale = TRUE, center = TRUE,
 8 |                               .unnest = NULL) {
 9 |   var <- rlang::enquo(y)
10 |   var_nm_pca <- paste0(rlang::quo_name(var), "_pca_scores")
11 |   slice_by <- rlang::enquo(slice_by)
12 | 
13 |   if (slice) {
14 |     spc_tbl <- spc_tbl %>% dplyr::group_by(!!slice_by) %>% dplyr::slice(1L)
15 |   }
16 | 
17 |   sample_id <- spc_tbl %>% dplyr::pull(!!slice_by)
18 | 
19 |   # Pull the list of preprocessed spectra data.tables into one data.table
20 |   spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!var))
21 | 
22 |   # Perform a principal component analysis
23 |   spc_pca <- stats::prcomp(spc, scale = scale, center = center)
24 | 
25 |   # Extract PCA scores for selected principal components
26 |   spc_pca_scores <- dplyr::as_data_frame(spc_pca$x[, select_comps])
27 |   # Convert data frame of scores to list column
28 |   spc_pca_scores <- stats::setNames(
29 |     split(spc_pca_scores, seq_len(nrow(spc_pca_scores))),
30 |     sample_id)
31 |   ncomp <- ncol(spc_pca$rotation)
32 | 
33 |   # Add list-column with PCA scores to the spectral tibble object
34 |   spc_tbl_pca <- tibble::add_column(spc_tbl, !!var_nm_pca := spc_pca_scores)
35 |   if (!is.null(.unnest)) {
36 |     # Convert variable name of new column to symbol
37 |     var_sym_pca <- rlang::sym(var_nm_pca)
38 |     spc_tbl_pca <- spc_tbl_pca %>% tidyr::unnest(!!var_sym_pca)
39 |   }
40 |   spc_tbl_pca
41 | }
42 | 
43 | #' @noRd
44 | pca_append_importance <- function(spc_tbl, y = "spc_pre",
45 |                                   slice = TRUE, slice_by = "sample_id",
46 |                                   select_comps = 1:2,
47 |                                   scale = TRUE, center = TRUE) {
48 |   var <- rlang::enquo(y)
49 |   slice_by <- rlang::enquo(slice_by)
50 | 
51 |   if (slice) {
52 |     spc_tbl <- spc_tbl %>% dplyr::group_by(!!slice_by) %>% dplyr::slice(1L)
53 |   }
54 | 
55 |   # Pull the list of preprocessed spectra data.tables into one data.table
56 |   spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!var))
57 | 
58 |   # Perform a principal component analysis
59 |   spc_pca <- stats::prcomp(spc, scale = scale, center = center)
60 |   ncomp <- ncol(spc_pca$rotation)
61 | 
62 |   # // pb 20180509: broom::tidy(spc_pca) returns error; this is a bug in
63 |   # broom:::tidy.prcomp(); does not work if data table without row names
64 |   # Extract variance explained
65 |   importance_measures <- c("sd", "var_prop", "var_cum")
66 |   # "sd" := "Standard deviation"; "var_prop" := "Proportion of Variance";
67 |   # "var_cum" := "Cumulative Proportion"
68 |   spc_pca_varexpl <- broom::fix_data_frame(
69 |     t(summary(spc_pca)$importance),
70 |     newnames = importance_measures, newcol = "PC") %>%
71 |     tibble::add_column(ncomp = 1:ncomp, .before = 1) %>%
72 |     tibble::as_tibble()
73 | 
74 |   # Return list of spectral tibble with pc scores attached, and
75 |   # the PCA importance measures in a tidy data frame
76 |   spc_pca_varexpl
77 | }
78 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | <!-- NEWS.md is maintained by https://cynkra.github.io/fledge, do not edit -->
 2 | 
 3 | # simplerspec 0.2.1
 4 | 
 5 | - `fit_pls()`: Fixed the join of predicted vs. measured values when choosing 
 6 |   evaluation with cross-validation; now predictions and model evaluation 
 7 |   statistics are reported at best `ncomp`. Instead of an `dplyr::inner_join()`,
 8 |   all cross-validated predictions were done with `dplyr::anti_join()`, 
 9 |   specifically when using `evaluation_method == "resampling"` together with 
10 |   `tuning_method == "resampling"`. This resulted in predictions incorrectly
11 |   being aggregated for all tested but not best ncomp (calculated in caret). 
12 |   Thus, also the cross-validation metrics were not correctly reported what 
13 |   should have been the case at optimal `ncomp` derived based on resampling and
14 |   model tuning. The fix now correctly does an inner join, so that only the
15 |   values at best `ncomp` are extracted and used for the evaluation statistics. 
16 |   This is also shown on the resulting plot outputs.
17 | 
18 | # simplerspec 0.2.0
19 | 
20 | - Add new example data set `soilspec_yamsys`.
21 | - `gather_spc()`: Consolidate the documentation with details on how data are matched from list and gathered into a spectra tibble.
22 | 
23 | # simplerspec 0.1.0.9001
24 | 
25 | * `resample_spc()` now supports flexible spectra and x-axis types as inputs. Its
26 |   interface has been carefully augmented without breaking previous 
27 |   functionality ([#9](https://github.com/philipp-baumann/simplerspec/issues/9)
28 |   * New argument `column_in` specifies the string or name (unquoting support)
29 |     of the input column that contains the list of spectra. The following 
30 |     spectrum types, which are automatically matched against the list-column that
31 |     contains the corresponding x-unit value vectors, are currently supported:
32 |     `spc` (raw or unprocessed spectra), `spc_rs` (resampled spectra),
33 |     `spc_mean` (mean spectra), `spc_nocomp` (spectra prior atmospheric 
34 |     compensation), `sc_sm` (single channel sample spectra), `sc_rf` (single 
35 |     channel reference spectra), `spc_pre` (preprocessed spectra).
36 |   * New argument `interpol_method` specifying the interpolation method is 
37 |     introduced. Default is `"linear"` to achieve identical results with both
38 |     prospectr v0.1.0 and v0.2.0. The current CRAN prospectr v0.2.0 has changed
39 |     the default of `interpol` to `"spline"`. The previous `resample_spc()` 
40 |     unfortunatelty did not explicitly state the method internally, and relied 
41 |     on the default instead. The measures taken ensure downward compatibility of 
42 |     `resample_spc()` with previous versions of prospectr and simplerspec.
43 |   * The arguments gain more defensive checks inside the function (supplied types
44 |     and presence of objects in spectra).
45 |   * The function components and the help are updated accordingly. Clearer 
46 |     vocabulary to describe the functionality and more consistent terminology for
47 |     physical quantities and R objects are used.
48 |   
49 | * Add UTF-8 support to DESCRIPTION because roxygen2 version 7.1.0 requires it.
50 | 
51 | 
52 | # simplerspec 0.1.0.9000
53 | 
54 | * Start using Kirill's `{fledge}` for tracking and communicating the simplerspec
55 |   development process in `NEWS`.
56 | 
57 | 
58 | # simplerspec 0.1.0
59 | 
60 | * Added a `NEWS.md` file to track changes to the package
61 | 
62 | # simplerspec 0.1.0.1
63 | 
64 | * `read_opus_bin_univ()`: Add support for Bruker files that have undefined `PLF` value (`:= NULL`)
65 | 


--------------------------------------------------------------------------------
/R/select-ref-spectra.R:
--------------------------------------------------------------------------------
 1 | # Quick fix implementation of select_ref_samples using the tibble framework ----
 2 | 
 3 | # Perform sampling for selection of reference samples based on spectral PCA ----
 4 | #' @title Select a set of reference spectra to be measured by reference analysis
 5 | #' methods
 6 | #' @description Select a set of calibration spectra to develop spectral models.
 7 | #' Samples in this list will be analyzed using laboratory reference methods.
 8 | #' @param spc_tbl Spectra as tibble objects that contain preprocessed spectra
 9 | #' @param ratio_ref Ratio of desired reference samples to total sample number
10 | #' @param pc Number of principal components (numeric). If pc < 1, the number
11 | #' of principal components kept corresponds to the number of components
12 | #' explaining at least (pc * 100) percent of the total variance.
13 | #' @param print logical expression whether a plot (ggplot2) of sample selection
14 | #' for reference analysis is shown in PCA space
15 | #' (\code{TRUE} or \code{FALSE}).
16 | #' @usage select_ref_spc(spc_tbl, ratio_ref, pc, print = TRUE)
17 | #' @export
18 | select_ref_spc <- function(spc_tbl, ratio_ref = 0.15, pc = 2,
19 |   print = TRUE) {
20 | 
21 |   # Avoid `R CMD check` NOTE: `no visible binding for global variable ...`
22 |   PC1 <- PC2 <- type <- NULL
23 | 
24 |   if (tibble::is_tibble(spc_tbl)) {
25 |     # Slice based on sample_id if spectral data is in tibble class
26 |     spc_tbl <- dplyr::group_by(spc_tbl, !!rlang::sym("sample_id")) %>%
27 |       dplyr::slice(1L)
28 |     # Bind list of data.tables in list-column spc_pre to one data table
29 |     # containing spectral data
30 |     spc_pre <- as.matrix(data.table::rbindlist(spc_tbl$spc_pre))
31 |   }
32 |   # Perform Kennard-Stone calibration sampling ---------------------------------
33 |   sel <- prospectr::kenStone(X = spc_pre,
34 |     k = round(ratio_ref * nrow(spc_pre)), pc = pc)
35 |   # Select spectra tibble of reference samples based on row indices
36 |   spc_ref <- spc_tbl[sel$model, ]
37 |   # Select spectra tibble of prediction samples based on row indices
38 |   spc_pred <- spc_tbl[-sel$model, ]
39 | 
40 |   # Prepare data for ggplot graphs of reference and prediction sample PC score
41 |   # plots (PC1 and PC2) --------------------------------------------------------
42 |   sel_df_ref <- data.frame(sel$pc[sel$model, 1:2])
43 |   sel_df_ref$type <- as.factor(
44 |     rep("reference analysis", nrow(sel_df_ref))
45 |   )
46 |   sel_df_pred <- data.frame(sel$pc[-sel$model, 1:2])
47 |   # Create type column for visually differentiate reference and prediction
48 |   # samples
49 |   sel_df_pred$type <- as.factor(
50 |     rep("model prediction", nrow(sel_df_pred)))
51 |   # Bind rows of reference and prediction PC scores data frames
52 |   sel_df <- rbind(sel_df_ref, sel_df_pred)
53 |   # Compute ratio needed to make the figure square
54 |   ratio <- with(sel_df, diff(range(PC1)) / diff(range(PC2)))
55 |   # Create spectra PC score plots ----------------------------------------------
56 |   p_pca <- ggplot2::ggplot(data = sel_df) +
57 |     ggplot2::geom_point(
58 |       ggplot2::aes(x = PC1, y = PC2, shape = type), size = 4) +
59 |     ggplot2::coord_fixed(ratio = 1) +
60 |     ggplot2::scale_shape_manual(values=c(19, 1)) +
61 |     ggplot2::scale_colour_manual(values=c("black", "red")) +
62 |     ggplot2::theme_bw() +
63 |     ggplot2::theme(legend.title = ggplot2::element_blank())
64 |   # Print reference and prediction samples in PC1 and PC2
65 |   if (print == TRUE) {
66 |     p_pca
67 |   }
68 |   # Return spectral tibbles for reference spectra (spc_ref),
69 |   # prediction spectra (spc_pr) and ggplot object of score plots (p_pca)
70 |   list(
71 |     spc_ref = spc_ref,
72 |     spc_pred = spc_pred,
73 |     p_pca = p_pca,
74 |     pc_scores = sel$pc
75 |   )
76 | }
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/R/slice-spc.R:
--------------------------------------------------------------------------------
 1 | ################################################################################
 2 | ## Functions to slice spectra into a set of x axis ranges. A subset of the
 3 | ## spectra stored in a list-column of a spectral tibble will be selected and
 4 | ## the x unit axis vector will be reduced accordingly
 5 | ################################################################################
 6 | 
 7 | # Define a set of helper functions ---------------------------------------------
 8 | 
 9 | # Helper function to find indices of minimum distances between two vectors
10 | get_idx <- function(x, x_cut) {
11 |   sapply(x, function(x) which.min(abs(x_cut - x)))
12 | }
13 | 
14 | # Returns a a list of wavenumber position index sequences (type integer) from
15 | # a vector or list of upper and lower slicing boundary values in spectra x
16 | # units (xvalues_cut)
17 | slice_xvalues_idxseq <- function(spc_l, xvalues, xvalues_cut) {
18 |   idx_lim <- lapply(seq_along(xvalues_cut),
19 |     function(i) get_idx(x = xvalues, x_cut = xvalues_cut[i]))
20 |   Map(function(from, to) seq(from, to), idx_lim[[1]], idx_lim[[2]])
21 | }
22 | 
23 | # Use helper functions in final spectrum x unit slicing function ---------------
24 | 
25 | #' @title Slice spectra into defined x-axis ranges
26 | #' @description Slice spectra contained in list-column of spectral tibble
27 | #' (data frame). A list of x-axis value ranges can be specified. Spectra are
28 | #' cut based on these ranges.
29 | #' @param spc_tbl Spectral data in a tibble object (classes "tibble_df", "tbl"
30 | #' and "data.frame"). The spectra tibble is expected to contain at least
31 | #' the column  \code{spc} (list-column with spectral matrices stored in a list)
32 | #' and \code{wavenumbers} or \code{wavelengths} (list-column that contains list
33 | #' of x-axis values).
34 | #' @param xunit_lcol Character vector that specifies column name where x-axis
35 | #' axis units are stored within \code{spc_tbl}. Default is \code{"wavenumber"}.
36 | #' @param spc_lcol Character vector that specifies which column (list-column)
37 | #' contains spectra to be sliced. Default is \code{"spc"}.
38 | #' @param xvalues_cut List of numeric vectors that contains upper and lower bounds of respective regions to keep in spectra. The spectral regions outside
39 | #' the \code{xvalues_cut} intervals will be cut out in the output spectra.
40 | #' @return Spectral tibble (data frame with list-columns) with sliced x-axis
41 | #' column and spectral column. Both the x-axis list-column and the spectral
42 | #' tibble list-column only contain data specified within the \code{xvalues_cut}
43 | #' argument (list of numeric vectors).
44 | #' @export
45 | slice_xvalues <- function(spc_tbl, xunit_lcol = "wavenumbers", spc_lcol = "spc",
46 |                           xvalues_cut = NULL) {
47 |   if (is.atomic(xvalues_cut)) xvalues_cut <- list(xvalues_cut)
48 |   if (!is.null(xvalues_cut)) {
49 |     spc_l <- spc_tbl[[spc_lcol]]
50 |     xvalues <- spc_tbl[[xunit_lcol]]
51 |     # Match spectral indices for columns based on xvalue ranges
52 |     idxseq <- lapply(seq_along(xvalues_cut),
53 |       function(i) slice_xvalues_idxseq(
54 |         spc_l = spc_l, xvalues = xvalues, xvalues_cut = xvalues_cut[[i]]))
55 |     idxseq_c <- Reduce(function(x,y) Map(c, x, y), idxseq)
56 |     if (any(sapply(spc_l, data.table::is.data.table))) {
57 |       spc_tbl[[spc_lcol]] <- Map(function(x, idx) x[, idx, with = FALSE],
58 |         spc_l, idxseq_c) # idx is not a column name of any data.table
59 |     } else {
60 |       spc_tbl[[spc_lcol]] <- Map(function(x, idx) x[, idx], spc_l, idxseq_c)
61 |     }
62 |     spc_tbl[[xunit_lcol]] <- Map(function(x, idx) x[idx], xvalues, idxseq_c)
63 |   }
64 |   spc_tbl
65 | }
66 | 
67 | # Test spectra slicing function:
68 | # tbl_sliced <- slice_xvalues(spc_tbl = spc_tbl,
69 | #   xvalues_cut = list(c(1500, 1024), c(1004, 998)))
70 | 


--------------------------------------------------------------------------------
/.github/workflows/tic.yml:
--------------------------------------------------------------------------------
  1 | ## tic GitHub Actions template: linux-macos-windows-deploy
  2 | ## revision date: 2022-11-23
  3 | on:
  4 |   workflow_dispatch:
  5 |   push:
  6 |     branches:
  7 |     - main
  8 |     - master
  9 |     - cran-*
 10 |   pull_request:
 11 |     branches:
 12 |     - main
 13 |     - master
 14 |   # for now, CRON jobs only run on the default branch of the repo (i.e. usually on master)
 15 |   schedule:
 16 |     # * is a special character in YAML so you have to quote this string
 17 |     - cron: "0 4 * * *"
 18 | 
 19 | name: tic
 20 | 
 21 | jobs:
 22 |   all:
 23 |     runs-on: ${{ matrix.config.os }}
 24 | 
 25 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
 26 | 
 27 |     strategy:
 28 |       fail-fast: false
 29 |       matrix:
 30 |         config:
 31 |           # use a different tic template type if you do not want to build on all listed platforms
 32 |           - { os: windows-latest, r: "release" }
 33 |           - { os: macOS-latest, r: "release", pkgdown: "true", latex: "true" }
 34 |           - { os: ubuntu-latest, r: "devel" }
 35 |           - { os: ubuntu-latest, r: "release" }
 36 | 
 37 |     env:
 38 |       # make sure to run `tic::use_ghactions_deploy()` to set up deployment
 39 |       TIC_DEPLOY_KEY: ${{ secrets.TIC_DEPLOY_KEY }}
 40 |       # prevent rgl issues because no X11 display is available
 41 |       RGL_USE_NULL: true
 42 |       # if you use bookdown or blogdown, replace "PKGDOWN" by the respective
 43 |       # capitalized term. This also might need to be done in tic.R
 44 |       BUILD_PKGDOWN: ${{ matrix.config.pkgdown }}
 45 |       # use GITHUB_TOKEN from GitHub to workaround rate limits in {remotes}
 46 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 47 | 
 48 |     steps:
 49 |       - uses: actions/checkout@v3
 50 | 
 51 |       - uses: r-lib/actions/setup-r@v2
 52 |         with:
 53 |           r-version: ${{ matrix.config.r }}
 54 |           Ncpus: 4
 55 | 
 56 |       - uses: r-lib/actions/setup-tinytex@v2
 57 |         if: matrix.config.latex == 'true'
 58 | 
 59 |       - uses: r-lib/actions/setup-pandoc@v2
 60 | 
 61 |       # set date/week for use in cache creation
 62 |       # https://github.community/t5/GitHub-Actions/How-to-set-and-access-a-Workflow-variable/m-p/42970
 63 |       # - cache R packages daily
 64 |       - name: "[Cache] Prepare daily timestamp for cache"
 65 |         if: runner.os != 'Windows'
 66 |         id: date
 67 |         run: echo "date=$(date '+%d-%m')" >> $GITHUB_OUTPUT
 68 | 
 69 |       - name: "[Cache] Cache R packages"
 70 |         if: runner.os != 'Windows'
 71 |         uses: pat-s/always-upload-cache@v3
 72 |         with:
 73 |           path: ${{ env.R_LIBS_USER }}
 74 |           key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}}
 75 |           restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}}
 76 | 
 77 |       - name: "[Stage] Install pak"
 78 |         run: Rscript -e "install.packages('pak', repos = 'https://r-lib.github.io/p/pak/stable')"
 79 | 
 80 |       - name: "[Stage] Install"
 81 |         run: Rscript -e "if (grepl('Ubuntu', Sys.info()[['version']]) && !grepl('Under development', R.version[['status']])) {options(repos = c(CRAN = sprintf('https://packagemanager.rstudio.com/all/__linux__/%s/latest', system('lsb_release -cs', intern = TRUE))))}else{options(repos = c(CRAN = 'cloud.r-project.org'))}; pak::pkg_install('ropensci/tic')" -e "print(tic::dsl_load())" -e "tic::prepare_all_stages()" -e "tic::before_install()" -e "tic::install()"
 82 | 
 83 |       - name: "[Stage] Script"
 84 |         run: Rscript -e 'tic::script()'
 85 | 
 86 |       - name: "[Stage] After Success"
 87 |         run: Rscript -e "tic::after_success()"
 88 | 
 89 |       - name: "[Stage] Upload R CMD check artifacts"
 90 |         if: failure()
 91 |         uses: actions/upload-artifact@v2
 92 |         with:
 93 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
 94 |           path: check
 95 |       - name: "[Stage] Before Deploy"
 96 |         run: |
 97 |           Rscript -e "tic::before_deploy()"
 98 | 
 99 |       - name: "[Stage] Deploy"
100 |         run: Rscript -e "tic::deploy()"
101 | 
102 |       - name: "[Stage] After Deploy"
103 |         run: Rscript -e "tic::after_deploy()"
104 | 


--------------------------------------------------------------------------------
/man/fit_rf.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/pls-modeling.R
  3 | \name{fit_rf}
  4 | \alias{fit_rf}
  5 | \title{Calibration sampling, and random forest model tuning and evaluation}
  6 | \usage{
  7 | fit_rf(
  8 |   spec_chem,
  9 |   response,
 10 |   variable = NULL,
 11 |   evaluation_method = "test_set",
 12 |   validation = NULL,
 13 |   split_method = "ken_stone",
 14 |   ratio_val,
 15 |   ken_sto_pc = 2,
 16 |   pc = NULL,
 17 |   invert = TRUE,
 18 |   tuning_method = "resampling",
 19 |   resampling_seed = 123,
 20 |   cv = NULL,
 21 |   ntree_max = 500,
 22 |   print = TRUE,
 23 |   env = parent.frame()
 24 | )
 25 | }
 26 | \arguments{
 27 | \item{spec_chem}{Tibble that contains spectra, metadata and chemical
 28 | reference as list-columns. The tibble to be supplied to \code{spec_chem} can
 29 | be generated by the \verb{join_chem_spc() function}}
 30 | 
 31 | \item{response}{Response variable as symbol or name
 32 | (without quotes, no character string). The provided response symbol needs to be
 33 | a column name in the \code{spec_chem} tibble.}
 34 | 
 35 | \item{variable}{Depreciated and replaced by \code{response}}
 36 | 
 37 | \item{evaluation_method}{Character string stating evaluation method.
 38 | Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
 39 | will split the data into a calibration (training) and validation (test) set,
 40 | and evaluate the final model by predicting on the validation set.
 41 | If \code{"resampling"}, the finally selected model will be evaluated based
 42 | on the cross-validation hold-out predictions.}
 43 | 
 44 | \item{validation}{Depreciated and replaced by \code{evaluation_method}.
 45 | Default is \code{TRUE}.}
 46 | 
 47 | \item{split_method}{Method how to to split the data into a independent test
 48 | set. Default is \code{"ken_sto"}, which will select samples for calibration
 49 | based on Kennard-Stone sampling algorithm of preprocessed spectra. The
 50 | proportion of validation to the total number of samples can be specified
 51 | in the argument \code{ratio_val}.
 52 | \code{split_method = "random"} will create a single random split.}
 53 | 
 54 | \item{ratio_val}{Ratio of validation (test) samples to
 55 | total number of samples (calibration (training) and validation (test)).}
 56 | 
 57 | \item{ken_sto_pc}{Number of component used
 58 | for calculating mahalanobsis distance on PCA scores for computing
 59 | Kennard-Stone algorithm.
 60 | Default is \code{ken_sto_pc = 2}, which will use the first two PCA
 61 | components.}
 62 | 
 63 | \item{pc}{Depreciated; renamed argument is \code{ken_sto_pc}.}
 64 | 
 65 | \item{invert}{Logical}
 66 | 
 67 | \item{tuning_method}{Character specifying tuning method. Tuning method
 68 | affects how caret selects a final tuning value set from a list of candidate
 69 | values. Possible values are \code{"resampling"}, which will use a
 70 | specified resampling method such as repeated k-fold cross-validation (see
 71 | argument \code{resampling_method}) and the generated performance profile
 72 | based on the hold-out predictions to decide on the final tuning values
 73 | that lead to optimal model performance. The value \code{"none"} will force
 74 | caret to compute a final model for a predefined canditate PLS tuning
 75 | parameter number of PLS components. In this case, the value
 76 | supplied by \code{ncomp_fixed}` is used to set model complexity at
 77 | a fixed number of components.}
 78 | 
 79 | \item{resampling_seed}{Random seed (integer) that will be used for generating
 80 | resampling indices, which will be supplied to \code{caret::trainControl}.
 81 | This makes sure that modeling results are constant when re-fitting.
 82 | Default is \code{resampling_seed = 123}.}
 83 | 
 84 | \item{cv}{Depreciated. Use \code{resampling_method} instead.}
 85 | 
 86 | \item{ntree_max}{Maximum random forest trees
 87 | by caret::train. Caret will aggregate a performance profile using resampling
 88 | for an integer sequence from 1 to \code{ntree_max} trees.}
 89 | 
 90 | \item{print}{Logical expression whether model evaluation graphs shall be
 91 | printed}
 92 | 
 93 | \item{env}{Environment where function is evaluated. Default is
 94 | \code{parent.frame}.}
 95 | }
 96 | \description{
 97 | Perform calibration sampling and use selected
 98 | calibration set for model tuning
 99 | }
100 | 


--------------------------------------------------------------------------------
/man/plot_spc_ext.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/plot-spc-extended.R
  3 | \name{plot_spc_ext}
  4 | \alias{plot_spc_ext}
  5 | \title{ggplot2 wrapper for extended spectra plotting}
  6 | \usage{
  7 | plot_spc_ext(
  8 |   spc_tbl,
  9 |   spc_tbl_l = NULL,
 10 |   lcols_spc = "spc",
 11 |   lcol_measure = NULL,
 12 |   lcol_measure_col_palette = "Spectral",
 13 |   lcol_measure_col_direction = -1,
 14 |   spc_id = "unique_id",
 15 |   group_id = "sample_id",
 16 |   group_id_order = TRUE,
 17 |   group_color = TRUE,
 18 |   group_color_palette = NULL,
 19 |   group_panel = TRUE,
 20 |   group_legend = FALSE,
 21 |   ncol = NULL,
 22 |   relabel_spc = TRUE,
 23 |   ylab = "Spectrum value",
 24 |   alpha = 0.5,
 25 |   line_width = 0.2,
 26 |   ...
 27 | )
 28 | }
 29 | \arguments{
 30 | \item{spc_tbl}{Tibble data frame containing spectra, x-axis values, metadata
 31 | and eventual measured variables as list-columns.}
 32 | 
 33 | \item{spc_tbl_l}{List of spectral tibbles (data frames). Default is
 34 | \code{NULL} (argument is not used).}
 35 | 
 36 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted.
 37 | Default is \code{"spc"} (raw spectra).}
 38 | 
 39 | \item{lcol_measure}{Character vector of length 1 denoting the column name
 40 | of the measure columns. This argument is optional. Default is \code{NULL},
 41 | which does not extract an additional measure column.}
 42 | 
 43 | \item{lcol_measure_col_palette}{Palette value supplied to
 44 | \code{ggplot::scale_colour_brewer()}. Default is \code{"Spectral"}, but you can set
 45 | it to the default argument \code{1} (will use
 46 | \code{scale_colour_brewer(..., palette = 1)}).}
 47 | 
 48 | \item{lcol_measure_col_direction}{Sets the the order of colours in the scale
 49 | that is based on a measure column. Default is \code{-1} which reverses the
 50 | scale. Argument is passed on to the function \code{ggplot2::sclae_colour_brewer()}
 51 | as argument \code{direction}.}
 52 | 
 53 | \item{spc_id}{Character vector denoting column name for a unique spectrum ID.
 54 | Default is \code{"unique_id"}.}
 55 | 
 56 | \item{group_id}{Character vector denoting column name for the spectrum group
 57 | ID. Default is \code{"sample_id"}. The group ID is used for
 58 | plotting spectra by group (e.g. by using different colors or panels).}
 59 | 
 60 | \item{group_id_order}{Logical that specifies whether the panel names
 61 | derived from a numeric \code{group_id} column are reordered using ascending
 62 | numbers. Default is \code{TRUE}.}
 63 | 
 64 | \item{group_color}{Logical defining whether spectra are colored by the column
 65 | specified by \code{group_id}.}
 66 | 
 67 | \item{group_color_palette}{Character (1L) defining the diverging colour
 68 | scales from colorbrewer.org; see \code{?scale_colour_brewer} for supported
 69 | diverging colur types (\code{palette} argument).}
 70 | 
 71 | \item{group_panel}{Logical defining whether spectra are arranged into panels
 72 | by groups specified in \code{group_id}. Default is \code{TRUE}.}
 73 | 
 74 | \item{group_legend}{Logical defining whether a legend for the \code{group_id}
 75 | is plotted. Default is \code{FALSE}.}
 76 | 
 77 | \item{ncol}{Integer vector of length 1. Defines number of columns when
 78 | plotting panels (facets). Default is \code{NULL} (argument not used).}
 79 | 
 80 | \item{relabel_spc}{Logical defining whether panels are relabeled with custom
 81 | names for spectra types. Default is TRUE. When \code{TRUE}, arguments
 82 | from \code{relabel_spc_types} can be passed to \code{plot_spc_ext}
 83 | (supported via the \code{...} (ellipsis) argument)}
 84 | 
 85 | \item{ylab}{Character vector or vector of type \code{"expression"} created by
 86 | mathematical expression created by \code{expression}. Custom annotation for
 87 | y-axis of spectra}
 88 | 
 89 | \item{alpha}{Integer of length 1, from 0 to 1. Defines transparency of
 90 | spectral lines. Default is \code{0.5} (0 is completely transparent and
 91 | 1 is no transparency).}
 92 | 
 93 | \item{line_width}{Numeric vector of length 1 specifying the width of the
 94 | spectral lines. Default is \code{0.2}.}
 95 | 
 96 | \item{...}{Further arguments to be passed to \code{plot_spc_ext}. Currently,
 97 | arguments of \code{relabel_spc_types} are supported.}
 98 | }
 99 | \value{
100 | Object of class \code{"ggplot"} (ggplot2 graph).
101 | }
102 | \description{
103 | \code{plot_spc_ext} is a custom plotting function developed
104 | within the simplerspec framework. Returns plots based on ggplot2
105 | (class "ggplot"). Different spectra types such as raw or preprocessed spectra
106 | and groups can be differentiated by different colors or by using panels
107 | (so called facets). Additionally, spectra can be colored based on an
108 | additional measure variable, e.g. determined by chemical reference analysis.
109 | }
110 | 


--------------------------------------------------------------------------------
/man/resample_spc.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/resample-spc.R
  3 | \name{resample_spc}
  4 | \alias{resample_spc}
  5 | \title{Resample spectra in list-column to new x-axis interval}
  6 | \usage{
  7 | resample_spc(
  8 |   spc_tbl,
  9 |   column_in = "spc",
 10 |   x_unit = c("wavenumber", "wavelength"),
 11 |   wn_lower = 500,
 12 |   wn_upper = 4000,
 13 |   wn_interval = 2,
 14 |   wl_lower = 350,
 15 |   wl_upper = 2500,
 16 |   wl_interval = 1,
 17 |   interpol_method = c("linear", "spline")
 18 | )
 19 | }
 20 | \arguments{
 21 | \item{spc_tbl}{Spectra data embedded in a tibble object (classes
 22 | \verb{"tbl_df", "tbl", "data.frame"}). The spectra tibble needs to contain at
 23 | least of one of the the spectra columns \code{spc}, \code{spc_rs}, \code{spc_mean},
 24 | \code{spc_nocomp}, \code{sc_sm}, \code{sc_rf}, or \code{spc_pre} (list-columns with spectral
 25 | \code{data.table}s), and \code{wavenumbers} or \code{wavelengths} (list-column with vectors
 26 | of x-axis values corresponding to each spectrum). The help section \emph{"Matching
 27 | spectrum type and corresponding x-axis type"} describes the spectra types
 28 | and corresponding x-axis types.}
 29 | 
 30 | \item{column_in}{Character vector of length 1L or symbol/name
 31 | specifying the name of list-column that contains the spectra to be resampled.}
 32 | 
 33 | \item{x_unit}{Character vector of length 1L specifying the measurement unit
 34 | of the x-axis values (list-column) of the input spectra in \code{spc_tbl}.
 35 | Possible values are \code{"wavenumber"} (default) or \code{"wavelength"}. Wavenumber
 36 | is a convenient unit of frequency in the mid-infrared spectral range,
 37 | where wavelength is often used as spatial period for the visible and
 38 | near-infrared range.}
 39 | 
 40 | \item{wn_lower}{Numeric value of lowest wavenumber. This argument will only
 41 | be used if \code{x_unit = "wavenumber"}. The value serves as starting value for
 42 | the new wavenumber sequence that the spectra will be resampled upon. Default
 43 | value is 500 (i.e., in reciprocal centimeters).}
 44 | 
 45 | \item{wn_upper}{Numeric value of highest wavenumber. This argument will only
 46 | be used if \verb{x_unit = "wavenumber}. The value will be used as last value of
 47 | the new wavenumber sequence that the spectra will be resampled upon. Default
 48 | value is 4000 (i.e., in reciprocal centimeters).}
 49 | 
 50 | \item{wn_interval}{Numeric value of the wavenumber increment for the new
 51 | wavenumber sequence that the spectra will be resampled upon. Default value
 52 | is 2 (i.e., in reciprocal centimeters).}
 53 | 
 54 | \item{wl_lower}{Numeric value of lowest wavelength. This argument will only
 55 | be used if \code{x_unit = "wavelength"}. The value serves as starting value of
 56 | the new wavenumber sequence that the spectra will be resampled upon.
 57 | Default value is 350 (i.e. in nanometers).}
 58 | 
 59 | \item{wl_upper}{Numeric value of highest wavelength. This argument will only
 60 | be used if \code{x_unit = "wavelength"}. The value will be used as last value of
 61 | the new wavenumber sequence that the spectra will be resampled upon. Default
 62 | value is 2500 (i.e., in nanometers).}
 63 | 
 64 | \item{wl_interval}{Numeric value of the wavelength increment for the new
 65 | wavenumber sequence that the spectra will be resampled upon. This argument
 66 | will only be used if \code{x_unit = "wavelength"}. Default value is 1 (i.e., in
 67 | nanometers).}
 68 | 
 69 | \item{interpol_method}{Character of \code{"linear"} (default) or \code{"spline"} with
 70 | the interpolation method. \code{"spline"} uses a cubic spline to interpolate the
 71 | input spectra at given x-axis values to new equispaced x-axis intervals.}
 72 | }
 73 | \value{
 74 | A spectra tibble (\code{spc_tbl}) containing two added list-columns:
 75 | \itemize{
 76 | \item \verb{spc_rs:} Resampled spectra as list of \code{data.table}s
 77 | \item \code{wavenumbers_rs} or \code{wavelengths_rs}: Resampled x-axis values as list of
 78 | numeric vectors
 79 | }
 80 | }
 81 | \description{
 82 | Resamples (interpolates) different spectra types with
 83 | corresponding x-axis values that are both stored in list-columns of a spectra
 84 | tibble. A spectra tibble hosts spectra, x-axis vectors, metadata, and
 85 | further linked data with standardized naming conventions. Data input for
 86 | resampling can for example be generated with \code{simplerspec::gather_spc()}.
 87 | Resampling is a key harmonizing step to process and later model spectra
 88 | measured at different resolutions and spectral ranges (i.e., different
 89 | spectrometer devices and/or measurement settings).
 90 | }
 91 | \section{Matching spectrum type and corresponding x-axis type}{
 92 | 
 93 | The combinations of input spectrum types (\code{column_in}) and
 94 | corresponding x-axis types are generated from a simple lookup list. The
 95 | following key-value(s) pairs can be matched at given key, which is the column
 96 | name from \code{column_in} containing the spectra.
 97 | \itemize{
 98 | \item \code{"spc"} : \code{"wavenumbers"} or \code{"wavelengths"} (raw spectra)
 99 | \item \code{"spc_rs"} : \code{"wavenumbers_rs"} or \code{"wavelengths_rs"}) (resampled spectra)
100 | \item \code{"spc_mean"} : \code{"wavenumbers_rs"} or \code{"wavelengths_rs"} (mean spectra)
101 | \item \code{"spc_nocomp"} \code{"wavenumbers"} or \code{"wavelengths"} (spectra prior
102 | atmospheric compensation)
103 | \item \code{"sc_sm" : c("wavenumbers_sc_sm", "wavelengths_sc_sm")} (single channel
104 | sample spectra)
105 | \item \code{"sc_rf" : c("wavenumbers_sc_rf", "wavelengths_sc_rf")} (single channel
106 | reference spectra)
107 | \item \code{"spc_pre" : "xvalues_pre"} (preprocessed spectra)
108 | }
109 | }
110 | 
111 | 


--------------------------------------------------------------------------------
/man/fit_pls.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/pls-modeling.R
  3 | \name{fit_pls}
  4 | \alias{fit_pls}
  5 | \alias{pls_ken_stone}
  6 | \title{Calibration sampling, model tuning, and PLS regression}
  7 | \usage{
  8 | fit_pls(
  9 |   spec_chem,
 10 |   response,
 11 |   variable = NULL,
 12 |   center = TRUE,
 13 |   scale = TRUE,
 14 |   evaluation_method = "test_set",
 15 |   validation = TRUE,
 16 |   split_method = "ken_stone",
 17 |   ratio_val = 1/3,
 18 |   ken_sto_pc = 2,
 19 |   pc,
 20 |   invert = TRUE,
 21 |   tuning_method = "resampling",
 22 |   resampling_method = "kfold_cv",
 23 |   cv = NULL,
 24 |   resampling_seed = 123,
 25 |   pls_ncomp_max = 20,
 26 |   ncomp_fixed = 5,
 27 |   print = TRUE,
 28 |   env = parent.frame()
 29 | )
 30 | 
 31 | pls_ken_stone(
 32 |   spec_chem,
 33 |   response,
 34 |   variable = NULL,
 35 |   center = TRUE,
 36 |   scale = TRUE,
 37 |   evaluation_method = "test_set",
 38 |   validation = TRUE,
 39 |   split_method = "ken_stone",
 40 |   ratio_val = 1/3,
 41 |   ken_sto_pc = 2,
 42 |   pc,
 43 |   invert = TRUE,
 44 |   tuning_method = "resampling",
 45 |   resampling_method = "kfold_cv",
 46 |   cv = NULL,
 47 |   resampling_seed = 123,
 48 |   pls_ncomp_max = 20,
 49 |   ncomp_fixed = 5,
 50 |   print = TRUE,
 51 |   env = parent.frame()
 52 | )
 53 | }
 54 | \arguments{
 55 | \item{spec_chem}{Tibble that contains spectra, metadata and chemical
 56 | reference as list-columns. The tibble to be supplied to \code{spec_chem} can
 57 | be generated by the \verb{join_chem_spc() function}}
 58 | 
 59 | \item{response}{Response variable as symbol or name
 60 | (without quotes, no character string). The provided response symbol needs to be
 61 | a column name in the \code{spec_chem} tibble.}
 62 | 
 63 | \item{variable}{Depreciated and replaced by \code{response}}
 64 | 
 65 | \item{center}{Logical whether to perform mean centering of each spectrum column
 66 | (e.g. wavenumber or wavelength) after common spectrum preprocessing. Default is
 67 | \code{center = TRUE}}
 68 | 
 69 | \item{scale}{Logical whether to perform standard deviation scaling
 70 | of each spectrum column (e.g. wavenumber or wavelength) after common
 71 | spectrum preprocessing. Default is \code{scale = TRUE}}
 72 | 
 73 | \item{evaluation_method}{Character string stating evaluation method.
 74 | Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
 75 | will split the data into a calibration (training) and validation (test) set,
 76 | and evaluate the final model by predicting on the validation set.
 77 | If \code{"resampling"}, the finally selected model will be evaluated based
 78 | on the cross-validation hold-out predictions.}
 79 | 
 80 | \item{validation}{Depreciated and replaced by \code{evaluation_method}.
 81 | Default is \code{TRUE}.}
 82 | 
 83 | \item{split_method}{Method how to to split the data into a independent test
 84 | set. Default is \code{"ken_sto"}, which will select samples for calibration
 85 | based on Kennard-Stone sampling algorithm of preprocessed spectra. The
 86 | proportion of validation to the total number of samples can be specified
 87 | in the argument \code{ratio_val}.
 88 | \code{split_method = "random"} will create a single random split.}
 89 | 
 90 | \item{ratio_val}{Ratio of validation (test) samples to
 91 | total number of samples (calibration (training) and validation (test)).}
 92 | 
 93 | \item{ken_sto_pc}{Number of component used
 94 | for calculating mahalanobsis distance on PCA scores for computing
 95 | Kennard-Stone algorithm.
 96 | Default is \code{ken_sto_pc = 2}, which will use the first two PCA
 97 | components.}
 98 | 
 99 | \item{pc}{Depreciated; renamed argument is \code{ken_sto_pc}.}
100 | 
101 | \item{invert}{Logical}
102 | 
103 | \item{tuning_method}{Character specifying tuning method. Tuning method
104 | affects how caret selects a final tuning value set from a list of candidate
105 | values. Possible values are \code{"resampling"}, which will use a
106 | specified resampling method such as repeated k-fold cross-validation (see
107 | argument \code{resampling_method}) and the generated performance profile
108 | based on the hold-out predictions to decide on the final tuning values
109 | that lead to optimal model performance. The value \code{"none"} will force
110 | caret to compute a final model for a predefined canditate PLS tuning
111 | parameter number of PLS components. In this case, the value
112 | supplied by \code{ncomp_fixed}` is used to set model complexity at
113 | a fixed number of components.}
114 | 
115 | \item{resampling_method}{Character specifying resampling method. Currently,
116 | \code{"kfold_cv"} (default, performs 10-fold cross-validation),
117 | \code{"rep_kfold_cv"} (performs 5-times repeated 10-fold cross-validation),
118 | \code{"loocv"} (performs leave-one-out cross-validation), and \code{"none"}
119 | (if \code{resampling_method = "none"}) are supported.}
120 | 
121 | \item{cv}{Depreciated. Use \code{resampling_method} instead.}
122 | 
123 | \item{resampling_seed}{Random seed (integer) that will be used for generating
124 | resampling indices, which will be supplied to \code{caret::trainControl}.
125 | This makes sure that modeling results are constant when re-fitting.
126 | Default is \code{resampling_seed = 123}.}
127 | 
128 | \item{pls_ncomp_max}{Maximum number of PLS components that are evaluated
129 | by caret::train. Caret will aggregate a performance profile using resampling
130 | for an integer sequence from 1 to \code{pls_ncomp_max}}
131 | 
132 | \item{ncomp_fixed}{Integer of fixed number of PLS components. Will only be
133 | used when \code{tuning_method = "none"} and  \code{resampling_method = "none"}
134 | are used.}
135 | 
136 | \item{print}{Logical expression whether model evaluation graphs shall be
137 | printed}
138 | 
139 | \item{env}{Environment where function is evaluated. Default is
140 | \code{parent.frame}.}
141 | }
142 | \description{
143 | Perform calibration sampling and use selected
144 | calibration set for model tuning
145 | }
146 | 


--------------------------------------------------------------------------------
/man/gather_spc.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/gather-spc.R
  3 | \name{gather_spc}
  4 | \alias{gather_spc}
  5 | \title{Gather measurements of different spectra types, corresponding
  6 | x-axis values and metadata from nested list.}
  7 | \usage{
  8 | gather_spc(data, spc_types = "spc")
  9 | }
 10 | \arguments{
 11 | \item{data}{Recursive list named with filename (\code{file_id}) at first level
 12 | entries, where each element containing a sample measurement has nested
 13 | metadata (\code{"metadata"}), spectra types (see \code{spc_types}), corresponding
 14 | x-axis values (see section \emph{"Details on spectra data checks and matching"}).
 15 | The \code{data} list is a structural convention to organize spectra and their
 16 | metadata. It follows for example the list structure returned from the Bruker
 17 | OPUS binary reader \code{simplerspec::read_opus_univ()}.}
 18 | 
 19 | \item{spc_types}{Character vector with the spectra types to be extracted
 20 | from \code{data} list and gathered into list-columns. The spectra type names need
 21 | to exactly follow the naming conventions, and the element names and contents
 22 | need to be present at the second list hierarchy of \code{data}. These values are
 23 | allowed:
 24 | \itemize{
 25 | \item \code{"spc"} (default): final raw spectra after atmospheric compensation, if
 26 | performed (named \code{AB} in Bruker OPUS software; results from referencing
 27 | sample to reference single channel reflectance and transforming to
 28 | absorbance).
 29 | \item \code{"spc_nocomp"}: raw spectra without atmospheric correction
 30 | \item \code{"sc_sm"}: Single channel reflectance spectra of the samples
 31 | \item \code{"sc_rf"}: Single channel reflectance spectra of the reference (background
 32 | spectra)
 33 | \item \code{"ig_sm"}: Interferograms of the sample spectra (currently only spectra
 34 | without x-axis list-columns are matched and returned)
 35 | \item \code{"ig_rf"}: Interferograms of the reference spectra (currently only spectra
 36 | without x-axis list-columns are matched and returned)
 37 | }}
 38 | }
 39 | \value{
 40 | Spectra tibble (\code{spc_tbl} with classes \code{"tbl_df"}, \code{"tbl"}, and
 41 | \code{"data.frame"}) with the following (list-)columns:
 42 | \itemize{
 43 | \item \code{"unique_id"}: Character vector with unique measurement identifier, likely
 44 | a string with file names in combination with date and time (extracted from
 45 | each \code{"metadata"} data frame column).
 46 | \item \code{"file_id"} : Character vector with file name including the extension
 47 | (extracted from each \code{"metadata"} data frame column).
 48 | \item \code{"sample_id"}: Character vector with sample identifier. For Bruker OPUS
 49 | binary files, this corresponds to the file name without the file extension
 50 | in integer increments of sample replicate measurements.
 51 | \item One or multiple of \code{"spc"}, \code{"spc_nocomp"}, \code{"sc_sm"}, or \code{"sc_rf"}:
 52 | List(s) of data.table's containing spectra type(s).
 53 | \item One or multiple of \code{"wavenumbers"}, \code{"wavelengths"}, \code{"x_values"},
 54 | \code{"wavenumbers_sc_sm"}, \code{"wavelengths_sc_sm"}, \code{"x_values_sc_sm"},
 55 | \code{"wavenumbers_sc_rf"}, \code{"wavelengths_sc_rf"}, or \code{"x_values_sc_rf"}:
 56 | List(s) of numeric vectors with matched x-axis values (see \emph{"Details on
 57 | spectra data checks and matching"} below).
 58 | }
 59 | }
 60 | \description{
 61 | Gather spectra, corresponding x-axis values, and device and
 62 | measurement metadata from a nested list into a spectra tibble, so that one
 63 | row represents one spectral measurement. Spectra, x-axis values and metadata
 64 | are mapped from the individual list elements (named after file name including
 65 | the extension) and transformed into (list-)columns of a spectra tibble,
 66 | which is an extended data frame. For each measurement, spectral data and
 67 | metadata are combined into one row of the tidy data frame. In addition, the ID
 68 | columns \code{unique_id}, \code{file_id}, and \code{sample_id} are extracted from
 69 | \code{"metadata"} (data frame) list entries and returned as identifier columns of
 70 | the spectra tibble. List-columns facilitate keeping related data together in
 71 | a rectangular data structure. They can be manipulated easily during
 72 | subsequent transformations, for example using the standardized functions of
 73 | the simplerspec data processing pipeline.
 74 | }
 75 | \section{Details on spectra data checks and matching}{
 76 | 
 77 | \code{gather_spc()} checks whether these conditions are met for each measurement
 78 | in the list \code{data}:
 79 | \enumerate{
 80 | \item Make sure that the first level \code{data} elements are named (assumed to be
 81 | the file name the data originate from), and remove missing measurements with
 82 | an informative message.
 83 | \item Remove any duplicated file names and raise a message if there are
 84 | name duplicates at first level.
 85 | \item Check whether \code{spc_types} inputs are supported (see argument \code{spc_types})
 86 | and present at the second level of the \code{data} list. If not, remove
 87 | all data elements for incomplete spectral measurements.
 88 | \item Match spectra types and possible corresponding x-axis types from
 89 | a lookup list. For each selected spectrum type (left), at least one of
 90 | the element names of the x-axis type (right) needs to be present for each
 91 | measurement in the list \code{data}:
 92 | \itemize{
 93 | \item \code{"spc"}  : \code{"wavenumbers"}, \code{"wavelengths"}, or \code{"x_values"}
 94 | \item \code{"spc_nocomp"} : \code{"wavenumbers"}, \code{"wavelengths"}, or \code{"x_values"}
 95 | \item \code{"sc_sm"} : \code{"wavenumbers_sc_sm"}, \code{"wavelengths_sc_sm"}, or
 96 | \code{"x_values_sc_sm"}
 97 | \item \code{"sc_rf"} : \code{"wavenumbers_sc_rf"}, \code{"wavelengths_sc_rf"}, or
 98 | \code{"x_values_sc_rf"}
 99 | }
100 | \item Check if \code{"metadata"} elements are present and remove data elements for
101 | measurements with missing or incorrectly named metadata elements
102 | (message).
103 | }
104 | }
105 | 
106 | 


--------------------------------------------------------------------------------
/R/plot_spc.R:
--------------------------------------------------------------------------------
  1 | #' @title Plot tibble spectra
  2 | #' @description Plot spectra from tibble spectra objects.
  3 | #' @param spc_tbl Tibble that contains the first set of spectra to plot as
  4 | #' list-column
  5 | #' @param spc_tbl_2 Tibble that contains the second set of spectra (optional)
  6 | #' to plot as list-column.
  7 | #' @param x_unit Character string describing the x axis unit. Default is
  8 | #'  \code{"wavenumber"}, which will produce a graph with wavenumbers on the
  9 | #'  x axis with reversed number. If \code{x_unit = "wavelength"}, the axis
 10 | #'  will be in regular order (lower wavelengths in nm on the left and higher
 11 | #'  on the right side of the axis).
 12 | #' @param y Character string of list-column name in tibble where spectra of
 13 | #' desired type are extracted to plot.
 14 | #' @param by Character string of column that is used to group the spectra.
 15 | #' Default is \code{"unique_id"}. If replica spectra are present in the file
 16 | #' and processed spectra resulting after averaging need to be plotted,
 17 | #' it is recommend to use \code{"sample_id"} as argument to group according
 18 | #' the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl}
 19 | #' and \code{spc_tbl_2}).
 20 | #' @param graph_id_1 Character string used for grouping the first spectra set
 21 | #' (\code{spc_tbl}) and producing
 22 | #' the label text accordingly. Default is \code{"Set 1"}.
 23 | #' @param graph_id_2 Character string used for grouping the second spectra set
 24 | #' (\code{spc_tbl_2}) and producing the label text accordingly. Default is
 25 | #' \code{"Set 2"}
 26 | #' @param graph_id_1_col Character string for the colour of the first spectra
 27 | #' set. Default is \code{"black"}.
 28 | #' @param graph_id_2_col Character string for the colour of the first spectra
 29 | #' set. Default is \code{"red"}.
 30 | #' @param xlab Character string or mathematical expression
 31 | #' (use \code{expression}) for the x axis title. Default is
 32 | #' \code{expression(paste("Wavenumber [", cm^-1, "]"))}.
 33 | #' @param ylab Character string or mathematical expression
 34 | #' (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.
 35 | #' @param alpha Double in between 0 and 1. Sets the transparency for the plotted
 36 | #' spectra lines.
 37 | #' @param legend Logical whether to plot a legend for the spectra describing
 38 | #' its name selected in arguments \code{graph_id_1} and \code{graph_id_2}.
 39 | #' Default is \code{TRUE}.
 40 | #' @usage plot_spc(spc_tbl, spc_tbl_2 = NULL,
 41 | #'   x_unit = "wavenumber",
 42 | #'   y = "spc", by = "unique_id",
 43 | #'   graph_id_1 = "Set 1", graph_id_2 = "Set 2",
 44 | #'   graph_id_1_col = "black", graph_id_2_col = "red",
 45 | #'   xlab = expression(paste("Wavenumber [", cm^-1, "]")),
 46 | #'   ylab = "Absorbance",
 47 | #'   alpha = 0.2, legend = TRUE)
 48 | #' @export
 49 | plot_spc <- function(spc_tbl, spc_tbl_2 = NULL,
 50 |                      x_unit = "wavenumber",
 51 |                      y = "spc", by = "unique_id",
 52 |                      graph_id_1 = "Set 1", graph_id_2 = "Set 2",
 53 |                      graph_id_1_col = "black", graph_id_2_col = "red",
 54 |                      xlab = expression(paste("Wavenumber [", cm^-1, "]")),
 55 |                      ylab = "Absorbance", alpha = 0.2,
 56 |                      legend = TRUE) {
 57 | 
 58 |   # Fix `R CMD check NOTE`: "no visible binding for global variable ‘...‘"
 59 |   graph_id <- id <- variable <- value <- NULL
 60 | 
 61 |   # (1) Gather spectra into one data.table
 62 |   if (!is.null(spc_tbl_2)) {
 63 |     if (y == "spc") {
 64 |       # raw spectra are not yet data.tables and extraction is done alternatively
 65 |       # via do.call(rbind, list) -> a little bit slower
 66 |       dt_1 <- data.table::data.table(do.call(rbind, spc_tbl[, y][[y]]))
 67 |       dt_2 <- data.table::data.table(do.call(rbind, spc_tbl_2[, y][[y]]))
 68 |     } else {
 69 |       dt_1 <- data.table::rbindlist(spc_tbl[, y][[y]])
 70 |       dt_2 <- data.table::rbindlist(spc_tbl_2[, y][[y]])
 71 |     }
 72 |   } else {
 73 |     if (y == "spc") {
 74 |       # raw spectra are not yet data.tables and extraction is done alternatively
 75 |       # via do.call(rbind, list) -> a little bit slower
 76 |       dt_1 <- data.table::data.table(do.call(rbind, spc_tbl[, y][[y]]))
 77 |     } else {
 78 |       dt_1 <- data.table::rbindlist(spc_tbl[, y][[y]])
 79 |     }
 80 |   }
 81 | 
 82 |   # (2) Extract ID variable and append it to the data.table
 83 |   id_1 <- spc_tbl[, by][[by]]
 84 |   # Add a graph identity column to distiguish graphical layers for spectra
 85 |   # tibble comparisons
 86 |   graph_id_1 <- as.factor(rep(graph_id_1, nrow(spc_tbl)))
 87 |   dt_1[, graph_id := graph_id_1]
 88 |   dt_1[, id := id_1]
 89 | 
 90 |   # Only if spc_tbl_2 exists
 91 |   if (!is.null(spc_tbl_2)) {
 92 |     # (2) Extract ID variable and append it to the data.table
 93 |     id_2 <- spc_tbl_2[, by][[by]]
 94 |     # Add a graph identity column to distiguish graphical layers for spectra
 95 |     # tibble comparisons
 96 |     graph_id_2 <- as.factor(rep(graph_id_2, nrow(spc_tbl_2)))
 97 |     dt_2[, graph_id := graph_id_2]
 98 |     dt_2[, id := id_2]
 99 |     dt_list <- list(
100 |       dt_1 = dt_1,
101 |       dt_2 = dt_2
102 |     )
103 |     dt <- data.table::rbindlist(dt_list)
104 |   } else {
105 |     dt <- dt_1
106 |   }
107 |   # (3) Convert data.table from wide to long form
108 |   dt_long <- data.table::melt(
109 |     dt,
110 |     measure = names(dt)[!names(dt) %in% c("id", "graph_id")]
111 |   )
112 |   # Convert variable column from factor to numeric
113 |   dt_long[, variable := as.numeric(as.character(variable))]
114 |   # (4) Plot spectra
115 |   # Define nice breaks for x axis
116 |   brk <- pretty(as.numeric(names(dt)[!names(dt) %in% c("id", "graph_id")]), n = 10)
117 |   p <- ggplot2::ggplot(dt_long, ggplot2::aes(variable, value)) +
118 |     ggplot2::labs(x = xlab, y = ylab) +
119 |     ggplot2::theme_bw() +
120 |     ggplot2::scale_x_reverse(breaks = brk) +
121 |     # Bring graph_id_2 spectra to front
122 |     # http://stackoverflow.com/questions/21120088/ggplot2-bring-one-line-to-the-front-but-save-the-colors
123 |     ggplot2::geom_line(ggplot2::aes(colour = graph_id, group = id),
124 |       alpha = alpha, size = 0.2
125 |     ) +
126 |     # scale_color_manual(values = rep("black", nrow(dt)))
127 |     ggplot2::scale_color_manual(values = c(graph_id_1_col, graph_id_2_col))
128 | 
129 |   if ("wavelengths_rs" %in% names(spc_tbl) && x_unit == "wavelength") {
130 |     p <- p +
131 |       ggplot2::scale_x_continuous(breaks = brk) +
132 |       ggplot2::xlab("Wavelength [nm]") +
133 |       ggplot2::ylab("Reflectance")
134 |   }
135 | 
136 |   if (legend == FALSE) {
137 |     p <- p +
138 |       # Remove legend
139 |       ggplot2::guides(colour = FALSE)
140 |   }
141 |   p
142 | }
143 | 


--------------------------------------------------------------------------------
/R/remove-outl-spectra.R:
--------------------------------------------------------------------------------
  1 | #' @title Remove outlier spectra
  2 | #' @description Remove outlier spectra based on the
  3 | #' \code{pcout()} function of the \code{mvoutlier} package.
  4 | #' @usage remove_outliers(list_spectra, remove = TRUE)
  5 | #' @param list_spectra List that contains averaged
  6 | #' spectral information
  7 | #' in list element \code{MIR_mean} (data.frame) and metadata in
  8 | #' \code{data_meta} (data.frame).
  9 | #' @param remove logical expression (\code{TRUE} or \code{FALSE})
 10 | #' that specifies weather spectra shall be removed.
 11 | #' If \code{rm = FALSE}, there will be no outlier removal
 12 | #' @return Returns list \code{spectra_out} that contains:
 13 | #' \itemize{
 14 | #'  \item \code{MIR_mean}: Outlier removed MIR spectra as
 15 | #'  data.frame object. If \code{remove = FALSE},
 16 | #'  the function will
 17 | #'  return almost identical list identical to \code{list_spectra},
 18 | #'  except that the first \code{indices} column of the spectral
 19 | #'  data frame \code{MIR_mean} is removed
 20 | #'  (This is done for both options
 21 | #'  \code{remove = TRUE} and \code{remove = FALSE}).
 22 | #'  \item \code{data_meta}: metadata data.frame, identical
 23 | #'  as in the \code{list_spectra} input list.
 24 | #'  \item \code{plot_out}: (optional) ggplot2 graph
 25 | #'  that shows all spectra (absorbance on x-axis and wavenumber
 26 | #'  on y-axis) with outlier marked, if
 27 | #'  \code{remove = TRUE}.
 28 | #' }
 29 | #' @details This is an optional function if one wants to remove
 30 | #' outliers.
 31 | #' @export
 32 | remove_outliers <- function(list_spectra, remove = TRUE) {
 33 |   # Outlier detection
 34 |   # Use the mvoutlier package and pcout function to identify
 35 |   # multivariate outliers
 36 |   wfinal01 <- ID <-  NULL
 37 |   if (remove == TRUE) {
 38 |     # Remove the 'indices' column
 39 |     list_spectra$MIR_mean <- list_spectra$MIR_mean[, -1]
 40 |     out <- mvoutlier::pcout(list_spectra$MIR_mean, makeplot = T,
 41 |       outbound = 0.05) # parameters should be adapted
 42 |     # Plot outlying spectra
 43 |     plot_out <- plotMIR(
 44 |       list_spectra$MIR_mean[
 45 |         order(out$wfinal01, decreasing = T), ],
 46 |       col = as.factor(out$wfinal01[order(out$wfinal01,
 47 |         decreasing = T)])) +
 48 |       ggplot2::scale_colour_brewer("outlier", palette = "Set1")
 49 |     out_id <- as.character(
 50 |       list_spectra$data_meta$ID[!as.logical(out$wfinal01)]
 51 |     )
 52 |     # Remove  outliers
 53 |     MIR_mean <- list_spectra$MIR_mean[
 54 |       ! list_spectra$data_meta$ID %in% out_id, ]
 55 |     # rep ID and country name
 56 |     data_meta <- list_spectra$data_meta[
 57 |       ! list_spectra$data_meta$ID %in% out_id, ]
 58 |     spectra_out <- list(MIR_mean = MIR_mean,
 59 |       data_meta = data_meta,
 60 |       plot_out = plot_out)
 61 |   } else {
 62 |     # Remove the 'indices' column
 63 |     list_spectra$MIR_mean <- list_spectra$MIR_mean[, -1]
 64 |     spectra_out <- list(MIR_mean = list_spectra$MIR_mean,
 65 |       data_meta = list_spectra$data_meta)
 66 |   }
 67 |   spectra_out
 68 | }
 69 | 
 70 | ## plotMIR function of Antoine Stevens; don't export this
 71 | ## function to the NAMESPACE
 72 | plotMIR <- function(spc, group = NULL, col = NULL,
 73 |   linetype = NULL, wr = NULL, brk = NULL,
 74 |   ylab = "Absorbance", xlab = "Wavenumber /cm-1",
 75 |   by = NULL, by.wrap = T, ...){
 76 |   # Function to plot spectra, based on the ggplot2 package
 77 |   # spc = spectral matrix, with colnames = wavelengths
 78 |   # group = grouping variable, usually the id's of the sample
 79 |   # wr = wavelength range to plot
 80 |   # brk = breaks of the x-axis
 81 |   # by = factor variable for which the mean and sd of
 82 |   # each level will be computed and plotted (optional)
 83 |   # Requires packages ggplot2; data.table; reshape2
 84 |   # Workaround to pass R CMD check:
 85 |   # http://stackoverflow.com/questions/9439256/how-can-i-handle-r-cmd-check-no-visible-binding-for-global-variable-notes-when
 86 |   # Setting the variables to NULL first
 87 |   variable <- value <- colour  <- NULL
 88 |   spc <- as.data.frame(spc)
 89 |   if (!is.null(wr))
 90 |     spc <- spc[, as.numeric(colnames(spc)) >= min(wr) &
 91 |         as.numeric(colnames(spc)) <= max(wr)]
 92 |   if (is.null(brk))
 93 |     brk  <- pretty(as.numeric(colnames(spc)), n = 10)
 94 |   if (!is.null(by)) {
 95 |     spc$by <- by
 96 |     spc <- data.table::data.table(spc, check.names = F)
 97 |     mean.spc <- reshape2::melt(
 98 |       spc[, lapply(data.table::.SD, mean), by = by],
 99 |       id.vars = "by"
100 |     )
101 |     sd.spc <- reshape2::melt(
102 |       spc[, lapply(data.table::.SD, sd), by = by],
103 |       id.vars = "by"
104 |     )
105 |     mean.spc$min <- mean.spc$value - sd.spc$value
106 |     mean.spc$max <- mean.spc$value + sd.spc$value
107 |     mean.spc$variable <-  as.numeric(
108 |       as.character(mean.spc$variable)
109 |     )
110 |     if (by.wrap) {
111 |       p <- ggplot2::ggplot(data = mean.spc) +
112 |         ggplot2::geom_ribbon(
113 |           ggplot2::aes(x = variable, ymin = min, ymax = max),
114 |           fill = "grey", col = "black", size = 0.15)  +
115 |         ggplot2::theme_bw()
116 |       p <-  p +  ggplot2::geom_line(
117 |         ggplot2::aes(x = variable, y = value),
118 |         size = 0.25) +
119 |         ggplot2::facet_wrap(~ by) +
120 |         ggplot2::labs(x = xlab, y = ylab) +
121 |         ggplot2::scale_x_reverse(breaks = brk)
122 |     } else {
123 |       p <- ggplot2::ggplot(data = mean.spc,
124 |         ggplot2::aes(x = variable, y = value, group = by, col = by)) +
125 |         ggplot2::geom_line(size = 0.25)  +
126 |         ggplot2::labs(x = xlab, y = ylab) +
127 |         ggplot2::scale_x_reverse(breaks = brk) +
128 |         ggplot2::theme_bw()
129 |     }
130 |     return(p)
131 |   } else {
132 |     if (is.null(group))
133 |       group  <- as.character(1:nrow(spc))
134 |     spc$group <- group
135 |     spc$colour <- col
136 |     spc$linetype <- linetype
137 |     id.var  <- colnames(spc)[
138 |       grep("group|colour|linetype",colnames(spc))]
139 |     tmp <- reshape2::melt(spc, id.var = id.var)
140 |     tmp$variable <- as.numeric(as.character(tmp$variable))
141 |     p <- ggplot2::ggplot(tmp,
142 |       ggplot2::aes(variable, value, group = group)) +
143 |       ggplot2::labs(x = xlab, y = ylab) +
144 |       ggplot2::theme_bw() +
145 |       ggplot2::scale_x_reverse(breaks = brk)
146 |     if (is.null(col) & is.null(linetype))
147 |       p <- p + ggplot2::geom_line(
148 |         ggplot2::aes(colour = group))
149 |     else if (!is.null(col) & is.null(linetype))
150 |       p <- p + ggplot2::geom_line(
151 |         ggplot2::aes(colour = colour))
152 |     else if (is.null(col) & !is.null(linetype))
153 |       p <- p + ggplot2::geom_line(
154 |         ggplot2::aes(colour = group,
155 |         linetype = linetype))
156 |     else  p <- p + ggplot2::geom_line(
157 |       ggplot2::aes(colour = colour,
158 |       linetype = linetype))
159 |     return(p)
160 |   }
161 | }
162 | 
163 | 


--------------------------------------------------------------------------------
/R/utils-stats.R:
--------------------------------------------------------------------------------
  1 | #' @title Assess multiple pairs of measured and predicted values
  2 | #' @description Return performance metrics for test set predictions and
  3 | #' measured values, e.g. for different model outcome variables.
  4 | #' @param data Data frame with all measured (observed) and predicted variables.
  5 | #' @param ... Multiple arguments with observed (measured)-predicted pairs,
  6 | #' specified with \code{dplyr::vars(o = <column_name>, p = <column_name>)}.
  7 | #' Column names can strings or symbols. The arguments in `...` need to be named.
  8 | #' @param .metrics Character vector with package used for metrics calculation.
  9 | #' Default is \code{"simplerspec"}, which uses
 10 | #' \code{simplerspec::evaluate_model()}.
 11 | #' @param .model_name String with name for the new column that specifies the
 12 | #' model or the outcome variable. Default is \code{"model"}.
 13 | #'
 14 | #' @return Data frame with with summary statistics for measured values and
 15 | #' performance metrics for the pairs of measured and predicted values.
 16 | #' @importFrom purrr modify_depth imap
 17 | #' @export
 18 | assess_multimodels <- function(data,
 19 |                                ...,
 20 |                                .metrics = c("simplerspec", "yardstick"),
 21 |                                .model_name = "model") {
 22 |   args <- rlang::enquos(...)
 23 |   args_tidy <- map(args, rlang::eval_tidy)
 24 |   stopifnot(
 25 |     all(map_int(names(args), nchar)),
 26 |     all(map_lgl(args_tidy, is.list))
 27 |   )
 28 | 
 29 |   vars_names <- modify_depth(args_tidy, .depth = 1, names)
 30 |   vars_names_ok <- map_lgl(vars_names, ~ all(c("o", "p") %in% .x))
 31 |   if (!all(vars_names_ok)) {
 32 |     stop("Assessment variables supplied in `vars()` need to be named with
 33 |       'o' (observed) and 'p' (predicted)")}
 34 | 
 35 |   metrics <- match.arg(.metrics)
 36 |   # pb 2018-05-09: todo: support yardstick metrics
 37 |   assessment <- switch (metrics,
 38 |     "simplerspec" = map(
 39 |       .x = args_tidy,
 40 |       ~ evaluate_model(data = data,
 41 |         obs = !!.x[["o"]], pred = !!.x[["p"]]))
 42 |   )
 43 |   assessment_models <- imap(assessment,
 44 |     ~ tibble::add_column(.x, !!.model_name := .y, .before = 1))
 45 | 
 46 |   dplyr::bind_rows(assessment_models)
 47 | }
 48 | 
 49 | 
 50 | #' @title Calculate model evaluation metrics
 51 | #' @description Calculate summary statistics of observed values and model
 52 | #' evaluation statistics for assessing agreement between observed (`obs`) and
 53 | #' predicted (`pred`) values.
 54 | #' @param data `data.frame` with predicted and observed data in columns.
 55 | #' @param obs Column that contains observed values, `symbol`/`name` or
 56 | #' `character` (wrapped in "").
 57 | #' @param pred Column that contains predicted values, `symbol`/`name` or
 58 | #' `character` (wrapped in "").
 59 | #' @importFrom e1071 kurtosis
 60 | #' @export
 61 | 
 62 | # Note that coefficient of determination (r2) derived from a linear regression
 63 | # of observed values on the prediction
 64 | # solely describes what proportion of variance in the measured data is
 65 | # simulated by the model. A linear regression line between observed (x) and
 66 | # predicted (y) does not provide a measure of model error!
 67 | 
 68 | # Mean squared error (MSE) can be decomposed into squared bias/deviation (SE^2)
 69 | # and mean squared variation (MSV) (:= SDE^2 := "squared standard deviation of
 70 | # the error"; see e.g. Kobayashi and Salam (2000)
 71 | # Gauch et al. (2003) propose a more sophisticated additive partitioning of the
 72 | # MSE that is more informative about the sources of error and link to the
 73 | # regression parameters; namely, these are squared bias (SB),
 74 | # non-unity slope (NU) and lack of correlation (LC)
 75 | 
 76 | evaluate_model <- function(data, obs, pred) {
 77 |   # Implement quasiquotation to first quote the `obs` and `pred` arguments, and
 78 |   # unquote/evaluate in the context of the `data` data.frame;
 79 |   # obs` and `pred` can be # both symbols or character;
 80 |   # `obs = obs` or `obs = "obs"` when the column that contains observed values
 81 |   # is named "obs"
 82 |   obs <- rlang::enquo(obs)
 83 |   pred <- rlang::enquo(pred)
 84 |   obs <- dplyr::pull(data, !!obs)
 85 |   pred <- dplyr::pull(data, !!pred)
 86 | 
 87 |   tibble::tibble(
 88 |     ## Compute descriptive statistics of the observations/measurements
 89 |     n = length(obs),
 90 |     min = min(obs, ra.rm = TRUE),
 91 |     max = max(obs, na.rm = TRUE),
 92 |     mean = mean(obs, na.rm = TRUE),
 93 |     median = median(obs, na.rm = TRUE),
 94 |     sdev = sd(obs, na.rm = TRUE),
 95 |     cv = sd(obs, na.rm = TRUE) / mean(obs, na.rm = TRUE),
 96 |     skewness_b1 = e1071::skewness(obs, na.rm = TRUE, type = 3),
 97 |     kurtosis = e1071::kurtosis(obs, na.rm = TRUE),
 98 | 
 99 |     ## Compute model evaluation measures to address different aspects
100 |     ## of how well predictions correspond to observations/measurements
101 |     # Root mean squared error
102 |     rmse = mean((obs - pred)^2, na.rm = TRUE)^.5,
103 |     # Mean squared error; mse^2 = me^2 + msv = me^2 + sde^2
104 |     mse = mean((obs - pred)^2, na.rm = TRUE),
105 |     # Terms mean error (ME) and bias are equivalent
106 |     me = mean(obs - pred, na.rm = TRUE),
107 |     bias = mean(obs - pred, na.rm = TRUE),
108 |     # Mean squared variation (of the error); difference between the simulation
109 |     # and the measurement with respect to the deviation from the means
110 |     msv = mean(((mean(obs, na.rm = TRUE) - obs)
111 |       - (mean(pred, na.rm = TRUE) - pred))^2),
112 |     # Standard deviation of the error := SDE = MSV^0.5
113 |     sde = mean(((mean(obs, na.rm = TRUE) - obs)
114 |       - (mean(pred, na.rm = TRUE) - pred))^2)^0.5,
115 |     # Mean absolute error
116 |     mae = mean(abs(obs - pred), na.rm = TRUE),
117 |     r2  = cor(obs, pred, use = "pairwise.complete.obs")^2,
118 |     b = lm(obs ~ pred)$coefficients[2],
119 |     # Ratio of performance to deviation
120 |     rpd = sd(obs, na.rm = TRUE) / mean((obs - pred)^2, na.rm = TRUE)^.5,
121 |     # Ratio of performance to interquartile range
122 |     rpiq = (quantile(obs, .75, na.rm = TRUE) - quantile(obs, .25, na.rm = TRUE))
123 |      / mean((obs - pred)^2, na.rm = TRUE)^.5,
124 |     # See Gauch et. al., 2003) for MSD decomposition into SB, NU and LC;
125 |     # Squared bias
126 |     SB = (mean(obs - pred, na.rm = TRUE))^2,
127 |     # Non-unity slope
128 |     NU = mean((pred - mean(pred))^2) * (1 - lm(obs ~ pred)$coefficients[2])^2,
129 |     # Lack of correlation
130 |     LC = mean((obs - mean(obs))^2)
131 |       * (1 - cor(obs, pred, use = "pairwise.complete.obs")^2),
132 |     # Proportional contributions of SB, NU and LC to MSE in percent
133 |     SB_prop = round((mean(obs - pred, na.rm = TRUE))^2
134 |       / mean((pred - obs)^2) * 100, 0),
135 |     NU_prop = round(mean((pred - mean(pred))^2)
136 |       * (1 - lm(obs ~ pred)$coefficients[2])^2 / mean((pred - obs)^2) * 100, 0),
137 |     LC_prop = round(mean((obs - mean(obs))^2)
138 |       * (1 - cor(obs, pred, use = "pairwise.complete.obs")^2)
139 |       / mean((pred - obs)^2) * 100, 0)
140 |   )
141 | }
142 | 
143 | # Wrapper function to ensure compatibility with old summary_df() function
144 | # !!! note that y are predicted values and x are observed values;
145 | # this is only for backward compatibility;
146 | # This deviates from the principles that observed (O) should be denoted in the
147 | # in the y-axis vs. predicted (P) in the x-axis ((OP) regressions), according
148 | # Pineiro et al. (2008)
149 | #' @rdname evaluate_model
150 | #' @param df `data.frame` with predicted and observed data in columns.
151 | #' @param x Column that contains observed values, `symbol`/`name` or
152 | #' `character` (wrapped in "").
153 | #' @param y Column that contains predicted values, `symbol`/`name` or
154 | #' `character` (wrapped in "").
155 | #' @export
156 | summary_df <- function(df, x, y) {
157 |   evaluate_model(data = df, obs = x, pred = y)
158 | }
159 | 
160 | # Function to calculate standard error of the mean
161 | sem_ci <- function(x) {
162 |   qt(0.975, df = length(na.omit(x)) - 1) *
163 |     sqrt(var(x, na.rm = TRUE) / length(na.omit(x)))
164 | }
165 | 
166 | # Calculate standard error
167 | se <- function(x) {
168 |   sqrt(var(x, na.rm = TRUE) / length(na.omit(x)))
169 | }
170 | 


--------------------------------------------------------------------------------
/R/resample-spc.R:
--------------------------------------------------------------------------------
  1 | #' @title Resample spectra in list-column to new x-axis interval
  2 | #' @description Resamples (interpolates) different spectra types with
  3 | #' corresponding x-axis values that are both stored in list-columns of a spectra
  4 | #' tibble. A spectra tibble hosts spectra, x-axis vectors, metadata, and
  5 | #' further linked data with standardized naming conventions. Data input for
  6 | #' resampling can for example be generated with `simplerspec::gather_spc()`.
  7 | #' Resampling is a key harmonizing step to process and later model spectra
  8 | #' measured at different resolutions and spectral ranges (i.e., different
  9 | #' spectrometer devices and/or measurement settings).
 10 | #' @param spc_tbl Spectra data embedded in a tibble object (classes
 11 | #' `"tbl_df", "tbl", "data.frame"`). The spectra tibble needs to contain at
 12 | #' least of one of the the spectra columns `spc`, `spc_rs`, `spc_mean`,
 13 | #' `spc_nocomp`, `sc_sm`, `sc_rf`, or `spc_pre` (list-columns with spectral
 14 | #' `data.table`s), and `wavenumbers` or `wavelengths` (list-column with vectors
 15 | #' of x-axis values corresponding to each spectrum). The help section *"Matching
 16 | #' spectrum type and corresponding x-axis type"* describes the spectra types
 17 | #' and corresponding x-axis types.
 18 | #' @param column_in Character vector of length 1L or symbol/name
 19 | #' specifying the name of list-column that contains the spectra to be resampled.
 20 | #' @param x_unit Character vector of length 1L specifying the measurement unit
 21 | #' of the x-axis values (list-column) of the input spectra in `spc_tbl`.
 22 | #' Possible values are `"wavenumber"` (default) or `"wavelength"`. Wavenumber
 23 | #' is a convenient unit of frequency in the mid-infrared spectral range,
 24 | #' where wavelength is often used as spatial period for the visible and
 25 | #' near-infrared range.
 26 | #' @param wn_lower Numeric value of lowest wavenumber. This argument will only
 27 | #' be used if `x_unit = "wavenumber"`. The value serves as starting value for
 28 | #' the new wavenumber sequence that the spectra will be resampled upon. Default
 29 | #' value is 500 (i.e., in reciprocal centimeters).
 30 | #' @param wn_upper Numeric value of highest wavenumber. This argument will only
 31 | #' be used if `x_unit = "wavenumber`. The value will be used as last value of
 32 | #' the new wavenumber sequence that the spectra will be resampled upon. Default
 33 | #' value is 4000 (i.e., in reciprocal centimeters).
 34 | #' @param wn_interval Numeric value of the wavenumber increment for the new
 35 | #' wavenumber sequence that the spectra will be resampled upon. Default value
 36 | #' is 2 (i.e., in reciprocal centimeters).
 37 | #' @param wl_lower Numeric value of lowest wavelength. This argument will only
 38 | #' be used if `x_unit = "wavelength"`. The value serves as starting value of
 39 | #' the new wavenumber sequence that the spectra will be resampled upon.
 40 | #' Default value is 350 (i.e. in nanometers).
 41 | #' @param wl_upper Numeric value of highest wavelength. This argument will only
 42 | #' be used if `x_unit = "wavelength"`. The value will be used as last value of
 43 | #' the new wavenumber sequence that the spectra will be resampled upon. Default
 44 | #' value is 2500 (i.e., in nanometers).
 45 | #' @param wl_interval Numeric value of the wavelength increment for the new
 46 | #' wavenumber sequence that the spectra will be resampled upon. This argument
 47 | #' will only be used if `x_unit = "wavelength"`. Default value is 1 (i.e., in
 48 | #' nanometers).
 49 | #' @param interpol_method Character of `"linear"` (default) or `"spline"` with
 50 | #' the interpolation method. `"spline"` uses a cubic spline to interpolate the
 51 | #' input spectra at given x-axis values to new equispaced x-axis intervals.
 52 | #' @return A spectra tibble (`spc_tbl`) containing two added list-columns:
 53 | #' * `spc_rs:` Resampled spectra as list of `data.table`s
 54 | #' * `wavenumbers_rs` or `wavelengths_rs`: Resampled x-axis values as list of
 55 | #'    numeric vectors
 56 | #' @section Matching spectrum type and corresponding x-axis type:
 57 | #' The combinations of input spectrum types (`column_in`) and
 58 | #' corresponding x-axis types are generated from a simple lookup list. The
 59 | #' following key-value(s) pairs can be matched at given key, which is the column
 60 | #' name from `column_in` containing the spectra.
 61 | #' * `"spc"` : `"wavenumbers"` or `"wavelengths"` (raw spectra)
 62 | #' * `"spc_rs"` : `"wavenumbers_rs"` or `"wavelengths_rs"`) (resampled spectra)
 63 | #' * `"spc_mean"` : `"wavenumbers_rs"` or `"wavelengths_rs"` (mean spectra)
 64 | #' * `"spc_nocomp"` `"wavenumbers"` or `"wavelengths"` (spectra prior
 65 | #'   atmospheric compensation)
 66 | #' * `"sc_sm" : c("wavenumbers_sc_sm", "wavelengths_sc_sm")` (single channel
 67 | #'   sample spectra)
 68 | #' * `"sc_rf" : c("wavenumbers_sc_rf", "wavelengths_sc_rf")` (single channel
 69 | #'   reference spectra)
 70 | #' * `"spc_pre" : "xvalues_pre"` (preprocessed spectra)
 71 | #' @export
 72 | resample_spc <- function(spc_tbl,
 73 |                          column_in = "spc",
 74 |                          x_unit = c("wavenumber", "wavelength"),
 75 |                          wn_lower = 500, wn_upper = 4000, wn_interval = 2,
 76 |                          wl_lower = 350, wl_upper = 2500, wl_interval = 1,
 77 |                          interpol_method = c("linear", "spline")) {
 78 |   # Capture user input as expressions (can be both of type character or symbol),
 79 |   # also called quoting; convert quosures to characters for later arg matching
 80 |   column_in <- rlang::enquo(column_in)
 81 |   column_in_chr <- rlang::quo_name(column_in)
 82 | 
 83 |   stopifnot(
 84 |     is.character(x_unit) && length(x_unit) > 0,
 85 |     is.numeric(wn_lower), is.numeric(wn_upper), is.numeric(wn_interval),
 86 |     is.numeric(wl_lower), is.numeric(wl_upper), is.numeric(wl_interval)
 87 |   )
 88 | 
 89 |   # Lookup list to match spectrum types and corresponding x-axis types
 90 |   spc_xaxis_types <- list(
 91 |     "spc" = c("wavenumbers", "wavelengths"), # raw/unprocessed
 92 |     "spc_rs" = c("wavenumbers_rs", "wavelengths_rs"), # resampled
 93 |     "spc_mean" = c("wavenumbers_rs", "wavelengths_rs"), # mean
 94 |     "spc_nocomp" = c("wavenumbers", "wavelengths"), # no atm. compensation
 95 |     "sc_sm" = c("wavenumbers_sc_sm", "wavelengths_sc_sm"), # single channel sample
 96 |     "sc_rf" = c("wavenumbers_sc_rf", "wavelengths_sc_rf"), # single channel reference
 97 |     "spc_pre" = rep("xvalues_pre", 2) # preprocessed
 98 |   )
 99 |   spctypes <- names(spc_xaxis_types)
100 |   column_spc <- match.arg(column_in_chr, spctypes)
101 | 
102 |   x_unit <- match.arg(x_unit)
103 |   switch(x_unit,
104 |          wavenumber = {x_unit_int <- 1L},
105 |          wavelength = {x_unit_int <- 2L})
106 | 
107 |   interpol_method <- match.arg(interpol_method)
108 | 
109 |   # Final selection of `x_unit` column name string from user input and lookup
110 |   x_unit_sel <- spc_xaxis_types[[column_spc]][x_unit_int]
111 | 
112 |   # Both columns with X-values and input spectra need to be present in `spc_tbl`
113 |   colnm <- colnames(spc_tbl)
114 |   stopifnot(x_unit_sel %in% colnm, column_spc %in% colnm)
115 | 
116 |   # Extract list-column containing spectra
117 |   spc_in_list <- dplyr::pull(spc_tbl, !!column_in)
118 | 
119 |   # Extract list-column containing x-axis values
120 |   xvalues_in_list <- dplyr::pull(spc_tbl, !!x_unit_sel)
121 | 
122 |   # Automatically check the arrangement of the input x-Unit values;
123 |   # often, it is convenient to have have a descending ordner of spectral columns
124 |   # if the physical quantity of the x-axis is wavenumbers
125 |   xvalue_order_chr <- purrr::map_chr(xvalues_in_list, seq_order)
126 | 
127 |   if (length(unique(xvalue_order_chr)) > 1L) {
128 |     stop(
129 |       glue::glue(
130 |         "The column `{x_unit_sel}` which contains the list of X-values
131 |         has both elements of ascending and descending order.
132 |         * To resolve, you can split `spc_tbl` in a list of `spc_tbl`s
133 |           with identical X-value vectors based on `group_by_col_hash()`,
134 |           and apply `resample_spc()` separately to each list element.
135 |         * Alternatively, you could fix the order of x-axis values
136 |           for all input spectra and X-value vectors to all ascending or
137 |           descending"),
138 |       call. = FALSE)
139 |   }
140 |   xvalue_order <- xvalue_order_chr[1L]
141 | 
142 |   # Generate sequence of new x-axis values
143 |   switch(x_unit_int,
144 |          `1L` = {
145 |            xvalues_out <- seq(from = wn_lower, to = wn_upper, by = wn_interval)
146 |            x_unit_type_rs <- "wavenumbers_rs"
147 |           },
148 |          `2L` = {
149 |            xvalues_out <- seq(from = wl_lower, to = wl_upper, by = wl_interval)
150 |            x_unit_type_rs <- "wavelengths_rs"
151 |          })
152 | 
153 |   if (xvalue_order == "descending") xvalues_out <- rev(xvalues_out)
154 | 
155 |   # Repeat sequence of new (resampled) x-axis values in list (for every obs.)
156 |   xvalues_out_list <- rep(list(xvalues_out), nrow(spc_tbl))
157 |   names(xvalues_out_list) <- names(spc_in_list)
158 | 
159 |   # Resample all spectra extracted from list-column `column_in` using prospectr
160 |   spc_rs <- lapply(
161 |     seq_along(spc_in_list),
162 |     function(i) {
163 |       data.table::data.table(
164 |         prospectr::resample(
165 |           X = spc_in_list[[i]], # spectral data.table to resample
166 |           wav = xvalues_in_list[[i]], # old x-values vector
167 |           new.wav = xvalues_out_list[[i]], # new x-values vector
168 |           interpol = interpol_method
169 |         )
170 |       )
171 |     }
172 |   )
173 |   names(spc_rs) <- names(spc_in_list)
174 | 
175 |   spc_tbl_out <-
176 |     spc_tbl %>%
177 |     tibble::add_column(
178 |       spc_rs = spc_rs,
179 |       !!x_unit_type_rs := xvalues_out_list
180 |     )
181 |   return(spc_tbl_out)
182 | }
183 | 
184 | # Helper
185 | seq_order <- function(x) ifelse(x[1L] < x[length(x)], "ascending", "descending")
186 | 


--------------------------------------------------------------------------------
/R/read-asd.R:
--------------------------------------------------------------------------------
  1 | #' @title Read ASD fieldspec spectrometer data export into into simplerspec
  2 | #' spectra tibble.
  3 | #' @description Read tab delimited text (.txt) files exported from ASD field
  4 | #' spectrometer into simplerspec spectra tibble.
  5 | #' ASD Fieldspec data files are expected in .txt tab delimited file format.
  6 | #' The first row should contain
  7 | #' the name 'Wavelength' for the first column and the file names for the
  8 | #' remaining columns.
  9 | #' @param file Tab delmited file from ASD software export where the first
 10 | #' column called \code{Wavelength} contais wavelengths in nanometer and the
 11 | #' remaining columns are sample spectra referred by an ID name provided in the
 12 | #' first row of these columns.
 13 | #' @return Spectra data in tibble data frame (class `tbl_df`) that contains
 14 | #' columns \code{sample_id} (derived from 2nd and following column names of
 15 | #' tab delimited ASD exported text file),
 16 | #' \code{spc} (list-column of spectral matrices)
 17 | #' and \code{wavelengths} (list-column containing wavelength vectors).
 18 | #' @importFrom tidyselect one_of
 19 | #' @importFrom data.table fread
 20 | #' @export
 21 | read_asd <- function(file) {
 22 | 
 23 |   # Read fixed with file into a tibble
 24 |   asd_tbl <- tibble::as_tibble(data.table::fread(file = file))
 25 |   # Transpose tibble and add Wavelengths as column names
 26 |   asd_tbl_t <- tibble::as_tibble(
 27 |     t(dplyr::select(asd_tbl, - tidyselect::one_of("Wavelength")))
 28 |   )
 29 |   colnames(asd_tbl_t) <- asd_tbl[["Wavelength"]]
 30 | 
 31 |   # Split matrix by each row into list of matrices
 32 |   asd_m <- as.matrix(asd_tbl_t)
 33 |   asd_listofv <- split(asd_m, row(asd_m)) # List of numerical vectors
 34 |   # Convert list of vectors into list of matrices
 35 |   asd_listofm <- lapply(seq_along(asd_listofv),
 36 |     function(i) matrix(asd_listofv[[i]], nrow = 1, byrow = FALSE))
 37 |   # Assign file names as names for list of matrices
 38 |   names(asd_listofm) <- colnames(asd_tbl)[-1] # Remove "Wavelength"
 39 | 
 40 |   # Assign columnes for all matrices in list
 41 |   asd_listofm <- lapply(asd_listofm,
 42 |     function(x) {colnames(x) <- asd_tbl[["Wavelength"]]; x})
 43 | 
 44 |   # Create list of wavelengths and assign sample names
 45 |   wavelengths_list <- rep(list(asd_tbl[["Wavelength"]]), length(asd_listofm))
 46 |   names(wavelengths_list) <- names(asd_listofm)
 47 | 
 48 |   # Return spectra as tibble
 49 |   tibble::tibble(
 50 |     sample_id = names(asd_listofm),
 51 |     spc = asd_listofm,
 52 |     wavelengths = wavelengths_list
 53 |   )
 54 | 
 55 | }
 56 | 
 57 | ## Simplespec spectra tibble version of ASD reader based on prospectr::readASD
 58 | ## Reads binary ASD data and converts data into list-columns containing spectral
 59 | ## data that can be further processed within the simplerspec spectra processing
 60 | ## framework ===================================================================
 61 | 
 62 | #' @title Read ASD binary files and gather spectra and metadata in tibble data
 63 | #' frame.
 64 | #' @description Read multiple ASD binary files and gather spectra and metadata
 65 | #' into a simplerspec spectral tibble (data frame). The resulting spectral
 66 | #' tibble is compatible with the simplerspec spectra processing and modeling
 67 | #' framework.
 68 | #' @param fnames Character vector containing full paths of ASD binary files
 69 | #' to be read
 70 | #' @return A spectral tibble (data frame) containing the follwing columns:
 71 | #' \item{unique_id}{Character vector. Unique identifier containing file name
 72 | #' pasted with date and time.}
 73 | #' \item{file_id}{Character vector containing file names and exension}
 74 | #' \item{sample_id}{Character vector containing files names without extension}
 75 | #' \item{metadata}{List-column. List of data frames containing spectral
 76 | #' metadata}
 77 | #' \item{wavelengths}{List-column. List of wavelengths vectors (numeric).}
 78 | #' \item{spc_radiance}{List-column. List of data.tables containing
 79 | #' radiance sample spectra.}
 80 | #' \item{spc_reference}{List-column. List of data.tables containing
 81 | #' reference reflectance spectra.}
 82 | #' \item{spc}{List-column. List of data.tables containing final reflectance
 83 | #' spectra.}
 84 | #' @export
 85 | read_asd_bin <- function(fnames) {
 86 | 
 87 |   data <- prospectr::readASD(fnames = fnames,
 88 |     in_format = "binary", out_format = "list")
 89 |   gps <- purrr::map(data, c("header", "GPS"))
 90 |   header <- purrr::map(purrr::map(data, "header"),
 91 |     function(x) x[- which(names(x) == "GPS")])
 92 |   file_id <- purrr::map_chr(data, "name")
 93 |   sample_id <- sub("(.+)\\.[[:alpha:]]+$", "\\1", file_id) # remove ".asd"
 94 |   datetime <- purrr::map(data, "datetime")
 95 |   unique_id <- mapply(function(x, y) paste0(x, "_", y), sample_id, datetime)
 96 |   metadata <- purrr::map(header, tibble::as_tibble)
 97 |   # Add GPS to metadata
 98 |   metadata <- purrr::map2(metadata, gps, dplyr::bind_cols)
 99 |   spc_l <- purrr::transpose(
100 |     purrr::map(data, `[`, c("radiance", "reference", "reflectance")))
101 |   wl_l <- purrr::transpose(purrr::map(data, `[`, "wavelength"))
102 |   spc_dt <- purrr::modify_depth(spc_l, 2,
103 |     function(x) data.table::data.table(t(x)))
104 |   # Change column names of spectral data tables of all spectrum types
105 |   # by reference, use character converted wavenlengths
106 |   purrr::map(.x = spc_dt, ~ map2(.x = .x, .y = wl_l[["wavelength"]],
107 |     ~ data.table::setnames(.x, names(.x), as.character(.y)))
108 |   )
109 | 
110 |   tibble::tibble(
111 |     unique_id = unique_id,
112 |     file_id = file_id,
113 |     sample_id = sample_id,
114 |     metadata = metadata,
115 |     wavelengths = wl_l[["wavelength"]],
116 |     spc_radiance = spc_dt[["radiance"]],
117 |     spc_reference = spc_dt[["reference"]],
118 |     spc = spc_dt[["reflectance"]]
119 |   )
120 | }
121 | 
122 | # Helper function to remove the ".asd.xxx" (.xxx for example ".ref" or "")
123 | # extension in id column (e.g. sample_id) strings in tibble with metadata or
124 | # reference analysis data ------------------------------------------------------
125 | 
126 | #' @importFrom stringr str_replace
127 | #' @importFrom dplyr pull
128 | remove_id_extension <- function(data,
129 |                                 id_col = "sample_id",
130 |                                 id_new_nm = "sample_id",
131 |                                 extension = "\\.asd.*$") {
132 |   id_col <- rlang::enquo(id_col)
133 |   id_col_chr <- rlang::quo_name(id_col)
134 |   id_col_rm <- rlang::expr(-!!rlang::sym(id_col_chr))
135 |   id_new_nm <- rlang::quo_name(rlang::enquo(id_new_nm))
136 | 
137 |   id_new <- gsub(pattern = extension, replacement = "",
138 |     x = dplyr::pull(data, !!id_col))
139 | 
140 |   # Remove old id column and bind new id column to the remaining columns
141 |   rest <- dplyr::select(data, !!id_col_rm)
142 |   dplyr::bind_cols(!!id_new_nm := id_new, rest)
143 | }
144 | 
145 | 
146 | # Helper to orrect the sensor offset for ASD spectra;
147 | # shift between VIS and VNIR1, and VNIR1 and VNIR2 ranges;
148 | # based on subtracting gaps at `Join1Wavelength` and `Join2Wavelength` column
149 | # positions in `metadata` list-column data frames ------------------------------
150 | 
151 | correct_join_offset <- function(spc_tbl,
152 |                                 lcol_spc = spc,
153 |                                 lcol_xvalues = wavelengths,
154 |                                 lcol_metadata = metadata) {
155 |   swir2_offset <- swir1_offset <- wavelengths <- NULL
156 |   lcol_spc <- rlang::enquo(lcol_spc)
157 |   lcol_spc_chr <- rlang::quo_name(lcol_spc)
158 |   lcol_spc_rm <- rlang::expr(-!!rlang::sym(lcol_spc_chr))
159 |   lcol_xvalues <- rlang::enquo(lcol_xvalues)
160 |   lcol_xvalues_chr <- rlang::quo_name(lcol_xvalues)
161 |   lcol_metadata <- rlang::enquo(lcol_metadata)
162 | 
163 |   spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!lcol_spc))
164 |   xvalues <- dplyr::pull(spc_tbl, !!lcol_xvalues)
165 |   if (!all(sapply(xvalues, identical, xvalues[[1]]))) {
166 |     stop(paste0("Error: Spectral tibble (`spc_tbl`) contains observations",
167 |      " with unequal x unit values (`lcol_xvalues`)."))
168 |   }
169 |   metadata <- dplyr::pull(spc_tbl, !!lcol_metadata)
170 | 
171 |   join1_wavelength <- map(metadata, c("Join1Wavelength"))
172 |   join2_wavelength <- map(metadata, c("Join2Wavelength"))
173 | 
174 |   join1_idx <- unique(
175 |     purrr::map2_int(.x = xvalues, .y = join1_wavelength,
176 |       ~ which.min(abs(.x - .y)))
177 |   )
178 |   join2_idx <- unique(
179 |     purrr::map2_int(.x = xvalues, .y = join2_wavelength,
180 |     ~ which.min(abs(.x - .y)))
181 |   )
182 |   xvalues_max_idx <- unique(map_int(xvalues, which.max))
183 | 
184 |   join1_col1 <- names(spc)[join1_idx]
185 |   join1_col2 <- names(spc)[join1_idx + 1]
186 |   join2_col1 <- names(spc)[join2_idx]
187 |   join2_col2 <- names(spc)[join2_idx + 1]
188 |   swir1_cols <- names(spc)[(join1_idx + 1):join2_idx]
189 |   swir2_cols <- names(spc)[(join2_idx + 1):xvalues_max_idx]
190 | 
191 |   # Calculate the swir1 and swir2 offsets to shift spectral ranges
192 |   # https://stackoverflow.com/questions/19276194/data-table-assignment-expressions-with-dynamic-inputs-existing-columns-an
193 |   spc[, `:=` (
194 |       swir1_offset = .SD[[join1_col2]] - .SD[[join1_col1]],
195 |       swir2_offset = .SD[[join2_col2]] - .SD[[join2_col1]]),
196 |     .SDcols = c(join1_col2, join1_col1, join2_col2, join2_col1)]
197 | 
198 |   # Substract offset(s) for SWIR1 and SWIR2, remove offset columns
199 |   spc[, c(swir1_cols) := lapply(.SD,
200 |     function(x) x - swir1_offset), .SDcols = swir1_cols]
201 |   spc[, c(swir2_cols) := lapply(.SD,
202 |     function(x) x - swir1_offset - swir2_offset), .SDcols = swir2_cols]
203 |   spc[, `:=` (swir1_offset = NULL, swir2_offset = NULL)]
204 | 
205 |   # Remove old spectra list-column (`lcol_spc`) and
206 |   # add new sensor join offset corrected spectra as list-column
207 |   rest <- dplyr::select(spc_tbl, !!lcol_spc_rm)
208 |   tibble::add_column(rest,
209 |     # Convert `spc` single data.table back to list of data.tables
210 |     # much faster than: # data.table:::split.data.table(spc, seq(nrow(spc))
211 |     !!lcol_spc_chr := map(purrr::transpose(spc), data.table::as.data.table),
212 |     .after = eval(substitute(lcol_xvalues_chr)))
213 | }
214 | 
215 | 


--------------------------------------------------------------------------------
/R/preprocess-spc.R:
--------------------------------------------------------------------------------
  1 | #' @title Preprocess spectra
  2 | #' @description Preprocesses spectra in tibble column by sample_id after
  3 | #' averaging spectra by \code{simplerspec::average_spc()}.
  4 | #' @param spc_tbl Tibble that contains spectra to be preprocessed within
  5 | #' a list-column.
  6 | #' @param select Character vector of predefined preprocessing options to be
  7 | #' applied to the spectra list-column specified in \code{column_in}.
  8 | #' Common prefined values are stated as abbreviated preprocessing methods and
  9 | #' options such as \code{"sg_1_w21"}, where \code{"sg"} stands for
 10 | #' Savitzky-Golay and \code{1} for first derivative and \code{"w21"}
 11 | #' for a window size of 21 points.
 12 | #' @param column_in Character vector of single list-column in \code{spc_tbl} that
 13 | #' contain list of spectra (1 row matrix) to be processed by function supplied
 14 | #' in \code{select}.
 15 | #' @param custom_function A character string of a custom processing function
 16 | #' that is later parsed (produces expression in a list) and evaluated within
 17 | #'  the function \code{preprocess_spc}.
 18 | #' The character vector argument of \code{custom_function}
 19 | #' needs to contain \code{"spc_raw"}, which is the single data table of spectra
 20 | #' that results from binding a list of data.tables (spectra to preprocess)
 21 | #' from the spectra list-column specified in \code{column_in}.
 22 | #' An example for a value is
 23 | #' \code{"prospectr::savitzkyGolay(X = spc_raw, m = 0, p = 3, w = 9)"}.
 24 | #' Optional argument. Default is \code{NULL}.
 25 | #' @export
 26 | preprocess_spc <- function(spc_tbl, select, column_in = "spc_mean",
 27 |   custom_function = NULL) {
 28 | 
 29 |   # Convert list of spectral data.tables to one data.table
 30 |   spc_raw <- data.table::rbindlist(spc_tbl[column_in][[column_in]])
 31 | 
 32 |   ## Perform preprocessing =====================================================
 33 | 
 34 |   # Use custom function when supplying option ----------------------------------
 35 |   if (!is.null(custom_function) & select == "custom") {
 36 |     # Create full character string for parsing
 37 |     custom_fct <- paste0("custom <- ", custom_function)
 38 |     # parse converts the character string into an expression
 39 |     # eval evaluates the expression; as a result, custom object is computed
 40 |     # and saved within the current workspace
 41 |     eval(parse(text = custom_fct))
 42 |     ## x <- spc_raw
 43 |     ## custom <- eval(substitute(custom_function), envir = parent.frame())
 44 |     # -> Error in is.data.frame(X) : object 'x' not found
 45 |   }
 46 |   # -> returns error:
 47 |   # custom_function = prospectr::savitzkyGolay(X = x, m = 0, p = 3, w = 9)
 48 |   # Error in is.data.frame(X) : object 'x' not found
 49 |   # -> Maybe solution: http://stackoverflow.com/questions/30563745/non-standard-evaluation-from-another-function-in-r
 50 | 
 51 |   # Savitzky-Golay preprocessing
 52 |   # use different derivatives and window sizes ---------------------------------
 53 | 
 54 |   # Zero order Savitzky-Golay (no derivative) -> only smoothing
 55 |   if (select == "sg_0_w9") {
 56 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
 57 |       m = 0, p = 3, w = 9)}
 58 |   # First derivative Savitzky-Golay
 59 |   if (select == "sg_1_w5") {
 60 |     sg_1_w5 <- prospectr::savitzkyGolay(X = spc_raw,
 61 |       m = 1, p = 3, w = 5)}
 62 |   if (select == "sg_1_w9") {
 63 |     sg_1_w9 <- prospectr::savitzkyGolay(X = spc_raw,
 64 |       m = 1, p = 3, w = 9)}
 65 |   if (select == "sg_1_w11") {
 66 |     sg_1_w11 <- prospectr::savitzkyGolay(X = spc_raw,
 67 |       m = 1, p = 3, w = 11)}
 68 |   if (select == "sg_1_w13") {
 69 |     sg_1_w13 <- prospectr::savitzkyGolay(X = spc_raw,
 70 |       m = 1, p = 3, w = 13)}
 71 |   if (select == "sg_1_p2_w13") {
 72 |     sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw,
 73 |       m = 1, p = 2, w = 13)}
 74 |   if (select == "sg_1_w15") {
 75 |     sg_1_w15 <- prospectr::savitzkyGolay(X = spc_raw,
 76 |       m = 1, p = 3, w = 15)}
 77 |   if (select == "sg_1_w17") {
 78 |     sg_1_w17 <- prospectr::savitzkyGolay(X = spc_raw,
 79 |       m = 1, p = 3, w = 17)}
 80 |   if (select == "sg_1_w19") {
 81 |     sg_1_w19 <- prospectr::savitzkyGolay(X = spc_raw,
 82 |       m = 1, p = 3, w = 19)}
 83 |   # Implement window size of 21, corresponds to ICRAF standard;
 84 |   # see e.g. Terhoeven-Urselmans et al. (2010)
 85 |   if (select == "sg_1_w21") {
 86 |     sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw,
 87 |       m = 1, p = 3, w = 21)}
 88 |   if (select == "sg_1_w23") {
 89 |     sg_1_w23 <- prospectr::savitzkyGolay(X = spc_raw,
 90 |       m = 1, p = 3, w = 23)}
 91 |   if (select == "sg_1_w25") {
 92 |     sg_1_w25 <- prospectr::savitzkyGolay(X = spc_raw,
 93 |       m = 1, p = 3, w = 25)}
 94 |   if (select == "sg_1_w27") {
 95 |     sg_1_w27 <- prospectr::savitzkyGolay(X = spc_raw,
 96 |       m = 1, p = 3, w = 27)}
 97 |   if (select == "sg_1_w35") {
 98 |     sg_1_w35 <- prospectr::savitzkyGolay(X = spc_raw,
 99 |       m = 1, p = 3, w = 35)}
100 |   if (select == "sg_1_w41") {
101 |     sg_1_w41 <- prospectr::savitzkyGolay(X = spc_raw,
102 |       m = 1, p = 3, w = 41)}
103 |   if (select == "sg_1_w51") {
104 |     sg_1_w51 <- prospectr::savitzkyGolay(X = spc_raw,
105 |       m = 1, p = 3, w = 51)}
106 |   # Second derivative Savitzky-Golay
107 |   if (select == "sg_2_w11") {
108 |     sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw,
109 |       m = 2, p = 3, w = 11)}
110 |   if (select == "sg_2_w21") {
111 |     sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw,
112 |       m = 2, p = 3, w = 21)}
113 |   # Savitzky-Golay (order 0) smoothing and derivative with a window size of
114 |   # 21 points
115 |   if (select == "sg_0_1_w21") {
116 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
117 |       m = 0, p = 3, w = 9)
118 |     sg_0_1_w21 <- prospectr::savitzkyGolay(X = sg_0_w9,
119 |       m = 1, p = 3, w = 21)}
120 |   # Savitzky-Golay second derivative
121 |   if (select == "sg_2_w5") {
122 |     sg_2_w5 <- prospectr::savitzkyGolay(X = spc_raw,
123 |       m = 2, p = 3, w = 5)}
124 |   if (select == "sg_2_w11") {
125 |     sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw,
126 |       m = 2, p = 3, w = 11)}
127 | 
128 |   # Standard normal variate (SNV) ----------------------------------------------
129 | 
130 |   # Calculate standard normal variate (SNV) after Savitzky-Golay smoothing
131 |   if (select == "sg_0_snv") {
132 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
133 |       m = 0, p = 3, w = 9)
134 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)}
135 |   if (select == "sg_1_snv") {
136 |     sg_1_snv <- prospectr::standardNormalVariate(sg_1_w5)}
137 |   if (select == "sg_1_p2_w13_snv") {
138 |     sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw,
139 |       m = 1, p = 2, w = 13)
140 |     sg_1_p2_w13_snv <- prospectr::standardNormalVariate(sg_1_p2_w13)}
141 |   # Standard normal variate (SNV) and first gap-segment derivative
142 |   if (select == "snv_gsd_m1_w11_s1") {
143 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
144 |       m = 0, p = 3, w = 9)
145 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
146 |     snv_gsd_m1_w11_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 11, s = 1)}
147 |   if (select == "snv_gsd_m1_w21_s5") {
148 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
149 |       m = 0, p = 3, w = 9)
150 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
151 |     snv_gsd_m1_w21_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 21, s = 5)}
152 |   if (select == "snv_gsd_m1_w31_s1") {
153 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
154 |       m = 0, p = 3, w = 9)
155 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
156 |     snv_gsd_m1_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)}
157 |   if (select == "snv_gsd_m1_w31_s5") {
158 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
159 |       m = 0, p = 3, w = 9)
160 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
161 |     snv_gsd_m1_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)}
162 |   # Standard normal variate (SNV) and second gap-segment derivative
163 |   if (select == "snv_gsd_m2_w5_s1") {
164 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
165 |       m = 0, p = 3, w = 9)
166 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
167 |     snv_gsd_m2_w5_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 5, s = 1)}
168 |   if (select == "snv_gsd_m2_w21_s1") {
169 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
170 |       m = 0, p = 3, w = 9)
171 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
172 |     snv_gsd_m2_w21_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 21, s = 1)}
173 |   if (select == "snv_gsd_m2_w31_s1") {
174 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
175 |       m = 0, p = 3, w = 9)
176 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
177 |     snv_gsd_m2_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 5)}
178 |   if (select == "snv_gsd_m2_w31_s5") {
179 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
180 |       m = 0, p = 3, w = 9)
181 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
182 |     snv_gsd_m2_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 1)}
183 |   if (select == "snv_gsd_m2_w51_s1") {
184 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
185 |       m = 0, p = 3, w = 9)
186 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
187 |     snv_gsd_m2_w51_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 1)}
188 |   if (select == "snv_gsd_m2_w51_s5") {
189 |     sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
190 |       m = 0, p = 3, w = 9)
191 |     sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
192 |     snv_gsd_m2_w51_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 5)}
193 |   # 1rst Gap-segement derivative
194 |   if (select == "gsd_m1_w5_s4") {
195 |     gsd_m1_w5_s4 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 4)}
196 |   if (select == "gsd_m1_w11_s5") {
197 |     gsd_m1_w11_s5 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 5)}
198 |   if (select == "gsd_m1_w11_s21") {
199 |     gsd_m1_w11_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 21)}
200 |   if (select == "gsd_m1_w21_s1") {
201 |     gsd_m1_w21_s1 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 1)}
202 |   if (select == "gsd_m1_w21_s21") {
203 |     gsd_m1_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 21)}
204 |   if (select == "gsd_m1_w35_s21") {
205 |     gsd_m1_w35_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 35, s = 21)}
206 |   if (select == "gsd_m1_w5_s21") {
207 |     gsd_m1_w5_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 21)}
208 |   # 2nd Gap-segment derivative
209 |   if (select == "gsd_m2_w21_s21") {
210 |     gsd_m2_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 2, w = 21, s = 21)}
211 |   # 4th Gap-segment derivative
212 |   if (select == "gsd_m4_w21_s21") {
213 |     gsd_m4_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 4, w = 21, s = 21)}
214 | 
215 |   # Savitzky-Golay combined with multiple scatter correction (MSC --------------
216 |   # Savitzky-Golay with 3rd order polynomial, a window size of 21
217 |   # and first derivative + MSC
218 |   if (select == "sg_1_w21_msc") {
219 |     sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw,
220 |       m = 1, p = 3, w = 21)
221 |     # Use msc function from the pls package; use column means of X as reference
222 |     # spectrum
223 |     sg_1_w21_msc <- pls::msc(X = sg_1_w21, reference = NULL)
224 |   }
225 |   # Savitzky-Golay combined with multiple scatter correction (MSC --------------
226 |   # Savitzky-Golay with 4th order polynomial, a window size of 21
227 |   # and second derivative + MSC
228 |   if (select == "sg_2_w21_msc") {
229 |     sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw,
230 |       m = 2, p = 4, w = 21)
231 |     # Use msc function from the pls package; use column means of X as reference
232 |     # spectrum
233 |     sg_2_w21_msc <- pls::msc(X = sg_2_w21, reference = NULL)
234 |   }
235 | 
236 |   # Continuum-removal ----------------------------------------------------------
237 |   if (select == "cr") {
238 |     cr <- prospectr::continuumRemoval(X = spc_raw,
239 |       wav = as.numeric(colnames(spc_raw)), type = "A")}
240 |   if (select == "cr_refl") {
241 |     cr_refl <- prospectr::continuumRemoval(X = spc_raw,
242 |       wav = as.numeric(colnames(spc_raw)), type = "R")}
243 | 
244 |   # Select final preprocessing based on selection argument and
245 |   # save matrix in data.table
246 |   pre <- select
247 |   spc_pre <- data.table::as.data.table(get(pre))
248 | 
249 |   # Convert preprocessed spectra in data.table to list of data.table spectra
250 |   # https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md
251 |   spc_pre_list <- map(purrr::transpose(spc_pre), data.table::as.data.table)
252 |   # Convert x-values of preprocessed spectra in list of vectors
253 |   # prospectr only hands over new xunits in matrix colnames of type character
254 |   xvalues_pre_list <- lapply(spc_pre_list,
255 |     function(x) as.numeric(colnames(x)))
256 | 
257 |   # Add list of preprocessed spectra and correspoding wavenumbers to tibble
258 |   spc_tbl_out <- tibble::add_column(spc_tbl,
259 |     spc_pre = spc_pre_list, xvalues_pre = xvalues_pre_list)
260 |   return(spc_tbl_out)
261 | }
262 | 


--------------------------------------------------------------------------------
/R/pls-vip.R:
--------------------------------------------------------------------------------
  1 | ### VIP.R: Implementation of VIP (variable importance in projection)(*) for the
  2 | ### `pls' package.
  3 | ### $Id: VIP.R,v 1.2 2007/07/30 09:17:36 bhm Exp $
  4 | 
  5 | ### Copyright: 2006,2007 Bjoern-Helge Mevik
  6 | ### This program is free software; you can redistribute it and/or modify
  7 | ### it under the terms of the GNU General Public License version 2 as
  8 | ### published by the Free Software Foundation.
  9 | ###
 10 | ### This program is distributed in the hope that it will be useful,
 11 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | ### GNU General Public License for more details.
 14 | 
 15 | ### A copy of the GPL text is available here:
 16 | ### http://www.gnu.org/licenses/gpl-2.0.txt
 17 | 
 18 | ### Contact info:
 19 | ### Boejrn-Helge Mevik
 20 | ### bhx6@mevik.net
 21 | ### Roedtvetvien 20
 22 | ### N-0955 Oslo
 23 | ### Norway
 24 | 
 25 | ### (*) As described in Chong, Il-Gyo & Jun, Chi-Hyuck, 2005, Performance of
 26 | ### some variable selection methods when multicollinearity is present,
 27 | ### Chemometrics and Intelligent Laboratory Systems 78, 103--112.
 28 | 
 29 | ## VIP returns all VIP values for all variables and all number of components,
 30 | ## as a ncomp x nvars matrix.
 31 | VIP <- function(object) {
 32 |     # pb: added to avoid `R CMD check` note
 33 |     method <- Yloadings <- scores <- loading.weights <- NULL
 34 |     if (object$method != "oscorespls")
 35 |         stop("Only implemented for orthogonal scores algorithm.  Refit with 'method = \"oscorespls\"'")
 36 |     if (nrow(object$Yloadings) > 1)
 37 |         stop("Only implemented for single-response models")
 38 | 
 39 |     SS <- c(object$Yloadings)^2 * colSums(object$scores^2)
 40 |     Wnorm2 <- colSums(object$loading.weights^2)
 41 |     SSW <- sweep(object$loading.weights^2, 2, SS / Wnorm2, "*")
 42 |     sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS))
 43 | }
 44 | 
 45 | 
 46 | ## VIPjh returns the VIP of variable j with h components
 47 | VIPjh <- function(object, j, h) {
 48 |     # pb: added to avoid `R CMD check` note
 49 |     method <- Yloadings <- scores <- loading.weights <- NULL
 50 |     if (object$method != "oscorespls")
 51 |         stop("Only implemented for orthogonal scores algorithm.  Refit with 'method = \"oscorespls\"'")
 52 |     if (nrow(object$Yloadings) > 1)
 53 |         stop("Only implemented for single-response models")
 54 | 
 55 |     b <- c(object$Yloadings)[1:h]
 56 |     T <- object$scores[,1:h, drop = FALSE]
 57 |     SS <- b^2 * colSums(T^2)
 58 |     W <- object$loading.weights[,1:h, drop = FALSE]
 59 |     Wnorm2 <- colSums(W^2)
 60 |     sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS))
 61 | }
 62 | 
 63 | #' @title Extract VIPs (variable importance in the projection) for a PLS
 64 | #' regression model output returned from model fitting with
 65 | #' \code{simplerspec::fit_pls()}
 66 | #' @description VIPs are extracted based on the \code{finalModel} sublist
 67 | #' in the \code{caret::train} output contained in the \code{model} element
 68 | #' of the \code{simplerspec::fit_pls()} model output list. The VIPs for
 69 | #' derived number of PLS components in the \code{finalModel} are computed.
 70 | #' @param mout Model output list returned from \code{simplerspec::fit_pls()}.
 71 | #' @usage extract_pls_vip(mout)
 72 | #' @return A tibble data frame with columns \code{wavenumber} and correponding
 73 | #' VIP values in the column \code{vip} for the finally chosen PLS regression
 74 | #' model at the final number of PLS components.
 75 | #' @export
 76 | extract_pls_vip <- function(mout) {
 77 |   # Compute VIP for all wavenumbers and select only VIPs with ncomp in final
 78 |   # model
 79 |   final_model <- mout$model$finalModel
 80 |   vip <- VIP(object = final_model)[final_model$ncomp, ]
 81 |   # Collect wavenumbers from preprocessed spectra
 82 |   wn <- as.numeric(colnames(mout$data$calibration$spc_pre[[1]]))
 83 |   # Create a data frame with wavenumbers and VIP scores
 84 |   tibble::tibble(wavenumber = wn, vip = vip)
 85 | }
 86 | 
 87 | #' @title Create a data frame containing start and end positions (wavenumbers)
 88 | #' where variable importance in projection (VIP) is > 1
 89 | #' @description Given a data frame with VIP outputs (wavenumber and vip
 90 | #' columns), start and end values denoting spectral regions where VIP > 1
 91 | #' are returned as data frame. The functions can be used as helper
 92 | #' function for plotting VIP.
 93 | #' @param df_vip Data frame containing \code{wavenumber} and \code{vip} columns
 94 | #' (numeric)
 95 | #' @return Data.frame containing vectors \code{start} (numeric; wavenumber),
 96 | #' \code{end} (numeric; wavenumber) and group (integer; values are
 97 | #' \code{1:length(start))}.
 98 | #' @export
 99 | create_vip_rects <- function(df_vip) {
100 | 
101 |   # Avoid `R CMD check NOTE`: `no visible binding for global variable`
102 |   VIP <- wavenumber <- tail <- NULL
103 | 
104 |   # Highlight region data
105 |   # https://stackoverflow.com/questions/32543176/highlight-areas-within-certain-x-range-in-ggplot2
106 |   v <- rep(0, nrow(df_vip))
107 |   v[df_vip$vip > 1] <- 1
108 |   # Get the start and end points for highlighted regions
109 |   inds <- diff(c(0, v))
110 |   start <- df_vip$wavenumber[inds == 1]
111 |   end <- df_vip$wavenumber[inds == -1]
112 |   if (length(start) > length(end)) {
113 |     end <- c(end, tail(df_vip$wavenumber, 1))
114 |   }
115 |   # Create data frame for rectangle layer (geom_rects)
116 |   data.frame(start = start, end = end, group = seq_along(start))
117 | }
118 | 
119 | #' @title Plot stacked ggplot2 graphs with the Variable Importance for the
120 | #' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id,
121 | #'and the preprocessed spectra.
122 | #' @description Plot stacked ggplot2 graphs of VIP for the final
123 | #' PLS regression model output of the calibration (training) data set for the
124 | #' final number of components, raw (replicate mean) spectra, and preprocessed
125 | #' spectra. Regions with VIP > 1 are highlighted across the stacked graphs
126 | #' in beige colour rectangles. VIP calculation is implemented as described in
127 | #' Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection
128 | #' methods when multicollinearity is present. Chemometrics and Intelligent
129 | #' Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011
130 | #' @param mout Model output list that is returned from
131 | #' \code{simplerspec::fit_pls()}. This object contains a nested list with
132 | #' the \code{caret::train()} object (class \code{train}), based on which
133 | #' VIPs at finally selected number of PLS components are computed.
134 | #' @param y1 Character vector of list-column name in
135 | #' \code{mout$data$calibration}, where spectra for bottom graph are extracted.
136 | #' Default is \code{"spc_mean"}, which plots the mean calibration spectra after
137 | #' resampling.
138 | #' @param y2 Character string of list-column name in
139 | #' \code{mout$data$calibration}, where spectra for bottom graph are extracted.
140 | #' Default is \code{"spc_pre"}, which plots the preprocessed calibration
141 | #' spectra after resampling.
142 | #' @param by Character string that is used to assign spectra to the same group
143 | #' and therefore ensures that all spectra are plotted with the same colour.
144 | #' Default is \code{"sample_id"}
145 | #' @param xlab Character string of X axis title for shared x axis of stacked
146 | #' graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}
147 | #' @param ylab1 Y axis title of bottom spectrum. Default is \code{"Absorbance"}.
148 | #' @param ylab2 Y axis title of bottom spectrum. Default is
149 | #' \code{"Preprocessed Abs."}.
150 | #' @param alpha Double between 0 and 1 that defines transparency of spectra
151 | #' lines in returned graph (ggplot plot object).
152 | #' @usage plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre",
153 | #'   by = "sample_id",
154 | #'   xlab = expression(paste("Wavenumber [", cm^-1, "]")),
155 | #'   ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
156 | #'   alpha = 0.2)
157 | #' @export
158 | plot_pls_vip <- function(mout, y1 = "spc_mean", y2 = "spc_pre",
159 |                          by = "sample_id",
160 |                          xlab = expression(paste("Wavenumber [", cm^-1, "]")),
161 |                          ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
162 |                          alpha = 0.2) {
163 | 
164 |   # Avoid `R CMD check` NOTE: no visible binding for global variable `...`
165 |   variable <- wavenumber <- value <- group <- vip <- NULL
166 | 
167 |   # Extract spectra tibble for calibration
168 |   spc_tbl <- mout$data$calibration
169 |   # Gather spectra in one data.table
170 | 
171 |   ifelse(y1 == "spc",
172 |     {dt1 <- data.table::data.table(do.call(rbind, spc_tbl[, y1][[y1]]))},
173 |     {dt1 <- data.table::rbindlist(spc_tbl[, y1][[y1]])}
174 |   )
175 | 
176 |   ifelse(y2 == "spc",
177 |     {dt2 <- data.table::data.table(do.call(rbind, spc_tbl[, y2][[y2]]))},
178 |     {dt2 <- data.table::rbindlist(spc_tbl[, y2][[y2]])}
179 |   )
180 | 
181 |   # Extract ID variable and append it to the data.table
182 |   id <- spc_tbl[, by][[by]]
183 |   dt1[, id := id]
184 |   dt2[, id := id]
185 | 
186 |   # Convert data table to long form for ggplot2 plotting
187 |   dt1_long <- data.table::melt(
188 |     dt1, measure = names(dt1)[!names(dt1) %in% c("id")]
189 |   )
190 |   dt1_long[, variable := as.numeric(as.character(variable))]
191 |   data.table::setnames(dt1_long, old = "variable", new = "wavenumber")
192 | 
193 |   dt2_long <- data.table::melt(
194 |     dt2, measure = names(dt2)[!names(dt2) %in% c("id")]
195 |   )
196 |   dt2_long[, variable := as.numeric(as.character(variable))]
197 |   data.table::setnames(dt2_long, old = "variable", new = "wavenumber")
198 | 
199 |   # Plot spectra and VIP scores
200 |   brk <- pretty(as.numeric(names(dt1)[!names(dt1) %in% c("id")]),
201 |     n = 10)
202 |   maxmin <- function(x) {c(max(x), min(x))}
203 |   x_lim <- maxmin(as.numeric(names(dt1)[!names(dt1) %in% c("id")]))
204 | 
205 |   # Extract VIP (Variable Importance in Projection) scores for the given
206 |   # model
207 |   df_vip <- extract_pls_vip(mout)
208 | 
209 |   # Determine highlighted regions above VIP = 1
210 |   rects <- create_vip_rects(df_vip)
211 | 
212 |   # Plot for resampled and mean replicate spectra
213 |   p_spc <- ggplot2::ggplot(dt1_long, ggplot2::aes(x = wavenumber, y = value)) +
214 |     ggplot2::geom_rect(data = rects, inherit.aes = FALSE,
215 |       ggplot2::aes(xmin = start, xmax = end, ymin = min(dt1_long$value),
216 |         ymax = max(dt1_long$value), group = group), color = "transparent",
217 |         fill = "orange", alpha = 0.3) +
218 |     ggplot2::geom_line(data = dt1_long, inherit.aes = FALSE,
219 |       ggplot2::aes(x = wavenumber, y = value, group = id),
220 |         alpha = alpha, size = 0.2) +
221 |     ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) +
222 |     ggplot2::labs(x = xlab, y = ylab1) +
223 |     ggplot2::theme_bw() +
224 |     ggplot2::theme(plot.margin = ggplot2::unit(c(1, 5, -30, 6),
225 |       units = "points"), axis.text.x = ggplot2::element_blank())
226 | 
227 |     p_spc_pre <- ggplot2::ggplot(dt2_long, ggplot2::aes(wavenumber, value)) +
228 |     ggplot2::geom_rect(data = rects, inherit.aes = FALSE,
229 |       ggplot2::aes(xmin = start, xmax = end, ymin = min(dt2_long$value),
230 |         ymax = max(dt2_long$value), group = group), color = "transparent",
231 |         fill = "orange", alpha = 0.3) +
232 |     ggplot2::geom_line(ggplot2::aes(group = id), alpha = alpha, size = 0.2) +
233 |     ggplot2::labs(x = xlab, y = ylab2) +
234 |     ggplot2::theme_bw() +
235 |     ggplot2::theme(plot.margin = ggplot2::unit(c(0, 5, 1, 1),
236 |       units = "points")) +
237 |     ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) +
238 |     ggplot2::theme(plot.margin = ggplot2::unit(c(1, 5, -30, 6),
239 |       units = "points"),
240 |       axis.title.y = ggplot2::element_text(vjust = 0.25),
241 |       axis.text.x = ggplot2::element_blank())
242 | 
243 |   # Plot for VIP
244 |   # Extract PLS model response variable and number of components
245 |   response <- as.character(unique(mout$stats$response))
246 |   ncomp <- mout$model$finalModel$ncomp
247 |   p_vip <- ggplot2::ggplot(data = df_vip,
248 |       ggplot2::aes(x = wavenumber, y = vip)) +
249 |     ggplot2::geom_rect(data = rects, inherit.aes = FALSE,
250 |       ggplot2::aes(xmin = start, xmax = end, ymin = min(df_vip$vip),
251 |       ymax = max(df_vip$vip), group = group), color = "transparent",
252 |       fill = "orange", alpha = 0.3) +
253 |     ggplot2::geom_hline(yintercept = 1, colour = "red") +
254 |     ggplot2::geom_line() +
255 |     ggplot2::xlab(xlab) +
256 |     ggplot2::ylab(
257 |       bquote(paste(VIP[PLSR], " (", .(response),
258 |         ", ", .(ncomp), " comps)"))) +
259 |     ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) +
260 |     ggplot2::theme_bw() +
261 |     ggplot2::theme(plot.margin = ggplot2::unit(c(0, 5, 1, 1),
262 |       units = "points"), axis.title.y = ggplot2::element_text(vjust = 0.25))
263 | 
264 |   # Arrange plots in two panels without any margins in between
265 |   # Hints from
266 |   # https://stackoverflow.com/questions/42567045/alignment-of-two-plots-using-grid-arrange
267 |   cowplot::plot_grid(
268 |     p_spc, p_spc_pre, p_vip, rel_heights = c(0.4, 0.3, 0.3),
269 |     ncol = 1, align = "v")
270 | }
271 | 
272 | 


--------------------------------------------------------------------------------
/R/gather-spc.R:
--------------------------------------------------------------------------------
  1 | #' @title Gather measurements of different spectra types, corresponding
  2 | #' x-axis values and metadata from nested list.
  3 | #' @description Gather spectra, corresponding x-axis values, and device and
  4 | #' measurement metadata from a nested list into a spectra tibble, so that one
  5 | #' row represents one spectral measurement. Spectra, x-axis values and metadata
  6 | #' are mapped from the individual list elements (named after file name including
  7 | #' the extension) and transformed into (list-)columns of a spectra tibble,
  8 | #' which is an extended data frame. For each measurement, spectral data and
  9 | #' metadata are combined into one row of the tidy data frame. In addition, the ID
 10 | #' columns `unique_id`, `file_id`, and `sample_id` are extracted from
 11 | #' `"metadata"` (data frame) list entries and returned as identifier columns of
 12 | #' the spectra tibble. List-columns facilitate keeping related data together in
 13 | #' a rectangular data structure. They can be manipulated easily during
 14 | #' subsequent transformations, for example using the standardized functions of
 15 | #' the simplerspec data processing pipeline.
 16 | #' @param data Recursive list named with filename (`file_id`) at first level
 17 | #' entries, where each element containing a sample measurement has nested
 18 | #' metadata (`"metadata"`), spectra types (see `spc_types`), corresponding
 19 | #' x-axis values (see section *"Details on spectra data checks and matching"*).
 20 | #' The `data` list is a structural convention to organize spectra and their
 21 | #' metadata. It follows for example the list structure returned from the Bruker
 22 | #' OPUS binary reader `simplerspec::read_opus_univ()`.
 23 | #' @param spc_types Character vector with the spectra types to be extracted
 24 | #' from `data` list and gathered into list-columns. The spectra type names need
 25 | #' to exactly follow the naming conventions, and the element names and contents
 26 | #' need to be present at the second list hierarchy of `data`. These values are
 27 | #' allowed:
 28 | #' * `"spc"` (default): final raw spectra after atmospheric compensation, if
 29 | #'   performed (named `AB` in Bruker OPUS software; results from referencing
 30 | #'   sample to reference single channel reflectance and transforming to
 31 | #'   absorbance).
 32 | #' * `"spc_nocomp"`: raw spectra without atmospheric correction
 33 | #' * `"sc_sm"`: Single channel reflectance spectra of the samples
 34 | #' * `"sc_rf"`: Single channel reflectance spectra of the reference (background
 35 | #'   spectra)
 36 | #' * `"ig_sm"`: Interferograms of the sample spectra (currently only spectra
 37 | #'   without x-axis list-columns are matched and returned)
 38 | #' * `"ig_rf"`: Interferograms of the reference spectra (currently only spectra
 39 | #'   without x-axis list-columns are matched and returned)
 40 | #' @usage gather_spc(data, spc_types = "spc")
 41 | #' @section Details on spectra data checks and matching:
 42 | #' `gather_spc()` checks whether these conditions are met for each measurement
 43 | #' in the list `data`:
 44 | #' 1. Make sure that the first level `data` elements are named (assumed to be
 45 | #' the file name the data originate from), and remove missing measurements with
 46 | #' an informative message.
 47 | #' 2. Remove any duplicated file names and raise a message if there are
 48 | #'    name duplicates at first level.
 49 | #' 3. Check whether `spc_types` inputs are supported (see argument `spc_types`)
 50 | #'    and present at the second level of the `data` list. If not, remove
 51 | #'    all data elements for incomplete spectral measurements.
 52 | #' 4. Match spectra types and possible corresponding x-axis types from
 53 | #'    a lookup list. For each selected spectrum type (left), at least one of
 54 | #'    the element names of the x-axis type (right) needs to be present for each
 55 | #'    measurement in the list `data`:
 56 | #'    * `"spc"`  : `"wavenumbers"`, `"wavelengths"`, or `"x_values"`
 57 | #'    * `"spc_nocomp"` : `"wavenumbers"`, `"wavelengths"`, or `"x_values"`
 58 | #'    * `"sc_sm"` : `"wavenumbers_sc_sm"`, `"wavelengths_sc_sm"`, or
 59 | #'      `"x_values_sc_sm"`
 60 | #'    * `"sc_rf"` : `"wavenumbers_sc_rf"`, `"wavelengths_sc_rf"`, or
 61 | #'      `"x_values_sc_rf"`
 62 | #' 5. Check if `"metadata"` elements are present and remove data elements for
 63 | #'    measurements with missing or incorrectly named metadata elements
 64 | #'    (message).
 65 | #' @return Spectra tibble (`spc_tbl` with classes `"tbl_df"`, `"tbl"`, and
 66 | #' `"data.frame"`) with the following (list-)columns:
 67 | #' * `"unique_id"`: Character vector with unique measurement identifier, likely
 68 | #'   a string with file names in combination with date and time (extracted from
 69 | #'   each `"metadata"` data frame column).
 70 | #' * `"file_id"` : Character vector with file name including the extension
 71 | #'   (extracted from each `"metadata"` data frame column).
 72 | #' * `"sample_id"`: Character vector with sample identifier. For Bruker OPUS
 73 | #'   binary files, this corresponds to the file name without the file extension
 74 | #'   in integer increments of sample replicate measurements.
 75 | #' * One or multiple of `"spc"`, `"spc_nocomp"`, `"sc_sm"`, or `"sc_rf"`:
 76 | #'   List(s) of data.table's containing spectra type(s).
 77 | #' * One or multiple of `"wavenumbers"`, `"wavelengths"`, `"x_values"`,
 78 | #'   `"wavenumbers_sc_sm"`, `"wavelengths_sc_sm"`, `"x_values_sc_sm"`,
 79 | #'   `"wavenumbers_sc_rf"`, `"wavelengths_sc_rf"`, or `"x_values_sc_rf"`:
 80 | #'   List(s) of numeric vectors with matched x-axis values (see *"Details on
 81 | #'   spectra data checks and matching"* below).
 82 | #' @importFrom rlang set_names
 83 | #' @export
 84 | gather_spc <- function(data,
 85 |                        spc_types = "spc") {
 86 |   spc_types <- map(spc_types, rlang::sym)
 87 |   spc_types_chr <- map_chr(spc_types, rlang::quo_name)
 88 | 
 89 |   # Duplicate original data to refer to full `data` list in messages
 90 |   data_origin <- data
 91 | 
 92 |   ## Make sure first level `data` elements are named
 93 | 
 94 |   if (any(names(data) %in% "")) {
 95 |     which_missing <- which(names(data) %in% "")
 96 |     idx_nm_missing <- rlang::set_names(
 97 |       names(data)[which_missing], which_missing)
 98 |     message(paste0(length(which_missing), " `data` ",
 99 |       ifelse(length(which_missing) > 1, "elements", "element"),
100 |       ifelse(length(which_missing) > 1, " have", " has"),
101 |       " missing `file_id` names. ",
102 |       ifelse(length(which_missing) > 1, "These", "This"), "`data` list index",
103 |       ifelse(length(which_missing) > 1, " positions\n", " position\n"),
104 |       ifelse(length(which_missing) > 1, "were", "was"),
105 |       " therefore removed:\n\n",
106 |       idx_nm_missing %>%
107 |         purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = "  "),
108 |       "\n\n"))
109 | 
110 |     data <- data[-which_missing]
111 |   }
112 | 
113 |   ## Check whether any file names (`file_id`'s) are duplicated;
114 |   ## remove and raise warning in case duplicated
115 | 
116 |   is_duplicated <- duplicated(names(data))
117 |   idx_nm_duplicated <- rlang::set_names(names(data)[is_duplicated],
118 |     which(is_duplicated))
119 |   if (any(is_duplicated)) {
120 |     message(paste0(sum(is_duplicated), " `file_id` named ",
121 |       ifelse(sum(is_duplicated) > 1, "elements", "element"), " of `data` ",
122 |       ifelse(sum(is_duplicated) > 1, "are", "is"),
123 |       " non-unique/duplicated\nand ",
124 |       ifelse(sum(is_duplicated) > 1, "were", "was"), " therefore removed:\n\n",
125 |       idx_nm_duplicated %>%
126 |         purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = "  "),
127 |       "\n\n"))
128 | 
129 |     data <- data[-is_duplicated]
130 |   }
131 | 
132 |   ## Check whether `spc_types` are allowed and present at the second level of
133 |   ## the nested list `data`
134 | 
135 |   spc_types_allowed <- c("spc", "spc_nocomp", "sc_sm", "sc_rf", "ig_sm",
136 |     "ig_rf")
137 |   spc_types_present <- spc_types_chr %in% spc_types_allowed
138 | 
139 |   if (any(!spc_types_present)) {
140 |     options(useFancyQuotes = FALSE)
141 |     stop("The following spectrum types specified in `spc_types` are",
142 |       " not supported:\n\n",
143 |       paste(dQuote(spc_types_chr[!spc_types_present]), collapse = ", ")) }
144 | 
145 |   ## Collect names of spectrum types nested at second `file_id` `data` list
146 |   ## level to check whether all specified `spc_types` are present in `data`;
147 |   ## access sub-element names at that list level
148 | 
149 |   data_types_byfile <- purrr::modify_depth(data, .depth = 1,
150 |     ~ purrr::imap_chr(.x, ~ .y))
151 |   spc_types_byfile_in <- map(data_types_byfile, ~ spc_types_chr %in% .x)
152 | 
153 |   if (any(map_lgl(spc_types_byfile_in, ~ any(!.x)))) {
154 |     which_rm <- map_lgl(spc_types_byfile_in, ~ any(!.x)) %>% which()
155 |     rm_origin <- names(data_origin[names(which_rm)])
156 |     rm_origin_lgl <- names(data_origin) %in% rm_origin
157 |     which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl))
158 | 
159 |     message(paste0("Spectrum types (second list level names) specified in",
160 |       " `spc_types` were not found\nwithin all first level elements ",
161 |       " of list `data` or are NULL (spectra data by `file_id`).\n",
162 |       "Therefore, all data elements for the corresponding",
163 |       " list indices and `file_id`'s\nwere removed from `data`:\n\n",
164 |       which_rm_origin %>%
165 |         purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = "  "),
166 |       "\n\n"))
167 | 
168 |     data <- data[- which_rm]
169 |   }
170 | 
171 |   ## Extract the spectra of different types by plucking all types specified in
172 |   ## `spc_types_chr` argment into separate first list level elements
173 | 
174 |   spc_mapped <- map(rlang::set_names(spc_types_chr),
175 |     ~ pluck_depth(data = data, .depth = 1, .string = .x))
176 | 
177 |   ## Match and extract the values for the x-axis types
178 | 
179 |   spc_type_x_axis <- list(
180 |     "spc" = c("wavenumbers", "wavelengths", "x_values"),
181 |     "spc_nocomp" = c("wavenumbers", "wavelengths", "x_values"),
182 |     "sc_sm" = c("wavenumbers_sc_sm", "wavelengths_sc_sm", "x_values_sc_sm"),
183 |     "sc_rf" = c("wavenumbers_sc_rf", "wavelengths_sc_rf", "x_values_sc_rf")
184 |   )
185 | 
186 |   lcols_x_values_matching <- purrr::flatten_chr(spc_type_x_axis[spc_types_chr])
187 |   x_values_matching <- lcols_x_values_matching[lcols_x_values_matching
188 |     %in% unique(purrr::flatten_chr(data_types_byfile))]
189 | 
190 |   # `data` has eventually been updated (removed `data` elements)
191 |   data_types_byfile <- purrr::modify_depth(data, .depth = 1,
192 |     ~ purrr::imap_chr(.x, ~ .y))
193 |   x_value_types_byfile_in <- map(data_types_byfile, ~ x_values_matching %in% .x)
194 | 
195 |   if (any(map_lgl(x_value_types_byfile_in, ~ any(!.x)))) {
196 |     which_rm <- map_lgl(x_value_types_byfile_in, ~ any(!.x)) %>% which()
197 |     rm_origin <- names(data_origin[names(which_rm)])
198 |     rm_origin_lgl <- names(data_origin) %in% rm_origin
199 |     which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl))
200 | 
201 |     message(paste0("These x-axis types (second list level names)",
202 |       " corresponding to \nspecified `spc_types` spectrum types were not",
203 |       " found within\nall first level elements or are NULL",
204 |       " (spectra data by `file_id`) of list `data`: \n\n",
205 |       paste(dQuote(x_values_matching), collapse = ", ")), "\n\n",
206 |       "Therefore, all data elements for the corresponding",
207 |       " list indices\nand `file_id`'s were removed from `data`:\n\n",
208 |       which_rm_origin %>%
209 |         purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = "  "),
210 |       "\n\n")
211 | 
212 |     data <- data[- which_rm]
213 |     spc_mapped <- map(spc_mapped, ~ .x[- which_rm])
214 |   }
215 | 
216 |   x_values_mapped <- map(rlang::set_names(x_values_matching),
217 |     ~ pluck_depth(data = data, .depth = 1, .string = .x))
218 | 
219 |   ## Combine the mapped spectra and correspoding x-axis types, and order
220 |   ## list before returning spectral tibble (`spc_tbl`)
221 | 
222 |   spc_x_axis_types_order <- c("spc", "wavenumbers", "wavelengths", "x_values",
223 |     "spc_nocomp",
224 |     "sc_sm", "wavenumbers_sc_sm", "wavelengths_sc_sm", "x_values_sc_sm",
225 |     "sc_rf", "wavenumbers_sc_rf", "wavelengths_sc_rf", "x_values_sc_rf",
226 |     "ig_sm", "ig_rf")
227 | 
228 |   spc_x_values_mapped <- c(spc_mapped, x_values_mapped)
229 |   spc_x_axis_types_matched <- spc_x_axis_types_order[spc_x_axis_types_order %in%
230 |     names(spc_x_values_mapped)]
231 |   spc_x_values_mapped <- spc_x_values_mapped[spc_x_axis_types_matched]
232 | 
233 |   ## Check if `metadata` elements are present for all list first level elements
234 | 
235 |   # `data` has eventually been updated (removed `data` elements)
236 |   data_types_byfile <- purrr::modify_depth(data, .depth = 1,
237 |     ~ purrr::imap_chr(.x, ~ .y))
238 |   metadata_byfile_in <- map(data_types_byfile, ~ "metadata" %in% .x)
239 | 
240 |   if (any(map_lgl(metadata_byfile_in, ~ any(!.x)))) {
241 |     which_rm <- map_lgl(metadata_byfile_in, ~ any(!.x)) %>% which()
242 |     rm_origin <- names(data_origin[names(which_rm)])
243 |     rm_origin_lgl <- names(data_origin) %in% rm_origin
244 |     which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl))
245 | 
246 |     message(paste0("`metadata` (second list level names) was not",
247 |       " found within\n all first level elements (`metadata` by `file_id`)",
248 |       " of list `data` or are NULL.\n\n",
249 |     "Therefore, all data elements for the corresponding",
250 |     " list indices\nand `file_id`'s were removed from `data`:\n\n",
251 |     which_rm_origin %>%
252 |       purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = "  "),
253 |         "\n\n"))
254 | 
255 |     data <- data[- which_rm]
256 |     spc_x_values_mapped <- map(spc_x_values_mapped, ~ .x[- which_rm])
257 |   }
258 | 
259 |   ## Extract metadata list elements and combine into tibble
260 | 
261 |   metadata_mapped <- map(rlang::set_names(list("metadata")),
262 |     ~ pluck_depth(data = data, .depth = 1, .string = .x))
263 |   metadata_df <- purrr::map_df(data, "metadata")
264 |   id_tbl <- tibble::as_tibble(
265 |     metadata_df[c("unique_id", "file_id", "sample_id")]
266 |   )
267 | 
268 |   ## Column bind all tibbles of metadata id's, metadata, spectra, and
269 |   ## x-axis type values; return on combined spectral tibble (`spc_tbl`)
270 | 
271 |   spc_tbl <- tibble::as_tibble(spc_x_values_mapped)
272 |   metadata_tbl <- tibble::as_tibble(metadata_mapped)
273 |   dplyr::bind_cols(list(id_tbl, metadata_tbl, spc_tbl))
274 | }
275 | 
276 | 
277 | # Helpers ----------------------------------------------------------------------
278 | 
279 | # Pluck out single elements from a list hierarchy level (`.depth`)
280 | # specified in `.string` based on element names; All elements with the specified
281 | # name will be transposed into a single list named by the element name.
282 | # `pluck_depth` can be combined with `purrr::map` to extract elements of
283 | # different names at all list nodes at the specified level, using e.g.
284 | # map(rlang::set_names(c("spc", "spc_nocomp")),
285 | #   ~ pluck_depth(data = data, .depth = 1, .string = .x))
286 | 
287 | pluck_depth <- function(data, .depth = 1, .string = NULL) {
288 |   purrr::modify_depth(data, .depth = .depth, ~ purrr::pluck(.x, .string))
289 | }
290 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # simplerspec <img src="man/figures/simplerspec-logo.png" align="right" width = "250" />
  2 | 
  3 | <!-- badges: start -->
  4 | [![tic](https://github.com/philipp-baumann/simplerspec/workflows/tic/badge.svg?branch=master)](https://github.com/philipp-baumann/simplerspec/actions)
  5 | [![DOI](https://zenodo.org/badge/67121732.svg)](https://zenodo.org/badge/latestdoi/67121732)
  6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
  7 | [![runiverse-package simplerspec](https://philipp-baumann.r-universe.dev/badges/simplerspec?scale=1&color=pink&style=round)](https://philipp-baumann.r-universe.dev/simplerspec)
  8 | <!-- badges: end -->
  9 | 
 10 | ## Short description
 11 | 
 12 | The simplerspec package aims to facilitate spectral and additional data handling and model development for spectroscopy applications such as infrared soil spectroscopy. Different helper functions are designed to create a data and modeling workflow. Data inputs and outputs are stored in common S3 `R` objects (`lists` and `data frames`), using in addition [`data.table`](https://rdatatable.gitlab.io/data.table/) and [`tibble`](https://tibble.tidyverse.org/index.html) extensions. The functions are built to work in a pipeline and cover commonly used procedures for spectral model development and application.
 13 | 
 14 | ## Installation
 15 | 
 16 | The newest version of the package is available on this GitHub repository. If you find bugs you are highly welcome to report issues (write me an [email](mailto:info@spectral-cockpit.space) or create an [issue](https://github.com/philipp-baumann/simplerspec/issues)). You can install {simplerspec} from GitHub or directly from the r-universe.
 17 | 
 18 | ```R
 19 | # option 1
 20 | if (!require("remotes")) install.packages("remotes")
 21 | remotes::install_github("philipp-baumann/simplerspec")
 22 | ```
 23 | 
 24 | ```R
 25 | # option 2
 26 | install.packages("simplerspec",
 27 |   repos = c("https://philipp-baumann.r-universe.dev", "https://cloud.r-project.org"))
 28 | ```
 29 | 
 30 | ## Key features
 31 | 
 32 | The current version of the package features among others the following functions:
 33 | 
 34 | 1. [`read_opus_univ()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/read-opus-universal.R): Read spectra and metadata from Bruker OPUS binary files into R list
 35 | 2. [`gather_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/gather-spc.R): Gather spectra and metadata from list into a tibble object (list-columns)
 36 | 4. [`resample_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/resample-spc.R): Resample spectra to new wavenumber intervals
 37 | 2. [`average_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/average-spc.R): Average spectra for replicate scans
 38 | 5. [`preprocess_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/preprocess-spc.R): Perform pre-processing of spectra
 39 | 6. [`select_spc_vars()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/select-spc.R): Select every `n`-th spectral variable and corresponding X-unit values.
 40 | 7. [`join_spc_chem()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/join-chem-spectra.R): Join chemical and spectral data sets by `sample_id`
 41 | 8. [`plot_spc_ext()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/plot-spc-extended.R): Extended spectral plotting; e.g. group spectra using different
 42 | panels or color spectra based on chemical reference values to explore trends.
 43 | 9. [`fit_pls()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/pls-modeling.R): Perform model tuning and evaluation based on Partial Least Squares (PLS) regression
 44 | 10. [`select_ref_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/select-ref-spectra.R): Select a set of reference samples to measured by
 45 | traditional analysis methods when no a priori sample data except spectra are 
 46 | available (based on Kennard-Stones sampling)
 47 | 11. [`predict_from_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/predict-spc.R): Predict multiple chemical properties from a list of calibrated models and new soil spectra
 48 | 12. [`assess_multimodels()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/utils-stats.R): Assess model performance given multiple pairs of predicted and measured variables.
 49 | 
 50 | ## Cheatsheet
 51 | 
 52 | <a href="https://github.com/philipp-baumann/spc-proc-concepts/blob/master/img/simplerspec_cheatsheet_crop.pdf"><img src="https://github.com/philipp-baumann/spc-proc-concepts/blob/master/img/simplerspec_cheatsheet.png" width="630"/></a>
 53 | 
 54 | ## Motivation and key concepts
 55 | 
 56 | Many R packages are available to do tasks in spectral modeling such as pre-processing of spectral data. The motivation to create this package was:
 57 | 
 58 | 1. Avoid repetition of code in model development (common source of errors).
 59 | 2. Provide a reproducible data analysis workflow for FT-IR spectroscopy.
 60 | 3. R packages are an ideal way to organize and share R code.
 61 | 4. Make soil FT-IR spectroscopy modeling accessible to people that have basic R knowledge.
 62 | 5. Provide an integrated data-model framework that features tidy data structures designed for both user-friendly printing and efficient data processing.
 63 | 
 64 | This package builds mainly upon functions from the following R packages:
 65 | 
 66 | * [`prospectr`](https://cran.r-project.org/web/packages/prospectr/index.html): Various utilities for pre-processing and sample selection based on spectroscopic data. An introduction to the package with examples can be found [here](https://l-ramirez-lopez.r-universe.dev/articles/prospectr/prospectr.html).
 67 | * `plyr` and [`dplyr`](https://dplyr.tidyverse.org): Fast data manipulation tools with an unified interface.
 68 | * `ggplot2 `: Alternative plotting system for R, based on the grammar of graphics. See [here](https://ggplot2.tidyverse.org).
 69 | * `caret `: Classification and regression training. A set of functions that attempt to streamline the process for creating predictive models. See [here](https://topepo.github.io/caret/) for details.
 70 | 
 71 | Consistent and reproducible data and metadata management is an important prerequisite for spectral model development. Therefore, simplerspec functions are based on storing spectral data and related data in R data structures which keep related data in rows. Every row representing an observation contains data related to a single spectral measurement. Simplerspec functions uses tibble data frames as principal data structures because they allow to store lists within the well-known data frame structures. Lists are flexible data structures and can e.g. contain other lists, vectors, data.frames, or matrices.
 72 | 
 73 | List-columns features provided within the tibble framework are an excellent base to work with functional programming tools in R, which allows to efficiently write code. 
 74 | Simplerspec internally uses popular functional programming extension tools provided
 75 | by the [`purrr`](https://purrr.tidyverse.org/) package for processing and transforming spectra. 
 76 | For learning more, I would recommend
 77 | [this nice purrr list-column tutorial](https://github.com/jennybc/purrr-tutorial/tree/gh-pages) 
 78 | provided by Jenny Brian. Further, simplerspec well integrates with the 
 79 | data processing API provided by the dplyr package, which makes spectroscopic
 80 | analysis tidy and easy to understand.
 81 | 
 82 | ## Example workflow
 83 | 
 84 | Bruker FTIR spectrometers produce binary files in the OPUS format that can contain different types of spectra and many parameters such as instrument type and settings that were used at the time of data acquisition and internal processing (e.g. Fourier transform operations). Basically, the entire set of setup measurement parameters, selected spectra, supplementary metadata such as the time of measurement are written into OPUS binary files. In contrast to simple text files that contain only plain text with a defined character encoding, binary files can contain any type of data represented as sequences of bytes (a single byte is sequence of 8 bits and 1 bit either represents 0 or 1).
 85 | 
 86 | Simplerspec comes with reader function `read_opus_univ()` that is intended to be a universal Bruker OPUS file reader that extracts spectra and key metadata from files. Usually, one is mostly interested to extract the final absorbance spectra (shown as *AB* in the OPUS viewer software).
 87 | 
 88 | ```R
 89 | # Load simplerspec package for spectral model development wrapper functions
 90 | library(simplerspec)
 91 | # Load tidyverse packages: loads packages frequently used for data manipulation,
 92 | # data tidying, import, and plotting
 93 | library(tidyverse)
 94 | 
 95 | ################################################################################
 96 | ## Part 1: Read and pre-process spectra, read chemical data, and join
 97 | ## spectral and chemical data sets
 98 | ################################################################################
 99 | 
100 | ## Read spectra in list ========================================================
101 | 
102 | # List of OPUS binary spectra files
103 | lf <- dir("data/spectra/soilspec_eth_bin", full.names = TRUE)
104 | 
105 | # Read spectra from files into R list
106 | spc_list <- read_opus_univ(fnames = lf, extract = c("spc"))
107 | # Returns messages:
108 | #> Extracted spectra data from file: <BF_lo_01_soil_cal.0>
109 | #> Extracted spectra data from file: <BF_lo_01_soil_cal.1>
110 | #> Extracted spectra data from file: <BF_lo_01_soil_cal.2>
111 | #> Extracted spectra data from file: <BF_lo_02_soil_cal.0>
112 | #> ...
113 | ```
114 | 
115 | Pipes can make R code more readable and allows step-wise data processing
116 | when developing spectral models. The pipe operator (`%>%`, called "then") is a new operator in R that was introduced
117 | with the magrittr package. It facilitates readability of code
118 | and avoids to type intermediate objects. The basic behavior of
119 | the pipe operator is
120 | that the object on the left hand side is passed as the first argument
121 | to the function on the right hand side. When loading the tidyverse packages, the
122 | pipe operator is attached to the current R session.
123 | More details can be found [here](https://magrittr.tidyverse.org).
124 | 
125 | The model development process can be quickly coded as the example below illustrates:
126 | 
127 | ```R
128 | ## Spectral data processing pipe ===============================================
129 | 
130 | soilspec_tbl <- spc_list %>%
131 |   # Gather list of spectra data into tibble data frame
132 |   gather_spc() %>% 
133 |   # Resample spectra to new wavenumber interval
134 |   resample_spc(wn_lower = 500, wn_upper = 3996, wn_interval = 2) %>%
135 |   # Average replicate scans per sample_id
136 |   average_spc() %>%
137 |   # Preprocess spectra using Savitzky-Golay first derivative with a window size
138 |   # of 21 points
139 |   preprocess_spc(select = "sg_1_w21")
140 |   
141 | soilspec_tbl
142 | # A tibble: 284 x 11
143 | #>                                 unique_id             file_id         sample_id
144 | #>                                     <chr>               <chr>             <chr>
145 | #> 1 BF_lo_01_soil_cal.0_2015-11-06 14:34:10 BF_lo_01_soil_cal.0 BF_lo_01_soil_cal
146 | #> 2 BF_lo_01_soil_cal.1_2015-11-06 14:38:14 BF_lo_01_soil_cal.1 BF_lo_01_soil_cal
147 | #> 3 BF_lo_01_soil_cal.2_2015-11-06 14:40:55 BF_lo_01_soil_cal.2 BF_lo_01_soil_cal
148 | #> 4 BF_lo_02_soil_cal.0_2015-11-06 17:27:55 BF_lo_02_soil_cal.0 BF_lo_02_soil_cal
149 | #> 5 BF_lo_02_soil_cal.1_2015-11-06 17:30:19 BF_lo_02_soil_cal.1 BF_lo_02_soil_cal
150 | #> 6 BF_lo_02_soil_cal.2_2015-11-06 17:32:47 BF_lo_02_soil_cal.2 BF_lo_02_soil_cal
151 | #> 7 BF_lo_03_soil_cal.0_2015-11-09 11:32:55 BF_lo_03_soil_cal.0 BF_lo_03_soil_cal
152 | #> 8 BF_lo_03_soil_cal.1_2015-11-09 11:35:26 BF_lo_03_soil_cal.1 BF_lo_03_soil_cal
153 | #> 9 BF_lo_03_soil_cal.2_2015-11-09 11:38:08 BF_lo_03_soil_cal.2 BF_lo_03_soil_cal
154 | #> 10 BF_lo_04_soil_cal.0_2015-11-06 10:36:13 BF_lo_04_soil_cal.0 BF_lo_04_soil_cal
155 | #> # ... with 274 more rows, and 8 more variables: spc <list>, wavenumbers <list>,
156 | #> #   metadata <list>, spc_rs <list>, wavenumbers_rs <list>, spc_mean <list>,
157 | #> #   spc_pre <list>, xvalues_pre <list>
158 |   
159 | 
160 | ## Read chemical reference data and join with spectral data ====================
161 | 
162 | # Read chemical reference analysis data
163 | soilchem_tbl <- read_csv(file = "data/soilchem/soilchem_yamsys.csv")
164 | #> Parsed with column specification:
165 | #> cols(
166 | #>   .default = col_double(),
167 | #>   sample_ID = col_character(),
168 | #>   country = col_character(),
169 | #>   site = col_character(),
170 | #>   material = col_character(),
171 | #>   site_comb = col_character()
172 | #> )
173 | #> See spec(...) for full column specifications.
174 | 
175 | # Join spectra tibble and chemical reference analysis tibble
176 | spec_chem <- join_spc_chem(
177 |   spc_tbl = soilspec_tbl, chem_tbl = soilchem_tbl, by = "sample_id")
178 | #> Joining, by = "sample_id"
179 | 
180 | ################################################################################
181 | ## Part 2: Run PLS regression models for different soil variables
182 | ################################################################################
183 | 
184 | # Example Partial Least Squares (PLS) Regression model for total Carbon (C)
185 | # Use repeated k-fold cross-validation to tune the model (choose optimal 
186 | # number of PLS components) and estimate model performance on hold-out 
187 | # predictions of the finally chosen model (model assessment).
188 | # This allows to use the entire set for both model building and evaluation;
189 | # recommended for small data sets
190 | pls_C <- fit_pls(
191 |   # remove rows with NA in the data
192 |   spec_chem = spec_chem[!is.na(spec_chem$C), ],
193 |   response = C,
194 |   evaluation_method = "resampling",
195 |   tuning_method = "resampling",
196 |   resampling_method = "rep_kfold_cv",
197 |   pls_ncomp_max = 7 # maximum number of PLS components tested during tuning
198 | ) 
199 | ```
200 | 
201 | ## Projects using simplerspec
202 | 
203 | * [Spectral platform for soil samples of the Democratic Republic of Congo](https://sae-interactive-data.ethz.ch/simplerspec.drc/)
204 | 
205 | ## Package help
206 | 
207 | After successfully installing simplerspec, you can use the R build-in help
208 | using `?simplerspec::<fun_name>`
209 | 
210 | ## Like it?
211 | 
212 | [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/specphil)
213 | 
214 | ## Credits
215 | 
216 | I would like to thank the following people for the inspiration by concepts, code and packages:
217 | 
218 | * Antoine Stevens and Leonardo Ramirez-Lopez for their contributions to the [prospectr package](https://l-ramirez-lopez.r-universe.dev/prospectr) and the
219 | *Guide to Diffuse Reflectance Spectroscopy & Multivariate Calibration*
220 | * Andrew Sila, Tomislav Hengl, and Thomas Terhoeven-Urselmans for the [`read.opus()`](https://github.com/cran/soil.spec/blob/master/R/read.opus.R)
221 | function from the [soil.spec](https://cran.r-project.org/src/contrib/Archive/soil.spec/) package developed at ICRAF.
222 | * [Hadley Wickham](https://hadley.nz) for his work and concepts on data science within R
223 | * [Max Kuhn](https://github.com/topepo) for the creation of the caret package and for his excellent teaching
224 | materials on [applied predictive modeling](https://link.springer.com/book/10.1007/978-1-4614-6849-3)
225 | 


--------------------------------------------------------------------------------
/R/plot-spc-extended.R:
--------------------------------------------------------------------------------
  1 | ################################################################################
  2 | ## Helper functions to gather spectra, corresponding x-value vectors,
  3 | ## metadata and measure columns (e.g. chemical reference data) from tibble
  4 | ## list-columns into a single data.table or a list of data.tables conntaining
  5 | ## long form data directly to be used for customized ggplot2 plotting functions
  6 | ################################################################################
  7 | 
  8 | # bind a list-column in a tibble to a list of data.tables ----------------------
  9 | 
 10 | #' @title Bind list-columns within a tibble into a list of data.tables
 11 | #' @description Bind one to many list-columns in spectral tibble into a list
 12 | #' of data.tables.
 13 | #' @param spc_tbl Spectral data in a tibble data frame (classes "tibble_df",
 14 | #' "tbl" and "data.frame").
 15 | #' @param lcols Character vector of column names of list-columns to be bound
 16 | #' into a list of data.tables
 17 | #' @param spc_id Character vector denoting column name for a unique spectrum ID.
 18 | #' Default is \code{"unique_id"}.
 19 | #' @param group_id Character vector denoting column name for the spectrum group
 20 | #' ID. Default is \code{"sample_id"}. The group ID can later be used for
 21 | #' plotting spectra by group (e.g. by using different colors or panels).
 22 | #' @return A list of data.tables. Elements contain data from list-columns
 23 | #' specified in \code{lcols} argument as data.tables. All data.tables contain in
 24 | #' addition \code{spc_id} and \code{group_id} columns.
 25 | #' @export
 26 | bind_lcols_dts <- function(spc_tbl, lcols,
 27 |                            spc_id = "unique_id",
 28 |                            group_id = "sample_id") {
 29 | 
 30 |   # todo: add warning for lcols not present in spc_tbl
 31 |   which_bind <- colnames(spc_tbl) %in% lcols
 32 |   lcols_to_bind <- colnames(spc_tbl)[which_bind]
 33 |   names(lcols_to_bind) <- lcols_to_bind
 34 |   dts <- purrr::map(lcols_to_bind,
 35 |     function(y) {
 36 |       if (is.list(spc_tbl[, y][[y]])) {
 37 |         # todo: Test if number of columns is equal in each data.frame or matrix
 38 |         # of the list-(column); if not, return a comprehensible error
 39 |         data.table::data.table(do.call(rbind, spc_tbl[, y][[y]]))
 40 |       } else if (is.atomic((spc_tbl[, y][[y]]))) {
 41 |         data.table::data.table(spc_tbl[, y])
 42 |       }
 43 |     }
 44 |   )
 45 |   # Append IDs to data.tables in list
 46 |   spc_id <- spc_tbl[, spc_id][[spc_id]]
 47 |   lcol_types <- purrr::imap(dts, ~ rep(.y, nrow(spc_tbl)))
 48 |   group_id <- as.character(spc_tbl[, group_id][[group_id]])
 49 | 
 50 |   # Return list of data.tables
 51 |   purrr::imap(dts, function(dt, nm) {
 52 |     dt[, `:=` (spc_id = spc_id, group_id = group_id)]
 53 |     dt[, `:=` (lcol_type = lcol_types[[nm]])]}
 54 |   )
 55 | }
 56 | 
 57 | 
 58 | # Convert list of wide form data.tables into long form -------------------------
 59 | 
 60 | dts_to_long <- function(spc_tbl, lcols,
 61 |                         spc_id = "unique_id",
 62 |                         group_id = "sample_id",
 63 |                         variable_name = "variable",
 64 |                         value_name = "value") {
 65 | 
 66 |   dts <- bind_lcols_dts(spc_tbl = spc_tbl, lcols = lcols,
 67 |     spc_id = spc_id, group_id = group_id)
 68 |   # Convert list of data.tables into long form
 69 |   dts_long <- purrr::map(dts, function(x) {
 70 |     data.table::melt(
 71 |       x,
 72 |       id.vars = c("spc_id", "lcol_type", "group_id"),
 73 |       variable.factor = FALSE,
 74 |       variable.name = variable_name,
 75 |       value.name = value_name
 76 |     )}
 77 |   )
 78 |   # Append unique id (idx lg:= index 'long') for long form
 79 |   purrr::imap(dts_long,
 80 |     function(dt_long, nm) {
 81 |       dt_long[, `:=` (id_long = 1:nrow(dt_long))]
 82 |     }
 83 |   )
 84 | }
 85 | 
 86 | 
 87 | # Match the spectra list columns and corresponding xunit list columns ----------
 88 | 
 89 | match_lcols <- function(spc_tbl, lcols) {
 90 | 
 91 |   # Determine to which spectrum types list-columns belong
 92 |   lcols_spc_all <- c("spc",  "spc_rs", "spc_mean", "spc_nocomp", "sc_sm",
 93 |     "sc_rf", "spc_pre")
 94 |   xvalue_lookup <- list(
 95 |     "spc" = c("wavenumbers", "wavelengths"),
 96 |     "spc_rs" = c("wavenumbers_rs", "wavelengths_rs"),
 97 |     "spc_mean" = c("wavenumbers_rs", "wavelengths_rs"),
 98 |     "spc_nocomp" = c("wavenumbers", "wavelengths"),
 99 |     "sc_sm" = c("wavenumbers_sc_sm"),
100 |     "sc_rf" = c("wavenumbers_sc_rf"),
101 |     "spc_pre" = c("xvalues_pre")
102 |   )
103 |   # Create character vector of spectra type names
104 |   spc_matched <- lcols[lcols %in% lcols_spc_all]
105 |   spc_matched <- spc_matched[order(match(spc_matched, lcols_spc_all))]
106 |   # Create vector of corresponding xunit types in predefined order
107 |   xvalues <- unlist(xvalue_lookup[spc_matched])
108 |   xvalues_matched <- colnames(spc_tbl)[colnames(spc_tbl) %in% xvalues]
109 |   xvalues_matched <- xvalues_matched[order(match(xvalues_matched, xvalues))]
110 |   # Return all matches as list of character vectors for spectra and x-values
111 |   list(
112 |     spc_matched = spc_matched,
113 |     xvalues_matched = xvalues_matched
114 |   )
115 | }
116 | 
117 | 
118 | # Create a list of long form data.tables containing spectra and x-values for
119 | # a set of spectral types ------------------------------------------------------
120 | 
121 | tolist_spc_xvalues <- function(spc_tbl, lcols_spc,
122 |                       spc_id = "unique_id",
123 |                       group_id = "sample_id",
124 |                       variable_name = "variable",
125 |                       value_name = "value") {
126 | 
127 |   lcols_matched <- match_lcols(spc_tbl = spc_tbl, lcols = lcols_spc)
128 |   # Check if length of matched spectra and xunits is equal and if not,
129 |   # return an error
130 |   spc_types <- lcols_matched[["spc_matched"]]
131 |   xunit_types <- lcols_matched[["xvalues_matched"]]
132 | 
133 |   # Gather different spectra types into list of data.tables
134 |   spc_dts <- dts_to_long(spc_tbl = spc_tbl,
135 |     lcols = spc_types, spc_id = spc_id, group_id = group_id,
136 |     variable_name = "spc_variable", value_name = "spc_value")
137 |   # Gather corresponding xunit types into list of data.tables
138 |   xvalues_dts <- dts_to_long(spc_tbl = spc_tbl,
139 |     lcols = xunit_types, spc_id = spc_id, group_id = group_id,
140 |     variable_name = "xvalues_variable", value_name = "xvalues_value")
141 | 
142 |   # Rename lcol_type to spc_type only for spectra data.tables
143 |   spc_dts <- purrr::map(spc_dts,
144 |     function(x) data.table::setnames(x, "lcol_type", "spc_type"))
145 |   # Return data.tables in nested list
146 |   list(
147 |     "spc_dts" = spc_dts,
148 |     "xvalues_dts" = xvalues_dts
149 |   )
150 | }
151 | 
152 | 
153 | # Merge data tables of spectra, xunits, metadata and measured variables
154 | # into a single long form data.table -------------------------------------------
155 | 
156 | #' @title Merge list-columns of spectra, x-axis values, metadata and additional
157 | #' measured variables into a single long form data.table
158 | #' @description Helper function that merges all spectra and related data into
159 | #' a single long form data.table than can subsequently be used for plotting.
160 | #' @param spc_tbl Tibble data frame containing spectra, x-axis values, metadata
161 | #' and eventual measured variables as list-columns.
162 | #' @param lcols_spc Character vector of spectral list-columns to be extracted.
163 | #' Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).
164 | #' @param lcol_measure Character vector of length 1 denoting the column name
165 | #' of the measure columns. This argument is optional. Default is \code{NULL},
166 | #' which does not extract an additional measure column.
167 | #' @param spc_id Character vector of column that contains a unique spectral
168 | #' identifier for all spectra. Default is \code{"unique_id"}.
169 | #' @param group_id Character vector of columns that is used assigning spectra
170 | #' into groups. Default is \code{"sample_id"}. The \code{group_id} can be
171 | #' used for later plotting and thereby visually separating spectral groups into
172 | #' using different colors or panels.
173 | #' @return A single data.table containing long form aggregated data of
174 | #' spectra, x-axis values, metadata and an additionally measured variable.
175 | #' @export
176 | merge_dts <- function(spc_tbl,
177 |                       lcols_spc = c("spc", "spc_pre"), lcol_measure = NULL,
178 |                       spc_id = "unique_id",
179 |                       group_id = "sample_id") {
180 | 
181 |   id_long <- NULL
182 |   spc_xvalues <- tolist_spc_xvalues(spc_tbl = spc_tbl,
183 |     lcols_spc = lcols_spc, spc_id = spc_id, group_id = group_id)
184 |   # Set keys for merging list of data.tables for spectra and xunits
185 |   purrr::imap(
186 |     spc_xvalues,
187 |     function(dts, nm) purrr::map(dts[[nm]],
188 |       function(x) data.table::setkey(x = x, spc_id, id_long, group_id))
189 |   )
190 |   spc_xvalues <- purrr::map2(spc_xvalues[["spc_dts"]],
191 |     spc_xvalues[["xvalues_dts"]], merge)
192 | 
193 |   # Bind metadata if present, and set keys for merging metadata to spectra
194 |   metadata <- bind_lcols_dts(spc_tbl = spc_tbl,
195 |     lcols = "metadata", spc_id = spc_id, group_id = group_id)
196 |   dts <- list(
197 |     "data" = spc_xvalues,
198 |     "metadata" = rep(metadata, length(spc_xvalues))
199 |   )
200 |   if (length(metadata) == 0) dts$metadata <- NULL
201 | 
202 |   # Convert a "measure" tibble column (numeric|character) to list of data.tables
203 |   if (!is.null(lcol_measure)) {
204 |     measure <- bind_lcols_dts(spc_tbl = spc_tbl,
205 |       lcols = lcol_measure, spc_id = spc_id, group_id = group_id)
206 |     dts$measure <- rep(measure, length(spc_xvalues))
207 |   }
208 |   # Set keys (common columns), merge metadata with spectral data (list of
209 |   # data tables) and combine into a single data.table that is returned
210 |   purrr::imap(dts,
211 |     function(dt, nm) lapply(dts[[nm]],
212 |       function(x) data.table::setkey(x = x, spc_id, group_id))
213 |   )
214 |   # Merge multiple data.table by common keys
215 |   # https://gist.github.com/reinholdsson/67008ee3e671ff23b568
216 |   data.table::rbindlist(
217 |     lapply(seq_along(dts[[1]]),
218 |       function(i) Reduce(merge, lapply(dts, `[[`, i)))
219 |   )
220 | }
221 | 
222 | 
223 | # Wrapper function around merge_dts for list of tibbles to aggregate data for
224 | # plotting ---------------------------------------------------------------------
225 | 
226 | #' @title Wrapper function around \code{merge_dts()} for list of tibbles to
227 | #' aggregate data for plotting.
228 | #' @description Instead of a single spectral tibble (data frame) multiple
229 | #' spectral tibbles can be merged into a long-form data.table for plotting
230 | #' spectra and related data. For details, see
231 | #' \code{\link{merge_dts}}.
232 | #' @param spc_tbl_l List of spectral tibbles (data frames).
233 | #' @param lcols_spc Character vector of spectral list-columns to be extracted.
234 | #' Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).
235 | #' @param lcol_measure Character vector of length 1 denoting the column name
236 | #' of the measure columns. This argument is optional. Default is \code{NULL},
237 | #' which does not extract an additional measure column.
238 | #' @param spc_id Character vector of column that contains a unique spectral
239 | #' identifier for all spectra. Default is \code{"unique_id"}.
240 | #' @param group_id Character vector of columns that is used assigning spectra
241 | #' into groups. Default is \code{"sample_id"}. The \code{group_id} can be
242 | #' used for later plotting and thereby visually separating spectral groups into
243 | #' using different colors or panels.
244 | #' @return A single data.table containing long form aggregated data of
245 | #' spectra, x-axis values, metadata and an additionally measured variable.
246 | #' An additional column called \code{group_id_tbl} is appended. It denotes
247 | #' the name of the spectral tibble supplied with the list \code{spc_tbl_l}.
248 | #' @export
249 | merge_dts_l <- function(spc_tbl_l,
250 |                         lcols_spc = c("spc", "spc_pre"),
251 |                         lcol_measure = NULL,
252 |                         spc_id = "unique_id",
253 |                         group_id = "sample_id") {
254 | 
255 |   group_id_tbl <- NULL
256 | 
257 |   dts <- lapply(seq_along(spc_tbl_l),
258 |     function(i) merge_dts(spc_tbl = spc_tbl_l[[i]],
259 |       lcols_spc = lcols_spc, lcol_measure = lcol_measure,
260 |       spc_id = spc_id, group_id = group_id))
261 |   dts <- lapply(seq_along(dts),
262 |     function(i) dts[[i]][, group_id_tbl := names(spc_tbl_l[i])])
263 |   data.table::rbindlist(dts)
264 | }
265 | 
266 | 
267 | ## Create plotting functions based on complete long data.table =================
268 | 
269 | # Function that reorders factor column in data.table based on ascending numeric
270 | # order when converted to numeric type
271 | # https://stackoverflow.com/questions/15665535/reorder-factors-numerically-in-a-data-frame
272 | # ------------------------------------------------------------------------------
273 | reorder_factor_num <- function(dt, column = "group_id") {
274 |   group_id <- NULL
275 |   dt[, group_id := as.factor(group_id)]
276 |   sorted_labels <- paste(sort(as.numeric(levels(dt$group_id))))
277 |   dt$group_id <- factor(dt$group_id, levels = sorted_labels)
278 |   dt
279 | }
280 | 
281 | 
282 | # Custom ggplot2 labeller for spectra types ------------------------------------
283 | 
284 | relabel_spc_types <- function(lb_sc_sm = "Reflectance sample (<ScSm>)",
285 |                               lb_sc_rf = "Reflectance reference (<ScRf>)",
286 |                               lb_ig_sm = "Interferogram sample (<IgSm>)",
287 |                               lb_ig_rf = "Interferogram reference (<IgRf>)",
288 |                               lb_spc_nocomp = "Abs. before atm. comp.",
289 |                               lb_spc = "Absorbance",
290 |                               lb_spc_rs = "Resampled Abs.",
291 |                               lb_spc_mean = "Mean Abs.",
292 |                               lb_spc_pre = "Preprocessed Abs.") {
293 |   ggplot2::as_labeller(
294 |     x = c(
295 |       "sc_sm" = lb_sc_sm,
296 |       "sc_rf" = lb_sc_rf,
297 |       "ig_sm" = lb_ig_sm,
298 |       "spc_nocomp" = lb_spc_nocomp,
299 |       "spc" = lb_spc,
300 |       "spc_rs" = lb_spc_rs,
301 |       "spc_mean" = lb_spc_mean,
302 |       "spc_pre" = lb_spc_pre
303 |     )
304 |   )
305 | }
306 | 
307 | 
308 | # Main spectra explorative analysis and diagnostics plotting function ----------
309 | 
310 | #' @title ggplot2 wrapper for extended spectra plotting
311 | #' @description \code{plot_spc_ext} is a custom plotting function developed
312 | #' within the simplerspec framework. Returns plots based on ggplot2
313 | #' (class "ggplot"). Different spectra types such as raw or preprocessed spectra
314 | #' and groups can be differentiated by different colors or by using panels
315 | #' (so called facets). Additionally, spectra can be colored based on an
316 | #' additional measure variable, e.g. determined by chemical reference analysis.
317 | #' @param spc_tbl Tibble data frame containing spectra, x-axis values, metadata
318 | #' and eventual measured variables as list-columns.
319 | #' @param spc_tbl_l List of spectral tibbles (data frames). Default is
320 | #' \code{NULL} (argument is not used).
321 | #' @param lcols_spc Character vector of spectral list-columns to be extracted.
322 | #' Default is \code{"spc"} (raw spectra).
323 | #' @param lcol_measure Character vector of length 1 denoting the column name
324 | #' of the measure columns. This argument is optional. Default is \code{NULL},
325 | #' which does not extract an additional measure column.
326 | #' @param lcol_measure_col_palette Palette value supplied to
327 | #' `ggplot::scale_colour_brewer()`. Default is `"Spectral"`, but you can set
328 | #' it to the default argument `1` (will use
329 | #' `scale_colour_brewer(..., palette = 1)`).
330 | #' @param lcol_measure_col_direction Sets the the order of colours in the scale
331 | #' that is based on a measure column. Default is \code{-1} which reverses the
332 | #' scale. Argument is passed on to the function `ggplot2::sclae_colour_brewer()`
333 | #' as argument `direction`.
334 | #' @param spc_id Character vector denoting column name for a unique spectrum ID.
335 | #' Default is \code{"unique_id"}.
336 | #' @param group_id Character vector denoting column name for the spectrum group
337 | #' ID. Default is \code{"sample_id"}. The group ID is used for
338 | #' plotting spectra by group (e.g. by using different colors or panels).
339 | #' @param group_id_order Logical that specifies whether the panel names
340 | #' derived from a numeric \code{group_id} column are reordered using ascending
341 | #' numbers. Default is \code{TRUE}.
342 | #' @param group_color Logical defining whether spectra are colored by the column
343 | #' specified by \code{group_id}.
344 | #' @param group_color_palette Character (1L) defining the diverging colour
345 | #' scales from colorbrewer.org; see `?scale_colour_brewer` for supported
346 | #' diverging colur types (`palette` argument).
347 | #' @param group_panel Logical defining whether spectra are arranged into panels
348 | #' by groups specified in \code{group_id}. Default is \code{TRUE}.
349 | #' @param group_legend Logical defining whether a legend for the \code{group_id}
350 | #' is plotted. Default is \code{FALSE}.
351 | #' @param ncol Integer vector of length 1. Defines number of columns when
352 | #' plotting panels (facets). Default is \code{NULL} (argument not used).
353 | #' @param relabel_spc Logical defining whether panels are relabeled with custom
354 | #' names for spectra types. Default is TRUE. When \code{TRUE}, arguments
355 | #' from \code{relabel_spc_types} can be passed to \code{plot_spc_ext}
356 | #' (supported via the \code{...} (ellipsis) argument)
357 | #' @param ylab Character vector or vector of type \code{"expression"} created by
358 | #' mathematical expression created by \code{expression}. Custom annotation for
359 | #' y-axis of spectra
360 | #' @param alpha Integer of length 1, from 0 to 1. Defines transparency of
361 | #' spectral lines. Default is \code{0.5} (0 is completely transparent and
362 | #' 1 is no transparency).
363 | #' @param line_width Numeric vector of length 1 specifying the width of the
364 | #' spectral lines. Default is \code{0.2}.
365 | #' @param ... Further arguments to be passed to \code{plot_spc_ext}. Currently,
366 | #' arguments of \code{relabel_spc_types} are supported.
367 | #' @return Object of class \code{"ggplot"} (ggplot2 graph).
368 | #' @export
369 | plot_spc_ext <- function(spc_tbl, spc_tbl_l = NULL,
370 |                          lcols_spc = "spc",
371 |                          lcol_measure = NULL,
372 |                          lcol_measure_col_palette = "Spectral",
373 |                          lcol_measure_col_direction = -1,
374 |                          spc_id = "unique_id",
375 |                          group_id = "sample_id", group_id_order = TRUE,
376 |                          group_color = TRUE,
377 |                          group_color_palette = NULL,
378 |                          group_panel = TRUE,
379 |                          group_legend = FALSE,
380 |                          ncol = NULL,
381 |                          relabel_spc = TRUE,
382 |                          ylab = "Spectrum value",
383 |                          alpha = 0.5, line_width = 0.2,
384 |                          # Further arguments to be passed to functions called
385 |                          # within this function
386 |                          ...) {
387 | 
388 |   # Merge spectral data, additional (measurement data) and metadata into a
389 |   # single long-form data.table
390 |   if (!is.null(spc_tbl_l)) {
391 |     dt <- merge_dts_l(spc_tbl_l = spc_tbl_l,
392 |       lcols_spc = lcols_spc, lcol_measure = lcol_measure,
393 |       spc_id = spc_id, group_id = group_id) # see merge_dts_l wrapper function
394 |   } else {
395 |     dt <- merge_dts(spc_tbl = spc_tbl,
396 |       lcols_spc = lcols_spc, lcol_measure = lcol_measure,
397 |       spc_id = spc_id, group_id = group_id)
398 |   }
399 |   # Option to order originally numeric group_id factors by group
400 |   if (is.null(spc_tbl_l)) {
401 |     if (group_id_order && is.numeric(dplyr::pull(spc_tbl, !!group_id))) {
402 |     dt <- reorder_factor_num(dt = dt, column = "group_id")
403 |     }
404 |   }
405 |   brk <- pretty(dt[["xvalues_value"]], n = 10) # Pretty x-axis breaks
406 |   p <- ggplot2::ggplot(data = dt,
407 |     ggplot2::aes_string(x = "xvalues_value", y = "spc_value"))
408 | 
409 |   if (group_color == TRUE && is.null(lcol_measure)) {
410 |     p <- p +
411 |       ggplot2::geom_line(ggplot2::aes_string(colour = "group_id",
412 |         group = "spc_id"),
413 |         alpha = alpha, size = line_width)
414 |       if (!is.null(group_color_palette)) {
415 |         p <- p +
416 |           ggplot2::scale_colour_brewer(type = "div",
417 |             palette = group_color_palette, direction = -1)
418 | 
419 |       }
420 |     if (group_legend == FALSE) {
421 |       p <- p + ggplot2::guides(colour = FALSE)
422 |     }
423 |   } else if (group_color == FALSE && is.null(lcol_measure)) {
424 |     p <- p + ggplot2::geom_line(
425 |       ggplot2::aes_string(group = "spc_id"),
426 |         alpha = alpha, size = line_width)
427 |   }
428 | 
429 |   if (!is.null(lcol_measure)) {
430 |     p <- p + ggplot2::geom_line(
431 |       ggplot2::aes_string(colour = lcol_measure, group = "spc_id",
432 |         x = "xvalues_value", y = "spc_value"),
433 |         alpha = alpha, size = line_width, inherit.aes = FALSE) +
434 |       ggplot2::scale_colour_distiller(palette = lcol_measure_col_palette,
435 |         direction = lcol_measure_col_direction)
436 |   }
437 | 
438 |   # Plot different spectral types and group_id in panels
439 |   if (group_panel && length(lcols_spc) > 1) {
440 |     if (relabel_spc) {
441 |       lbl <- relabel_spc_types(...) # see this function for arguments and values
442 |       p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free",
443 |         labeller = ggplot2::labeller(spc_type = lbl))
444 |     } else {
445 |       p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free")
446 |     }
447 |   }
448 | 
449 |   if (group_panel && length(lcols_spc) == 1) {
450 |     p <- p + ggplot2::facet_wrap(~ group_id, ncol = ncol, scales = "free")
451 |   }
452 |   # Special case when list of tibbles are supplied
453 |   if (group_panel && !is.null(spc_tbl_l)) {
454 |     p <- ggplot2::ggplot(data = dt,
455 |       ggplot2::aes_string(x = "xvalues_value", y = "spc_value")) +
456 |       ggplot2::geom_line(ggplot2::aes_string(colour = "group_id_tbl",
457 |         group = "spc_id"), alpha = alpha, size = line_width)
458 |     if (relabel_spc == TRUE) {
459 |       lbl <- relabel_spc_types(...)
460 |       p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free",
461 |         labeller = ggplot2::labeller(spc_type = lbl))
462 |     } else if (relabel_spc == FALSE) {
463 |       p <- p + ggplot2::facet_wrap(~ group_id, ncol = ncol, scales = "free")
464 |     }
465 |   }
466 | 
467 |   p <- p + ggplot2::scale_x_reverse(breaks = brk) +
468 |     ggplot2::xlab(expression(paste("Wavenumber [", cm^-1, "]"))) +
469 |     ggplot2::ylab(ylab) +
470 |     ggplot2::theme_bw() +
471 |     ggplot2::theme(legend.position = "right",
472 |       axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, vjust = 0.5))
473 |   p
474 | }
475 | 
476 | 


--------------------------------------------------------------------------------