├── tests
└── .Rapp.history
├── R
├── simplerspec.R
├── utils-pipe.R
├── data.R
├── clustering-outl-spc.R
├── select-spc.R
├── tbl-lcol-map-funprog.R
├── predict-spc.R
├── join-chem-spectra.R
├── average-spc.R
├── utils-unsupervised.R
├── select-ref-spectra.R
├── slice-spc.R
├── plot_spc.R
├── remove-outl-spectra.R
├── utils-stats.R
├── resample-spc.R
├── read-asd.R
├── preprocess-spc.R
├── pls-vip.R
├── gather-spc.R
└── plot-spc-extended.R
├── man
├── .gitignore
├── figures
│ └── simplerspec-logo.png
├── pipe.Rd
├── join_chem_spec.Rd
├── join_spc_chem.Rd
├── split_df2l.Rd
├── soilspec_yamsys.Rd
├── extract_lcols2dts.Rd
├── create_vip_rects.Rd
├── predict_from_spc.Rd
├── extract_pls_vip.Rd
├── select_spc_vars.Rd
├── select_ref_spc.Rd
├── evaluate_model.Rd
├── read_asd.Rd
├── bind_lcols_dts.Rd
├── assess_multimodels.Rd
├── read_asd_bin.Rd
├── read_opus_bin_univ.Rd
├── remove_outliers.Rd
├── slice_xvalues.Rd
├── average_spc.Rd
├── merge_dts.Rd
├── preprocess_spc.Rd
├── read_opus_univ.Rd
├── merge_dts_l.Rd
├── plot_pls_vip.Rd
├── plot_spc.Rd
├── fit_rf.Rd
├── plot_spc_ext.Rd
├── resample_spc.Rd
├── fit_pls.Rd
└── gather_spc.Rd
├── .gitignore
├── _pkgdown.yml
├── data
└── soilspec_yamsys.rda
├── .Rbuildignore
├── tic.R
├── simplerspec.Rproj
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── .github
└── workflows
│ └── tic.yml
└── README.md
/tests/.Rapp.history:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/R/simplerspec.R:
--------------------------------------------------------------------------------
1 | #' @import rlang
2 | #' @import dplyr
3 |
--------------------------------------------------------------------------------
/man/.gitignore:
--------------------------------------------------------------------------------
1 | .unison.tune_model_loocv_q.Rd.f89c7b744be0454bad4399a2fe4b87d9.unison.tmp
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .DS_Store
6 | ._*
7 | docs/
8 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://philipp-baumann.github.io/simplerspec
2 |
3 | development:
4 | mode: auto
5 |
--------------------------------------------------------------------------------
/data/soilspec_yamsys.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/philipp-baumann/simplerspec/HEAD/data/soilspec_yamsys.rda
--------------------------------------------------------------------------------
/man/figures/simplerspec-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/philipp-baumann/simplerspec/HEAD/man/figures/simplerspec-logo.png
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.travis\.yml$
4 | LICENSE\.md
5 | ^appveyor\.yml$
6 | ^tic\.R$
7 | ^\.ccache$
8 | ^\.github$
9 | _pkgdown.yml
10 |
--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
1 | #' Pipe operator
2 | #'
3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
4 | #'
5 | #' @name %>%
6 | #' @rdname pipe
7 | #' @keywords internal
8 | #' @export
9 | #' @importFrom magrittr %>%
10 | #' @usage lhs \%>\% rhs
11 | NULL
12 |
--------------------------------------------------------------------------------
/tic.R:
--------------------------------------------------------------------------------
1 | # installs dependencies, runs R CMD check, runs covr::codecov()
2 | do_package_checks()
3 |
4 | if (ci_on_ghactions() && ci_has_env("BUILD_PKGDOWN")) {
5 | # creates pkgdown site and pushes to gh-pages branch
6 | # only for the runner with the "BUILD_PKGDOWN" env var set
7 | do_pkgdown()
8 | }
9 |
--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils-pipe.R
3 | \name{\%>\%}
4 | \alias{\%>\%}
5 | \title{Pipe operator}
6 | \usage{
7 | lhs \%>\% rhs
8 | }
9 | \description{
10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
11 | }
12 | \keyword{internal}
13 |
--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
1 | #' Soil spectra and laboratory reference data from Baumann et al. (2021)
2 | #'
3 | #' Data from "Estimation of soil properties with mid-infrared soil spectroscopy
4 | #' across yam production landscapes in West Africa".
5 | #'
6 | #' @format ## `soilspec_yamsys`
7 | #' A tibble data frame with 284 rows and 40 columns. The spectra are in the
8 | #' `spc` list-column.
9 | #' @source
10 | "soilspec_yamsys"
11 |
--------------------------------------------------------------------------------
/simplerspec.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 |
--------------------------------------------------------------------------------
/man/join_chem_spec.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/join-chem-spectra.R
3 | \name{join_chem_spec}
4 | \alias{join_chem_spec}
5 | \title{Join chemical and spectral data frames}
6 | \usage{
7 | join_chem_spec(dat_chem, dat_spec, by = "sample_ID")
8 | }
9 | \arguments{
10 | \item{dat_chem}{data.frame that contains chemical values of
11 | the sample}
12 |
13 | \item{dat_spec}{List that contains spectral data}
14 |
15 | \item{by}{character of column name that defines sample_ID}
16 | }
17 | \value{
18 | List: xxx
19 | }
20 | \description{
21 | Combines spectral data (data.frame) and chemical
22 | data (data.frame).
23 | }
24 |
--------------------------------------------------------------------------------
/man/join_spc_chem.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/join-chem-spectra.R
3 | \name{join_spc_chem}
4 | \alias{join_spc_chem}
5 | \title{Join spectra data and chemical data tibbles}
6 | \usage{
7 | join_spc_chem(spc_tbl, chem_tbl, by = "sample_id")
8 | }
9 | \arguments{
10 | \item{spc_tbl}{Tibble that contains spectral data}
11 |
12 | \item{chem_tbl}{Tibble that contains chemical reference values of
13 | the samples}
14 |
15 | \item{by}{character of column name that defines sample_ID}
16 | }
17 | \value{
18 | Tibble joined by sample_id
19 | }
20 | \description{
21 | Combines spectral data (tibble class) and chemical
22 | data (tibble class).
23 | }
24 |
--------------------------------------------------------------------------------
/man/split_df2l.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tbl-lcol-map-funprog.R
3 | \name{split_df2l}
4 | \alias{split_df2l}
5 | \title{Split a tibble data frame into a list of tibbles by a group column}
6 | \usage{
7 | split_df2l(tbl_df, group)
8 | }
9 | \arguments{
10 | \item{tbl_df}{Tibble data frame}
11 |
12 | \item{group}{Character vector with name of column based on which tibble
13 | is split into a list of tibbles}
14 | }
15 | \value{
16 | List of tibbles. Each tibble contains data split by
17 | a group column within \code{tbl_df}.
18 | }
19 | \description{
20 | Helper function that calls \code{split} on a tibble using a
21 | grouping column within tibble.
22 | }
23 |
--------------------------------------------------------------------------------
/man/soilspec_yamsys.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{soilspec_yamsys}
5 | \alias{soilspec_yamsys}
6 | \title{Soil spectra and laboratory reference data from Baumann et al. (2021)}
7 | \format{
8 | \subsection{\code{soilspec_yamsys}}{
9 |
10 | A tibble data frame with 284 rows and 40 columns. The spectra are in the
11 | \code{spc} list-column.
12 | }
13 | }
14 | \source{
15 | \url{https://soil.copernicus.org/articles/7/717/2021/}
16 | }
17 | \usage{
18 | soilspec_yamsys
19 | }
20 | \description{
21 | Data from "Estimation of soil properties with mid-infrared soil spectroscopy
22 | across yam production landscapes in West Africa".
23 | }
24 | \keyword{datasets}
25 |
--------------------------------------------------------------------------------
/man/extract_lcols2dts.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tbl-lcol-map-funprog.R
3 | \name{extract_lcols2dts}
4 | \alias{extract_lcols2dts}
5 | \title{Extract multiple tibble list-columns and return data as list of
6 | data.tables}
7 | \usage{
8 | extract_lcols2dts(spc_tbl, lcols)
9 | }
10 | \arguments{
11 | \item{spc_tbl}{Spectral tibble (data frame) with spectral data contained
12 | in list-columns}
13 |
14 | \item{lcols}{Character vector containing names of list-columns to be
15 | extracted into a list of data.tables}
16 | }
17 | \value{
18 | List of data.tables. Each element is a data.table derivied from a
19 | list-column specified in the \code{lcols} argument.
20 | }
21 | \description{
22 | Extract multiple tibble list columns, row bind them separately
23 | into single data tables and return a list of data.tables.
24 | }
25 |
--------------------------------------------------------------------------------
/man/create_vip_rects.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pls-vip.R
3 | \name{create_vip_rects}
4 | \alias{create_vip_rects}
5 | \title{Create a data frame containing start and end positions (wavenumbers)
6 | where variable importance in projection (VIP) is > 1}
7 | \usage{
8 | create_vip_rects(df_vip)
9 | }
10 | \arguments{
11 | \item{df_vip}{Data frame containing \code{wavenumber} and \code{vip} columns
12 | (numeric)}
13 | }
14 | \value{
15 | Data.frame containing vectors \code{start} (numeric; wavenumber),
16 | \code{end} (numeric; wavenumber) and group (integer; values are
17 | \code{1:length(start))}.
18 | }
19 | \description{
20 | Given a data frame with VIP outputs (wavenumber and vip
21 | columns), start and end values denoting spectral regions where VIP > 1
22 | are returned as data frame. The functions can be used as helper
23 | function for plotting VIP.
24 | }
25 |
--------------------------------------------------------------------------------
/man/predict_from_spc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/predict-spc.R
3 | \name{predict_from_spc}
4 | \alias{predict_from_spc}
5 | \title{Predict soil properties of new spectra based on a list of calibration models}
6 | \usage{
7 | predict_from_spc(model_list, spc_tbl, slice = TRUE)
8 | }
9 | \arguments{
10 | \item{model_list}{List of model output generated from calibration step
11 | (\code{pls_ken_stone()}}
12 |
13 | \item{spc_tbl}{Tibble of spectra after preprocessing
14 | (\code{preprocess_spc()})}
15 |
16 | \item{slice}{Logical expression wheather only one row per sample_id returned.}
17 | }
18 | \value{
19 | tibble with new columns \code{model}, and predicted values with
20 | column names of model list.
21 | }
22 | \description{
23 | Append predictions for a set of responses specified by a list
24 | of calibration models and a tibble containing preprocessed spectra as
25 | list-columns.
26 | }
27 |
--------------------------------------------------------------------------------
/man/extract_pls_vip.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pls-vip.R
3 | \name{extract_pls_vip}
4 | \alias{extract_pls_vip}
5 | \title{Extract VIPs (variable importance in the projection) for a PLS
6 | regression model output returned from model fitting with
7 | \code{simplerspec::fit_pls()}}
8 | \usage{
9 | extract_pls_vip(mout)
10 | }
11 | \arguments{
12 | \item{mout}{Model output list returned from \code{simplerspec::fit_pls()}.}
13 | }
14 | \value{
15 | A tibble data frame with columns \code{wavenumber} and correponding
16 | VIP values in the column \code{vip} for the finally chosen PLS regression
17 | model at the final number of PLS components.
18 | }
19 | \description{
20 | VIPs are extracted based on the \code{finalModel} sublist
21 | in the \code{caret::train} output contained in the \code{model} element
22 | of the \code{simplerspec::fit_pls()} model output list. The VIPs for
23 | derived number of PLS components in the \code{finalModel} are computed.
24 | }
25 |
--------------------------------------------------------------------------------
/man/select_spc_vars.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/select-spc.R
3 | \name{select_spc_vars}
4 | \alias{select_spc_vars}
5 | \title{Select every n-th spectral variable for all spectra and x-values in spectral
6 | tibble (\code{spc_tbl})}
7 | \usage{
8 | select_spc_vars(
9 | spc_tbl,
10 | lcol_spc = "spc_pre",
11 | lcol_xvalues = "xvalues_pre",
12 | every = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{spc_tbl}{Tibble data.frame containing spectra in list-column}
17 |
18 | \item{lcol_spc}{List-column containing spectra, specified with column
19 | name as symbols or 1L character vector.}
20 |
21 | \item{lcol_xvalues}{List-column containing x-values, specified with
22 | column name as symbols or 1L character vector.}
23 |
24 | \item{every}{Every n-th spectral positions to keep as 1L integer vector.}
25 | }
26 | \value{
27 | a spectral tibble
28 | }
29 | \description{
30 | Select every n-th spectral variable for all spectra and x-values in spectral
31 | tibble (\code{spc_tbl})
32 | }
33 |
--------------------------------------------------------------------------------
/man/select_ref_spc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/select-ref-spectra.R
3 | \name{select_ref_spc}
4 | \alias{select_ref_spc}
5 | \title{Select a set of reference spectra to be measured by reference analysis
6 | methods}
7 | \usage{
8 | select_ref_spc(spc_tbl, ratio_ref, pc, print = TRUE)
9 | }
10 | \arguments{
11 | \item{spc_tbl}{Spectra as tibble objects that contain preprocessed spectra}
12 |
13 | \item{ratio_ref}{Ratio of desired reference samples to total sample number}
14 |
15 | \item{pc}{Number of principal components (numeric). If pc < 1, the number
16 | of principal components kept corresponds to the number of components
17 | explaining at least (pc * 100) percent of the total variance.}
18 |
19 | \item{print}{logical expression whether a plot (ggplot2) of sample selection
20 | for reference analysis is shown in PCA space
21 | (\code{TRUE} or \code{FALSE}).}
22 | }
23 | \description{
24 | Select a set of calibration spectra to develop spectral models.
25 | Samples in this list will be analyzed using laboratory reference methods.
26 | }
27 |
--------------------------------------------------------------------------------
/R/clustering-outl-spc.R:
--------------------------------------------------------------------------------
1 | ## Robust multivariate outlier detection method based on semi-robust principal
2 | ## components ==================================================================
3 |
4 | # Apply pcout for a nested list of matrices at a certain depth
5 | pcout_depth <- function(x, depth = 2) {
6 | purrr::modify_depth(.x = x, .depth = depth,
7 | .f = ~ mvoutlier::pcout(x = ., makeplot = FALSE))
8 | }
9 |
10 | # Extracts mvoutlier::pcout $wscat elements in nested list (default at depth 2)
11 | wscat_depth <- function(x, depth = 2) {
12 | purrr::modify_depth(.x = x, .depth = 2, .f = "wscat")
13 | }
14 |
15 | # Returns nested list containing logicals from test which scattering weights
16 | # are zero
17 | which_wscat0_depth <- function(x) {
18 | purrr::modify_depth(wscat_depth(x), 2, ~ . == 0)
19 | }
20 |
21 | # Extract $wfinal01 elements
22 | wfinal01_depth <- function(x) {
23 | purrr::modify_depth(x, 2, "wfinal01")
24 | }
25 |
26 | # Returns nested list containing logicals from test which final 0/1 weights
27 | # are zero
28 | which_wfinal0_depth <- function(x) {
29 | purrr::modify_depth(wfinal01_depth(x), 2, ~ . == 0)
30 | }
31 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Type: Package
2 | Package: simplerspec
3 | Version: 0.2.1
4 | Date: 2023-09-27
5 | Title: Soil and plant spectroscopic model building and prediction
6 | Description: Functions that cover
7 | reading of spectral data, outlier removal,
8 | spectral preprocessing, calibration sampling, PLS regression
9 | using caret, and model diagnostic statistics and plots.
10 | Authors@R: person("Philipp", "Baumann",
11 | email = "baumann-philipp@protonmail.com", role = c("aut", "cre"))
12 | URL: https://github.com/philipp-baumann/simplerspec
13 | BugReports: https://github.com/philipp-baumann/simplerspec
14 | Depends: R (>= 3.0),
15 | foreach
16 | Imports:
17 | caret,
18 | cowplot,
19 | data.table,
20 | dplyr (>= 0.7.0),
21 | e1071,
22 | ggplot2 (>= 2.0.0),
23 | hexView,
24 | magrittr,
25 | mvoutlier,
26 | modelr,
27 | pls,
28 | plyr,
29 | prospectr,
30 | purrr,
31 | reshape2,
32 | rlang (>= 0.2.0),
33 | stringr,
34 | tibble,
35 | tidyselect,
36 | tidyr,
37 | utils,
38 | glue,
39 | broom
40 | License: GPL-2
41 | LazyData: true
42 | RoxygenNote: 7.2.3
43 | Roxygen: list(markdown = TRUE)
44 | Encoding: UTF-8
45 |
--------------------------------------------------------------------------------
/man/evaluate_model.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils-stats.R
3 | \name{evaluate_model}
4 | \alias{evaluate_model}
5 | \alias{summary_df}
6 | \title{Calculate model evaluation metrics}
7 | \usage{
8 | evaluate_model(data, obs, pred)
9 |
10 | summary_df(df, x, y)
11 | }
12 | \arguments{
13 | \item{data}{\code{data.frame} with predicted and observed data in columns.}
14 |
15 | \item{obs}{Column that contains observed values, \code{symbol}/\code{name} or
16 | \code{character} (wrapped in "").}
17 |
18 | \item{pred}{Column that contains predicted values, \code{symbol}/\code{name} or
19 | \code{character} (wrapped in "").}
20 |
21 | \item{df}{\code{data.frame} with predicted and observed data in columns.}
22 |
23 | \item{x}{Column that contains observed values, \code{symbol}/\code{name} or
24 | \code{character} (wrapped in "").}
25 |
26 | \item{y}{Column that contains predicted values, \code{symbol}/\code{name} or
27 | \code{character} (wrapped in "").}
28 | }
29 | \description{
30 | Calculate summary statistics of observed values and model
31 | evaluation statistics for assessing agreement between observed (\code{obs}) and
32 | predicted (\code{pred}) values.
33 | }
34 |
--------------------------------------------------------------------------------
/man/read_asd.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read-asd.R
3 | \name{read_asd}
4 | \alias{read_asd}
5 | \title{Read ASD fieldspec spectrometer data export into into simplerspec
6 | spectra tibble.}
7 | \usage{
8 | read_asd(file)
9 | }
10 | \arguments{
11 | \item{file}{Tab delmited file from ASD software export where the first
12 | column called \code{Wavelength} contais wavelengths in nanometer and the
13 | remaining columns are sample spectra referred by an ID name provided in the
14 | first row of these columns.}
15 | }
16 | \value{
17 | Spectra data in tibble data frame (class \code{tbl_df}) that contains
18 | columns \code{sample_id} (derived from 2nd and following column names of
19 | tab delimited ASD exported text file),
20 | \code{spc} (list-column of spectral matrices)
21 | and \code{wavelengths} (list-column containing wavelength vectors).
22 | }
23 | \description{
24 | Read tab delimited text (.txt) files exported from ASD field
25 | spectrometer into simplerspec spectra tibble.
26 | ASD Fieldspec data files are expected in .txt tab delimited file format.
27 | The first row should contain
28 | the name 'Wavelength' for the first column and the file names for the
29 | remaining columns.
30 | }
31 |
--------------------------------------------------------------------------------
/man/bind_lcols_dts.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot-spc-extended.R
3 | \name{bind_lcols_dts}
4 | \alias{bind_lcols_dts}
5 | \title{Bind list-columns within a tibble into a list of data.tables}
6 | \usage{
7 | bind_lcols_dts(spc_tbl, lcols, spc_id = "unique_id", group_id = "sample_id")
8 | }
9 | \arguments{
10 | \item{spc_tbl}{Spectral data in a tibble data frame (classes "tibble_df",
11 | "tbl" and "data.frame").}
12 |
13 | \item{lcols}{Character vector of column names of list-columns to be bound
14 | into a list of data.tables}
15 |
16 | \item{spc_id}{Character vector denoting column name for a unique spectrum ID.
17 | Default is \code{"unique_id"}.}
18 |
19 | \item{group_id}{Character vector denoting column name for the spectrum group
20 | ID. Default is \code{"sample_id"}. The group ID can later be used for
21 | plotting spectra by group (e.g. by using different colors or panels).}
22 | }
23 | \value{
24 | A list of data.tables. Elements contain data from list-columns
25 | specified in \code{lcols} argument as data.tables. All data.tables contain in
26 | addition \code{spc_id} and \code{group_id} columns.
27 | }
28 | \description{
29 | Bind one to many list-columns in spectral tibble into a list
30 | of data.tables.
31 | }
32 |
--------------------------------------------------------------------------------
/man/assess_multimodels.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils-stats.R
3 | \name{assess_multimodels}
4 | \alias{assess_multimodels}
5 | \title{Assess multiple pairs of measured and predicted values}
6 | \usage{
7 | assess_multimodels(
8 | data,
9 | ...,
10 | .metrics = c("simplerspec", "yardstick"),
11 | .model_name = "model"
12 | )
13 | }
14 | \arguments{
15 | \item{data}{Data frame with all measured (observed) and predicted variables.}
16 |
17 | \item{...}{Multiple arguments with observed (measured)-predicted pairs,
18 | specified with \code{dplyr::vars(o = , p = )}.
19 | Column names can strings or symbols. The arguments in \code{...} need to be named.}
20 |
21 | \item{.metrics}{Character vector with package used for metrics calculation.
22 | Default is \code{"simplerspec"}, which uses
23 | \code{simplerspec::evaluate_model()}.}
24 |
25 | \item{.model_name}{String with name for the new column that specifies the
26 | model or the outcome variable. Default is \code{"model"}.}
27 | }
28 | \value{
29 | Data frame with with summary statistics for measured values and
30 | performance metrics for the pairs of measured and predicted values.
31 | }
32 | \description{
33 | Return performance metrics for test set predictions and
34 | measured values, e.g. for different model outcome variables.
35 | }
36 |
--------------------------------------------------------------------------------
/man/read_asd_bin.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read-asd.R
3 | \name{read_asd_bin}
4 | \alias{read_asd_bin}
5 | \title{Read ASD binary files and gather spectra and metadata in tibble data
6 | frame.}
7 | \usage{
8 | read_asd_bin(fnames)
9 | }
10 | \arguments{
11 | \item{fnames}{Character vector containing full paths of ASD binary files
12 | to be read}
13 | }
14 | \value{
15 | A spectral tibble (data frame) containing the follwing columns:
16 | \item{unique_id}{Character vector. Unique identifier containing file name
17 | pasted with date and time.}
18 | \item{file_id}{Character vector containing file names and exension}
19 | \item{sample_id}{Character vector containing files names without extension}
20 | \item{metadata}{List-column. List of data frames containing spectral
21 | metadata}
22 | \item{wavelengths}{List-column. List of wavelengths vectors (numeric).}
23 | \item{spc_radiance}{List-column. List of data.tables containing
24 | radiance sample spectra.}
25 | \item{spc_reference}{List-column. List of data.tables containing
26 | reference reflectance spectra.}
27 | \item{spc}{List-column. List of data.tables containing final reflectance
28 | spectra.}
29 | }
30 | \description{
31 | Read multiple ASD binary files and gather spectra and metadata
32 | into a simplerspec spectral tibble (data frame). The resulting spectral
33 | tibble is compatible with the simplerspec spectra processing and modeling
34 | framework.
35 | }
36 |
--------------------------------------------------------------------------------
/man/read_opus_bin_univ.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read-opus-universal.R
3 | \name{read_opus_bin_univ}
4 | \alias{read_opus_bin_univ}
5 | \title{Read a Bruker OPUS spectrum binary file}
6 | \usage{
7 | read_opus_bin_univ(file_path, extract = c("spc"),
8 | print_progress = TRUE, atm_comp_minus4offset = FALSE)
9 | }
10 | \arguments{
11 | \item{file_path}{Character vector with path to file}
12 |
13 | \item{extract}{Character vector of spectra types to extract from OPUS binary
14 | file. Default is \code{c("spc")}, which will extract the final spectra, e.g.
15 | expressed in absorbance (named \code{AB} in Bruker OPUS programs). Possible
16 | additional values for the character vector supplied to extract are
17 | \code{"ScSm"} (single channel spectrum of the sample measurement), \
18 | code{"ScRf"} (single channel spectrum of the reference measurment),
19 | \code{"IgSm"} (interferogram of the sample measurment) and \code{"IgRf"}
20 | (interferogram of the reference measurement).}
21 |
22 | \item{print_progress}{Logical (default \code{TRUE}) whether a message is
23 | printed when an OPUS binary file is parsed into an R list entry.}
24 |
25 | \item{atm_comp_minus4offset}{Logical whether spectra after atmospheric
26 | compensation are read with an offset of \code{-4} bites from Bruker OPUS
27 | files. Default is \code{FALSE}.}
28 | }
29 | \description{
30 | Read single binary file acquired with an
31 | Bruker Vertex FTIR Instrument
32 | }
33 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export("%>%")
4 | export(assess_multimodels)
5 | export(average_spc)
6 | export(bind_lcols_dts)
7 | export(create_vip_rects)
8 | export(evaluate_model)
9 | export(extract_lcols2dts)
10 | export(extract_pls_vip)
11 | export(fit_pls)
12 | export(fit_rf)
13 | export(gather_spc)
14 | export(join_chem_spec)
15 | export(join_spc_chem)
16 | export(merge_dts)
17 | export(merge_dts_l)
18 | export(plot_pls_vip)
19 | export(plot_spc)
20 | export(plot_spc_ext)
21 | export(pls_ken_stone)
22 | export(predict_from_spc)
23 | export(preprocess_spc)
24 | export(read_asd)
25 | export(read_asd_bin)
26 | export(read_opus_bin_univ)
27 | export(read_opus_univ)
28 | export(remove_outliers)
29 | export(resample_spc)
30 | export(select_ref_spc)
31 | export(select_spc_vars)
32 | export(slice_xvalues)
33 | export(split_df2l)
34 | export(summary_df)
35 | import(purrr)
36 | import(stats)
37 | importFrom(data.table,":=")
38 | importFrom(data.table,.SD)
39 | importFrom(data.table,data.table)
40 | importFrom(data.table,fread)
41 | importFrom(data.table,rbindlist)
42 | importFrom(data.table,setDT)
43 | importFrom(data.table,setkey)
44 | importFrom(dplyr,pull)
45 | importFrom(e1071,kurtosis)
46 | importFrom(foreach,"%do%")
47 | importFrom(foreach,"%dopar%")
48 | importFrom(magrittr,"%>%")
49 | importFrom(purrr,imap)
50 | importFrom(purrr,modify_depth)
51 | importFrom(rlang,ensym)
52 | importFrom(rlang,quo_name)
53 | importFrom(rlang,set_names)
54 | importFrom(stringr,str_replace)
55 | importFrom(tidyselect,one_of)
56 |
--------------------------------------------------------------------------------
/man/remove_outliers.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/remove-outl-spectra.R
3 | \name{remove_outliers}
4 | \alias{remove_outliers}
5 | \title{Remove outlier spectra}
6 | \usage{
7 | remove_outliers(list_spectra, remove = TRUE)
8 | }
9 | \arguments{
10 | \item{list_spectra}{List that contains averaged
11 | spectral information
12 | in list element \code{MIR_mean} (data.frame) and metadata in
13 | \code{data_meta} (data.frame).}
14 |
15 | \item{remove}{logical expression (\code{TRUE} or \code{FALSE})
16 | that specifies weather spectra shall be removed.
17 | If \code{rm = FALSE}, there will be no outlier removal}
18 | }
19 | \value{
20 | Returns list \code{spectra_out} that contains:
21 | \itemize{
22 | \item \code{MIR_mean}: Outlier removed MIR spectra as
23 | data.frame object. If \code{remove = FALSE},
24 | the function will
25 | return almost identical list identical to \code{list_spectra},
26 | except that the first \code{indices} column of the spectral
27 | data frame \code{MIR_mean} is removed
28 | (This is done for both options
29 | \code{remove = TRUE} and \code{remove = FALSE}).
30 | \item \code{data_meta}: metadata data.frame, identical
31 | as in the \code{list_spectra} input list.
32 | \item \code{plot_out}: (optional) ggplot2 graph
33 | that shows all spectra (absorbance on x-axis and wavenumber
34 | on y-axis) with outlier marked, if
35 | \code{remove = TRUE}.
36 | }
37 | }
38 | \description{
39 | Remove outlier spectra based on the
40 | \code{pcout()} function of the \code{mvoutlier} package.
41 | }
42 | \details{
43 | This is an optional function if one wants to remove
44 | outliers.
45 | }
46 |
--------------------------------------------------------------------------------
/R/select-spc.R:
--------------------------------------------------------------------------------
1 | #' Select every n-th spectral variable for all spectra and x-values in spectral
2 | #' tibble (`spc_tbl`)
3 | #'
4 | #' @param spc_tbl Tibble data.frame containing spectra in list-column
5 | #' @param lcol_spc List-column containing spectra, specified with column
6 | #' name as symbols or 1L character vector.
7 | #' @param lcol_xvalues List-column containing x-values, specified with
8 | #' column name as symbols or 1L character vector.
9 | #' @param every Every n-th spectral positions to keep as 1L integer vector.
10 | #' @return a spectral tibble
11 | #' @export
12 | select_spc_vars <- function(spc_tbl,
13 | lcol_spc = "spc_pre",
14 | lcol_xvalues = "xvalues_pre",
15 | every = NULL) {
16 | lcol_spc <- rlang::enquo(lcol_spc)
17 | lcol_spc_nm <- rlang::quo_name(lcol_spc)
18 | lcol_xvalues <- rlang::enquo(lcol_xvalues)
19 | lcol_xvalues_nm <- rlang::quo_name(lcol_xvalues)
20 |
21 | stopifnot(tibble::is_tibble(spc_tbl))
22 |
23 | if (is.null(every)) {return(spc_tbl);
24 | message("Returning `spc_tbl` and keep all variables.")}
25 |
26 | spc_lst <- dplyr::pull(spc_tbl, !!lcol_spc)
27 | spc <- data.table::rbindlist(spc_lst)
28 |
29 | pos_sel <- seq(1L, ncol(spc), every)
30 |
31 | spc_sel <- spc[, pos_sel, with = FALSE]
32 |
33 | spc_lst_out <- stats::setNames(
34 | map(purrr::transpose(spc_sel), data.table::as.data.table),
35 | names(spc_lst))
36 |
37 | xvalues <- dplyr::pull(spc_tbl, !!lcol_xvalues)
38 | xvalues_sel <- map(xvalues, ~ .x[pos_sel])
39 |
40 | dplyr::mutate(spc_tbl %>% dplyr::ungroup(),
41 | !!lcol_spc_nm := spc_lst_out, !!lcol_xvalues_nm := xvalues_sel)
42 | }
43 |
--------------------------------------------------------------------------------
/man/slice_xvalues.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/slice-spc.R
3 | \name{slice_xvalues}
4 | \alias{slice_xvalues}
5 | \title{Slice spectra into defined x-axis ranges}
6 | \usage{
7 | slice_xvalues(
8 | spc_tbl,
9 | xunit_lcol = "wavenumbers",
10 | spc_lcol = "spc",
11 | xvalues_cut = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{spc_tbl}{Spectral data in a tibble object (classes "tibble_df", "tbl"
16 | and "data.frame"). The spectra tibble is expected to contain at least
17 | the column \code{spc} (list-column with spectral matrices stored in a list)
18 | and \code{wavenumbers} or \code{wavelengths} (list-column that contains list
19 | of x-axis values).}
20 |
21 | \item{xunit_lcol}{Character vector that specifies column name where x-axis
22 | axis units are stored within \code{spc_tbl}. Default is \code{"wavenumber"}.}
23 |
24 | \item{spc_lcol}{Character vector that specifies which column (list-column)
25 | contains spectra to be sliced. Default is \code{"spc"}.}
26 |
27 | \item{xvalues_cut}{List of numeric vectors that contains upper and lower bounds of respective regions to keep in spectra. The spectral regions outside
28 | the \code{xvalues_cut} intervals will be cut out in the output spectra.}
29 | }
30 | \value{
31 | Spectral tibble (data frame with list-columns) with sliced x-axis
32 | column and spectral column. Both the x-axis list-column and the spectral
33 | tibble list-column only contain data specified within the \code{xvalues_cut}
34 | argument (list of numeric vectors).
35 | }
36 | \description{
37 | Slice spectra contained in list-column of spectral tibble
38 | (data frame). A list of x-axis value ranges can be specified. Spectra are
39 | cut based on these ranges.
40 | }
41 |
--------------------------------------------------------------------------------
/man/average_spc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/average-spc.R
3 | \name{average_spc}
4 | \alias{average_spc}
5 | \title{Average spectra in list-column by entries in grouping column}
6 | \usage{
7 | average_spc(spc_tbl, by = "sample_id", column_in = "spc_rs")
8 | }
9 | \arguments{
10 | \item{spc_tbl}{Tibble data frame containing at least the grouping column
11 | given in argument \code{by} and input spectra given in list-column \code{column_in}.}
12 |
13 | \item{by}{Character vector of length 1L or name/symbol that specifies the
14 | column by which groups of spectra are averaged. Default is \code{"sample_id"}.}
15 |
16 | \item{column_in}{Character vector of length 1L or or name/symbol that
17 | specifies the list-column that contains the inputs spectra to be averaged.
18 | Default is \code{"spc_rs"}, which are resampled spectra (i.e., resulting after
19 | preceding \code{resample_spc()} step).}
20 | }
21 | \value{
22 | Spectra tibble data frame (class \code{"tbl_df"}, \code{"tbl"}, \code{"data.frame"})
23 | with a new list-column of column name \code{"spc_mean"} at the last position,
24 | containing mean spectra with identical row replicates within the same
25 | \code{by}-group.
26 | }
27 | \description{
28 | Average spectra in list-column of spectra tibble (\code{spc_tbl}) by
29 | groups given in group column.
30 | }
31 | \details{
32 | For memory efficiency and subsequent modeling, consider slicing the
33 | extra row copies of \code{spc_mean} resulting from \code{average_spc()} for example by
34 | \itemize{
35 | \item \verb{split(x = spc_tbl, f = spc_tbl$) \%>\% lapply(., function(x) x x[1, ]) \%>\% do.call(., rbind)}
36 | \item \verb{dplyr::group_by(spc_tbl, ) \%>\% dplyr::slice(1L)}
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/man/merge_dts.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot-spc-extended.R
3 | \name{merge_dts}
4 | \alias{merge_dts}
5 | \title{Merge list-columns of spectra, x-axis values, metadata and additional
6 | measured variables into a single long form data.table}
7 | \usage{
8 | merge_dts(
9 | spc_tbl,
10 | lcols_spc = c("spc", "spc_pre"),
11 | lcol_measure = NULL,
12 | spc_id = "unique_id",
13 | group_id = "sample_id"
14 | )
15 | }
16 | \arguments{
17 | \item{spc_tbl}{Tibble data frame containing spectra, x-axis values, metadata
18 | and eventual measured variables as list-columns.}
19 |
20 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted.
21 | Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).}
22 |
23 | \item{lcol_measure}{Character vector of length 1 denoting the column name
24 | of the measure columns. This argument is optional. Default is \code{NULL},
25 | which does not extract an additional measure column.}
26 |
27 | \item{spc_id}{Character vector of column that contains a unique spectral
28 | identifier for all spectra. Default is \code{"unique_id"}.}
29 |
30 | \item{group_id}{Character vector of columns that is used assigning spectra
31 | into groups. Default is \code{"sample_id"}. The \code{group_id} can be
32 | used for later plotting and thereby visually separating spectral groups into
33 | using different colors or panels.}
34 | }
35 | \value{
36 | A single data.table containing long form aggregated data of
37 | spectra, x-axis values, metadata and an additionally measured variable.
38 | }
39 | \description{
40 | Helper function that merges all spectra and related data into
41 | a single long form data.table than can subsequently be used for plotting.
42 | }
43 |
--------------------------------------------------------------------------------
/man/preprocess_spc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/preprocess-spc.R
3 | \name{preprocess_spc}
4 | \alias{preprocess_spc}
5 | \title{Preprocess spectra}
6 | \usage{
7 | preprocess_spc(spc_tbl, select, column_in = "spc_mean", custom_function = NULL)
8 | }
9 | \arguments{
10 | \item{spc_tbl}{Tibble that contains spectra to be preprocessed within
11 | a list-column.}
12 |
13 | \item{select}{Character vector of predefined preprocessing options to be
14 | applied to the spectra list-column specified in \code{column_in}.
15 | Common prefined values are stated as abbreviated preprocessing methods and
16 | options such as \code{"sg_1_w21"}, where \code{"sg"} stands for
17 | Savitzky-Golay and \code{1} for first derivative and \code{"w21"}
18 | for a window size of 21 points.}
19 |
20 | \item{column_in}{Character vector of single list-column in \code{spc_tbl} that
21 | contain list of spectra (1 row matrix) to be processed by function supplied
22 | in \code{select}.}
23 |
24 | \item{custom_function}{A character string of a custom processing function
25 | that is later parsed (produces expression in a list) and evaluated within
26 | the function \code{preprocess_spc}.
27 | The character vector argument of \code{custom_function}
28 | needs to contain \code{"spc_raw"}, which is the single data table of spectra
29 | that results from binding a list of data.tables (spectra to preprocess)
30 | from the spectra list-column specified in \code{column_in}.
31 | An example for a value is
32 | \code{"prospectr::savitzkyGolay(X = spc_raw, m = 0, p = 3, w = 9)"}.
33 | Optional argument. Default is \code{NULL}.}
34 | }
35 | \description{
36 | Preprocesses spectra in tibble column by sample_id after
37 | averaging spectra by \code{simplerspec::average_spc()}.
38 | }
39 |
--------------------------------------------------------------------------------
/man/read_opus_univ.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read-opus-universal.R
3 | \name{read_opus_univ}
4 | \alias{read_opus_univ}
5 | \title{Read a list of Bruker OPUS spectrum binary files.}
6 | \usage{
7 | read_opus_univ(fnames, extract = c("spc"), parallel = FALSE,
8 | atm_comp_minus4offset = FALSE)
9 | }
10 | \arguments{
11 | \item{fnames}{List of character vectors containing full path names of spectra}
12 |
13 | \item{extract}{Character vector of spectra types to extract from file.
14 | Possible values are: "spc" (AB block in Bruker Opus software), "spc_nocomp"
15 | (Spectra before final atmospheric compensation; only present if background
16 | correction has been set in Opus), "ScSm" (Single channel spectrum of the
17 | sample), "ScRf" (Single channel spectrum of the sample), "IgSm" (Interferogram
18 | of the sample), "IgRf" (Interferogram of the reference). Default is
19 | \code{extract = c("spc")}.}
20 |
21 | \item{parallel}{Logical (\code{TRUE} or \code{FALSE} indicating whether
22 | files are read in parallel (multiple processors or multiple cores)).
23 | Default is \code{parallel = FALSE}. If \code{TRUE} a parallel backend needs
24 | to be registered, e.g. by using the \code{doParallel} package.}
25 |
26 | \item{atm_comp_minus4offset}{Logical whether spectra after atmospheric
27 | compensation are read with an offset of \code{-4} bites from Bruker OPUS
28 | files. Default is \code{FALSE}.}
29 | }
30 | \value{
31 | out List spectra and metadata (parameters) extracted from
32 | Bruker OPUS spectrometer files. List names are the names of the OPUS
33 | files whose spectral data were extracted.
34 | }
35 | \description{
36 | Read multiple spectral files measured with a Bruker FTIR Instrument. Files
37 | containing spectra are in OPUS binary format.
38 | \code{read_opus_univ} is a wrapper for \code{read_opus_bin_univ()})
39 | }
40 |
--------------------------------------------------------------------------------
/man/merge_dts_l.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot-spc-extended.R
3 | \name{merge_dts_l}
4 | \alias{merge_dts_l}
5 | \title{Wrapper function around \code{merge_dts()} for list of tibbles to
6 | aggregate data for plotting.}
7 | \usage{
8 | merge_dts_l(
9 | spc_tbl_l,
10 | lcols_spc = c("spc", "spc_pre"),
11 | lcol_measure = NULL,
12 | spc_id = "unique_id",
13 | group_id = "sample_id"
14 | )
15 | }
16 | \arguments{
17 | \item{spc_tbl_l}{List of spectral tibbles (data frames).}
18 |
19 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted.
20 | Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).}
21 |
22 | \item{lcol_measure}{Character vector of length 1 denoting the column name
23 | of the measure columns. This argument is optional. Default is \code{NULL},
24 | which does not extract an additional measure column.}
25 |
26 | \item{spc_id}{Character vector of column that contains a unique spectral
27 | identifier for all spectra. Default is \code{"unique_id"}.}
28 |
29 | \item{group_id}{Character vector of columns that is used assigning spectra
30 | into groups. Default is \code{"sample_id"}. The \code{group_id} can be
31 | used for later plotting and thereby visually separating spectral groups into
32 | using different colors or panels.}
33 | }
34 | \value{
35 | A single data.table containing long form aggregated data of
36 | spectra, x-axis values, metadata and an additionally measured variable.
37 | An additional column called \code{group_id_tbl} is appended. It denotes
38 | the name of the spectral tibble supplied with the list \code{spc_tbl_l}.
39 | }
40 | \description{
41 | Instead of a single spectral tibble (data frame) multiple
42 | spectral tibbles can be merged into a long-form data.table for plotting
43 | spectra and related data. For details, see
44 | \code{\link{merge_dts}}.
45 | }
46 |
--------------------------------------------------------------------------------
/R/tbl-lcol-map-funprog.R:
--------------------------------------------------------------------------------
1 | # Split a tibble data frame into a list of tibbles by a group column
2 | #' @title Split a tibble data frame into a list of tibbles by a group column
3 | #' @description Helper function that calls \code{split} on a tibble using a
4 | #' grouping column within tibble.
5 | #' @param tbl_df Tibble data frame
6 | #' @param group Character vector with name of column based on which tibble
7 | #' is split into a list of tibbles
8 | #' @return List of tibbles. Each tibble contains data split by
9 | #' a group column within \code{tbl_df}.
10 | #' @export
11 | split_df2l <- function(tbl_df, group) {
12 | split(tbl_df, tbl_df[, group])
13 | }
14 |
15 | # Extract multiple tibble list columns, row bind them separately into
16 | # single data tables and return a list of data.tables
17 | #' @title Extract multiple tibble list-columns and return data as list of
18 | #' data.tables
19 | #' @description Extract multiple tibble list columns, row bind them separately
20 | #' into single data tables and return a list of data.tables.
21 | #' @param spc_tbl Spectral tibble (data frame) with spectral data contained
22 | #' in list-columns
23 | #' @param lcols Character vector containing names of list-columns to be
24 | #' extracted into a list of data.tables
25 | #' @return List of data.tables. Each element is a data.table derivied from a
26 | #' list-column specified in the \code{lcols} argument.
27 | #' @import purrr
28 | #' @export
29 | extract_lcols2dts <- function(spc_tbl, lcols) {
30 | # Below code is first part of simplerspec::bind_lcols_dts
31 | # todo: add warning for lcols not present in spc_tbl
32 | which_bind <- colnames(spc_tbl) %in% lcols
33 | lcols_to_bind <- colnames(spc_tbl)[which_bind]
34 | names(lcols_to_bind) <- lcols_to_bind
35 | dts <- map(lcols_to_bind,
36 | function(y) {
37 | if (is.list(spc_tbl[[y]])) {
38 | # todo: Test if number of columns is equal in each data.frame or matrix
39 | # of the list-(column); if not, return a comprehensible error
40 | data.table::data.table(do.call(rbind, spc_tbl[[y]]))
41 | } else if (is.atomic((spc_tbl[[y]]))) {
42 | data.table::data.table(spc_tbl[, y])
43 | }
44 | }
45 | )
46 | }
47 |
--------------------------------------------------------------------------------
/R/predict-spc.R:
--------------------------------------------------------------------------------
1 | #' @title Predict soil properties of new spectra based on a list of calibration models
2 | #' @description Append predictions for a set of responses specified by a list
3 | #' of calibration models and a tibble containing preprocessed spectra as
4 | #' list-columns.
5 | #' @param model_list List of model output generated from calibration step
6 | #' (\code{pls_ken_stone()}
7 | #' @param spc_tbl Tibble of spectra after preprocessing
8 | #' (\code{preprocess_spc()})
9 | #' @param slice Logical expression wheather only one row per sample_id returned.
10 | #' @usage predict_from_spc(model_list, spc_tbl, slice = TRUE)
11 | #' @return tibble with new columns \code{model}, and predicted values with
12 | #' column names of model list.
13 | #' @export
14 | predict_from_spc <- function(model_list, spc_tbl, slice = TRUE) {
15 |
16 | if (all(sapply(model_list, class) == "train")) {
17 | # If model_list is a list of elements of class "train", model_list
18 | # can be directly handed over to caret::extractPrediction
19 | models <- model_list
20 | } else {
21 | # Extract pls_model elements (outputs from caret) for a list of models
22 | models <- lapply(model_list, function(x) x[["model"]])
23 | stopifnot(all(sapply(models, class) == "train"))
24 | }
25 |
26 | # Group by spectra tibble by sample_id and keep one row per sample_id
27 | if (slice == TRUE) {
28 | spc_tbl <- spc_tbl %>% dplyr::group_by(sample_id) %>%
29 | dplyr::slice(1L) %>% dplyr::ungroup()
30 | }
31 |
32 | # Collect preprocessed spectra in one data.table
33 | spc <- data.table::rbindlist(spc_tbl$spc_pre)
34 | pred_caret <- caret::extractPrediction(
35 | models,
36 | unkX = spc
37 | )
38 |
39 | # Number of caret model objects used to predict
40 | n <- length(unique(pred_caret$object))
41 | # Add sample_id from metadata of spectra to predicted values
42 | sample_id <- spc_tbl$sample_id
43 | # id column to long form data frame
44 | id <- rep(sample_id, n)
45 | pred_id <- cbind(pred_caret, sample_id = id)
46 | # Get data into wide form
47 | pred_wide <- tidyr::spread(
48 | data = pred_id, key = "object", value = "pred"
49 | )
50 |
51 | # Join predictions with tibble
52 | dplyr::inner_join(spc_tbl, pred_wide, by = "sample_id")
53 | }
54 |
--------------------------------------------------------------------------------
/R/join-chem-spectra.R:
--------------------------------------------------------------------------------
1 | ## Join chemical and spectral data ==============================
2 | #' @title Join chemical and spectral data frames
3 | #' @description Combines spectral data (data.frame) and chemical
4 | #' data (data.frame).
5 | #' @param dat_chem data.frame that contains chemical values of
6 | #' the sample
7 | #' @param dat_spec List that contains spectral data
8 | #' @return List: xxx
9 | #' @param by character of column name that defines sample_ID
10 | #' @export
11 | join_chem_spec <- function(
12 | dat_chem, dat_spec, by = "sample_ID") {
13 |
14 | # Alternative when "no visible binding for global variable":
15 | data_meta <- MIR <- MIR_pre <- ori <- MIR_mean <- NULL
16 | # http://stackoverflow.com/questions/23475309/in-r-is-it-possible-to-suppress-note-no-visible-binding-for-global-variable
17 | # Replace sample_ID by ID
18 | if(!is.data.frame(dat_chem)) {
19 | stop(dat_chem, "needs to be a data.frame", call. = FALSE)
20 | } else {
21 | colnames(dat_chem)[colnames(dat_chem) == by] <- "ID"
22 | dat_chem$ID <- as.factor(dat_chem$ID)
23 | # Select only chemical data that have no outlier spectra
24 | dat_chem <- dat_chem[dat_spec$data_meta$ID, ]
25 | ID <- as.factor(dat_spec$data_meta$ID)
26 | # Join ref analyses
27 | MIRdata <- data.frame(ID = ID)
28 | MIRdata$MIR <- dat_spec$MIR_pre
29 | MIRdata$ori <- dat_spec$MIR_mean
30 | # Joining by ID, type = "inner"
31 | MIRdata_chem <- plyr::join(dat_chem, MIRdata, type = "inner")
32 | # before dplyr::inner_join(dat_chem, MIRdata)
33 | MIRdata_chem
34 | }
35 | }
36 |
37 | ## Join spectra and chemical tibbles ===========================================
38 |
39 | #' @title Join spectra data and chemical data tibbles
40 | #' @description Combines spectral data (tibble class) and chemical
41 | #' data (tibble class).
42 | #' @param spc_tbl Tibble that contains spectral data
43 | #' @param chem_tbl Tibble that contains chemical reference values of
44 | #' the samples
45 | #' @param by character of column name that defines sample_ID
46 | #' @return Tibble joined by sample_id
47 | #' @export
48 | join_spc_chem <- function(spc_tbl, chem_tbl, by = "sample_id") {
49 |
50 | if(!tibble::is_tibble(spc_tbl)) {
51 | stop(spc_tbl, "needs to be a Tibble", call. = FALSE)
52 | } else {
53 | # Rename column sample_ID to sample_id if sample_ID exists
54 | if("sample_ID" %in% colnames(chem_tbl)) {
55 | # Create a list with new column name; use standard evaluation
56 | rename_list <- list(sample_id = "sample_ID")
57 | chem_tbl <- dplyr::rename_(chem_tbl, .dots = rename_list)
58 | }
59 | spc_tbl <- dplyr::inner_join(spc_tbl, chem_tbl)
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/man/plot_pls_vip.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pls-vip.R
3 | \name{plot_pls_vip}
4 | \alias{plot_pls_vip}
5 | \title{Plot stacked ggplot2 graphs with the Variable Importance for the
6 | Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id,
7 | and the preprocessed spectra.}
8 | \usage{
9 | plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre",
10 | by = "sample_id",
11 | xlab = expression(paste("Wavenumber [", cm^-1, "]")),
12 | ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
13 | alpha = 0.2)
14 | }
15 | \arguments{
16 | \item{mout}{Model output list that is returned from
17 | \code{simplerspec::fit_pls()}. This object contains a nested list with
18 | the \code{caret::train()} object (class \code{train}), based on which
19 | VIPs at finally selected number of PLS components are computed.}
20 |
21 | \item{y1}{Character vector of list-column name in
22 | \code{mout$data$calibration}, where spectra for bottom graph are extracted.
23 | Default is \code{"spc_mean"}, which plots the mean calibration spectra after
24 | resampling.}
25 |
26 | \item{y2}{Character string of list-column name in
27 | \code{mout$data$calibration}, where spectra for bottom graph are extracted.
28 | Default is \code{"spc_pre"}, which plots the preprocessed calibration
29 | spectra after resampling.}
30 |
31 | \item{by}{Character string that is used to assign spectra to the same group
32 | and therefore ensures that all spectra are plotted with the same colour.
33 | Default is \code{"sample_id"}}
34 |
35 | \item{xlab}{Character string of X axis title for shared x axis of stacked
36 | graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}}
37 |
38 | \item{ylab1}{Y axis title of bottom spectrum. Default is \code{"Absorbance"}.}
39 |
40 | \item{ylab2}{Y axis title of bottom spectrum. Default is
41 | \code{"Preprocessed Abs."}.}
42 |
43 | \item{alpha}{Double between 0 and 1 that defines transparency of spectra
44 | lines in returned graph (ggplot plot object).}
45 | }
46 | \description{
47 | Plot stacked ggplot2 graphs of VIP for the final
48 | PLS regression model output of the calibration (training) data set for the
49 | final number of components, raw (replicate mean) spectra, and preprocessed
50 | spectra. Regions with VIP > 1 are highlighted across the stacked graphs
51 | in beige colour rectangles. VIP calculation is implemented as described in
52 | Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection
53 | methods when multicollinearity is present. Chemometrics and Intelligent
54 | Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011
55 | }
56 |
--------------------------------------------------------------------------------
/man/plot_spc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot_spc.R
3 | \name{plot_spc}
4 | \alias{plot_spc}
5 | \title{Plot tibble spectra}
6 | \usage{
7 | plot_spc(spc_tbl, spc_tbl_2 = NULL,
8 | x_unit = "wavenumber",
9 | y = "spc", by = "unique_id",
10 | graph_id_1 = "Set 1", graph_id_2 = "Set 2",
11 | graph_id_1_col = "black", graph_id_2_col = "red",
12 | xlab = expression(paste("Wavenumber [", cm^-1, "]")),
13 | ylab = "Absorbance",
14 | alpha = 0.2, legend = TRUE)
15 | }
16 | \arguments{
17 | \item{spc_tbl}{Tibble that contains the first set of spectra to plot as
18 | list-column}
19 |
20 | \item{spc_tbl_2}{Tibble that contains the second set of spectra (optional)
21 | to plot as list-column.}
22 |
23 | \item{x_unit}{Character string describing the x axis unit. Default is
24 | \code{"wavenumber"}, which will produce a graph with wavenumbers on the
25 | x axis with reversed number. If \code{x_unit = "wavelength"}, the axis
26 | will be in regular order (lower wavelengths in nm on the left and higher
27 | on the right side of the axis).}
28 |
29 | \item{y}{Character string of list-column name in tibble where spectra of
30 | desired type are extracted to plot.}
31 |
32 | \item{by}{Character string of column that is used to group the spectra.
33 | Default is \code{"unique_id"}. If replica spectra are present in the file
34 | and processed spectra resulting after averaging need to be plotted,
35 | it is recommend to use \code{"sample_id"} as argument to group according
36 | the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl}
37 | and \code{spc_tbl_2}).}
38 |
39 | \item{graph_id_1}{Character string used for grouping the first spectra set
40 | (\code{spc_tbl}) and producing
41 | the label text accordingly. Default is \code{"Set 1"}.}
42 |
43 | \item{graph_id_2}{Character string used for grouping the second spectra set
44 | (\code{spc_tbl_2}) and producing the label text accordingly. Default is
45 | \code{"Set 2"}}
46 |
47 | \item{graph_id_1_col}{Character string for the colour of the first spectra
48 | set. Default is \code{"black"}.}
49 |
50 | \item{graph_id_2_col}{Character string for the colour of the first spectra
51 | set. Default is \code{"red"}.}
52 |
53 | \item{xlab}{Character string or mathematical expression
54 | (use \code{expression}) for the x axis title. Default is
55 | \code{expression(paste("Wavenumber [", cm^-1, "]"))}.}
56 |
57 | \item{ylab}{Character string or mathematical expression
58 | (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.}
59 |
60 | \item{alpha}{Double in between 0 and 1. Sets the transparency for the plotted
61 | spectra lines.}
62 |
63 | \item{legend}{Logical whether to plot a legend for the spectra describing
64 | its name selected in arguments \code{graph_id_1} and \code{graph_id_2}.
65 | Default is \code{TRUE}.}
66 | }
67 | \description{
68 | Plot spectra from tibble spectra objects.
69 | }
70 |
--------------------------------------------------------------------------------
/R/average-spc.R:
--------------------------------------------------------------------------------
1 | #' @title Average spectra in list-column by entries in grouping column
2 | #' @description Average spectra in list-column of spectra tibble (`spc_tbl`) by
3 | #' groups given in group column.
4 | #' @param spc_tbl Tibble data frame containing at least the grouping column
5 | #' given in argument `by` and input spectra given in list-column `column_in`.
6 | #' @param by Character vector of length 1L or name/symbol that specifies the
7 | #' column by which groups of spectra are averaged. Default is `"sample_id"`.
8 | #' @param column_in Character vector of length 1L or or name/symbol that
9 | #' specifies the list-column that contains the inputs spectra to be averaged.
10 | #' Default is `"spc_rs"`, which are resampled spectra (i.e., resulting after
11 | #' preceding `resample_spc()` step).
12 | #' @return Spectra tibble data frame (class `"tbl_df"`, `"tbl"`, `"data.frame"`)
13 | #' with a new list-column of column name `"spc_mean"` at the last position,
14 | #' containing mean spectra with identical row replicates within the same
15 | #' `by`-group.
16 | #' @details For memory efficiency and subsequent modeling, consider slicing the
17 | #' extra row copies of `spc_mean` resulting from `average_spc()` for example by
18 | #' * `split(x = spc_tbl, f = spc_tbl$) %>% lapply(., function(x) x x[1, ]) %>% do.call(., rbind)`
19 | #' * `dplyr::group_by(spc_tbl, ) %>% dplyr::slice(1L)`
20 | #' @import stats
21 | #' @importFrom data.table data.table rbindlist setkey setDT := .SD
22 | #' @importFrom rlang ensym quo_name
23 | #' @export
24 | average_spc <- function(spc_tbl, by = "sample_id", column_in = "spc_rs") {
25 |
26 | # Avoid R CMD check note: `"...no visible binding for global variable..."`
27 | spc_rs <- sample_id <- id <- NULL
28 |
29 | # Quote the symbol or the string supplied by the second and third argument
30 | column_in <- rlang::enquo(column_in)
31 | by <- rlang::enquo(by)
32 |
33 | # Combine rows of all resampled spectra in one data.table
34 | spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!column_in))
35 |
36 | # Add `id` group column to input spectra
37 | spc[, id := dplyr::pull(spc_tbl, !!by)] # spc_tbl[, by][[by]]
38 |
39 | # Average spectra, use `id` column as index for grouping
40 | data.table::setkey(spc, id)
41 | spc_mean <- spc[, lapply(.SD, mean), by = id]
42 |
43 | # Create new vector of group ID values from column `id`
44 | group_id_mean <- spc_mean[, id]
45 | # Delete sample_id column in data.table
46 | spc_mean_noid <- spc_mean[, id := NULL]
47 |
48 | # Create list of averaged spectra, one spectrum is one data.table
49 | # Use best performing alternative:
50 | # https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md
51 | spc_mean_list <- stats::setNames(
52 | map(purrr::transpose(spc_mean_noid), data.table::as.data.table),
53 | group_id_mean
54 | )
55 |
56 | # Convert averaged spectra and sample_id to tibble
57 | spc_mean_tbl <- tibble::tibble(
58 | !!by := group_id_mean, spc_mean = spc_mean_list
59 | )
60 | # Join mean spectra tibble spc_tbl_mean to spc_tbl
61 | spc_tbl_out <- dplyr::left_join(spc_tbl, spc_mean_tbl,
62 | by = rlang::quo_name(by))
63 |
64 | return(spc_tbl_out)
65 | }
66 |
--------------------------------------------------------------------------------
/R/utils-unsupervised.R:
--------------------------------------------------------------------------------
1 | # Helper functions to compute principal component analysis on spectral
2 | # data and to append scores and importance -------------------------------------
3 |
4 | pca_append_scores <- function(spc_tbl, y = "spc_pre",
5 | slice = TRUE, slice_by = sample_id,
6 | select_comps = 1:2,
7 | scale = TRUE, center = TRUE,
8 | .unnest = NULL) {
9 | var <- rlang::enquo(y)
10 | var_nm_pca <- paste0(rlang::quo_name(var), "_pca_scores")
11 | slice_by <- rlang::enquo(slice_by)
12 |
13 | if (slice) {
14 | spc_tbl <- spc_tbl %>% dplyr::group_by(!!slice_by) %>% dplyr::slice(1L)
15 | }
16 |
17 | sample_id <- spc_tbl %>% dplyr::pull(!!slice_by)
18 |
19 | # Pull the list of preprocessed spectra data.tables into one data.table
20 | spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!var))
21 |
22 | # Perform a principal component analysis
23 | spc_pca <- stats::prcomp(spc, scale = scale, center = center)
24 |
25 | # Extract PCA scores for selected principal components
26 | spc_pca_scores <- dplyr::as_data_frame(spc_pca$x[, select_comps])
27 | # Convert data frame of scores to list column
28 | spc_pca_scores <- stats::setNames(
29 | split(spc_pca_scores, seq_len(nrow(spc_pca_scores))),
30 | sample_id)
31 | ncomp <- ncol(spc_pca$rotation)
32 |
33 | # Add list-column with PCA scores to the spectral tibble object
34 | spc_tbl_pca <- tibble::add_column(spc_tbl, !!var_nm_pca := spc_pca_scores)
35 | if (!is.null(.unnest)) {
36 | # Convert variable name of new column to symbol
37 | var_sym_pca <- rlang::sym(var_nm_pca)
38 | spc_tbl_pca <- spc_tbl_pca %>% tidyr::unnest(!!var_sym_pca)
39 | }
40 | spc_tbl_pca
41 | }
42 |
43 | #' @noRd
44 | pca_append_importance <- function(spc_tbl, y = "spc_pre",
45 | slice = TRUE, slice_by = "sample_id",
46 | select_comps = 1:2,
47 | scale = TRUE, center = TRUE) {
48 | var <- rlang::enquo(y)
49 | slice_by <- rlang::enquo(slice_by)
50 |
51 | if (slice) {
52 | spc_tbl <- spc_tbl %>% dplyr::group_by(!!slice_by) %>% dplyr::slice(1L)
53 | }
54 |
55 | # Pull the list of preprocessed spectra data.tables into one data.table
56 | spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!var))
57 |
58 | # Perform a principal component analysis
59 | spc_pca <- stats::prcomp(spc, scale = scale, center = center)
60 | ncomp <- ncol(spc_pca$rotation)
61 |
62 | # // pb 20180509: broom::tidy(spc_pca) returns error; this is a bug in
63 | # broom:::tidy.prcomp(); does not work if data table without row names
64 | # Extract variance explained
65 | importance_measures <- c("sd", "var_prop", "var_cum")
66 | # "sd" := "Standard deviation"; "var_prop" := "Proportion of Variance";
67 | # "var_cum" := "Cumulative Proportion"
68 | spc_pca_varexpl <- broom::fix_data_frame(
69 | t(summary(spc_pca)$importance),
70 | newnames = importance_measures, newcol = "PC") %>%
71 | tibble::add_column(ncomp = 1:ncomp, .before = 1) %>%
72 | tibble::as_tibble()
73 |
74 | # Return list of spectral tibble with pc scores attached, and
75 | # the PCA importance measures in a tidy data frame
76 | spc_pca_varexpl
77 | }
78 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # simplerspec 0.2.1
4 |
5 | - `fit_pls()`: Fixed the join of predicted vs. measured values when choosing
6 | evaluation with cross-validation; now predictions and model evaluation
7 | statistics are reported at best `ncomp`. Instead of an `dplyr::inner_join()`,
8 | all cross-validated predictions were done with `dplyr::anti_join()`,
9 | specifically when using `evaluation_method == "resampling"` together with
10 | `tuning_method == "resampling"`. This resulted in predictions incorrectly
11 | being aggregated for all tested but not best ncomp (calculated in caret).
12 | Thus, also the cross-validation metrics were not correctly reported what
13 | should have been the case at optimal `ncomp` derived based on resampling and
14 | model tuning. The fix now correctly does an inner join, so that only the
15 | values at best `ncomp` are extracted and used for the evaluation statistics.
16 | This is also shown on the resulting plot outputs.
17 |
18 | # simplerspec 0.2.0
19 |
20 | - Add new example data set `soilspec_yamsys`.
21 | - `gather_spc()`: Consolidate the documentation with details on how data are matched from list and gathered into a spectra tibble.
22 |
23 | # simplerspec 0.1.0.9001
24 |
25 | * `resample_spc()` now supports flexible spectra and x-axis types as inputs. Its
26 | interface has been carefully augmented without breaking previous
27 | functionality ([#9](https://github.com/philipp-baumann/simplerspec/issues/9)
28 | * New argument `column_in` specifies the string or name (unquoting support)
29 | of the input column that contains the list of spectra. The following
30 | spectrum types, which are automatically matched against the list-column that
31 | contains the corresponding x-unit value vectors, are currently supported:
32 | `spc` (raw or unprocessed spectra), `spc_rs` (resampled spectra),
33 | `spc_mean` (mean spectra), `spc_nocomp` (spectra prior atmospheric
34 | compensation), `sc_sm` (single channel sample spectra), `sc_rf` (single
35 | channel reference spectra), `spc_pre` (preprocessed spectra).
36 | * New argument `interpol_method` specifying the interpolation method is
37 | introduced. Default is `"linear"` to achieve identical results with both
38 | prospectr v0.1.0 and v0.2.0. The current CRAN prospectr v0.2.0 has changed
39 | the default of `interpol` to `"spline"`. The previous `resample_spc()`
40 | unfortunatelty did not explicitly state the method internally, and relied
41 | on the default instead. The measures taken ensure downward compatibility of
42 | `resample_spc()` with previous versions of prospectr and simplerspec.
43 | * The arguments gain more defensive checks inside the function (supplied types
44 | and presence of objects in spectra).
45 | * The function components and the help are updated accordingly. Clearer
46 | vocabulary to describe the functionality and more consistent terminology for
47 | physical quantities and R objects are used.
48 |
49 | * Add UTF-8 support to DESCRIPTION because roxygen2 version 7.1.0 requires it.
50 |
51 |
52 | # simplerspec 0.1.0.9000
53 |
54 | * Start using Kirill's `{fledge}` for tracking and communicating the simplerspec
55 | development process in `NEWS`.
56 |
57 |
58 | # simplerspec 0.1.0
59 |
60 | * Added a `NEWS.md` file to track changes to the package
61 |
62 | # simplerspec 0.1.0.1
63 |
64 | * `read_opus_bin_univ()`: Add support for Bruker files that have undefined `PLF` value (`:= NULL`)
65 |
--------------------------------------------------------------------------------
/R/select-ref-spectra.R:
--------------------------------------------------------------------------------
1 | # Quick fix implementation of select_ref_samples using the tibble framework ----
2 |
3 | # Perform sampling for selection of reference samples based on spectral PCA ----
4 | #' @title Select a set of reference spectra to be measured by reference analysis
5 | #' methods
6 | #' @description Select a set of calibration spectra to develop spectral models.
7 | #' Samples in this list will be analyzed using laboratory reference methods.
8 | #' @param spc_tbl Spectra as tibble objects that contain preprocessed spectra
9 | #' @param ratio_ref Ratio of desired reference samples to total sample number
10 | #' @param pc Number of principal components (numeric). If pc < 1, the number
11 | #' of principal components kept corresponds to the number of components
12 | #' explaining at least (pc * 100) percent of the total variance.
13 | #' @param print logical expression whether a plot (ggplot2) of sample selection
14 | #' for reference analysis is shown in PCA space
15 | #' (\code{TRUE} or \code{FALSE}).
16 | #' @usage select_ref_spc(spc_tbl, ratio_ref, pc, print = TRUE)
17 | #' @export
18 | select_ref_spc <- function(spc_tbl, ratio_ref = 0.15, pc = 2,
19 | print = TRUE) {
20 |
21 | # Avoid `R CMD check` NOTE: `no visible binding for global variable ...`
22 | PC1 <- PC2 <- type <- NULL
23 |
24 | if (tibble::is_tibble(spc_tbl)) {
25 | # Slice based on sample_id if spectral data is in tibble class
26 | spc_tbl <- dplyr::group_by(spc_tbl, !!rlang::sym("sample_id")) %>%
27 | dplyr::slice(1L)
28 | # Bind list of data.tables in list-column spc_pre to one data table
29 | # containing spectral data
30 | spc_pre <- as.matrix(data.table::rbindlist(spc_tbl$spc_pre))
31 | }
32 | # Perform Kennard-Stone calibration sampling ---------------------------------
33 | sel <- prospectr::kenStone(X = spc_pre,
34 | k = round(ratio_ref * nrow(spc_pre)), pc = pc)
35 | # Select spectra tibble of reference samples based on row indices
36 | spc_ref <- spc_tbl[sel$model, ]
37 | # Select spectra tibble of prediction samples based on row indices
38 | spc_pred <- spc_tbl[-sel$model, ]
39 |
40 | # Prepare data for ggplot graphs of reference and prediction sample PC score
41 | # plots (PC1 and PC2) --------------------------------------------------------
42 | sel_df_ref <- data.frame(sel$pc[sel$model, 1:2])
43 | sel_df_ref$type <- as.factor(
44 | rep("reference analysis", nrow(sel_df_ref))
45 | )
46 | sel_df_pred <- data.frame(sel$pc[-sel$model, 1:2])
47 | # Create type column for visually differentiate reference and prediction
48 | # samples
49 | sel_df_pred$type <- as.factor(
50 | rep("model prediction", nrow(sel_df_pred)))
51 | # Bind rows of reference and prediction PC scores data frames
52 | sel_df <- rbind(sel_df_ref, sel_df_pred)
53 | # Compute ratio needed to make the figure square
54 | ratio <- with(sel_df, diff(range(PC1)) / diff(range(PC2)))
55 | # Create spectra PC score plots ----------------------------------------------
56 | p_pca <- ggplot2::ggplot(data = sel_df) +
57 | ggplot2::geom_point(
58 | ggplot2::aes(x = PC1, y = PC2, shape = type), size = 4) +
59 | ggplot2::coord_fixed(ratio = 1) +
60 | ggplot2::scale_shape_manual(values=c(19, 1)) +
61 | ggplot2::scale_colour_manual(values=c("black", "red")) +
62 | ggplot2::theme_bw() +
63 | ggplot2::theme(legend.title = ggplot2::element_blank())
64 | # Print reference and prediction samples in PC1 and PC2
65 | if (print == TRUE) {
66 | p_pca
67 | }
68 | # Return spectral tibbles for reference spectra (spc_ref),
69 | # prediction spectra (spc_pr) and ggplot object of score plots (p_pca)
70 | list(
71 | spc_ref = spc_ref,
72 | spc_pred = spc_pred,
73 | p_pca = p_pca,
74 | pc_scores = sel$pc
75 | )
76 | }
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/R/slice-spc.R:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | ## Functions to slice spectra into a set of x axis ranges. A subset of the
3 | ## spectra stored in a list-column of a spectral tibble will be selected and
4 | ## the x unit axis vector will be reduced accordingly
5 | ################################################################################
6 |
7 | # Define a set of helper functions ---------------------------------------------
8 |
9 | # Helper function to find indices of minimum distances between two vectors
10 | get_idx <- function(x, x_cut) {
11 | sapply(x, function(x) which.min(abs(x_cut - x)))
12 | }
13 |
14 | # Returns a a list of wavenumber position index sequences (type integer) from
15 | # a vector or list of upper and lower slicing boundary values in spectra x
16 | # units (xvalues_cut)
17 | slice_xvalues_idxseq <- function(spc_l, xvalues, xvalues_cut) {
18 | idx_lim <- lapply(seq_along(xvalues_cut),
19 | function(i) get_idx(x = xvalues, x_cut = xvalues_cut[i]))
20 | Map(function(from, to) seq(from, to), idx_lim[[1]], idx_lim[[2]])
21 | }
22 |
23 | # Use helper functions in final spectrum x unit slicing function ---------------
24 |
25 | #' @title Slice spectra into defined x-axis ranges
26 | #' @description Slice spectra contained in list-column of spectral tibble
27 | #' (data frame). A list of x-axis value ranges can be specified. Spectra are
28 | #' cut based on these ranges.
29 | #' @param spc_tbl Spectral data in a tibble object (classes "tibble_df", "tbl"
30 | #' and "data.frame"). The spectra tibble is expected to contain at least
31 | #' the column \code{spc} (list-column with spectral matrices stored in a list)
32 | #' and \code{wavenumbers} or \code{wavelengths} (list-column that contains list
33 | #' of x-axis values).
34 | #' @param xunit_lcol Character vector that specifies column name where x-axis
35 | #' axis units are stored within \code{spc_tbl}. Default is \code{"wavenumber"}.
36 | #' @param spc_lcol Character vector that specifies which column (list-column)
37 | #' contains spectra to be sliced. Default is \code{"spc"}.
38 | #' @param xvalues_cut List of numeric vectors that contains upper and lower bounds of respective regions to keep in spectra. The spectral regions outside
39 | #' the \code{xvalues_cut} intervals will be cut out in the output spectra.
40 | #' @return Spectral tibble (data frame with list-columns) with sliced x-axis
41 | #' column and spectral column. Both the x-axis list-column and the spectral
42 | #' tibble list-column only contain data specified within the \code{xvalues_cut}
43 | #' argument (list of numeric vectors).
44 | #' @export
45 | slice_xvalues <- function(spc_tbl, xunit_lcol = "wavenumbers", spc_lcol = "spc",
46 | xvalues_cut = NULL) {
47 | if (is.atomic(xvalues_cut)) xvalues_cut <- list(xvalues_cut)
48 | if (!is.null(xvalues_cut)) {
49 | spc_l <- spc_tbl[[spc_lcol]]
50 | xvalues <- spc_tbl[[xunit_lcol]]
51 | # Match spectral indices for columns based on xvalue ranges
52 | idxseq <- lapply(seq_along(xvalues_cut),
53 | function(i) slice_xvalues_idxseq(
54 | spc_l = spc_l, xvalues = xvalues, xvalues_cut = xvalues_cut[[i]]))
55 | idxseq_c <- Reduce(function(x,y) Map(c, x, y), idxseq)
56 | if (any(sapply(spc_l, data.table::is.data.table))) {
57 | spc_tbl[[spc_lcol]] <- Map(function(x, idx) x[, idx, with = FALSE],
58 | spc_l, idxseq_c) # idx is not a column name of any data.table
59 | } else {
60 | spc_tbl[[spc_lcol]] <- Map(function(x, idx) x[, idx], spc_l, idxseq_c)
61 | }
62 | spc_tbl[[xunit_lcol]] <- Map(function(x, idx) x[idx], xvalues, idxseq_c)
63 | }
64 | spc_tbl
65 | }
66 |
67 | # Test spectra slicing function:
68 | # tbl_sliced <- slice_xvalues(spc_tbl = spc_tbl,
69 | # xvalues_cut = list(c(1500, 1024), c(1004, 998)))
70 |
--------------------------------------------------------------------------------
/.github/workflows/tic.yml:
--------------------------------------------------------------------------------
1 | ## tic GitHub Actions template: linux-macos-windows-deploy
2 | ## revision date: 2022-11-23
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches:
7 | - main
8 | - master
9 | - cran-*
10 | pull_request:
11 | branches:
12 | - main
13 | - master
14 | # for now, CRON jobs only run on the default branch of the repo (i.e. usually on master)
15 | schedule:
16 | # * is a special character in YAML so you have to quote this string
17 | - cron: "0 4 * * *"
18 |
19 | name: tic
20 |
21 | jobs:
22 | all:
23 | runs-on: ${{ matrix.config.os }}
24 |
25 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
26 |
27 | strategy:
28 | fail-fast: false
29 | matrix:
30 | config:
31 | # use a different tic template type if you do not want to build on all listed platforms
32 | - { os: windows-latest, r: "release" }
33 | - { os: macOS-latest, r: "release", pkgdown: "true", latex: "true" }
34 | - { os: ubuntu-latest, r: "devel" }
35 | - { os: ubuntu-latest, r: "release" }
36 |
37 | env:
38 | # make sure to run `tic::use_ghactions_deploy()` to set up deployment
39 | TIC_DEPLOY_KEY: ${{ secrets.TIC_DEPLOY_KEY }}
40 | # prevent rgl issues because no X11 display is available
41 | RGL_USE_NULL: true
42 | # if you use bookdown or blogdown, replace "PKGDOWN" by the respective
43 | # capitalized term. This also might need to be done in tic.R
44 | BUILD_PKGDOWN: ${{ matrix.config.pkgdown }}
45 | # use GITHUB_TOKEN from GitHub to workaround rate limits in {remotes}
46 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
47 |
48 | steps:
49 | - uses: actions/checkout@v3
50 |
51 | - uses: r-lib/actions/setup-r@v2
52 | with:
53 | r-version: ${{ matrix.config.r }}
54 | Ncpus: 4
55 |
56 | - uses: r-lib/actions/setup-tinytex@v2
57 | if: matrix.config.latex == 'true'
58 |
59 | - uses: r-lib/actions/setup-pandoc@v2
60 |
61 | # set date/week for use in cache creation
62 | # https://github.community/t5/GitHub-Actions/How-to-set-and-access-a-Workflow-variable/m-p/42970
63 | # - cache R packages daily
64 | - name: "[Cache] Prepare daily timestamp for cache"
65 | if: runner.os != 'Windows'
66 | id: date
67 | run: echo "date=$(date '+%d-%m')" >> $GITHUB_OUTPUT
68 |
69 | - name: "[Cache] Cache R packages"
70 | if: runner.os != 'Windows'
71 | uses: pat-s/always-upload-cache@v3
72 | with:
73 | path: ${{ env.R_LIBS_USER }}
74 | key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}}
75 | restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}}
76 |
77 | - name: "[Stage] Install pak"
78 | run: Rscript -e "install.packages('pak', repos = 'https://r-lib.github.io/p/pak/stable')"
79 |
80 | - name: "[Stage] Install"
81 | run: Rscript -e "if (grepl('Ubuntu', Sys.info()[['version']]) && !grepl('Under development', R.version[['status']])) {options(repos = c(CRAN = sprintf('https://packagemanager.rstudio.com/all/__linux__/%s/latest', system('lsb_release -cs', intern = TRUE))))}else{options(repos = c(CRAN = 'cloud.r-project.org'))}; pak::pkg_install('ropensci/tic')" -e "print(tic::dsl_load())" -e "tic::prepare_all_stages()" -e "tic::before_install()" -e "tic::install()"
82 |
83 | - name: "[Stage] Script"
84 | run: Rscript -e 'tic::script()'
85 |
86 | - name: "[Stage] After Success"
87 | run: Rscript -e "tic::after_success()"
88 |
89 | - name: "[Stage] Upload R CMD check artifacts"
90 | if: failure()
91 | uses: actions/upload-artifact@v2
92 | with:
93 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results
94 | path: check
95 | - name: "[Stage] Before Deploy"
96 | run: |
97 | Rscript -e "tic::before_deploy()"
98 |
99 | - name: "[Stage] Deploy"
100 | run: Rscript -e "tic::deploy()"
101 |
102 | - name: "[Stage] After Deploy"
103 | run: Rscript -e "tic::after_deploy()"
104 |
--------------------------------------------------------------------------------
/man/fit_rf.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pls-modeling.R
3 | \name{fit_rf}
4 | \alias{fit_rf}
5 | \title{Calibration sampling, and random forest model tuning and evaluation}
6 | \usage{
7 | fit_rf(
8 | spec_chem,
9 | response,
10 | variable = NULL,
11 | evaluation_method = "test_set",
12 | validation = NULL,
13 | split_method = "ken_stone",
14 | ratio_val,
15 | ken_sto_pc = 2,
16 | pc = NULL,
17 | invert = TRUE,
18 | tuning_method = "resampling",
19 | resampling_seed = 123,
20 | cv = NULL,
21 | ntree_max = 500,
22 | print = TRUE,
23 | env = parent.frame()
24 | )
25 | }
26 | \arguments{
27 | \item{spec_chem}{Tibble that contains spectra, metadata and chemical
28 | reference as list-columns. The tibble to be supplied to \code{spec_chem} can
29 | be generated by the \verb{join_chem_spc() function}}
30 |
31 | \item{response}{Response variable as symbol or name
32 | (without quotes, no character string). The provided response symbol needs to be
33 | a column name in the \code{spec_chem} tibble.}
34 |
35 | \item{variable}{Depreciated and replaced by \code{response}}
36 |
37 | \item{evaluation_method}{Character string stating evaluation method.
38 | Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
39 | will split the data into a calibration (training) and validation (test) set,
40 | and evaluate the final model by predicting on the validation set.
41 | If \code{"resampling"}, the finally selected model will be evaluated based
42 | on the cross-validation hold-out predictions.}
43 |
44 | \item{validation}{Depreciated and replaced by \code{evaluation_method}.
45 | Default is \code{TRUE}.}
46 |
47 | \item{split_method}{Method how to to split the data into a independent test
48 | set. Default is \code{"ken_sto"}, which will select samples for calibration
49 | based on Kennard-Stone sampling algorithm of preprocessed spectra. The
50 | proportion of validation to the total number of samples can be specified
51 | in the argument \code{ratio_val}.
52 | \code{split_method = "random"} will create a single random split.}
53 |
54 | \item{ratio_val}{Ratio of validation (test) samples to
55 | total number of samples (calibration (training) and validation (test)).}
56 |
57 | \item{ken_sto_pc}{Number of component used
58 | for calculating mahalanobsis distance on PCA scores for computing
59 | Kennard-Stone algorithm.
60 | Default is \code{ken_sto_pc = 2}, which will use the first two PCA
61 | components.}
62 |
63 | \item{pc}{Depreciated; renamed argument is \code{ken_sto_pc}.}
64 |
65 | \item{invert}{Logical}
66 |
67 | \item{tuning_method}{Character specifying tuning method. Tuning method
68 | affects how caret selects a final tuning value set from a list of candidate
69 | values. Possible values are \code{"resampling"}, which will use a
70 | specified resampling method such as repeated k-fold cross-validation (see
71 | argument \code{resampling_method}) and the generated performance profile
72 | based on the hold-out predictions to decide on the final tuning values
73 | that lead to optimal model performance. The value \code{"none"} will force
74 | caret to compute a final model for a predefined canditate PLS tuning
75 | parameter number of PLS components. In this case, the value
76 | supplied by \code{ncomp_fixed}` is used to set model complexity at
77 | a fixed number of components.}
78 |
79 | \item{resampling_seed}{Random seed (integer) that will be used for generating
80 | resampling indices, which will be supplied to \code{caret::trainControl}.
81 | This makes sure that modeling results are constant when re-fitting.
82 | Default is \code{resampling_seed = 123}.}
83 |
84 | \item{cv}{Depreciated. Use \code{resampling_method} instead.}
85 |
86 | \item{ntree_max}{Maximum random forest trees
87 | by caret::train. Caret will aggregate a performance profile using resampling
88 | for an integer sequence from 1 to \code{ntree_max} trees.}
89 |
90 | \item{print}{Logical expression whether model evaluation graphs shall be
91 | printed}
92 |
93 | \item{env}{Environment where function is evaluated. Default is
94 | \code{parent.frame}.}
95 | }
96 | \description{
97 | Perform calibration sampling and use selected
98 | calibration set for model tuning
99 | }
100 |
--------------------------------------------------------------------------------
/man/plot_spc_ext.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot-spc-extended.R
3 | \name{plot_spc_ext}
4 | \alias{plot_spc_ext}
5 | \title{ggplot2 wrapper for extended spectra plotting}
6 | \usage{
7 | plot_spc_ext(
8 | spc_tbl,
9 | spc_tbl_l = NULL,
10 | lcols_spc = "spc",
11 | lcol_measure = NULL,
12 | lcol_measure_col_palette = "Spectral",
13 | lcol_measure_col_direction = -1,
14 | spc_id = "unique_id",
15 | group_id = "sample_id",
16 | group_id_order = TRUE,
17 | group_color = TRUE,
18 | group_color_palette = NULL,
19 | group_panel = TRUE,
20 | group_legend = FALSE,
21 | ncol = NULL,
22 | relabel_spc = TRUE,
23 | ylab = "Spectrum value",
24 | alpha = 0.5,
25 | line_width = 0.2,
26 | ...
27 | )
28 | }
29 | \arguments{
30 | \item{spc_tbl}{Tibble data frame containing spectra, x-axis values, metadata
31 | and eventual measured variables as list-columns.}
32 |
33 | \item{spc_tbl_l}{List of spectral tibbles (data frames). Default is
34 | \code{NULL} (argument is not used).}
35 |
36 | \item{lcols_spc}{Character vector of spectral list-columns to be extracted.
37 | Default is \code{"spc"} (raw spectra).}
38 |
39 | \item{lcol_measure}{Character vector of length 1 denoting the column name
40 | of the measure columns. This argument is optional. Default is \code{NULL},
41 | which does not extract an additional measure column.}
42 |
43 | \item{lcol_measure_col_palette}{Palette value supplied to
44 | \code{ggplot::scale_colour_brewer()}. Default is \code{"Spectral"}, but you can set
45 | it to the default argument \code{1} (will use
46 | \code{scale_colour_brewer(..., palette = 1)}).}
47 |
48 | \item{lcol_measure_col_direction}{Sets the the order of colours in the scale
49 | that is based on a measure column. Default is \code{-1} which reverses the
50 | scale. Argument is passed on to the function \code{ggplot2::sclae_colour_brewer()}
51 | as argument \code{direction}.}
52 |
53 | \item{spc_id}{Character vector denoting column name for a unique spectrum ID.
54 | Default is \code{"unique_id"}.}
55 |
56 | \item{group_id}{Character vector denoting column name for the spectrum group
57 | ID. Default is \code{"sample_id"}. The group ID is used for
58 | plotting spectra by group (e.g. by using different colors or panels).}
59 |
60 | \item{group_id_order}{Logical that specifies whether the panel names
61 | derived from a numeric \code{group_id} column are reordered using ascending
62 | numbers. Default is \code{TRUE}.}
63 |
64 | \item{group_color}{Logical defining whether spectra are colored by the column
65 | specified by \code{group_id}.}
66 |
67 | \item{group_color_palette}{Character (1L) defining the diverging colour
68 | scales from colorbrewer.org; see \code{?scale_colour_brewer} for supported
69 | diverging colur types (\code{palette} argument).}
70 |
71 | \item{group_panel}{Logical defining whether spectra are arranged into panels
72 | by groups specified in \code{group_id}. Default is \code{TRUE}.}
73 |
74 | \item{group_legend}{Logical defining whether a legend for the \code{group_id}
75 | is plotted. Default is \code{FALSE}.}
76 |
77 | \item{ncol}{Integer vector of length 1. Defines number of columns when
78 | plotting panels (facets). Default is \code{NULL} (argument not used).}
79 |
80 | \item{relabel_spc}{Logical defining whether panels are relabeled with custom
81 | names for spectra types. Default is TRUE. When \code{TRUE}, arguments
82 | from \code{relabel_spc_types} can be passed to \code{plot_spc_ext}
83 | (supported via the \code{...} (ellipsis) argument)}
84 |
85 | \item{ylab}{Character vector or vector of type \code{"expression"} created by
86 | mathematical expression created by \code{expression}. Custom annotation for
87 | y-axis of spectra}
88 |
89 | \item{alpha}{Integer of length 1, from 0 to 1. Defines transparency of
90 | spectral lines. Default is \code{0.5} (0 is completely transparent and
91 | 1 is no transparency).}
92 |
93 | \item{line_width}{Numeric vector of length 1 specifying the width of the
94 | spectral lines. Default is \code{0.2}.}
95 |
96 | \item{...}{Further arguments to be passed to \code{plot_spc_ext}. Currently,
97 | arguments of \code{relabel_spc_types} are supported.}
98 | }
99 | \value{
100 | Object of class \code{"ggplot"} (ggplot2 graph).
101 | }
102 | \description{
103 | \code{plot_spc_ext} is a custom plotting function developed
104 | within the simplerspec framework. Returns plots based on ggplot2
105 | (class "ggplot"). Different spectra types such as raw or preprocessed spectra
106 | and groups can be differentiated by different colors or by using panels
107 | (so called facets). Additionally, spectra can be colored based on an
108 | additional measure variable, e.g. determined by chemical reference analysis.
109 | }
110 |
--------------------------------------------------------------------------------
/man/resample_spc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/resample-spc.R
3 | \name{resample_spc}
4 | \alias{resample_spc}
5 | \title{Resample spectra in list-column to new x-axis interval}
6 | \usage{
7 | resample_spc(
8 | spc_tbl,
9 | column_in = "spc",
10 | x_unit = c("wavenumber", "wavelength"),
11 | wn_lower = 500,
12 | wn_upper = 4000,
13 | wn_interval = 2,
14 | wl_lower = 350,
15 | wl_upper = 2500,
16 | wl_interval = 1,
17 | interpol_method = c("linear", "spline")
18 | )
19 | }
20 | \arguments{
21 | \item{spc_tbl}{Spectra data embedded in a tibble object (classes
22 | \verb{"tbl_df", "tbl", "data.frame"}). The spectra tibble needs to contain at
23 | least of one of the the spectra columns \code{spc}, \code{spc_rs}, \code{spc_mean},
24 | \code{spc_nocomp}, \code{sc_sm}, \code{sc_rf}, or \code{spc_pre} (list-columns with spectral
25 | \code{data.table}s), and \code{wavenumbers} or \code{wavelengths} (list-column with vectors
26 | of x-axis values corresponding to each spectrum). The help section \emph{"Matching
27 | spectrum type and corresponding x-axis type"} describes the spectra types
28 | and corresponding x-axis types.}
29 |
30 | \item{column_in}{Character vector of length 1L or symbol/name
31 | specifying the name of list-column that contains the spectra to be resampled.}
32 |
33 | \item{x_unit}{Character vector of length 1L specifying the measurement unit
34 | of the x-axis values (list-column) of the input spectra in \code{spc_tbl}.
35 | Possible values are \code{"wavenumber"} (default) or \code{"wavelength"}. Wavenumber
36 | is a convenient unit of frequency in the mid-infrared spectral range,
37 | where wavelength is often used as spatial period for the visible and
38 | near-infrared range.}
39 |
40 | \item{wn_lower}{Numeric value of lowest wavenumber. This argument will only
41 | be used if \code{x_unit = "wavenumber"}. The value serves as starting value for
42 | the new wavenumber sequence that the spectra will be resampled upon. Default
43 | value is 500 (i.e., in reciprocal centimeters).}
44 |
45 | \item{wn_upper}{Numeric value of highest wavenumber. This argument will only
46 | be used if \verb{x_unit = "wavenumber}. The value will be used as last value of
47 | the new wavenumber sequence that the spectra will be resampled upon. Default
48 | value is 4000 (i.e., in reciprocal centimeters).}
49 |
50 | \item{wn_interval}{Numeric value of the wavenumber increment for the new
51 | wavenumber sequence that the spectra will be resampled upon. Default value
52 | is 2 (i.e., in reciprocal centimeters).}
53 |
54 | \item{wl_lower}{Numeric value of lowest wavelength. This argument will only
55 | be used if \code{x_unit = "wavelength"}. The value serves as starting value of
56 | the new wavenumber sequence that the spectra will be resampled upon.
57 | Default value is 350 (i.e. in nanometers).}
58 |
59 | \item{wl_upper}{Numeric value of highest wavelength. This argument will only
60 | be used if \code{x_unit = "wavelength"}. The value will be used as last value of
61 | the new wavenumber sequence that the spectra will be resampled upon. Default
62 | value is 2500 (i.e., in nanometers).}
63 |
64 | \item{wl_interval}{Numeric value of the wavelength increment for the new
65 | wavenumber sequence that the spectra will be resampled upon. This argument
66 | will only be used if \code{x_unit = "wavelength"}. Default value is 1 (i.e., in
67 | nanometers).}
68 |
69 | \item{interpol_method}{Character of \code{"linear"} (default) or \code{"spline"} with
70 | the interpolation method. \code{"spline"} uses a cubic spline to interpolate the
71 | input spectra at given x-axis values to new equispaced x-axis intervals.}
72 | }
73 | \value{
74 | A spectra tibble (\code{spc_tbl}) containing two added list-columns:
75 | \itemize{
76 | \item \verb{spc_rs:} Resampled spectra as list of \code{data.table}s
77 | \item \code{wavenumbers_rs} or \code{wavelengths_rs}: Resampled x-axis values as list of
78 | numeric vectors
79 | }
80 | }
81 | \description{
82 | Resamples (interpolates) different spectra types with
83 | corresponding x-axis values that are both stored in list-columns of a spectra
84 | tibble. A spectra tibble hosts spectra, x-axis vectors, metadata, and
85 | further linked data with standardized naming conventions. Data input for
86 | resampling can for example be generated with \code{simplerspec::gather_spc()}.
87 | Resampling is a key harmonizing step to process and later model spectra
88 | measured at different resolutions and spectral ranges (i.e., different
89 | spectrometer devices and/or measurement settings).
90 | }
91 | \section{Matching spectrum type and corresponding x-axis type}{
92 |
93 | The combinations of input spectrum types (\code{column_in}) and
94 | corresponding x-axis types are generated from a simple lookup list. The
95 | following key-value(s) pairs can be matched at given key, which is the column
96 | name from \code{column_in} containing the spectra.
97 | \itemize{
98 | \item \code{"spc"} : \code{"wavenumbers"} or \code{"wavelengths"} (raw spectra)
99 | \item \code{"spc_rs"} : \code{"wavenumbers_rs"} or \code{"wavelengths_rs"}) (resampled spectra)
100 | \item \code{"spc_mean"} : \code{"wavenumbers_rs"} or \code{"wavelengths_rs"} (mean spectra)
101 | \item \code{"spc_nocomp"} \code{"wavenumbers"} or \code{"wavelengths"} (spectra prior
102 | atmospheric compensation)
103 | \item \code{"sc_sm" : c("wavenumbers_sc_sm", "wavelengths_sc_sm")} (single channel
104 | sample spectra)
105 | \item \code{"sc_rf" : c("wavenumbers_sc_rf", "wavelengths_sc_rf")} (single channel
106 | reference spectra)
107 | \item \code{"spc_pre" : "xvalues_pre"} (preprocessed spectra)
108 | }
109 | }
110 |
111 |
--------------------------------------------------------------------------------
/man/fit_pls.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/pls-modeling.R
3 | \name{fit_pls}
4 | \alias{fit_pls}
5 | \alias{pls_ken_stone}
6 | \title{Calibration sampling, model tuning, and PLS regression}
7 | \usage{
8 | fit_pls(
9 | spec_chem,
10 | response,
11 | variable = NULL,
12 | center = TRUE,
13 | scale = TRUE,
14 | evaluation_method = "test_set",
15 | validation = TRUE,
16 | split_method = "ken_stone",
17 | ratio_val = 1/3,
18 | ken_sto_pc = 2,
19 | pc,
20 | invert = TRUE,
21 | tuning_method = "resampling",
22 | resampling_method = "kfold_cv",
23 | cv = NULL,
24 | resampling_seed = 123,
25 | pls_ncomp_max = 20,
26 | ncomp_fixed = 5,
27 | print = TRUE,
28 | env = parent.frame()
29 | )
30 |
31 | pls_ken_stone(
32 | spec_chem,
33 | response,
34 | variable = NULL,
35 | center = TRUE,
36 | scale = TRUE,
37 | evaluation_method = "test_set",
38 | validation = TRUE,
39 | split_method = "ken_stone",
40 | ratio_val = 1/3,
41 | ken_sto_pc = 2,
42 | pc,
43 | invert = TRUE,
44 | tuning_method = "resampling",
45 | resampling_method = "kfold_cv",
46 | cv = NULL,
47 | resampling_seed = 123,
48 | pls_ncomp_max = 20,
49 | ncomp_fixed = 5,
50 | print = TRUE,
51 | env = parent.frame()
52 | )
53 | }
54 | \arguments{
55 | \item{spec_chem}{Tibble that contains spectra, metadata and chemical
56 | reference as list-columns. The tibble to be supplied to \code{spec_chem} can
57 | be generated by the \verb{join_chem_spc() function}}
58 |
59 | \item{response}{Response variable as symbol or name
60 | (without quotes, no character string). The provided response symbol needs to be
61 | a column name in the \code{spec_chem} tibble.}
62 |
63 | \item{variable}{Depreciated and replaced by \code{response}}
64 |
65 | \item{center}{Logical whether to perform mean centering of each spectrum column
66 | (e.g. wavenumber or wavelength) after common spectrum preprocessing. Default is
67 | \code{center = TRUE}}
68 |
69 | \item{scale}{Logical whether to perform standard deviation scaling
70 | of each spectrum column (e.g. wavenumber or wavelength) after common
71 | spectrum preprocessing. Default is \code{scale = TRUE}}
72 |
73 | \item{evaluation_method}{Character string stating evaluation method.
74 | Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
75 | will split the data into a calibration (training) and validation (test) set,
76 | and evaluate the final model by predicting on the validation set.
77 | If \code{"resampling"}, the finally selected model will be evaluated based
78 | on the cross-validation hold-out predictions.}
79 |
80 | \item{validation}{Depreciated and replaced by \code{evaluation_method}.
81 | Default is \code{TRUE}.}
82 |
83 | \item{split_method}{Method how to to split the data into a independent test
84 | set. Default is \code{"ken_sto"}, which will select samples for calibration
85 | based on Kennard-Stone sampling algorithm of preprocessed spectra. The
86 | proportion of validation to the total number of samples can be specified
87 | in the argument \code{ratio_val}.
88 | \code{split_method = "random"} will create a single random split.}
89 |
90 | \item{ratio_val}{Ratio of validation (test) samples to
91 | total number of samples (calibration (training) and validation (test)).}
92 |
93 | \item{ken_sto_pc}{Number of component used
94 | for calculating mahalanobsis distance on PCA scores for computing
95 | Kennard-Stone algorithm.
96 | Default is \code{ken_sto_pc = 2}, which will use the first two PCA
97 | components.}
98 |
99 | \item{pc}{Depreciated; renamed argument is \code{ken_sto_pc}.}
100 |
101 | \item{invert}{Logical}
102 |
103 | \item{tuning_method}{Character specifying tuning method. Tuning method
104 | affects how caret selects a final tuning value set from a list of candidate
105 | values. Possible values are \code{"resampling"}, which will use a
106 | specified resampling method such as repeated k-fold cross-validation (see
107 | argument \code{resampling_method}) and the generated performance profile
108 | based on the hold-out predictions to decide on the final tuning values
109 | that lead to optimal model performance. The value \code{"none"} will force
110 | caret to compute a final model for a predefined canditate PLS tuning
111 | parameter number of PLS components. In this case, the value
112 | supplied by \code{ncomp_fixed}` is used to set model complexity at
113 | a fixed number of components.}
114 |
115 | \item{resampling_method}{Character specifying resampling method. Currently,
116 | \code{"kfold_cv"} (default, performs 10-fold cross-validation),
117 | \code{"rep_kfold_cv"} (performs 5-times repeated 10-fold cross-validation),
118 | \code{"loocv"} (performs leave-one-out cross-validation), and \code{"none"}
119 | (if \code{resampling_method = "none"}) are supported.}
120 |
121 | \item{cv}{Depreciated. Use \code{resampling_method} instead.}
122 |
123 | \item{resampling_seed}{Random seed (integer) that will be used for generating
124 | resampling indices, which will be supplied to \code{caret::trainControl}.
125 | This makes sure that modeling results are constant when re-fitting.
126 | Default is \code{resampling_seed = 123}.}
127 |
128 | \item{pls_ncomp_max}{Maximum number of PLS components that are evaluated
129 | by caret::train. Caret will aggregate a performance profile using resampling
130 | for an integer sequence from 1 to \code{pls_ncomp_max}}
131 |
132 | \item{ncomp_fixed}{Integer of fixed number of PLS components. Will only be
133 | used when \code{tuning_method = "none"} and \code{resampling_method = "none"}
134 | are used.}
135 |
136 | \item{print}{Logical expression whether model evaluation graphs shall be
137 | printed}
138 |
139 | \item{env}{Environment where function is evaluated. Default is
140 | \code{parent.frame}.}
141 | }
142 | \description{
143 | Perform calibration sampling and use selected
144 | calibration set for model tuning
145 | }
146 |
--------------------------------------------------------------------------------
/man/gather_spc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/gather-spc.R
3 | \name{gather_spc}
4 | \alias{gather_spc}
5 | \title{Gather measurements of different spectra types, corresponding
6 | x-axis values and metadata from nested list.}
7 | \usage{
8 | gather_spc(data, spc_types = "spc")
9 | }
10 | \arguments{
11 | \item{data}{Recursive list named with filename (\code{file_id}) at first level
12 | entries, where each element containing a sample measurement has nested
13 | metadata (\code{"metadata"}), spectra types (see \code{spc_types}), corresponding
14 | x-axis values (see section \emph{"Details on spectra data checks and matching"}).
15 | The \code{data} list is a structural convention to organize spectra and their
16 | metadata. It follows for example the list structure returned from the Bruker
17 | OPUS binary reader \code{simplerspec::read_opus_univ()}.}
18 |
19 | \item{spc_types}{Character vector with the spectra types to be extracted
20 | from \code{data} list and gathered into list-columns. The spectra type names need
21 | to exactly follow the naming conventions, and the element names and contents
22 | need to be present at the second list hierarchy of \code{data}. These values are
23 | allowed:
24 | \itemize{
25 | \item \code{"spc"} (default): final raw spectra after atmospheric compensation, if
26 | performed (named \code{AB} in Bruker OPUS software; results from referencing
27 | sample to reference single channel reflectance and transforming to
28 | absorbance).
29 | \item \code{"spc_nocomp"}: raw spectra without atmospheric correction
30 | \item \code{"sc_sm"}: Single channel reflectance spectra of the samples
31 | \item \code{"sc_rf"}: Single channel reflectance spectra of the reference (background
32 | spectra)
33 | \item \code{"ig_sm"}: Interferograms of the sample spectra (currently only spectra
34 | without x-axis list-columns are matched and returned)
35 | \item \code{"ig_rf"}: Interferograms of the reference spectra (currently only spectra
36 | without x-axis list-columns are matched and returned)
37 | }}
38 | }
39 | \value{
40 | Spectra tibble (\code{spc_tbl} with classes \code{"tbl_df"}, \code{"tbl"}, and
41 | \code{"data.frame"}) with the following (list-)columns:
42 | \itemize{
43 | \item \code{"unique_id"}: Character vector with unique measurement identifier, likely
44 | a string with file names in combination with date and time (extracted from
45 | each \code{"metadata"} data frame column).
46 | \item \code{"file_id"} : Character vector with file name including the extension
47 | (extracted from each \code{"metadata"} data frame column).
48 | \item \code{"sample_id"}: Character vector with sample identifier. For Bruker OPUS
49 | binary files, this corresponds to the file name without the file extension
50 | in integer increments of sample replicate measurements.
51 | \item One or multiple of \code{"spc"}, \code{"spc_nocomp"}, \code{"sc_sm"}, or \code{"sc_rf"}:
52 | List(s) of data.table's containing spectra type(s).
53 | \item One or multiple of \code{"wavenumbers"}, \code{"wavelengths"}, \code{"x_values"},
54 | \code{"wavenumbers_sc_sm"}, \code{"wavelengths_sc_sm"}, \code{"x_values_sc_sm"},
55 | \code{"wavenumbers_sc_rf"}, \code{"wavelengths_sc_rf"}, or \code{"x_values_sc_rf"}:
56 | List(s) of numeric vectors with matched x-axis values (see \emph{"Details on
57 | spectra data checks and matching"} below).
58 | }
59 | }
60 | \description{
61 | Gather spectra, corresponding x-axis values, and device and
62 | measurement metadata from a nested list into a spectra tibble, so that one
63 | row represents one spectral measurement. Spectra, x-axis values and metadata
64 | are mapped from the individual list elements (named after file name including
65 | the extension) and transformed into (list-)columns of a spectra tibble,
66 | which is an extended data frame. For each measurement, spectral data and
67 | metadata are combined into one row of the tidy data frame. In addition, the ID
68 | columns \code{unique_id}, \code{file_id}, and \code{sample_id} are extracted from
69 | \code{"metadata"} (data frame) list entries and returned as identifier columns of
70 | the spectra tibble. List-columns facilitate keeping related data together in
71 | a rectangular data structure. They can be manipulated easily during
72 | subsequent transformations, for example using the standardized functions of
73 | the simplerspec data processing pipeline.
74 | }
75 | \section{Details on spectra data checks and matching}{
76 |
77 | \code{gather_spc()} checks whether these conditions are met for each measurement
78 | in the list \code{data}:
79 | \enumerate{
80 | \item Make sure that the first level \code{data} elements are named (assumed to be
81 | the file name the data originate from), and remove missing measurements with
82 | an informative message.
83 | \item Remove any duplicated file names and raise a message if there are
84 | name duplicates at first level.
85 | \item Check whether \code{spc_types} inputs are supported (see argument \code{spc_types})
86 | and present at the second level of the \code{data} list. If not, remove
87 | all data elements for incomplete spectral measurements.
88 | \item Match spectra types and possible corresponding x-axis types from
89 | a lookup list. For each selected spectrum type (left), at least one of
90 | the element names of the x-axis type (right) needs to be present for each
91 | measurement in the list \code{data}:
92 | \itemize{
93 | \item \code{"spc"} : \code{"wavenumbers"}, \code{"wavelengths"}, or \code{"x_values"}
94 | \item \code{"spc_nocomp"} : \code{"wavenumbers"}, \code{"wavelengths"}, or \code{"x_values"}
95 | \item \code{"sc_sm"} : \code{"wavenumbers_sc_sm"}, \code{"wavelengths_sc_sm"}, or
96 | \code{"x_values_sc_sm"}
97 | \item \code{"sc_rf"} : \code{"wavenumbers_sc_rf"}, \code{"wavelengths_sc_rf"}, or
98 | \code{"x_values_sc_rf"}
99 | }
100 | \item Check if \code{"metadata"} elements are present and remove data elements for
101 | measurements with missing or incorrectly named metadata elements
102 | (message).
103 | }
104 | }
105 |
106 |
--------------------------------------------------------------------------------
/R/plot_spc.R:
--------------------------------------------------------------------------------
1 | #' @title Plot tibble spectra
2 | #' @description Plot spectra from tibble spectra objects.
3 | #' @param spc_tbl Tibble that contains the first set of spectra to plot as
4 | #' list-column
5 | #' @param spc_tbl_2 Tibble that contains the second set of spectra (optional)
6 | #' to plot as list-column.
7 | #' @param x_unit Character string describing the x axis unit. Default is
8 | #' \code{"wavenumber"}, which will produce a graph with wavenumbers on the
9 | #' x axis with reversed number. If \code{x_unit = "wavelength"}, the axis
10 | #' will be in regular order (lower wavelengths in nm on the left and higher
11 | #' on the right side of the axis).
12 | #' @param y Character string of list-column name in tibble where spectra of
13 | #' desired type are extracted to plot.
14 | #' @param by Character string of column that is used to group the spectra.
15 | #' Default is \code{"unique_id"}. If replica spectra are present in the file
16 | #' and processed spectra resulting after averaging need to be plotted,
17 | #' it is recommend to use \code{"sample_id"} as argument to group according
18 | #' the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl}
19 | #' and \code{spc_tbl_2}).
20 | #' @param graph_id_1 Character string used for grouping the first spectra set
21 | #' (\code{spc_tbl}) and producing
22 | #' the label text accordingly. Default is \code{"Set 1"}.
23 | #' @param graph_id_2 Character string used for grouping the second spectra set
24 | #' (\code{spc_tbl_2}) and producing the label text accordingly. Default is
25 | #' \code{"Set 2"}
26 | #' @param graph_id_1_col Character string for the colour of the first spectra
27 | #' set. Default is \code{"black"}.
28 | #' @param graph_id_2_col Character string for the colour of the first spectra
29 | #' set. Default is \code{"red"}.
30 | #' @param xlab Character string or mathematical expression
31 | #' (use \code{expression}) for the x axis title. Default is
32 | #' \code{expression(paste("Wavenumber [", cm^-1, "]"))}.
33 | #' @param ylab Character string or mathematical expression
34 | #' (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.
35 | #' @param alpha Double in between 0 and 1. Sets the transparency for the plotted
36 | #' spectra lines.
37 | #' @param legend Logical whether to plot a legend for the spectra describing
38 | #' its name selected in arguments \code{graph_id_1} and \code{graph_id_2}.
39 | #' Default is \code{TRUE}.
40 | #' @usage plot_spc(spc_tbl, spc_tbl_2 = NULL,
41 | #' x_unit = "wavenumber",
42 | #' y = "spc", by = "unique_id",
43 | #' graph_id_1 = "Set 1", graph_id_2 = "Set 2",
44 | #' graph_id_1_col = "black", graph_id_2_col = "red",
45 | #' xlab = expression(paste("Wavenumber [", cm^-1, "]")),
46 | #' ylab = "Absorbance",
47 | #' alpha = 0.2, legend = TRUE)
48 | #' @export
49 | plot_spc <- function(spc_tbl, spc_tbl_2 = NULL,
50 | x_unit = "wavenumber",
51 | y = "spc", by = "unique_id",
52 | graph_id_1 = "Set 1", graph_id_2 = "Set 2",
53 | graph_id_1_col = "black", graph_id_2_col = "red",
54 | xlab = expression(paste("Wavenumber [", cm^-1, "]")),
55 | ylab = "Absorbance", alpha = 0.2,
56 | legend = TRUE) {
57 |
58 | # Fix `R CMD check NOTE`: "no visible binding for global variable ‘...‘"
59 | graph_id <- id <- variable <- value <- NULL
60 |
61 | # (1) Gather spectra into one data.table
62 | if (!is.null(spc_tbl_2)) {
63 | if (y == "spc") {
64 | # raw spectra are not yet data.tables and extraction is done alternatively
65 | # via do.call(rbind, list) -> a little bit slower
66 | dt_1 <- data.table::data.table(do.call(rbind, spc_tbl[, y][[y]]))
67 | dt_2 <- data.table::data.table(do.call(rbind, spc_tbl_2[, y][[y]]))
68 | } else {
69 | dt_1 <- data.table::rbindlist(spc_tbl[, y][[y]])
70 | dt_2 <- data.table::rbindlist(spc_tbl_2[, y][[y]])
71 | }
72 | } else {
73 | if (y == "spc") {
74 | # raw spectra are not yet data.tables and extraction is done alternatively
75 | # via do.call(rbind, list) -> a little bit slower
76 | dt_1 <- data.table::data.table(do.call(rbind, spc_tbl[, y][[y]]))
77 | } else {
78 | dt_1 <- data.table::rbindlist(spc_tbl[, y][[y]])
79 | }
80 | }
81 |
82 | # (2) Extract ID variable and append it to the data.table
83 | id_1 <- spc_tbl[, by][[by]]
84 | # Add a graph identity column to distiguish graphical layers for spectra
85 | # tibble comparisons
86 | graph_id_1 <- as.factor(rep(graph_id_1, nrow(spc_tbl)))
87 | dt_1[, graph_id := graph_id_1]
88 | dt_1[, id := id_1]
89 |
90 | # Only if spc_tbl_2 exists
91 | if (!is.null(spc_tbl_2)) {
92 | # (2) Extract ID variable and append it to the data.table
93 | id_2 <- spc_tbl_2[, by][[by]]
94 | # Add a graph identity column to distiguish graphical layers for spectra
95 | # tibble comparisons
96 | graph_id_2 <- as.factor(rep(graph_id_2, nrow(spc_tbl_2)))
97 | dt_2[, graph_id := graph_id_2]
98 | dt_2[, id := id_2]
99 | dt_list <- list(
100 | dt_1 = dt_1,
101 | dt_2 = dt_2
102 | )
103 | dt <- data.table::rbindlist(dt_list)
104 | } else {
105 | dt <- dt_1
106 | }
107 | # (3) Convert data.table from wide to long form
108 | dt_long <- data.table::melt(
109 | dt,
110 | measure = names(dt)[!names(dt) %in% c("id", "graph_id")]
111 | )
112 | # Convert variable column from factor to numeric
113 | dt_long[, variable := as.numeric(as.character(variable))]
114 | # (4) Plot spectra
115 | # Define nice breaks for x axis
116 | brk <- pretty(as.numeric(names(dt)[!names(dt) %in% c("id", "graph_id")]), n = 10)
117 | p <- ggplot2::ggplot(dt_long, ggplot2::aes(variable, value)) +
118 | ggplot2::labs(x = xlab, y = ylab) +
119 | ggplot2::theme_bw() +
120 | ggplot2::scale_x_reverse(breaks = brk) +
121 | # Bring graph_id_2 spectra to front
122 | # http://stackoverflow.com/questions/21120088/ggplot2-bring-one-line-to-the-front-but-save-the-colors
123 | ggplot2::geom_line(ggplot2::aes(colour = graph_id, group = id),
124 | alpha = alpha, size = 0.2
125 | ) +
126 | # scale_color_manual(values = rep("black", nrow(dt)))
127 | ggplot2::scale_color_manual(values = c(graph_id_1_col, graph_id_2_col))
128 |
129 | if ("wavelengths_rs" %in% names(spc_tbl) && x_unit == "wavelength") {
130 | p <- p +
131 | ggplot2::scale_x_continuous(breaks = brk) +
132 | ggplot2::xlab("Wavelength [nm]") +
133 | ggplot2::ylab("Reflectance")
134 | }
135 |
136 | if (legend == FALSE) {
137 | p <- p +
138 | # Remove legend
139 | ggplot2::guides(colour = FALSE)
140 | }
141 | p
142 | }
143 |
--------------------------------------------------------------------------------
/R/remove-outl-spectra.R:
--------------------------------------------------------------------------------
1 | #' @title Remove outlier spectra
2 | #' @description Remove outlier spectra based on the
3 | #' \code{pcout()} function of the \code{mvoutlier} package.
4 | #' @usage remove_outliers(list_spectra, remove = TRUE)
5 | #' @param list_spectra List that contains averaged
6 | #' spectral information
7 | #' in list element \code{MIR_mean} (data.frame) and metadata in
8 | #' \code{data_meta} (data.frame).
9 | #' @param remove logical expression (\code{TRUE} or \code{FALSE})
10 | #' that specifies weather spectra shall be removed.
11 | #' If \code{rm = FALSE}, there will be no outlier removal
12 | #' @return Returns list \code{spectra_out} that contains:
13 | #' \itemize{
14 | #' \item \code{MIR_mean}: Outlier removed MIR spectra as
15 | #' data.frame object. If \code{remove = FALSE},
16 | #' the function will
17 | #' return almost identical list identical to \code{list_spectra},
18 | #' except that the first \code{indices} column of the spectral
19 | #' data frame \code{MIR_mean} is removed
20 | #' (This is done for both options
21 | #' \code{remove = TRUE} and \code{remove = FALSE}).
22 | #' \item \code{data_meta}: metadata data.frame, identical
23 | #' as in the \code{list_spectra} input list.
24 | #' \item \code{plot_out}: (optional) ggplot2 graph
25 | #' that shows all spectra (absorbance on x-axis and wavenumber
26 | #' on y-axis) with outlier marked, if
27 | #' \code{remove = TRUE}.
28 | #' }
29 | #' @details This is an optional function if one wants to remove
30 | #' outliers.
31 | #' @export
32 | remove_outliers <- function(list_spectra, remove = TRUE) {
33 | # Outlier detection
34 | # Use the mvoutlier package and pcout function to identify
35 | # multivariate outliers
36 | wfinal01 <- ID <- NULL
37 | if (remove == TRUE) {
38 | # Remove the 'indices' column
39 | list_spectra$MIR_mean <- list_spectra$MIR_mean[, -1]
40 | out <- mvoutlier::pcout(list_spectra$MIR_mean, makeplot = T,
41 | outbound = 0.05) # parameters should be adapted
42 | # Plot outlying spectra
43 | plot_out <- plotMIR(
44 | list_spectra$MIR_mean[
45 | order(out$wfinal01, decreasing = T), ],
46 | col = as.factor(out$wfinal01[order(out$wfinal01,
47 | decreasing = T)])) +
48 | ggplot2::scale_colour_brewer("outlier", palette = "Set1")
49 | out_id <- as.character(
50 | list_spectra$data_meta$ID[!as.logical(out$wfinal01)]
51 | )
52 | # Remove outliers
53 | MIR_mean <- list_spectra$MIR_mean[
54 | ! list_spectra$data_meta$ID %in% out_id, ]
55 | # rep ID and country name
56 | data_meta <- list_spectra$data_meta[
57 | ! list_spectra$data_meta$ID %in% out_id, ]
58 | spectra_out <- list(MIR_mean = MIR_mean,
59 | data_meta = data_meta,
60 | plot_out = plot_out)
61 | } else {
62 | # Remove the 'indices' column
63 | list_spectra$MIR_mean <- list_spectra$MIR_mean[, -1]
64 | spectra_out <- list(MIR_mean = list_spectra$MIR_mean,
65 | data_meta = list_spectra$data_meta)
66 | }
67 | spectra_out
68 | }
69 |
70 | ## plotMIR function of Antoine Stevens; don't export this
71 | ## function to the NAMESPACE
72 | plotMIR <- function(spc, group = NULL, col = NULL,
73 | linetype = NULL, wr = NULL, brk = NULL,
74 | ylab = "Absorbance", xlab = "Wavenumber /cm-1",
75 | by = NULL, by.wrap = T, ...){
76 | # Function to plot spectra, based on the ggplot2 package
77 | # spc = spectral matrix, with colnames = wavelengths
78 | # group = grouping variable, usually the id's of the sample
79 | # wr = wavelength range to plot
80 | # brk = breaks of the x-axis
81 | # by = factor variable for which the mean and sd of
82 | # each level will be computed and plotted (optional)
83 | # Requires packages ggplot2; data.table; reshape2
84 | # Workaround to pass R CMD check:
85 | # http://stackoverflow.com/questions/9439256/how-can-i-handle-r-cmd-check-no-visible-binding-for-global-variable-notes-when
86 | # Setting the variables to NULL first
87 | variable <- value <- colour <- NULL
88 | spc <- as.data.frame(spc)
89 | if (!is.null(wr))
90 | spc <- spc[, as.numeric(colnames(spc)) >= min(wr) &
91 | as.numeric(colnames(spc)) <= max(wr)]
92 | if (is.null(brk))
93 | brk <- pretty(as.numeric(colnames(spc)), n = 10)
94 | if (!is.null(by)) {
95 | spc$by <- by
96 | spc <- data.table::data.table(spc, check.names = F)
97 | mean.spc <- reshape2::melt(
98 | spc[, lapply(data.table::.SD, mean), by = by],
99 | id.vars = "by"
100 | )
101 | sd.spc <- reshape2::melt(
102 | spc[, lapply(data.table::.SD, sd), by = by],
103 | id.vars = "by"
104 | )
105 | mean.spc$min <- mean.spc$value - sd.spc$value
106 | mean.spc$max <- mean.spc$value + sd.spc$value
107 | mean.spc$variable <- as.numeric(
108 | as.character(mean.spc$variable)
109 | )
110 | if (by.wrap) {
111 | p <- ggplot2::ggplot(data = mean.spc) +
112 | ggplot2::geom_ribbon(
113 | ggplot2::aes(x = variable, ymin = min, ymax = max),
114 | fill = "grey", col = "black", size = 0.15) +
115 | ggplot2::theme_bw()
116 | p <- p + ggplot2::geom_line(
117 | ggplot2::aes(x = variable, y = value),
118 | size = 0.25) +
119 | ggplot2::facet_wrap(~ by) +
120 | ggplot2::labs(x = xlab, y = ylab) +
121 | ggplot2::scale_x_reverse(breaks = brk)
122 | } else {
123 | p <- ggplot2::ggplot(data = mean.spc,
124 | ggplot2::aes(x = variable, y = value, group = by, col = by)) +
125 | ggplot2::geom_line(size = 0.25) +
126 | ggplot2::labs(x = xlab, y = ylab) +
127 | ggplot2::scale_x_reverse(breaks = brk) +
128 | ggplot2::theme_bw()
129 | }
130 | return(p)
131 | } else {
132 | if (is.null(group))
133 | group <- as.character(1:nrow(spc))
134 | spc$group <- group
135 | spc$colour <- col
136 | spc$linetype <- linetype
137 | id.var <- colnames(spc)[
138 | grep("group|colour|linetype",colnames(spc))]
139 | tmp <- reshape2::melt(spc, id.var = id.var)
140 | tmp$variable <- as.numeric(as.character(tmp$variable))
141 | p <- ggplot2::ggplot(tmp,
142 | ggplot2::aes(variable, value, group = group)) +
143 | ggplot2::labs(x = xlab, y = ylab) +
144 | ggplot2::theme_bw() +
145 | ggplot2::scale_x_reverse(breaks = brk)
146 | if (is.null(col) & is.null(linetype))
147 | p <- p + ggplot2::geom_line(
148 | ggplot2::aes(colour = group))
149 | else if (!is.null(col) & is.null(linetype))
150 | p <- p + ggplot2::geom_line(
151 | ggplot2::aes(colour = colour))
152 | else if (is.null(col) & !is.null(linetype))
153 | p <- p + ggplot2::geom_line(
154 | ggplot2::aes(colour = group,
155 | linetype = linetype))
156 | else p <- p + ggplot2::geom_line(
157 | ggplot2::aes(colour = colour,
158 | linetype = linetype))
159 | return(p)
160 | }
161 | }
162 |
163 |
--------------------------------------------------------------------------------
/R/utils-stats.R:
--------------------------------------------------------------------------------
1 | #' @title Assess multiple pairs of measured and predicted values
2 | #' @description Return performance metrics for test set predictions and
3 | #' measured values, e.g. for different model outcome variables.
4 | #' @param data Data frame with all measured (observed) and predicted variables.
5 | #' @param ... Multiple arguments with observed (measured)-predicted pairs,
6 | #' specified with \code{dplyr::vars(o = , p = )}.
7 | #' Column names can strings or symbols. The arguments in `...` need to be named.
8 | #' @param .metrics Character vector with package used for metrics calculation.
9 | #' Default is \code{"simplerspec"}, which uses
10 | #' \code{simplerspec::evaluate_model()}.
11 | #' @param .model_name String with name for the new column that specifies the
12 | #' model or the outcome variable. Default is \code{"model"}.
13 | #'
14 | #' @return Data frame with with summary statistics for measured values and
15 | #' performance metrics for the pairs of measured and predicted values.
16 | #' @importFrom purrr modify_depth imap
17 | #' @export
18 | assess_multimodels <- function(data,
19 | ...,
20 | .metrics = c("simplerspec", "yardstick"),
21 | .model_name = "model") {
22 | args <- rlang::enquos(...)
23 | args_tidy <- map(args, rlang::eval_tidy)
24 | stopifnot(
25 | all(map_int(names(args), nchar)),
26 | all(map_lgl(args_tidy, is.list))
27 | )
28 |
29 | vars_names <- modify_depth(args_tidy, .depth = 1, names)
30 | vars_names_ok <- map_lgl(vars_names, ~ all(c("o", "p") %in% .x))
31 | if (!all(vars_names_ok)) {
32 | stop("Assessment variables supplied in `vars()` need to be named with
33 | 'o' (observed) and 'p' (predicted)")}
34 |
35 | metrics <- match.arg(.metrics)
36 | # pb 2018-05-09: todo: support yardstick metrics
37 | assessment <- switch (metrics,
38 | "simplerspec" = map(
39 | .x = args_tidy,
40 | ~ evaluate_model(data = data,
41 | obs = !!.x[["o"]], pred = !!.x[["p"]]))
42 | )
43 | assessment_models <- imap(assessment,
44 | ~ tibble::add_column(.x, !!.model_name := .y, .before = 1))
45 |
46 | dplyr::bind_rows(assessment_models)
47 | }
48 |
49 |
50 | #' @title Calculate model evaluation metrics
51 | #' @description Calculate summary statistics of observed values and model
52 | #' evaluation statistics for assessing agreement between observed (`obs`) and
53 | #' predicted (`pred`) values.
54 | #' @param data `data.frame` with predicted and observed data in columns.
55 | #' @param obs Column that contains observed values, `symbol`/`name` or
56 | #' `character` (wrapped in "").
57 | #' @param pred Column that contains predicted values, `symbol`/`name` or
58 | #' `character` (wrapped in "").
59 | #' @importFrom e1071 kurtosis
60 | #' @export
61 |
62 | # Note that coefficient of determination (r2) derived from a linear regression
63 | # of observed values on the prediction
64 | # solely describes what proportion of variance in the measured data is
65 | # simulated by the model. A linear regression line between observed (x) and
66 | # predicted (y) does not provide a measure of model error!
67 |
68 | # Mean squared error (MSE) can be decomposed into squared bias/deviation (SE^2)
69 | # and mean squared variation (MSV) (:= SDE^2 := "squared standard deviation of
70 | # the error"; see e.g. Kobayashi and Salam (2000)
71 | # Gauch et al. (2003) propose a more sophisticated additive partitioning of the
72 | # MSE that is more informative about the sources of error and link to the
73 | # regression parameters; namely, these are squared bias (SB),
74 | # non-unity slope (NU) and lack of correlation (LC)
75 |
76 | evaluate_model <- function(data, obs, pred) {
77 | # Implement quasiquotation to first quote the `obs` and `pred` arguments, and
78 | # unquote/evaluate in the context of the `data` data.frame;
79 | # obs` and `pred` can be # both symbols or character;
80 | # `obs = obs` or `obs = "obs"` when the column that contains observed values
81 | # is named "obs"
82 | obs <- rlang::enquo(obs)
83 | pred <- rlang::enquo(pred)
84 | obs <- dplyr::pull(data, !!obs)
85 | pred <- dplyr::pull(data, !!pred)
86 |
87 | tibble::tibble(
88 | ## Compute descriptive statistics of the observations/measurements
89 | n = length(obs),
90 | min = min(obs, ra.rm = TRUE),
91 | max = max(obs, na.rm = TRUE),
92 | mean = mean(obs, na.rm = TRUE),
93 | median = median(obs, na.rm = TRUE),
94 | sdev = sd(obs, na.rm = TRUE),
95 | cv = sd(obs, na.rm = TRUE) / mean(obs, na.rm = TRUE),
96 | skewness_b1 = e1071::skewness(obs, na.rm = TRUE, type = 3),
97 | kurtosis = e1071::kurtosis(obs, na.rm = TRUE),
98 |
99 | ## Compute model evaluation measures to address different aspects
100 | ## of how well predictions correspond to observations/measurements
101 | # Root mean squared error
102 | rmse = mean((obs - pred)^2, na.rm = TRUE)^.5,
103 | # Mean squared error; mse^2 = me^2 + msv = me^2 + sde^2
104 | mse = mean((obs - pred)^2, na.rm = TRUE),
105 | # Terms mean error (ME) and bias are equivalent
106 | me = mean(obs - pred, na.rm = TRUE),
107 | bias = mean(obs - pred, na.rm = TRUE),
108 | # Mean squared variation (of the error); difference between the simulation
109 | # and the measurement with respect to the deviation from the means
110 | msv = mean(((mean(obs, na.rm = TRUE) - obs)
111 | - (mean(pred, na.rm = TRUE) - pred))^2),
112 | # Standard deviation of the error := SDE = MSV^0.5
113 | sde = mean(((mean(obs, na.rm = TRUE) - obs)
114 | - (mean(pred, na.rm = TRUE) - pred))^2)^0.5,
115 | # Mean absolute error
116 | mae = mean(abs(obs - pred), na.rm = TRUE),
117 | r2 = cor(obs, pred, use = "pairwise.complete.obs")^2,
118 | b = lm(obs ~ pred)$coefficients[2],
119 | # Ratio of performance to deviation
120 | rpd = sd(obs, na.rm = TRUE) / mean((obs - pred)^2, na.rm = TRUE)^.5,
121 | # Ratio of performance to interquartile range
122 | rpiq = (quantile(obs, .75, na.rm = TRUE) - quantile(obs, .25, na.rm = TRUE))
123 | / mean((obs - pred)^2, na.rm = TRUE)^.5,
124 | # See Gauch et. al., 2003) for MSD decomposition into SB, NU and LC;
125 | # Squared bias
126 | SB = (mean(obs - pred, na.rm = TRUE))^2,
127 | # Non-unity slope
128 | NU = mean((pred - mean(pred))^2) * (1 - lm(obs ~ pred)$coefficients[2])^2,
129 | # Lack of correlation
130 | LC = mean((obs - mean(obs))^2)
131 | * (1 - cor(obs, pred, use = "pairwise.complete.obs")^2),
132 | # Proportional contributions of SB, NU and LC to MSE in percent
133 | SB_prop = round((mean(obs - pred, na.rm = TRUE))^2
134 | / mean((pred - obs)^2) * 100, 0),
135 | NU_prop = round(mean((pred - mean(pred))^2)
136 | * (1 - lm(obs ~ pred)$coefficients[2])^2 / mean((pred - obs)^2) * 100, 0),
137 | LC_prop = round(mean((obs - mean(obs))^2)
138 | * (1 - cor(obs, pred, use = "pairwise.complete.obs")^2)
139 | / mean((pred - obs)^2) * 100, 0)
140 | )
141 | }
142 |
143 | # Wrapper function to ensure compatibility with old summary_df() function
144 | # !!! note that y are predicted values and x are observed values;
145 | # this is only for backward compatibility;
146 | # This deviates from the principles that observed (O) should be denoted in the
147 | # in the y-axis vs. predicted (P) in the x-axis ((OP) regressions), according
148 | # Pineiro et al. (2008)
149 | #' @rdname evaluate_model
150 | #' @param df `data.frame` with predicted and observed data in columns.
151 | #' @param x Column that contains observed values, `symbol`/`name` or
152 | #' `character` (wrapped in "").
153 | #' @param y Column that contains predicted values, `symbol`/`name` or
154 | #' `character` (wrapped in "").
155 | #' @export
156 | summary_df <- function(df, x, y) {
157 | evaluate_model(data = df, obs = x, pred = y)
158 | }
159 |
160 | # Function to calculate standard error of the mean
161 | sem_ci <- function(x) {
162 | qt(0.975, df = length(na.omit(x)) - 1) *
163 | sqrt(var(x, na.rm = TRUE) / length(na.omit(x)))
164 | }
165 |
166 | # Calculate standard error
167 | se <- function(x) {
168 | sqrt(var(x, na.rm = TRUE) / length(na.omit(x)))
169 | }
170 |
--------------------------------------------------------------------------------
/R/resample-spc.R:
--------------------------------------------------------------------------------
1 | #' @title Resample spectra in list-column to new x-axis interval
2 | #' @description Resamples (interpolates) different spectra types with
3 | #' corresponding x-axis values that are both stored in list-columns of a spectra
4 | #' tibble. A spectra tibble hosts spectra, x-axis vectors, metadata, and
5 | #' further linked data with standardized naming conventions. Data input for
6 | #' resampling can for example be generated with `simplerspec::gather_spc()`.
7 | #' Resampling is a key harmonizing step to process and later model spectra
8 | #' measured at different resolutions and spectral ranges (i.e., different
9 | #' spectrometer devices and/or measurement settings).
10 | #' @param spc_tbl Spectra data embedded in a tibble object (classes
11 | #' `"tbl_df", "tbl", "data.frame"`). The spectra tibble needs to contain at
12 | #' least of one of the the spectra columns `spc`, `spc_rs`, `spc_mean`,
13 | #' `spc_nocomp`, `sc_sm`, `sc_rf`, or `spc_pre` (list-columns with spectral
14 | #' `data.table`s), and `wavenumbers` or `wavelengths` (list-column with vectors
15 | #' of x-axis values corresponding to each spectrum). The help section *"Matching
16 | #' spectrum type and corresponding x-axis type"* describes the spectra types
17 | #' and corresponding x-axis types.
18 | #' @param column_in Character vector of length 1L or symbol/name
19 | #' specifying the name of list-column that contains the spectra to be resampled.
20 | #' @param x_unit Character vector of length 1L specifying the measurement unit
21 | #' of the x-axis values (list-column) of the input spectra in `spc_tbl`.
22 | #' Possible values are `"wavenumber"` (default) or `"wavelength"`. Wavenumber
23 | #' is a convenient unit of frequency in the mid-infrared spectral range,
24 | #' where wavelength is often used as spatial period for the visible and
25 | #' near-infrared range.
26 | #' @param wn_lower Numeric value of lowest wavenumber. This argument will only
27 | #' be used if `x_unit = "wavenumber"`. The value serves as starting value for
28 | #' the new wavenumber sequence that the spectra will be resampled upon. Default
29 | #' value is 500 (i.e., in reciprocal centimeters).
30 | #' @param wn_upper Numeric value of highest wavenumber. This argument will only
31 | #' be used if `x_unit = "wavenumber`. The value will be used as last value of
32 | #' the new wavenumber sequence that the spectra will be resampled upon. Default
33 | #' value is 4000 (i.e., in reciprocal centimeters).
34 | #' @param wn_interval Numeric value of the wavenumber increment for the new
35 | #' wavenumber sequence that the spectra will be resampled upon. Default value
36 | #' is 2 (i.e., in reciprocal centimeters).
37 | #' @param wl_lower Numeric value of lowest wavelength. This argument will only
38 | #' be used if `x_unit = "wavelength"`. The value serves as starting value of
39 | #' the new wavenumber sequence that the spectra will be resampled upon.
40 | #' Default value is 350 (i.e. in nanometers).
41 | #' @param wl_upper Numeric value of highest wavelength. This argument will only
42 | #' be used if `x_unit = "wavelength"`. The value will be used as last value of
43 | #' the new wavenumber sequence that the spectra will be resampled upon. Default
44 | #' value is 2500 (i.e., in nanometers).
45 | #' @param wl_interval Numeric value of the wavelength increment for the new
46 | #' wavenumber sequence that the spectra will be resampled upon. This argument
47 | #' will only be used if `x_unit = "wavelength"`. Default value is 1 (i.e., in
48 | #' nanometers).
49 | #' @param interpol_method Character of `"linear"` (default) or `"spline"` with
50 | #' the interpolation method. `"spline"` uses a cubic spline to interpolate the
51 | #' input spectra at given x-axis values to new equispaced x-axis intervals.
52 | #' @return A spectra tibble (`spc_tbl`) containing two added list-columns:
53 | #' * `spc_rs:` Resampled spectra as list of `data.table`s
54 | #' * `wavenumbers_rs` or `wavelengths_rs`: Resampled x-axis values as list of
55 | #' numeric vectors
56 | #' @section Matching spectrum type and corresponding x-axis type:
57 | #' The combinations of input spectrum types (`column_in`) and
58 | #' corresponding x-axis types are generated from a simple lookup list. The
59 | #' following key-value(s) pairs can be matched at given key, which is the column
60 | #' name from `column_in` containing the spectra.
61 | #' * `"spc"` : `"wavenumbers"` or `"wavelengths"` (raw spectra)
62 | #' * `"spc_rs"` : `"wavenumbers_rs"` or `"wavelengths_rs"`) (resampled spectra)
63 | #' * `"spc_mean"` : `"wavenumbers_rs"` or `"wavelengths_rs"` (mean spectra)
64 | #' * `"spc_nocomp"` `"wavenumbers"` or `"wavelengths"` (spectra prior
65 | #' atmospheric compensation)
66 | #' * `"sc_sm" : c("wavenumbers_sc_sm", "wavelengths_sc_sm")` (single channel
67 | #' sample spectra)
68 | #' * `"sc_rf" : c("wavenumbers_sc_rf", "wavelengths_sc_rf")` (single channel
69 | #' reference spectra)
70 | #' * `"spc_pre" : "xvalues_pre"` (preprocessed spectra)
71 | #' @export
72 | resample_spc <- function(spc_tbl,
73 | column_in = "spc",
74 | x_unit = c("wavenumber", "wavelength"),
75 | wn_lower = 500, wn_upper = 4000, wn_interval = 2,
76 | wl_lower = 350, wl_upper = 2500, wl_interval = 1,
77 | interpol_method = c("linear", "spline")) {
78 | # Capture user input as expressions (can be both of type character or symbol),
79 | # also called quoting; convert quosures to characters for later arg matching
80 | column_in <- rlang::enquo(column_in)
81 | column_in_chr <- rlang::quo_name(column_in)
82 |
83 | stopifnot(
84 | is.character(x_unit) && length(x_unit) > 0,
85 | is.numeric(wn_lower), is.numeric(wn_upper), is.numeric(wn_interval),
86 | is.numeric(wl_lower), is.numeric(wl_upper), is.numeric(wl_interval)
87 | )
88 |
89 | # Lookup list to match spectrum types and corresponding x-axis types
90 | spc_xaxis_types <- list(
91 | "spc" = c("wavenumbers", "wavelengths"), # raw/unprocessed
92 | "spc_rs" = c("wavenumbers_rs", "wavelengths_rs"), # resampled
93 | "spc_mean" = c("wavenumbers_rs", "wavelengths_rs"), # mean
94 | "spc_nocomp" = c("wavenumbers", "wavelengths"), # no atm. compensation
95 | "sc_sm" = c("wavenumbers_sc_sm", "wavelengths_sc_sm"), # single channel sample
96 | "sc_rf" = c("wavenumbers_sc_rf", "wavelengths_sc_rf"), # single channel reference
97 | "spc_pre" = rep("xvalues_pre", 2) # preprocessed
98 | )
99 | spctypes <- names(spc_xaxis_types)
100 | column_spc <- match.arg(column_in_chr, spctypes)
101 |
102 | x_unit <- match.arg(x_unit)
103 | switch(x_unit,
104 | wavenumber = {x_unit_int <- 1L},
105 | wavelength = {x_unit_int <- 2L})
106 |
107 | interpol_method <- match.arg(interpol_method)
108 |
109 | # Final selection of `x_unit` column name string from user input and lookup
110 | x_unit_sel <- spc_xaxis_types[[column_spc]][x_unit_int]
111 |
112 | # Both columns with X-values and input spectra need to be present in `spc_tbl`
113 | colnm <- colnames(spc_tbl)
114 | stopifnot(x_unit_sel %in% colnm, column_spc %in% colnm)
115 |
116 | # Extract list-column containing spectra
117 | spc_in_list <- dplyr::pull(spc_tbl, !!column_in)
118 |
119 | # Extract list-column containing x-axis values
120 | xvalues_in_list <- dplyr::pull(spc_tbl, !!x_unit_sel)
121 |
122 | # Automatically check the arrangement of the input x-Unit values;
123 | # often, it is convenient to have have a descending ordner of spectral columns
124 | # if the physical quantity of the x-axis is wavenumbers
125 | xvalue_order_chr <- purrr::map_chr(xvalues_in_list, seq_order)
126 |
127 | if (length(unique(xvalue_order_chr)) > 1L) {
128 | stop(
129 | glue::glue(
130 | "The column `{x_unit_sel}` which contains the list of X-values
131 | has both elements of ascending and descending order.
132 | * To resolve, you can split `spc_tbl` in a list of `spc_tbl`s
133 | with identical X-value vectors based on `group_by_col_hash()`,
134 | and apply `resample_spc()` separately to each list element.
135 | * Alternatively, you could fix the order of x-axis values
136 | for all input spectra and X-value vectors to all ascending or
137 | descending"),
138 | call. = FALSE)
139 | }
140 | xvalue_order <- xvalue_order_chr[1L]
141 |
142 | # Generate sequence of new x-axis values
143 | switch(x_unit_int,
144 | `1L` = {
145 | xvalues_out <- seq(from = wn_lower, to = wn_upper, by = wn_interval)
146 | x_unit_type_rs <- "wavenumbers_rs"
147 | },
148 | `2L` = {
149 | xvalues_out <- seq(from = wl_lower, to = wl_upper, by = wl_interval)
150 | x_unit_type_rs <- "wavelengths_rs"
151 | })
152 |
153 | if (xvalue_order == "descending") xvalues_out <- rev(xvalues_out)
154 |
155 | # Repeat sequence of new (resampled) x-axis values in list (for every obs.)
156 | xvalues_out_list <- rep(list(xvalues_out), nrow(spc_tbl))
157 | names(xvalues_out_list) <- names(spc_in_list)
158 |
159 | # Resample all spectra extracted from list-column `column_in` using prospectr
160 | spc_rs <- lapply(
161 | seq_along(spc_in_list),
162 | function(i) {
163 | data.table::data.table(
164 | prospectr::resample(
165 | X = spc_in_list[[i]], # spectral data.table to resample
166 | wav = xvalues_in_list[[i]], # old x-values vector
167 | new.wav = xvalues_out_list[[i]], # new x-values vector
168 | interpol = interpol_method
169 | )
170 | )
171 | }
172 | )
173 | names(spc_rs) <- names(spc_in_list)
174 |
175 | spc_tbl_out <-
176 | spc_tbl %>%
177 | tibble::add_column(
178 | spc_rs = spc_rs,
179 | !!x_unit_type_rs := xvalues_out_list
180 | )
181 | return(spc_tbl_out)
182 | }
183 |
184 | # Helper
185 | seq_order <- function(x) ifelse(x[1L] < x[length(x)], "ascending", "descending")
186 |
--------------------------------------------------------------------------------
/R/read-asd.R:
--------------------------------------------------------------------------------
1 | #' @title Read ASD fieldspec spectrometer data export into into simplerspec
2 | #' spectra tibble.
3 | #' @description Read tab delimited text (.txt) files exported from ASD field
4 | #' spectrometer into simplerspec spectra tibble.
5 | #' ASD Fieldspec data files are expected in .txt tab delimited file format.
6 | #' The first row should contain
7 | #' the name 'Wavelength' for the first column and the file names for the
8 | #' remaining columns.
9 | #' @param file Tab delmited file from ASD software export where the first
10 | #' column called \code{Wavelength} contais wavelengths in nanometer and the
11 | #' remaining columns are sample spectra referred by an ID name provided in the
12 | #' first row of these columns.
13 | #' @return Spectra data in tibble data frame (class `tbl_df`) that contains
14 | #' columns \code{sample_id} (derived from 2nd and following column names of
15 | #' tab delimited ASD exported text file),
16 | #' \code{spc} (list-column of spectral matrices)
17 | #' and \code{wavelengths} (list-column containing wavelength vectors).
18 | #' @importFrom tidyselect one_of
19 | #' @importFrom data.table fread
20 | #' @export
21 | read_asd <- function(file) {
22 |
23 | # Read fixed with file into a tibble
24 | asd_tbl <- tibble::as_tibble(data.table::fread(file = file))
25 | # Transpose tibble and add Wavelengths as column names
26 | asd_tbl_t <- tibble::as_tibble(
27 | t(dplyr::select(asd_tbl, - tidyselect::one_of("Wavelength")))
28 | )
29 | colnames(asd_tbl_t) <- asd_tbl[["Wavelength"]]
30 |
31 | # Split matrix by each row into list of matrices
32 | asd_m <- as.matrix(asd_tbl_t)
33 | asd_listofv <- split(asd_m, row(asd_m)) # List of numerical vectors
34 | # Convert list of vectors into list of matrices
35 | asd_listofm <- lapply(seq_along(asd_listofv),
36 | function(i) matrix(asd_listofv[[i]], nrow = 1, byrow = FALSE))
37 | # Assign file names as names for list of matrices
38 | names(asd_listofm) <- colnames(asd_tbl)[-1] # Remove "Wavelength"
39 |
40 | # Assign columnes for all matrices in list
41 | asd_listofm <- lapply(asd_listofm,
42 | function(x) {colnames(x) <- asd_tbl[["Wavelength"]]; x})
43 |
44 | # Create list of wavelengths and assign sample names
45 | wavelengths_list <- rep(list(asd_tbl[["Wavelength"]]), length(asd_listofm))
46 | names(wavelengths_list) <- names(asd_listofm)
47 |
48 | # Return spectra as tibble
49 | tibble::tibble(
50 | sample_id = names(asd_listofm),
51 | spc = asd_listofm,
52 | wavelengths = wavelengths_list
53 | )
54 |
55 | }
56 |
57 | ## Simplespec spectra tibble version of ASD reader based on prospectr::readASD
58 | ## Reads binary ASD data and converts data into list-columns containing spectral
59 | ## data that can be further processed within the simplerspec spectra processing
60 | ## framework ===================================================================
61 |
62 | #' @title Read ASD binary files and gather spectra and metadata in tibble data
63 | #' frame.
64 | #' @description Read multiple ASD binary files and gather spectra and metadata
65 | #' into a simplerspec spectral tibble (data frame). The resulting spectral
66 | #' tibble is compatible with the simplerspec spectra processing and modeling
67 | #' framework.
68 | #' @param fnames Character vector containing full paths of ASD binary files
69 | #' to be read
70 | #' @return A spectral tibble (data frame) containing the follwing columns:
71 | #' \item{unique_id}{Character vector. Unique identifier containing file name
72 | #' pasted with date and time.}
73 | #' \item{file_id}{Character vector containing file names and exension}
74 | #' \item{sample_id}{Character vector containing files names without extension}
75 | #' \item{metadata}{List-column. List of data frames containing spectral
76 | #' metadata}
77 | #' \item{wavelengths}{List-column. List of wavelengths vectors (numeric).}
78 | #' \item{spc_radiance}{List-column. List of data.tables containing
79 | #' radiance sample spectra.}
80 | #' \item{spc_reference}{List-column. List of data.tables containing
81 | #' reference reflectance spectra.}
82 | #' \item{spc}{List-column. List of data.tables containing final reflectance
83 | #' spectra.}
84 | #' @export
85 | read_asd_bin <- function(fnames) {
86 |
87 | data <- prospectr::readASD(fnames = fnames,
88 | in_format = "binary", out_format = "list")
89 | gps <- purrr::map(data, c("header", "GPS"))
90 | header <- purrr::map(purrr::map(data, "header"),
91 | function(x) x[- which(names(x) == "GPS")])
92 | file_id <- purrr::map_chr(data, "name")
93 | sample_id <- sub("(.+)\\.[[:alpha:]]+$", "\\1", file_id) # remove ".asd"
94 | datetime <- purrr::map(data, "datetime")
95 | unique_id <- mapply(function(x, y) paste0(x, "_", y), sample_id, datetime)
96 | metadata <- purrr::map(header, tibble::as_tibble)
97 | # Add GPS to metadata
98 | metadata <- purrr::map2(metadata, gps, dplyr::bind_cols)
99 | spc_l <- purrr::transpose(
100 | purrr::map(data, `[`, c("radiance", "reference", "reflectance")))
101 | wl_l <- purrr::transpose(purrr::map(data, `[`, "wavelength"))
102 | spc_dt <- purrr::modify_depth(spc_l, 2,
103 | function(x) data.table::data.table(t(x)))
104 | # Change column names of spectral data tables of all spectrum types
105 | # by reference, use character converted wavenlengths
106 | purrr::map(.x = spc_dt, ~ map2(.x = .x, .y = wl_l[["wavelength"]],
107 | ~ data.table::setnames(.x, names(.x), as.character(.y)))
108 | )
109 |
110 | tibble::tibble(
111 | unique_id = unique_id,
112 | file_id = file_id,
113 | sample_id = sample_id,
114 | metadata = metadata,
115 | wavelengths = wl_l[["wavelength"]],
116 | spc_radiance = spc_dt[["radiance"]],
117 | spc_reference = spc_dt[["reference"]],
118 | spc = spc_dt[["reflectance"]]
119 | )
120 | }
121 |
122 | # Helper function to remove the ".asd.xxx" (.xxx for example ".ref" or "")
123 | # extension in id column (e.g. sample_id) strings in tibble with metadata or
124 | # reference analysis data ------------------------------------------------------
125 |
126 | #' @importFrom stringr str_replace
127 | #' @importFrom dplyr pull
128 | remove_id_extension <- function(data,
129 | id_col = "sample_id",
130 | id_new_nm = "sample_id",
131 | extension = "\\.asd.*$") {
132 | id_col <- rlang::enquo(id_col)
133 | id_col_chr <- rlang::quo_name(id_col)
134 | id_col_rm <- rlang::expr(-!!rlang::sym(id_col_chr))
135 | id_new_nm <- rlang::quo_name(rlang::enquo(id_new_nm))
136 |
137 | id_new <- gsub(pattern = extension, replacement = "",
138 | x = dplyr::pull(data, !!id_col))
139 |
140 | # Remove old id column and bind new id column to the remaining columns
141 | rest <- dplyr::select(data, !!id_col_rm)
142 | dplyr::bind_cols(!!id_new_nm := id_new, rest)
143 | }
144 |
145 |
146 | # Helper to orrect the sensor offset for ASD spectra;
147 | # shift between VIS and VNIR1, and VNIR1 and VNIR2 ranges;
148 | # based on subtracting gaps at `Join1Wavelength` and `Join2Wavelength` column
149 | # positions in `metadata` list-column data frames ------------------------------
150 |
151 | correct_join_offset <- function(spc_tbl,
152 | lcol_spc = spc,
153 | lcol_xvalues = wavelengths,
154 | lcol_metadata = metadata) {
155 | swir2_offset <- swir1_offset <- wavelengths <- NULL
156 | lcol_spc <- rlang::enquo(lcol_spc)
157 | lcol_spc_chr <- rlang::quo_name(lcol_spc)
158 | lcol_spc_rm <- rlang::expr(-!!rlang::sym(lcol_spc_chr))
159 | lcol_xvalues <- rlang::enquo(lcol_xvalues)
160 | lcol_xvalues_chr <- rlang::quo_name(lcol_xvalues)
161 | lcol_metadata <- rlang::enquo(lcol_metadata)
162 |
163 | spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!lcol_spc))
164 | xvalues <- dplyr::pull(spc_tbl, !!lcol_xvalues)
165 | if (!all(sapply(xvalues, identical, xvalues[[1]]))) {
166 | stop(paste0("Error: Spectral tibble (`spc_tbl`) contains observations",
167 | " with unequal x unit values (`lcol_xvalues`)."))
168 | }
169 | metadata <- dplyr::pull(spc_tbl, !!lcol_metadata)
170 |
171 | join1_wavelength <- map(metadata, c("Join1Wavelength"))
172 | join2_wavelength <- map(metadata, c("Join2Wavelength"))
173 |
174 | join1_idx <- unique(
175 | purrr::map2_int(.x = xvalues, .y = join1_wavelength,
176 | ~ which.min(abs(.x - .y)))
177 | )
178 | join2_idx <- unique(
179 | purrr::map2_int(.x = xvalues, .y = join2_wavelength,
180 | ~ which.min(abs(.x - .y)))
181 | )
182 | xvalues_max_idx <- unique(map_int(xvalues, which.max))
183 |
184 | join1_col1 <- names(spc)[join1_idx]
185 | join1_col2 <- names(spc)[join1_idx + 1]
186 | join2_col1 <- names(spc)[join2_idx]
187 | join2_col2 <- names(spc)[join2_idx + 1]
188 | swir1_cols <- names(spc)[(join1_idx + 1):join2_idx]
189 | swir2_cols <- names(spc)[(join2_idx + 1):xvalues_max_idx]
190 |
191 | # Calculate the swir1 and swir2 offsets to shift spectral ranges
192 | # https://stackoverflow.com/questions/19276194/data-table-assignment-expressions-with-dynamic-inputs-existing-columns-an
193 | spc[, `:=` (
194 | swir1_offset = .SD[[join1_col2]] - .SD[[join1_col1]],
195 | swir2_offset = .SD[[join2_col2]] - .SD[[join2_col1]]),
196 | .SDcols = c(join1_col2, join1_col1, join2_col2, join2_col1)]
197 |
198 | # Substract offset(s) for SWIR1 and SWIR2, remove offset columns
199 | spc[, c(swir1_cols) := lapply(.SD,
200 | function(x) x - swir1_offset), .SDcols = swir1_cols]
201 | spc[, c(swir2_cols) := lapply(.SD,
202 | function(x) x - swir1_offset - swir2_offset), .SDcols = swir2_cols]
203 | spc[, `:=` (swir1_offset = NULL, swir2_offset = NULL)]
204 |
205 | # Remove old spectra list-column (`lcol_spc`) and
206 | # add new sensor join offset corrected spectra as list-column
207 | rest <- dplyr::select(spc_tbl, !!lcol_spc_rm)
208 | tibble::add_column(rest,
209 | # Convert `spc` single data.table back to list of data.tables
210 | # much faster than: # data.table:::split.data.table(spc, seq(nrow(spc))
211 | !!lcol_spc_chr := map(purrr::transpose(spc), data.table::as.data.table),
212 | .after = eval(substitute(lcol_xvalues_chr)))
213 | }
214 |
215 |
--------------------------------------------------------------------------------
/R/preprocess-spc.R:
--------------------------------------------------------------------------------
1 | #' @title Preprocess spectra
2 | #' @description Preprocesses spectra in tibble column by sample_id after
3 | #' averaging spectra by \code{simplerspec::average_spc()}.
4 | #' @param spc_tbl Tibble that contains spectra to be preprocessed within
5 | #' a list-column.
6 | #' @param select Character vector of predefined preprocessing options to be
7 | #' applied to the spectra list-column specified in \code{column_in}.
8 | #' Common prefined values are stated as abbreviated preprocessing methods and
9 | #' options such as \code{"sg_1_w21"}, where \code{"sg"} stands for
10 | #' Savitzky-Golay and \code{1} for first derivative and \code{"w21"}
11 | #' for a window size of 21 points.
12 | #' @param column_in Character vector of single list-column in \code{spc_tbl} that
13 | #' contain list of spectra (1 row matrix) to be processed by function supplied
14 | #' in \code{select}.
15 | #' @param custom_function A character string of a custom processing function
16 | #' that is later parsed (produces expression in a list) and evaluated within
17 | #' the function \code{preprocess_spc}.
18 | #' The character vector argument of \code{custom_function}
19 | #' needs to contain \code{"spc_raw"}, which is the single data table of spectra
20 | #' that results from binding a list of data.tables (spectra to preprocess)
21 | #' from the spectra list-column specified in \code{column_in}.
22 | #' An example for a value is
23 | #' \code{"prospectr::savitzkyGolay(X = spc_raw, m = 0, p = 3, w = 9)"}.
24 | #' Optional argument. Default is \code{NULL}.
25 | #' @export
26 | preprocess_spc <- function(spc_tbl, select, column_in = "spc_mean",
27 | custom_function = NULL) {
28 |
29 | # Convert list of spectral data.tables to one data.table
30 | spc_raw <- data.table::rbindlist(spc_tbl[column_in][[column_in]])
31 |
32 | ## Perform preprocessing =====================================================
33 |
34 | # Use custom function when supplying option ----------------------------------
35 | if (!is.null(custom_function) & select == "custom") {
36 | # Create full character string for parsing
37 | custom_fct <- paste0("custom <- ", custom_function)
38 | # parse converts the character string into an expression
39 | # eval evaluates the expression; as a result, custom object is computed
40 | # and saved within the current workspace
41 | eval(parse(text = custom_fct))
42 | ## x <- spc_raw
43 | ## custom <- eval(substitute(custom_function), envir = parent.frame())
44 | # -> Error in is.data.frame(X) : object 'x' not found
45 | }
46 | # -> returns error:
47 | # custom_function = prospectr::savitzkyGolay(X = x, m = 0, p = 3, w = 9)
48 | # Error in is.data.frame(X) : object 'x' not found
49 | # -> Maybe solution: http://stackoverflow.com/questions/30563745/non-standard-evaluation-from-another-function-in-r
50 |
51 | # Savitzky-Golay preprocessing
52 | # use different derivatives and window sizes ---------------------------------
53 |
54 | # Zero order Savitzky-Golay (no derivative) -> only smoothing
55 | if (select == "sg_0_w9") {
56 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
57 | m = 0, p = 3, w = 9)}
58 | # First derivative Savitzky-Golay
59 | if (select == "sg_1_w5") {
60 | sg_1_w5 <- prospectr::savitzkyGolay(X = spc_raw,
61 | m = 1, p = 3, w = 5)}
62 | if (select == "sg_1_w9") {
63 | sg_1_w9 <- prospectr::savitzkyGolay(X = spc_raw,
64 | m = 1, p = 3, w = 9)}
65 | if (select == "sg_1_w11") {
66 | sg_1_w11 <- prospectr::savitzkyGolay(X = spc_raw,
67 | m = 1, p = 3, w = 11)}
68 | if (select == "sg_1_w13") {
69 | sg_1_w13 <- prospectr::savitzkyGolay(X = spc_raw,
70 | m = 1, p = 3, w = 13)}
71 | if (select == "sg_1_p2_w13") {
72 | sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw,
73 | m = 1, p = 2, w = 13)}
74 | if (select == "sg_1_w15") {
75 | sg_1_w15 <- prospectr::savitzkyGolay(X = spc_raw,
76 | m = 1, p = 3, w = 15)}
77 | if (select == "sg_1_w17") {
78 | sg_1_w17 <- prospectr::savitzkyGolay(X = spc_raw,
79 | m = 1, p = 3, w = 17)}
80 | if (select == "sg_1_w19") {
81 | sg_1_w19 <- prospectr::savitzkyGolay(X = spc_raw,
82 | m = 1, p = 3, w = 19)}
83 | # Implement window size of 21, corresponds to ICRAF standard;
84 | # see e.g. Terhoeven-Urselmans et al. (2010)
85 | if (select == "sg_1_w21") {
86 | sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw,
87 | m = 1, p = 3, w = 21)}
88 | if (select == "sg_1_w23") {
89 | sg_1_w23 <- prospectr::savitzkyGolay(X = spc_raw,
90 | m = 1, p = 3, w = 23)}
91 | if (select == "sg_1_w25") {
92 | sg_1_w25 <- prospectr::savitzkyGolay(X = spc_raw,
93 | m = 1, p = 3, w = 25)}
94 | if (select == "sg_1_w27") {
95 | sg_1_w27 <- prospectr::savitzkyGolay(X = spc_raw,
96 | m = 1, p = 3, w = 27)}
97 | if (select == "sg_1_w35") {
98 | sg_1_w35 <- prospectr::savitzkyGolay(X = spc_raw,
99 | m = 1, p = 3, w = 35)}
100 | if (select == "sg_1_w41") {
101 | sg_1_w41 <- prospectr::savitzkyGolay(X = spc_raw,
102 | m = 1, p = 3, w = 41)}
103 | if (select == "sg_1_w51") {
104 | sg_1_w51 <- prospectr::savitzkyGolay(X = spc_raw,
105 | m = 1, p = 3, w = 51)}
106 | # Second derivative Savitzky-Golay
107 | if (select == "sg_2_w11") {
108 | sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw,
109 | m = 2, p = 3, w = 11)}
110 | if (select == "sg_2_w21") {
111 | sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw,
112 | m = 2, p = 3, w = 21)}
113 | # Savitzky-Golay (order 0) smoothing and derivative with a window size of
114 | # 21 points
115 | if (select == "sg_0_1_w21") {
116 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
117 | m = 0, p = 3, w = 9)
118 | sg_0_1_w21 <- prospectr::savitzkyGolay(X = sg_0_w9,
119 | m = 1, p = 3, w = 21)}
120 | # Savitzky-Golay second derivative
121 | if (select == "sg_2_w5") {
122 | sg_2_w5 <- prospectr::savitzkyGolay(X = spc_raw,
123 | m = 2, p = 3, w = 5)}
124 | if (select == "sg_2_w11") {
125 | sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw,
126 | m = 2, p = 3, w = 11)}
127 |
128 | # Standard normal variate (SNV) ----------------------------------------------
129 |
130 | # Calculate standard normal variate (SNV) after Savitzky-Golay smoothing
131 | if (select == "sg_0_snv") {
132 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
133 | m = 0, p = 3, w = 9)
134 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)}
135 | if (select == "sg_1_snv") {
136 | sg_1_snv <- prospectr::standardNormalVariate(sg_1_w5)}
137 | if (select == "sg_1_p2_w13_snv") {
138 | sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw,
139 | m = 1, p = 2, w = 13)
140 | sg_1_p2_w13_snv <- prospectr::standardNormalVariate(sg_1_p2_w13)}
141 | # Standard normal variate (SNV) and first gap-segment derivative
142 | if (select == "snv_gsd_m1_w11_s1") {
143 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
144 | m = 0, p = 3, w = 9)
145 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
146 | snv_gsd_m1_w11_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 11, s = 1)}
147 | if (select == "snv_gsd_m1_w21_s5") {
148 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
149 | m = 0, p = 3, w = 9)
150 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
151 | snv_gsd_m1_w21_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 21, s = 5)}
152 | if (select == "snv_gsd_m1_w31_s1") {
153 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
154 | m = 0, p = 3, w = 9)
155 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
156 | snv_gsd_m1_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)}
157 | if (select == "snv_gsd_m1_w31_s5") {
158 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
159 | m = 0, p = 3, w = 9)
160 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
161 | snv_gsd_m1_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)}
162 | # Standard normal variate (SNV) and second gap-segment derivative
163 | if (select == "snv_gsd_m2_w5_s1") {
164 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
165 | m = 0, p = 3, w = 9)
166 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
167 | snv_gsd_m2_w5_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 5, s = 1)}
168 | if (select == "snv_gsd_m2_w21_s1") {
169 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
170 | m = 0, p = 3, w = 9)
171 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
172 | snv_gsd_m2_w21_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 21, s = 1)}
173 | if (select == "snv_gsd_m2_w31_s1") {
174 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
175 | m = 0, p = 3, w = 9)
176 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
177 | snv_gsd_m2_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 5)}
178 | if (select == "snv_gsd_m2_w31_s5") {
179 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
180 | m = 0, p = 3, w = 9)
181 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
182 | snv_gsd_m2_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 1)}
183 | if (select == "snv_gsd_m2_w51_s1") {
184 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
185 | m = 0, p = 3, w = 9)
186 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
187 | snv_gsd_m2_w51_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 1)}
188 | if (select == "snv_gsd_m2_w51_s5") {
189 | sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
190 | m = 0, p = 3, w = 9)
191 | sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
192 | snv_gsd_m2_w51_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 5)}
193 | # 1rst Gap-segement derivative
194 | if (select == "gsd_m1_w5_s4") {
195 | gsd_m1_w5_s4 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 4)}
196 | if (select == "gsd_m1_w11_s5") {
197 | gsd_m1_w11_s5 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 5)}
198 | if (select == "gsd_m1_w11_s21") {
199 | gsd_m1_w11_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 21)}
200 | if (select == "gsd_m1_w21_s1") {
201 | gsd_m1_w21_s1 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 1)}
202 | if (select == "gsd_m1_w21_s21") {
203 | gsd_m1_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 21)}
204 | if (select == "gsd_m1_w35_s21") {
205 | gsd_m1_w35_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 35, s = 21)}
206 | if (select == "gsd_m1_w5_s21") {
207 | gsd_m1_w5_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 21)}
208 | # 2nd Gap-segment derivative
209 | if (select == "gsd_m2_w21_s21") {
210 | gsd_m2_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 2, w = 21, s = 21)}
211 | # 4th Gap-segment derivative
212 | if (select == "gsd_m4_w21_s21") {
213 | gsd_m4_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 4, w = 21, s = 21)}
214 |
215 | # Savitzky-Golay combined with multiple scatter correction (MSC --------------
216 | # Savitzky-Golay with 3rd order polynomial, a window size of 21
217 | # and first derivative + MSC
218 | if (select == "sg_1_w21_msc") {
219 | sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw,
220 | m = 1, p = 3, w = 21)
221 | # Use msc function from the pls package; use column means of X as reference
222 | # spectrum
223 | sg_1_w21_msc <- pls::msc(X = sg_1_w21, reference = NULL)
224 | }
225 | # Savitzky-Golay combined with multiple scatter correction (MSC --------------
226 | # Savitzky-Golay with 4th order polynomial, a window size of 21
227 | # and second derivative + MSC
228 | if (select == "sg_2_w21_msc") {
229 | sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw,
230 | m = 2, p = 4, w = 21)
231 | # Use msc function from the pls package; use column means of X as reference
232 | # spectrum
233 | sg_2_w21_msc <- pls::msc(X = sg_2_w21, reference = NULL)
234 | }
235 |
236 | # Continuum-removal ----------------------------------------------------------
237 | if (select == "cr") {
238 | cr <- prospectr::continuumRemoval(X = spc_raw,
239 | wav = as.numeric(colnames(spc_raw)), type = "A")}
240 | if (select == "cr_refl") {
241 | cr_refl <- prospectr::continuumRemoval(X = spc_raw,
242 | wav = as.numeric(colnames(spc_raw)), type = "R")}
243 |
244 | # Select final preprocessing based on selection argument and
245 | # save matrix in data.table
246 | pre <- select
247 | spc_pre <- data.table::as.data.table(get(pre))
248 |
249 | # Convert preprocessed spectra in data.table to list of data.table spectra
250 | # https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md
251 | spc_pre_list <- map(purrr::transpose(spc_pre), data.table::as.data.table)
252 | # Convert x-values of preprocessed spectra in list of vectors
253 | # prospectr only hands over new xunits in matrix colnames of type character
254 | xvalues_pre_list <- lapply(spc_pre_list,
255 | function(x) as.numeric(colnames(x)))
256 |
257 | # Add list of preprocessed spectra and correspoding wavenumbers to tibble
258 | spc_tbl_out <- tibble::add_column(spc_tbl,
259 | spc_pre = spc_pre_list, xvalues_pre = xvalues_pre_list)
260 | return(spc_tbl_out)
261 | }
262 |
--------------------------------------------------------------------------------
/R/pls-vip.R:
--------------------------------------------------------------------------------
1 | ### VIP.R: Implementation of VIP (variable importance in projection)(*) for the
2 | ### `pls' package.
3 | ### $Id: VIP.R,v 1.2 2007/07/30 09:17:36 bhm Exp $
4 |
5 | ### Copyright: 2006,2007 Bjoern-Helge Mevik
6 | ### This program is free software; you can redistribute it and/or modify
7 | ### it under the terms of the GNU General Public License version 2 as
8 | ### published by the Free Software Foundation.
9 | ###
10 | ### This program is distributed in the hope that it will be useful,
11 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | ### GNU General Public License for more details.
14 |
15 | ### A copy of the GPL text is available here:
16 | ### http://www.gnu.org/licenses/gpl-2.0.txt
17 |
18 | ### Contact info:
19 | ### Boejrn-Helge Mevik
20 | ### bhx6@mevik.net
21 | ### Roedtvetvien 20
22 | ### N-0955 Oslo
23 | ### Norway
24 |
25 | ### (*) As described in Chong, Il-Gyo & Jun, Chi-Hyuck, 2005, Performance of
26 | ### some variable selection methods when multicollinearity is present,
27 | ### Chemometrics and Intelligent Laboratory Systems 78, 103--112.
28 |
29 | ## VIP returns all VIP values for all variables and all number of components,
30 | ## as a ncomp x nvars matrix.
31 | VIP <- function(object) {
32 | # pb: added to avoid `R CMD check` note
33 | method <- Yloadings <- scores <- loading.weights <- NULL
34 | if (object$method != "oscorespls")
35 | stop("Only implemented for orthogonal scores algorithm. Refit with 'method = \"oscorespls\"'")
36 | if (nrow(object$Yloadings) > 1)
37 | stop("Only implemented for single-response models")
38 |
39 | SS <- c(object$Yloadings)^2 * colSums(object$scores^2)
40 | Wnorm2 <- colSums(object$loading.weights^2)
41 | SSW <- sweep(object$loading.weights^2, 2, SS / Wnorm2, "*")
42 | sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS))
43 | }
44 |
45 |
46 | ## VIPjh returns the VIP of variable j with h components
47 | VIPjh <- function(object, j, h) {
48 | # pb: added to avoid `R CMD check` note
49 | method <- Yloadings <- scores <- loading.weights <- NULL
50 | if (object$method != "oscorespls")
51 | stop("Only implemented for orthogonal scores algorithm. Refit with 'method = \"oscorespls\"'")
52 | if (nrow(object$Yloadings) > 1)
53 | stop("Only implemented for single-response models")
54 |
55 | b <- c(object$Yloadings)[1:h]
56 | T <- object$scores[,1:h, drop = FALSE]
57 | SS <- b^2 * colSums(T^2)
58 | W <- object$loading.weights[,1:h, drop = FALSE]
59 | Wnorm2 <- colSums(W^2)
60 | sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS))
61 | }
62 |
63 | #' @title Extract VIPs (variable importance in the projection) for a PLS
64 | #' regression model output returned from model fitting with
65 | #' \code{simplerspec::fit_pls()}
66 | #' @description VIPs are extracted based on the \code{finalModel} sublist
67 | #' in the \code{caret::train} output contained in the \code{model} element
68 | #' of the \code{simplerspec::fit_pls()} model output list. The VIPs for
69 | #' derived number of PLS components in the \code{finalModel} are computed.
70 | #' @param mout Model output list returned from \code{simplerspec::fit_pls()}.
71 | #' @usage extract_pls_vip(mout)
72 | #' @return A tibble data frame with columns \code{wavenumber} and correponding
73 | #' VIP values in the column \code{vip} for the finally chosen PLS regression
74 | #' model at the final number of PLS components.
75 | #' @export
76 | extract_pls_vip <- function(mout) {
77 | # Compute VIP for all wavenumbers and select only VIPs with ncomp in final
78 | # model
79 | final_model <- mout$model$finalModel
80 | vip <- VIP(object = final_model)[final_model$ncomp, ]
81 | # Collect wavenumbers from preprocessed spectra
82 | wn <- as.numeric(colnames(mout$data$calibration$spc_pre[[1]]))
83 | # Create a data frame with wavenumbers and VIP scores
84 | tibble::tibble(wavenumber = wn, vip = vip)
85 | }
86 |
87 | #' @title Create a data frame containing start and end positions (wavenumbers)
88 | #' where variable importance in projection (VIP) is > 1
89 | #' @description Given a data frame with VIP outputs (wavenumber and vip
90 | #' columns), start and end values denoting spectral regions where VIP > 1
91 | #' are returned as data frame. The functions can be used as helper
92 | #' function for plotting VIP.
93 | #' @param df_vip Data frame containing \code{wavenumber} and \code{vip} columns
94 | #' (numeric)
95 | #' @return Data.frame containing vectors \code{start} (numeric; wavenumber),
96 | #' \code{end} (numeric; wavenumber) and group (integer; values are
97 | #' \code{1:length(start))}.
98 | #' @export
99 | create_vip_rects <- function(df_vip) {
100 |
101 | # Avoid `R CMD check NOTE`: `no visible binding for global variable`
102 | VIP <- wavenumber <- tail <- NULL
103 |
104 | # Highlight region data
105 | # https://stackoverflow.com/questions/32543176/highlight-areas-within-certain-x-range-in-ggplot2
106 | v <- rep(0, nrow(df_vip))
107 | v[df_vip$vip > 1] <- 1
108 | # Get the start and end points for highlighted regions
109 | inds <- diff(c(0, v))
110 | start <- df_vip$wavenumber[inds == 1]
111 | end <- df_vip$wavenumber[inds == -1]
112 | if (length(start) > length(end)) {
113 | end <- c(end, tail(df_vip$wavenumber, 1))
114 | }
115 | # Create data frame for rectangle layer (geom_rects)
116 | data.frame(start = start, end = end, group = seq_along(start))
117 | }
118 |
119 | #' @title Plot stacked ggplot2 graphs with the Variable Importance for the
120 | #' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id,
121 | #'and the preprocessed spectra.
122 | #' @description Plot stacked ggplot2 graphs of VIP for the final
123 | #' PLS regression model output of the calibration (training) data set for the
124 | #' final number of components, raw (replicate mean) spectra, and preprocessed
125 | #' spectra. Regions with VIP > 1 are highlighted across the stacked graphs
126 | #' in beige colour rectangles. VIP calculation is implemented as described in
127 | #' Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection
128 | #' methods when multicollinearity is present. Chemometrics and Intelligent
129 | #' Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011
130 | #' @param mout Model output list that is returned from
131 | #' \code{simplerspec::fit_pls()}. This object contains a nested list with
132 | #' the \code{caret::train()} object (class \code{train}), based on which
133 | #' VIPs at finally selected number of PLS components are computed.
134 | #' @param y1 Character vector of list-column name in
135 | #' \code{mout$data$calibration}, where spectra for bottom graph are extracted.
136 | #' Default is \code{"spc_mean"}, which plots the mean calibration spectra after
137 | #' resampling.
138 | #' @param y2 Character string of list-column name in
139 | #' \code{mout$data$calibration}, where spectra for bottom graph are extracted.
140 | #' Default is \code{"spc_pre"}, which plots the preprocessed calibration
141 | #' spectra after resampling.
142 | #' @param by Character string that is used to assign spectra to the same group
143 | #' and therefore ensures that all spectra are plotted with the same colour.
144 | #' Default is \code{"sample_id"}
145 | #' @param xlab Character string of X axis title for shared x axis of stacked
146 | #' graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}
147 | #' @param ylab1 Y axis title of bottom spectrum. Default is \code{"Absorbance"}.
148 | #' @param ylab2 Y axis title of bottom spectrum. Default is
149 | #' \code{"Preprocessed Abs."}.
150 | #' @param alpha Double between 0 and 1 that defines transparency of spectra
151 | #' lines in returned graph (ggplot plot object).
152 | #' @usage plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre",
153 | #' by = "sample_id",
154 | #' xlab = expression(paste("Wavenumber [", cm^-1, "]")),
155 | #' ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
156 | #' alpha = 0.2)
157 | #' @export
158 | plot_pls_vip <- function(mout, y1 = "spc_mean", y2 = "spc_pre",
159 | by = "sample_id",
160 | xlab = expression(paste("Wavenumber [", cm^-1, "]")),
161 | ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
162 | alpha = 0.2) {
163 |
164 | # Avoid `R CMD check` NOTE: no visible binding for global variable `...`
165 | variable <- wavenumber <- value <- group <- vip <- NULL
166 |
167 | # Extract spectra tibble for calibration
168 | spc_tbl <- mout$data$calibration
169 | # Gather spectra in one data.table
170 |
171 | ifelse(y1 == "spc",
172 | {dt1 <- data.table::data.table(do.call(rbind, spc_tbl[, y1][[y1]]))},
173 | {dt1 <- data.table::rbindlist(spc_tbl[, y1][[y1]])}
174 | )
175 |
176 | ifelse(y2 == "spc",
177 | {dt2 <- data.table::data.table(do.call(rbind, spc_tbl[, y2][[y2]]))},
178 | {dt2 <- data.table::rbindlist(spc_tbl[, y2][[y2]])}
179 | )
180 |
181 | # Extract ID variable and append it to the data.table
182 | id <- spc_tbl[, by][[by]]
183 | dt1[, id := id]
184 | dt2[, id := id]
185 |
186 | # Convert data table to long form for ggplot2 plotting
187 | dt1_long <- data.table::melt(
188 | dt1, measure = names(dt1)[!names(dt1) %in% c("id")]
189 | )
190 | dt1_long[, variable := as.numeric(as.character(variable))]
191 | data.table::setnames(dt1_long, old = "variable", new = "wavenumber")
192 |
193 | dt2_long <- data.table::melt(
194 | dt2, measure = names(dt2)[!names(dt2) %in% c("id")]
195 | )
196 | dt2_long[, variable := as.numeric(as.character(variable))]
197 | data.table::setnames(dt2_long, old = "variable", new = "wavenumber")
198 |
199 | # Plot spectra and VIP scores
200 | brk <- pretty(as.numeric(names(dt1)[!names(dt1) %in% c("id")]),
201 | n = 10)
202 | maxmin <- function(x) {c(max(x), min(x))}
203 | x_lim <- maxmin(as.numeric(names(dt1)[!names(dt1) %in% c("id")]))
204 |
205 | # Extract VIP (Variable Importance in Projection) scores for the given
206 | # model
207 | df_vip <- extract_pls_vip(mout)
208 |
209 | # Determine highlighted regions above VIP = 1
210 | rects <- create_vip_rects(df_vip)
211 |
212 | # Plot for resampled and mean replicate spectra
213 | p_spc <- ggplot2::ggplot(dt1_long, ggplot2::aes(x = wavenumber, y = value)) +
214 | ggplot2::geom_rect(data = rects, inherit.aes = FALSE,
215 | ggplot2::aes(xmin = start, xmax = end, ymin = min(dt1_long$value),
216 | ymax = max(dt1_long$value), group = group), color = "transparent",
217 | fill = "orange", alpha = 0.3) +
218 | ggplot2::geom_line(data = dt1_long, inherit.aes = FALSE,
219 | ggplot2::aes(x = wavenumber, y = value, group = id),
220 | alpha = alpha, size = 0.2) +
221 | ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) +
222 | ggplot2::labs(x = xlab, y = ylab1) +
223 | ggplot2::theme_bw() +
224 | ggplot2::theme(plot.margin = ggplot2::unit(c(1, 5, -30, 6),
225 | units = "points"), axis.text.x = ggplot2::element_blank())
226 |
227 | p_spc_pre <- ggplot2::ggplot(dt2_long, ggplot2::aes(wavenumber, value)) +
228 | ggplot2::geom_rect(data = rects, inherit.aes = FALSE,
229 | ggplot2::aes(xmin = start, xmax = end, ymin = min(dt2_long$value),
230 | ymax = max(dt2_long$value), group = group), color = "transparent",
231 | fill = "orange", alpha = 0.3) +
232 | ggplot2::geom_line(ggplot2::aes(group = id), alpha = alpha, size = 0.2) +
233 | ggplot2::labs(x = xlab, y = ylab2) +
234 | ggplot2::theme_bw() +
235 | ggplot2::theme(plot.margin = ggplot2::unit(c(0, 5, 1, 1),
236 | units = "points")) +
237 | ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) +
238 | ggplot2::theme(plot.margin = ggplot2::unit(c(1, 5, -30, 6),
239 | units = "points"),
240 | axis.title.y = ggplot2::element_text(vjust = 0.25),
241 | axis.text.x = ggplot2::element_blank())
242 |
243 | # Plot for VIP
244 | # Extract PLS model response variable and number of components
245 | response <- as.character(unique(mout$stats$response))
246 | ncomp <- mout$model$finalModel$ncomp
247 | p_vip <- ggplot2::ggplot(data = df_vip,
248 | ggplot2::aes(x = wavenumber, y = vip)) +
249 | ggplot2::geom_rect(data = rects, inherit.aes = FALSE,
250 | ggplot2::aes(xmin = start, xmax = end, ymin = min(df_vip$vip),
251 | ymax = max(df_vip$vip), group = group), color = "transparent",
252 | fill = "orange", alpha = 0.3) +
253 | ggplot2::geom_hline(yintercept = 1, colour = "red") +
254 | ggplot2::geom_line() +
255 | ggplot2::xlab(xlab) +
256 | ggplot2::ylab(
257 | bquote(paste(VIP[PLSR], " (", .(response),
258 | ", ", .(ncomp), " comps)"))) +
259 | ggplot2::scale_x_reverse(limits = x_lim, breaks = brk) +
260 | ggplot2::theme_bw() +
261 | ggplot2::theme(plot.margin = ggplot2::unit(c(0, 5, 1, 1),
262 | units = "points"), axis.title.y = ggplot2::element_text(vjust = 0.25))
263 |
264 | # Arrange plots in two panels without any margins in between
265 | # Hints from
266 | # https://stackoverflow.com/questions/42567045/alignment-of-two-plots-using-grid-arrange
267 | cowplot::plot_grid(
268 | p_spc, p_spc_pre, p_vip, rel_heights = c(0.4, 0.3, 0.3),
269 | ncol = 1, align = "v")
270 | }
271 |
272 |
--------------------------------------------------------------------------------
/R/gather-spc.R:
--------------------------------------------------------------------------------
1 | #' @title Gather measurements of different spectra types, corresponding
2 | #' x-axis values and metadata from nested list.
3 | #' @description Gather spectra, corresponding x-axis values, and device and
4 | #' measurement metadata from a nested list into a spectra tibble, so that one
5 | #' row represents one spectral measurement. Spectra, x-axis values and metadata
6 | #' are mapped from the individual list elements (named after file name including
7 | #' the extension) and transformed into (list-)columns of a spectra tibble,
8 | #' which is an extended data frame. For each measurement, spectral data and
9 | #' metadata are combined into one row of the tidy data frame. In addition, the ID
10 | #' columns `unique_id`, `file_id`, and `sample_id` are extracted from
11 | #' `"metadata"` (data frame) list entries and returned as identifier columns of
12 | #' the spectra tibble. List-columns facilitate keeping related data together in
13 | #' a rectangular data structure. They can be manipulated easily during
14 | #' subsequent transformations, for example using the standardized functions of
15 | #' the simplerspec data processing pipeline.
16 | #' @param data Recursive list named with filename (`file_id`) at first level
17 | #' entries, where each element containing a sample measurement has nested
18 | #' metadata (`"metadata"`), spectra types (see `spc_types`), corresponding
19 | #' x-axis values (see section *"Details on spectra data checks and matching"*).
20 | #' The `data` list is a structural convention to organize spectra and their
21 | #' metadata. It follows for example the list structure returned from the Bruker
22 | #' OPUS binary reader `simplerspec::read_opus_univ()`.
23 | #' @param spc_types Character vector with the spectra types to be extracted
24 | #' from `data` list and gathered into list-columns. The spectra type names need
25 | #' to exactly follow the naming conventions, and the element names and contents
26 | #' need to be present at the second list hierarchy of `data`. These values are
27 | #' allowed:
28 | #' * `"spc"` (default): final raw spectra after atmospheric compensation, if
29 | #' performed (named `AB` in Bruker OPUS software; results from referencing
30 | #' sample to reference single channel reflectance and transforming to
31 | #' absorbance).
32 | #' * `"spc_nocomp"`: raw spectra without atmospheric correction
33 | #' * `"sc_sm"`: Single channel reflectance spectra of the samples
34 | #' * `"sc_rf"`: Single channel reflectance spectra of the reference (background
35 | #' spectra)
36 | #' * `"ig_sm"`: Interferograms of the sample spectra (currently only spectra
37 | #' without x-axis list-columns are matched and returned)
38 | #' * `"ig_rf"`: Interferograms of the reference spectra (currently only spectra
39 | #' without x-axis list-columns are matched and returned)
40 | #' @usage gather_spc(data, spc_types = "spc")
41 | #' @section Details on spectra data checks and matching:
42 | #' `gather_spc()` checks whether these conditions are met for each measurement
43 | #' in the list `data`:
44 | #' 1. Make sure that the first level `data` elements are named (assumed to be
45 | #' the file name the data originate from), and remove missing measurements with
46 | #' an informative message.
47 | #' 2. Remove any duplicated file names and raise a message if there are
48 | #' name duplicates at first level.
49 | #' 3. Check whether `spc_types` inputs are supported (see argument `spc_types`)
50 | #' and present at the second level of the `data` list. If not, remove
51 | #' all data elements for incomplete spectral measurements.
52 | #' 4. Match spectra types and possible corresponding x-axis types from
53 | #' a lookup list. For each selected spectrum type (left), at least one of
54 | #' the element names of the x-axis type (right) needs to be present for each
55 | #' measurement in the list `data`:
56 | #' * `"spc"` : `"wavenumbers"`, `"wavelengths"`, or `"x_values"`
57 | #' * `"spc_nocomp"` : `"wavenumbers"`, `"wavelengths"`, or `"x_values"`
58 | #' * `"sc_sm"` : `"wavenumbers_sc_sm"`, `"wavelengths_sc_sm"`, or
59 | #' `"x_values_sc_sm"`
60 | #' * `"sc_rf"` : `"wavenumbers_sc_rf"`, `"wavelengths_sc_rf"`, or
61 | #' `"x_values_sc_rf"`
62 | #' 5. Check if `"metadata"` elements are present and remove data elements for
63 | #' measurements with missing or incorrectly named metadata elements
64 | #' (message).
65 | #' @return Spectra tibble (`spc_tbl` with classes `"tbl_df"`, `"tbl"`, and
66 | #' `"data.frame"`) with the following (list-)columns:
67 | #' * `"unique_id"`: Character vector with unique measurement identifier, likely
68 | #' a string with file names in combination with date and time (extracted from
69 | #' each `"metadata"` data frame column).
70 | #' * `"file_id"` : Character vector with file name including the extension
71 | #' (extracted from each `"metadata"` data frame column).
72 | #' * `"sample_id"`: Character vector with sample identifier. For Bruker OPUS
73 | #' binary files, this corresponds to the file name without the file extension
74 | #' in integer increments of sample replicate measurements.
75 | #' * One or multiple of `"spc"`, `"spc_nocomp"`, `"sc_sm"`, or `"sc_rf"`:
76 | #' List(s) of data.table's containing spectra type(s).
77 | #' * One or multiple of `"wavenumbers"`, `"wavelengths"`, `"x_values"`,
78 | #' `"wavenumbers_sc_sm"`, `"wavelengths_sc_sm"`, `"x_values_sc_sm"`,
79 | #' `"wavenumbers_sc_rf"`, `"wavelengths_sc_rf"`, or `"x_values_sc_rf"`:
80 | #' List(s) of numeric vectors with matched x-axis values (see *"Details on
81 | #' spectra data checks and matching"* below).
82 | #' @importFrom rlang set_names
83 | #' @export
84 | gather_spc <- function(data,
85 | spc_types = "spc") {
86 | spc_types <- map(spc_types, rlang::sym)
87 | spc_types_chr <- map_chr(spc_types, rlang::quo_name)
88 |
89 | # Duplicate original data to refer to full `data` list in messages
90 | data_origin <- data
91 |
92 | ## Make sure first level `data` elements are named
93 |
94 | if (any(names(data) %in% "")) {
95 | which_missing <- which(names(data) %in% "")
96 | idx_nm_missing <- rlang::set_names(
97 | names(data)[which_missing], which_missing)
98 | message(paste0(length(which_missing), " `data` ",
99 | ifelse(length(which_missing) > 1, "elements", "element"),
100 | ifelse(length(which_missing) > 1, " have", " has"),
101 | " missing `file_id` names. ",
102 | ifelse(length(which_missing) > 1, "These", "This"), "`data` list index",
103 | ifelse(length(which_missing) > 1, " positions\n", " position\n"),
104 | ifelse(length(which_missing) > 1, "were", "was"),
105 | " therefore removed:\n\n",
106 | idx_nm_missing %>%
107 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "),
108 | "\n\n"))
109 |
110 | data <- data[-which_missing]
111 | }
112 |
113 | ## Check whether any file names (`file_id`'s) are duplicated;
114 | ## remove and raise warning in case duplicated
115 |
116 | is_duplicated <- duplicated(names(data))
117 | idx_nm_duplicated <- rlang::set_names(names(data)[is_duplicated],
118 | which(is_duplicated))
119 | if (any(is_duplicated)) {
120 | message(paste0(sum(is_duplicated), " `file_id` named ",
121 | ifelse(sum(is_duplicated) > 1, "elements", "element"), " of `data` ",
122 | ifelse(sum(is_duplicated) > 1, "are", "is"),
123 | " non-unique/duplicated\nand ",
124 | ifelse(sum(is_duplicated) > 1, "were", "was"), " therefore removed:\n\n",
125 | idx_nm_duplicated %>%
126 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "),
127 | "\n\n"))
128 |
129 | data <- data[-is_duplicated]
130 | }
131 |
132 | ## Check whether `spc_types` are allowed and present at the second level of
133 | ## the nested list `data`
134 |
135 | spc_types_allowed <- c("spc", "spc_nocomp", "sc_sm", "sc_rf", "ig_sm",
136 | "ig_rf")
137 | spc_types_present <- spc_types_chr %in% spc_types_allowed
138 |
139 | if (any(!spc_types_present)) {
140 | options(useFancyQuotes = FALSE)
141 | stop("The following spectrum types specified in `spc_types` are",
142 | " not supported:\n\n",
143 | paste(dQuote(spc_types_chr[!spc_types_present]), collapse = ", ")) }
144 |
145 | ## Collect names of spectrum types nested at second `file_id` `data` list
146 | ## level to check whether all specified `spc_types` are present in `data`;
147 | ## access sub-element names at that list level
148 |
149 | data_types_byfile <- purrr::modify_depth(data, .depth = 1,
150 | ~ purrr::imap_chr(.x, ~ .y))
151 | spc_types_byfile_in <- map(data_types_byfile, ~ spc_types_chr %in% .x)
152 |
153 | if (any(map_lgl(spc_types_byfile_in, ~ any(!.x)))) {
154 | which_rm <- map_lgl(spc_types_byfile_in, ~ any(!.x)) %>% which()
155 | rm_origin <- names(data_origin[names(which_rm)])
156 | rm_origin_lgl <- names(data_origin) %in% rm_origin
157 | which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl))
158 |
159 | message(paste0("Spectrum types (second list level names) specified in",
160 | " `spc_types` were not found\nwithin all first level elements ",
161 | " of list `data` or are NULL (spectra data by `file_id`).\n",
162 | "Therefore, all data elements for the corresponding",
163 | " list indices and `file_id`'s\nwere removed from `data`:\n\n",
164 | which_rm_origin %>%
165 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "),
166 | "\n\n"))
167 |
168 | data <- data[- which_rm]
169 | }
170 |
171 | ## Extract the spectra of different types by plucking all types specified in
172 | ## `spc_types_chr` argment into separate first list level elements
173 |
174 | spc_mapped <- map(rlang::set_names(spc_types_chr),
175 | ~ pluck_depth(data = data, .depth = 1, .string = .x))
176 |
177 | ## Match and extract the values for the x-axis types
178 |
179 | spc_type_x_axis <- list(
180 | "spc" = c("wavenumbers", "wavelengths", "x_values"),
181 | "spc_nocomp" = c("wavenumbers", "wavelengths", "x_values"),
182 | "sc_sm" = c("wavenumbers_sc_sm", "wavelengths_sc_sm", "x_values_sc_sm"),
183 | "sc_rf" = c("wavenumbers_sc_rf", "wavelengths_sc_rf", "x_values_sc_rf")
184 | )
185 |
186 | lcols_x_values_matching <- purrr::flatten_chr(spc_type_x_axis[spc_types_chr])
187 | x_values_matching <- lcols_x_values_matching[lcols_x_values_matching
188 | %in% unique(purrr::flatten_chr(data_types_byfile))]
189 |
190 | # `data` has eventually been updated (removed `data` elements)
191 | data_types_byfile <- purrr::modify_depth(data, .depth = 1,
192 | ~ purrr::imap_chr(.x, ~ .y))
193 | x_value_types_byfile_in <- map(data_types_byfile, ~ x_values_matching %in% .x)
194 |
195 | if (any(map_lgl(x_value_types_byfile_in, ~ any(!.x)))) {
196 | which_rm <- map_lgl(x_value_types_byfile_in, ~ any(!.x)) %>% which()
197 | rm_origin <- names(data_origin[names(which_rm)])
198 | rm_origin_lgl <- names(data_origin) %in% rm_origin
199 | which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl))
200 |
201 | message(paste0("These x-axis types (second list level names)",
202 | " corresponding to \nspecified `spc_types` spectrum types were not",
203 | " found within\nall first level elements or are NULL",
204 | " (spectra data by `file_id`) of list `data`: \n\n",
205 | paste(dQuote(x_values_matching), collapse = ", ")), "\n\n",
206 | "Therefore, all data elements for the corresponding",
207 | " list indices\nand `file_id`'s were removed from `data`:\n\n",
208 | which_rm_origin %>%
209 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "),
210 | "\n\n")
211 |
212 | data <- data[- which_rm]
213 | spc_mapped <- map(spc_mapped, ~ .x[- which_rm])
214 | }
215 |
216 | x_values_mapped <- map(rlang::set_names(x_values_matching),
217 | ~ pluck_depth(data = data, .depth = 1, .string = .x))
218 |
219 | ## Combine the mapped spectra and correspoding x-axis types, and order
220 | ## list before returning spectral tibble (`spc_tbl`)
221 |
222 | spc_x_axis_types_order <- c("spc", "wavenumbers", "wavelengths", "x_values",
223 | "spc_nocomp",
224 | "sc_sm", "wavenumbers_sc_sm", "wavelengths_sc_sm", "x_values_sc_sm",
225 | "sc_rf", "wavenumbers_sc_rf", "wavelengths_sc_rf", "x_values_sc_rf",
226 | "ig_sm", "ig_rf")
227 |
228 | spc_x_values_mapped <- c(spc_mapped, x_values_mapped)
229 | spc_x_axis_types_matched <- spc_x_axis_types_order[spc_x_axis_types_order %in%
230 | names(spc_x_values_mapped)]
231 | spc_x_values_mapped <- spc_x_values_mapped[spc_x_axis_types_matched]
232 |
233 | ## Check if `metadata` elements are present for all list first level elements
234 |
235 | # `data` has eventually been updated (removed `data` elements)
236 | data_types_byfile <- purrr::modify_depth(data, .depth = 1,
237 | ~ purrr::imap_chr(.x, ~ .y))
238 | metadata_byfile_in <- map(data_types_byfile, ~ "metadata" %in% .x)
239 |
240 | if (any(map_lgl(metadata_byfile_in, ~ any(!.x)))) {
241 | which_rm <- map_lgl(metadata_byfile_in, ~ any(!.x)) %>% which()
242 | rm_origin <- names(data_origin[names(which_rm)])
243 | rm_origin_lgl <- names(data_origin) %in% rm_origin
244 | which_rm_origin <- rlang::set_names(rm_origin, which(rm_origin_lgl))
245 |
246 | message(paste0("`metadata` (second list level names) was not",
247 | " found within\n all first level elements (`metadata` by `file_id`)",
248 | " of list `data` or are NULL.\n\n",
249 | "Therefore, all data elements for the corresponding",
250 | " list indices\nand `file_id`'s were removed from `data`:\n\n",
251 | which_rm_origin %>%
252 | purrr::imap_chr(~ paste0(.y, " : ", .x)) %>% paste(collapse = " "),
253 | "\n\n"))
254 |
255 | data <- data[- which_rm]
256 | spc_x_values_mapped <- map(spc_x_values_mapped, ~ .x[- which_rm])
257 | }
258 |
259 | ## Extract metadata list elements and combine into tibble
260 |
261 | metadata_mapped <- map(rlang::set_names(list("metadata")),
262 | ~ pluck_depth(data = data, .depth = 1, .string = .x))
263 | metadata_df <- purrr::map_df(data, "metadata")
264 | id_tbl <- tibble::as_tibble(
265 | metadata_df[c("unique_id", "file_id", "sample_id")]
266 | )
267 |
268 | ## Column bind all tibbles of metadata id's, metadata, spectra, and
269 | ## x-axis type values; return on combined spectral tibble (`spc_tbl`)
270 |
271 | spc_tbl <- tibble::as_tibble(spc_x_values_mapped)
272 | metadata_tbl <- tibble::as_tibble(metadata_mapped)
273 | dplyr::bind_cols(list(id_tbl, metadata_tbl, spc_tbl))
274 | }
275 |
276 |
277 | # Helpers ----------------------------------------------------------------------
278 |
279 | # Pluck out single elements from a list hierarchy level (`.depth`)
280 | # specified in `.string` based on element names; All elements with the specified
281 | # name will be transposed into a single list named by the element name.
282 | # `pluck_depth` can be combined with `purrr::map` to extract elements of
283 | # different names at all list nodes at the specified level, using e.g.
284 | # map(rlang::set_names(c("spc", "spc_nocomp")),
285 | # ~ pluck_depth(data = data, .depth = 1, .string = .x))
286 |
287 | pluck_depth <- function(data, .depth = 1, .string = NULL) {
288 | purrr::modify_depth(data, .depth = .depth, ~ purrr::pluck(.x, .string))
289 | }
290 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # simplerspec
2 |
3 |
4 | [](https://github.com/philipp-baumann/simplerspec/actions)
5 | [](https://zenodo.org/badge/latestdoi/67121732)
6 | [](https://opensource.org/licenses/MIT)
7 | [](https://philipp-baumann.r-universe.dev/simplerspec)
8 |
9 |
10 | ## Short description
11 |
12 | The simplerspec package aims to facilitate spectral and additional data handling and model development for spectroscopy applications such as infrared soil spectroscopy. Different helper functions are designed to create a data and modeling workflow. Data inputs and outputs are stored in common S3 `R` objects (`lists` and `data frames`), using in addition [`data.table`](https://rdatatable.gitlab.io/data.table/) and [`tibble`](https://tibble.tidyverse.org/index.html) extensions. The functions are built to work in a pipeline and cover commonly used procedures for spectral model development and application.
13 |
14 | ## Installation
15 |
16 | The newest version of the package is available on this GitHub repository. If you find bugs you are highly welcome to report issues (write me an [email](mailto:info@spectral-cockpit.space) or create an [issue](https://github.com/philipp-baumann/simplerspec/issues)). You can install {simplerspec} from GitHub or directly from the r-universe.
17 |
18 | ```R
19 | # option 1
20 | if (!require("remotes")) install.packages("remotes")
21 | remotes::install_github("philipp-baumann/simplerspec")
22 | ```
23 |
24 | ```R
25 | # option 2
26 | install.packages("simplerspec",
27 | repos = c("https://philipp-baumann.r-universe.dev", "https://cloud.r-project.org"))
28 | ```
29 |
30 | ## Key features
31 |
32 | The current version of the package features among others the following functions:
33 |
34 | 1. [`read_opus_univ()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/read-opus-universal.R): Read spectra and metadata from Bruker OPUS binary files into R list
35 | 2. [`gather_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/gather-spc.R): Gather spectra and metadata from list into a tibble object (list-columns)
36 | 4. [`resample_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/resample-spc.R): Resample spectra to new wavenumber intervals
37 | 2. [`average_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/average-spc.R): Average spectra for replicate scans
38 | 5. [`preprocess_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/preprocess-spc.R): Perform pre-processing of spectra
39 | 6. [`select_spc_vars()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/select-spc.R): Select every `n`-th spectral variable and corresponding X-unit values.
40 | 7. [`join_spc_chem()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/join-chem-spectra.R): Join chemical and spectral data sets by `sample_id`
41 | 8. [`plot_spc_ext()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/plot-spc-extended.R): Extended spectral plotting; e.g. group spectra using different
42 | panels or color spectra based on chemical reference values to explore trends.
43 | 9. [`fit_pls()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/pls-modeling.R): Perform model tuning and evaluation based on Partial Least Squares (PLS) regression
44 | 10. [`select_ref_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/select-ref-spectra.R): Select a set of reference samples to measured by
45 | traditional analysis methods when no a priori sample data except spectra are
46 | available (based on Kennard-Stones sampling)
47 | 11. [`predict_from_spc()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/predict-spc.R): Predict multiple chemical properties from a list of calibrated models and new soil spectra
48 | 12. [`assess_multimodels()`](https://github.com/philipp-baumann/simplerspec/blob/master/R/utils-stats.R): Assess model performance given multiple pairs of predicted and measured variables.
49 |
50 | ## Cheatsheet
51 |
52 |
53 |
54 | ## Motivation and key concepts
55 |
56 | Many R packages are available to do tasks in spectral modeling such as pre-processing of spectral data. The motivation to create this package was:
57 |
58 | 1. Avoid repetition of code in model development (common source of errors).
59 | 2. Provide a reproducible data analysis workflow for FT-IR spectroscopy.
60 | 3. R packages are an ideal way to organize and share R code.
61 | 4. Make soil FT-IR spectroscopy modeling accessible to people that have basic R knowledge.
62 | 5. Provide an integrated data-model framework that features tidy data structures designed for both user-friendly printing and efficient data processing.
63 |
64 | This package builds mainly upon functions from the following R packages:
65 |
66 | * [`prospectr`](https://cran.r-project.org/web/packages/prospectr/index.html): Various utilities for pre-processing and sample selection based on spectroscopic data. An introduction to the package with examples can be found [here](https://l-ramirez-lopez.r-universe.dev/articles/prospectr/prospectr.html).
67 | * `plyr` and [`dplyr`](https://dplyr.tidyverse.org): Fast data manipulation tools with an unified interface.
68 | * `ggplot2 `: Alternative plotting system for R, based on the grammar of graphics. See [here](https://ggplot2.tidyverse.org).
69 | * `caret `: Classification and regression training. A set of functions that attempt to streamline the process for creating predictive models. See [here](https://topepo.github.io/caret/) for details.
70 |
71 | Consistent and reproducible data and metadata management is an important prerequisite for spectral model development. Therefore, simplerspec functions are based on storing spectral data and related data in R data structures which keep related data in rows. Every row representing an observation contains data related to a single spectral measurement. Simplerspec functions uses tibble data frames as principal data structures because they allow to store lists within the well-known data frame structures. Lists are flexible data structures and can e.g. contain other lists, vectors, data.frames, or matrices.
72 |
73 | List-columns features provided within the tibble framework are an excellent base to work with functional programming tools in R, which allows to efficiently write code.
74 | Simplerspec internally uses popular functional programming extension tools provided
75 | by the [`purrr`](https://purrr.tidyverse.org/) package for processing and transforming spectra.
76 | For learning more, I would recommend
77 | [this nice purrr list-column tutorial](https://github.com/jennybc/purrr-tutorial/tree/gh-pages)
78 | provided by Jenny Brian. Further, simplerspec well integrates with the
79 | data processing API provided by the dplyr package, which makes spectroscopic
80 | analysis tidy and easy to understand.
81 |
82 | ## Example workflow
83 |
84 | Bruker FTIR spectrometers produce binary files in the OPUS format that can contain different types of spectra and many parameters such as instrument type and settings that were used at the time of data acquisition and internal processing (e.g. Fourier transform operations). Basically, the entire set of setup measurement parameters, selected spectra, supplementary metadata such as the time of measurement are written into OPUS binary files. In contrast to simple text files that contain only plain text with a defined character encoding, binary files can contain any type of data represented as sequences of bytes (a single byte is sequence of 8 bits and 1 bit either represents 0 or 1).
85 |
86 | Simplerspec comes with reader function `read_opus_univ()` that is intended to be a universal Bruker OPUS file reader that extracts spectra and key metadata from files. Usually, one is mostly interested to extract the final absorbance spectra (shown as *AB* in the OPUS viewer software).
87 |
88 | ```R
89 | # Load simplerspec package for spectral model development wrapper functions
90 | library(simplerspec)
91 | # Load tidyverse packages: loads packages frequently used for data manipulation,
92 | # data tidying, import, and plotting
93 | library(tidyverse)
94 |
95 | ################################################################################
96 | ## Part 1: Read and pre-process spectra, read chemical data, and join
97 | ## spectral and chemical data sets
98 | ################################################################################
99 |
100 | ## Read spectra in list ========================================================
101 |
102 | # List of OPUS binary spectra files
103 | lf <- dir("data/spectra/soilspec_eth_bin", full.names = TRUE)
104 |
105 | # Read spectra from files into R list
106 | spc_list <- read_opus_univ(fnames = lf, extract = c("spc"))
107 | # Returns messages:
108 | #> Extracted spectra data from file:
109 | #> Extracted spectra data from file:
110 | #> Extracted spectra data from file:
111 | #> Extracted spectra data from file:
112 | #> ...
113 | ```
114 |
115 | Pipes can make R code more readable and allows step-wise data processing
116 | when developing spectral models. The pipe operator (`%>%`, called "then") is a new operator in R that was introduced
117 | with the magrittr package. It facilitates readability of code
118 | and avoids to type intermediate objects. The basic behavior of
119 | the pipe operator is
120 | that the object on the left hand side is passed as the first argument
121 | to the function on the right hand side. When loading the tidyverse packages, the
122 | pipe operator is attached to the current R session.
123 | More details can be found [here](https://magrittr.tidyverse.org).
124 |
125 | The model development process can be quickly coded as the example below illustrates:
126 |
127 | ```R
128 | ## Spectral data processing pipe ===============================================
129 |
130 | soilspec_tbl <- spc_list %>%
131 | # Gather list of spectra data into tibble data frame
132 | gather_spc() %>%
133 | # Resample spectra to new wavenumber interval
134 | resample_spc(wn_lower = 500, wn_upper = 3996, wn_interval = 2) %>%
135 | # Average replicate scans per sample_id
136 | average_spc() %>%
137 | # Preprocess spectra using Savitzky-Golay first derivative with a window size
138 | # of 21 points
139 | preprocess_spc(select = "sg_1_w21")
140 |
141 | soilspec_tbl
142 | # A tibble: 284 x 11
143 | #> unique_id file_id sample_id
144 | #>
145 | #> 1 BF_lo_01_soil_cal.0_2015-11-06 14:34:10 BF_lo_01_soil_cal.0 BF_lo_01_soil_cal
146 | #> 2 BF_lo_01_soil_cal.1_2015-11-06 14:38:14 BF_lo_01_soil_cal.1 BF_lo_01_soil_cal
147 | #> 3 BF_lo_01_soil_cal.2_2015-11-06 14:40:55 BF_lo_01_soil_cal.2 BF_lo_01_soil_cal
148 | #> 4 BF_lo_02_soil_cal.0_2015-11-06 17:27:55 BF_lo_02_soil_cal.0 BF_lo_02_soil_cal
149 | #> 5 BF_lo_02_soil_cal.1_2015-11-06 17:30:19 BF_lo_02_soil_cal.1 BF_lo_02_soil_cal
150 | #> 6 BF_lo_02_soil_cal.2_2015-11-06 17:32:47 BF_lo_02_soil_cal.2 BF_lo_02_soil_cal
151 | #> 7 BF_lo_03_soil_cal.0_2015-11-09 11:32:55 BF_lo_03_soil_cal.0 BF_lo_03_soil_cal
152 | #> 8 BF_lo_03_soil_cal.1_2015-11-09 11:35:26 BF_lo_03_soil_cal.1 BF_lo_03_soil_cal
153 | #> 9 BF_lo_03_soil_cal.2_2015-11-09 11:38:08 BF_lo_03_soil_cal.2 BF_lo_03_soil_cal
154 | #> 10 BF_lo_04_soil_cal.0_2015-11-06 10:36:13 BF_lo_04_soil_cal.0 BF_lo_04_soil_cal
155 | #> # ... with 274 more rows, and 8 more variables: spc , wavenumbers ,
156 | #> # metadata , spc_rs , wavenumbers_rs , spc_mean ,
157 | #> # spc_pre , xvalues_pre
158 |
159 |
160 | ## Read chemical reference data and join with spectral data ====================
161 |
162 | # Read chemical reference analysis data
163 | soilchem_tbl <- read_csv(file = "data/soilchem/soilchem_yamsys.csv")
164 | #> Parsed with column specification:
165 | #> cols(
166 | #> .default = col_double(),
167 | #> sample_ID = col_character(),
168 | #> country = col_character(),
169 | #> site = col_character(),
170 | #> material = col_character(),
171 | #> site_comb = col_character()
172 | #> )
173 | #> See spec(...) for full column specifications.
174 |
175 | # Join spectra tibble and chemical reference analysis tibble
176 | spec_chem <- join_spc_chem(
177 | spc_tbl = soilspec_tbl, chem_tbl = soilchem_tbl, by = "sample_id")
178 | #> Joining, by = "sample_id"
179 |
180 | ################################################################################
181 | ## Part 2: Run PLS regression models for different soil variables
182 | ################################################################################
183 |
184 | # Example Partial Least Squares (PLS) Regression model for total Carbon (C)
185 | # Use repeated k-fold cross-validation to tune the model (choose optimal
186 | # number of PLS components) and estimate model performance on hold-out
187 | # predictions of the finally chosen model (model assessment).
188 | # This allows to use the entire set for both model building and evaluation;
189 | # recommended for small data sets
190 | pls_C <- fit_pls(
191 | # remove rows with NA in the data
192 | spec_chem = spec_chem[!is.na(spec_chem$C), ],
193 | response = C,
194 | evaluation_method = "resampling",
195 | tuning_method = "resampling",
196 | resampling_method = "rep_kfold_cv",
197 | pls_ncomp_max = 7 # maximum number of PLS components tested during tuning
198 | )
199 | ```
200 |
201 | ## Projects using simplerspec
202 |
203 | * [Spectral platform for soil samples of the Democratic Republic of Congo](https://sae-interactive-data.ethz.ch/simplerspec.drc/)
204 |
205 | ## Package help
206 |
207 | After successfully installing simplerspec, you can use the R build-in help
208 | using `?simplerspec::`
209 |
210 | ## Like it?
211 |
212 | [](https://www.buymeacoffee.com/specphil)
213 |
214 | ## Credits
215 |
216 | I would like to thank the following people for the inspiration by concepts, code and packages:
217 |
218 | * Antoine Stevens and Leonardo Ramirez-Lopez for their contributions to the [prospectr package](https://l-ramirez-lopez.r-universe.dev/prospectr) and the
219 | *Guide to Diffuse Reflectance Spectroscopy & Multivariate Calibration*
220 | * Andrew Sila, Tomislav Hengl, and Thomas Terhoeven-Urselmans for the [`read.opus()`](https://github.com/cran/soil.spec/blob/master/R/read.opus.R)
221 | function from the [soil.spec](https://cran.r-project.org/src/contrib/Archive/soil.spec/) package developed at ICRAF.
222 | * [Hadley Wickham](https://hadley.nz) for his work and concepts on data science within R
223 | * [Max Kuhn](https://github.com/topepo) for the creation of the caret package and for his excellent teaching
224 | materials on [applied predictive modeling](https://link.springer.com/book/10.1007/978-1-4614-6849-3)
225 |
--------------------------------------------------------------------------------
/R/plot-spc-extended.R:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | ## Helper functions to gather spectra, corresponding x-value vectors,
3 | ## metadata and measure columns (e.g. chemical reference data) from tibble
4 | ## list-columns into a single data.table or a list of data.tables conntaining
5 | ## long form data directly to be used for customized ggplot2 plotting functions
6 | ################################################################################
7 |
8 | # bind a list-column in a tibble to a list of data.tables ----------------------
9 |
10 | #' @title Bind list-columns within a tibble into a list of data.tables
11 | #' @description Bind one to many list-columns in spectral tibble into a list
12 | #' of data.tables.
13 | #' @param spc_tbl Spectral data in a tibble data frame (classes "tibble_df",
14 | #' "tbl" and "data.frame").
15 | #' @param lcols Character vector of column names of list-columns to be bound
16 | #' into a list of data.tables
17 | #' @param spc_id Character vector denoting column name for a unique spectrum ID.
18 | #' Default is \code{"unique_id"}.
19 | #' @param group_id Character vector denoting column name for the spectrum group
20 | #' ID. Default is \code{"sample_id"}. The group ID can later be used for
21 | #' plotting spectra by group (e.g. by using different colors or panels).
22 | #' @return A list of data.tables. Elements contain data from list-columns
23 | #' specified in \code{lcols} argument as data.tables. All data.tables contain in
24 | #' addition \code{spc_id} and \code{group_id} columns.
25 | #' @export
26 | bind_lcols_dts <- function(spc_tbl, lcols,
27 | spc_id = "unique_id",
28 | group_id = "sample_id") {
29 |
30 | # todo: add warning for lcols not present in spc_tbl
31 | which_bind <- colnames(spc_tbl) %in% lcols
32 | lcols_to_bind <- colnames(spc_tbl)[which_bind]
33 | names(lcols_to_bind) <- lcols_to_bind
34 | dts <- purrr::map(lcols_to_bind,
35 | function(y) {
36 | if (is.list(spc_tbl[, y][[y]])) {
37 | # todo: Test if number of columns is equal in each data.frame or matrix
38 | # of the list-(column); if not, return a comprehensible error
39 | data.table::data.table(do.call(rbind, spc_tbl[, y][[y]]))
40 | } else if (is.atomic((spc_tbl[, y][[y]]))) {
41 | data.table::data.table(spc_tbl[, y])
42 | }
43 | }
44 | )
45 | # Append IDs to data.tables in list
46 | spc_id <- spc_tbl[, spc_id][[spc_id]]
47 | lcol_types <- purrr::imap(dts, ~ rep(.y, nrow(spc_tbl)))
48 | group_id <- as.character(spc_tbl[, group_id][[group_id]])
49 |
50 | # Return list of data.tables
51 | purrr::imap(dts, function(dt, nm) {
52 | dt[, `:=` (spc_id = spc_id, group_id = group_id)]
53 | dt[, `:=` (lcol_type = lcol_types[[nm]])]}
54 | )
55 | }
56 |
57 |
58 | # Convert list of wide form data.tables into long form -------------------------
59 |
60 | dts_to_long <- function(spc_tbl, lcols,
61 | spc_id = "unique_id",
62 | group_id = "sample_id",
63 | variable_name = "variable",
64 | value_name = "value") {
65 |
66 | dts <- bind_lcols_dts(spc_tbl = spc_tbl, lcols = lcols,
67 | spc_id = spc_id, group_id = group_id)
68 | # Convert list of data.tables into long form
69 | dts_long <- purrr::map(dts, function(x) {
70 | data.table::melt(
71 | x,
72 | id.vars = c("spc_id", "lcol_type", "group_id"),
73 | variable.factor = FALSE,
74 | variable.name = variable_name,
75 | value.name = value_name
76 | )}
77 | )
78 | # Append unique id (idx lg:= index 'long') for long form
79 | purrr::imap(dts_long,
80 | function(dt_long, nm) {
81 | dt_long[, `:=` (id_long = 1:nrow(dt_long))]
82 | }
83 | )
84 | }
85 |
86 |
87 | # Match the spectra list columns and corresponding xunit list columns ----------
88 |
89 | match_lcols <- function(spc_tbl, lcols) {
90 |
91 | # Determine to which spectrum types list-columns belong
92 | lcols_spc_all <- c("spc", "spc_rs", "spc_mean", "spc_nocomp", "sc_sm",
93 | "sc_rf", "spc_pre")
94 | xvalue_lookup <- list(
95 | "spc" = c("wavenumbers", "wavelengths"),
96 | "spc_rs" = c("wavenumbers_rs", "wavelengths_rs"),
97 | "spc_mean" = c("wavenumbers_rs", "wavelengths_rs"),
98 | "spc_nocomp" = c("wavenumbers", "wavelengths"),
99 | "sc_sm" = c("wavenumbers_sc_sm"),
100 | "sc_rf" = c("wavenumbers_sc_rf"),
101 | "spc_pre" = c("xvalues_pre")
102 | )
103 | # Create character vector of spectra type names
104 | spc_matched <- lcols[lcols %in% lcols_spc_all]
105 | spc_matched <- spc_matched[order(match(spc_matched, lcols_spc_all))]
106 | # Create vector of corresponding xunit types in predefined order
107 | xvalues <- unlist(xvalue_lookup[spc_matched])
108 | xvalues_matched <- colnames(spc_tbl)[colnames(spc_tbl) %in% xvalues]
109 | xvalues_matched <- xvalues_matched[order(match(xvalues_matched, xvalues))]
110 | # Return all matches as list of character vectors for spectra and x-values
111 | list(
112 | spc_matched = spc_matched,
113 | xvalues_matched = xvalues_matched
114 | )
115 | }
116 |
117 |
118 | # Create a list of long form data.tables containing spectra and x-values for
119 | # a set of spectral types ------------------------------------------------------
120 |
121 | tolist_spc_xvalues <- function(spc_tbl, lcols_spc,
122 | spc_id = "unique_id",
123 | group_id = "sample_id",
124 | variable_name = "variable",
125 | value_name = "value") {
126 |
127 | lcols_matched <- match_lcols(spc_tbl = spc_tbl, lcols = lcols_spc)
128 | # Check if length of matched spectra and xunits is equal and if not,
129 | # return an error
130 | spc_types <- lcols_matched[["spc_matched"]]
131 | xunit_types <- lcols_matched[["xvalues_matched"]]
132 |
133 | # Gather different spectra types into list of data.tables
134 | spc_dts <- dts_to_long(spc_tbl = spc_tbl,
135 | lcols = spc_types, spc_id = spc_id, group_id = group_id,
136 | variable_name = "spc_variable", value_name = "spc_value")
137 | # Gather corresponding xunit types into list of data.tables
138 | xvalues_dts <- dts_to_long(spc_tbl = spc_tbl,
139 | lcols = xunit_types, spc_id = spc_id, group_id = group_id,
140 | variable_name = "xvalues_variable", value_name = "xvalues_value")
141 |
142 | # Rename lcol_type to spc_type only for spectra data.tables
143 | spc_dts <- purrr::map(spc_dts,
144 | function(x) data.table::setnames(x, "lcol_type", "spc_type"))
145 | # Return data.tables in nested list
146 | list(
147 | "spc_dts" = spc_dts,
148 | "xvalues_dts" = xvalues_dts
149 | )
150 | }
151 |
152 |
153 | # Merge data tables of spectra, xunits, metadata and measured variables
154 | # into a single long form data.table -------------------------------------------
155 |
156 | #' @title Merge list-columns of spectra, x-axis values, metadata and additional
157 | #' measured variables into a single long form data.table
158 | #' @description Helper function that merges all spectra and related data into
159 | #' a single long form data.table than can subsequently be used for plotting.
160 | #' @param spc_tbl Tibble data frame containing spectra, x-axis values, metadata
161 | #' and eventual measured variables as list-columns.
162 | #' @param lcols_spc Character vector of spectral list-columns to be extracted.
163 | #' Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).
164 | #' @param lcol_measure Character vector of length 1 denoting the column name
165 | #' of the measure columns. This argument is optional. Default is \code{NULL},
166 | #' which does not extract an additional measure column.
167 | #' @param spc_id Character vector of column that contains a unique spectral
168 | #' identifier for all spectra. Default is \code{"unique_id"}.
169 | #' @param group_id Character vector of columns that is used assigning spectra
170 | #' into groups. Default is \code{"sample_id"}. The \code{group_id} can be
171 | #' used for later plotting and thereby visually separating spectral groups into
172 | #' using different colors or panels.
173 | #' @return A single data.table containing long form aggregated data of
174 | #' spectra, x-axis values, metadata and an additionally measured variable.
175 | #' @export
176 | merge_dts <- function(spc_tbl,
177 | lcols_spc = c("spc", "spc_pre"), lcol_measure = NULL,
178 | spc_id = "unique_id",
179 | group_id = "sample_id") {
180 |
181 | id_long <- NULL
182 | spc_xvalues <- tolist_spc_xvalues(spc_tbl = spc_tbl,
183 | lcols_spc = lcols_spc, spc_id = spc_id, group_id = group_id)
184 | # Set keys for merging list of data.tables for spectra and xunits
185 | purrr::imap(
186 | spc_xvalues,
187 | function(dts, nm) purrr::map(dts[[nm]],
188 | function(x) data.table::setkey(x = x, spc_id, id_long, group_id))
189 | )
190 | spc_xvalues <- purrr::map2(spc_xvalues[["spc_dts"]],
191 | spc_xvalues[["xvalues_dts"]], merge)
192 |
193 | # Bind metadata if present, and set keys for merging metadata to spectra
194 | metadata <- bind_lcols_dts(spc_tbl = spc_tbl,
195 | lcols = "metadata", spc_id = spc_id, group_id = group_id)
196 | dts <- list(
197 | "data" = spc_xvalues,
198 | "metadata" = rep(metadata, length(spc_xvalues))
199 | )
200 | if (length(metadata) == 0) dts$metadata <- NULL
201 |
202 | # Convert a "measure" tibble column (numeric|character) to list of data.tables
203 | if (!is.null(lcol_measure)) {
204 | measure <- bind_lcols_dts(spc_tbl = spc_tbl,
205 | lcols = lcol_measure, spc_id = spc_id, group_id = group_id)
206 | dts$measure <- rep(measure, length(spc_xvalues))
207 | }
208 | # Set keys (common columns), merge metadata with spectral data (list of
209 | # data tables) and combine into a single data.table that is returned
210 | purrr::imap(dts,
211 | function(dt, nm) lapply(dts[[nm]],
212 | function(x) data.table::setkey(x = x, spc_id, group_id))
213 | )
214 | # Merge multiple data.table by common keys
215 | # https://gist.github.com/reinholdsson/67008ee3e671ff23b568
216 | data.table::rbindlist(
217 | lapply(seq_along(dts[[1]]),
218 | function(i) Reduce(merge, lapply(dts, `[[`, i)))
219 | )
220 | }
221 |
222 |
223 | # Wrapper function around merge_dts for list of tibbles to aggregate data for
224 | # plotting ---------------------------------------------------------------------
225 |
226 | #' @title Wrapper function around \code{merge_dts()} for list of tibbles to
227 | #' aggregate data for plotting.
228 | #' @description Instead of a single spectral tibble (data frame) multiple
229 | #' spectral tibbles can be merged into a long-form data.table for plotting
230 | #' spectra and related data. For details, see
231 | #' \code{\link{merge_dts}}.
232 | #' @param spc_tbl_l List of spectral tibbles (data frames).
233 | #' @param lcols_spc Character vector of spectral list-columns to be extracted.
234 | #' Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).
235 | #' @param lcol_measure Character vector of length 1 denoting the column name
236 | #' of the measure columns. This argument is optional. Default is \code{NULL},
237 | #' which does not extract an additional measure column.
238 | #' @param spc_id Character vector of column that contains a unique spectral
239 | #' identifier for all spectra. Default is \code{"unique_id"}.
240 | #' @param group_id Character vector of columns that is used assigning spectra
241 | #' into groups. Default is \code{"sample_id"}. The \code{group_id} can be
242 | #' used for later plotting and thereby visually separating spectral groups into
243 | #' using different colors or panels.
244 | #' @return A single data.table containing long form aggregated data of
245 | #' spectra, x-axis values, metadata and an additionally measured variable.
246 | #' An additional column called \code{group_id_tbl} is appended. It denotes
247 | #' the name of the spectral tibble supplied with the list \code{spc_tbl_l}.
248 | #' @export
249 | merge_dts_l <- function(spc_tbl_l,
250 | lcols_spc = c("spc", "spc_pre"),
251 | lcol_measure = NULL,
252 | spc_id = "unique_id",
253 | group_id = "sample_id") {
254 |
255 | group_id_tbl <- NULL
256 |
257 | dts <- lapply(seq_along(spc_tbl_l),
258 | function(i) merge_dts(spc_tbl = spc_tbl_l[[i]],
259 | lcols_spc = lcols_spc, lcol_measure = lcol_measure,
260 | spc_id = spc_id, group_id = group_id))
261 | dts <- lapply(seq_along(dts),
262 | function(i) dts[[i]][, group_id_tbl := names(spc_tbl_l[i])])
263 | data.table::rbindlist(dts)
264 | }
265 |
266 |
267 | ## Create plotting functions based on complete long data.table =================
268 |
269 | # Function that reorders factor column in data.table based on ascending numeric
270 | # order when converted to numeric type
271 | # https://stackoverflow.com/questions/15665535/reorder-factors-numerically-in-a-data-frame
272 | # ------------------------------------------------------------------------------
273 | reorder_factor_num <- function(dt, column = "group_id") {
274 | group_id <- NULL
275 | dt[, group_id := as.factor(group_id)]
276 | sorted_labels <- paste(sort(as.numeric(levels(dt$group_id))))
277 | dt$group_id <- factor(dt$group_id, levels = sorted_labels)
278 | dt
279 | }
280 |
281 |
282 | # Custom ggplot2 labeller for spectra types ------------------------------------
283 |
284 | relabel_spc_types <- function(lb_sc_sm = "Reflectance sample ()",
285 | lb_sc_rf = "Reflectance reference ()",
286 | lb_ig_sm = "Interferogram sample ()",
287 | lb_ig_rf = "Interferogram reference ()",
288 | lb_spc_nocomp = "Abs. before atm. comp.",
289 | lb_spc = "Absorbance",
290 | lb_spc_rs = "Resampled Abs.",
291 | lb_spc_mean = "Mean Abs.",
292 | lb_spc_pre = "Preprocessed Abs.") {
293 | ggplot2::as_labeller(
294 | x = c(
295 | "sc_sm" = lb_sc_sm,
296 | "sc_rf" = lb_sc_rf,
297 | "ig_sm" = lb_ig_sm,
298 | "spc_nocomp" = lb_spc_nocomp,
299 | "spc" = lb_spc,
300 | "spc_rs" = lb_spc_rs,
301 | "spc_mean" = lb_spc_mean,
302 | "spc_pre" = lb_spc_pre
303 | )
304 | )
305 | }
306 |
307 |
308 | # Main spectra explorative analysis and diagnostics plotting function ----------
309 |
310 | #' @title ggplot2 wrapper for extended spectra plotting
311 | #' @description \code{plot_spc_ext} is a custom plotting function developed
312 | #' within the simplerspec framework. Returns plots based on ggplot2
313 | #' (class "ggplot"). Different spectra types such as raw or preprocessed spectra
314 | #' and groups can be differentiated by different colors or by using panels
315 | #' (so called facets). Additionally, spectra can be colored based on an
316 | #' additional measure variable, e.g. determined by chemical reference analysis.
317 | #' @param spc_tbl Tibble data frame containing spectra, x-axis values, metadata
318 | #' and eventual measured variables as list-columns.
319 | #' @param spc_tbl_l List of spectral tibbles (data frames). Default is
320 | #' \code{NULL} (argument is not used).
321 | #' @param lcols_spc Character vector of spectral list-columns to be extracted.
322 | #' Default is \code{"spc"} (raw spectra).
323 | #' @param lcol_measure Character vector of length 1 denoting the column name
324 | #' of the measure columns. This argument is optional. Default is \code{NULL},
325 | #' which does not extract an additional measure column.
326 | #' @param lcol_measure_col_palette Palette value supplied to
327 | #' `ggplot::scale_colour_brewer()`. Default is `"Spectral"`, but you can set
328 | #' it to the default argument `1` (will use
329 | #' `scale_colour_brewer(..., palette = 1)`).
330 | #' @param lcol_measure_col_direction Sets the the order of colours in the scale
331 | #' that is based on a measure column. Default is \code{-1} which reverses the
332 | #' scale. Argument is passed on to the function `ggplot2::sclae_colour_brewer()`
333 | #' as argument `direction`.
334 | #' @param spc_id Character vector denoting column name for a unique spectrum ID.
335 | #' Default is \code{"unique_id"}.
336 | #' @param group_id Character vector denoting column name for the spectrum group
337 | #' ID. Default is \code{"sample_id"}. The group ID is used for
338 | #' plotting spectra by group (e.g. by using different colors or panels).
339 | #' @param group_id_order Logical that specifies whether the panel names
340 | #' derived from a numeric \code{group_id} column are reordered using ascending
341 | #' numbers. Default is \code{TRUE}.
342 | #' @param group_color Logical defining whether spectra are colored by the column
343 | #' specified by \code{group_id}.
344 | #' @param group_color_palette Character (1L) defining the diverging colour
345 | #' scales from colorbrewer.org; see `?scale_colour_brewer` for supported
346 | #' diverging colur types (`palette` argument).
347 | #' @param group_panel Logical defining whether spectra are arranged into panels
348 | #' by groups specified in \code{group_id}. Default is \code{TRUE}.
349 | #' @param group_legend Logical defining whether a legend for the \code{group_id}
350 | #' is plotted. Default is \code{FALSE}.
351 | #' @param ncol Integer vector of length 1. Defines number of columns when
352 | #' plotting panels (facets). Default is \code{NULL} (argument not used).
353 | #' @param relabel_spc Logical defining whether panels are relabeled with custom
354 | #' names for spectra types. Default is TRUE. When \code{TRUE}, arguments
355 | #' from \code{relabel_spc_types} can be passed to \code{plot_spc_ext}
356 | #' (supported via the \code{...} (ellipsis) argument)
357 | #' @param ylab Character vector or vector of type \code{"expression"} created by
358 | #' mathematical expression created by \code{expression}. Custom annotation for
359 | #' y-axis of spectra
360 | #' @param alpha Integer of length 1, from 0 to 1. Defines transparency of
361 | #' spectral lines. Default is \code{0.5} (0 is completely transparent and
362 | #' 1 is no transparency).
363 | #' @param line_width Numeric vector of length 1 specifying the width of the
364 | #' spectral lines. Default is \code{0.2}.
365 | #' @param ... Further arguments to be passed to \code{plot_spc_ext}. Currently,
366 | #' arguments of \code{relabel_spc_types} are supported.
367 | #' @return Object of class \code{"ggplot"} (ggplot2 graph).
368 | #' @export
369 | plot_spc_ext <- function(spc_tbl, spc_tbl_l = NULL,
370 | lcols_spc = "spc",
371 | lcol_measure = NULL,
372 | lcol_measure_col_palette = "Spectral",
373 | lcol_measure_col_direction = -1,
374 | spc_id = "unique_id",
375 | group_id = "sample_id", group_id_order = TRUE,
376 | group_color = TRUE,
377 | group_color_palette = NULL,
378 | group_panel = TRUE,
379 | group_legend = FALSE,
380 | ncol = NULL,
381 | relabel_spc = TRUE,
382 | ylab = "Spectrum value",
383 | alpha = 0.5, line_width = 0.2,
384 | # Further arguments to be passed to functions called
385 | # within this function
386 | ...) {
387 |
388 | # Merge spectral data, additional (measurement data) and metadata into a
389 | # single long-form data.table
390 | if (!is.null(spc_tbl_l)) {
391 | dt <- merge_dts_l(spc_tbl_l = spc_tbl_l,
392 | lcols_spc = lcols_spc, lcol_measure = lcol_measure,
393 | spc_id = spc_id, group_id = group_id) # see merge_dts_l wrapper function
394 | } else {
395 | dt <- merge_dts(spc_tbl = spc_tbl,
396 | lcols_spc = lcols_spc, lcol_measure = lcol_measure,
397 | spc_id = spc_id, group_id = group_id)
398 | }
399 | # Option to order originally numeric group_id factors by group
400 | if (is.null(spc_tbl_l)) {
401 | if (group_id_order && is.numeric(dplyr::pull(spc_tbl, !!group_id))) {
402 | dt <- reorder_factor_num(dt = dt, column = "group_id")
403 | }
404 | }
405 | brk <- pretty(dt[["xvalues_value"]], n = 10) # Pretty x-axis breaks
406 | p <- ggplot2::ggplot(data = dt,
407 | ggplot2::aes_string(x = "xvalues_value", y = "spc_value"))
408 |
409 | if (group_color == TRUE && is.null(lcol_measure)) {
410 | p <- p +
411 | ggplot2::geom_line(ggplot2::aes_string(colour = "group_id",
412 | group = "spc_id"),
413 | alpha = alpha, size = line_width)
414 | if (!is.null(group_color_palette)) {
415 | p <- p +
416 | ggplot2::scale_colour_brewer(type = "div",
417 | palette = group_color_palette, direction = -1)
418 |
419 | }
420 | if (group_legend == FALSE) {
421 | p <- p + ggplot2::guides(colour = FALSE)
422 | }
423 | } else if (group_color == FALSE && is.null(lcol_measure)) {
424 | p <- p + ggplot2::geom_line(
425 | ggplot2::aes_string(group = "spc_id"),
426 | alpha = alpha, size = line_width)
427 | }
428 |
429 | if (!is.null(lcol_measure)) {
430 | p <- p + ggplot2::geom_line(
431 | ggplot2::aes_string(colour = lcol_measure, group = "spc_id",
432 | x = "xvalues_value", y = "spc_value"),
433 | alpha = alpha, size = line_width, inherit.aes = FALSE) +
434 | ggplot2::scale_colour_distiller(palette = lcol_measure_col_palette,
435 | direction = lcol_measure_col_direction)
436 | }
437 |
438 | # Plot different spectral types and group_id in panels
439 | if (group_panel && length(lcols_spc) > 1) {
440 | if (relabel_spc) {
441 | lbl <- relabel_spc_types(...) # see this function for arguments and values
442 | p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free",
443 | labeller = ggplot2::labeller(spc_type = lbl))
444 | } else {
445 | p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free")
446 | }
447 | }
448 |
449 | if (group_panel && length(lcols_spc) == 1) {
450 | p <- p + ggplot2::facet_wrap(~ group_id, ncol = ncol, scales = "free")
451 | }
452 | # Special case when list of tibbles are supplied
453 | if (group_panel && !is.null(spc_tbl_l)) {
454 | p <- ggplot2::ggplot(data = dt,
455 | ggplot2::aes_string(x = "xvalues_value", y = "spc_value")) +
456 | ggplot2::geom_line(ggplot2::aes_string(colour = "group_id_tbl",
457 | group = "spc_id"), alpha = alpha, size = line_width)
458 | if (relabel_spc == TRUE) {
459 | lbl <- relabel_spc_types(...)
460 | p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free",
461 | labeller = ggplot2::labeller(spc_type = lbl))
462 | } else if (relabel_spc == FALSE) {
463 | p <- p + ggplot2::facet_wrap(~ group_id, ncol = ncol, scales = "free")
464 | }
465 | }
466 |
467 | p <- p + ggplot2::scale_x_reverse(breaks = brk) +
468 | ggplot2::xlab(expression(paste("Wavenumber [", cm^-1, "]"))) +
469 | ggplot2::ylab(ylab) +
470 | ggplot2::theme_bw() +
471 | ggplot2::theme(legend.position = "right",
472 | axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, vjust = 0.5))
473 | p
474 | }
475 |
476 |
--------------------------------------------------------------------------------